feat: Add recent performance stability stats (matches/days) to player profile
This commit is contained in:
82
ETL/verify/verify_deep.py
Normal file
82
ETL/verify/verify_deep.py
Normal file
@@ -0,0 +1,82 @@
|
||||
import sqlite3
import sys
from contextlib import closing

import numpy as np
import pandas as pd

# Configure pandas so printed reports are never elided: show every row and
# column, use a very wide line, two-decimal floats, and untruncated cells.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 2000)
pd.set_option('display.float_format', '{:.2f}'.format)
pd.set_option('display.max_colwidth', None)

# SQLite database file inspected by default (path is relative to the
# current working directory).
db_path = 'database/L2/L2_Main.sqlite'


def _quote_identifier(name):
    """Return *name* as a double-quoted SQLite identifier (embedded quotes doubled)."""
    return '"' + str(name).replace('"', '""') + '"'


def _profile_column(name, series, total):
    """Build the report row (a dict) for one column.

    Args:
        name: Column name, reported under ``'Field'``.
        series: The column's values as a pandas Series.
        total: Row count of the table; must be non-zero because it is the
            denominator of both percentages.
    """
    # 1. Null check.
    nulls = series.isnull().sum()

    # 2. Zero check: coerce to numeric so that numeric-looking text such as
    #    "0" is counted too; values that cannot be converted become NaN and
    #    therefore never compare equal to 0.
    try:
        zeros = (pd.to_numeric(series, errors='coerce') == 0).sum()
    except (TypeError, ValueError, OverflowError):
        # Best effort: a column that cannot be coerced at all reports 0 zeros.
        zeros = 0

    # 3. Cardinality (distinct non-null values).
    unique_count = series.nunique()

    # 4. Example value: the first non-null entry, if there is one.
    non_null = series.dropna()
    example = non_null.iloc[0] if not non_null.empty else 'ALL NULL'

    return {
        'Field': name,
        'Nulls': nulls,
        'Null%': (nulls / total) * 100,
        'Zeros': zeros,
        'Zero%': (zeros / total) * 100,
        'Unique': unique_count,
        'Example': str(example)[:50],  # truncate overly long examples
    }


def check_all_tables(path=None):
    """Print a per-column data-quality report for every table in a SQLite file.

    Every table listed in ``sqlite_master`` whose name does not start with
    the literal prefix ``sqlite_`` is loaded in full. For each of its columns
    one report row is printed containing the null count and percentage, the
    zero count and percentage, the number of distinct non-null values and the
    first non-null value (truncated to 50 characters). Rows are ordered by
    ``Zero%`` descending; ties keep the column order of the table. Tables
    with no rows are reported as empty and skipped.

    Args:
        path: SQLite database file to inspect. When omitted, the
            module-level ``db_path`` is used.
    """
    target = db_path if path is None else path

    # closing() guarantees the connection is released even if a query fails.
    with closing(sqlite3.connect(target)) as conn:
        # Fetch all table names. "_" is a single-character wildcard in LIKE,
        # so it is escaped to exclude only names that really start with
        # "sqlite_".
        tables = pd.read_sql(
            "SELECT name FROM sqlite_master "
            "WHERE type='table' AND name NOT LIKE 'sqlite\\_%' ESCAPE '\\'",
            conn,
        )['name'].tolist()

        for table in tables:
            print(f"\n{'='*20} Table: {table} {'='*20}")

            # Read the whole table; the identifier is quoted so names with
            # spaces, keywords or punctuation still work.
            df = pd.read_sql(f"SELECT * FROM {_quote_identifier(table)}", conn)
            total = len(df)

            if total == 0:
                print("Table is empty (0 rows)")
                continue

            print(f"Total Rows: {total}")
            print("-" * 60)

            stats = [
                _profile_column(col, series, total)
                for col, series in df.items()
            ]

            # Print the full statistics table, highest Zero% first so the
            # most suspicious columns are at the top; the stable sort keeps
            # the table's own column order among ties.
            df_stats = pd.DataFrame(stats).sort_values(
                'Zero%', ascending=False, kind='mergesort'
            )
            print(df_stats.to_string(index=False))
            print("\n")


if __name__ == "__main__":
    # Run the report only when executed as a script, not when imported.
    check_all_tables()
Reference in New Issue
Block a user