"""Data-quality report for the L2 SQLite database.

Prints three sections to stdout:

1. Null/zero counts for key numeric columns of ``fact_match_players``.
2. Integrity of kill events in ``fact_round_events`` (missing attacker or
   victim IDs, self kills).
3. Per ``data_source_type`` coverage of attacker coordinates and attacker
   score changes on kill events.
"""

import sqlite3

import numpy as np
import pandas as pd

pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
pd.set_option('display.float_format', '{:.2f}'.format)

# Default database location, used when check_nulls_zeros() gets no argument.
db_path = 'database/L2/L2_Main.sqlite'

# The SQL text is assembled from adjacent literals so the statements sent to
# SQLite stay exactly the same while the source lines remain readable.
_PLAYER_FIELDS_SQL = (
    " SELECT kills, deaths, assists, adr, rating, rating2, kast,"
    " awp_kills, flash_duration, jump_count, elo_change"
    " FROM fact_match_players "
)

_KILL_EVENTS_SQL = (
    " SELECT attacker_steam_id, victim_steam_id, event_time, weapon,"
    " attacker_pos_x, score_change_attacker"
    " FROM fact_round_events"
    " WHERE event_type = 'kill' "
)

_COVERAGE_SQL = (
    " SELECT m.data_source_type,"
    " COUNT(*) as total_events,"
    " SUM(CASE WHEN e.attacker_pos_x IS NOT NULL"
    " AND e.attacker_pos_x != 0 THEN 1 ELSE 0 END) as has_pos,"
    " SUM(CASE WHEN e.score_change_attacker IS NOT NULL"
    " AND e.score_change_attacker != 0 THEN 1 ELSE 0 END) as has_score"
    " FROM fact_round_events e"
    " JOIN fact_matches m ON e.match_id = m.match_id"
    " WHERE e.event_type = 'kill'"
    " GROUP BY m.data_source_type "
)


def _percent(part, whole):
    """Return ``part`` as a percentage of ``whole``; 0.0 when ``whole`` is 0."""
    if not whole:
        # An empty table would otherwise yield NaN and a numpy warning.
        return 0.0
    return (part / whole) * 100


def _missing_mask(ids):
    """Return a boolean mask of the entries that are NULL or an empty string."""
    return ids.isnull() | (ids == '')


def _report_player_fields(conn):
    """Print null and zero counts for each selected fact_match_players column."""
    print("=== 1. Fact Match Players: 关键字段零值/空值检查 ===")
    df_players = pd.read_sql(_PLAYER_FIELDS_SQL, conn)

    total = len(df_players)
    stats = []
    for col in df_players.columns:
        nulls = df_players[col].isnull().sum()
        zeros = (df_players[col] == 0).sum()
        stats.append({
            'Field': col,
            'Total': total,
            'Nulls': nulls,
            'Null%': _percent(nulls, total),
            'Zeros': zeros,
            'Zero%': _percent(zeros, total),
        })
    print(pd.DataFrame(stats))


def _report_kill_integrity(conn):
    """Print missing attacker/victim counts and self kills for kill events."""
    print("\n=== 2. Fact Round Events (Kills): 击杀完整性检查 ===")
    # Only rows with event_type = 'kill' are inspected.
    df_kills = pd.read_sql(_KILL_EVENTS_SQL, conn)

    total_kills = len(df_kills)
    attacker = df_kills['attacker_steam_id']
    victim = df_kills['victim_steam_id']
    attacker_missing = _missing_mask(attacker)

    missing_attacker = attacker_missing.sum()
    missing_victim = _missing_mask(victim).sum()

    # Attacker equal to victim means a self kill (suicide). Rows with a
    # missing attacker are excluded so that two empty IDs are not reported
    # as a self kill.
    self_kills = ((attacker == victim) & ~attacker_missing).sum()

    print(f"Total Kill Events: {total_kills}")
    print(f"Missing Attacker: {missing_attacker} ({_percent(missing_attacker, total_kills):.2f}%)")
    print(f"Missing Victim: {missing_victim} ({_percent(missing_victim, total_kills):.2f}%)")
    print(f"Self Kills (Suicide?): {self_kills}")


def _report_coverage(conn):
    """Print coordinate and score coverage of kill events per data source."""
    print("\n=== 3. Fact Round Events: 坐标与评分覆盖率 ===")
    # Expectation noted by the original author: coordinates should be
    # populated for "classic" matches and may be empty for "leetify"
    # matches, while score changes should be populated for "leetify" matches.
    df_events = pd.read_sql(_COVERAGE_SQL, conn)
    print(df_events)


def check_nulls_zeros(db_file=None):
    """Print the null/zero and integrity report for the match database.

    Args:
        db_file: Path of the SQLite database to inspect. Defaults to the
            module-level ``db_path`` when omitted or ``None``.

    Returns:
        None. The report is written to stdout.

    Raises:
        Whatever ``sqlite3`` / ``pandas.read_sql`` raise when the database
        or one of the queried tables cannot be read. The connection is
        closed before the error propagates.
    """
    conn = sqlite3.connect(db_path if db_file is None else db_file)
    try:
        _report_player_fields(conn)
        _report_kill_integrity(conn)
        _report_coverage(conn)
    finally:
        # Close even when a query fails, so the connection is never leaked.
        conn.close()


if __name__ == "__main__":
    check_nulls_zeros()