82 lines
2.9 KiB
Python
82 lines
2.9 KiB
Python
|
|
import sqlite3
|
|||
|
|
import pandas as pd
|
|||
|
|
import numpy as np
|
|||
|
|
|
|||
|
|
# Console display settings for the printed report: show every column,
# allow wide lines, and render floats with two decimal places.
pd.options.display.max_columns = None
pd.options.display.width = 1000
pd.options.display.float_format = '{:.2f}'.format

# Location of the SQLite database that the checks below read from.
db_path = 'database/L2/L2_Main.sqlite'
|
|||
|
|
|
|||
|
|
def _percent(count, total):
    """Return ``count`` as a percentage of ``total``, or 0.0 when ``total`` is 0."""
    return (count / total) * 100 if total else 0.0


def _report_player_fields(conn):
    """Print null and zero counts for selected ``fact_match_players`` columns."""
    print("=== 1. Fact Match Players: 关键字段零值/空值检查 ===")
    df_players = pd.read_sql("""
        SELECT
            kills, deaths, assists, adr, rating, rating2,
            kast, awp_kills, flash_duration, jump_count,
            elo_change
        FROM fact_match_players
    """, conn)

    # The row count is the same for every column, so compute it once.
    total = len(df_players)
    stats = []
    for col in df_players.columns:
        nulls = df_players[col].isnull().sum()
        zeros = (df_players[col] == 0).sum()
        stats.append({
            'Field': col,
            'Total': total,
            'Nulls': nulls,
            'Null%': _percent(nulls, total),
            'Zeros': zeros,
            'Zero%': _percent(zeros, total),
        })
    print(pd.DataFrame(stats))


def _report_kill_integrity(conn):
    """Print missing attacker/victim counts and self-kill counts for kill events."""
    print("\n=== 2. Fact Round Events (Kills): 击杀完整性检查 ===")
    # Only rows with event_type = 'kill' are inspected.
    df_kills = pd.read_sql("""
        SELECT
            attacker_steam_id, victim_steam_id,
            event_time, weapon,
            attacker_pos_x, score_change_attacker
        FROM fact_round_events
        WHERE event_type = 'kill'
    """, conn)

    total_kills = len(df_kills)
    attacker = df_kills['attacker_steam_id']
    victim = df_kills['victim_steam_id']

    # An id counts as missing when it is NULL or an empty string.
    attacker_missing = attacker.isnull() | (attacker == '')
    victim_missing = victim.isnull() | (victim == '')
    missing_attacker = attacker_missing.sum()
    missing_victim = victim_missing.sum()

    # Attacker equal to victim means a self-kill (suicide). Rows whose ids are
    # both empty strings also compare equal, but they are missing data that is
    # already counted above, so they are excluded here.
    self_kills = ((attacker == victim) & ~attacker_missing).sum()

    print(f"Total Kill Events: {total_kills}")
    print(f"Missing Attacker: {missing_attacker} ({_percent(missing_attacker, total_kills):.2f}%)")
    print(f"Missing Victim: {missing_victim} ({_percent(missing_victim, total_kills):.2f}%)")
    print(f"Self Kills (Suicide?): {self_kills}")


def _report_event_coverage(conn):
    """Print, per data source type, how many kill events carry a position and a score."""
    print("\n=== 3. Fact Round Events: 坐标与评分覆盖率 ===")
    # Coordinates should be populated for classic matches and may be empty
    # for leetify matches.
    # Score changes should be populated for leetify matches.
    df_events = pd.read_sql("""
        SELECT
            m.data_source_type,
            COUNT(*) as total_events,
            SUM(CASE WHEN e.attacker_pos_x IS NOT NULL AND e.attacker_pos_x != 0 THEN 1 ELSE 0 END) as has_pos,
            SUM(CASE WHEN e.score_change_attacker IS NOT NULL AND e.score_change_attacker != 0 THEN 1 ELSE 0 END) as has_score
        FROM fact_round_events e
        JOIN fact_matches m ON e.match_id = m.match_id
        WHERE e.event_type = 'kill'
        GROUP BY m.data_source_type
    """, conn)
    print(df_events)


def check_nulls_zeros():
    """Print a three-part data-quality report for the database at ``db_path``.

    The report covers:
      1. Null and zero counts (with percentages) for selected columns of
         ``fact_match_players``.
      2. Kill events in ``fact_round_events``: missing attacker ids, missing
         victim ids, and self-kills.
      3. Per ``data_source_type``, how many kill events have a non-zero
         attacker x-position and a non-zero attacker score change.

    Percentages are reported as 0.00 when the underlying table has no rows.
    Everything is written to stdout; the function returns ``None``.

    Raises:
        Any error raised by ``sqlite3.connect`` or ``pd.read_sql`` propagates
        to the caller; the connection is closed first.
    """
    conn = sqlite3.connect(db_path)
    try:
        _report_player_fields(conn)
        _report_kill_integrity(conn)
        _report_event_coverage(conn)
    finally:
        # Close the connection even when one of the queries fails.
        conn.close()
|
|||
|
|
|
|||
|
|
# Script entry point: run the data-quality report only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    check_nulls_zeros()
|