0.4.4 : L2 ver3 fixed several empty stats in L2.

This commit is contained in:
2026-01-24 02:32:31 +08:00
parent 7ab9df2acf
commit 01451c0b4b
4 changed files with 103 additions and 66 deletions

View File

@@ -643,6 +643,16 @@ class MatchParser:
side_stats.throw_harm_enemy = safe_int(fight_side.get('throw_harm_enemy')) side_stats.throw_harm_enemy = safe_int(fight_side.get('throw_harm_enemy'))
side_stats.uid = safe_int(fight_side.get('uid')) side_stats.uid = safe_int(fight_side.get('uid'))
side_stats.year = safe_text(fight_side.get('year')) side_stats.year = safe_text(fight_side.get('year'))
# Map missing fields
side_stats.clutch_1v1 = side_stats.end_1v1
side_stats.clutch_1v2 = side_stats.end_1v2
side_stats.clutch_1v3 = side_stats.end_1v3
side_stats.clutch_1v4 = side_stats.end_1v4
side_stats.clutch_1v5 = side_stats.end_1v5
side_stats.entry_kills = side_stats.first_kill
side_stats.entry_deaths = side_stats.first_death
return side_stats return side_stats
team_id_value = safe_int(fight.get('match_team_id')) team_id_value = safe_int(fight.get('match_team_id'))
@@ -725,6 +735,15 @@ class MatchParser:
stats.uid = safe_int(fight.get('uid')) stats.uid = safe_int(fight.get('uid'))
stats.year = safe_text(fight.get('year')) stats.year = safe_text(fight.get('year'))
# Map missing fields
stats.clutch_1v1 = stats.end_1v1
stats.clutch_1v2 = stats.end_1v2
stats.clutch_1v3 = stats.end_1v3
stats.clutch_1v4 = stats.end_1v4
stats.clutch_1v5 = stats.end_1v5
stats.entry_kills = stats.first_kill
stats.entry_deaths = stats.first_death
except Exception as e: except Exception as e:
logger.error(f"Error parsing stats for {steam_id} in {self.match_id}: {e}") logger.error(f"Error parsing stats for {steam_id} in {self.match_id}: {e}")
pass pass
@@ -754,6 +773,9 @@ class MatchParser:
p.fd_t = int(vdata.get('fd_t', 0)) p.fd_t = int(vdata.get('fd_t', 0))
p.damage_receive = int(vdata.get('damage_receive', 0)) p.damage_receive = int(vdata.get('damage_receive', 0))
p.damage_stats = int(vdata.get('damage_stats', 0)) p.damage_stats = int(vdata.get('damage_stats', 0))
p.damage_total = int(vdata.get('damage_total', 0))
p.damage_received = int(vdata.get('damage_received', 0))
p.flash_assists = int(vdata.get('flash_assists', 0))
else: else:
# Try to match by 5E ID if possible, but here keys are steamids usually # Try to match by 5E ID if possible, but here keys are steamids usually
pass pass
@@ -888,12 +910,26 @@ class MatchParser:
equipment_value = player_bron_crash.get(str(sid)) equipment_value = player_bron_crash.get(str(sid))
equipment_value = int(equipment_value) if equipment_value is not None else 0 equipment_value = int(equipment_value) if equipment_value is not None else 0
main_weapon = pick_main_weapon(items) main_weapon = pick_main_weapon(items)
has_helmet = False
has_defuser = False
if isinstance(items, list):
for it in items:
if isinstance(it, dict):
name = it.get('WeaponName', '')
if name == 'item_assaultsuit':
has_helmet = True
elif name == 'item_defuser':
has_defuser = True
rd.economies.append(PlayerEconomy( rd.economies.append(PlayerEconomy(
steam_id_64=str(sid), steam_id_64=str(sid),
side=side, side=side,
start_money=start_money, start_money=start_money,
equipment_value=equipment_value, equipment_value=equipment_value,
main_weapon=main_weapon, main_weapon=main_weapon,
has_helmet=has_helmet,
has_defuser=has_defuser,
round_performance_score=float(score) round_performance_score=float(score)
)) ))

View File

@@ -1,81 +1,82 @@
import sqlite3 import sqlite3
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import sys
# 设置pandas显示选项确保不省略任何行和列
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None) pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000) pd.set_option('display.width', 2000)
pd.set_option('display.float_format', '{:.2f}'.format) pd.set_option('display.float_format', '{:.2f}'.format)
pd.set_option('display.max_colwidth', None)
db_path = 'database/L2/L2_Main.sqlite' db_path = 'database/L2/L2_Main.sqlite'
def _quote_identifier(name):
    """Return *name* as a double-quoted SQLite identifier.

    Embedded double quotes are doubled so that table names containing
    spaces, keywords or quotes can be interpolated into a statement.
    """
    return '"' + str(name).replace('"', '""') + '"'


def _profile_column(series, total):
    """Build the statistics row for one column.

    Args:
        series: The column's values as a pandas Series.
        total: Number of rows in the table (must be non-zero).

    Returns:
        A dict with the field name, null/zero counts and percentages,
        the number of distinct non-null values, and a truncated example.
    """
    nulls = series.isnull().sum()

    # Zero check: coerce to numeric so that text such as "0" is counted too;
    # values that cannot be converted become NaN and never compare equal to 0.
    try:
        zeros = (pd.to_numeric(series, errors='coerce') == 0).sum()
    except (TypeError, ValueError):
        zeros = 0

    # Example value: the first non-null entry, or a marker if there is none.
    non_null = series.dropna()
    example = non_null.iloc[0] if len(non_null) > 0 else 'ALL NULL'

    return {
        'Field': series.name,
        'Nulls': nulls,
        'Null%': (nulls / total) * 100,
        'Zeros': zeros,
        'Zero%': (zeros / total) * 100,
        'Unique': series.nunique(),
        'Example': str(example)[:50],  # truncate overly long examples
    }


def check_all_tables(path=None):
    """Print a null/zero/cardinality report for every user table.

    For each table listed in ``sqlite_master`` (internal ``sqlite_*`` tables
    excluded) the whole table is loaded and one line per column is printed,
    ordered by the share of zero values (highest first).

    Args:
        path: SQLite database file to inspect. Defaults to the module-level
            ``db_path`` when omitted.
    """
    conn = sqlite3.connect(db_path if path is None else path)
    try:
        # Collect the names of all user tables.
        tables = pd.read_sql(
            "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'",
            conn,
        )['name'].tolist()

        for table in tables:
            print(f"\n{'='*20} Table: {table} {'='*20}")

            # Load the full table; the identifier is quoted so unusual
            # table names do not break the statement.
            df = pd.read_sql(f"SELECT * FROM {_quote_identifier(table)}", conn)
            total = len(df)

            if total == 0:
                print("Table is empty (0 rows)")
                continue

            print(f"Total Rows: {total}")
            print("-" * 60)

            stats = [_profile_column(df[col], total) for col in df.columns]

            # Highest Zero% first to surface suspicious fields; a stable sort
            # keeps the schema order among columns with equal Zero%.
            df_stats = pd.DataFrame(stats).sort_values(
                'Zero%', ascending=False, kind='mergesort'
            )
            print(df_stats.to_string(index=False))
            print("\n")
    finally:
        # Release the connection even when a query fails midway.
        conn.close()
if __name__ == "__main__": if __name__ == "__main__":
check_nulls_zeros() check_all_tables()

Binary file not shown.

Binary file not shown.