0.4.1: L2ver2 finished

This commit is contained in:
2026-01-24 00:43:05 +08:00
parent 879f63302c
commit 1deda4393c
8 changed files with 1277 additions and 271 deletions

View File

@@ -52,8 +52,12 @@ class PlayerStats:
jump_count: int = 0
damage_total: int = 0
damage_received: int = 0
damage_receive: int = 0
damage_stats: int = 0
assisted_kill: int = 0
awp_kill: int = 0
awp_kill_ct: int = 0
awp_kill_t: int = 0
benefit_kill: int = 0
day: str = ""
defused_bomb: int = 0
@@ -64,6 +68,8 @@ class PlayerStats:
end_1v5: int = 0
explode_bomb: int = 0
first_death: int = 0
fd_ct: int = 0
fd_t: int = 0
first_kill: int = 0
flash_enemy: int = 0
flash_team: int = 0
@@ -109,6 +115,8 @@ class PlayerStats:
throw_harm_enemy: int = 0
uid: int = 0
year: str = ""
sts_raw: str = ""
level_info_raw: str = ""
@dataclass
class RoundEvent:
@@ -159,6 +167,19 @@ class RoundData:
events: List[RoundEvent] = field(default_factory=list)
economies: List[PlayerEconomy] = field(default_factory=list)
# Summary of one team ("group") in a match. The parser builds one instance for
# each group index (1 and 2) from the `group{idx}_*` keys of the match payload,
# and save_match stores each instance as a row of fact_match_teams.
@dataclass
class MatchTeamData:
group_id: int  # group index used to build the key names (1 or 2)
group_all_score: int = 0  # from `group{idx}_all_score`
group_change_elo: float = 0.0  # from `group{idx}_change_elo`
group_fh_role: int = 0  # from `group{idx}_fh_role` (presumably the first-half side; confirm)
group_fh_score: int = 0  # from `group{idx}_fh_score`
group_origin_elo: float = 0.0  # from `group{idx}_origin_elo`
group_sh_role: int = 0  # from `group{idx}_sh_role` (presumably the second-half side; confirm)
group_sh_score: int = 0  # from `group{idx}_sh_score`
group_tid: int = 0  # from `group{idx}_tid`
group_uids: str = ""  # from `group{idx}_uids`, stored as text
@dataclass
class MatchData:
match_id: str
@@ -173,12 +194,52 @@ class MatchData:
server_ip: str = ""
server_port: int = 0
location: str = ""
has_side_data_and_rating2: int = 0
match_main_id: int = 0
demo_url: str = ""
game_mode: int = 0
game_name: str = ""
map_desc: str = ""
location_full: str = ""
match_mode: int = 0
match_status: int = 0
match_flag: int = 0
status: int = 0
waiver: int = 0
year: int = 0
season: str = ""
round_total: int = 0
cs_type: int = 0
priority_show_type: int = 0
pug10m_show_type: int = 0
credit_match_status: int = 0
knife_winner: int = 0
knife_winner_role: int = 0
most_1v2_uid: int = 0
most_assist_uid: int = 0
most_awp_uid: int = 0
most_end_uid: int = 0
most_first_kill_uid: int = 0
most_headshot_uid: int = 0
most_jump_uid: int = 0
mvp_uid: int = 0
response_code: int = 0
response_message: str = ""
response_status: int = 0
response_timestamp: int = 0
response_trace_id: str = ""
response_success: int = 0
response_errcode: int = 0
treat_info_raw: str = ""
round_list_raw: str = ""
leetify_data_raw: str = ""
data_source_type: str = "unknown"
players: Dict[str, PlayerStats] = field(default_factory=dict) # Key: steam_id_64
players_t: Dict[str, PlayerStats] = field(default_factory=dict)
players_ct: Dict[str, PlayerStats] = field(default_factory=dict)
rounds: List[RoundData] = field(default_factory=list)
player_meta: Dict[str, Dict] = field(default_factory=dict) # steam_id -> {uid, name, avatar, ...}
teams: List[MatchTeamData] = field(default_factory=list)
# --- Database Helper ---
@@ -210,6 +271,7 @@ class MatchParser:
# Extracted JSON bodies
self.data_match = None
self.data_match_wrapper = None
self.data_vip = None
self.data_leetify = None
self.data_round_list = None
@@ -226,6 +288,7 @@ class MatchParser:
# Check URLs
if 'crane/http/api/data/match/' in url:
self.data_match_wrapper = body
self.data_match = body.get('data', {})
elif 'crane/http/api/data/vip_plus_match_data/' in url:
self.data_vip = body.get('data', {})
@@ -246,12 +309,24 @@ class MatchParser:
# Decide which round source to use
if self.data_leetify and self.data_leetify.get('leetify_data'):
self.match_data.data_source_type = 'leetify'
try:
self.match_data.leetify_data_raw = json.dumps(self.data_leetify.get('leetify_data', {}), ensure_ascii=False)
except:
self.match_data.leetify_data_raw = ""
self.match_data.round_list_raw = ""
self._parse_leetify_rounds()
elif self.data_round_list and self.data_round_list.get('round_list'):
self.match_data.data_source_type = 'classic'
try:
self.match_data.round_list_raw = json.dumps(self.data_round_list.get('round_list', []), ensure_ascii=False)
except:
self.match_data.round_list_raw = ""
self.match_data.leetify_data_raw = ""
self._parse_classic_rounds()
else:
self.match_data.data_source_type = 'unknown'
self.match_data.round_list_raw = ""
self.match_data.leetify_data_raw = ""
logger.info(f"No round data found for {self.match_id}")
return self.match_data
@@ -273,12 +348,89 @@ class MatchParser:
except:
self.match_data.server_port = 0
self.match_data.location = m.get('location', '')
def safe_int(val):
    """Coerce a loosely-typed value to ``int``, returning 0 when impossible.

    ``None`` and values that cannot be converted yield 0. Integers (and
    integer-literal strings) are converted directly so that values above
    2**53 are not rounded by a detour through ``float``; anything else is
    parsed as a float and truncated toward zero.
    """
    if val is None:
        return 0
    if isinstance(val, int):
        # Also covers bool. Going through float() would lose precision.
        return int(val)
    if isinstance(val, str):
        try:
            return int(val)
        except ValueError:
            pass  # not a plain integer literal; try float parsing below
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type; ValueError: bad text or NaN;
        # OverflowError: infinity or a value too large for float.
        return 0
def safe_float(val):
    """Coerce a loosely-typed value to ``float``, returning 0.0 when impossible.

    ``None`` and values that ``float()`` rejects yield 0.0. Only conversion
    errors are absorbed, so ``KeyboardInterrupt`` and ``SystemExit`` still
    propagate.
    """
    if val is None:
        return 0.0
    try:
        return float(val)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type; ValueError: unparsable text;
        # OverflowError: integer too large to represent as a float.
        return 0.0
def safe_text(val):
    """Return ``str(val)``, mapping ``None`` to the empty string."""
    if val is None:
        return ""
    return str(val)
wrapper = self.data_match_wrapper or {}
self.match_data.response_code = safe_int(wrapper.get('code'))
self.match_data.response_message = safe_text(wrapper.get('message'))
self.match_data.response_status = safe_int(wrapper.get('status'))
self.match_data.response_timestamp = safe_int(wrapper.get('timeStamp') if wrapper.get('timeStamp') is not None else wrapper.get('timestamp'))
self.match_data.response_trace_id = safe_text(wrapper.get('traceId') if wrapper.get('traceId') is not None else wrapper.get('trace_id'))
self.match_data.response_success = safe_int(wrapper.get('success'))
self.match_data.response_errcode = safe_int(wrapper.get('errcode'))
self.match_data.has_side_data_and_rating2 = safe_int(self.data_match.get('has_side_data_and_rating2'))
self.match_data.match_main_id = safe_int(m.get('id'))
self.match_data.demo_url = safe_text(m.get('demo_url'))
self.match_data.game_mode = safe_int(m.get('game_mode'))
self.match_data.game_name = safe_text(m.get('game_name'))
self.match_data.map_desc = safe_text(m.get('map_desc'))
self.match_data.location_full = safe_text(m.get('location_full'))
self.match_data.match_mode = safe_int(m.get('match_mode'))
self.match_data.match_status = safe_int(m.get('match_status'))
self.match_data.match_flag = safe_int(m.get('match_flag'))
self.match_data.status = safe_int(m.get('status'))
self.match_data.waiver = safe_int(m.get('waiver'))
self.match_data.year = safe_int(m.get('year'))
self.match_data.season = safe_text(m.get('season'))
self.match_data.round_total = safe_int(m.get('round_total'))
self.match_data.cs_type = safe_int(m.get('cs_type'))
self.match_data.priority_show_type = safe_int(m.get('priority_show_type'))
self.match_data.pug10m_show_type = safe_int(m.get('pug10m_show_type'))
self.match_data.credit_match_status = safe_int(m.get('credit_match_status'))
self.match_data.knife_winner = safe_int(m.get('knife_winner'))
self.match_data.knife_winner_role = safe_int(m.get('knife_winner_role'))
self.match_data.most_1v2_uid = safe_int(m.get('most_1v2_uid'))
self.match_data.most_assist_uid = safe_int(m.get('most_assist_uid'))
self.match_data.most_awp_uid = safe_int(m.get('most_awp_uid'))
self.match_data.most_end_uid = safe_int(m.get('most_end_uid'))
self.match_data.most_first_kill_uid = safe_int(m.get('most_first_kill_uid'))
self.match_data.most_headshot_uid = safe_int(m.get('most_headshot_uid'))
self.match_data.most_jump_uid = safe_int(m.get('most_jump_uid'))
self.match_data.mvp_uid = safe_int(m.get('mvp_uid'))
treat_info = self.data_match.get('treat_info')
if treat_info is not None:
try:
self.match_data.treat_info_raw = json.dumps(treat_info, ensure_ascii=False)
except:
self.match_data.treat_info_raw = ""
self.match_data.teams = []
for idx in [1, 2]:
team = MatchTeamData(
group_id=idx,
group_all_score=safe_int(m.get(f"group{idx}_all_score")),
group_change_elo=safe_float(m.get(f"group{idx}_change_elo")),
group_fh_role=safe_int(m.get(f"group{idx}_fh_role")),
group_fh_score=safe_int(m.get(f"group{idx}_fh_score")),
group_origin_elo=safe_float(m.get(f"group{idx}_origin_elo")),
group_sh_role=safe_int(m.get(f"group{idx}_sh_role")),
group_sh_score=safe_int(m.get(f"group{idx}_sh_score")),
group_tid=safe_int(m.get(f"group{idx}_tid")),
group_uids=safe_text(m.get(f"group{idx}_uids"))
)
self.match_data.teams.append(team)
def _parse_players_base(self):
# Players are in group_1 and group_2 lists in data_match
groups = []
if 'group_1' in self.data_match: groups.extend(self.data_match['group_1'])
if 'group_2' in self.data_match: groups.extend(self.data_match['group_2'])
def safe_int(val):
    """Coerce a loosely-typed value to ``int``, returning 0 when impossible.

    ``None`` and values that cannot be converted yield 0. Integers (and
    integer-literal strings) are converted directly so that values above
    2**53 are not rounded by a detour through ``float``; anything else is
    parsed as a float and truncated toward zero.
    """
    if val is None:
        return 0
    if isinstance(val, int):
        # Also covers bool. Going through float() would lose precision.
        return int(val)
    if isinstance(val, str):
        try:
            return int(val)
        except ValueError:
            pass  # not a plain integer literal; try float parsing below
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type; ValueError: bad text or NaN;
        # OverflowError: infinity or a value too large for float.
        return 0
def safe_text(val):
    """Return ``str(val)``, mapping ``None`` to the empty string."""
    if val is None:
        return ""
    return str(val)
for p in groups:
# We need steam_id.
@@ -305,17 +457,90 @@ class MatchParser:
if not steam_id:
continue
status = user_data.get('status', {})
platform_exp = user_data.get('platformExp', {})
trusted = user_data.get('trusted', {})
certify = user_data.get('certify', {})
identity = user_data.get('identity', {})
plus_info = user_info.get('plus_info', {}) or p.get('plus_info', {})
user_info_raw = ""
try:
user_info_raw = json.dumps(user_info, ensure_ascii=False)
except:
user_info_raw = ""
self.match_data.player_meta[steam_id] = {
'uid': uid,
'username': user_data.get('username', ''),
'avatar_url': profile.get('avatarUrl', ''),
'domain': profile.get('domain', ''),
'created_at': user_data.get('createdAt', 0),
'updated_at': user_data.get('updatedAt', 0)
'uid': safe_int(uid),
'username': safe_text(user_data.get('username')),
'uuid': safe_text(user_data.get('uuid')),
'email': safe_text(user_data.get('email')),
'area': safe_text(user_data.get('area')),
'mobile': safe_text(user_data.get('mobile')),
'avatar_url': safe_text(profile.get('avatarUrl')),
'domain': safe_text(profile.get('domain')),
'user_domain': safe_text(user_data.get('domain')),
'created_at': safe_int(user_data.get('createdAt')),
'updated_at': safe_int(user_data.get('updatedAt')),
'username_audit_status': safe_int(user_data.get('usernameAuditStatus')),
'accid': safe_text(user_data.get('Accid')),
'team_id': safe_int(user_data.get('teamID')),
'trumpet_count': safe_int(user_data.get('trumpetCount')),
'profile_nickname': safe_text(profile.get('nickname')),
'profile_avatar_audit_status': safe_int(profile.get('avatarAuditStatus')),
'profile_rgb_avatar_url': safe_text(profile.get('rgbAvatarUrl')),
'profile_photo_url': safe_text(profile.get('photoUrl')),
'profile_gender': safe_int(profile.get('gender')),
'profile_birthday': safe_int(profile.get('birthday')),
'profile_country_id': safe_text(profile.get('countryId')),
'profile_region_id': safe_text(profile.get('regionId')),
'profile_city_id': safe_text(profile.get('cityId')),
'profile_language': safe_text(profile.get('language')),
'profile_recommend_url': safe_text(profile.get('recommendUrl')),
'profile_group_id': safe_int(profile.get('groupId')),
'profile_reg_source': safe_int(profile.get('regSource')),
'status_status': safe_int(status.get('status')),
'status_expire': safe_int(status.get('expire')),
'status_cancellation_status': safe_int(status.get('cancellationStatus')),
'status_new_user': safe_int(status.get('newUser')),
'status_login_banned_time': safe_int(status.get('loginBannedTime')),
'status_anticheat_type': safe_int(status.get('anticheatType')),
'status_flag_status1': safe_text(status.get('flagStatus1')),
'status_anticheat_status': safe_text(status.get('anticheatStatus')),
'status_flag_honor': safe_text(status.get('FlagHonor')),
'status_privacy_policy_status': safe_int(status.get('PrivacyPolicyStatus')),
'status_csgo_frozen_exptime': safe_int(status.get('csgoFrozenExptime')),
'platformexp_level': safe_int(platform_exp.get('level')),
'platformexp_exp': safe_int(platform_exp.get('exp')),
'steam_account': safe_text(steam_data.get('steamAccount')),
'steam_trade_url': safe_text(steam_data.get('tradeUrl')),
'steam_rent_id': safe_text(steam_data.get('rentSteamId')),
'trusted_credit': safe_int(trusted.get('credit')),
'trusted_credit_level': safe_int(trusted.get('creditLevel')),
'trusted_score': safe_int(trusted.get('score')),
'trusted_status': safe_int(trusted.get('status')),
'trusted_credit_status': safe_int(trusted.get('creditStatus')),
'certify_id_type': safe_int(certify.get('idType')),
'certify_status': safe_int(certify.get('status')),
'certify_age': safe_int(certify.get('age')),
'certify_real_name': safe_text(certify.get('realName')),
'certify_uid_list': safe_text(json.dumps(certify.get('uidList'), ensure_ascii=False)) if certify.get('uidList') is not None else "",
'certify_audit_status': safe_int(certify.get('auditStatus')),
'certify_gender': safe_int(certify.get('gender')),
'identity_type': safe_int(identity.get('type')),
'identity_extras': safe_text(identity.get('extras')),
'identity_status': safe_int(identity.get('status')),
'identity_slogan': safe_text(identity.get('slogan')),
'identity_list': safe_text(json.dumps(identity.get('identity_list'), ensure_ascii=False)) if identity.get('identity_list') is not None else "",
'identity_slogan_ext': safe_text(identity.get('slogan_ext')),
'identity_live_url': safe_text(identity.get('live_url')),
'identity_live_type': safe_int(identity.get('live_type')),
'plus_is_plus': safe_int(plus_info.get('is_plus')),
'user_info_raw': user_info_raw
}
stats = PlayerStats(steam_id_64=steam_id)
sts = p.get('sts', {})
level_info = p.get('level_info', {})
try:
# Use safe conversion helper
@@ -329,6 +554,16 @@ class MatchParser:
def safe_text(val):
    """Return ``str(val)``, mapping ``None`` to the empty string."""
    if val is None:
        return ""
    return str(val)
if sts is not None:
try:
stats.sts_raw = json.dumps(sts, ensure_ascii=False)
except:
stats.sts_raw = ""
if level_info is not None:
try:
stats.level_info_raw = json.dumps(level_info, ensure_ascii=False)
except:
stats.level_info_raw = ""
def get_stat(key):
if key in fight and fight.get(key) not in [None, ""]:
@@ -513,11 +748,22 @@ class MatchParser:
p = self.match_data.players[sid]
p.kast = float(vdata.get('kast', 0))
p.awp_kills = int(vdata.get('awp_kill', 0))
# Damage stats might need calculation or mapping
# p.damage_total = ...
p.awp_kill_ct = int(vdata.get('awp_kill_ct', 0))
p.awp_kill_t = int(vdata.get('awp_kill_t', 0))
p.fd_ct = int(vdata.get('fd_ct', 0))
p.fd_t = int(vdata.get('fd_t', 0))
p.damage_receive = int(vdata.get('damage_receive', 0))
p.damage_stats = int(vdata.get('damage_stats', 0))
else:
# Try to match by 5E ID if possible, but here keys are steamids usually
pass
for sid, p in self.match_data.players.items():
if sid in self.match_data.players_t:
self.match_data.players_t[sid].awp_kill_t = p.awp_kill_t
self.match_data.players_t[sid].fd_t = p.fd_t
if sid in self.match_data.players_ct:
self.match_data.players_ct[sid].awp_kill_ct = p.awp_kill_ct
self.match_data.players_ct[sid].fd_ct = p.fd_ct
def _parse_leetify_rounds(self):
l_data = self.data_leetify.get('leetify_data', {})
@@ -744,34 +990,169 @@ def process_matches():
def save_match(cursor, m: MatchData):
# 1. Dim Players (Upsert)
player_meta_columns = [
"steam_id_64", "uid", "username", "avatar_url", "domain", "created_at", "updated_at",
"last_seen_match_id", "uuid", "email", "area", "mobile", "user_domain",
"username_audit_status", "accid", "team_id", "trumpet_count",
"profile_nickname", "profile_avatar_audit_status", "profile_rgb_avatar_url",
"profile_photo_url", "profile_gender", "profile_birthday", "profile_country_id",
"profile_region_id", "profile_city_id", "profile_language", "profile_recommend_url",
"profile_group_id", "profile_reg_source", "status_status", "status_expire",
"status_cancellation_status", "status_new_user", "status_login_banned_time",
"status_anticheat_type", "status_flag_status1", "status_anticheat_status",
"status_flag_honor", "status_privacy_policy_status", "status_csgo_frozen_exptime",
"platformexp_level", "platformexp_exp", "steam_account", "steam_trade_url",
"steam_rent_id", "trusted_credit", "trusted_credit_level", "trusted_score",
"trusted_status", "trusted_credit_status", "certify_id_type", "certify_status",
"certify_age", "certify_real_name", "certify_uid_list", "certify_audit_status",
"certify_gender", "identity_type", "identity_extras", "identity_status",
"identity_slogan", "identity_list", "identity_slogan_ext", "identity_live_url",
"identity_live_type", "plus_is_plus", "user_info_raw"
]
player_meta_placeholders = ",".join(["?"] * len(player_meta_columns))
player_meta_columns_sql = ",".join(player_meta_columns)
for sid, meta in m.player_meta.items():
cursor.execute("""
INSERT INTO dim_players (steam_id_64, uid, username, avatar_url, domain, created_at, updated_at, last_seen_match_id)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
INSERT INTO dim_players (""" + player_meta_columns_sql + """)
VALUES (""" + player_meta_placeholders + """)
ON CONFLICT(steam_id_64) DO UPDATE SET
uid=excluded.uid,
username=excluded.username,
avatar_url=excluded.avatar_url,
last_seen_match_id=excluded.last_seen_match_id
domain=excluded.domain,
created_at=excluded.created_at,
updated_at=excluded.updated_at,
last_seen_match_id=excluded.last_seen_match_id,
uuid=excluded.uuid,
email=excluded.email,
area=excluded.area,
mobile=excluded.mobile,
user_domain=excluded.user_domain,
username_audit_status=excluded.username_audit_status,
accid=excluded.accid,
team_id=excluded.team_id,
trumpet_count=excluded.trumpet_count,
profile_nickname=excluded.profile_nickname,
profile_avatar_audit_status=excluded.profile_avatar_audit_status,
profile_rgb_avatar_url=excluded.profile_rgb_avatar_url,
profile_photo_url=excluded.profile_photo_url,
profile_gender=excluded.profile_gender,
profile_birthday=excluded.profile_birthday,
profile_country_id=excluded.profile_country_id,
profile_region_id=excluded.profile_region_id,
profile_city_id=excluded.profile_city_id,
profile_language=excluded.profile_language,
profile_recommend_url=excluded.profile_recommend_url,
profile_group_id=excluded.profile_group_id,
profile_reg_source=excluded.profile_reg_source,
status_status=excluded.status_status,
status_expire=excluded.status_expire,
status_cancellation_status=excluded.status_cancellation_status,
status_new_user=excluded.status_new_user,
status_login_banned_time=excluded.status_login_banned_time,
status_anticheat_type=excluded.status_anticheat_type,
status_flag_status1=excluded.status_flag_status1,
status_anticheat_status=excluded.status_anticheat_status,
status_flag_honor=excluded.status_flag_honor,
status_privacy_policy_status=excluded.status_privacy_policy_status,
status_csgo_frozen_exptime=excluded.status_csgo_frozen_exptime,
platformexp_level=excluded.platformexp_level,
platformexp_exp=excluded.platformexp_exp,
steam_account=excluded.steam_account,
steam_trade_url=excluded.steam_trade_url,
steam_rent_id=excluded.steam_rent_id,
trusted_credit=excluded.trusted_credit,
trusted_credit_level=excluded.trusted_credit_level,
trusted_score=excluded.trusted_score,
trusted_status=excluded.trusted_status,
trusted_credit_status=excluded.trusted_credit_status,
certify_id_type=excluded.certify_id_type,
certify_status=excluded.certify_status,
certify_age=excluded.certify_age,
certify_real_name=excluded.certify_real_name,
certify_uid_list=excluded.certify_uid_list,
certify_audit_status=excluded.certify_audit_status,
certify_gender=excluded.certify_gender,
identity_type=excluded.identity_type,
identity_extras=excluded.identity_extras,
identity_status=excluded.identity_status,
identity_slogan=excluded.identity_slogan,
identity_list=excluded.identity_list,
identity_slogan_ext=excluded.identity_slogan_ext,
identity_live_url=excluded.identity_live_url,
identity_live_type=excluded.identity_live_type,
plus_is_plus=excluded.plus_is_plus,
user_info_raw=excluded.user_info_raw
""", (
sid, meta.get('uid'), meta.get('username'), meta.get('avatar_url'),
meta.get('domain'), meta.get('created_at'), meta.get('updated_at'),
m.match_id
sid, meta.get('uid'), meta.get('username'), meta.get('avatar_url'),
meta.get('domain'), meta.get('created_at'), meta.get('updated_at'),
m.match_id, meta.get('uuid'), meta.get('email'), meta.get('area'),
meta.get('mobile'), meta.get('user_domain'), meta.get('username_audit_status'),
meta.get('accid'), meta.get('team_id'), meta.get('trumpet_count'),
meta.get('profile_nickname'), meta.get('profile_avatar_audit_status'),
meta.get('profile_rgb_avatar_url'), meta.get('profile_photo_url'),
meta.get('profile_gender'), meta.get('profile_birthday'),
meta.get('profile_country_id'), meta.get('profile_region_id'),
meta.get('profile_city_id'), meta.get('profile_language'),
meta.get('profile_recommend_url'), meta.get('profile_group_id'),
meta.get('profile_reg_source'), meta.get('status_status'),
meta.get('status_expire'), meta.get('status_cancellation_status'),
meta.get('status_new_user'), meta.get('status_login_banned_time'),
meta.get('status_anticheat_type'), meta.get('status_flag_status1'),
meta.get('status_anticheat_status'), meta.get('status_flag_honor'),
meta.get('status_privacy_policy_status'), meta.get('status_csgo_frozen_exptime'),
meta.get('platformexp_level'), meta.get('platformexp_exp'),
meta.get('steam_account'), meta.get('steam_trade_url'),
meta.get('steam_rent_id'), meta.get('trusted_credit'),
meta.get('trusted_credit_level'), meta.get('trusted_score'),
meta.get('trusted_status'), meta.get('trusted_credit_status'),
meta.get('certify_id_type'), meta.get('certify_status'),
meta.get('certify_age'), meta.get('certify_real_name'),
meta.get('certify_uid_list'), meta.get('certify_audit_status'),
meta.get('certify_gender'), meta.get('identity_type'),
meta.get('identity_extras'), meta.get('identity_status'),
meta.get('identity_slogan'), meta.get('identity_list'),
meta.get('identity_slogan_ext'), meta.get('identity_live_url'),
meta.get('identity_live_type'), meta.get('plus_is_plus'),
meta.get('user_info_raw')
))
# 2. Dim Maps (Ignore if exists)
if m.map_name:
cursor.execute("INSERT OR IGNORE INTO dim_maps (map_name) VALUES (?)", (m.map_name,))
cursor.execute("""
INSERT INTO dim_maps (map_name, map_desc)
VALUES (?, ?)
ON CONFLICT(map_name) DO UPDATE SET
map_desc=excluded.map_desc
""", (m.map_name, m.map_desc))
# 3. Fact Matches
cursor.execute("""
INSERT OR REPLACE INTO fact_matches
(match_id, match_code, map_name, start_time, end_time, duration, winner_team, score_team1, score_team2, server_ip, server_port, location, data_source_type)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
(match_id, match_code, map_name, start_time, end_time, duration, winner_team, score_team1, score_team2, server_ip, server_port, location, has_side_data_and_rating2, match_main_id, demo_url, game_mode, game_name, map_desc, location_full, match_mode, match_status, match_flag, status, waiver, year, season, round_total, cs_type, priority_show_type, pug10m_show_type, credit_match_status, knife_winner, knife_winner_role, most_1v2_uid, most_assist_uid, most_awp_uid, most_end_uid, most_first_kill_uid, most_headshot_uid, most_jump_uid, mvp_uid, response_code, response_message, response_status, response_timestamp, response_trace_id, response_success, response_errcode, treat_info_raw, round_list_raw, leetify_data_raw, data_source_type)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
m.match_id, m.match_code, m.map_name, m.start_time, m.end_time, m.duration,
m.winner_team, m.score_team1, m.score_team2, m.server_ip, m.server_port, m.location, m.data_source_type
m.winner_team, m.score_team1, m.score_team2, m.server_ip, m.server_port, m.location,
m.has_side_data_and_rating2, m.match_main_id, m.demo_url, m.game_mode, m.game_name, m.map_desc,
m.location_full, m.match_mode, m.match_status, m.match_flag, m.status, m.waiver, m.year, m.season,
m.round_total, m.cs_type, m.priority_show_type, m.pug10m_show_type, m.credit_match_status,
m.knife_winner, m.knife_winner_role, m.most_1v2_uid, m.most_assist_uid, m.most_awp_uid,
m.most_end_uid, m.most_first_kill_uid, m.most_headshot_uid, m.most_jump_uid, m.mvp_uid,
m.response_code, m.response_message, m.response_status, m.response_timestamp, m.response_trace_id,
m.response_success, m.response_errcode, m.treat_info_raw, m.round_list_raw, m.leetify_data_raw, m.data_source_type
))
for t in m.teams:
cursor.execute("""
INSERT OR REPLACE INTO fact_match_teams
(match_id, group_id, group_all_score, group_change_elo, group_fh_role, group_fh_score, group_origin_elo, group_sh_role, group_sh_score, group_tid, group_uids)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
m.match_id, t.group_id, t.group_all_score, t.group_change_elo, t.group_fh_role, t.group_fh_score,
t.group_origin_elo, t.group_sh_role, t.group_sh_score, t.group_tid, t.group_uids
))
# 4. Fact Match Players
player_columns = [
"match_id", "steam_id_64", "team_id", "kills", "deaths", "assists", "headshot_count",
@@ -779,9 +1160,10 @@ def save_match(cursor, m: MatchData):
"rank_score", "is_win", "kast", "entry_kills", "entry_deaths", "awp_kills",
"clutch_1v1", "clutch_1v2", "clutch_1v3", "clutch_1v4", "clutch_1v5",
"flash_assists", "flash_duration", "jump_count", "damage_total", "damage_received",
"assisted_kill", "awp_kill", "benefit_kill", "day", "defused_bomb", "end_1v1",
"damage_receive", "damage_stats", "assisted_kill", "awp_kill", "awp_kill_ct",
"awp_kill_t", "benefit_kill", "day", "defused_bomb", "end_1v1",
"end_1v2", "end_1v3", "end_1v4", "end_1v5", "explode_bomb", "first_death",
"first_kill", "flash_enemy", "flash_team", "flash_team_time", "flash_time",
"fd_ct", "fd_t", "first_kill", "flash_enemy", "flash_team", "flash_team_time", "flash_time",
"game_mode", "group_id", "hold_total", "id", "is_highlight", "is_most_1v2",
"is_most_assist", "is_most_awp", "is_most_end", "is_most_first_kill",
"is_most_headshot", "is_most_jump", "is_svp", "is_tie", "kill_1", "kill_2",
@@ -789,7 +1171,7 @@ def save_match(cursor, m: MatchData):
"many_assists_cnt3", "many_assists_cnt4", "many_assists_cnt5", "map",
"match_code", "match_mode", "match_team_id", "match_time", "per_headshot",
"perfect_kill", "planted_bomb", "revenge_kill", "round_total", "season",
"team_kill", "throw_harm", "throw_harm_enemy", "uid", "year"
"team_kill", "throw_harm", "throw_harm_enemy", "uid", "year", "sts_raw", "level_info_raw"
]
player_placeholders = ",".join(["?"] * len(player_columns))
player_columns_sql = ",".join(player_columns)
@@ -801,9 +1183,10 @@ def save_match(cursor, m: MatchData):
p.elo_change, p.rank_score, p.is_win, p.kast, p.entry_kills, p.entry_deaths,
p.awp_kills, p.clutch_1v1, p.clutch_1v2, p.clutch_1v3, p.clutch_1v4,
p.clutch_1v5, p.flash_assists, p.flash_duration, p.jump_count, p.damage_total,
p.damage_received, p.assisted_kill, p.awp_kill, p.benefit_kill, p.day,
p.defused_bomb, p.end_1v1, p.end_1v2, p.end_1v3, p.end_1v4, p.end_1v5,
p.explode_bomb, p.first_death, p.first_kill, p.flash_enemy, p.flash_team,
p.damage_received, p.damage_receive, p.damage_stats, p.assisted_kill, p.awp_kill,
p.awp_kill_ct, p.awp_kill_t, p.benefit_kill, p.day, p.defused_bomb, p.end_1v1,
p.end_1v2, p.end_1v3, p.end_1v4, p.end_1v5, p.explode_bomb, p.first_death,
p.fd_ct, p.fd_t, p.first_kill, p.flash_enemy, p.flash_team,
p.flash_team_time, p.flash_time, p.game_mode, p.group_id, p.hold_total,
p.id, p.is_highlight, p.is_most_1v2, p.is_most_assist, p.is_most_awp,
p.is_most_end, p.is_most_first_kill, p.is_most_headshot, p.is_most_jump,
@@ -812,7 +1195,7 @@ def save_match(cursor, m: MatchData):
p.many_assists_cnt5, p.map, p.match_code, p.match_mode, p.match_team_id,
p.match_time, p.per_headshot, p.perfect_kill, p.planted_bomb, p.revenge_kill,
p.round_total, p.season, p.team_kill, p.throw_harm, p.throw_harm_enemy,
p.uid, p.year
p.uid, p.year, p.sts_raw, p.level_info_raw
]
for sid, p in m.players.items():

504
ETL/verify/verify_L2.py Normal file
View File

@@ -0,0 +1,504 @@
import sqlite3
import pandas as pd
import csv
import os
import sys
import time
# Show every column and allow wide lines when pandas objects are printed.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
# Relative path of the L2 SQLite database file.
db_path = 'database/L2/L2_Main.sqlite'
# Relative path of the flattened-schema CSV that dump_uncovered reads through
# load_schema_paths.
schema_path = 'database/original_json_schema/schema_flat.csv'
# Keys under `data.main.` that is_covered treats as covered: the first path
# segment after `data.main.` is looked up in this set.
covered_main_fields = {
"match_code", "map", "start_time", "end_time", "match_winner",
"group1_all_score", "group1_change_elo", "group1_fh_role", "group1_fh_score",
"group1_origin_elo", "group1_sh_role", "group1_sh_score", "group1_tid", "group1_uids",
"group2_all_score", "group2_change_elo", "group2_fh_role", "group2_fh_score",
"group2_origin_elo", "group2_sh_role", "group2_sh_score", "group2_tid", "group2_uids",
"server_ip", "server_port", "location", "location_full", "map_desc",
"demo_url", "game_mode", "game_name", "match_mode", "match_status", "match_flag",
"status", "waiver", "year", "season", "round_total", "cs_type", "priority_show_type",
"pug10m_show_type", "credit_match_status", "knife_winner", "knife_winner_role",
"most_1v2_uid", "most_assist_uid", "most_awp_uid", "most_end_uid",
"most_first_kill_uid", "most_headshot_uid", "most_jump_uid", "mvp_uid", "id"
}
# Substrings that is_covered searches for anywhere in a schema path; the single
# entry marks every path containing `data.group_N[].user_info.` as covered.
covered_user_fields = {
"data.group_N[].user_info."
}
# Substrings of round-list schema paths that is_covered matches with `in`.
# NOTE(review): is_covered already accepts any path containing "data.round_list"
# before it consults this list, so these entries cannot change its result and
# appear to serve as documentation only -- confirm.
covered_round_fields = [
"data.round_list[].current_score.ct",
"data.round_list[].current_score.t",
"data.round_list[].current_score.final_round_time",
"data.round_list[].all_kill[].pasttime",
"data.round_list[].all_kill[].weapon",
"data.round_list[].all_kill[].headshot",
"data.round_list[].all_kill[].penetrated",
"data.round_list[].all_kill[].attackerblind",
"data.round_list[].all_kill[].throughsmoke",
"data.round_list[].all_kill[].noscope",
"data.round_list[].all_kill[].attacker.steamid_64",
"data.round_list[].all_kill[].victim.steamid_64",
"data.round_list[].all_kill[].attacker.pos.x",
"data.round_list[].all_kill[].attacker.pos.y",
"data.round_list[].all_kill[].attacker.pos.z",
"data.round_list[].all_kill[].victim.pos.x",
"data.round_list[].all_kill[].victim.pos.y",
"data.round_list[].all_kill[].victim.pos.z"
]
# Substrings of leetify schema paths that is_covered matches with `in`; entries
# ending in "." match a whole subtree.
# NOTE(review): is_covered already accepts any path containing
# "data.leetify_data." before it consults this list, so these entries cannot
# change its result and appear to serve as documentation only -- confirm.
covered_leetify_fields = [
"data.leetify_data.round_stat[].round",
"data.leetify_data.round_stat[].win_reason",
"data.leetify_data.round_stat[].end_ts",
"data.leetify_data.round_stat[].sfui_event.score_ct",
"data.leetify_data.round_stat[].sfui_event.score_t",
"data.leetify_data.round_stat[].ct_money_group",
"data.leetify_data.round_stat[].t_money_group",
"data.leetify_data.round_stat[].show_event[].ts",
"data.leetify_data.round_stat[].show_event[].kill_event.Ts",
"data.leetify_data.round_stat[].show_event[].kill_event.Killer",
"data.leetify_data.round_stat[].show_event[].kill_event.Victim",
"data.leetify_data.round_stat[].show_event[].kill_event.WeaponName",
"data.leetify_data.round_stat[].show_event[].kill_event.Headshot",
"data.leetify_data.round_stat[].show_event[].kill_event.Penetrated",
"data.leetify_data.round_stat[].show_event[].kill_event.AttackerBlind",
"data.leetify_data.round_stat[].show_event[].kill_event.ThroughSmoke",
"data.leetify_data.round_stat[].show_event[].kill_event.NoScope",
"data.leetify_data.round_stat[].show_event[].trade_score_change.",
"data.leetify_data.round_stat[].show_event[].flash_assist_killer_score_change.",
"data.leetify_data.round_stat[].show_event[].killer_score_change.",
"data.leetify_data.round_stat[].show_event[].victim_score_change.",
"data.leetify_data.round_stat[].bron_equipment.",
"data.leetify_data.round_stat[].player_t_score.",
"data.leetify_data.round_stat[].player_ct_score.",
"data.leetify_data.round_stat[].player_bron_crash."
]
# Keys under `data.<steamid>.` that is_covered treats as covered: the first
# path segment after that prefix is looked up in this set.
covered_vip_fields = {
"awp_kill",
"awp_kill_ct",
"awp_kill_t",
"damage_receive",
"damage_stats",
"fd_ct",
"fd_t",
"kast"
}
def load_schema_paths(schema_path_value):
    """Return the second-column value of every data row in a schema CSV.

    Args:
        schema_path_value: Path of a UTF-8 encoded CSV file whose first row
            is a header.

    Returns:
        A list with column index 1 of each row that has at least two
        columns, in file order. The header row is skipped; an empty file
        yields an empty list.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; without it, newlines embedded in quoted
    # fields are translated before the reader sees them.
    with open(schema_path_value, 'r', encoding='utf-8', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # drop the header row (no-op on an empty file)
        return [row[1] for row in reader if len(row) >= 2]
def is_covered(path):
    """Report whether a flattened schema path counts as covered.

    The checks run in order and the first match wins:
    response-envelope keys, whitelisted keys under ``data.<steamid>.``,
    a set of path markers that are always accepted, whitelisted keys under
    ``data.main.``, and finally the substring lists for user, round-list
    and leetify paths.

    Returns:
        ``True`` when any rule matches, otherwise ``False``.
    """
    # Top-level response envelope keys.
    if path in ("data", "code", "message", "status", "timestamp", "timeStamp", "traceId", "success", "errcode"):
        return True

    # Per-player block: only the first segment after the prefix is checked.
    vip_prefix = "data.<steamid>."
    if path.startswith(vip_prefix):
        vip_key = path.split(vip_prefix)[1].split(".")[0]
        if vip_key in covered_vip_fields:
            return True

    # Markers accepted wherever they occur in the path.
    blanket_markers = (
        "data.group_N[].fight_any.",
        "data.group_N[].fight_t.",
        "data.group_N[].fight_ct.",
        "data.group_N[].sts.",
        "data.group_N[].level_info.",
        "data.treat_info.",
        "data.has_side_data_and_rating2",
    )
    for marker in blanket_markers:
        if marker in path:
            return True

    # Match-level block: only the first segment after the prefix is checked.
    main_prefix = "data.main."
    if main_prefix in path:
        main_key = path.split(main_prefix)[1].split(".")[0]
        if main_key in covered_main_fields:
            return True

    return (
        any(fragment in path for fragment in covered_user_fields)
        or "data.round_list" in path
        or any(fragment in path for fragment in covered_round_fields)
        or "data.leetify_data." in path
        or any(fragment in path for fragment in covered_leetify_fields)
    )
def group_key(p):
    """Map a schema path to the label of the first matching path group.

    Args:
        p: Flattened schema path string.

    Returns:
        The label paired with the first marker found inside ``p``, or
        ``"other"`` when no marker matches.
    """
    # Order matters: specific markers come before the generic ones that
    # would also match (e.g. round_stat[] before leetify_data., "data." last).
    ordered_groups = (
        ("data.group_N[].user_info.", "data.group_N[].user_info.*"),
        ("data.group_N[].fight_any.", "data.group_N[].fight_any.*"),
        ("data.group_N[].fight_t.", "data.group_N[].fight_t.*"),
        ("data.group_N[].fight_ct.", "data.group_N[].fight_ct.*"),
        ("data.main.", "data.main.*"),
        ("data.round_list[]", "data.round_list.*"),
        ("data.leetify_data.round_stat[]", "data.leetify_data.round_stat.*"),
        ("data.leetify_data.", "data.leetify_data.*"),
        ("data.treat_info.", "data.treat_info.*"),
        ("data.", "data.*"),
    )
    for marker, label in ordered_groups:
        if marker in p:
            return label
    return "other"
def dump_uncovered(output_path):
    """Write the schema paths that is_covered() rejects to a CSV file.

    Paths are loaded from the module-level ``schema_path``. When every path
    is covered, a message is printed and no file is written. Otherwise the
    uncovered paths are tagged with their group_key(), sorted by group and
    path, saved to ``output_path`` and summarised on stdout.

    Args:
        output_path: Destination CSV path (written as UTF-8 with BOM).
    """
    all_paths = load_schema_paths(schema_path)
    missing = [candidate for candidate in all_paths if not is_covered(candidate)]
    report = pd.DataFrame({"path": missing})
    if len(report) == 0:
        print("no uncovered paths")
        return

    report["group"] = report["path"].apply(group_key)
    report = report.sort_values(["group", "path"])
    report.to_csv(output_path, index=False, encoding='utf-8-sig')

    per_group = report.groupby("group").size().sort_values(ascending=False)
    print(f"uncovered total: {len(report)}")
    print("\n-- uncovered groups (count) --")
    print(per_group)
    print(f"\noutput: {output_path}")
def print_schema(conn):
    """Print a column/type/pk table for every user table in the database.

    Tables are listed in name order; names matching ``sqlite_%`` are skipped.
    For each table a ``[name]`` heading is printed, followed by an aligned
    three-column listing with a dashed rule under the header row.

    Args:
        conn: Open ``sqlite3`` connection (anything with a compatible
            ``execute`` method).
    """
    tables = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name"
    ).fetchall()
    for (name,) in tables:
        print(f"\n[{name}]")
        # Quote the identifier so names containing spaces, quotes or reserved
        # words do not break the PRAGMA statement.
        quoted_name = '"' + name.replace('"', '""') + '"'
        cols = conn.execute(f"PRAGMA table_info({quoted_name})").fetchall()
        rows = [["column", "type", "pk"]]
        # table_info rows are (cid, name, type, notnull, dflt_value, pk);
        # a column declared without a type is shown as an empty string.
        rows.extend(
            [col_name, col_type or "", str(pk)]
            for _, col_name, col_type, _, _, pk in cols
        )
        widths = [max(len(r[i]) for r in rows) for i in range(3)]
        for idx, r in enumerate(rows):
            line = " | ".join(r[i].ljust(widths[i]) for i in range(3))
            print(line)
            if idx == 0:
                print("-" * len(line))
def refresh_schema_sql(conn, output_path):
    """Dump the CREATE statements of all tables and indexes to a SQL file.

    The file starts with ``PRAGMA foreign_keys = ON;`` and then lists every
    stored statement (tables first, then indexes, each group by name),
    terminated with ``;`` and separated by blank lines.

    Args:
        conn: Open ``sqlite3`` connection to read ``sqlite_master`` from.
        output_path: File to overwrite with the generated SQL (UTF-8).
    """
    query = """
        SELECT type, name, sql
        FROM sqlite_master
        WHERE sql IS NOT NULL AND type IN ('table', 'index') AND name NOT LIKE 'sqlite_%'
        ORDER BY CASE WHEN type='table' THEN 0 ELSE 1 END, name
    """
    statements = [record[2].strip() + ";" for record in conn.execute(query).fetchall()]
    # One blank line between statements, exactly one trailing newline.
    script = "\n\n".join(["PRAGMA foreign_keys = ON;"] + statements) + "\n"
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(script)
def _verify_scalar(conn, sql):
    """Run a single-value query and return the first column of its first row."""
    return conn.execute(sql).fetchone()[0]


def _verify_is_empty(s):
    """Return a boolean mask that is True where a Series value is NULL or 0."""
    return s.isna() | (s == 0)


def _verify_is_nonzero(s):
    """Return a boolean mask that is True where a Series value is neither NULL nor 0."""
    return s.notna() & (s != 0)


def _print_sample_kill_event(conn, source_type, label):
    """Print one kill event from the first match of the given data source type.

    Nothing is printed when no match of that type exists.
    """
    match_row = conn.execute(
        "SELECT match_id FROM fact_matches WHERE data_source_type=? LIMIT 1",
        (source_type,),
    ).fetchone()
    if not match_row:
        return
    mid = match_row[0]
    print(f"{label} Match: {mid}")
    # The id is bound as a parameter rather than formatted into the SQL text.
    df = pd.read_sql(
        "SELECT * FROM fact_round_events WHERE match_id=? AND event_type='kill' LIMIT 1",
        conn,
        params=(mid,),
    )
    print(df[['event_type', 'attacker_steam_id', 'trade_killer_steam_id', 'attacker_pos_x', 'score_change_attacker']])


def _verify_counts_and_samples(conn):
    """Print row counts, the data-source distribution and a few sample rows."""
    print("--- Counts ---")
    tables = [
        'dim_players',
        'dim_maps',
        'fact_matches',
        'fact_match_teams',
        'fact_match_players',
        'fact_match_players_t',
        'fact_match_players_ct',
        'fact_rounds',
        'fact_round_events',
        'fact_round_player_economy',
    ]
    for t in tables:
        # Table names are the fixed literals above, so formatting is safe here.
        count = _verify_scalar(conn, f"SELECT COUNT(*) FROM {t}")
        print(f"{t}: {count}")

    print("\n--- Data Source Distribution ---")
    dist = pd.read_sql("SELECT data_source_type, COUNT(*) as cnt FROM fact_matches GROUP BY data_source_type", conn)
    print(dist)

    print("\n--- Sample Round Events (Leetify vs Classic) ---")
    _print_sample_kill_event(conn, 'leetify', 'Leetify')
    _print_sample_kill_event(conn, 'classic', 'Classic')

    print("\n--- Sample Player Stats (New Fields) ---")
    df_players = pd.read_sql("SELECT steam_id_64, rating, rating3, elo_change, rank_score, flash_duration, jump_count FROM fact_match_players LIMIT 5", conn)
    print(df_players)


def _verify_insert_fields(conn):
    """Print how many fact_matches rows have each response/raw field populated."""
    print("\n--- Insert Field Checks ---")
    meta_counts = conn.execute("""
        SELECT
            SUM(CASE WHEN response_code IS NOT NULL THEN 1 ELSE 0 END) AS response_code_cnt,
            SUM(CASE WHEN response_trace_id IS NOT NULL AND response_trace_id != '' THEN 1 ELSE 0 END) AS response_trace_id_cnt,
            SUM(CASE WHEN response_success IS NOT NULL THEN 1 ELSE 0 END) AS response_success_cnt,
            SUM(CASE WHEN response_errcode IS NOT NULL THEN 1 ELSE 0 END) AS response_errcode_cnt,
            SUM(CASE WHEN treat_info_raw IS NOT NULL AND treat_info_raw != '' THEN 1 ELSE 0 END) AS treat_info_raw_cnt,
            SUM(CASE WHEN round_list_raw IS NOT NULL AND round_list_raw != '' THEN 1 ELSE 0 END) AS round_list_raw_cnt,
            SUM(CASE WHEN leetify_data_raw IS NOT NULL AND leetify_data_raw != '' THEN 1 ELSE 0 END) AS leetify_data_raw_cnt
        FROM fact_matches
    """).fetchone()
    # Labels are listed in the same order as the SELECT columns above.
    labels = [
        "response_code non-null",
        "response_trace_id non-empty",
        "response_success non-null",
        "response_errcode non-null",
        "treat_info_raw non-empty",
        "round_list_raw non-empty",
        "leetify_data_raw non-empty",
    ]
    for label, value in zip(labels, meta_counts):
        print(f"{label}: {value}")


def _verify_integrity(conn):
    """Print counts of orphaned rows and of side rows with all-zero K/D/A."""
    print("\n--- Integrity Checks ---")
    missing_players = _verify_scalar(conn, """
        SELECT COUNT(*) FROM fact_match_players f
        LEFT JOIN dim_players d ON f.steam_id_64 = d.steam_id_64
        WHERE d.steam_id_64 IS NULL
    """)
    print(f"fact_match_players missing dim_players: {missing_players}")
    missing_round_matches = _verify_scalar(conn, """
        SELECT COUNT(*) FROM fact_rounds r
        LEFT JOIN fact_matches m ON r.match_id = m.match_id
        WHERE m.match_id IS NULL
    """)
    print(f"fact_rounds missing fact_matches: {missing_round_matches}")
    missing_event_rounds = _verify_scalar(conn, """
        SELECT COUNT(*) FROM fact_round_events e
        LEFT JOIN fact_rounds r ON e.match_id = r.match_id AND e.round_num = r.round_num
        WHERE r.match_id IS NULL
    """)
    print(f"fact_round_events missing fact_rounds: {missing_event_rounds}")
    side_zero_t = _verify_scalar(conn, """
        SELECT COUNT(*) FROM fact_match_players_t
        WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0
    """)
    side_zero_ct = _verify_scalar(conn, """
        SELECT COUNT(*) FROM fact_match_players_ct
        WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0
    """)
    print(f"fact_match_players_t zero K/D/A: {side_zero_t}")
    print(f"fact_match_players_ct zero K/D/A: {side_zero_ct}")


def _verify_side_totals(conn):
    """Compare full-match player stats with the T and CT side tables.

    Returns:
        The merged DataFrame (full columns plus ``*_t`` / ``*_ct`` columns),
        so later sections can reuse it without re-querying.
    """
    print("\n--- Full vs T/CT Comparison ---")
    cols = [
        'kills', 'deaths', 'assists', 'headshot_count', 'adr', 'rating', 'rating2',
        'rating3', 'rws', 'mvp_count', 'flash_duration', 'jump_count', 'is_win',
    ]
    select_list = "SELECT match_id, steam_id_64, " + ",".join(cols)
    df_full = pd.read_sql(select_list + " FROM fact_match_players", conn)
    df_t = pd.read_sql(
        select_list + " FROM fact_match_players_t", conn
    ).rename(columns={c: f"{c}_t" for c in cols})
    df_ct = pd.read_sql(
        select_list + " FROM fact_match_players_ct", conn
    ).rename(columns={c: f"{c}_ct" for c in cols})
    # Left joins keep every full-match row even when a side row is missing.
    df = df_full.merge(df_t, on=['match_id', 'steam_id_64'], how='left')
    df = df.merge(df_ct, on=['match_id', 'steam_id_64'], how='left')

    for c in cols:
        print(f"{c} empty: {_verify_is_empty(df[c]).sum()}")

    # Counters: the full value is expected to equal T + CT.
    additive = ['kills', 'deaths', 'assists', 'headshot_count', 'mvp_count', 'flash_duration', 'jump_count']
    for c in additive:
        t_sum = df[f"{c}_t"].fillna(0) + df[f"{c}_ct"].fillna(0)
        # flash_duration gets a small tolerance; the other counters must match exactly.
        tol = 0.01 if c == 'flash_duration' else 0
        diff = (df[c].fillna(0) - t_sum).abs() > tol
        print(f"{c} full != t+ct: {diff.sum()}")

    # Non-summable values: only report presence mismatches between full and sides.
    non_additive = ['adr', 'rating', 'rating2', 'rating3', 'rws', 'is_win']
    for c in non_additive:
        side_nonempty = (~_verify_is_empty(df[f"{c}_t"])) | (~_verify_is_empty(df[f"{c}_ct"]))
        full_empty_side_nonempty = _verify_is_empty(df[c]) & side_nonempty
        full_nonempty_side_empty = (~_verify_is_empty(df[c])) & (~side_nonempty)
        print(f"{c} full empty but side has: {full_empty_side_nonempty.sum()}")
        print(f"{c} full has but side empty: {full_nonempty_side_empty.sum()}")
    return df


def _verify_rating_detail(conn, df):
    """Print null/zero/nonzero breakdowns for the rating columns.

    Args:
        conn: Open database connection.
        df: Merged frame returned by ``_verify_side_totals``.
    """
    print("\n--- Rating Detail ---")
    rating_cols = ['rating', 'rating2', 'rating3']
    for c in rating_cols:
        full_nonzero_mask = _verify_is_nonzero(df[c])
        t_nonzero_mask = _verify_is_nonzero(df[f"{c}_t"])
        ct_nonzero_mask = _verify_is_nonzero(df[f"{c}_ct"])
        side_any_nonzero = t_nonzero_mask | ct_nonzero_mask
        full_null = df[c].isna().sum()
        full_zero = (df[c] == 0).sum()
        full_nonzero = full_nonzero_mask.sum()
        full_nonzero_side_zero = (full_nonzero_mask & (~side_any_nonzero)).sum()
        full_zero_side_nonzero = ((~full_nonzero_mask) & side_any_nonzero).sum()
        print(f"{c} full null: {full_null} full zero: {full_zero} full nonzero: {full_nonzero}")
        print(f"{c} side t nonzero: {t_nonzero_mask.sum()} side ct nonzero: {ct_nonzero_mask.sum()}")
        print(f"{c} full nonzero but side all zero: {full_nonzero_side_zero}")
        print(f"{c} full zero but side has: {full_zero_side_nonzero}")

    df_rating_src = pd.read_sql(
        "SELECT f.rating, f.rating2, f.rating3, m.data_source_type FROM fact_match_players f JOIN fact_matches m ON f.match_id = m.match_id",
        conn,
    )
    for c in rating_cols:
        # NULL != 0 evaluates to True element-wise, so NULLs are excluded
        # explicitly to keep "nonzero" consistent with the counts above.
        grp = (
            df_rating_src.groupby('data_source_type')[c]
            .apply(lambda s: _verify_is_nonzero(s).sum())
            .reset_index(name='nonzero')
        )
        print(f"{c} nonzero by source")
        print(grp)


def _verify_fight_any_coverage(conn, paths):
    """Report which fight_any schema keys have a fact_match_players column.

    Also prints, per mapped key, how often the column is zero or NULL.

    Args:
        conn: Open database connection.
        paths: Schema paths as returned by ``load_schema_paths``.
    """
    print("\n--- Schema Coverage (fight_any) ---")
    fight_keys = set()
    for p in paths:
        if 'data.group_N[].fight_any.' in p:
            fight_keys.add(p.split('fight_any.')[1].split('.')[0])
    l2_cols = set(pd.read_sql("PRAGMA table_info(fact_match_players)", conn)['name'].tolist())
    # Column name in fact_match_players -> key name under fight_any.
    alias = {
        'kills': 'kill',
        'deaths': 'death',
        'assists': 'assist',
        'headshot_count': 'headshot',
        'mvp_count': 'is_mvp',
        'flash_duration': 'flash_enemy_time',
        'jump_count': 'jump_total',
        'awp_kills': 'awp_kill',
    }
    covered = set()
    for c in l2_cols:
        if c in fight_keys:
            covered.add(c)
        elif c in alias and alias[c] in fight_keys:
            covered.add(alias[c])
    missing_keys = sorted(fight_keys - covered)
    print(f"fight_any keys: {len(fight_keys)}")
    print(f"covered by L2 columns: {len(covered)}")
    print(f"uncovered fight_any keys: {len(missing_keys)}")
    if missing_keys:
        print(missing_keys)

    print("\n--- Coverage Zero Rate (fight_any -> fact_match_players) ---")
    alias_by_source = {src: col for col, src in alias.items()}
    col_map = {}
    for k in fight_keys:
        if k in l2_cols:
            col_map[k] = k
        elif alias_by_source.get(k) in l2_cols:
            # Only use an alias whose column really exists; otherwise the
            # SELECT below would fail with "no such column".
            col_map[k] = alias_by_source[k]
    if not col_map:
        # Sorting an empty frame by the stat columns would raise KeyError.
        print("no fight_any keys map to fact_match_players columns")
        return
    select_cols = ["steam_id_64"] + sorted(set(col_map.values()))
    df_fight = pd.read_sql(
        "SELECT " + ",".join(select_cols) + " FROM fact_match_players",
        conn,
    )
    total_rows = len(df_fight)
    stats = []
    for fight_key, col in sorted(col_map.items()):
        s = df_fight[col]
        zeros = (s == 0).sum()
        nulls = s.isna().sum()
        stats.append({
            "fight_key": fight_key,
            "column": col,
            "nonzero": total_rows - zeros - nulls,
            "zero": zeros,
            "null": nulls,
            "zero_rate": 0 if total_rows == 0 else round(zeros / total_rows, 4),
        })
    df_stats = pd.DataFrame(stats).sort_values(["zero_rate", "nonzero"], ascending=[False, True])
    print(df_stats.head(30))
    print("\n-- zero_rate top (most zeros) --")
    print(df_stats.head(10))
    print("\n-- zero_rate bottom (most nonzero) --")
    print(df_stats.tail(10))


def _verify_path_coverage(paths):
    """Print leetify economy path counts and the overall covered/uncovered summary."""
    print("\n--- Schema Coverage (leetify economy) ---")
    econ_keys = [
        'data.leetify_data.round_stat[].bron_equipment.',
        'data.leetify_data.round_stat[].player_t_score.',
        'data.leetify_data.round_stat[].player_ct_score.',
        'data.leetify_data.round_stat[].player_bron_crash.',
    ]
    for k in econ_keys:
        count = sum(1 for p in paths if k in p)
        print(f"{k} paths: {count}")

    print("\n--- Schema Summary Coverage (by path groups) ---")
    uncovered = [p for p in paths if not is_covered(p)]
    print(f"total paths: {len(paths)}")
    print(f"covered paths: {len(paths) - len(uncovered)}")
    print(f"uncovered paths: {len(uncovered)}")
    df_unc = pd.DataFrame({"path": uncovered})
    if len(df_unc) > 0:
        df_unc["group"] = df_unc["path"].apply(group_key)
        print("\n-- Uncovered groups (count) --")
        print(df_unc.groupby("group").size().sort_values(ascending=False))
        print("\n-- Uncovered examples (top 50) --")
        print(df_unc["path"].head(50).to_list())


def verify():
    """Print a verification report for the database at ``db_path``.

    The report covers, in order: table row counts and sample rows, populated
    response/raw fields, referential integrity, full-match versus T/CT side
    consistency, rating details, and schema-path coverage based on the CSV at
    the module-level ``schema_path``. Everything is written to stdout;
    nothing is returned.

    The connection is closed even when one of the checks raises.
    """
    conn = sqlite3.connect(db_path)
    try:
        _verify_counts_and_samples(conn)
        _verify_insert_fields(conn)
        _verify_integrity(conn)
        merged = _verify_side_totals(conn)
        _verify_rating_detail(conn, merged)
        paths = load_schema_paths(schema_path)
        _verify_fight_any_coverage(conn, paths)
        _verify_path_coverage(paths)
    finally:
        conn.close()
def _mtime_or_none(path):
    """Return the modification time of ``path``, or None if it cannot be read."""
    try:
        return os.path.getmtime(path)
    except OSError:
        return None


def watch_schema(schema_path, interval=1.0):
    """Regenerate and print the schema whenever the database or SQL file changes.

    Polls forever: on the first pass, and whenever the modification time of
    the database at ``db_path`` or of ``schema_path`` increases, the SQL file
    is rewritten with refresh_schema_sql() and the schema is printed with
    print_schema(). The caller is expected to interrupt the loop.

    Note that the ``schema_path`` parameter shadows the module-level name of
    the same spelling; inside this function it is the SQL output file.

    Args:
        schema_path: SQL file to (re)write on every refresh.
        interval: Seconds to sleep between polls.
    """
    last_db_mtime = 0
    last_schema_mtime = 0
    first = True
    while True:
        # Reading the mtime directly avoids a crash when the file disappears
        # between an existence check and the stat call.
        db_mtime = _mtime_or_none(db_path)
        if db_mtime is None:
            print(f"db not found: {db_path}")
            time.sleep(interval)
            continue
        schema_mtime = _mtime_or_none(schema_path) or 0
        if first or db_mtime > last_db_mtime or schema_mtime > last_schema_mtime:
            conn = sqlite3.connect(db_path)
            try:
                refresh_schema_sql(conn, schema_path)
                print(f"\n[{time.strftime('%Y-%m-%d %H:%M:%S')}] schema.sql refreshed")
                print_schema(conn)
            finally:
                # Release the handle even if the refresh or printing fails.
                conn.close()
            last_db_mtime = db_mtime
            # Re-read after writing so our own refresh does not count as a change.
            last_schema_mtime = _mtime_or_none(schema_path) or 0
            first = False
        time.sleep(interval)
if __name__ == "__main__":
    # Command words are matched case-insensitively, in any position.
    args = [a.lower() for a in sys.argv[1:]]
    requested = set(args)
    schema_sql_file = 'database/L2/schema.sql'

    if requested & {"dump_uncovered", "uncovered"}:
        dump_uncovered('database/original_json_schema/uncovered_features.csv')
    elif requested & {"watch_schema", "watch"}:
        # Ctrl-C is the normal way to leave the watch loop, so exit quietly.
        try:
            watch_schema(schema_sql_file)
        except KeyboardInterrupt:
            pass
    elif requested & {"schema", "refresh_schema"}:
        if os.path.exists(db_path):
            conn = sqlite3.connect(db_path)
            # "schema" only prints; "refresh_schema" also rewrites the SQL file.
            if "refresh_schema" in requested:
                refresh_schema_sql(conn, schema_sql_file)
                print("schema.sql refreshed")
            print_schema(conn)
            conn.close()
        else:
            print(f"db not found: {db_path}")
    else:
        # No recognised command word: run the full verification report.
        verify()

View File

@@ -1,245 +0,0 @@
import sqlite3
import pandas as pd
import csv
# Widen pandas' console output so printed DataFrames show all columns
# instead of being truncated or wrapped.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
# Path of the SQLite database that verify() opens.
db_path = 'database/L2/L2_Main.sqlite'
def _run_verification(conn):
    """Print every verification section using an already open connection."""
    print("--- Counts ---")
    tables = [
        'dim_players',
        'dim_maps',
        'fact_matches',
        'fact_match_players',
        'fact_match_players_t',
        'fact_match_players_ct',
        'fact_rounds',
        'fact_round_events',
        'fact_round_player_economy',
    ]
    for t in tables:
        # Table names are the fixed literals above, so formatting is safe here.
        count = conn.execute(f"SELECT COUNT(*) FROM {t}").fetchone()[0]
        print(f"{t}: {count}")

    print("\n--- Data Source Distribution ---")
    dist = pd.read_sql("SELECT data_source_type, COUNT(*) as cnt FROM fact_matches GROUP BY data_source_type", conn)
    print(dist)

    print("\n--- Sample Round Events (Leetify vs Classic) ---")
    # Fetch one kill event from one match of each source type, if present.
    for source_type, label in (('leetify', 'Leetify'), ('classic', 'Classic')):
        match_row = conn.execute(
            "SELECT match_id FROM fact_matches WHERE data_source_type=? LIMIT 1",
            (source_type,),
        ).fetchone()
        if not match_row:
            continue
        mid = match_row[0]
        print(f"{label} Match: {mid}")
        # The id is bound as a parameter rather than formatted into the SQL text.
        df = pd.read_sql(
            "SELECT * FROM fact_round_events WHERE match_id=? AND event_type='kill' LIMIT 1",
            conn,
            params=(mid,),
        )
        print(df[['event_type', 'attacker_steam_id', 'trade_killer_steam_id', 'attacker_pos_x', 'score_change_attacker']])

    print("\n--- Sample Player Stats (New Fields) ---")
    df_players = pd.read_sql("SELECT steam_id_64, rating, rating3, elo_change, rank_score, flash_duration, jump_count FROM fact_match_players LIMIT 5", conn)
    print(df_players)

    print("\n--- Integrity Checks ---")
    missing_players = conn.execute("""
        SELECT COUNT(*) FROM fact_match_players f
        LEFT JOIN dim_players d ON f.steam_id_64 = d.steam_id_64
        WHERE d.steam_id_64 IS NULL
    """).fetchone()[0]
    print(f"fact_match_players missing dim_players: {missing_players}")
    missing_round_matches = conn.execute("""
        SELECT COUNT(*) FROM fact_rounds r
        LEFT JOIN fact_matches m ON r.match_id = m.match_id
        WHERE m.match_id IS NULL
    """).fetchone()[0]
    print(f"fact_rounds missing fact_matches: {missing_round_matches}")
    missing_event_rounds = conn.execute("""
        SELECT COUNT(*) FROM fact_round_events e
        LEFT JOIN fact_rounds r ON e.match_id = r.match_id AND e.round_num = r.round_num
        WHERE r.match_id IS NULL
    """).fetchone()[0]
    print(f"fact_round_events missing fact_rounds: {missing_event_rounds}")
    side_zero_t = conn.execute("""
        SELECT COUNT(*) FROM fact_match_players_t
        WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0
    """).fetchone()[0]
    side_zero_ct = conn.execute("""
        SELECT COUNT(*) FROM fact_match_players_ct
        WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0
    """).fetchone()[0]
    print(f"fact_match_players_t zero K/D/A: {side_zero_t}")
    print(f"fact_match_players_ct zero K/D/A: {side_zero_ct}")

    print("\n--- Full vs T/CT Comparison ---")
    cols = [
        'kills', 'deaths', 'assists', 'headshot_count', 'adr', 'rating', 'rating2',
        'rating3', 'rws', 'mvp_count', 'flash_duration', 'jump_count', 'is_win',
    ]
    df_full = pd.read_sql(
        "SELECT match_id, steam_id_64, " + ",".join(cols) + " FROM fact_match_players",
        conn,
    )
    df_t = pd.read_sql(
        "SELECT match_id, steam_id_64, " + ",".join(cols) + " FROM fact_match_players_t",
        conn,
    ).rename(columns={c: f"{c}_t" for c in cols})
    df_ct = pd.read_sql(
        "SELECT match_id, steam_id_64, " + ",".join(cols) + " FROM fact_match_players_ct",
        conn,
    ).rename(columns={c: f"{c}_ct" for c in cols})
    # Left joins keep every full-match row even when a side row is missing.
    df = df_full.merge(df_t, on=['match_id', 'steam_id_64'], how='left')
    df = df.merge(df_ct, on=['match_id', 'steam_id_64'], how='left')

    def is_empty(s):
        # "Empty" means NULL or exactly zero.
        return s.isna() | (s == 0)

    def is_nonzero(s):
        # Neither NULL nor zero.
        return s.notna() & (s != 0)

    for c in cols:
        empty_count = is_empty(df[c]).sum()
        print(f"{c} empty: {empty_count}")

    # Counters: the full value is expected to equal T + CT.
    additive = ['kills', 'deaths', 'assists', 'headshot_count', 'mvp_count', 'flash_duration', 'jump_count']
    for c in additive:
        t_sum = df[f"{c}_t"].fillna(0) + df[f"{c}_ct"].fillna(0)
        # flash_duration gets a small tolerance; the other counters must match exactly.
        tol = 0.01 if c == 'flash_duration' else 0
        diff = (df[c].fillna(0) - t_sum).abs() > tol
        print(f"{c} full != t+ct: {diff.sum()}")

    # Non-summable values: only report presence mismatches between full and sides.
    non_additive = ['adr', 'rating', 'rating2', 'rating3', 'rws', 'is_win']
    for c in non_additive:
        side_nonempty = (~is_empty(df[f"{c}_t"])) | (~is_empty(df[f"{c}_ct"]))
        full_empty_side_nonempty = is_empty(df[c]) & side_nonempty
        full_nonempty_side_empty = (~is_empty(df[c])) & (~side_nonempty)
        print(f"{c} full empty but side has: {full_empty_side_nonempty.sum()}")
        print(f"{c} full has but side empty: {full_nonempty_side_empty.sum()}")

    print("\n--- Rating Detail ---")
    rating_cols = ['rating', 'rating2', 'rating3']
    for c in rating_cols:
        full_null = df[c].isna().sum()
        full_zero = (df[c] == 0).sum()
        full_nonzero = is_nonzero(df[c]).sum()
        side_t_nonzero = is_nonzero(df[f"{c}_t"]).sum()
        side_ct_nonzero = is_nonzero(df[f"{c}_ct"]).sum()
        side_any_nonzero = is_nonzero(df[f"{c}_t"]) | is_nonzero(df[f"{c}_ct"])
        full_nonzero_side_zero = (is_nonzero(df[c]) & (~side_any_nonzero)).sum()
        full_zero_side_nonzero = (is_empty(df[c]) & side_any_nonzero).sum()
        print(f"{c} full null: {full_null} full zero: {full_zero} full nonzero: {full_nonzero}")
        print(f"{c} side t nonzero: {side_t_nonzero} side ct nonzero: {side_ct_nonzero}")
        print(f"{c} full nonzero but side all zero: {full_nonzero_side_zero}")
        print(f"{c} full zero but side has: {full_zero_side_nonzero}")
    df_rating_src = pd.read_sql(
        "SELECT f.rating, f.rating2, f.rating3, m.data_source_type FROM fact_match_players f JOIN fact_matches m ON f.match_id = m.match_id",
        conn,
    )
    for c in rating_cols:
        # NULL != 0 evaluates to True element-wise, so NULLs are excluded
        # explicitly to keep "nonzero" consistent with the counts above.
        grp = (
            df_rating_src.groupby('data_source_type')[c]
            .apply(lambda s: is_nonzero(s).sum())
            .reset_index(name='nonzero')
        )
        print(f"{c} nonzero by source")
        print(grp)

    print("\n--- Schema Coverage (fight_any) ---")
    schema_path = 'database/original_json_schema/schema_flat.csv'
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(schema_path, 'r', encoding='utf-8', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # discard the header row, if there is one
        paths = [row[1] for row in reader if len(row) >= 2]
    fight_keys = set()
    for p in paths:
        if 'data.group_N[].fight_any.' in p:
            fight_keys.add(p.split('fight_any.')[1].split('.')[0])
    l2_cols = set(pd.read_sql("PRAGMA table_info(fact_match_players)", conn)['name'].tolist())
    # Column name in fact_match_players -> key name under fight_any.
    alias = {
        'kills': 'kill',
        'deaths': 'death',
        'assists': 'assist',
        'headshot_count': 'headshot',
        'mvp_count': 'is_mvp',
        'flash_duration': 'flash_enemy_time',
        'jump_count': 'jump_total',
        'awp_kills': 'awp_kill',
    }
    covered = set()
    for c in l2_cols:
        if c in fight_keys:
            covered.add(c)
        elif c in alias and alias[c] in fight_keys:
            covered.add(alias[c])
    missing_keys = sorted(fight_keys - covered)
    print(f"fight_any keys: {len(fight_keys)}")
    print(f"covered by L2 columns: {len(covered)}")
    print(f"uncovered fight_any keys: {len(missing_keys)}")
    if missing_keys:
        print(missing_keys)

    print("\n--- Coverage Zero Rate (fight_any -> fact_match_players) ---")
    alias_by_source = {src: col for col, src in alias.items()}
    col_map = {}
    for k in fight_keys:
        if k in l2_cols:
            col_map[k] = k
        elif alias_by_source.get(k) in l2_cols:
            # Only use an alias whose column really exists; otherwise the
            # SELECT below would fail with "no such column".
            col_map[k] = alias_by_source[k]
    if col_map:
        select_cols = ["steam_id_64"] + sorted(set(col_map.values()))
        df_fight = pd.read_sql(
            "SELECT " + ",".join(select_cols) + " FROM fact_match_players",
            conn,
        )
        total_rows = len(df_fight)
        stats = []
        for fight_key, col in sorted(col_map.items()):
            s = df_fight[col]
            zeros = (s == 0).sum()
            nulls = s.isna().sum()
            stats.append({
                "fight_key": fight_key,
                "column": col,
                "nonzero": total_rows - zeros - nulls,
                "zero": zeros,
                "null": nulls,
                "zero_rate": 0 if total_rows == 0 else round(zeros / total_rows, 4),
            })
        df_stats = pd.DataFrame(stats).sort_values(["zero_rate", "nonzero"], ascending=[False, True])
        print(df_stats.head(30))
        print("\n-- zero_rate top (most zeros) --")
        print(df_stats.head(10))
        print("\n-- zero_rate bottom (most nonzero) --")
        print(df_stats.tail(10))
    else:
        # Sorting an empty frame by the stat columns would raise KeyError.
        print("no fight_any keys map to fact_match_players columns")

    print("\n--- Schema Coverage (leetify economy) ---")
    econ_keys = [
        'data.leetify_data.round_stat[].bron_equipment.',
        'data.leetify_data.round_stat[].player_t_score.',
        'data.leetify_data.round_stat[].player_ct_score.',
        'data.leetify_data.round_stat[].player_bron_crash.',
    ]
    for k in econ_keys:
        count = sum(1 for p in paths if k in p)
        print(f"{k} paths: {count}")


def verify():
    """Print a verification report for the database at ``db_path``.

    Sections, in order: table row counts, data-source distribution, sample
    rows, referential integrity, full-match versus T/CT side consistency,
    rating details, and coverage of the fight_any / leetify economy schema
    paths read from the flattened schema CSV. Output goes to stdout; nothing
    is returned.

    The connection is closed even when one of the checks raises.
    """
    conn = sqlite3.connect(db_path)
    try:
        _run_verification(conn)
    finally:
        conn.close()
# Run the verification report when this file is executed as a script.
if __name__ == "__main__":
    verify()