0.4.1: L2ver2 finished
This commit is contained in:
@@ -52,8 +52,12 @@ class PlayerStats:
|
||||
jump_count: int = 0
|
||||
damage_total: int = 0
|
||||
damage_received: int = 0
|
||||
damage_receive: int = 0
|
||||
damage_stats: int = 0
|
||||
assisted_kill: int = 0
|
||||
awp_kill: int = 0
|
||||
awp_kill_ct: int = 0
|
||||
awp_kill_t: int = 0
|
||||
benefit_kill: int = 0
|
||||
day: str = ""
|
||||
defused_bomb: int = 0
|
||||
@@ -64,6 +68,8 @@ class PlayerStats:
|
||||
end_1v5: int = 0
|
||||
explode_bomb: int = 0
|
||||
first_death: int = 0
|
||||
fd_ct: int = 0
|
||||
fd_t: int = 0
|
||||
first_kill: int = 0
|
||||
flash_enemy: int = 0
|
||||
flash_team: int = 0
|
||||
@@ -109,6 +115,8 @@ class PlayerStats:
|
||||
throw_harm_enemy: int = 0
|
||||
uid: int = 0
|
||||
year: str = ""
|
||||
sts_raw: str = ""
|
||||
level_info_raw: str = ""
|
||||
|
||||
@dataclass
|
||||
class RoundEvent:
|
||||
@@ -159,6 +167,19 @@ class RoundData:
|
||||
events: List[RoundEvent] = field(default_factory=list)
|
||||
economies: List[PlayerEconomy] = field(default_factory=list)
|
||||
|
||||
@dataclass
class MatchTeamData:
    """Summary of one team ("group") within a single match.

    The parser builds one instance per group index (1 and 2) from the
    ``group{idx}_*`` keys of the match payload; each instance is later
    written as one row of ``fact_match_teams``.

    Only ``group_id`` is required; every other field defaults to a zero
    value so a partially populated payload still yields a valid object.
    """

    # Group index within the match (the parser uses 1 and 2).
    group_id: int
    # Value of ``group{idx}_all_score``.
    group_all_score: int = 0
    # Value of ``group{idx}_change_elo``.
    group_change_elo: float = 0.0
    # NOTE(review): "fh"/"sh" presumably mean first half / second half --
    # confirm against the upstream API before relying on it.
    group_fh_role: int = 0
    group_fh_score: int = 0
    # Value of ``group{idx}_origin_elo``.
    group_origin_elo: float = 0.0
    group_sh_role: int = 0
    group_sh_score: int = 0
    # Value of ``group{idx}_tid``.
    group_tid: int = 0
    # Value of ``group{idx}_uids`` stored as text; format not visible here.
    group_uids: str = ""
|
||||
|
||||
@dataclass
|
||||
class MatchData:
|
||||
match_id: str
|
||||
@@ -173,12 +194,52 @@ class MatchData:
|
||||
server_ip: str = ""
|
||||
server_port: int = 0
|
||||
location: str = ""
|
||||
has_side_data_and_rating2: int = 0
|
||||
match_main_id: int = 0
|
||||
demo_url: str = ""
|
||||
game_mode: int = 0
|
||||
game_name: str = ""
|
||||
map_desc: str = ""
|
||||
location_full: str = ""
|
||||
match_mode: int = 0
|
||||
match_status: int = 0
|
||||
match_flag: int = 0
|
||||
status: int = 0
|
||||
waiver: int = 0
|
||||
year: int = 0
|
||||
season: str = ""
|
||||
round_total: int = 0
|
||||
cs_type: int = 0
|
||||
priority_show_type: int = 0
|
||||
pug10m_show_type: int = 0
|
||||
credit_match_status: int = 0
|
||||
knife_winner: int = 0
|
||||
knife_winner_role: int = 0
|
||||
most_1v2_uid: int = 0
|
||||
most_assist_uid: int = 0
|
||||
most_awp_uid: int = 0
|
||||
most_end_uid: int = 0
|
||||
most_first_kill_uid: int = 0
|
||||
most_headshot_uid: int = 0
|
||||
most_jump_uid: int = 0
|
||||
mvp_uid: int = 0
|
||||
response_code: int = 0
|
||||
response_message: str = ""
|
||||
response_status: int = 0
|
||||
response_timestamp: int = 0
|
||||
response_trace_id: str = ""
|
||||
response_success: int = 0
|
||||
response_errcode: int = 0
|
||||
treat_info_raw: str = ""
|
||||
round_list_raw: str = ""
|
||||
leetify_data_raw: str = ""
|
||||
data_source_type: str = "unknown"
|
||||
players: Dict[str, PlayerStats] = field(default_factory=dict) # Key: steam_id_64
|
||||
players_t: Dict[str, PlayerStats] = field(default_factory=dict)
|
||||
players_ct: Dict[str, PlayerStats] = field(default_factory=dict)
|
||||
rounds: List[RoundData] = field(default_factory=list)
|
||||
player_meta: Dict[str, Dict] = field(default_factory=dict) # steam_id -> {uid, name, avatar, ...}
|
||||
teams: List[MatchTeamData] = field(default_factory=list)
|
||||
|
||||
# --- Database Helper ---
|
||||
|
||||
@@ -210,6 +271,7 @@ class MatchParser:
|
||||
|
||||
# Extracted JSON bodies
|
||||
self.data_match = None
|
||||
self.data_match_wrapper = None
|
||||
self.data_vip = None
|
||||
self.data_leetify = None
|
||||
self.data_round_list = None
|
||||
@@ -226,6 +288,7 @@ class MatchParser:
|
||||
|
||||
# Check URLs
|
||||
if 'crane/http/api/data/match/' in url:
|
||||
self.data_match_wrapper = body
|
||||
self.data_match = body.get('data', {})
|
||||
elif 'crane/http/api/data/vip_plus_match_data/' in url:
|
||||
self.data_vip = body.get('data', {})
|
||||
@@ -246,12 +309,24 @@ class MatchParser:
|
||||
# Decide which round source to use
|
||||
if self.data_leetify and self.data_leetify.get('leetify_data'):
|
||||
self.match_data.data_source_type = 'leetify'
|
||||
try:
|
||||
self.match_data.leetify_data_raw = json.dumps(self.data_leetify.get('leetify_data', {}), ensure_ascii=False)
|
||||
except:
|
||||
self.match_data.leetify_data_raw = ""
|
||||
self.match_data.round_list_raw = ""
|
||||
self._parse_leetify_rounds()
|
||||
elif self.data_round_list and self.data_round_list.get('round_list'):
|
||||
self.match_data.data_source_type = 'classic'
|
||||
try:
|
||||
self.match_data.round_list_raw = json.dumps(self.data_round_list.get('round_list', []), ensure_ascii=False)
|
||||
except:
|
||||
self.match_data.round_list_raw = ""
|
||||
self.match_data.leetify_data_raw = ""
|
||||
self._parse_classic_rounds()
|
||||
else:
|
||||
self.match_data.data_source_type = 'unknown'
|
||||
self.match_data.round_list_raw = ""
|
||||
self.match_data.leetify_data_raw = ""
|
||||
logger.info(f"No round data found for {self.match_id}")
|
||||
|
||||
return self.match_data
|
||||
@@ -273,12 +348,89 @@ class MatchParser:
|
||||
except:
|
||||
self.match_data.server_port = 0
|
||||
self.match_data.location = m.get('location', '')
|
||||
def safe_int(val):
    """Coerce *val* to ``int``; return 0 when it is ``None`` or unparsable.

    Integers and plain integer strings are converted directly so large
    identifiers keep their exact value. Everything else goes through
    ``float`` first, so inputs such as ``"3.7"`` or ``7.9`` truncate
    toward zero.
    """
    if val is None:
        return 0
    if isinstance(val, int):
        # Already integral (bool included): a float round-trip would
        # silently lose precision above 2**53.
        return int(val)
    if isinstance(val, str):
        try:
            return int(val)
        except ValueError:
            pass  # not a plain integer literal; try float parsing below
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type; ValueError: bad text or NaN;
        # OverflowError: infinity cannot be converted to int.
        return 0


def safe_float(val):
    """Coerce *val* to ``float``; return 0.0 when it is ``None`` or unparsable."""
    if val is None:
        return 0.0
    try:
        return float(val)
    except (TypeError, ValueError, OverflowError):
        return 0.0


def safe_text(val):
    """Return ``str(val)``, mapping ``None`` to the empty string."""
    return "" if val is None else str(val)
|
||||
wrapper = self.data_match_wrapper or {}
|
||||
self.match_data.response_code = safe_int(wrapper.get('code'))
|
||||
self.match_data.response_message = safe_text(wrapper.get('message'))
|
||||
self.match_data.response_status = safe_int(wrapper.get('status'))
|
||||
self.match_data.response_timestamp = safe_int(wrapper.get('timeStamp') if wrapper.get('timeStamp') is not None else wrapper.get('timestamp'))
|
||||
self.match_data.response_trace_id = safe_text(wrapper.get('traceId') if wrapper.get('traceId') is not None else wrapper.get('trace_id'))
|
||||
self.match_data.response_success = safe_int(wrapper.get('success'))
|
||||
self.match_data.response_errcode = safe_int(wrapper.get('errcode'))
|
||||
self.match_data.has_side_data_and_rating2 = safe_int(self.data_match.get('has_side_data_and_rating2'))
|
||||
self.match_data.match_main_id = safe_int(m.get('id'))
|
||||
self.match_data.demo_url = safe_text(m.get('demo_url'))
|
||||
self.match_data.game_mode = safe_int(m.get('game_mode'))
|
||||
self.match_data.game_name = safe_text(m.get('game_name'))
|
||||
self.match_data.map_desc = safe_text(m.get('map_desc'))
|
||||
self.match_data.location_full = safe_text(m.get('location_full'))
|
||||
self.match_data.match_mode = safe_int(m.get('match_mode'))
|
||||
self.match_data.match_status = safe_int(m.get('match_status'))
|
||||
self.match_data.match_flag = safe_int(m.get('match_flag'))
|
||||
self.match_data.status = safe_int(m.get('status'))
|
||||
self.match_data.waiver = safe_int(m.get('waiver'))
|
||||
self.match_data.year = safe_int(m.get('year'))
|
||||
self.match_data.season = safe_text(m.get('season'))
|
||||
self.match_data.round_total = safe_int(m.get('round_total'))
|
||||
self.match_data.cs_type = safe_int(m.get('cs_type'))
|
||||
self.match_data.priority_show_type = safe_int(m.get('priority_show_type'))
|
||||
self.match_data.pug10m_show_type = safe_int(m.get('pug10m_show_type'))
|
||||
self.match_data.credit_match_status = safe_int(m.get('credit_match_status'))
|
||||
self.match_data.knife_winner = safe_int(m.get('knife_winner'))
|
||||
self.match_data.knife_winner_role = safe_int(m.get('knife_winner_role'))
|
||||
self.match_data.most_1v2_uid = safe_int(m.get('most_1v2_uid'))
|
||||
self.match_data.most_assist_uid = safe_int(m.get('most_assist_uid'))
|
||||
self.match_data.most_awp_uid = safe_int(m.get('most_awp_uid'))
|
||||
self.match_data.most_end_uid = safe_int(m.get('most_end_uid'))
|
||||
self.match_data.most_first_kill_uid = safe_int(m.get('most_first_kill_uid'))
|
||||
self.match_data.most_headshot_uid = safe_int(m.get('most_headshot_uid'))
|
||||
self.match_data.most_jump_uid = safe_int(m.get('most_jump_uid'))
|
||||
self.match_data.mvp_uid = safe_int(m.get('mvp_uid'))
|
||||
treat_info = self.data_match.get('treat_info')
|
||||
if treat_info is not None:
|
||||
try:
|
||||
self.match_data.treat_info_raw = json.dumps(treat_info, ensure_ascii=False)
|
||||
except:
|
||||
self.match_data.treat_info_raw = ""
|
||||
self.match_data.teams = []
|
||||
for idx in [1, 2]:
|
||||
team = MatchTeamData(
|
||||
group_id=idx,
|
||||
group_all_score=safe_int(m.get(f"group{idx}_all_score")),
|
||||
group_change_elo=safe_float(m.get(f"group{idx}_change_elo")),
|
||||
group_fh_role=safe_int(m.get(f"group{idx}_fh_role")),
|
||||
group_fh_score=safe_int(m.get(f"group{idx}_fh_score")),
|
||||
group_origin_elo=safe_float(m.get(f"group{idx}_origin_elo")),
|
||||
group_sh_role=safe_int(m.get(f"group{idx}_sh_role")),
|
||||
group_sh_score=safe_int(m.get(f"group{idx}_sh_score")),
|
||||
group_tid=safe_int(m.get(f"group{idx}_tid")),
|
||||
group_uids=safe_text(m.get(f"group{idx}_uids"))
|
||||
)
|
||||
self.match_data.teams.append(team)
|
||||
|
||||
def _parse_players_base(self):
|
||||
# Players are in group_1 and group_2 lists in data_match
|
||||
groups = []
|
||||
if 'group_1' in self.data_match: groups.extend(self.data_match['group_1'])
|
||||
if 'group_2' in self.data_match: groups.extend(self.data_match['group_2'])
|
||||
def safe_int(val):
    """Coerce *val* to ``int``; return 0 when it is ``None`` or unparsable.

    Integers and plain integer strings are converted directly so large
    identifiers keep their exact value. Everything else goes through
    ``float`` first, so inputs such as ``"3.7"`` or ``7.9`` truncate
    toward zero.
    """
    if val is None:
        return 0
    if isinstance(val, int):
        # Already integral (bool included): a float round-trip would
        # silently lose precision above 2**53.
        return int(val)
    if isinstance(val, str):
        try:
            return int(val)
        except ValueError:
            pass  # not a plain integer literal; try float parsing below
    try:
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type; ValueError: bad text or NaN;
        # OverflowError: infinity cannot be converted to int.
        return 0


def safe_text(val):
    """Return ``str(val)``, mapping ``None`` to the empty string."""
    return "" if val is None else str(val)
|
||||
|
||||
for p in groups:
|
||||
# We need steam_id.
|
||||
@@ -305,17 +457,90 @@ class MatchParser:
|
||||
if not steam_id:
|
||||
continue
|
||||
|
||||
status = user_data.get('status', {})
|
||||
platform_exp = user_data.get('platformExp', {})
|
||||
trusted = user_data.get('trusted', {})
|
||||
certify = user_data.get('certify', {})
|
||||
identity = user_data.get('identity', {})
|
||||
plus_info = user_info.get('plus_info', {}) or p.get('plus_info', {})
|
||||
user_info_raw = ""
|
||||
try:
|
||||
user_info_raw = json.dumps(user_info, ensure_ascii=False)
|
||||
except:
|
||||
user_info_raw = ""
|
||||
|
||||
self.match_data.player_meta[steam_id] = {
|
||||
'uid': uid,
|
||||
'username': user_data.get('username', ''),
|
||||
'avatar_url': profile.get('avatarUrl', ''),
|
||||
'domain': profile.get('domain', ''),
|
||||
'created_at': user_data.get('createdAt', 0),
|
||||
'updated_at': user_data.get('updatedAt', 0)
|
||||
'uid': safe_int(uid),
|
||||
'username': safe_text(user_data.get('username')),
|
||||
'uuid': safe_text(user_data.get('uuid')),
|
||||
'email': safe_text(user_data.get('email')),
|
||||
'area': safe_text(user_data.get('area')),
|
||||
'mobile': safe_text(user_data.get('mobile')),
|
||||
'avatar_url': safe_text(profile.get('avatarUrl')),
|
||||
'domain': safe_text(profile.get('domain')),
|
||||
'user_domain': safe_text(user_data.get('domain')),
|
||||
'created_at': safe_int(user_data.get('createdAt')),
|
||||
'updated_at': safe_int(user_data.get('updatedAt')),
|
||||
'username_audit_status': safe_int(user_data.get('usernameAuditStatus')),
|
||||
'accid': safe_text(user_data.get('Accid')),
|
||||
'team_id': safe_int(user_data.get('teamID')),
|
||||
'trumpet_count': safe_int(user_data.get('trumpetCount')),
|
||||
'profile_nickname': safe_text(profile.get('nickname')),
|
||||
'profile_avatar_audit_status': safe_int(profile.get('avatarAuditStatus')),
|
||||
'profile_rgb_avatar_url': safe_text(profile.get('rgbAvatarUrl')),
|
||||
'profile_photo_url': safe_text(profile.get('photoUrl')),
|
||||
'profile_gender': safe_int(profile.get('gender')),
|
||||
'profile_birthday': safe_int(profile.get('birthday')),
|
||||
'profile_country_id': safe_text(profile.get('countryId')),
|
||||
'profile_region_id': safe_text(profile.get('regionId')),
|
||||
'profile_city_id': safe_text(profile.get('cityId')),
|
||||
'profile_language': safe_text(profile.get('language')),
|
||||
'profile_recommend_url': safe_text(profile.get('recommendUrl')),
|
||||
'profile_group_id': safe_int(profile.get('groupId')),
|
||||
'profile_reg_source': safe_int(profile.get('regSource')),
|
||||
'status_status': safe_int(status.get('status')),
|
||||
'status_expire': safe_int(status.get('expire')),
|
||||
'status_cancellation_status': safe_int(status.get('cancellationStatus')),
|
||||
'status_new_user': safe_int(status.get('newUser')),
|
||||
'status_login_banned_time': safe_int(status.get('loginBannedTime')),
|
||||
'status_anticheat_type': safe_int(status.get('anticheatType')),
|
||||
'status_flag_status1': safe_text(status.get('flagStatus1')),
|
||||
'status_anticheat_status': safe_text(status.get('anticheatStatus')),
|
||||
'status_flag_honor': safe_text(status.get('FlagHonor')),
|
||||
'status_privacy_policy_status': safe_int(status.get('PrivacyPolicyStatus')),
|
||||
'status_csgo_frozen_exptime': safe_int(status.get('csgoFrozenExptime')),
|
||||
'platformexp_level': safe_int(platform_exp.get('level')),
|
||||
'platformexp_exp': safe_int(platform_exp.get('exp')),
|
||||
'steam_account': safe_text(steam_data.get('steamAccount')),
|
||||
'steam_trade_url': safe_text(steam_data.get('tradeUrl')),
|
||||
'steam_rent_id': safe_text(steam_data.get('rentSteamId')),
|
||||
'trusted_credit': safe_int(trusted.get('credit')),
|
||||
'trusted_credit_level': safe_int(trusted.get('creditLevel')),
|
||||
'trusted_score': safe_int(trusted.get('score')),
|
||||
'trusted_status': safe_int(trusted.get('status')),
|
||||
'trusted_credit_status': safe_int(trusted.get('creditStatus')),
|
||||
'certify_id_type': safe_int(certify.get('idType')),
|
||||
'certify_status': safe_int(certify.get('status')),
|
||||
'certify_age': safe_int(certify.get('age')),
|
||||
'certify_real_name': safe_text(certify.get('realName')),
|
||||
'certify_uid_list': safe_text(json.dumps(certify.get('uidList'), ensure_ascii=False)) if certify.get('uidList') is not None else "",
|
||||
'certify_audit_status': safe_int(certify.get('auditStatus')),
|
||||
'certify_gender': safe_int(certify.get('gender')),
|
||||
'identity_type': safe_int(identity.get('type')),
|
||||
'identity_extras': safe_text(identity.get('extras')),
|
||||
'identity_status': safe_int(identity.get('status')),
|
||||
'identity_slogan': safe_text(identity.get('slogan')),
|
||||
'identity_list': safe_text(json.dumps(identity.get('identity_list'), ensure_ascii=False)) if identity.get('identity_list') is not None else "",
|
||||
'identity_slogan_ext': safe_text(identity.get('slogan_ext')),
|
||||
'identity_live_url': safe_text(identity.get('live_url')),
|
||||
'identity_live_type': safe_int(identity.get('live_type')),
|
||||
'plus_is_plus': safe_int(plus_info.get('is_plus')),
|
||||
'user_info_raw': user_info_raw
|
||||
}
|
||||
|
||||
stats = PlayerStats(steam_id_64=steam_id)
|
||||
sts = p.get('sts', {})
|
||||
level_info = p.get('level_info', {})
|
||||
|
||||
try:
|
||||
# Use safe conversion helper
|
||||
@@ -329,6 +554,16 @@ class MatchParser:
|
||||
|
||||
def safe_text(val):
|
||||
return "" if val is None else str(val)
|
||||
if sts is not None:
|
||||
try:
|
||||
stats.sts_raw = json.dumps(sts, ensure_ascii=False)
|
||||
except:
|
||||
stats.sts_raw = ""
|
||||
if level_info is not None:
|
||||
try:
|
||||
stats.level_info_raw = json.dumps(level_info, ensure_ascii=False)
|
||||
except:
|
||||
stats.level_info_raw = ""
|
||||
|
||||
def get_stat(key):
|
||||
if key in fight and fight.get(key) not in [None, ""]:
|
||||
@@ -513,11 +748,22 @@ class MatchParser:
|
||||
p = self.match_data.players[sid]
|
||||
p.kast = float(vdata.get('kast', 0))
|
||||
p.awp_kills = int(vdata.get('awp_kill', 0))
|
||||
# Damage stats might need calculation or mapping
|
||||
# p.damage_total = ...
|
||||
p.awp_kill_ct = int(vdata.get('awp_kill_ct', 0))
|
||||
p.awp_kill_t = int(vdata.get('awp_kill_t', 0))
|
||||
p.fd_ct = int(vdata.get('fd_ct', 0))
|
||||
p.fd_t = int(vdata.get('fd_t', 0))
|
||||
p.damage_receive = int(vdata.get('damage_receive', 0))
|
||||
p.damage_stats = int(vdata.get('damage_stats', 0))
|
||||
else:
|
||||
# Try to match by 5E ID if possible, but here keys are steamids usually
|
||||
pass
|
||||
for sid, p in self.match_data.players.items():
|
||||
if sid in self.match_data.players_t:
|
||||
self.match_data.players_t[sid].awp_kill_t = p.awp_kill_t
|
||||
self.match_data.players_t[sid].fd_t = p.fd_t
|
||||
if sid in self.match_data.players_ct:
|
||||
self.match_data.players_ct[sid].awp_kill_ct = p.awp_kill_ct
|
||||
self.match_data.players_ct[sid].fd_ct = p.fd_ct
|
||||
|
||||
def _parse_leetify_rounds(self):
|
||||
l_data = self.data_leetify.get('leetify_data', {})
|
||||
@@ -744,34 +990,169 @@ def process_matches():
|
||||
|
||||
def save_match(cursor, m: MatchData):
|
||||
# 1. Dim Players (Upsert)
|
||||
player_meta_columns = [
|
||||
"steam_id_64", "uid", "username", "avatar_url", "domain", "created_at", "updated_at",
|
||||
"last_seen_match_id", "uuid", "email", "area", "mobile", "user_domain",
|
||||
"username_audit_status", "accid", "team_id", "trumpet_count",
|
||||
"profile_nickname", "profile_avatar_audit_status", "profile_rgb_avatar_url",
|
||||
"profile_photo_url", "profile_gender", "profile_birthday", "profile_country_id",
|
||||
"profile_region_id", "profile_city_id", "profile_language", "profile_recommend_url",
|
||||
"profile_group_id", "profile_reg_source", "status_status", "status_expire",
|
||||
"status_cancellation_status", "status_new_user", "status_login_banned_time",
|
||||
"status_anticheat_type", "status_flag_status1", "status_anticheat_status",
|
||||
"status_flag_honor", "status_privacy_policy_status", "status_csgo_frozen_exptime",
|
||||
"platformexp_level", "platformexp_exp", "steam_account", "steam_trade_url",
|
||||
"steam_rent_id", "trusted_credit", "trusted_credit_level", "trusted_score",
|
||||
"trusted_status", "trusted_credit_status", "certify_id_type", "certify_status",
|
||||
"certify_age", "certify_real_name", "certify_uid_list", "certify_audit_status",
|
||||
"certify_gender", "identity_type", "identity_extras", "identity_status",
|
||||
"identity_slogan", "identity_list", "identity_slogan_ext", "identity_live_url",
|
||||
"identity_live_type", "plus_is_plus", "user_info_raw"
|
||||
]
|
||||
player_meta_placeholders = ",".join(["?"] * len(player_meta_columns))
|
||||
player_meta_columns_sql = ",".join(player_meta_columns)
|
||||
for sid, meta in m.player_meta.items():
|
||||
cursor.execute("""
|
||||
INSERT INTO dim_players (steam_id_64, uid, username, avatar_url, domain, created_at, updated_at, last_seen_match_id)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
INSERT INTO dim_players (""" + player_meta_columns_sql + """)
|
||||
VALUES (""" + player_meta_placeholders + """)
|
||||
ON CONFLICT(steam_id_64) DO UPDATE SET
|
||||
uid=excluded.uid,
|
||||
username=excluded.username,
|
||||
avatar_url=excluded.avatar_url,
|
||||
last_seen_match_id=excluded.last_seen_match_id
|
||||
domain=excluded.domain,
|
||||
created_at=excluded.created_at,
|
||||
updated_at=excluded.updated_at,
|
||||
last_seen_match_id=excluded.last_seen_match_id,
|
||||
uuid=excluded.uuid,
|
||||
email=excluded.email,
|
||||
area=excluded.area,
|
||||
mobile=excluded.mobile,
|
||||
user_domain=excluded.user_domain,
|
||||
username_audit_status=excluded.username_audit_status,
|
||||
accid=excluded.accid,
|
||||
team_id=excluded.team_id,
|
||||
trumpet_count=excluded.trumpet_count,
|
||||
profile_nickname=excluded.profile_nickname,
|
||||
profile_avatar_audit_status=excluded.profile_avatar_audit_status,
|
||||
profile_rgb_avatar_url=excluded.profile_rgb_avatar_url,
|
||||
profile_photo_url=excluded.profile_photo_url,
|
||||
profile_gender=excluded.profile_gender,
|
||||
profile_birthday=excluded.profile_birthday,
|
||||
profile_country_id=excluded.profile_country_id,
|
||||
profile_region_id=excluded.profile_region_id,
|
||||
profile_city_id=excluded.profile_city_id,
|
||||
profile_language=excluded.profile_language,
|
||||
profile_recommend_url=excluded.profile_recommend_url,
|
||||
profile_group_id=excluded.profile_group_id,
|
||||
profile_reg_source=excluded.profile_reg_source,
|
||||
status_status=excluded.status_status,
|
||||
status_expire=excluded.status_expire,
|
||||
status_cancellation_status=excluded.status_cancellation_status,
|
||||
status_new_user=excluded.status_new_user,
|
||||
status_login_banned_time=excluded.status_login_banned_time,
|
||||
status_anticheat_type=excluded.status_anticheat_type,
|
||||
status_flag_status1=excluded.status_flag_status1,
|
||||
status_anticheat_status=excluded.status_anticheat_status,
|
||||
status_flag_honor=excluded.status_flag_honor,
|
||||
status_privacy_policy_status=excluded.status_privacy_policy_status,
|
||||
status_csgo_frozen_exptime=excluded.status_csgo_frozen_exptime,
|
||||
platformexp_level=excluded.platformexp_level,
|
||||
platformexp_exp=excluded.platformexp_exp,
|
||||
steam_account=excluded.steam_account,
|
||||
steam_trade_url=excluded.steam_trade_url,
|
||||
steam_rent_id=excluded.steam_rent_id,
|
||||
trusted_credit=excluded.trusted_credit,
|
||||
trusted_credit_level=excluded.trusted_credit_level,
|
||||
trusted_score=excluded.trusted_score,
|
||||
trusted_status=excluded.trusted_status,
|
||||
trusted_credit_status=excluded.trusted_credit_status,
|
||||
certify_id_type=excluded.certify_id_type,
|
||||
certify_status=excluded.certify_status,
|
||||
certify_age=excluded.certify_age,
|
||||
certify_real_name=excluded.certify_real_name,
|
||||
certify_uid_list=excluded.certify_uid_list,
|
||||
certify_audit_status=excluded.certify_audit_status,
|
||||
certify_gender=excluded.certify_gender,
|
||||
identity_type=excluded.identity_type,
|
||||
identity_extras=excluded.identity_extras,
|
||||
identity_status=excluded.identity_status,
|
||||
identity_slogan=excluded.identity_slogan,
|
||||
identity_list=excluded.identity_list,
|
||||
identity_slogan_ext=excluded.identity_slogan_ext,
|
||||
identity_live_url=excluded.identity_live_url,
|
||||
identity_live_type=excluded.identity_live_type,
|
||||
plus_is_plus=excluded.plus_is_plus,
|
||||
user_info_raw=excluded.user_info_raw
|
||||
""", (
|
||||
sid, meta.get('uid'), meta.get('username'), meta.get('avatar_url'),
|
||||
meta.get('domain'), meta.get('created_at'), meta.get('updated_at'),
|
||||
m.match_id
|
||||
sid, meta.get('uid'), meta.get('username'), meta.get('avatar_url'),
|
||||
meta.get('domain'), meta.get('created_at'), meta.get('updated_at'),
|
||||
m.match_id, meta.get('uuid'), meta.get('email'), meta.get('area'),
|
||||
meta.get('mobile'), meta.get('user_domain'), meta.get('username_audit_status'),
|
||||
meta.get('accid'), meta.get('team_id'), meta.get('trumpet_count'),
|
||||
meta.get('profile_nickname'), meta.get('profile_avatar_audit_status'),
|
||||
meta.get('profile_rgb_avatar_url'), meta.get('profile_photo_url'),
|
||||
meta.get('profile_gender'), meta.get('profile_birthday'),
|
||||
meta.get('profile_country_id'), meta.get('profile_region_id'),
|
||||
meta.get('profile_city_id'), meta.get('profile_language'),
|
||||
meta.get('profile_recommend_url'), meta.get('profile_group_id'),
|
||||
meta.get('profile_reg_source'), meta.get('status_status'),
|
||||
meta.get('status_expire'), meta.get('status_cancellation_status'),
|
||||
meta.get('status_new_user'), meta.get('status_login_banned_time'),
|
||||
meta.get('status_anticheat_type'), meta.get('status_flag_status1'),
|
||||
meta.get('status_anticheat_status'), meta.get('status_flag_honor'),
|
||||
meta.get('status_privacy_policy_status'), meta.get('status_csgo_frozen_exptime'),
|
||||
meta.get('platformexp_level'), meta.get('platformexp_exp'),
|
||||
meta.get('steam_account'), meta.get('steam_trade_url'),
|
||||
meta.get('steam_rent_id'), meta.get('trusted_credit'),
|
||||
meta.get('trusted_credit_level'), meta.get('trusted_score'),
|
||||
meta.get('trusted_status'), meta.get('trusted_credit_status'),
|
||||
meta.get('certify_id_type'), meta.get('certify_status'),
|
||||
meta.get('certify_age'), meta.get('certify_real_name'),
|
||||
meta.get('certify_uid_list'), meta.get('certify_audit_status'),
|
||||
meta.get('certify_gender'), meta.get('identity_type'),
|
||||
meta.get('identity_extras'), meta.get('identity_status'),
|
||||
meta.get('identity_slogan'), meta.get('identity_list'),
|
||||
meta.get('identity_slogan_ext'), meta.get('identity_live_url'),
|
||||
meta.get('identity_live_type'), meta.get('plus_is_plus'),
|
||||
meta.get('user_info_raw')
|
||||
))
|
||||
|
||||
# 2. Dim Maps (Ignore if exists)
|
||||
if m.map_name:
|
||||
cursor.execute("INSERT OR IGNORE INTO dim_maps (map_name) VALUES (?)", (m.map_name,))
|
||||
cursor.execute("""
|
||||
INSERT INTO dim_maps (map_name, map_desc)
|
||||
VALUES (?, ?)
|
||||
ON CONFLICT(map_name) DO UPDATE SET
|
||||
map_desc=excluded.map_desc
|
||||
""", (m.map_name, m.map_desc))
|
||||
|
||||
# 3. Fact Matches
|
||||
cursor.execute("""
|
||||
INSERT OR REPLACE INTO fact_matches
|
||||
(match_id, match_code, map_name, start_time, end_time, duration, winner_team, score_team1, score_team2, server_ip, server_port, location, data_source_type)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
(match_id, match_code, map_name, start_time, end_time, duration, winner_team, score_team1, score_team2, server_ip, server_port, location, has_side_data_and_rating2, match_main_id, demo_url, game_mode, game_name, map_desc, location_full, match_mode, match_status, match_flag, status, waiver, year, season, round_total, cs_type, priority_show_type, pug10m_show_type, credit_match_status, knife_winner, knife_winner_role, most_1v2_uid, most_assist_uid, most_awp_uid, most_end_uid, most_first_kill_uid, most_headshot_uid, most_jump_uid, mvp_uid, response_code, response_message, response_status, response_timestamp, response_trace_id, response_success, response_errcode, treat_info_raw, round_list_raw, leetify_data_raw, data_source_type)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (
|
||||
m.match_id, m.match_code, m.map_name, m.start_time, m.end_time, m.duration,
|
||||
m.winner_team, m.score_team1, m.score_team2, m.server_ip, m.server_port, m.location, m.data_source_type
|
||||
m.winner_team, m.score_team1, m.score_team2, m.server_ip, m.server_port, m.location,
|
||||
m.has_side_data_and_rating2, m.match_main_id, m.demo_url, m.game_mode, m.game_name, m.map_desc,
|
||||
m.location_full, m.match_mode, m.match_status, m.match_flag, m.status, m.waiver, m.year, m.season,
|
||||
m.round_total, m.cs_type, m.priority_show_type, m.pug10m_show_type, m.credit_match_status,
|
||||
m.knife_winner, m.knife_winner_role, m.most_1v2_uid, m.most_assist_uid, m.most_awp_uid,
|
||||
m.most_end_uid, m.most_first_kill_uid, m.most_headshot_uid, m.most_jump_uid, m.mvp_uid,
|
||||
m.response_code, m.response_message, m.response_status, m.response_timestamp, m.response_trace_id,
|
||||
m.response_success, m.response_errcode, m.treat_info_raw, m.round_list_raw, m.leetify_data_raw, m.data_source_type
|
||||
))
|
||||
|
||||
for t in m.teams:
|
||||
cursor.execute("""
|
||||
INSERT OR REPLACE INTO fact_match_teams
|
||||
(match_id, group_id, group_all_score, group_change_elo, group_fh_role, group_fh_score, group_origin_elo, group_sh_role, group_sh_score, group_tid, group_uids)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (
|
||||
m.match_id, t.group_id, t.group_all_score, t.group_change_elo, t.group_fh_role, t.group_fh_score,
|
||||
t.group_origin_elo, t.group_sh_role, t.group_sh_score, t.group_tid, t.group_uids
|
||||
))
|
||||
|
||||
# 4. Fact Match Players
|
||||
player_columns = [
|
||||
"match_id", "steam_id_64", "team_id", "kills", "deaths", "assists", "headshot_count",
|
||||
@@ -779,9 +1160,10 @@ def save_match(cursor, m: MatchData):
|
||||
"rank_score", "is_win", "kast", "entry_kills", "entry_deaths", "awp_kills",
|
||||
"clutch_1v1", "clutch_1v2", "clutch_1v3", "clutch_1v4", "clutch_1v5",
|
||||
"flash_assists", "flash_duration", "jump_count", "damage_total", "damage_received",
|
||||
"assisted_kill", "awp_kill", "benefit_kill", "day", "defused_bomb", "end_1v1",
|
||||
"damage_receive", "damage_stats", "assisted_kill", "awp_kill", "awp_kill_ct",
|
||||
"awp_kill_t", "benefit_kill", "day", "defused_bomb", "end_1v1",
|
||||
"end_1v2", "end_1v3", "end_1v4", "end_1v5", "explode_bomb", "first_death",
|
||||
"first_kill", "flash_enemy", "flash_team", "flash_team_time", "flash_time",
|
||||
"fd_ct", "fd_t", "first_kill", "flash_enemy", "flash_team", "flash_team_time", "flash_time",
|
||||
"game_mode", "group_id", "hold_total", "id", "is_highlight", "is_most_1v2",
|
||||
"is_most_assist", "is_most_awp", "is_most_end", "is_most_first_kill",
|
||||
"is_most_headshot", "is_most_jump", "is_svp", "is_tie", "kill_1", "kill_2",
|
||||
@@ -789,7 +1171,7 @@ def save_match(cursor, m: MatchData):
|
||||
"many_assists_cnt3", "many_assists_cnt4", "many_assists_cnt5", "map",
|
||||
"match_code", "match_mode", "match_team_id", "match_time", "per_headshot",
|
||||
"perfect_kill", "planted_bomb", "revenge_kill", "round_total", "season",
|
||||
"team_kill", "throw_harm", "throw_harm_enemy", "uid", "year"
|
||||
"team_kill", "throw_harm", "throw_harm_enemy", "uid", "year", "sts_raw", "level_info_raw"
|
||||
]
|
||||
player_placeholders = ",".join(["?"] * len(player_columns))
|
||||
player_columns_sql = ",".join(player_columns)
|
||||
@@ -801,9 +1183,10 @@ def save_match(cursor, m: MatchData):
|
||||
p.elo_change, p.rank_score, p.is_win, p.kast, p.entry_kills, p.entry_deaths,
|
||||
p.awp_kills, p.clutch_1v1, p.clutch_1v2, p.clutch_1v3, p.clutch_1v4,
|
||||
p.clutch_1v5, p.flash_assists, p.flash_duration, p.jump_count, p.damage_total,
|
||||
p.damage_received, p.assisted_kill, p.awp_kill, p.benefit_kill, p.day,
|
||||
p.defused_bomb, p.end_1v1, p.end_1v2, p.end_1v3, p.end_1v4, p.end_1v5,
|
||||
p.explode_bomb, p.first_death, p.first_kill, p.flash_enemy, p.flash_team,
|
||||
p.damage_received, p.damage_receive, p.damage_stats, p.assisted_kill, p.awp_kill,
|
||||
p.awp_kill_ct, p.awp_kill_t, p.benefit_kill, p.day, p.defused_bomb, p.end_1v1,
|
||||
p.end_1v2, p.end_1v3, p.end_1v4, p.end_1v5, p.explode_bomb, p.first_death,
|
||||
p.fd_ct, p.fd_t, p.first_kill, p.flash_enemy, p.flash_team,
|
||||
p.flash_team_time, p.flash_time, p.game_mode, p.group_id, p.hold_total,
|
||||
p.id, p.is_highlight, p.is_most_1v2, p.is_most_assist, p.is_most_awp,
|
||||
p.is_most_end, p.is_most_first_kill, p.is_most_headshot, p.is_most_jump,
|
||||
@@ -812,7 +1195,7 @@ def save_match(cursor, m: MatchData):
|
||||
p.many_assists_cnt5, p.map, p.match_code, p.match_mode, p.match_team_id,
|
||||
p.match_time, p.per_headshot, p.perfect_kill, p.planted_bomb, p.revenge_kill,
|
||||
p.round_total, p.season, p.team_kill, p.throw_harm, p.throw_harm_enemy,
|
||||
p.uid, p.year
|
||||
p.uid, p.year, p.sts_raw, p.level_info_raw
|
||||
]
|
||||
|
||||
for sid, p in m.players.items():
|
||||
|
||||
504
ETL/verify/verify_L2.py
Normal file
504
ETL/verify/verify_L2.py
Normal file
@@ -0,0 +1,504 @@
|
||||
import sqlite3
|
||||
import pandas as pd
|
||||
import csv
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
# Show every column and use a wide console so printed DataFrames are not truncated.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

# Locations of the L2 SQLite database and of the flattened JSON schema listing.
db_path = 'database/L2/L2_Main.sqlite'
schema_path = 'database/original_json_schema/schema_flat.csv'

# Per-team suffixes that appear once as group1_* and once as group2_*.
_group_field_suffixes = (
    "all_score", "change_elo", "fh_role", "fh_score", "origin_elo",
    "sh_role", "sh_score", "tid", "uids",
)

# Keys directly under ``data.main.`` that is_covered() treats as covered.
covered_main_fields = {
    "match_code", "map", "start_time", "end_time", "match_winner",
    "server_ip", "server_port", "location", "location_full", "map_desc",
    "demo_url", "game_mode", "game_name", "match_mode", "match_status",
    "match_flag", "status", "waiver", "year", "season", "round_total",
    "cs_type", "priority_show_type", "pug10m_show_type",
    "credit_match_status", "knife_winner", "knife_winner_role",
    "most_1v2_uid", "most_assist_uid", "most_awp_uid", "most_end_uid",
    "most_first_kill_uid", "most_headshot_uid", "most_jump_uid",
    "mvp_uid", "id",
} | {
    f"group{team}_{suffix}"
    for team in (1, 2)
    for suffix in _group_field_suffixes
}

# Path fragments (substring matches) for per-user info.
covered_user_fields = {
    "data.group_N[].user_info.",
}

# Path fragments (substring matches) for the classic round list.
_round_prefix = "data.round_list[]."
covered_round_fields = [
    _round_prefix + suffix
    for suffix in (
        "current_score.ct",
        "current_score.t",
        "current_score.final_round_time",
        "all_kill[].pasttime",
        "all_kill[].weapon",
        "all_kill[].headshot",
        "all_kill[].penetrated",
        "all_kill[].attackerblind",
        "all_kill[].throughsmoke",
        "all_kill[].noscope",
        "all_kill[].attacker.steamid_64",
        "all_kill[].victim.steamid_64",
        "all_kill[].attacker.pos.x",
        "all_kill[].attacker.pos.y",
        "all_kill[].attacker.pos.z",
        "all_kill[].victim.pos.x",
        "all_kill[].victim.pos.y",
        "all_kill[].victim.pos.z",
    )
]

# Path fragments (substring matches) for the leetify round statistics.
# Entries ending in "." match every path below that node.
_leetify_prefix = "data.leetify_data.round_stat[]."
covered_leetify_fields = [
    _leetify_prefix + suffix
    for suffix in (
        "round",
        "win_reason",
        "end_ts",
        "sfui_event.score_ct",
        "sfui_event.score_t",
        "ct_money_group",
        "t_money_group",
        "show_event[].ts",
        "show_event[].kill_event.Ts",
        "show_event[].kill_event.Killer",
        "show_event[].kill_event.Victim",
        "show_event[].kill_event.WeaponName",
        "show_event[].kill_event.Headshot",
        "show_event[].kill_event.Penetrated",
        "show_event[].kill_event.AttackerBlind",
        "show_event[].kill_event.ThroughSmoke",
        "show_event[].kill_event.NoScope",
        "show_event[].trade_score_change.",
        "show_event[].flash_assist_killer_score_change.",
        "show_event[].killer_score_change.",
        "show_event[].victim_score_change.",
        "bron_equipment.",
        "player_t_score.",
        "player_ct_score.",
        "player_bron_crash.",
    )
]

# Keys directly under ``data.<steamid>.`` that is_covered() treats as covered.
covered_vip_fields = {
    "awp_kill",
    "awp_kill_ct",
    "awp_kill_t",
    "damage_receive",
    "damage_stats",
    "fd_ct",
    "fd_t",
    "kast",
}
|
||||
|
||||
def load_schema_paths(schema_path_value):
    """Return the second column of every data row in a schema CSV file.

    The first row is treated as a header and skipped. Rows with fewer than
    two columns are ignored.

    Args:
        schema_path_value: Path of the UTF-8 encoded CSV file to read.

    Returns:
        A list with the value of column index 1 of each remaining row, in
        file order. An empty file yields an empty list.
    """
    # newline='' lets the csv module do its own line handling, so newlines
    # embedded in quoted fields are preserved instead of being translated.
    with open(schema_path_value, 'r', encoding='utf-8', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # discard the header row, if there is one
        return [row[1] for row in reader if len(row) >= 2]
|
||||
|
||||
def is_covered(path):
    """Report whether a flattened schema path counts as covered.

    A path is covered when any one of these holds:
      * it is one of the top-level response envelope keys;
      * it starts with ``data.<steamid>.`` and its next key is listed in
        ``covered_vip_fields``;
      * it contains ``data.main.`` and the key following it is listed in
        ``covered_main_fields``;
      * it contains one of the blanket fragments below, or any fragment from
        ``covered_user_fields``, ``covered_round_fields`` or
        ``covered_leetify_fields``.

    Returns True when covered, otherwise False.
    """
    envelope_keys = (
        "data", "code", "message", "status", "timestamp", "timeStamp",
        "traceId", "success", "errcode",
    )
    if path in envelope_keys:
        return True

    vip_prefix = "data.<steamid>."
    if path.startswith(vip_prefix):
        vip_key = path.split(vip_prefix)[1].split(".")[0]
        if vip_key in covered_vip_fields:
            return True

    main_marker = "data.main."
    if main_marker in path:
        main_key = path.split(main_marker)[1].split(".")[0]
        if main_key in covered_main_fields:
            return True

    # Sub-trees accepted wholesale, followed by the finer-grained fragment
    # tables; a single substring hit anywhere is enough.
    fragments = (
        "data.group_N[].fight_any.",
        "data.group_N[].fight_t.",
        "data.group_N[].fight_ct.",
        "data.group_N[].sts.",
        "data.group_N[].level_info.",
        "data.treat_info.",
        "data.has_side_data_and_rating2",
        "data.round_list",
        "data.leetify_data.",
        *covered_user_fields,
        *covered_round_fields,
        *covered_leetify_fields,
    )
    for fragment in fragments:
        if fragment in path:
            return True
    return False
|
||||
|
||||
def group_key(p):
    """Map a flattened schema path to the label of the group it is reported under.

    The fragments are tried in order and the first one contained in ``p``
    decides the label, so more specific fragments come before more general
    ones. Paths that contain none of them are labelled ``"other"``.
    """
    ordered_groups = (
        ("data.group_N[].user_info.", "data.group_N[].user_info.*"),
        ("data.group_N[].fight_any.", "data.group_N[].fight_any.*"),
        ("data.group_N[].fight_t.", "data.group_N[].fight_t.*"),
        ("data.group_N[].fight_ct.", "data.group_N[].fight_ct.*"),
        ("data.main.", "data.main.*"),
        ("data.round_list[]", "data.round_list.*"),
        ("data.leetify_data.round_stat[]", "data.leetify_data.round_stat.*"),
        ("data.leetify_data.", "data.leetify_data.*"),
        ("data.treat_info.", "data.treat_info.*"),
        ("data.", "data.*"),
    )
    for fragment, label in ordered_groups:
        if fragment in p:
            return label
    return "other"
|
||||
|
||||
def dump_uncovered(output_path):
    """Write the schema paths that are not covered to a CSV file and print a summary.

    Reads the paths from ``schema_path``, keeps those for which
    ``is_covered`` is false, labels each with ``group_key`` and writes them
    to ``output_path`` sorted by group and path. When every path is covered
    a message is printed and no file is written.
    """
    missing = [
        candidate
        for candidate in load_schema_paths(schema_path)
        if not is_covered(candidate)
    ]
    if not missing:
        print("no uncovered paths")
        return

    report = pd.DataFrame({
        "path": missing,
        "group": [group_key(candidate) for candidate in missing],
    }).sort_values(["group", "path"])
    # utf-8-sig writes a BOM at the start of the file.
    report.to_csv(output_path, index=False, encoding='utf-8-sig')

    per_group = report.groupby("group").size().sort_values(ascending=False)
    print(f"uncovered total: {len(report)}")
    print("\n-- uncovered groups (count) --")
    print(per_group)
    print(f"\noutput: {output_path}")
|
||||
|
||||
def print_schema(conn):
    """Print the column name, declared type and pk flag of every user table.

    Tables are listed alphabetically; SQLite's internal ``sqlite_*`` tables
    are skipped. Each table is printed as a small aligned text table with a
    dashed rule under the header row.

    Args:
        conn: An open ``sqlite3`` connection.
    """
    tables = conn.execute(
        "SELECT name FROM sqlite_master "
        "WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name"
    ).fetchall()
    for (name,) in tables:
        print(f"\n[{name}]")
        # Quote the identifier (doubling embedded quotes) so table names that
        # contain spaces, punctuation or reserved words do not break the PRAGMA.
        quoted_name = '"' + name.replace('"', '""') + '"'
        cols = conn.execute(f"PRAGMA table_info({quoted_name})").fetchall()

        rows = [["column", "type", "pk"]]
        # table_info rows are (cid, name, type, notnull, dflt_value, pk).
        rows.extend(
            [col_name, col_type or "", str(pk)]
            for _, col_name, col_type, _, _, pk in cols
        )
        widths = [max(len(r[i]) for r in rows) for i in range(3)]
        for idx, r in enumerate(rows):
            line = " | ".join(cell.ljust(width) for cell, width in zip(r, widths))
            print(line)
            if idx == 0:
                print("-" * len(line))
|
||||
|
||||
def refresh_schema_sql(conn, output_path):
    """Dump the CREATE statements of all tables and indexes to a SQL file.

    The file starts with ``PRAGMA foreign_keys = ON;`` followed by one
    statement per object, tables first and then indexes, each group sorted by
    name and separated by a blank line. Objects without stored SQL and
    SQLite's internal ``sqlite_*`` objects are left out.

    Args:
        conn: An open ``sqlite3`` connection to read the schema from.
        output_path: Path of the UTF-8 text file to (over)write.
    """
    query = """
        SELECT type, name, sql
        FROM sqlite_master
        WHERE sql IS NOT NULL AND type IN ('table', 'index') AND name NOT LIKE 'sqlite_%'
        ORDER BY CASE WHEN type='table' THEN 0 ELSE 1 END, name
    """
    statements = [
        definition.strip() + ";"
        for _kind, _name, definition in conn.execute(query).fetchall()
    ]
    document = "\n\n".join(["PRAGMA foreign_keys = ON;", *statements]) + "\n"
    with open(output_path, 'w', encoding='utf-8') as handle:
        handle.write(document)
|
||||
|
||||
def _scalar(conn, sql):
    """Execute *sql* and return the first column of its first result row."""
    return conn.execute(sql).fetchone()[0]


def _is_empty(s):
    """Return a boolean mask of the values in Series *s* that are NULL or 0."""
    return s.isna() | (s == 0)


def _is_nonzero(s):
    """Return a boolean mask of the values in Series *s* that are neither NULL nor 0."""
    return s.notna() & (s != 0)


def _print_overview(conn):
    """Print row counts, the source distribution, sample rows and raw-field fill counts."""
    print("--- Counts ---")
    tables = [
        'dim_players',
        'dim_maps',
        'fact_matches',
        'fact_match_teams',
        'fact_match_players',
        'fact_match_players_t',
        'fact_match_players_ct',
        'fact_rounds',
        'fact_round_events',
        'fact_round_player_economy',
    ]
    for t in tables:
        # Table names come from the fixed list above, never from user input.
        count = _scalar(conn, f"SELECT COUNT(*) FROM {t}")
        print(f"{t}: {count}")

    print("\n--- Data Source Distribution ---")
    dist = pd.read_sql(
        "SELECT data_source_type, COUNT(*) as cnt FROM fact_matches GROUP BY data_source_type",
        conn,
    )
    print(dist)

    print("\n--- Sample Round Events (Leetify vs Classic) ---")
    event_cols = [
        'event_type', 'attacker_steam_id', 'trade_killer_steam_id',
        'attacker_pos_x', 'score_change_attacker',
    ]
    # Show one kill event from one match of each data source.
    for label, source in (("Leetify", "leetify"), ("Classic", "classic")):
        match_row = conn.execute(
            "SELECT match_id FROM fact_matches WHERE data_source_type=? LIMIT 1",
            (source,),
        ).fetchone()
        if match_row is None:
            continue
        mid = match_row[0]
        print(f"{label} Match: {mid}")
        # Bind match_id as a parameter instead of formatting it into the SQL.
        df = pd.read_sql(
            "SELECT * FROM fact_round_events WHERE match_id=? AND event_type='kill' LIMIT 1",
            conn,
            params=(mid,),
        )
        print(df[event_cols])

    print("\n--- Sample Player Stats (New Fields) ---")
    df_players = pd.read_sql(
        "SELECT steam_id_64, rating, rating3, elo_change, rank_score, flash_duration, jump_count "
        "FROM fact_match_players LIMIT 5",
        conn,
    )
    print(df_players)

    print("\n--- Insert Field Checks ---")
    meta_counts = conn.execute("""
        SELECT
            SUM(CASE WHEN response_code IS NOT NULL THEN 1 ELSE 0 END) AS response_code_cnt,
            SUM(CASE WHEN response_trace_id IS NOT NULL AND response_trace_id != '' THEN 1 ELSE 0 END) AS response_trace_id_cnt,
            SUM(CASE WHEN response_success IS NOT NULL THEN 1 ELSE 0 END) AS response_success_cnt,
            SUM(CASE WHEN response_errcode IS NOT NULL THEN 1 ELSE 0 END) AS response_errcode_cnt,
            SUM(CASE WHEN treat_info_raw IS NOT NULL AND treat_info_raw != '' THEN 1 ELSE 0 END) AS treat_info_raw_cnt,
            SUM(CASE WHEN round_list_raw IS NOT NULL AND round_list_raw != '' THEN 1 ELSE 0 END) AS round_list_raw_cnt,
            SUM(CASE WHEN leetify_data_raw IS NOT NULL AND leetify_data_raw != '' THEN 1 ELSE 0 END) AS leetify_data_raw_cnt
        FROM fact_matches
    """).fetchone()
    meta_labels = (
        "response_code non-null",
        "response_trace_id non-empty",
        "response_success non-null",
        "response_errcode non-null",
        "treat_info_raw non-empty",
        "round_list_raw non-empty",
        "leetify_data_raw non-empty",
    )
    for label, value in zip(meta_labels, meta_counts):
        print(f"{label}: {value}")


def _print_integrity(conn):
    """Print counts of orphaned fact rows and of side rows with all-zero K/D/A."""
    print("\n--- Integrity Checks ---")
    missing_players = _scalar(conn, """
        SELECT COUNT(*) FROM fact_match_players f
        LEFT JOIN dim_players d ON f.steam_id_64 = d.steam_id_64
        WHERE d.steam_id_64 IS NULL
    """)
    print(f"fact_match_players missing dim_players: {missing_players}")

    missing_round_matches = _scalar(conn, """
        SELECT COUNT(*) FROM fact_rounds r
        LEFT JOIN fact_matches m ON r.match_id = m.match_id
        WHERE m.match_id IS NULL
    """)
    print(f"fact_rounds missing fact_matches: {missing_round_matches}")

    missing_event_rounds = _scalar(conn, """
        SELECT COUNT(*) FROM fact_round_events e
        LEFT JOIN fact_rounds r ON e.match_id = r.match_id AND e.round_num = r.round_num
        WHERE r.match_id IS NULL
    """)
    print(f"fact_round_events missing fact_rounds: {missing_event_rounds}")

    for side_table in ("fact_match_players_t", "fact_match_players_ct"):
        side_zero = _scalar(conn, f"""
            SELECT COUNT(*) FROM {side_table}
            WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0
        """)
        print(f"{side_table} zero K/D/A: {side_zero}")


def _print_side_comparison(conn):
    """Compare whole-match player stats with the per-side (T / CT) tables."""
    print("\n--- Full vs T/CT Comparison ---")
    cols = [
        'kills', 'deaths', 'assists', 'headshot_count', 'adr', 'rating', 'rating2',
        'rating3', 'rws', 'mvp_count', 'flash_duration', 'jump_count', 'is_win',
    ]
    select_list = "SELECT match_id, steam_id_64, " + ",".join(cols)
    df_full = pd.read_sql(select_list + " FROM fact_match_players", conn)
    df_t = pd.read_sql(
        select_list + " FROM fact_match_players_t", conn
    ).rename(columns={c: f"{c}_t" for c in cols})
    df_ct = pd.read_sql(
        select_list + " FROM fact_match_players_ct", conn
    ).rename(columns={c: f"{c}_ct" for c in cols})

    # Left joins keep every whole-match row even when a side row is absent.
    keys = ['match_id', 'steam_id_64']
    df = df_full.merge(df_t, on=keys, how='left').merge(df_ct, on=keys, how='left')

    for c in cols:
        empty_count = _is_empty(df[c]).sum()
        print(f"{c} empty: {empty_count}")

    additive = ['kills', 'deaths', 'assists', 'headshot_count', 'mvp_count', 'flash_duration', 'jump_count']
    for c in additive:
        t_sum = df[f"{c}_t"].fillna(0) + df[f"{c}_ct"].fillna(0)
        # flash_duration gets a small tolerance; the other columns must match exactly.
        tol = 0.01 if c == 'flash_duration' else 0
        diff = (df[c].fillna(0) - t_sum).abs() > tol
        print(f"{c} full != t+ct: {diff.sum()}")

    non_additive = ['adr', 'rating', 'rating2', 'rating3', 'rws', 'is_win']
    for c in non_additive:
        full_empty = _is_empty(df[c])
        side_nonempty = _is_nonzero(df[f"{c}_t"]) | _is_nonzero(df[f"{c}_ct"])
        full_empty_side_nonempty = full_empty & side_nonempty
        full_nonempty_side_empty = (~full_empty) & (~side_nonempty)
        print(f"{c} full empty but side has: {full_empty_side_nonempty.sum()}")
        print(f"{c} full has but side empty: {full_nonempty_side_empty.sum()}")

    print("\n--- Rating Detail ---")
    rating_cols = ['rating', 'rating2', 'rating3']
    for c in rating_cols:
        full_nonzero_mask = _is_nonzero(df[c])
        t_nonzero_mask = _is_nonzero(df[f"{c}_t"])
        ct_nonzero_mask = _is_nonzero(df[f"{c}_ct"])
        side_any_nonzero = t_nonzero_mask | ct_nonzero_mask

        full_null = df[c].isna().sum()
        full_zero = (df[c] == 0).sum()
        full_nonzero = full_nonzero_mask.sum()
        side_t_nonzero = t_nonzero_mask.sum()
        side_ct_nonzero = ct_nonzero_mask.sum()
        full_nonzero_side_zero = (full_nonzero_mask & (~side_any_nonzero)).sum()
        full_zero_side_nonzero = ((~full_nonzero_mask) & side_any_nonzero).sum()
        print(f"{c} full null: {full_null} full zero: {full_zero} full nonzero: {full_nonzero}")
        print(f"{c} side t nonzero: {side_t_nonzero} side ct nonzero: {side_ct_nonzero}")
        print(f"{c} full nonzero but side all zero: {full_nonzero_side_zero}")
        print(f"{c} full zero but side has: {full_zero_side_nonzero}")

    df_rating_src = pd.read_sql(
        "SELECT f.rating, f.rating2, f.rating3, m.data_source_type "
        "FROM fact_match_players f JOIN fact_matches m ON f.match_id = m.match_id",
        conn,
    )
    for c in rating_cols:
        # NULLs must not count as nonzero: NaN != 0 evaluates to True, so the
        # mask also requires the value to be present.
        grp = (
            df_rating_src.groupby('data_source_type')[c]
            .apply(lambda s: _is_nonzero(s).sum())
            .reset_index(name='nonzero')
        )
        print(f"{c} nonzero by source")
        print(grp)


def _print_schema_coverage(conn):
    """Print how the JSON schema paths map onto the L2 tables and what is uncovered."""
    print("\n--- Schema Coverage (fight_any) ---")
    paths = load_schema_paths(schema_path)
    fight_keys = {
        p.split('fight_any.')[1].split('.')[0]
        for p in paths
        if 'data.group_N[].fight_any.' in p
    }
    l2_cols = set(pd.read_sql("PRAGMA table_info(fact_match_players)", conn)['name'].tolist())
    # L2 column name -> fight_any key, for columns that were renamed.
    alias = {
        'kills': 'kill',
        'deaths': 'death',
        'assists': 'assist',
        'headshot_count': 'headshot',
        'mvp_count': 'is_mvp',
        'flash_duration': 'flash_enemy_time',
        'jump_count': 'jump_total',
        'awp_kills': 'awp_kill',
    }
    covered = set()
    for c in l2_cols:
        if c in fight_keys:
            covered.add(c)
        elif c in alias and alias[c] in fight_keys:
            covered.add(alias[c])
    missing_keys = sorted(fight_keys - covered)
    print(f"fight_any keys: {len(fight_keys)}")
    print(f"covered by L2 columns: {len(covered)}")
    print(f"uncovered fight_any keys: {len(missing_keys)}")
    if missing_keys:
        print(missing_keys)

    print("\n--- Coverage Zero Rate (fight_any -> fact_match_players) ---")
    # fight_any key -> L2 column; the first alias entry wins for a given key.
    key_to_alias_col = {}
    for l2k, src in alias.items():
        key_to_alias_col.setdefault(src, l2k)
    col_map = {}
    for k in fight_keys:
        if k in l2_cols:
            col_map[k] = k
        elif key_to_alias_col.get(k) in l2_cols:
            # Only map through an alias when that column really exists,
            # otherwise the SELECT below would fail on an unknown column.
            col_map[k] = key_to_alias_col[k]
    select_cols = ["steam_id_64"] + sorted(set(col_map.values()))
    df_fight = pd.read_sql(
        "SELECT " + ",".join(select_cols) + " FROM fact_match_players",
        conn,
    )
    total_rows = len(df_fight)
    stats = []
    for fight_key, col in sorted(col_map.items()):
        s = df_fight[col]
        zeros = (s == 0).sum()
        nulls = s.isna().sum()
        stats.append({
            "fight_key": fight_key,
            "column": col,
            "nonzero": total_rows - zeros - nulls,
            "zero": zeros,
            "null": nulls,
            "zero_rate": 0 if total_rows == 0 else round(zeros / total_rows, 4),
        })
    # Explicit columns keep sort_values working when no key could be mapped.
    df_stats = pd.DataFrame(
        stats,
        columns=["fight_key", "column", "nonzero", "zero", "null", "zero_rate"],
    ).sort_values(["zero_rate", "nonzero"], ascending=[False, True])
    print(df_stats.head(30))
    print("\n-- zero_rate top (most zeros) --")
    print(df_stats.head(10))
    print("\n-- zero_rate bottom (most nonzero) --")
    print(df_stats.tail(10))

    print("\n--- Schema Coverage (leetify economy) ---")
    econ_keys = [
        'data.leetify_data.round_stat[].bron_equipment.',
        'data.leetify_data.round_stat[].player_t_score.',
        'data.leetify_data.round_stat[].player_ct_score.',
        'data.leetify_data.round_stat[].player_bron_crash.',
    ]
    for k in econ_keys:
        count = sum(1 for p in paths if k in p)
        print(f"{k} paths: {count}")

    print("\n--- Schema Summary Coverage (by path groups) ---")
    uncovered = [p for p in paths if not is_covered(p)]
    print(f"total paths: {len(paths)}")
    print(f"covered paths: {len(paths) - len(uncovered)}")
    print(f"uncovered paths: {len(uncovered)}")

    if uncovered:
        df_unc = pd.DataFrame({"path": uncovered})
        df_unc["group"] = df_unc["path"].apply(group_key)
        print("\n-- Uncovered groups (count) --")
        print(df_unc.groupby("group").size().sort_values(ascending=False))
        print("\n-- Uncovered examples (top 50) --")
        print(df_unc["path"].head(50).to_list())


def verify():
    """Run every verification report against the L2 database and print the results.

    Opens the SQLite database at ``db_path``, then prints, in order: table
    row counts and sample rows, referential-integrity counts, the comparison
    of whole-match stats against the T/CT side tables, and the coverage of
    the JSON schema listed in ``schema_path``. The connection is closed even
    when one of the reports raises.
    """
    conn = sqlite3.connect(db_path)
    try:
        _print_overview(conn)
        _print_integrity(conn)
        _print_side_comparison(conn)
        _print_schema_coverage(conn)
    finally:
        conn.close()
|
||||
|
||||
def watch_schema(schema_path, interval=1.0):
    """Regenerate and print the schema whenever the database or schema file changes.

    Polls forever. On the first pass, and then whenever the modification time
    of ``db_path`` or of ``schema_path`` increases, the schema SQL file is
    rewritten with ``refresh_schema_sql`` and the table layout is printed
    with ``print_schema``. While the database file is missing, a message is
    printed on every poll.

    Args:
        schema_path: Path of the schema SQL file to write. Note that inside
            this function the name shadows the module-level ``schema_path``.
        interval: Seconds to sleep between polls.
    """
    def schema_file_mtime():
        # A schema file that does not exist (yet) is treated as infinitely old.
        try:
            return os.path.getmtime(schema_path)
        except OSError:
            return 0

    last_db_mtime = 0
    last_schema_mtime = 0
    first = True
    while True:
        # Ask for the mtime directly rather than checking existence first, so
        # a file that disappears between the two calls cannot crash the loop.
        try:
            db_mtime = os.path.getmtime(db_path)
        except OSError:
            print(f"db not found: {db_path}")
            time.sleep(interval)
            continue

        schema_mtime = schema_file_mtime()
        if first or db_mtime > last_db_mtime or schema_mtime > last_schema_mtime:
            conn = sqlite3.connect(db_path)
            try:
                refresh_schema_sql(conn, schema_path)
                print(f"\n[{time.strftime('%Y-%m-%d %H:%M:%S')}] schema.sql refreshed")
                print_schema(conn)
            finally:
                # Release the connection even when refreshing or printing fails.
                conn.close()
            last_db_mtime = db_mtime
            # Re-read the mtime: the refresh above has just rewritten the file,
            # and that write must not trigger another refresh on the next poll.
            last_schema_mtime = schema_file_mtime()
            first = False
        time.sleep(interval)
|
||||
|
||||
if __name__ == "__main__":
    # Command-line flags are matched case-insensitively; with no recognised
    # flag the full verification report is run.
    cli_flags = {argument.lower() for argument in sys.argv[1:]}

    if cli_flags & {"dump_uncovered", "uncovered"}:
        dump_uncovered('database/original_json_schema/uncovered_features.csv')
    elif cli_flags & {"watch_schema", "watch"}:
        try:
            watch_schema('database/L2/schema.sql')
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop the watcher.
            pass
    elif cli_flags & {"schema", "refresh_schema"}:
        if os.path.exists(db_path):
            conn = sqlite3.connect(db_path)
            # "schema" only prints; "refresh_schema" also rewrites schema.sql.
            if "refresh_schema" in cli_flags:
                refresh_schema_sql(conn, 'database/L2/schema.sql')
                print("schema.sql refreshed")
            print_schema(conn)
            conn.close()
        else:
            print(f"db not found: {db_path}")
    else:
        verify()
|
||||
245
ETL/verify_L2.py
245
ETL/verify_L2.py
@@ -1,245 +0,0 @@
|
||||
import sqlite3
|
||||
import pandas as pd
|
||||
import csv
|
||||
|
||||
pd.set_option('display.max_columns', None)
|
||||
pd.set_option('display.width', 1000)
|
||||
|
||||
db_path = 'database/L2/L2_Main.sqlite'
|
||||
|
||||
def verify():
|
||||
conn = sqlite3.connect(db_path)
|
||||
|
||||
print("--- Counts ---")
|
||||
tables = [
|
||||
'dim_players',
|
||||
'dim_maps',
|
||||
'fact_matches',
|
||||
'fact_match_players',
|
||||
'fact_match_players_t',
|
||||
'fact_match_players_ct',
|
||||
'fact_rounds',
|
||||
'fact_round_events',
|
||||
'fact_round_player_economy'
|
||||
]
|
||||
for t in tables:
|
||||
count = conn.execute(f"SELECT COUNT(*) FROM {t}").fetchone()[0]
|
||||
print(f"{t}: {count}")
|
||||
|
||||
print("\n--- Data Source Distribution ---")
|
||||
dist = pd.read_sql("SELECT data_source_type, COUNT(*) as cnt FROM fact_matches GROUP BY data_source_type", conn)
|
||||
print(dist)
|
||||
|
||||
print("\n--- Sample Round Events (Leetify vs Classic) ---")
|
||||
# Fetch one event from a leetify match
|
||||
leetify_match = conn.execute("SELECT match_id FROM fact_matches WHERE data_source_type='leetify' LIMIT 1").fetchone()
|
||||
if leetify_match:
|
||||
mid = leetify_match[0]
|
||||
print(f"Leetify Match: {mid}")
|
||||
df = pd.read_sql(f"SELECT * FROM fact_round_events WHERE match_id='{mid}' AND event_type='kill' LIMIT 1", conn)
|
||||
print(df[['event_type', 'attacker_steam_id', 'trade_killer_steam_id', 'attacker_pos_x', 'score_change_attacker']])
|
||||
|
||||
# Fetch one event from a classic match
|
||||
classic_match = conn.execute("SELECT match_id FROM fact_matches WHERE data_source_type='classic' LIMIT 1").fetchone()
|
||||
if classic_match:
|
||||
mid = classic_match[0]
|
||||
print(f"Classic Match: {mid}")
|
||||
df = pd.read_sql(f"SELECT * FROM fact_round_events WHERE match_id='{mid}' AND event_type='kill' LIMIT 1", conn)
|
||||
print(df[['event_type', 'attacker_steam_id', 'trade_killer_steam_id', 'attacker_pos_x', 'score_change_attacker']])
|
||||
|
||||
print("\n--- Sample Player Stats (New Fields) ---")
|
||||
df_players = pd.read_sql("SELECT steam_id_64, rating, rating3, elo_change, rank_score, flash_duration, jump_count FROM fact_match_players LIMIT 5", conn)
|
||||
print(df_players)
|
||||
|
||||
print("\n--- Integrity Checks ---")
|
||||
missing_players = conn.execute("""
|
||||
SELECT COUNT(*) FROM fact_match_players f
|
||||
LEFT JOIN dim_players d ON f.steam_id_64 = d.steam_id_64
|
||||
WHERE d.steam_id_64 IS NULL
|
||||
""").fetchone()[0]
|
||||
print(f"fact_match_players missing dim_players: {missing_players}")
|
||||
|
||||
missing_round_matches = conn.execute("""
|
||||
SELECT COUNT(*) FROM fact_rounds r
|
||||
LEFT JOIN fact_matches m ON r.match_id = m.match_id
|
||||
WHERE m.match_id IS NULL
|
||||
""").fetchone()[0]
|
||||
print(f"fact_rounds missing fact_matches: {missing_round_matches}")
|
||||
|
||||
missing_event_rounds = conn.execute("""
|
||||
SELECT COUNT(*) FROM fact_round_events e
|
||||
LEFT JOIN fact_rounds r ON e.match_id = r.match_id AND e.round_num = r.round_num
|
||||
WHERE r.match_id IS NULL
|
||||
""").fetchone()[0]
|
||||
print(f"fact_round_events missing fact_rounds: {missing_event_rounds}")
|
||||
|
||||
side_zero_t = conn.execute("""
|
||||
SELECT COUNT(*) FROM fact_match_players_t
|
||||
WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0
|
||||
""").fetchone()[0]
|
||||
side_zero_ct = conn.execute("""
|
||||
SELECT COUNT(*) FROM fact_match_players_ct
|
||||
WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0
|
||||
""").fetchone()[0]
|
||||
print(f"fact_match_players_t zero K/D/A: {side_zero_t}")
|
||||
print(f"fact_match_players_ct zero K/D/A: {side_zero_ct}")
|
||||
|
||||
print("\n--- Full vs T/CT Comparison ---")
|
||||
cols = [
|
||||
'kills', 'deaths', 'assists', 'headshot_count', 'adr', 'rating', 'rating2',
|
||||
'rating3', 'rws', 'mvp_count', 'flash_duration', 'jump_count', 'is_win'
|
||||
]
|
||||
df_full = pd.read_sql(
|
||||
"SELECT match_id, steam_id_64, " + ",".join(cols) + " FROM fact_match_players",
|
||||
conn
|
||||
)
|
||||
df_t = pd.read_sql(
|
||||
"SELECT match_id, steam_id_64, " + ",".join(cols) + " FROM fact_match_players_t",
|
||||
conn
|
||||
).rename(columns={c: f"{c}_t" for c in cols})
|
||||
df_ct = pd.read_sql(
|
||||
"SELECT match_id, steam_id_64, " + ",".join(cols) + " FROM fact_match_players_ct",
|
||||
conn
|
||||
).rename(columns={c: f"{c}_ct" for c in cols})
|
||||
|
||||
df = df_full.merge(df_t, on=['match_id', 'steam_id_64'], how='left')
|
||||
df = df.merge(df_ct, on=['match_id', 'steam_id_64'], how='left')
|
||||
|
||||
def is_empty(s):
    """Return a boolean mask of the entries in ``s`` that are missing or zero.

    Args:
        s: Column to inspect; callers pass DataFrame columns (``df[c]``), so
            it must support ``isna()`` and element-wise ``== 0``.

    Returns:
        Element-wise boolean result, True where the value is null or equals 0.
    """
    # A null value never compares equal to 0, so the null test is OR-ed in
    # explicitly instead of relying on the equality check alone.
    return s.isna() | (s == 0)
|
||||
|
||||
for c in cols:
|
||||
empty_count = is_empty(df[c]).sum()
|
||||
print(f"{c} empty: {empty_count}")
|
||||
|
||||
additive = ['kills', 'deaths', 'assists', 'headshot_count', 'mvp_count', 'flash_duration', 'jump_count']
|
||||
for c in additive:
|
||||
t_sum = df[f"{c}_t"].fillna(0) + df[f"{c}_ct"].fillna(0)
|
||||
tol = 0.01 if c == 'flash_duration' else 0
|
||||
diff = (df[c].fillna(0) - t_sum).abs() > tol
|
||||
print(f"{c} full != t+ct: {diff.sum()}")
|
||||
|
||||
non_additive = ['adr', 'rating', 'rating2', 'rating3', 'rws', 'is_win']
|
||||
for c in non_additive:
|
||||
side_nonempty = (~is_empty(df[f"{c}_t"])) | (~is_empty(df[f"{c}_ct"]))
|
||||
full_empty_side_nonempty = is_empty(df[c]) & side_nonempty
|
||||
full_nonempty_side_empty = (~is_empty(df[c])) & (~side_nonempty)
|
||||
print(f"{c} full empty but side has: {full_empty_side_nonempty.sum()}")
|
||||
print(f"{c} full has but side empty: {full_nonempty_side_empty.sum()}")
|
||||
|
||||
print("\n--- Rating Detail ---")
|
||||
rating_cols = ['rating', 'rating2', 'rating3']
|
||||
for c in rating_cols:
|
||||
full_null = df[c].isna().sum()
|
||||
full_zero = (df[c] == 0).sum()
|
||||
full_nonzero = ((~df[c].isna()) & (df[c] != 0)).sum()
|
||||
side_t_nonzero = ((~df[f"{c}_t"].isna()) & (df[f"{c}_t"] != 0)).sum()
|
||||
side_ct_nonzero = ((~df[f"{c}_ct"].isna()) & (df[f"{c}_ct"] != 0)).sum()
|
||||
side_any_nonzero = ((~df[f"{c}_t"].isna()) & (df[f"{c}_t"] != 0)) | ((~df[f"{c}_ct"].isna()) & (df[f"{c}_ct"] != 0))
|
||||
full_nonzero_side_zero = ((~df[c].isna()) & (df[c] != 0) & (~side_any_nonzero)).sum()
|
||||
full_zero_side_nonzero = (((df[c].isna()) | (df[c] == 0)) & side_any_nonzero).sum()
|
||||
print(f"{c} full null: {full_null} full zero: {full_zero} full nonzero: {full_nonzero}")
|
||||
print(f"{c} side t nonzero: {side_t_nonzero} side ct nonzero: {side_ct_nonzero}")
|
||||
print(f"{c} full nonzero but side all zero: {full_nonzero_side_zero}")
|
||||
print(f"{c} full zero but side has: {full_zero_side_nonzero}")
|
||||
|
||||
df_rating_src = pd.read_sql(
|
||||
"SELECT f.rating, f.rating2, f.rating3, m.data_source_type FROM fact_match_players f JOIN fact_matches m ON f.match_id = m.match_id",
|
||||
conn
|
||||
)
|
||||
for c in rating_cols:
|
||||
grp = df_rating_src.groupby('data_source_type')[c].apply(lambda s: (s != 0).sum()).reset_index(name='nonzero')
|
||||
print(f"{c} nonzero by source")
|
||||
print(grp)
|
||||
|
||||
print("\n--- Schema Coverage (fight_any) ---")
|
||||
schema_path = 'database/original_json_schema/schema_flat.csv'
|
||||
paths = []
|
||||
with open(schema_path, 'r', encoding='utf-8') as f:
|
||||
reader = csv.reader(f)
|
||||
_ = next(reader, None)
|
||||
for row in reader:
|
||||
if len(row) >= 2:
|
||||
paths.append(row[1])
|
||||
fight_keys = set()
|
||||
for p in paths:
|
||||
if 'data.group_N[].fight_any.' in p:
|
||||
key = p.split('fight_any.')[1].split('.')[0]
|
||||
fight_keys.add(key)
|
||||
l2_cols = set(pd.read_sql("PRAGMA table_info(fact_match_players)", conn)['name'].tolist())
|
||||
alias = {
|
||||
'kills': 'kill',
|
||||
'deaths': 'death',
|
||||
'assists': 'assist',
|
||||
'headshot_count': 'headshot',
|
||||
'mvp_count': 'is_mvp',
|
||||
'flash_duration': 'flash_enemy_time',
|
||||
'jump_count': 'jump_total',
|
||||
'awp_kills': 'awp_kill'
|
||||
}
|
||||
covered = set()
|
||||
for c in l2_cols:
|
||||
if c in fight_keys:
|
||||
covered.add(c)
|
||||
elif c in alias and alias[c] in fight_keys:
|
||||
covered.add(alias[c])
|
||||
missing_keys = sorted(list(fight_keys - covered))
|
||||
print(f"fight_any keys: {len(fight_keys)}")
|
||||
print(f"covered by L2 columns: {len(covered)}")
|
||||
print(f"uncovered fight_any keys: {len(missing_keys)}")
|
||||
if missing_keys:
|
||||
print(missing_keys)
|
||||
|
||||
print("\n--- Coverage Zero Rate (fight_any -> fact_match_players) ---")
|
||||
fight_cols = [k for k in fight_keys if k in l2_cols or k in alias.values()]
|
||||
col_map = {}
|
||||
for k in fight_cols:
|
||||
if k in l2_cols:
|
||||
col_map[k] = k
|
||||
else:
|
||||
for l2k, src in alias.items():
|
||||
if src == k:
|
||||
col_map[k] = l2k
|
||||
break
|
||||
select_cols = ["steam_id_64"] + list(set(col_map.values()))
|
||||
df_fight = pd.read_sql(
|
||||
"SELECT " + ",".join(select_cols) + " FROM fact_match_players",
|
||||
conn
|
||||
)
|
||||
total_rows = len(df_fight)
|
||||
stats = []
|
||||
for fight_key, col in sorted(col_map.items()):
|
||||
s = df_fight[col]
|
||||
zeros = (s == 0).sum()
|
||||
nulls = s.isna().sum()
|
||||
nonzero = total_rows - zeros - nulls
|
||||
stats.append({
|
||||
"fight_key": fight_key,
|
||||
"column": col,
|
||||
"nonzero": nonzero,
|
||||
"zero": zeros,
|
||||
"null": nulls,
|
||||
"zero_rate": 0 if total_rows == 0 else round(zeros / total_rows, 4)
|
||||
})
|
||||
df_stats = pd.DataFrame(stats).sort_values(["zero_rate", "nonzero"], ascending=[False, True])
|
||||
print(df_stats.head(30))
|
||||
print("\n-- zero_rate top (most zeros) --")
|
||||
print(df_stats.head(10))
|
||||
print("\n-- zero_rate bottom (most nonzero) --")
|
||||
print(df_stats.tail(10))
|
||||
|
||||
print("\n--- Schema Coverage (leetify economy) ---")
|
||||
econ_keys = [
|
||||
'data.leetify_data.round_stat[].bron_equipment.',
|
||||
'data.leetify_data.round_stat[].player_t_score.',
|
||||
'data.leetify_data.round_stat[].player_ct_score.',
|
||||
'data.leetify_data.round_stat[].player_bron_crash.'
|
||||
]
|
||||
for k in econ_keys:
|
||||
count = sum(1 for p in paths if k in p)
|
||||
print(f"{k} paths: {count}")
|
||||
|
||||
conn.close()
|
||||
|
||||
# Script entry point: call verify() only when this file is executed directly,
# so importing the module does not trigger the checks.
if __name__ == "__main__":
    verify()
|
||||
Reference in New Issue
Block a user