0.4.1: L2ver2 finished

This commit is contained in:
2026-01-24 00:43:05 +08:00
parent 879f63302c
commit 1deda4393c
8 changed files with 1277 additions and 271 deletions

View File

@@ -52,8 +52,12 @@ class PlayerStats:
jump_count: int = 0 jump_count: int = 0
damage_total: int = 0 damage_total: int = 0
damage_received: int = 0 damage_received: int = 0
damage_receive: int = 0
damage_stats: int = 0
assisted_kill: int = 0 assisted_kill: int = 0
awp_kill: int = 0 awp_kill: int = 0
awp_kill_ct: int = 0
awp_kill_t: int = 0
benefit_kill: int = 0 benefit_kill: int = 0
day: str = "" day: str = ""
defused_bomb: int = 0 defused_bomb: int = 0
@@ -64,6 +68,8 @@ class PlayerStats:
end_1v5: int = 0 end_1v5: int = 0
explode_bomb: int = 0 explode_bomb: int = 0
first_death: int = 0 first_death: int = 0
fd_ct: int = 0
fd_t: int = 0
first_kill: int = 0 first_kill: int = 0
flash_enemy: int = 0 flash_enemy: int = 0
flash_team: int = 0 flash_team: int = 0
@@ -109,6 +115,8 @@ class PlayerStats:
throw_harm_enemy: int = 0 throw_harm_enemy: int = 0
uid: int = 0 uid: int = 0
year: str = "" year: str = ""
sts_raw: str = ""
level_info_raw: str = ""
@dataclass @dataclass
class RoundEvent: class RoundEvent:
@@ -159,6 +167,19 @@ class RoundData:
events: List[RoundEvent] = field(default_factory=list) events: List[RoundEvent] = field(default_factory=list)
economies: List[PlayerEconomy] = field(default_factory=list) economies: List[PlayerEconomy] = field(default_factory=list)
@dataclass
class MatchTeamData:
    """Per-team summary row for one match.

    Only ``group_id`` is required; every other field has a zero/empty
    default so a record can be built from a partially populated payload.
    """

    # Team slot within the match.
    group_id: int

    # Scores. NOTE(review): "fh"/"sh" presumably mean first half / second
    # half -- confirm against the upstream API.
    group_all_score: int = 0
    group_change_elo: float = 0.0
    group_fh_role: int = 0
    group_fh_score: int = 0
    group_origin_elo: float = 0.0
    group_sh_role: int = 0
    group_sh_score: int = 0

    # Team identifier and the member uid list, the latter kept as text.
    group_tid: int = 0
    group_uids: str = ""
@dataclass @dataclass
class MatchData: class MatchData:
match_id: str match_id: str
@@ -173,12 +194,52 @@ class MatchData:
server_ip: str = "" server_ip: str = ""
server_port: int = 0 server_port: int = 0
location: str = "" location: str = ""
has_side_data_and_rating2: int = 0
match_main_id: int = 0
demo_url: str = ""
game_mode: int = 0
game_name: str = ""
map_desc: str = ""
location_full: str = ""
match_mode: int = 0
match_status: int = 0
match_flag: int = 0
status: int = 0
waiver: int = 0
year: int = 0
season: str = ""
round_total: int = 0
cs_type: int = 0
priority_show_type: int = 0
pug10m_show_type: int = 0
credit_match_status: int = 0
knife_winner: int = 0
knife_winner_role: int = 0
most_1v2_uid: int = 0
most_assist_uid: int = 0
most_awp_uid: int = 0
most_end_uid: int = 0
most_first_kill_uid: int = 0
most_headshot_uid: int = 0
most_jump_uid: int = 0
mvp_uid: int = 0
response_code: int = 0
response_message: str = ""
response_status: int = 0
response_timestamp: int = 0
response_trace_id: str = ""
response_success: int = 0
response_errcode: int = 0
treat_info_raw: str = ""
round_list_raw: str = ""
leetify_data_raw: str = ""
data_source_type: str = "unknown" data_source_type: str = "unknown"
players: Dict[str, PlayerStats] = field(default_factory=dict) # Key: steam_id_64 players: Dict[str, PlayerStats] = field(default_factory=dict) # Key: steam_id_64
players_t: Dict[str, PlayerStats] = field(default_factory=dict) players_t: Dict[str, PlayerStats] = field(default_factory=dict)
players_ct: Dict[str, PlayerStats] = field(default_factory=dict) players_ct: Dict[str, PlayerStats] = field(default_factory=dict)
rounds: List[RoundData] = field(default_factory=list) rounds: List[RoundData] = field(default_factory=list)
player_meta: Dict[str, Dict] = field(default_factory=dict) # steam_id -> {uid, name, avatar, ...} player_meta: Dict[str, Dict] = field(default_factory=dict) # steam_id -> {uid, name, avatar, ...}
teams: List[MatchTeamData] = field(default_factory=list)
# --- Database Helper --- # --- Database Helper ---
@@ -210,6 +271,7 @@ class MatchParser:
# Extracted JSON bodies # Extracted JSON bodies
self.data_match = None self.data_match = None
self.data_match_wrapper = None
self.data_vip = None self.data_vip = None
self.data_leetify = None self.data_leetify = None
self.data_round_list = None self.data_round_list = None
@@ -226,6 +288,7 @@ class MatchParser:
# Check URLs # Check URLs
if 'crane/http/api/data/match/' in url: if 'crane/http/api/data/match/' in url:
self.data_match_wrapper = body
self.data_match = body.get('data', {}) self.data_match = body.get('data', {})
elif 'crane/http/api/data/vip_plus_match_data/' in url: elif 'crane/http/api/data/vip_plus_match_data/' in url:
self.data_vip = body.get('data', {}) self.data_vip = body.get('data', {})
@@ -246,12 +309,24 @@ class MatchParser:
# Decide which round source to use # Decide which round source to use
if self.data_leetify and self.data_leetify.get('leetify_data'): if self.data_leetify and self.data_leetify.get('leetify_data'):
self.match_data.data_source_type = 'leetify' self.match_data.data_source_type = 'leetify'
try:
self.match_data.leetify_data_raw = json.dumps(self.data_leetify.get('leetify_data', {}), ensure_ascii=False)
except:
self.match_data.leetify_data_raw = ""
self.match_data.round_list_raw = ""
self._parse_leetify_rounds() self._parse_leetify_rounds()
elif self.data_round_list and self.data_round_list.get('round_list'): elif self.data_round_list and self.data_round_list.get('round_list'):
self.match_data.data_source_type = 'classic' self.match_data.data_source_type = 'classic'
try:
self.match_data.round_list_raw = json.dumps(self.data_round_list.get('round_list', []), ensure_ascii=False)
except:
self.match_data.round_list_raw = ""
self.match_data.leetify_data_raw = ""
self._parse_classic_rounds() self._parse_classic_rounds()
else: else:
self.match_data.data_source_type = 'unknown' self.match_data.data_source_type = 'unknown'
self.match_data.round_list_raw = ""
self.match_data.leetify_data_raw = ""
logger.info(f"No round data found for {self.match_id}") logger.info(f"No round data found for {self.match_id}")
return self.match_data return self.match_data
@@ -273,12 +348,89 @@ class MatchParser:
except: except:
self.match_data.server_port = 0 self.match_data.server_port = 0
self.match_data.location = m.get('location', '') self.match_data.location = m.get('location', '')
def safe_int(val):
    """Coerce a loosely typed value to ``int``, falling back to 0.

    Args:
        val: Any value (number, numeric string, ``None``, ...).

    Returns:
        The integer value of ``val``; 0 when ``val`` is ``None`` or cannot
        be interpreted as a number (including NaN and infinity).
    """
    if val is None:
        return 0
    try:
        # Convert directly first: a float round-trip silently corrupts
        # integers above 2**53 (e.g. long numeric ids).
        return int(val)
    except (TypeError, ValueError, OverflowError):
        pass
    try:
        # Fallback for decimal / exponent strings such as "3.7" or "1e3".
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are swallowed; KeyboardInterrupt and
        # SystemExit propagate, unlike with a bare ``except``.
        return 0
def safe_float(val):
    """Coerce a loosely typed value to ``float``, falling back to 0.0.

    Args:
        val: Any value (number, numeric string, ``None``, ...).

    Returns:
        ``float(val)``; 0.0 when ``val`` is ``None`` or cannot be converted.
    """
    if val is None:
        return 0.0
    try:
        return float(val)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are swallowed; KeyboardInterrupt and
        # SystemExit propagate, unlike with a bare ``except``.
        return 0.0
def safe_text(val):
    """Return ``str(val)``, mapping ``None`` to the empty string."""
    if val is None:
        return ""
    return str(val)
wrapper = self.data_match_wrapper or {}
self.match_data.response_code = safe_int(wrapper.get('code'))
self.match_data.response_message = safe_text(wrapper.get('message'))
self.match_data.response_status = safe_int(wrapper.get('status'))
self.match_data.response_timestamp = safe_int(wrapper.get('timeStamp') if wrapper.get('timeStamp') is not None else wrapper.get('timestamp'))
self.match_data.response_trace_id = safe_text(wrapper.get('traceId') if wrapper.get('traceId') is not None else wrapper.get('trace_id'))
self.match_data.response_success = safe_int(wrapper.get('success'))
self.match_data.response_errcode = safe_int(wrapper.get('errcode'))
self.match_data.has_side_data_and_rating2 = safe_int(self.data_match.get('has_side_data_and_rating2'))
self.match_data.match_main_id = safe_int(m.get('id'))
self.match_data.demo_url = safe_text(m.get('demo_url'))
self.match_data.game_mode = safe_int(m.get('game_mode'))
self.match_data.game_name = safe_text(m.get('game_name'))
self.match_data.map_desc = safe_text(m.get('map_desc'))
self.match_data.location_full = safe_text(m.get('location_full'))
self.match_data.match_mode = safe_int(m.get('match_mode'))
self.match_data.match_status = safe_int(m.get('match_status'))
self.match_data.match_flag = safe_int(m.get('match_flag'))
self.match_data.status = safe_int(m.get('status'))
self.match_data.waiver = safe_int(m.get('waiver'))
self.match_data.year = safe_int(m.get('year'))
self.match_data.season = safe_text(m.get('season'))
self.match_data.round_total = safe_int(m.get('round_total'))
self.match_data.cs_type = safe_int(m.get('cs_type'))
self.match_data.priority_show_type = safe_int(m.get('priority_show_type'))
self.match_data.pug10m_show_type = safe_int(m.get('pug10m_show_type'))
self.match_data.credit_match_status = safe_int(m.get('credit_match_status'))
self.match_data.knife_winner = safe_int(m.get('knife_winner'))
self.match_data.knife_winner_role = safe_int(m.get('knife_winner_role'))
self.match_data.most_1v2_uid = safe_int(m.get('most_1v2_uid'))
self.match_data.most_assist_uid = safe_int(m.get('most_assist_uid'))
self.match_data.most_awp_uid = safe_int(m.get('most_awp_uid'))
self.match_data.most_end_uid = safe_int(m.get('most_end_uid'))
self.match_data.most_first_kill_uid = safe_int(m.get('most_first_kill_uid'))
self.match_data.most_headshot_uid = safe_int(m.get('most_headshot_uid'))
self.match_data.most_jump_uid = safe_int(m.get('most_jump_uid'))
self.match_data.mvp_uid = safe_int(m.get('mvp_uid'))
treat_info = self.data_match.get('treat_info')
if treat_info is not None:
try:
self.match_data.treat_info_raw = json.dumps(treat_info, ensure_ascii=False)
except:
self.match_data.treat_info_raw = ""
self.match_data.teams = []
for idx in [1, 2]:
team = MatchTeamData(
group_id=idx,
group_all_score=safe_int(m.get(f"group{idx}_all_score")),
group_change_elo=safe_float(m.get(f"group{idx}_change_elo")),
group_fh_role=safe_int(m.get(f"group{idx}_fh_role")),
group_fh_score=safe_int(m.get(f"group{idx}_fh_score")),
group_origin_elo=safe_float(m.get(f"group{idx}_origin_elo")),
group_sh_role=safe_int(m.get(f"group{idx}_sh_role")),
group_sh_score=safe_int(m.get(f"group{idx}_sh_score")),
group_tid=safe_int(m.get(f"group{idx}_tid")),
group_uids=safe_text(m.get(f"group{idx}_uids"))
)
self.match_data.teams.append(team)
def _parse_players_base(self): def _parse_players_base(self):
# Players are in group_1 and group_2 lists in data_match # Players are in group_1 and group_2 lists in data_match
groups = [] groups = []
if 'group_1' in self.data_match: groups.extend(self.data_match['group_1']) if 'group_1' in self.data_match: groups.extend(self.data_match['group_1'])
if 'group_2' in self.data_match: groups.extend(self.data_match['group_2']) if 'group_2' in self.data_match: groups.extend(self.data_match['group_2'])
def safe_int(val):
    """Coerce a loosely typed value to ``int``, falling back to 0.

    Args:
        val: Any value (number, numeric string, ``None``, ...).

    Returns:
        The integer value of ``val``; 0 when ``val`` is ``None`` or cannot
        be interpreted as a number (including NaN and infinity).
    """
    if val is None:
        return 0
    try:
        # Convert directly first: a float round-trip silently corrupts
        # integers above 2**53 (e.g. long numeric ids).
        return int(val)
    except (TypeError, ValueError, OverflowError):
        pass
    try:
        # Fallback for decimal / exponent strings such as "3.7" or "1e3".
        return int(float(val))
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are swallowed; KeyboardInterrupt and
        # SystemExit propagate, unlike with a bare ``except``.
        return 0
def safe_text(val):
    """Stringify ``val``; ``None`` becomes ``""`` rather than ``"None"``."""
    if val is not None:
        return str(val)
    return ""
for p in groups: for p in groups:
# We need steam_id. # We need steam_id.
@@ -305,17 +457,90 @@ class MatchParser:
if not steam_id: if not steam_id:
continue continue
status = user_data.get('status', {})
platform_exp = user_data.get('platformExp', {})
trusted = user_data.get('trusted', {})
certify = user_data.get('certify', {})
identity = user_data.get('identity', {})
plus_info = user_info.get('plus_info', {}) or p.get('plus_info', {})
user_info_raw = ""
try:
user_info_raw = json.dumps(user_info, ensure_ascii=False)
except:
user_info_raw = ""
self.match_data.player_meta[steam_id] = { self.match_data.player_meta[steam_id] = {
'uid': uid, 'uid': safe_int(uid),
'username': user_data.get('username', ''), 'username': safe_text(user_data.get('username')),
'avatar_url': profile.get('avatarUrl', ''), 'uuid': safe_text(user_data.get('uuid')),
'domain': profile.get('domain', ''), 'email': safe_text(user_data.get('email')),
'created_at': user_data.get('createdAt', 0), 'area': safe_text(user_data.get('area')),
'updated_at': user_data.get('updatedAt', 0) 'mobile': safe_text(user_data.get('mobile')),
'avatar_url': safe_text(profile.get('avatarUrl')),
'domain': safe_text(profile.get('domain')),
'user_domain': safe_text(user_data.get('domain')),
'created_at': safe_int(user_data.get('createdAt')),
'updated_at': safe_int(user_data.get('updatedAt')),
'username_audit_status': safe_int(user_data.get('usernameAuditStatus')),
'accid': safe_text(user_data.get('Accid')),
'team_id': safe_int(user_data.get('teamID')),
'trumpet_count': safe_int(user_data.get('trumpetCount')),
'profile_nickname': safe_text(profile.get('nickname')),
'profile_avatar_audit_status': safe_int(profile.get('avatarAuditStatus')),
'profile_rgb_avatar_url': safe_text(profile.get('rgbAvatarUrl')),
'profile_photo_url': safe_text(profile.get('photoUrl')),
'profile_gender': safe_int(profile.get('gender')),
'profile_birthday': safe_int(profile.get('birthday')),
'profile_country_id': safe_text(profile.get('countryId')),
'profile_region_id': safe_text(profile.get('regionId')),
'profile_city_id': safe_text(profile.get('cityId')),
'profile_language': safe_text(profile.get('language')),
'profile_recommend_url': safe_text(profile.get('recommendUrl')),
'profile_group_id': safe_int(profile.get('groupId')),
'profile_reg_source': safe_int(profile.get('regSource')),
'status_status': safe_int(status.get('status')),
'status_expire': safe_int(status.get('expire')),
'status_cancellation_status': safe_int(status.get('cancellationStatus')),
'status_new_user': safe_int(status.get('newUser')),
'status_login_banned_time': safe_int(status.get('loginBannedTime')),
'status_anticheat_type': safe_int(status.get('anticheatType')),
'status_flag_status1': safe_text(status.get('flagStatus1')),
'status_anticheat_status': safe_text(status.get('anticheatStatus')),
'status_flag_honor': safe_text(status.get('FlagHonor')),
'status_privacy_policy_status': safe_int(status.get('PrivacyPolicyStatus')),
'status_csgo_frozen_exptime': safe_int(status.get('csgoFrozenExptime')),
'platformexp_level': safe_int(platform_exp.get('level')),
'platformexp_exp': safe_int(platform_exp.get('exp')),
'steam_account': safe_text(steam_data.get('steamAccount')),
'steam_trade_url': safe_text(steam_data.get('tradeUrl')),
'steam_rent_id': safe_text(steam_data.get('rentSteamId')),
'trusted_credit': safe_int(trusted.get('credit')),
'trusted_credit_level': safe_int(trusted.get('creditLevel')),
'trusted_score': safe_int(trusted.get('score')),
'trusted_status': safe_int(trusted.get('status')),
'trusted_credit_status': safe_int(trusted.get('creditStatus')),
'certify_id_type': safe_int(certify.get('idType')),
'certify_status': safe_int(certify.get('status')),
'certify_age': safe_int(certify.get('age')),
'certify_real_name': safe_text(certify.get('realName')),
'certify_uid_list': safe_text(json.dumps(certify.get('uidList'), ensure_ascii=False)) if certify.get('uidList') is not None else "",
'certify_audit_status': safe_int(certify.get('auditStatus')),
'certify_gender': safe_int(certify.get('gender')),
'identity_type': safe_int(identity.get('type')),
'identity_extras': safe_text(identity.get('extras')),
'identity_status': safe_int(identity.get('status')),
'identity_slogan': safe_text(identity.get('slogan')),
'identity_list': safe_text(json.dumps(identity.get('identity_list'), ensure_ascii=False)) if identity.get('identity_list') is not None else "",
'identity_slogan_ext': safe_text(identity.get('slogan_ext')),
'identity_live_url': safe_text(identity.get('live_url')),
'identity_live_type': safe_int(identity.get('live_type')),
'plus_is_plus': safe_int(plus_info.get('is_plus')),
'user_info_raw': user_info_raw
} }
stats = PlayerStats(steam_id_64=steam_id) stats = PlayerStats(steam_id_64=steam_id)
sts = p.get('sts', {}) sts = p.get('sts', {})
level_info = p.get('level_info', {})
try: try:
# Use safe conversion helper # Use safe conversion helper
@@ -329,6 +554,16 @@ class MatchParser:
def safe_text(val): def safe_text(val):
return "" if val is None else str(val) return "" if val is None else str(val)
if sts is not None:
try:
stats.sts_raw = json.dumps(sts, ensure_ascii=False)
except:
stats.sts_raw = ""
if level_info is not None:
try:
stats.level_info_raw = json.dumps(level_info, ensure_ascii=False)
except:
stats.level_info_raw = ""
def get_stat(key): def get_stat(key):
if key in fight and fight.get(key) not in [None, ""]: if key in fight and fight.get(key) not in [None, ""]:
@@ -513,11 +748,22 @@ class MatchParser:
p = self.match_data.players[sid] p = self.match_data.players[sid]
p.kast = float(vdata.get('kast', 0)) p.kast = float(vdata.get('kast', 0))
p.awp_kills = int(vdata.get('awp_kill', 0)) p.awp_kills = int(vdata.get('awp_kill', 0))
# Damage stats might need calculation or mapping p.awp_kill_ct = int(vdata.get('awp_kill_ct', 0))
# p.damage_total = ... p.awp_kill_t = int(vdata.get('awp_kill_t', 0))
p.fd_ct = int(vdata.get('fd_ct', 0))
p.fd_t = int(vdata.get('fd_t', 0))
p.damage_receive = int(vdata.get('damage_receive', 0))
p.damage_stats = int(vdata.get('damage_stats', 0))
else: else:
# Try to match by 5E ID if possible, but here keys are steamids usually # Try to match by 5E ID if possible, but here keys are steamids usually
pass pass
for sid, p in self.match_data.players.items():
if sid in self.match_data.players_t:
self.match_data.players_t[sid].awp_kill_t = p.awp_kill_t
self.match_data.players_t[sid].fd_t = p.fd_t
if sid in self.match_data.players_ct:
self.match_data.players_ct[sid].awp_kill_ct = p.awp_kill_ct
self.match_data.players_ct[sid].fd_ct = p.fd_ct
def _parse_leetify_rounds(self): def _parse_leetify_rounds(self):
l_data = self.data_leetify.get('leetify_data', {}) l_data = self.data_leetify.get('leetify_data', {})
@@ -744,32 +990,167 @@ def process_matches():
def save_match(cursor, m: MatchData): def save_match(cursor, m: MatchData):
# 1. Dim Players (Upsert) # 1. Dim Players (Upsert)
player_meta_columns = [
"steam_id_64", "uid", "username", "avatar_url", "domain", "created_at", "updated_at",
"last_seen_match_id", "uuid", "email", "area", "mobile", "user_domain",
"username_audit_status", "accid", "team_id", "trumpet_count",
"profile_nickname", "profile_avatar_audit_status", "profile_rgb_avatar_url",
"profile_photo_url", "profile_gender", "profile_birthday", "profile_country_id",
"profile_region_id", "profile_city_id", "profile_language", "profile_recommend_url",
"profile_group_id", "profile_reg_source", "status_status", "status_expire",
"status_cancellation_status", "status_new_user", "status_login_banned_time",
"status_anticheat_type", "status_flag_status1", "status_anticheat_status",
"status_flag_honor", "status_privacy_policy_status", "status_csgo_frozen_exptime",
"platformexp_level", "platformexp_exp", "steam_account", "steam_trade_url",
"steam_rent_id", "trusted_credit", "trusted_credit_level", "trusted_score",
"trusted_status", "trusted_credit_status", "certify_id_type", "certify_status",
"certify_age", "certify_real_name", "certify_uid_list", "certify_audit_status",
"certify_gender", "identity_type", "identity_extras", "identity_status",
"identity_slogan", "identity_list", "identity_slogan_ext", "identity_live_url",
"identity_live_type", "plus_is_plus", "user_info_raw"
]
player_meta_placeholders = ",".join(["?"] * len(player_meta_columns))
player_meta_columns_sql = ",".join(player_meta_columns)
for sid, meta in m.player_meta.items(): for sid, meta in m.player_meta.items():
cursor.execute(""" cursor.execute("""
INSERT INTO dim_players (steam_id_64, uid, username, avatar_url, domain, created_at, updated_at, last_seen_match_id) INSERT INTO dim_players (""" + player_meta_columns_sql + """)
VALUES (?, ?, ?, ?, ?, ?, ?, ?) VALUES (""" + player_meta_placeholders + """)
ON CONFLICT(steam_id_64) DO UPDATE SET ON CONFLICT(steam_id_64) DO UPDATE SET
uid=excluded.uid,
username=excluded.username, username=excluded.username,
avatar_url=excluded.avatar_url, avatar_url=excluded.avatar_url,
last_seen_match_id=excluded.last_seen_match_id domain=excluded.domain,
created_at=excluded.created_at,
updated_at=excluded.updated_at,
last_seen_match_id=excluded.last_seen_match_id,
uuid=excluded.uuid,
email=excluded.email,
area=excluded.area,
mobile=excluded.mobile,
user_domain=excluded.user_domain,
username_audit_status=excluded.username_audit_status,
accid=excluded.accid,
team_id=excluded.team_id,
trumpet_count=excluded.trumpet_count,
profile_nickname=excluded.profile_nickname,
profile_avatar_audit_status=excluded.profile_avatar_audit_status,
profile_rgb_avatar_url=excluded.profile_rgb_avatar_url,
profile_photo_url=excluded.profile_photo_url,
profile_gender=excluded.profile_gender,
profile_birthday=excluded.profile_birthday,
profile_country_id=excluded.profile_country_id,
profile_region_id=excluded.profile_region_id,
profile_city_id=excluded.profile_city_id,
profile_language=excluded.profile_language,
profile_recommend_url=excluded.profile_recommend_url,
profile_group_id=excluded.profile_group_id,
profile_reg_source=excluded.profile_reg_source,
status_status=excluded.status_status,
status_expire=excluded.status_expire,
status_cancellation_status=excluded.status_cancellation_status,
status_new_user=excluded.status_new_user,
status_login_banned_time=excluded.status_login_banned_time,
status_anticheat_type=excluded.status_anticheat_type,
status_flag_status1=excluded.status_flag_status1,
status_anticheat_status=excluded.status_anticheat_status,
status_flag_honor=excluded.status_flag_honor,
status_privacy_policy_status=excluded.status_privacy_policy_status,
status_csgo_frozen_exptime=excluded.status_csgo_frozen_exptime,
platformexp_level=excluded.platformexp_level,
platformexp_exp=excluded.platformexp_exp,
steam_account=excluded.steam_account,
steam_trade_url=excluded.steam_trade_url,
steam_rent_id=excluded.steam_rent_id,
trusted_credit=excluded.trusted_credit,
trusted_credit_level=excluded.trusted_credit_level,
trusted_score=excluded.trusted_score,
trusted_status=excluded.trusted_status,
trusted_credit_status=excluded.trusted_credit_status,
certify_id_type=excluded.certify_id_type,
certify_status=excluded.certify_status,
certify_age=excluded.certify_age,
certify_real_name=excluded.certify_real_name,
certify_uid_list=excluded.certify_uid_list,
certify_audit_status=excluded.certify_audit_status,
certify_gender=excluded.certify_gender,
identity_type=excluded.identity_type,
identity_extras=excluded.identity_extras,
identity_status=excluded.identity_status,
identity_slogan=excluded.identity_slogan,
identity_list=excluded.identity_list,
identity_slogan_ext=excluded.identity_slogan_ext,
identity_live_url=excluded.identity_live_url,
identity_live_type=excluded.identity_live_type,
plus_is_plus=excluded.plus_is_plus,
user_info_raw=excluded.user_info_raw
""", ( """, (
sid, meta.get('uid'), meta.get('username'), meta.get('avatar_url'), sid, meta.get('uid'), meta.get('username'), meta.get('avatar_url'),
meta.get('domain'), meta.get('created_at'), meta.get('updated_at'), meta.get('domain'), meta.get('created_at'), meta.get('updated_at'),
m.match_id m.match_id, meta.get('uuid'), meta.get('email'), meta.get('area'),
meta.get('mobile'), meta.get('user_domain'), meta.get('username_audit_status'),
meta.get('accid'), meta.get('team_id'), meta.get('trumpet_count'),
meta.get('profile_nickname'), meta.get('profile_avatar_audit_status'),
meta.get('profile_rgb_avatar_url'), meta.get('profile_photo_url'),
meta.get('profile_gender'), meta.get('profile_birthday'),
meta.get('profile_country_id'), meta.get('profile_region_id'),
meta.get('profile_city_id'), meta.get('profile_language'),
meta.get('profile_recommend_url'), meta.get('profile_group_id'),
meta.get('profile_reg_source'), meta.get('status_status'),
meta.get('status_expire'), meta.get('status_cancellation_status'),
meta.get('status_new_user'), meta.get('status_login_banned_time'),
meta.get('status_anticheat_type'), meta.get('status_flag_status1'),
meta.get('status_anticheat_status'), meta.get('status_flag_honor'),
meta.get('status_privacy_policy_status'), meta.get('status_csgo_frozen_exptime'),
meta.get('platformexp_level'), meta.get('platformexp_exp'),
meta.get('steam_account'), meta.get('steam_trade_url'),
meta.get('steam_rent_id'), meta.get('trusted_credit'),
meta.get('trusted_credit_level'), meta.get('trusted_score'),
meta.get('trusted_status'), meta.get('trusted_credit_status'),
meta.get('certify_id_type'), meta.get('certify_status'),
meta.get('certify_age'), meta.get('certify_real_name'),
meta.get('certify_uid_list'), meta.get('certify_audit_status'),
meta.get('certify_gender'), meta.get('identity_type'),
meta.get('identity_extras'), meta.get('identity_status'),
meta.get('identity_slogan'), meta.get('identity_list'),
meta.get('identity_slogan_ext'), meta.get('identity_live_url'),
meta.get('identity_live_type'), meta.get('plus_is_plus'),
meta.get('user_info_raw')
)) ))
# 2. Dim Maps (Ignore if exists) # 2. Dim Maps (Ignore if exists)
if m.map_name: if m.map_name:
cursor.execute("INSERT OR IGNORE INTO dim_maps (map_name) VALUES (?)", (m.map_name,)) cursor.execute("""
INSERT INTO dim_maps (map_name, map_desc)
VALUES (?, ?)
ON CONFLICT(map_name) DO UPDATE SET
map_desc=excluded.map_desc
""", (m.map_name, m.map_desc))
# 3. Fact Matches # 3. Fact Matches
cursor.execute(""" cursor.execute("""
INSERT OR REPLACE INTO fact_matches INSERT OR REPLACE INTO fact_matches
(match_id, match_code, map_name, start_time, end_time, duration, winner_team, score_team1, score_team2, server_ip, server_port, location, data_source_type) (match_id, match_code, map_name, start_time, end_time, duration, winner_team, score_team1, score_team2, server_ip, server_port, location, has_side_data_and_rating2, match_main_id, demo_url, game_mode, game_name, map_desc, location_full, match_mode, match_status, match_flag, status, waiver, year, season, round_total, cs_type, priority_show_type, pug10m_show_type, credit_match_status, knife_winner, knife_winner_role, most_1v2_uid, most_assist_uid, most_awp_uid, most_end_uid, most_first_kill_uid, most_headshot_uid, most_jump_uid, mvp_uid, response_code, response_message, response_status, response_timestamp, response_trace_id, response_success, response_errcode, treat_info_raw, round_list_raw, leetify_data_raw, data_source_type)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", ( """, (
m.match_id, m.match_code, m.map_name, m.start_time, m.end_time, m.duration, m.match_id, m.match_code, m.map_name, m.start_time, m.end_time, m.duration,
m.winner_team, m.score_team1, m.score_team2, m.server_ip, m.server_port, m.location, m.data_source_type m.winner_team, m.score_team1, m.score_team2, m.server_ip, m.server_port, m.location,
m.has_side_data_and_rating2, m.match_main_id, m.demo_url, m.game_mode, m.game_name, m.map_desc,
m.location_full, m.match_mode, m.match_status, m.match_flag, m.status, m.waiver, m.year, m.season,
m.round_total, m.cs_type, m.priority_show_type, m.pug10m_show_type, m.credit_match_status,
m.knife_winner, m.knife_winner_role, m.most_1v2_uid, m.most_assist_uid, m.most_awp_uid,
m.most_end_uid, m.most_first_kill_uid, m.most_headshot_uid, m.most_jump_uid, m.mvp_uid,
m.response_code, m.response_message, m.response_status, m.response_timestamp, m.response_trace_id,
m.response_success, m.response_errcode, m.treat_info_raw, m.round_list_raw, m.leetify_data_raw, m.data_source_type
))
for t in m.teams:
cursor.execute("""
INSERT OR REPLACE INTO fact_match_teams
(match_id, group_id, group_all_score, group_change_elo, group_fh_role, group_fh_score, group_origin_elo, group_sh_role, group_sh_score, group_tid, group_uids)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
m.match_id, t.group_id, t.group_all_score, t.group_change_elo, t.group_fh_role, t.group_fh_score,
t.group_origin_elo, t.group_sh_role, t.group_sh_score, t.group_tid, t.group_uids
)) ))
# 4. Fact Match Players # 4. Fact Match Players
@@ -779,9 +1160,10 @@ def save_match(cursor, m: MatchData):
"rank_score", "is_win", "kast", "entry_kills", "entry_deaths", "awp_kills", "rank_score", "is_win", "kast", "entry_kills", "entry_deaths", "awp_kills",
"clutch_1v1", "clutch_1v2", "clutch_1v3", "clutch_1v4", "clutch_1v5", "clutch_1v1", "clutch_1v2", "clutch_1v3", "clutch_1v4", "clutch_1v5",
"flash_assists", "flash_duration", "jump_count", "damage_total", "damage_received", "flash_assists", "flash_duration", "jump_count", "damage_total", "damage_received",
"assisted_kill", "awp_kill", "benefit_kill", "day", "defused_bomb", "end_1v1", "damage_receive", "damage_stats", "assisted_kill", "awp_kill", "awp_kill_ct",
"awp_kill_t", "benefit_kill", "day", "defused_bomb", "end_1v1",
"end_1v2", "end_1v3", "end_1v4", "end_1v5", "explode_bomb", "first_death", "end_1v2", "end_1v3", "end_1v4", "end_1v5", "explode_bomb", "first_death",
"first_kill", "flash_enemy", "flash_team", "flash_team_time", "flash_time", "fd_ct", "fd_t", "first_kill", "flash_enemy", "flash_team", "flash_team_time", "flash_time",
"game_mode", "group_id", "hold_total", "id", "is_highlight", "is_most_1v2", "game_mode", "group_id", "hold_total", "id", "is_highlight", "is_most_1v2",
"is_most_assist", "is_most_awp", "is_most_end", "is_most_first_kill", "is_most_assist", "is_most_awp", "is_most_end", "is_most_first_kill",
"is_most_headshot", "is_most_jump", "is_svp", "is_tie", "kill_1", "kill_2", "is_most_headshot", "is_most_jump", "is_svp", "is_tie", "kill_1", "kill_2",
@@ -789,7 +1171,7 @@ def save_match(cursor, m: MatchData):
"many_assists_cnt3", "many_assists_cnt4", "many_assists_cnt5", "map", "many_assists_cnt3", "many_assists_cnt4", "many_assists_cnt5", "map",
"match_code", "match_mode", "match_team_id", "match_time", "per_headshot", "match_code", "match_mode", "match_team_id", "match_time", "per_headshot",
"perfect_kill", "planted_bomb", "revenge_kill", "round_total", "season", "perfect_kill", "planted_bomb", "revenge_kill", "round_total", "season",
"team_kill", "throw_harm", "throw_harm_enemy", "uid", "year" "team_kill", "throw_harm", "throw_harm_enemy", "uid", "year", "sts_raw", "level_info_raw"
] ]
player_placeholders = ",".join(["?"] * len(player_columns)) player_placeholders = ",".join(["?"] * len(player_columns))
player_columns_sql = ",".join(player_columns) player_columns_sql = ",".join(player_columns)
@@ -801,9 +1183,10 @@ def save_match(cursor, m: MatchData):
p.elo_change, p.rank_score, p.is_win, p.kast, p.entry_kills, p.entry_deaths, p.elo_change, p.rank_score, p.is_win, p.kast, p.entry_kills, p.entry_deaths,
p.awp_kills, p.clutch_1v1, p.clutch_1v2, p.clutch_1v3, p.clutch_1v4, p.awp_kills, p.clutch_1v1, p.clutch_1v2, p.clutch_1v3, p.clutch_1v4,
p.clutch_1v5, p.flash_assists, p.flash_duration, p.jump_count, p.damage_total, p.clutch_1v5, p.flash_assists, p.flash_duration, p.jump_count, p.damage_total,
p.damage_received, p.assisted_kill, p.awp_kill, p.benefit_kill, p.day, p.damage_received, p.damage_receive, p.damage_stats, p.assisted_kill, p.awp_kill,
p.defused_bomb, p.end_1v1, p.end_1v2, p.end_1v3, p.end_1v4, p.end_1v5, p.awp_kill_ct, p.awp_kill_t, p.benefit_kill, p.day, p.defused_bomb, p.end_1v1,
p.explode_bomb, p.first_death, p.first_kill, p.flash_enemy, p.flash_team, p.end_1v2, p.end_1v3, p.end_1v4, p.end_1v5, p.explode_bomb, p.first_death,
p.fd_ct, p.fd_t, p.first_kill, p.flash_enemy, p.flash_team,
p.flash_team_time, p.flash_time, p.game_mode, p.group_id, p.hold_total, p.flash_team_time, p.flash_time, p.game_mode, p.group_id, p.hold_total,
p.id, p.is_highlight, p.is_most_1v2, p.is_most_assist, p.is_most_awp, p.id, p.is_highlight, p.is_most_1v2, p.is_most_assist, p.is_most_awp,
p.is_most_end, p.is_most_first_kill, p.is_most_headshot, p.is_most_jump, p.is_most_end, p.is_most_first_kill, p.is_most_headshot, p.is_most_jump,
@@ -812,7 +1195,7 @@ def save_match(cursor, m: MatchData):
p.many_assists_cnt5, p.map, p.match_code, p.match_mode, p.match_team_id, p.many_assists_cnt5, p.map, p.match_code, p.match_mode, p.match_team_id,
p.match_time, p.per_headshot, p.perfect_kill, p.planted_bomb, p.revenge_kill, p.match_time, p.per_headshot, p.perfect_kill, p.planted_bomb, p.revenge_kill,
p.round_total, p.season, p.team_kill, p.throw_harm, p.throw_harm_enemy, p.round_total, p.season, p.team_kill, p.throw_harm, p.throw_harm_enemy,
p.uid, p.year p.uid, p.year, p.sts_raw, p.level_info_raw
] ]
for sid, p in m.players.items(): for sid, p in m.players.items():

504
ETL/verify/verify_L2.py Normal file
View File

@@ -0,0 +1,504 @@
import sqlite3
import pandas as pd
import csv
import os
import sys
import time
# Show every column and use a wide console width when DataFrames are printed.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
# SQLite database inspected by this script (relative path).
db_path = 'database/L2/L2_Main.sqlite'
# Flattened JSON schema CSV; load_schema_paths() collects the second column of each row.
schema_path = 'database/original_json_schema/schema_flat.csv'
# Keys directly under "data.main." that is_covered() treats as covered.
# is_covered() compares the first path segment after "data.main." against this set.
covered_main_fields = {
"match_code", "map", "start_time", "end_time", "match_winner",
"group1_all_score", "group1_change_elo", "group1_fh_role", "group1_fh_score",
"group1_origin_elo", "group1_sh_role", "group1_sh_score", "group1_tid", "group1_uids",
"group2_all_score", "group2_change_elo", "group2_fh_role", "group2_fh_score",
"group2_origin_elo", "group2_sh_role", "group2_sh_score", "group2_tid", "group2_uids",
"server_ip", "server_port", "location", "location_full", "map_desc",
"demo_url", "game_mode", "game_name", "match_mode", "match_status", "match_flag",
"status", "waiver", "year", "season", "round_total", "cs_type", "priority_show_type",
"pug10m_show_type", "credit_match_status", "knife_winner", "knife_winner_role",
"most_1v2_uid", "most_assist_uid", "most_awp_uid", "most_end_uid",
"most_first_kill_uid", "most_headshot_uid", "most_jump_uid", "mvp_uid", "id"
}
# Substrings: a schema path containing any of these is treated as covered by is_covered().
covered_user_fields = {
"data.group_N[].user_info."
}
# Substrings for round_list paths.
# NOTE(review): is_covered() already accepts every path containing "data.round_list"
# before it consults this list, so the list currently has no effect on the result.
covered_round_fields = [
"data.round_list[].current_score.ct",
"data.round_list[].current_score.t",
"data.round_list[].current_score.final_round_time",
"data.round_list[].all_kill[].pasttime",
"data.round_list[].all_kill[].weapon",
"data.round_list[].all_kill[].headshot",
"data.round_list[].all_kill[].penetrated",
"data.round_list[].all_kill[].attackerblind",
"data.round_list[].all_kill[].throughsmoke",
"data.round_list[].all_kill[].noscope",
"data.round_list[].all_kill[].attacker.steamid_64",
"data.round_list[].all_kill[].victim.steamid_64",
"data.round_list[].all_kill[].attacker.pos.x",
"data.round_list[].all_kill[].attacker.pos.y",
"data.round_list[].all_kill[].attacker.pos.z",
"data.round_list[].all_kill[].victim.pos.x",
"data.round_list[].all_kill[].victim.pos.y",
"data.round_list[].all_kill[].victim.pos.z"
]
# Substrings for leetify paths (entries ending in "." act as prefixes of nested keys).
# NOTE(review): is_covered() already accepts every path containing "data.leetify_data."
# before it consults this list, so the list currently has no effect on the result.
covered_leetify_fields = [
"data.leetify_data.round_stat[].round",
"data.leetify_data.round_stat[].win_reason",
"data.leetify_data.round_stat[].end_ts",
"data.leetify_data.round_stat[].sfui_event.score_ct",
"data.leetify_data.round_stat[].sfui_event.score_t",
"data.leetify_data.round_stat[].ct_money_group",
"data.leetify_data.round_stat[].t_money_group",
"data.leetify_data.round_stat[].show_event[].ts",
"data.leetify_data.round_stat[].show_event[].kill_event.Ts",
"data.leetify_data.round_stat[].show_event[].kill_event.Killer",
"data.leetify_data.round_stat[].show_event[].kill_event.Victim",
"data.leetify_data.round_stat[].show_event[].kill_event.WeaponName",
"data.leetify_data.round_stat[].show_event[].kill_event.Headshot",
"data.leetify_data.round_stat[].show_event[].kill_event.Penetrated",
"data.leetify_data.round_stat[].show_event[].kill_event.AttackerBlind",
"data.leetify_data.round_stat[].show_event[].kill_event.ThroughSmoke",
"data.leetify_data.round_stat[].show_event[].kill_event.NoScope",
"data.leetify_data.round_stat[].show_event[].trade_score_change.",
"data.leetify_data.round_stat[].show_event[].flash_assist_killer_score_change.",
"data.leetify_data.round_stat[].show_event[].killer_score_change.",
"data.leetify_data.round_stat[].show_event[].victim_score_change.",
"data.leetify_data.round_stat[].bron_equipment.",
"data.leetify_data.round_stat[].player_t_score.",
"data.leetify_data.round_stat[].player_ct_score.",
"data.leetify_data.round_stat[].player_bron_crash."
]
# Keys directly under "data.<steamid>." that is_covered() treats as covered.
covered_vip_fields = {
"awp_kill",
"awp_kill_ct",
"awp_kill_t",
"damage_receive",
"damage_stats",
"fd_ct",
"fd_t",
"kast"
}
def load_schema_paths(schema_path_value):
    """Return the schema paths listed in a flattened-schema CSV file.

    The first row is treated as a header and skipped. For every following row
    that has at least two columns, the value of the second column is collected.

    Args:
        schema_path_value: Path of the UTF-8 encoded CSV file to read.

    Returns:
        list[str]: The second-column values, in file order. Empty when the
        file has no data rows.
    """
    # newline='' is what the csv module expects: without it, universal-newline
    # translation rewrites line endings embedded in quoted fields.
    with open(schema_path_value, 'r', encoding='utf-8', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row (no-op for an empty file)
        return [row[1] for row in reader if len(row) >= 2]
def is_covered(path):
    """Tell whether a flattened schema path counts as covered.

    A path is covered when it is one of the top-level response keys, when it
    contains one of the blanket-covered section markers, or when its key under
    "data.<steamid>." / "data.main." is listed in the corresponding
    module-level set.

    Args:
        path: Flattened schema path string.

    Returns:
        bool: True when any coverage rule matches, otherwise False.
    """
    top_level_keys = ("data", "code", "message", "status", "timestamp",
                      "timeStamp", "traceId", "success", "errcode")
    if path in top_level_keys:
        return True

    # Per-player section: only whitelisted keys right below the steam id count.
    steamid_prefix = "data.<steamid>."
    if path.startswith(steamid_prefix):
        player_key = path.split(steamid_prefix)[1].split(".")[0]
        if player_key in covered_vip_fields:
            return True

    # Sections accepted wholesale, whatever key follows the marker.
    wholesale_markers = (
        "data.group_N[].fight_any.",
        "data.group_N[].fight_t.",
        "data.group_N[].fight_ct.",
        "data.group_N[].sts.",
        "data.group_N[].level_info.",
        "data.treat_info.",
        "data.has_side_data_and_rating2",
        "data.round_list",
        "data.leetify_data.",
    )
    for marker in wholesale_markers:
        if marker in path:
            return True

    # Match-level section: only whitelisted keys right below "data.main.".
    main_marker = "data.main."
    if main_marker in path:
        main_key = path.split(main_marker)[1].split(".")[0]
        if main_key in covered_main_fields:
            return True

    # Substring lists kept from the original rule set.
    for substrings in (covered_user_fields, covered_round_fields, covered_leetify_fields):
        for fragment in substrings:
            if fragment in path:
                return True
    return False
def group_key(p):
    """Map a flattened schema path to the label of the section it belongs to.

    Rules are tried in order and the first marker found inside ``p`` wins, so
    more specific markers are listed before the generic "data." fallback.

    Args:
        p: Flattened schema path string.

    Returns:
        str: The section label, or "other" when no marker occurs in ``p``.
    """
    ordered_rules = (
        ("data.group_N[].user_info.", "data.group_N[].user_info.*"),
        ("data.group_N[].fight_any.", "data.group_N[].fight_any.*"),
        ("data.group_N[].fight_t.", "data.group_N[].fight_t.*"),
        ("data.group_N[].fight_ct.", "data.group_N[].fight_ct.*"),
        ("data.main.", "data.main.*"),
        ("data.round_list[]", "data.round_list.*"),
        ("data.leetify_data.round_stat[]", "data.leetify_data.round_stat.*"),
        ("data.leetify_data.", "data.leetify_data.*"),
        ("data.treat_info.", "data.treat_info.*"),
        ("data.", "data.*"),
    )
    for marker, label in ordered_rules:
        if marker in p:
            return label
    return "other"
def dump_uncovered(output_path):
    """Write the schema paths that is_covered() rejects to a CSV file.

    Paths are read from the module-level ``schema_path`` file, labelled with
    group_key(), sorted by group then path, and saved with a UTF-8 BOM.
    A per-group count is printed as well. When every path is covered, only a
    message is printed and no file is written.

    Args:
        output_path: Destination CSV path.
    """
    missing = [candidate for candidate in load_schema_paths(schema_path)
               if not is_covered(candidate)]
    if not missing:
        print("no uncovered paths")
        return

    report = pd.DataFrame({"path": missing})
    report["group"] = report["path"].apply(group_key)
    report = report.sort_values(["group", "path"])
    report.to_csv(output_path, index=False, encoding='utf-8-sig')

    group_sizes = report.groupby("group").size().sort_values(ascending=False)
    print(f"uncovered total: {len(report)}")
    print("\n-- uncovered groups (count) --")
    print(group_sizes)
    print(f"\noutput: {output_path}")
def print_schema(conn):
    """Print every user table of the database as an aligned column listing.

    For each table (names starting with "sqlite_" are excluded, ordered by
    name) a ``[table]`` header is printed, followed by a text table with the
    column name, declared type and primary-key position of each column.

    Args:
        conn: Open ``sqlite3`` connection.
    """
    tables = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name"
    ).fetchall()
    for (name,) in tables:
        print(f"\n[{name}]")
        # PRAGMA arguments cannot be bound as parameters, so quote the
        # identifier; otherwise names with spaces or keywords fail to parse.
        quoted_name = '"' + name.replace('"', '""') + '"'
        cols = conn.execute(f"PRAGMA table_info({quoted_name})").fetchall()
        rows = [["column", "type", "pk"]]
        rows.extend(
            [col_name, col_type or "", str(pk)]
            for _, col_name, col_type, _, _, pk in cols
        )
        widths = [max(len(r[i]) for r in rows) for i in range(3)]
        for idx, r in enumerate(rows):
            line = " | ".join(cell.ljust(width) for cell, width in zip(r, widths))
            print(line)
            if idx == 0:
                # Underline the header row.
                print("-" * len(line))
def refresh_schema_sql(conn, output_path):
    """Regenerate a schema SQL file from the CREATE statements stored in the database.

    Table definitions are written first, then index definitions, each group
    ordered by name. The file starts with ``PRAGMA foreign_keys = ON;`` and
    every statement is followed by a blank line.

    Args:
        conn: Open ``sqlite3`` connection to read ``sqlite_master`` from.
        output_path: File to (over)write, encoded as UTF-8.
    """
    schema_rows = conn.execute("""
        SELECT type, name, sql
        FROM sqlite_master
        WHERE sql IS NOT NULL AND type IN ('table', 'index') AND name NOT LIKE 'sqlite_%'
        ORDER BY CASE WHEN type='table' THEN 0 ELSE 1 END, name
    """).fetchall()

    chunks = ["PRAGMA foreign_keys = ON;", ""]
    for row in schema_rows:
        statement = row[2]
        chunks.extend((statement.strip() + ";", ""))

    content = "\n".join(chunks).strip() + "\n"
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(content)
def _print_sample_kill_event(conn, source_type, label):
    """Print one kill event from the first match of the given data source, if any."""
    match_row = conn.execute(
        "SELECT match_id FROM fact_matches WHERE data_source_type=? LIMIT 1",
        (source_type,),
    ).fetchone()
    if not match_row:
        return
    mid = match_row[0]
    print(f"{label} Match: {mid}")
    # Bind the id as a parameter instead of formatting it into the SQL text.
    df = pd.read_sql(
        "SELECT * FROM fact_round_events WHERE match_id=? AND event_type='kill' LIMIT 1",
        conn,
        params=(mid,),
    )
    print(df[['event_type', 'attacker_steam_id', 'trade_killer_steam_id', 'attacker_pos_x', 'score_change_attacker']])


def _report_counts_and_samples(conn):
    """Print table row counts, the data-source distribution and sample rows."""
    print("--- Counts ---")
    tables = [
        'dim_players',
        'dim_maps',
        'fact_matches',
        'fact_match_teams',
        'fact_match_players',
        'fact_match_players_t',
        'fact_match_players_ct',
        'fact_rounds',
        'fact_round_events',
        'fact_round_player_economy',
    ]
    for t in tables:
        count = conn.execute(f"SELECT COUNT(*) FROM {t}").fetchone()[0]
        print(f"{t}: {count}")

    print("\n--- Data Source Distribution ---")
    dist = pd.read_sql("SELECT data_source_type, COUNT(*) as cnt FROM fact_matches GROUP BY data_source_type", conn)
    print(dist)

    print("\n--- Sample Round Events (Leetify vs Classic) ---")
    _print_sample_kill_event(conn, 'leetify', 'Leetify')
    _print_sample_kill_event(conn, 'classic', 'Classic')

    print("\n--- Sample Player Stats (New Fields) ---")
    df_players = pd.read_sql("SELECT steam_id_64, rating, rating3, elo_change, rank_score, flash_duration, jump_count FROM fact_match_players LIMIT 5", conn)
    print(df_players)


def _report_insert_and_integrity(conn):
    """Print fill counts of response/raw columns and orphan-row integrity checks."""
    print("\n--- Insert Field Checks ---")
    meta_counts = conn.execute("""
        SELECT
        SUM(CASE WHEN response_code IS NOT NULL THEN 1 ELSE 0 END) AS response_code_cnt,
        SUM(CASE WHEN response_trace_id IS NOT NULL AND response_trace_id != '' THEN 1 ELSE 0 END) AS response_trace_id_cnt,
        SUM(CASE WHEN response_success IS NOT NULL THEN 1 ELSE 0 END) AS response_success_cnt,
        SUM(CASE WHEN response_errcode IS NOT NULL THEN 1 ELSE 0 END) AS response_errcode_cnt,
        SUM(CASE WHEN treat_info_raw IS NOT NULL AND treat_info_raw != '' THEN 1 ELSE 0 END) AS treat_info_raw_cnt,
        SUM(CASE WHEN round_list_raw IS NOT NULL AND round_list_raw != '' THEN 1 ELSE 0 END) AS round_list_raw_cnt,
        SUM(CASE WHEN leetify_data_raw IS NOT NULL AND leetify_data_raw != '' THEN 1 ELSE 0 END) AS leetify_data_raw_cnt
        FROM fact_matches
    """).fetchone()
    print(f"response_code non-null: {meta_counts[0]}")
    print(f"response_trace_id non-empty: {meta_counts[1]}")
    print(f"response_success non-null: {meta_counts[2]}")
    print(f"response_errcode non-null: {meta_counts[3]}")
    print(f"treat_info_raw non-empty: {meta_counts[4]}")
    print(f"round_list_raw non-empty: {meta_counts[5]}")
    print(f"leetify_data_raw non-empty: {meta_counts[6]}")

    print("\n--- Integrity Checks ---")
    missing_players = conn.execute("""
        SELECT COUNT(*) FROM fact_match_players f
        LEFT JOIN dim_players d ON f.steam_id_64 = d.steam_id_64
        WHERE d.steam_id_64 IS NULL
    """).fetchone()[0]
    print(f"fact_match_players missing dim_players: {missing_players}")
    missing_round_matches = conn.execute("""
        SELECT COUNT(*) FROM fact_rounds r
        LEFT JOIN fact_matches m ON r.match_id = m.match_id
        WHERE m.match_id IS NULL
    """).fetchone()[0]
    print(f"fact_rounds missing fact_matches: {missing_round_matches}")
    missing_event_rounds = conn.execute("""
        SELECT COUNT(*) FROM fact_round_events e
        LEFT JOIN fact_rounds r ON e.match_id = r.match_id AND e.round_num = r.round_num
        WHERE r.match_id IS NULL
    """).fetchone()[0]
    print(f"fact_round_events missing fact_rounds: {missing_event_rounds}")
    side_zero_t = conn.execute("""
        SELECT COUNT(*) FROM fact_match_players_t
        WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0
    """).fetchone()[0]
    side_zero_ct = conn.execute("""
        SELECT COUNT(*) FROM fact_match_players_ct
        WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0
    """).fetchone()[0]
    print(f"fact_match_players_t zero K/D/A: {side_zero_t}")
    print(f"fact_match_players_ct zero K/D/A: {side_zero_ct}")


def _report_side_comparison(conn):
    """Compare whole-match player stats with the T-side and CT-side tables."""
    print("\n--- Full vs T/CT Comparison ---")
    cols = [
        'kills', 'deaths', 'assists', 'headshot_count', 'adr', 'rating', 'rating2',
        'rating3', 'rws', 'mvp_count', 'flash_duration', 'jump_count', 'is_win',
    ]
    col_sql = ",".join(cols)
    df_full = pd.read_sql(
        "SELECT match_id, steam_id_64, " + col_sql + " FROM fact_match_players",
        conn
    )
    df_t = pd.read_sql(
        "SELECT match_id, steam_id_64, " + col_sql + " FROM fact_match_players_t",
        conn
    ).rename(columns={c: f"{c}_t" for c in cols})
    df_ct = pd.read_sql(
        "SELECT match_id, steam_id_64, " + col_sql + " FROM fact_match_players_ct",
        conn
    ).rename(columns={c: f"{c}_ct" for c in cols})
    df = df_full.merge(df_t, on=['match_id', 'steam_id_64'], how='left')
    df = df.merge(df_ct, on=['match_id', 'steam_id_64'], how='left')

    def is_empty(s):
        # "Empty" means NULL or exactly zero.
        return s.isna() | (s == 0)

    def is_nonzero(s):
        return s.notna() & (s != 0)

    for c in cols:
        empty_count = is_empty(df[c]).sum()
        print(f"{c} empty: {empty_count}")

    # Counters expected to satisfy full == T + CT.
    additive = ['kills', 'deaths', 'assists', 'headshot_count', 'mvp_count', 'flash_duration', 'jump_count']
    for c in additive:
        t_sum = df[f"{c}_t"].fillna(0) + df[f"{c}_ct"].fillna(0)
        # flash_duration gets a small tolerance; the other counters must match exactly.
        tol = 0.01 if c == 'flash_duration' else 0
        diff = (df[c].fillna(0) - t_sum).abs() > tol
        print(f"{c} full != t+ct: {diff.sum()}")

    # Metrics that cannot be summed: only check presence on either side.
    non_additive = ['adr', 'rating', 'rating2', 'rating3', 'rws', 'is_win']
    for c in non_additive:
        side_nonempty = (~is_empty(df[f"{c}_t"])) | (~is_empty(df[f"{c}_ct"]))
        full_empty_side_nonempty = is_empty(df[c]) & side_nonempty
        full_nonempty_side_empty = (~is_empty(df[c])) & (~side_nonempty)
        print(f"{c} full empty but side has: {full_empty_side_nonempty.sum()}")
        print(f"{c} full has but side empty: {full_nonempty_side_empty.sum()}")

    print("\n--- Rating Detail ---")
    rating_cols = ['rating', 'rating2', 'rating3']
    for c in rating_cols:
        full_null = df[c].isna().sum()
        full_zero = (df[c] == 0).sum()
        full_nonzero = is_nonzero(df[c]).sum()
        side_t_nonzero = is_nonzero(df[f"{c}_t"]).sum()
        side_ct_nonzero = is_nonzero(df[f"{c}_ct"]).sum()
        side_any_nonzero = is_nonzero(df[f"{c}_t"]) | is_nonzero(df[f"{c}_ct"])
        full_nonzero_side_zero = (is_nonzero(df[c]) & (~side_any_nonzero)).sum()
        full_zero_side_nonzero = (is_empty(df[c]) & side_any_nonzero).sum()
        print(f"{c} full null: {full_null} full zero: {full_zero} full nonzero: {full_nonzero}")
        print(f"{c} side t nonzero: {side_t_nonzero} side ct nonzero: {side_ct_nonzero}")
        print(f"{c} full nonzero but side all zero: {full_nonzero_side_zero}")
        print(f"{c} full zero but side has: {full_zero_side_nonzero}")

    df_rating_src = pd.read_sql(
        "SELECT f.rating, f.rating2, f.rating3, m.data_source_type FROM fact_match_players f JOIN fact_matches m ON f.match_id = m.match_id",
        conn
    )
    for c in rating_cols:
        # NaN != 0 evaluates to True, so NULL ratings must be excluded
        # explicitly or they would be counted as nonzero.
        grp = (
            df_rating_src.groupby('data_source_type')[c]
            .apply(lambda s: (s.notna() & (s != 0)).sum())
            .reset_index(name='nonzero')
        )
        print(f"{c} nonzero by source")
        print(grp)


def _report_fight_any_coverage(conn, paths):
    """Report which fight_any schema keys map to fact_match_players columns and their zero rates."""
    fight_keys = set()
    for p in paths:
        if 'data.group_N[].fight_any.' in p:
            key = p.split('fight_any.')[1].split('.')[0]
            fight_keys.add(key)
    l2_cols = set(pd.read_sql("PRAGMA table_info(fact_match_players)", conn)['name'].tolist())
    # L2 column name -> fight_any key, for columns that were renamed.
    alias = {
        'kills': 'kill',
        'deaths': 'death',
        'assists': 'assist',
        'headshot_count': 'headshot',
        'mvp_count': 'is_mvp',
        'flash_duration': 'flash_enemy_time',
        'jump_count': 'jump_total',
        'awp_kills': 'awp_kill',
    }
    covered = set()
    for c in l2_cols:
        if c in fight_keys:
            covered.add(c)
        elif c in alias and alias[c] in fight_keys:
            covered.add(alias[c])
    missing_keys = sorted(fight_keys - covered)
    print(f"fight_any keys: {len(fight_keys)}")
    print(f"covered by L2 columns: {len(covered)}")
    print(f"uncovered fight_any keys: {len(missing_keys)}")
    if missing_keys:
        print(missing_keys)

    print("\n--- Coverage Zero Rate (fight_any -> fact_match_players) ---")
    # Only alias to columns that really exist, so the SELECT below cannot
    # reference a missing column.
    reverse_alias = {src: l2_name for l2_name, src in alias.items() if l2_name in l2_cols}
    col_map = {}
    for k in fight_keys:
        if k in l2_cols:
            col_map[k] = k
        elif k in reverse_alias:
            col_map[k] = reverse_alias[k]
    select_cols = ["steam_id_64"] + sorted(set(col_map.values()))
    df_fight = pd.read_sql(
        "SELECT " + ",".join(select_cols) + " FROM fact_match_players",
        conn
    )
    total_rows = len(df_fight)
    stats = []
    for fight_key, col in sorted(col_map.items()):
        s = df_fight[col]
        zeros = (s == 0).sum()
        nulls = s.isna().sum()
        nonzero = total_rows - zeros - nulls
        stats.append({
            "fight_key": fight_key,
            "column": col,
            "nonzero": nonzero,
            "zero": zeros,
            "null": nulls,
            "zero_rate": 0 if total_rows == 0 else round(zeros / total_rows, 4),
        })
    # Explicit columns keep sort_values working when no key is mapped at all.
    df_stats = pd.DataFrame(
        stats,
        columns=["fight_key", "column", "nonzero", "zero", "null", "zero_rate"],
    ).sort_values(["zero_rate", "nonzero"], ascending=[False, True])
    print(df_stats.head(30))
    print("\n-- zero_rate top (most zeros) --")
    print(df_stats.head(10))
    print("\n-- zero_rate bottom (most nonzero) --")
    print(df_stats.tail(10))


def _report_schema_summary(paths):
    """Print leetify economy path counts and the overall covered/uncovered summary."""
    print("\n--- Schema Coverage (leetify economy) ---")
    econ_keys = [
        'data.leetify_data.round_stat[].bron_equipment.',
        'data.leetify_data.round_stat[].player_t_score.',
        'data.leetify_data.round_stat[].player_ct_score.',
        'data.leetify_data.round_stat[].player_bron_crash.',
    ]
    for k in econ_keys:
        count = sum(1 for p in paths if k in p)
        print(f"{k} paths: {count}")

    print("\n--- Schema Summary Coverage (by path groups) ---")
    uncovered = [p for p in paths if not is_covered(p)]
    print(f"total paths: {len(paths)}")
    print(f"covered paths: {len(paths) - len(uncovered)}")
    print(f"uncovered paths: {len(uncovered)}")
    df_unc = pd.DataFrame({"path": uncovered})
    if len(df_unc) > 0:
        df_unc["group"] = df_unc["path"].apply(group_key)
        print("\n-- Uncovered groups (count) --")
        print(df_unc.groupby("group").size().sort_values(ascending=False))
        print("\n-- Uncovered examples (top 50) --")
        print(df_unc["path"].head(50).to_list())


def verify():
    """Run all read-only verification reports against the L2 database.

    Opens the SQLite file at the module-level ``db_path`` and prints, in
    order: table counts and samples, insert-field and integrity checks, the
    full vs T/CT side comparison, fight_any schema coverage with zero rates,
    and the overall schema coverage summary (schema paths are read from the
    module-level ``schema_path`` CSV).

    The connection is closed even when one of the reports raises.
    """
    conn = sqlite3.connect(db_path)
    try:
        _report_counts_and_samples(conn)
        _report_insert_and_integrity(conn)
        _report_side_comparison(conn)
        print("\n--- Schema Coverage (fight_any) ---")
        paths = load_schema_paths(schema_path)
        _report_fight_any_coverage(conn, paths)
        _report_schema_summary(paths)
    finally:
        conn.close()
def watch_schema(schema_path, interval=1.0):
    """Poll the database and regenerate the schema SQL file whenever it changes.

    Runs until interrupted. On the first pass, and whenever the modification
    time of the database or of the schema file is newer than the last one
    seen, the schema file is rewritten with refresh_schema_sql() and the
    schema is printed with print_schema().

    Args:
        schema_path: Schema SQL file to (re)write. Note that this parameter
            shadows the module-level ``schema_path`` inside this function.
        interval: Seconds to sleep between polls.
    """
    last_db_mtime = 0
    last_schema_mtime = 0
    first = True
    while True:
        try:
            # Ask for the mtime directly: this also covers the database being
            # removed between an existence check and the stat call.
            db_mtime = os.path.getmtime(db_path)
        except OSError:
            print(f"db not found: {db_path}")
            time.sleep(interval)
            continue
        schema_mtime = os.path.getmtime(schema_path) if os.path.exists(schema_path) else 0
        if first or db_mtime > last_db_mtime or schema_mtime > last_schema_mtime:
            conn = sqlite3.connect(db_path)
            try:
                refresh_schema_sql(conn, schema_path)
                print(f"\n[{time.strftime('%Y-%m-%d %H:%M:%S')}] schema.sql refreshed")
                print_schema(conn)
            finally:
                # Do not leak the connection if the refresh or the print fails.
                conn.close()
            last_db_mtime = db_mtime
            # Re-read the mtime: the refresh above has just rewritten the file.
            last_schema_mtime = os.path.getmtime(schema_path) if os.path.exists(schema_path) else 0
            first = False
        time.sleep(interval)
if __name__ == "__main__":
    # Command words are matched case-insensitively, anywhere on the command line.
    args = [a.lower() for a in sys.argv[1:]]
    requested = set(args)
    if requested & {"dump_uncovered", "uncovered"}:
        # Export the uncovered schema paths to CSV.
        dump_uncovered('database/original_json_schema/uncovered_features.csv')
    elif requested & {"watch_schema", "watch"}:
        # Keep schema.sql in sync until the user presses Ctrl+C.
        try:
            watch_schema('database/L2/schema.sql')
        except KeyboardInterrupt:
            pass
    elif requested & {"schema", "refresh_schema"}:
        # Print the schema once; "refresh_schema" also rewrites schema.sql first.
        if os.path.exists(db_path):
            conn = sqlite3.connect(db_path)
            if "refresh_schema" in requested:
                refresh_schema_sql(conn, 'database/L2/schema.sql')
                print("schema.sql refreshed")
            print_schema(conn)
            conn.close()
        else:
            print(f"db not found: {db_path}")
    else:
        # No recognised command: run the full verification report.
        verify()

View File

@@ -1,245 +0,0 @@
import sqlite3
import pandas as pd
import csv
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
db_path = 'database/L2/L2_Main.sqlite'
# Print a read-only verification report for the L2 SQLite database at db_path:
# row counts, sample rows, integrity checks, full vs T/CT comparison, rating
# detail and schema coverage. Takes no arguments and returns nothing.
def verify():
conn = sqlite3.connect(db_path)
# Section 1: row count of every table listed below.
print("--- Counts ---")
tables = [
'dim_players',
'dim_maps',
'fact_matches',
'fact_match_players',
'fact_match_players_t',
'fact_match_players_ct',
'fact_rounds',
'fact_round_events',
'fact_round_player_economy'
]
for t in tables:
count = conn.execute(f"SELECT COUNT(*) FROM {t}").fetchone()[0]
print(f"{t}: {count}")
# Section 2: number of matches per data_source_type.
print("\n--- Data Source Distribution ---")
dist = pd.read_sql("SELECT data_source_type, COUNT(*) as cnt FROM fact_matches GROUP BY data_source_type", conn)
print(dist)
# Section 3: one sample kill event per data source, when such a match exists.
print("\n--- Sample Round Events (Leetify vs Classic) ---")
# Fetch one event from a leetify match
leetify_match = conn.execute("SELECT match_id FROM fact_matches WHERE data_source_type='leetify' LIMIT 1").fetchone()
if leetify_match:
mid = leetify_match[0]
print(f"Leetify Match: {mid}")
df = pd.read_sql(f"SELECT * FROM fact_round_events WHERE match_id='{mid}' AND event_type='kill' LIMIT 1", conn)
print(df[['event_type', 'attacker_steam_id', 'trade_killer_steam_id', 'attacker_pos_x', 'score_change_attacker']])
# Fetch one event from a classic match
classic_match = conn.execute("SELECT match_id FROM fact_matches WHERE data_source_type='classic' LIMIT 1").fetchone()
if classic_match:
mid = classic_match[0]
print(f"Classic Match: {mid}")
df = pd.read_sql(f"SELECT * FROM fact_round_events WHERE match_id='{mid}' AND event_type='kill' LIMIT 1", conn)
print(df[['event_type', 'attacker_steam_id', 'trade_killer_steam_id', 'attacker_pos_x', 'score_change_attacker']])
# Section 4: first five player rows with a handful of stat columns.
print("\n--- Sample Player Stats (New Fields) ---")
df_players = pd.read_sql("SELECT steam_id_64, rating, rating3, elo_change, rank_score, flash_duration, jump_count FROM fact_match_players LIMIT 5", conn)
print(df_players)
# Section 5: count rows whose parent row is missing (LEFT JOIN ... IS NULL).
print("\n--- Integrity Checks ---")
missing_players = conn.execute("""
SELECT COUNT(*) FROM fact_match_players f
LEFT JOIN dim_players d ON f.steam_id_64 = d.steam_id_64
WHERE d.steam_id_64 IS NULL
""").fetchone()[0]
print(f"fact_match_players missing dim_players: {missing_players}")
missing_round_matches = conn.execute("""
SELECT COUNT(*) FROM fact_rounds r
LEFT JOIN fact_matches m ON r.match_id = m.match_id
WHERE m.match_id IS NULL
""").fetchone()[0]
print(f"fact_rounds missing fact_matches: {missing_round_matches}")
missing_event_rounds = conn.execute("""
SELECT COUNT(*) FROM fact_round_events e
LEFT JOIN fact_rounds r ON e.match_id = r.match_id AND e.round_num = r.round_num
WHERE r.match_id IS NULL
""").fetchone()[0]
print(f"fact_round_events missing fact_rounds: {missing_event_rounds}")
# Side-table rows where kills, deaths and assists are all zero or NULL.
side_zero_t = conn.execute("""
SELECT COUNT(*) FROM fact_match_players_t
WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0
""").fetchone()[0]
side_zero_ct = conn.execute("""
SELECT COUNT(*) FROM fact_match_players_ct
WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0
""").fetchone()[0]
print(f"fact_match_players_t zero K/D/A: {side_zero_t}")
print(f"fact_match_players_ct zero K/D/A: {side_zero_ct}")
# Section 6: join whole-match stats with the T and CT tables on (match_id, steam_id_64).
print("\n--- Full vs T/CT Comparison ---")
cols = [
'kills', 'deaths', 'assists', 'headshot_count', 'adr', 'rating', 'rating2',
'rating3', 'rws', 'mvp_count', 'flash_duration', 'jump_count', 'is_win'
]
df_full = pd.read_sql(
"SELECT match_id, steam_id_64, " + ",".join(cols) + " FROM fact_match_players",
conn
)
# Side columns get a _t / _ct suffix so they can sit next to the full columns.
df_t = pd.read_sql(
"SELECT match_id, steam_id_64, " + ",".join(cols) + " FROM fact_match_players_t",
conn
).rename(columns={c: f"{c}_t" for c in cols})
df_ct = pd.read_sql(
"SELECT match_id, steam_id_64, " + ",".join(cols) + " FROM fact_match_players_ct",
conn
).rename(columns={c: f"{c}_ct" for c in cols})
df = df_full.merge(df_t, on=['match_id', 'steam_id_64'], how='left')
df = df.merge(df_ct, on=['match_id', 'steam_id_64'], how='left')
# A value is "empty" when it is NULL or exactly zero.
def is_empty(s):
return s.isna() | (s == 0)
for c in cols:
empty_count = is_empty(df[c]).sum()
print(f"{c} empty: {empty_count}")
# Columns checked for full == T + CT (flash_duration with a 0.01 tolerance).
additive = ['kills', 'deaths', 'assists', 'headshot_count', 'mvp_count', 'flash_duration', 'jump_count']
for c in additive:
t_sum = df[f"{c}_t"].fillna(0) + df[f"{c}_ct"].fillna(0)
tol = 0.01 if c == 'flash_duration' else 0
diff = (df[c].fillna(0) - t_sum).abs() > tol
print(f"{c} full != t+ct: {diff.sum()}")
# Columns only checked for presence: full value vs either side value.
non_additive = ['adr', 'rating', 'rating2', 'rating3', 'rws', 'is_win']
for c in non_additive:
side_nonempty = (~is_empty(df[f"{c}_t"])) | (~is_empty(df[f"{c}_ct"]))
full_empty_side_nonempty = is_empty(df[c]) & side_nonempty
full_nonempty_side_empty = (~is_empty(df[c])) & (~side_nonempty)
print(f"{c} full empty but side has: {full_empty_side_nonempty.sum()}")
print(f"{c} full has but side empty: {full_nonempty_side_empty.sum()}")
# Section 7: null / zero / nonzero breakdown of the three rating columns.
print("\n--- Rating Detail ---")
rating_cols = ['rating', 'rating2', 'rating3']
for c in rating_cols:
full_null = df[c].isna().sum()
full_zero = (df[c] == 0).sum()
full_nonzero = ((~df[c].isna()) & (df[c] != 0)).sum()
side_t_nonzero = ((~df[f"{c}_t"].isna()) & (df[f"{c}_t"] != 0)).sum()
side_ct_nonzero = ((~df[f"{c}_ct"].isna()) & (df[f"{c}_ct"] != 0)).sum()
side_any_nonzero = ((~df[f"{c}_t"].isna()) & (df[f"{c}_t"] != 0)) | ((~df[f"{c}_ct"].isna()) & (df[f"{c}_ct"] != 0))
full_nonzero_side_zero = ((~df[c].isna()) & (df[c] != 0) & (~side_any_nonzero)).sum()
full_zero_side_nonzero = (((df[c].isna()) | (df[c] == 0)) & side_any_nonzero).sum()
print(f"{c} full null: {full_null} full zero: {full_zero} full nonzero: {full_nonzero}")
print(f"{c} side t nonzero: {side_t_nonzero} side ct nonzero: {side_ct_nonzero}")
print(f"{c} full nonzero but side all zero: {full_nonzero_side_zero}")
print(f"{c} full zero but side has: {full_zero_side_nonzero}")
df_rating_src = pd.read_sql(
"SELECT f.rating, f.rating2, f.rating3, m.data_source_type FROM fact_match_players f JOIN fact_matches m ON f.match_id = m.match_id",
conn
)
# NOTE(review): (s != 0) is also True for NaN, so NULL ratings are counted as "nonzero" here.
for c in rating_cols:
grp = df_rating_src.groupby('data_source_type')[c].apply(lambda s: (s != 0).sum()).reset_index(name='nonzero')
print(f"{c} nonzero by source")
print(grp)
# Section 8: which fight_any keys of the flattened schema have a matching column.
print("\n--- Schema Coverage (fight_any) ---")
schema_path = 'database/original_json_schema/schema_flat.csv'
# Collect the second column of every data row; the first row is skipped as a header.
paths = []
with open(schema_path, 'r', encoding='utf-8') as f:
reader = csv.reader(f)
_ = next(reader, None)
for row in reader:
if len(row) >= 2:
paths.append(row[1])
# The key is the first path segment after "fight_any.".
fight_keys = set()
for p in paths:
if 'data.group_N[].fight_any.' in p:
key = p.split('fight_any.')[1].split('.')[0]
fight_keys.add(key)
l2_cols = set(pd.read_sql("PRAGMA table_info(fact_match_players)", conn)['name'].tolist())
# Column name in fact_match_players -> fight_any key it is compared against.
alias = {
'kills': 'kill',
'deaths': 'death',
'assists': 'assist',
'headshot_count': 'headshot',
'mvp_count': 'is_mvp',
'flash_duration': 'flash_enemy_time',
'jump_count': 'jump_total',
'awp_kills': 'awp_kill'
}
covered = set()
for c in l2_cols:
if c in fight_keys:
covered.add(c)
elif c in alias and alias[c] in fight_keys:
covered.add(alias[c])
missing_keys = sorted(list(fight_keys - covered))
print(f"fight_any keys: {len(fight_keys)}")
print(f"covered by L2 columns: {len(covered)}")
print(f"uncovered fight_any keys: {len(missing_keys)}")
if missing_keys:
print(missing_keys)
# Section 9: per mapped column, how many rows are zero, NULL or nonzero.
print("\n--- Coverage Zero Rate (fight_any -> fact_match_players) ---")
fight_cols = [k for k in fight_keys if k in l2_cols or k in alias.values()]
# fight_any key -> column to read (same name, or the first alias whose target is the key).
col_map = {}
for k in fight_cols:
if k in l2_cols:
col_map[k] = k
else:
for l2k, src in alias.items():
if src == k:
col_map[k] = l2k
break
select_cols = ["steam_id_64"] + list(set(col_map.values()))
df_fight = pd.read_sql(
"SELECT " + ",".join(select_cols) + " FROM fact_match_players",
conn
)
total_rows = len(df_fight)
stats = []
for fight_key, col in sorted(col_map.items()):
s = df_fight[col]
zeros = (s == 0).sum()
nulls = s.isna().sum()
nonzero = total_rows - zeros - nulls
stats.append({
"fight_key": fight_key,
"column": col,
"nonzero": nonzero,
"zero": zeros,
"null": nulls,
"zero_rate": 0 if total_rows == 0 else round(zeros / total_rows, 4)
})
# Highest zero_rate first; ties are ordered by fewest nonzero rows.
df_stats = pd.DataFrame(stats).sort_values(["zero_rate", "nonzero"], ascending=[False, True])
print(df_stats.head(30))
print("\n-- zero_rate top (most zeros) --")
print(df_stats.head(10))
print("\n-- zero_rate bottom (most nonzero) --")
print(df_stats.tail(10))
# Section 10: number of schema paths under each leetify economy prefix.
print("\n--- Schema Coverage (leetify economy) ---")
econ_keys = [
'data.leetify_data.round_stat[].bron_equipment.',
'data.leetify_data.round_stat[].player_t_score.',
'data.leetify_data.round_stat[].player_ct_score.',
'data.leetify_data.round_stat[].player_bron_crash.'
]
for k in econ_keys:
count = sum(1 for p in paths if k in p)
print(f"{k} paths: {count}")
conn.close()
# Run the report when the file is executed as a script.
if __name__ == "__main__":
verify()

135
README.md Normal file
View File

@@ -0,0 +1,135 @@
# YRTV 项目说明
## 项目概览
YRTV 用于抓取比赛数据，并将其整理为分层数据库（L1A/L2），以便后续校验与分析。
数据来源与处理核心包括:
- 比赛页面的 iframe JSON 数据（`iframe_network.json`）
- 可选的 demo 文件（`.zip/.dem`）
- L1A/L2 分层数据库建模与校验
## 数据流程
1. **下载与落盘**
通过 `downloader/downloader.py` 抓取比赛页面数据,生成 `output_arena/<match_id>/iframe_network.json`,并可同时下载 demo 文件。
2. **L1A 入库（原始 JSON）**
`ETL/L1A.py` 将 `output_arena/*/iframe_network.json` 批量写入 `database/L1A/L1A.sqlite`。
3. **L2 入库（结构化事实表/维度表）**
`ETL/L2_Builder.py` 读取 L1A 数据，按 `database/L2/schema.sql` 构建维度表与事实表，生成 `database/L2/L2_Main.sqlite`。
4. **质量校验与覆盖分析**
`ETL/verify/verify_L2.py` 与 `ETL/verify/verify_deep.py` 用于字段覆盖、分布、空值和互斥逻辑的检查。
## 目录结构
```
yrtv/
├── downloader/ # 下载器（抓取 iframe JSON 与 demo）
│ ├── downloader.py
│ └── README.md
├── ETL/ # ETL 脚本
│ ├── L1A.py
│ ├── L2_Builder.py
│ ├── README.md
│ └── verify/
│ ├── verify_L2.py
│ └── verify_deep.py
├── database/
│ ├── L1A/ # L1A SQLite 与说明
│ ├── L1B/ # L1B 目录（demo 解析结果说明）
│ ├── L2/ # L2 SQLite 与 schema
│ └── original_json_schema/ # schema 扁平化与未覆盖字段清单
└── utils/
└── json_extractor/ # JSON Schema 抽取工具
```
## 环境要求
- Python 3.9+
- Playwright（下载器依赖）
- pandas、numpy（校验脚本依赖）
项目默认 Python 路径:
```
C:/ProgramData/anaconda3/python.exe
```
## 快速开始
### 1. 下载比赛数据
进入项目根目录运行（默认示例 URL）：
```
C:/ProgramData/anaconda3/python.exe downloader/downloader.py
```
指定比赛 URL：
```
C:/ProgramData/anaconda3/python.exe downloader/downloader.py --url https://arena.5eplay.com/data/match/g161-20260118222715609322516
```
批量下载(从文本文件读取 URL 列表):
```
C:/ProgramData/anaconda3/python.exe downloader/downloader.py --url-list downloader/gamelist/match_list_2026.txt
```
### 2. 生成 L1A 数据库
```
C:/ProgramData/anaconda3/python.exe ETL/L1A.py
```
### 3. 生成 L2 数据库
```
C:/ProgramData/anaconda3/python.exe ETL/L2_Builder.py
```
### 4. 校验与覆盖检查
```
C:/ProgramData/anaconda3/python.exe ETL/verify/verify_L2.py
C:/ProgramData/anaconda3/python.exe ETL/verify/verify_deep.py
```
## 数据库层级说明
### L1A
- **用途**:保存原始 iframe JSON
- **输入**：`output_arena/*/iframe_network.json`
- **输出**：`database/L1A/L1A.sqlite`
- **脚本**：`ETL/L1A.py`
### L1B
- **用途**:保存 demo 解析后的原始数据(由 demoparser2 产出)
- **输出**：`database/L1B/L1B.sqlite`
- 当前仓库提供目录与说明,解析流程需结合外部工具执行
### L2
结构化事实表/维度表数据库,覆盖比赛、玩家、回合与经济等数据:
- **Schema**：`database/L2/schema.sql`
- **输出**：`database/L2/L2_Main.sqlite`
- **核心表**：
- `dim_players`、`dim_maps`
- `fact_matches`、`fact_match_teams`
- `fact_match_players`、`fact_match_players_t`、`fact_match_players_ct`
- `fact_rounds`、`fact_round_events`、`fact_round_player_economy`
## JSON Schema 抽取工具
用于分析大量 `iframe_network.json` 的字段结构与覆盖情况,支持动态 Key 归并与多格式输出。
常用命令:
```
C:/ProgramData/anaconda3/python.exe utils/json_extractor/main.py
```
输出内容通常位于 `output_reports/` 或 `database/original_json_schema/`，包括：
- `schema_summary.md`:结构概览
- `schema_flat.csv`:扁平字段列表
- `uncovered_features.csv`:未覆盖字段清单
## 数据源互斥说明
L2 中 `fact_matches.data_source_type` 用于区分数据来源与字段覆盖范围:
- `classic`:含 round_list 详细回合与坐标信息
- `leetify`:含 leetify 评分与经济信息
- `unknown`:无法识别来源
入库逻辑保持互斥:同一场比赛只会按其来源覆盖相应字段,避免重复或冲突。
## 常用文件定位
- 下载器入口:[downloader.py](file:///c:/Users/Administrator/Documents/trae_projects/yrtv/downloader/downloader.py)
- L1A 入库脚本:[L1A.py](file:///c:/Users/Administrator/Documents/trae_projects/yrtv/ETL/L1A.py)
- L2 构建脚本:[L2_Builder.py](file:///c:/Users/Administrator/Documents/trae_projects/yrtv/ETL/L2_Builder.py)
- L2 Schema：[schema.sql](file:///c:/Users/Administrator/Documents/trae_projects/yrtv/database/L2/schema.sql)
- 覆盖检查:[verify_L2.py](file:///c:/Users/Administrator/Documents/trae_projects/yrtv/ETL/verify/verify_L2.py)
- 深度校验:[verify_deep.py](file:///c:/Users/Administrator/Documents/trae_projects/yrtv/ETL/verify/verify_deep.py)

Binary file not shown.

View File

@@ -12,7 +12,67 @@ CREATE TABLE IF NOT EXISTS dim_players (
domain TEXT, domain TEXT,
created_at INTEGER, -- Timestamp created_at INTEGER, -- Timestamp
updated_at INTEGER, -- Timestamp updated_at INTEGER, -- Timestamp
last_seen_match_id TEXT -- To track when this info was last updated last_seen_match_id TEXT,
uuid TEXT,
email TEXT,
area TEXT,
mobile TEXT,
user_domain TEXT,
username_audit_status INTEGER,
accid TEXT,
team_id INTEGER,
trumpet_count INTEGER,
profile_nickname TEXT,
profile_avatar_audit_status INTEGER,
profile_rgb_avatar_url TEXT,
profile_photo_url TEXT,
profile_gender INTEGER,
profile_birthday INTEGER,
profile_country_id TEXT,
profile_region_id TEXT,
profile_city_id TEXT,
profile_language TEXT,
profile_recommend_url TEXT,
profile_group_id INTEGER,
profile_reg_source INTEGER,
status_status INTEGER,
status_expire INTEGER,
status_cancellation_status INTEGER,
status_new_user INTEGER,
status_login_banned_time INTEGER,
status_anticheat_type INTEGER,
status_flag_status1 TEXT,
status_anticheat_status TEXT,
status_flag_honor TEXT,
status_privacy_policy_status INTEGER,
status_csgo_frozen_exptime INTEGER,
platformexp_level INTEGER,
platformexp_exp INTEGER,
steam_account TEXT,
steam_trade_url TEXT,
steam_rent_id TEXT,
trusted_credit INTEGER,
trusted_credit_level INTEGER,
trusted_score INTEGER,
trusted_status INTEGER,
trusted_credit_status INTEGER,
certify_id_type INTEGER,
certify_status INTEGER,
certify_age INTEGER,
certify_real_name TEXT,
certify_uid_list TEXT,
certify_audit_status INTEGER,
certify_gender INTEGER,
identity_type INTEGER,
identity_extras TEXT,
identity_status INTEGER,
identity_slogan TEXT,
identity_list TEXT,
identity_slogan_ext TEXT,
identity_live_url TEXT,
identity_live_type INTEGER,
plus_is_plus INTEGER,
user_info_raw TEXT
); );
CREATE INDEX IF NOT EXISTS idx_dim_players_uid ON dim_players(uid); CREATE INDEX IF NOT EXISTS idx_dim_players_uid ON dim_players(uid);
@@ -38,12 +98,67 @@ CREATE TABLE IF NOT EXISTS fact_matches (
server_ip TEXT, server_ip TEXT,
server_port INTEGER, server_port INTEGER,
location TEXT, location TEXT,
has_side_data_and_rating2 INTEGER,
match_main_id INTEGER,
demo_url TEXT,
game_mode INTEGER,
game_name TEXT,
map_desc TEXT,
location_full TEXT,
match_mode INTEGER,
match_status INTEGER,
match_flag INTEGER,
status INTEGER,
waiver INTEGER,
year INTEGER,
season TEXT,
round_total INTEGER,
cs_type INTEGER,
priority_show_type INTEGER,
pug10m_show_type INTEGER,
credit_match_status INTEGER,
knife_winner INTEGER,
knife_winner_role INTEGER,
most_1v2_uid INTEGER,
most_assist_uid INTEGER,
most_awp_uid INTEGER,
most_end_uid INTEGER,
most_first_kill_uid INTEGER,
most_headshot_uid INTEGER,
most_jump_uid INTEGER,
mvp_uid INTEGER,
response_code INTEGER,
response_message TEXT,
response_status INTEGER,
response_timestamp INTEGER,
response_trace_id TEXT,
response_success INTEGER,
response_errcode INTEGER,
treat_info_raw TEXT,
round_list_raw TEXT,
leetify_data_raw TEXT,
data_source_type TEXT CHECK(data_source_type IN ('leetify', 'classic', 'unknown')), -- 'leetify' has economy data, 'classic' has detailed xyz data_source_type TEXT CHECK(data_source_type IN ('leetify', 'classic', 'unknown')), -- 'leetify' has economy data, 'classic' has detailed xyz
processed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP processed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
); );
CREATE INDEX IF NOT EXISTS idx_fact_matches_time ON fact_matches(start_time); CREATE INDEX IF NOT EXISTS idx_fact_matches_time ON fact_matches(start_time);
-- Fact: Match Teams
-- One row per (match_id, group_id); each row is tied to its parent row in fact_matches.
-- Column names match the fields of the MatchTeamData dataclass introduced in the same commit.
CREATE TABLE IF NOT EXISTS fact_match_teams (
match_id TEXT,
group_id INTEGER,
group_all_score INTEGER,
group_change_elo REAL,
group_fh_role INTEGER, -- NOTE(review): "fh" presumably means first half; confirm against the parser
group_fh_score INTEGER,
group_origin_elo REAL,
group_sh_role INTEGER, -- NOTE(review): "sh" presumably means second half; confirm against the parser
group_sh_score INTEGER,
group_tid INTEGER,
group_uids TEXT, -- NOTE(review): stored as text; presumably a serialized list of uids, confirm the format
PRIMARY KEY (match_id, group_id),
-- Rows are removed automatically when the referenced fact_matches row is deleted.
-- NOTE(review): if this is SQLite, the cascade only applies when foreign keys are enabled
-- on the connection (PRAGMA foreign_keys = ON); confirm the ETL sets it.
FOREIGN KEY (match_id) REFERENCES fact_matches(match_id) ON DELETE CASCADE
);
-- 4. Fact: Match Player Stats (Wide Table) -- 4. Fact: Match Player Stats (Wide Table)
-- Aggregated stats for a player in a specific match -- Aggregated stats for a player in a specific match
CREATE TABLE IF NOT EXISTS fact_match_players ( CREATE TABLE IF NOT EXISTS fact_match_players (
@@ -82,8 +197,12 @@ CREATE TABLE IF NOT EXISTS fact_match_players (
jump_count INTEGER, jump_count INTEGER,
damage_total INTEGER, damage_total INTEGER,
damage_received INTEGER, damage_received INTEGER,
damage_receive INTEGER,
damage_stats INTEGER,
assisted_kill INTEGER, assisted_kill INTEGER,
awp_kill INTEGER, awp_kill INTEGER,
awp_kill_ct INTEGER,
awp_kill_t INTEGER,
benefit_kill INTEGER, benefit_kill INTEGER,
day TEXT, day TEXT,
defused_bomb INTEGER, defused_bomb INTEGER,
@@ -94,6 +213,8 @@ CREATE TABLE IF NOT EXISTS fact_match_players (
end_1v5 INTEGER, end_1v5 INTEGER,
explode_bomb INTEGER, explode_bomb INTEGER,
first_death INTEGER, first_death INTEGER,
fd_ct INTEGER,
fd_t INTEGER,
first_kill INTEGER, first_kill INTEGER,
flash_enemy INTEGER, flash_enemy INTEGER,
flash_team INTEGER, flash_team INTEGER,
@@ -139,6 +260,8 @@ CREATE TABLE IF NOT EXISTS fact_match_players (
throw_harm_enemy INTEGER, throw_harm_enemy INTEGER,
uid INTEGER, uid INTEGER,
year TEXT, year TEXT,
sts_raw TEXT,
level_info_raw TEXT,
PRIMARY KEY (match_id, steam_id_64), PRIMARY KEY (match_id, steam_id_64),
FOREIGN KEY (match_id) REFERENCES fact_matches(match_id) ON DELETE CASCADE FOREIGN KEY (match_id) REFERENCES fact_matches(match_id) ON DELETE CASCADE
@@ -177,8 +300,12 @@ CREATE TABLE IF NOT EXISTS fact_match_players_t (
jump_count INTEGER, jump_count INTEGER,
damage_total INTEGER, damage_total INTEGER,
damage_received INTEGER, damage_received INTEGER,
damage_receive INTEGER,
damage_stats INTEGER,
assisted_kill INTEGER, assisted_kill INTEGER,
awp_kill INTEGER, awp_kill INTEGER,
awp_kill_ct INTEGER,
awp_kill_t INTEGER,
benefit_kill INTEGER, benefit_kill INTEGER,
day TEXT, day TEXT,
defused_bomb INTEGER, defused_bomb INTEGER,
@@ -189,6 +316,8 @@ CREATE TABLE IF NOT EXISTS fact_match_players_t (
end_1v5 INTEGER, end_1v5 INTEGER,
explode_bomb INTEGER, explode_bomb INTEGER,
first_death INTEGER, first_death INTEGER,
fd_ct INTEGER,
fd_t INTEGER,
first_kill INTEGER, first_kill INTEGER,
flash_enemy INTEGER, flash_enemy INTEGER,
flash_team INTEGER, flash_team INTEGER,
@@ -234,6 +363,8 @@ CREATE TABLE IF NOT EXISTS fact_match_players_t (
throw_harm_enemy INTEGER, throw_harm_enemy INTEGER,
uid INTEGER, uid INTEGER,
year TEXT, year TEXT,
sts_raw TEXT,
level_info_raw TEXT,
PRIMARY KEY (match_id, steam_id_64), PRIMARY KEY (match_id, steam_id_64),
FOREIGN KEY (match_id) REFERENCES fact_matches(match_id) ON DELETE CASCADE FOREIGN KEY (match_id) REFERENCES fact_matches(match_id) ON DELETE CASCADE
); );
@@ -270,8 +401,12 @@ CREATE TABLE IF NOT EXISTS fact_match_players_ct (
jump_count INTEGER, jump_count INTEGER,
damage_total INTEGER, damage_total INTEGER,
damage_received INTEGER, damage_received INTEGER,
damage_receive INTEGER,
damage_stats INTEGER,
assisted_kill INTEGER, assisted_kill INTEGER,
awp_kill INTEGER, awp_kill INTEGER,
awp_kill_ct INTEGER,
awp_kill_t INTEGER,
benefit_kill INTEGER, benefit_kill INTEGER,
day TEXT, day TEXT,
defused_bomb INTEGER, defused_bomb INTEGER,
@@ -282,6 +417,8 @@ CREATE TABLE IF NOT EXISTS fact_match_players_ct (
end_1v5 INTEGER, end_1v5 INTEGER,
explode_bomb INTEGER, explode_bomb INTEGER,
first_death INTEGER, first_death INTEGER,
fd_ct INTEGER,
fd_t INTEGER,
first_kill INTEGER, first_kill INTEGER,
flash_enemy INTEGER, flash_enemy INTEGER,
flash_team INTEGER, flash_team INTEGER,
@@ -327,6 +464,8 @@ CREATE TABLE IF NOT EXISTS fact_match_players_ct (
throw_harm_enemy INTEGER, throw_harm_enemy INTEGER,
uid INTEGER, uid INTEGER,
year TEXT, year TEXT,
sts_raw TEXT,
level_info_raw TEXT,
PRIMARY KEY (match_id, steam_id_64), PRIMARY KEY (match_id, steam_id_64),
FOREIGN KEY (match_id) REFERENCES fact_matches(match_id) ON DELETE CASCADE FOREIGN KEY (match_id) REFERENCES fact_matches(match_id) ON DELETE CASCADE
); );

View File

@@ -0,0 +1,90 @@
path,group
data.group_1_team_info.logo_url,data.*
data.group_1_team_info.team_domain,data.*
data.group_1_team_info.team_id,data.*
data.group_1_team_info.team_name,data.*
data.group_1_team_info.team_tag,data.*
data.group_2_team_info.logo_url,data.*
data.group_2_team_info.team_domain,data.*
data.group_2_team_info.team_id,data.*
data.group_2_team_info.team_name,data.*
data.group_2_team_info.team_tag,data.*
data.group_N[].friend_relation,data.*
data.level_list[].elo,data.*
data.level_list[].elo_type,data.*
data.level_list[].group_id,data.*
data.level_list[].level_id,data.*
data.level_list[].level_image,data.*
data.level_list[].level_name,data.*
data.level_list[].remark,data.*
data.level_list[].rise_type,data.*
data.level_list[].shelves_status,data.*
data.room_card.attrs.flagAnimation,data.*
data.room_card.attrs.flagAnimationTime,data.*
data.room_card.attrs.flagViewUrl,data.*
data.room_card.attrs.flagViewVideo,data.*
data.room_card.attrs.flagViewVideoTime,data.*
data.room_card.attrs.getWay,data.*
data.room_card.attrs.mallJumpLink,data.*
data.room_card.attrs.matchViewUrlLeft,data.*
data.room_card.attrs.matchViewUrlRight,data.*
data.room_card.attrs.mvpSettleAnimation,data.*
data.room_card.attrs.mvpSettleColor,data.*
data.room_card.attrs.mvpSettleViewAnimation,data.*
data.room_card.attrs.pcImg,data.*
data.room_card.attrs.rarityLevel,data.*
data.room_card.attrs.sort,data.*
data.room_card.attrs.sourceId,data.*
data.room_card.attrs.templateId,data.*
data.room_card.category,data.*
data.room_card.createdAt,data.*
data.room_card.describe,data.*
data.room_card.displayStatus,data.*
data.room_card.getButton,data.*
data.room_card.getUrl,data.*
data.room_card.getWay,data.*
data.room_card.id,data.*
data.room_card.name,data.*
data.room_card.onShelf,data.*
data.room_card.propTemplateId,data.*
data.room_card.shelfAt,data.*
data.room_card.sysType,data.*
data.room_card.updatedAt,data.*
data.round_sfui_type[],data.*
data.season_type,data.*
data.uinfo_dict.<steamid>.avatar_url,data.*
data.uinfo_dict.<steamid>.college_id,data.*
data.uinfo_dict.<steamid>.country_id,data.*
data.uinfo_dict.<steamid>.credit,data.*
data.uinfo_dict.<steamid>.domain,data.*
data.uinfo_dict.<steamid>.gender,data.*
data.uinfo_dict.<steamid>.identity,data.*
data.uinfo_dict.<steamid>.language,data.*
data.uinfo_dict.<steamid>.nickname,data.*
data.uinfo_dict.<steamid>.plus_info,data.*
data.uinfo_dict.<steamid>.province,data.*
data.uinfo_dict.<steamid>.province_name,data.*
data.uinfo_dict.<steamid>.reg_date,data.*
data.uinfo_dict.<steamid>.region,data.*
data.uinfo_dict.<steamid>.region_name,data.*
data.uinfo_dict.<steamid>.status,data.*
data.uinfo_dict.<steamid>.steamid_64,data.*
data.uinfo_dict.<steamid>.trusted_score,data.*
data.uinfo_dict.<steamid>.trusted_status,data.*
data.uinfo_dict.<steamid>.uid,data.*
data.uinfo_dict.<steamid>.username,data.*
data.uinfo_dict.<steamid>.username_spam_status,data.*
data.uinfo_dict.<steamid>.uuid,data.*
data.user_stats.map_level.add_exp,data.*
data.user_stats.map_level.map_exp,data.*
data.user_stats.plat_level.add_exp,data.*
data.user_stats.plat_level.plat_level_exp,data.*
data.weapon_list.defuser[],data.*
data.weapon_list.item[],data.*
data.weapon_list.main_weapon[],data.*
data.weapon_list.other_item[],data.*
data.weapon_list.secondary_weapon[],data.*
trace_id,other
trace_id,other
trace_id,other
trace_id,other
1 path group
2 data.group_1_team_info.logo_url data.*
3 data.group_1_team_info.team_domain data.*
4 data.group_1_team_info.team_id data.*
5 data.group_1_team_info.team_name data.*
6 data.group_1_team_info.team_tag data.*
7 data.group_2_team_info.logo_url data.*
8 data.group_2_team_info.team_domain data.*
9 data.group_2_team_info.team_id data.*
10 data.group_2_team_info.team_name data.*
11 data.group_2_team_info.team_tag data.*
12 data.group_N[].friend_relation data.*
13 data.level_list[].elo data.*
14 data.level_list[].elo_type data.*
15 data.level_list[].group_id data.*
16 data.level_list[].level_id data.*
17 data.level_list[].level_image data.*
18 data.level_list[].level_name data.*
19 data.level_list[].remark data.*
20 data.level_list[].rise_type data.*
21 data.level_list[].shelves_status data.*
22 data.room_card.attrs.flagAnimation data.*
23 data.room_card.attrs.flagAnimationTime data.*
24 data.room_card.attrs.flagViewUrl data.*
25 data.room_card.attrs.flagViewVideo data.*
26 data.room_card.attrs.flagViewVideoTime data.*
27 data.room_card.attrs.getWay data.*
28 data.room_card.attrs.mallJumpLink data.*
29 data.room_card.attrs.matchViewUrlLeft data.*
30 data.room_card.attrs.matchViewUrlRight data.*
31 data.room_card.attrs.mvpSettleAnimation data.*
32 data.room_card.attrs.mvpSettleColor data.*
33 data.room_card.attrs.mvpSettleViewAnimation data.*
34 data.room_card.attrs.pcImg data.*
35 data.room_card.attrs.rarityLevel data.*
36 data.room_card.attrs.sort data.*
37 data.room_card.attrs.sourceId data.*
38 data.room_card.attrs.templateId data.*
39 data.room_card.category data.*
40 data.room_card.createdAt data.*
41 data.room_card.describe data.*
42 data.room_card.displayStatus data.*
43 data.room_card.getButton data.*
44 data.room_card.getUrl data.*
45 data.room_card.getWay data.*
46 data.room_card.id data.*
47 data.room_card.name data.*
48 data.room_card.onShelf data.*
49 data.room_card.propTemplateId data.*
50 data.room_card.shelfAt data.*
51 data.room_card.sysType data.*
52 data.room_card.updatedAt data.*
53 data.round_sfui_type[] data.*
54 data.season_type data.*
55 data.uinfo_dict.<steamid>.avatar_url data.*
56 data.uinfo_dict.<steamid>.college_id data.*
57 data.uinfo_dict.<steamid>.country_id data.*
58 data.uinfo_dict.<steamid>.credit data.*
59 data.uinfo_dict.<steamid>.domain data.*
60 data.uinfo_dict.<steamid>.gender data.*
61 data.uinfo_dict.<steamid>.identity data.*
62 data.uinfo_dict.<steamid>.language data.*
63 data.uinfo_dict.<steamid>.nickname data.*
64 data.uinfo_dict.<steamid>.plus_info data.*
65 data.uinfo_dict.<steamid>.province data.*
66 data.uinfo_dict.<steamid>.province_name data.*
67 data.uinfo_dict.<steamid>.reg_date data.*
68 data.uinfo_dict.<steamid>.region data.*
69 data.uinfo_dict.<steamid>.region_name data.*
70 data.uinfo_dict.<steamid>.status data.*
71 data.uinfo_dict.<steamid>.steamid_64 data.*
72 data.uinfo_dict.<steamid>.trusted_score data.*
73 data.uinfo_dict.<steamid>.trusted_status data.*
74 data.uinfo_dict.<steamid>.uid data.*
75 data.uinfo_dict.<steamid>.username data.*
76 data.uinfo_dict.<steamid>.username_spam_status data.*
77 data.uinfo_dict.<steamid>.uuid data.*
78 data.user_stats.map_level.add_exp data.*
79 data.user_stats.map_level.map_exp data.*
80 data.user_stats.plat_level.add_exp data.*
81 data.user_stats.plat_level.plat_level_exp data.*
82 data.weapon_list.defuser[] data.*
83 data.weapon_list.item[] data.*
84 data.weapon_list.main_weapon[] data.*
85 data.weapon_list.other_item[] data.*
86 data.weapon_list.secondary_weapon[] data.*
87 trace_id other
88 trace_id other
89 trace_id other
90 trace_id other