diff --git a/ETL/L2_Builder.py b/ETL/L2_Builder.py new file mode 100644 index 0000000..841b30a --- /dev/null +++ b/ETL/L2_Builder.py @@ -0,0 +1,879 @@ +import sqlite3 +import json +import os +import sys +import logging +from dataclasses import dataclass, field +from typing import List, Dict, Optional, Any, Tuple +from datetime import datetime + +# Setup logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +# Constants +L1A_DB_PATH = 'database/L1A/L1A.sqlite' +L2_DB_PATH = 'database/L2/L2_Main.sqlite' +SCHEMA_PATH = 'database/L2/schema.sql' + +# --- Data Structures for Unification --- + +@dataclass +class PlayerStats: + steam_id_64: str + team_id: int = 0 + kills: int = 0 + deaths: int = 0 + assists: int = 0 + headshot_count: int = 0 + kd_ratio: float = 0.0 + adr: float = 0.0 + rating: float = 0.0 + rating2: float = 0.0 + rating3: float = 0.0 + rws: float = 0.0 + mvp_count: int = 0 + elo_change: float = 0.0 + rank_score: int = 0 + is_win: bool = False + + # VIP Stats + kast: float = 0.0 + entry_kills: int = 0 + entry_deaths: int = 0 + awp_kills: int = 0 + clutch_1v1: int = 0 + clutch_1v2: int = 0 + clutch_1v3: int = 0 + clutch_1v4: int = 0 + clutch_1v5: int = 0 + flash_assists: int = 0 + flash_duration: float = 0.0 + jump_count: int = 0 + damage_total: int = 0 + damage_received: int = 0 + assisted_kill: int = 0 + awp_kill: int = 0 + benefit_kill: int = 0 + day: str = "" + defused_bomb: int = 0 + end_1v1: int = 0 + end_1v2: int = 0 + end_1v3: int = 0 + end_1v4: int = 0 + end_1v5: int = 0 + explode_bomb: int = 0 + first_death: int = 0 + first_kill: int = 0 + flash_enemy: int = 0 + flash_team: int = 0 + flash_team_time: float = 0.0 + flash_time: float = 0.0 + game_mode: str = "" + group_id: int = 0 + hold_total: int = 0 + id: int = 0 + is_highlight: int = 0 + is_most_1v2: int = 0 + is_most_assist: int = 0 + is_most_awp: int = 0 + is_most_end: int = 0 + is_most_first_kill: int = 0 + 
is_most_headshot: int = 0 + is_most_jump: int = 0 + is_svp: int = 0 + is_tie: int = 0 + kill_1: int = 0 + kill_2: int = 0 + kill_3: int = 0 + kill_4: int = 0 + kill_5: int = 0 + many_assists_cnt1: int = 0 + many_assists_cnt2: int = 0 + many_assists_cnt3: int = 0 + many_assists_cnt4: int = 0 + many_assists_cnt5: int = 0 + map: str = "" + match_code: str = "" + match_mode: str = "" + match_team_id: int = 0 + match_time: int = 0 + per_headshot: float = 0.0 + perfect_kill: int = 0 + planted_bomb: int = 0 + revenge_kill: int = 0 + round_total: int = 0 + season: str = "" + team_kill: int = 0 + throw_harm: int = 0 + throw_harm_enemy: int = 0 + uid: int = 0 + year: str = "" + +@dataclass +class RoundEvent: + event_id: str + event_type: str # 'kill', 'bomb_plant', etc. + event_time: int + attacker_steam_id: Optional[str] = None + victim_steam_id: Optional[str] = None + assister_steam_id: Optional[str] = None + flash_assist_steam_id: Optional[str] = None + trade_killer_steam_id: Optional[str] = None + weapon: Optional[str] = None + is_headshot: bool = False + is_wallbang: bool = False + is_blind: bool = False + is_through_smoke: bool = False + is_noscope: bool = False + # Spatial + attacker_pos: Optional[Tuple[int, int, int]] = None + victim_pos: Optional[Tuple[int, int, int]] = None + # Score + score_change_attacker: float = 0.0 + score_change_victim: float = 0.0 + +@dataclass +class PlayerEconomy: + steam_id_64: str + side: str + start_money: int = 0 + equipment_value: int = 0 + main_weapon: str = "" + has_helmet: bool = False + has_defuser: bool = False + round_performance_score: float = 0.0 + +@dataclass +class RoundData: + round_num: int + winner_side: str + win_reason: int + win_reason_desc: str + duration: float + end_time_stamp: str + ct_score: int + t_score: int + ct_money_start: int = 0 + t_money_start: int = 0 + events: List[RoundEvent] = field(default_factory=list) + economies: List[PlayerEconomy] = field(default_factory=list) + +@dataclass +class MatchData: + 
match_id: str + match_code: str = "" + map_name: str = "" + start_time: int = 0 + end_time: int = 0 + duration: int = 0 + winner_team: int = 0 + score_team1: int = 0 + score_team2: int = 0 + server_ip: str = "" + server_port: int = 0 + location: str = "" + data_source_type: str = "unknown" + players: Dict[str, PlayerStats] = field(default_factory=dict) # Key: steam_id_64 + players_t: Dict[str, PlayerStats] = field(default_factory=dict) + players_ct: Dict[str, PlayerStats] = field(default_factory=dict) + rounds: List[RoundData] = field(default_factory=list) + player_meta: Dict[str, Dict] = field(default_factory=dict) # steam_id -> {uid, name, avatar, ...} + +# --- Database Helper --- + +def init_db(): + if os.path.exists(L2_DB_PATH): + logger.info(f"Removing existing L2 DB at {L2_DB_PATH}") + try: + os.remove(L2_DB_PATH) + except PermissionError: + logger.error("Cannot remove L2 DB, it might be open.") + return False + + conn = sqlite3.connect(L2_DB_PATH) + with open(SCHEMA_PATH, 'r', encoding='utf-8') as f: + schema_sql = f.read() + conn.executescript(schema_sql) + conn.commit() + conn.close() + logger.info("L2 DB Initialized.") + return True + +# --- Parsers --- + +class MatchParser: + def __init__(self, match_id, raw_requests): + self.match_id = match_id + self.raw_requests = raw_requests + self.match_data = MatchData(match_id=match_id) + + # Extracted JSON bodies + self.data_match = None + self.data_vip = None + self.data_leetify = None + self.data_round_list = None + + self._extract_payloads() + + def _extract_payloads(self): + for req in self.raw_requests: + url = req.get('url', '') + body = req.get('body', {}) + + if not body: + continue + + # Check URLs + if 'crane/http/api/data/match/' in url: + self.data_match = body.get('data', {}) + elif 'crane/http/api/data/vip_plus_match_data/' in url: + self.data_vip = body.get('data', {}) + elif 'crane/http/api/match/leetify_rating/' in url: + self.data_leetify = body.get('data', {}) + elif 
'crane/http/api/match/round/' in url: + self.data_round_list = body.get('data', {}) + + def parse(self) -> MatchData: + if not self.data_match: + logger.warning(f"No base match data found for {self.match_id}") + return self.match_data + + self._parse_base_info() + self._parse_players_base() + self._parse_players_vip() + + # Decide which round source to use + if self.data_leetify and self.data_leetify.get('leetify_data'): + self.match_data.data_source_type = 'leetify' + self._parse_leetify_rounds() + elif self.data_round_list and self.data_round_list.get('round_list'): + self.match_data.data_source_type = 'classic' + self._parse_classic_rounds() + else: + self.match_data.data_source_type = 'unknown' + logger.info(f"No round data found for {self.match_id}") + + return self.match_data + + def _parse_base_info(self): + m = self.data_match.get('main', {}) + self.match_data.match_code = m.get('match_code', '') + self.match_data.map_name = m.get('map', '') + self.match_data.start_time = m.get('start_time', 0) + self.match_data.end_time = m.get('end_time', 0) + self.match_data.duration = self.match_data.end_time - self.match_data.start_time if self.match_data.end_time else 0 + self.match_data.winner_team = m.get('match_winner', 0) + self.match_data.score_team1 = m.get('group1_all_score', 0) + self.match_data.score_team2 = m.get('group2_all_score', 0) + self.match_data.server_ip = m.get('server_ip', '') + # Port is sometimes string + try: + self.match_data.server_port = int(m.get('server_port', 0)) + except: + self.match_data.server_port = 0 + self.match_data.location = m.get('location', '') + + def _parse_players_base(self): + # Players are in group_1 and group_2 lists in data_match + groups = [] + if 'group_1' in self.data_match: groups.extend(self.data_match['group_1']) + if 'group_2' in self.data_match: groups.extend(self.data_match['group_2']) + + for p in groups: + # We need steam_id. 
+ # Structure: user_info -> user_data -> steam -> steamId + user_info = p.get('user_info', {}) + user_data = user_info.get('user_data', {}) + steam_data = user_data.get('steam', {}) + steam_id = str(steam_data.get('steamId', '')) + + fight = p.get('fight', {}) + fight_t = p.get('fight_t', {}) + fight_ct = p.get('fight_ct', {}) + uid = fight.get('uid') + + # Store meta for dim_players + user_data = user_info.get('user_data', {}) + profile = user_data.get('profile', {}) + + # If steam_id is empty, use temporary placeholder '5E:{uid}' + # Ideally we want steam_id_64. + if not steam_id and uid: + steam_id = f"5E:{uid}" + + if not steam_id: + continue + + self.match_data.player_meta[steam_id] = { + 'uid': uid, + 'username': user_data.get('username', ''), + 'avatar_url': profile.get('avatarUrl', ''), + 'domain': profile.get('domain', ''), + 'created_at': user_data.get('createdAt', 0), + 'updated_at': user_data.get('updatedAt', 0) + } + + stats = PlayerStats(steam_id_64=steam_id) + sts = p.get('sts', {}) + + try: + # Use safe conversion helper + def safe_int(val): + try: return int(float(val)) if val is not None else 0 + except: return 0 + + def safe_float(val): + try: return float(val) if val is not None else 0.0 + except: return 0.0 + + def safe_text(val): + return "" if val is None else str(val) + + def get_stat(key): + if key in fight and fight.get(key) not in [None, ""]: + return fight.get(key) + return 0 + + def build_side_stats(fight_side, team_id_value): + side_stats = PlayerStats(steam_id_64=steam_id) + side_stats.team_id = team_id_value + side_stats.kills = safe_int(fight_side.get('kill')) + side_stats.deaths = safe_int(fight_side.get('death')) + side_stats.assists = safe_int(fight_side.get('assist')) + side_stats.headshot_count = safe_int(fight_side.get('headshot')) + side_stats.adr = safe_float(fight_side.get('adr')) + side_stats.rating = safe_float(fight_side.get('rating')) + side_stats.rating2 = safe_float(fight_side.get('rating2')) + side_stats.rating3 = 
safe_float(fight_side.get('rating3')) + side_stats.rws = safe_float(fight_side.get('rws')) + side_stats.mvp_count = safe_int(fight_side.get('is_mvp')) + side_stats.flash_duration = safe_float(fight_side.get('flash_enemy_time')) + side_stats.jump_count = safe_int(fight_side.get('jump_total')) + side_stats.is_win = bool(safe_int(fight_side.get('is_win'))) + side_stats.assisted_kill = safe_int(fight_side.get('assisted_kill')) + side_stats.awp_kill = safe_int(fight_side.get('awp_kill')) + side_stats.benefit_kill = safe_int(fight_side.get('benefit_kill')) + side_stats.day = safe_text(fight_side.get('day')) + side_stats.defused_bomb = safe_int(fight_side.get('defused_bomb')) + side_stats.end_1v1 = safe_int(fight_side.get('end_1v1')) + side_stats.end_1v2 = safe_int(fight_side.get('end_1v2')) + side_stats.end_1v3 = safe_int(fight_side.get('end_1v3')) + side_stats.end_1v4 = safe_int(fight_side.get('end_1v4')) + side_stats.end_1v5 = safe_int(fight_side.get('end_1v5')) + side_stats.explode_bomb = safe_int(fight_side.get('explode_bomb')) + side_stats.first_death = safe_int(fight_side.get('first_death')) + side_stats.first_kill = safe_int(fight_side.get('first_kill')) + side_stats.flash_enemy = safe_int(fight_side.get('flash_enemy')) + side_stats.flash_team = safe_int(fight_side.get('flash_team')) + side_stats.flash_team_time = safe_float(fight_side.get('flash_team_time')) + side_stats.flash_time = safe_float(fight_side.get('flash_time')) + side_stats.game_mode = safe_text(fight_side.get('game_mode')) + side_stats.group_id = safe_int(fight_side.get('group_id')) + side_stats.hold_total = safe_int(fight_side.get('hold_total')) + side_stats.id = safe_int(fight_side.get('id')) + side_stats.is_highlight = safe_int(fight_side.get('is_highlight')) + side_stats.is_most_1v2 = safe_int(fight_side.get('is_most_1v2')) + side_stats.is_most_assist = safe_int(fight_side.get('is_most_assist')) + side_stats.is_most_awp = safe_int(fight_side.get('is_most_awp')) + side_stats.is_most_end = 
safe_int(fight_side.get('is_most_end')) + side_stats.is_most_first_kill = safe_int(fight_side.get('is_most_first_kill')) + side_stats.is_most_headshot = safe_int(fight_side.get('is_most_headshot')) + side_stats.is_most_jump = safe_int(fight_side.get('is_most_jump')) + side_stats.is_svp = safe_int(fight_side.get('is_svp')) + side_stats.is_tie = safe_int(fight_side.get('is_tie')) + side_stats.kill_1 = safe_int(fight_side.get('kill_1')) + side_stats.kill_2 = safe_int(fight_side.get('kill_2')) + side_stats.kill_3 = safe_int(fight_side.get('kill_3')) + side_stats.kill_4 = safe_int(fight_side.get('kill_4')) + side_stats.kill_5 = safe_int(fight_side.get('kill_5')) + side_stats.many_assists_cnt1 = safe_int(fight_side.get('many_assists_cnt1')) + side_stats.many_assists_cnt2 = safe_int(fight_side.get('many_assists_cnt2')) + side_stats.many_assists_cnt3 = safe_int(fight_side.get('many_assists_cnt3')) + side_stats.many_assists_cnt4 = safe_int(fight_side.get('many_assists_cnt4')) + side_stats.many_assists_cnt5 = safe_int(fight_side.get('many_assists_cnt5')) + side_stats.map = safe_text(fight_side.get('map')) + side_stats.match_code = safe_text(fight_side.get('match_code')) + side_stats.match_mode = safe_text(fight_side.get('match_mode')) + side_stats.match_team_id = safe_int(fight_side.get('match_team_id')) + side_stats.match_time = safe_int(fight_side.get('match_time')) + side_stats.per_headshot = safe_float(fight_side.get('per_headshot')) + side_stats.perfect_kill = safe_int(fight_side.get('perfect_kill')) + side_stats.planted_bomb = safe_int(fight_side.get('planted_bomb')) + side_stats.revenge_kill = safe_int(fight_side.get('revenge_kill')) + side_stats.round_total = safe_int(fight_side.get('round_total')) + side_stats.season = safe_text(fight_side.get('season')) + side_stats.team_kill = safe_int(fight_side.get('team_kill')) + side_stats.throw_harm = safe_int(fight_side.get('throw_harm')) + side_stats.throw_harm_enemy = safe_int(fight_side.get('throw_harm_enemy')) + 
side_stats.uid = safe_int(fight_side.get('uid')) + side_stats.year = safe_text(fight_side.get('year')) + return side_stats + + team_id_value = safe_int(fight.get('match_team_id')) + stats.team_id = team_id_value + stats.kills = safe_int(get_stat('kill')) + stats.deaths = safe_int(get_stat('death')) + stats.assists = safe_int(get_stat('assist')) + stats.headshot_count = safe_int(get_stat('headshot')) + + stats.adr = safe_float(get_stat('adr')) + stats.rating = safe_float(get_stat('rating')) + stats.rating2 = safe_float(get_stat('rating2')) + stats.rating3 = safe_float(get_stat('rating3')) + stats.rws = safe_float(get_stat('rws')) + + # is_mvp might be string "1" or int 1 + stats.mvp_count = safe_int(get_stat('is_mvp')) + + stats.flash_duration = safe_float(get_stat('flash_enemy_time')) + stats.jump_count = safe_int(get_stat('jump_total')) + stats.is_win = bool(safe_int(get_stat('is_win'))) + + stats.elo_change = safe_float(sts.get('change_elo')) + stats.rank_score = safe_int(sts.get('rank')) + stats.assisted_kill = safe_int(fight.get('assisted_kill')) + stats.awp_kill = safe_int(fight.get('awp_kill')) + stats.benefit_kill = safe_int(fight.get('benefit_kill')) + stats.day = safe_text(fight.get('day')) + stats.defused_bomb = safe_int(fight.get('defused_bomb')) + stats.end_1v1 = safe_int(fight.get('end_1v1')) + stats.end_1v2 = safe_int(fight.get('end_1v2')) + stats.end_1v3 = safe_int(fight.get('end_1v3')) + stats.end_1v4 = safe_int(fight.get('end_1v4')) + stats.end_1v5 = safe_int(fight.get('end_1v5')) + stats.explode_bomb = safe_int(fight.get('explode_bomb')) + stats.first_death = safe_int(fight.get('first_death')) + stats.first_kill = safe_int(fight.get('first_kill')) + stats.flash_enemy = safe_int(fight.get('flash_enemy')) + stats.flash_team = safe_int(fight.get('flash_team')) + stats.flash_team_time = safe_float(fight.get('flash_team_time')) + stats.flash_time = safe_float(fight.get('flash_time')) + stats.game_mode = safe_text(fight.get('game_mode')) + 
stats.group_id = safe_int(fight.get('group_id')) + stats.hold_total = safe_int(fight.get('hold_total')) + stats.id = safe_int(fight.get('id')) + stats.is_highlight = safe_int(fight.get('is_highlight')) + stats.is_most_1v2 = safe_int(fight.get('is_most_1v2')) + stats.is_most_assist = safe_int(fight.get('is_most_assist')) + stats.is_most_awp = safe_int(fight.get('is_most_awp')) + stats.is_most_end = safe_int(fight.get('is_most_end')) + stats.is_most_first_kill = safe_int(fight.get('is_most_first_kill')) + stats.is_most_headshot = safe_int(fight.get('is_most_headshot')) + stats.is_most_jump = safe_int(fight.get('is_most_jump')) + stats.is_svp = safe_int(fight.get('is_svp')) + stats.is_tie = safe_int(fight.get('is_tie')) + stats.kill_1 = safe_int(fight.get('kill_1')) + stats.kill_2 = safe_int(fight.get('kill_2')) + stats.kill_3 = safe_int(fight.get('kill_3')) + stats.kill_4 = safe_int(fight.get('kill_4')) + stats.kill_5 = safe_int(fight.get('kill_5')) + stats.many_assists_cnt1 = safe_int(fight.get('many_assists_cnt1')) + stats.many_assists_cnt2 = safe_int(fight.get('many_assists_cnt2')) + stats.many_assists_cnt3 = safe_int(fight.get('many_assists_cnt3')) + stats.many_assists_cnt4 = safe_int(fight.get('many_assists_cnt4')) + stats.many_assists_cnt5 = safe_int(fight.get('many_assists_cnt5')) + stats.map = safe_text(fight.get('map')) + stats.match_code = safe_text(fight.get('match_code')) + stats.match_mode = safe_text(fight.get('match_mode')) + stats.match_team_id = safe_int(fight.get('match_team_id')) + stats.match_time = safe_int(fight.get('match_time')) + stats.per_headshot = safe_float(fight.get('per_headshot')) + stats.perfect_kill = safe_int(fight.get('perfect_kill')) + stats.planted_bomb = safe_int(fight.get('planted_bomb')) + stats.revenge_kill = safe_int(fight.get('revenge_kill')) + stats.round_total = safe_int(fight.get('round_total')) + stats.season = safe_text(fight.get('season')) + stats.team_kill = safe_int(fight.get('team_kill')) + stats.throw_harm = 
safe_int(fight.get('throw_harm')) + stats.throw_harm_enemy = safe_int(fight.get('throw_harm_enemy')) + stats.uid = safe_int(fight.get('uid')) + stats.year = safe_text(fight.get('year')) + + except Exception as e: + logger.error(f"Error parsing stats for {steam_id} in {self.match_id}: {e}") + pass + + self.match_data.players[steam_id] = stats + if isinstance(fight_t, dict) and fight_t: + t_team_id = team_id_value or safe_int(fight_t.get('match_team_id')) + self.match_data.players_t[steam_id] = build_side_stats(fight_t, t_team_id) + if isinstance(fight_ct, dict) and fight_ct: + ct_team_id = team_id_value or safe_int(fight_ct.get('match_team_id')) + self.match_data.players_ct[steam_id] = build_side_stats(fight_ct, ct_team_id) + + def _parse_players_vip(self): + if not self.data_vip: + return + + # Structure: data_vip -> steamid (key) -> dict + for sid, vdata in self.data_vip.items(): + # SID might be steam_id_64 directly + if sid in self.match_data.players: + p = self.match_data.players[sid] + p.kast = float(vdata.get('kast', 0)) + p.awp_kills = int(vdata.get('awp_kill', 0)) + # Damage stats might need calculation or mapping + # p.damage_total = ... 
+ else: + # Try to match by 5E ID if possible, but here keys are steamids usually + pass + + def _parse_leetify_rounds(self): + l_data = self.data_leetify.get('leetify_data', {}) + round_list = l_data.get('round_stat', []) + + for idx, r in enumerate(round_list): + rd = RoundData( + round_num=r.get('round', idx + 1), + winner_side='CT' if r.get('win_reason') in [7, 8, 9] else 'T', # Approximate logic, need real enum + win_reason=r.get('win_reason', 0), + win_reason_desc=str(r.get('win_reason', 0)), + duration=0, # Leetify might not have exact duration easily + end_time_stamp=r.get('end_ts', ''), + ct_score=r.get('sfui_event', {}).get('score_ct', 0), + t_score=r.get('sfui_event', {}).get('score_t', 0), + ct_money_start=r.get('ct_money_group', 0), + t_money_start=r.get('t_money_group', 0) + ) + + # Events + # Leetify has 'show_event' list + events = r.get('show_event', []) + for evt in events: + e_type_code = evt.get('event_type') + # Mapping needed for event types. + # Assuming 3 is kill based on schema 'kill_event' presence + + if evt.get('kill_event'): + k = evt['kill_event'] + re = RoundEvent( + event_id=f"{self.match_id}_{rd.round_num}_{k.get('Ts', '')}_{k.get('Killer')}", + event_type='kill', + event_time=evt.get('ts', 0), + attacker_steam_id=k.get('Killer'), + victim_steam_id=k.get('Victim'), + weapon=k.get('WeaponName'), + is_headshot=k.get('Headshot', False), + is_wallbang=k.get('Penetrated', False), + is_blind=k.get('AttackerBlind', False), + is_through_smoke=k.get('ThroughSmoke', False), + is_noscope=k.get('NoScope', False) + ) + + # Leetify specifics + # Trade? + if evt.get('trade_score_change'): + re.trade_killer_steam_id = list(evt['trade_score_change'].keys())[0] + + if evt.get('flash_assist_killer_score_change'): + re.flash_assist_steam_id = list(evt['flash_assist_killer_score_change'].keys())[0] + + # Score changes + if evt.get('killer_score_change'): + # e.g. 
{'': {'score': 17.0}} + vals = list(evt['killer_score_change'].values()) + if vals: re.score_change_attacker = vals[0].get('score', 0) + + if evt.get('victim_score_change'): + vals = list(evt['victim_score_change'].values()) + if vals: re.score_change_victim = vals[0].get('score', 0) + + rd.events.append(re) + + bron_equipment = r.get('bron_equipment') or {} + player_t_score = r.get('player_t_score') or {} + player_ct_score = r.get('player_ct_score') or {} + player_bron_crash = r.get('player_bron_crash') or {} + + def pick_main_weapon(items): + if not isinstance(items, list): + return "" + ignore = { + "weapon_knife", + "weapon_knife_t", + "weapon_knife_gg", + "weapon_knife_ct", + "weapon_c4", + "weapon_flashbang", + "weapon_hegrenade", + "weapon_smokegrenade", + "weapon_molotov", + "weapon_incgrenade", + "weapon_decoy" + } + for it in items: + if not isinstance(it, dict): + continue + name = it.get('WeaponName') + if name and name not in ignore: + return name + for it in items: + if not isinstance(it, dict): + continue + name = it.get('WeaponName') + if name: + return name + return "" + + def pick_money(items): + if not isinstance(items, list): + return 0 + vals = [] + for it in items: + if isinstance(it, dict) and it.get('Money') is not None: + vals.append(it.get('Money')) + return int(max(vals)) if vals else 0 + + side_scores = {} + for sid, val in player_t_score.items(): + side_scores[str(sid)] = ("T", float(val) if val is not None else 0.0) + for sid, val in player_ct_score.items(): + side_scores[str(sid)] = ("CT", float(val) if val is not None else 0.0) + + for sid in set(list(side_scores.keys()) + [str(k) for k in bron_equipment.keys()]): + if sid not in side_scores: + continue + side, score = side_scores[sid] + items = bron_equipment.get(sid) or bron_equipment.get(str(sid)) or [] + start_money = pick_money(items) + equipment_value = player_bron_crash.get(sid) + if equipment_value is None: + equipment_value = player_bron_crash.get(str(sid)) + equipment_value 
= int(equipment_value) if equipment_value is not None else 0 + main_weapon = pick_main_weapon(items) + rd.economies.append(PlayerEconomy( + steam_id_64=str(sid), + side=side, + start_money=start_money, + equipment_value=equipment_value, + main_weapon=main_weapon, + round_performance_score=float(score) + )) + + self.match_data.rounds.append(rd) + + def _parse_classic_rounds(self): + r_list = self.data_round_list.get('round_list', []) + for idx, r in enumerate(r_list): + # Classic round data often lacks score/winner in the list root? + # Check schema: 'current_score' -> ct/t + cur_score = r.get('current_score', {}) + + rd = RoundData( + round_num=idx + 1, + winner_side='None', # Default to None if unknown + win_reason=0, + win_reason_desc='', + duration=float(cur_score.get('final_round_time', 0)), + end_time_stamp='', + ct_score=cur_score.get('ct', 0), + t_score=cur_score.get('t', 0) + ) + + # Kills + # Classic has 'all_kill' list + kills = r.get('all_kill', []) + for k in kills: + attacker = k.get('attacker', {}) + victim = k.get('victim', {}) + + # Pos extraction + apos = attacker.get('pos', {}) + vpos = victim.get('pos', {}) + + re = RoundEvent( + event_id=f"{self.match_id}_{rd.round_num}_{k.get('pasttime')}_{attacker.get('steamid_64')}", + event_type='kill', + event_time=k.get('pasttime', 0), + attacker_steam_id=str(attacker.get('steamid_64', '')), + victim_steam_id=str(victim.get('steamid_64', '')), + weapon=k.get('weapon', ''), + is_headshot=k.get('headshot', False), + is_wallbang=k.get('penetrated', False), + is_blind=k.get('attackerblind', False), + is_through_smoke=k.get('throughsmoke', False), + is_noscope=k.get('noscope', False), + attacker_pos=(apos.get('x', 0), apos.get('y', 0), apos.get('z', 0)), + victim_pos=(vpos.get('x', 0), vpos.get('y', 0), vpos.get('z', 0)) + ) + rd.events.append(re) + + self.match_data.rounds.append(rd) + +# --- Main Execution --- + +def process_matches(): + if not init_db(): + return + + l1_conn = sqlite3.connect(L1A_DB_PATH) + 
l1_cursor = l1_conn.cursor() + + l2_conn = sqlite3.connect(L2_DB_PATH) + l2_cursor = l2_conn.cursor() + + logger.info("Reading from L1A...") + l1_cursor.execute("SELECT match_id, content FROM raw_iframe_network") + + count = 0 + while True: + rows = l1_cursor.fetchmany(10) + if not rows: + break + + for row in rows: + match_id, content = row + try: + raw_requests = json.loads(content) + parser = MatchParser(match_id, raw_requests) + match_data = parser.parse() + save_match(l2_cursor, match_data) + count += 1 + if count % 10 == 0: + l2_conn.commit() + print(f"Processed {count} matches...", end='\r') + except Exception as e: + logger.error(f"Error processing match {match_id}: {e}") + # continue + + l2_conn.commit() + l1_conn.close() + l2_conn.close() + logger.info(f"\nDone. Processed {count} matches.") + +def save_match(cursor, m: MatchData): + # 1. Dim Players (Upsert) + for sid, meta in m.player_meta.items(): + cursor.execute(""" + INSERT INTO dim_players (steam_id_64, uid, username, avatar_url, domain, created_at, updated_at, last_seen_match_id) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(steam_id_64) DO UPDATE SET + username=excluded.username, + avatar_url=excluded.avatar_url, + last_seen_match_id=excluded.last_seen_match_id + """, ( + sid, meta.get('uid'), meta.get('username'), meta.get('avatar_url'), + meta.get('domain'), meta.get('created_at'), meta.get('updated_at'), + m.match_id + )) + + # 2. Dim Maps (Ignore if exists) + if m.map_name: + cursor.execute("INSERT OR IGNORE INTO dim_maps (map_name) VALUES (?)", (m.map_name,)) + + # 3. Fact Matches + cursor.execute(""" + INSERT OR REPLACE INTO fact_matches + (match_id, match_code, map_name, start_time, end_time, duration, winner_team, score_team1, score_team2, server_ip, server_port, location, data_source_type) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, ( + m.match_id, m.match_code, m.map_name, m.start_time, m.end_time, m.duration, + m.winner_team, m.score_team1, m.score_team2, m.server_ip, m.server_port, m.location, m.data_source_type + )) + + # 4. Fact Match Players + player_columns = [ + "match_id", "steam_id_64", "team_id", "kills", "deaths", "assists", "headshot_count", + "kd_ratio", "adr", "rating", "rating2", "rating3", "rws", "mvp_count", "elo_change", + "rank_score", "is_win", "kast", "entry_kills", "entry_deaths", "awp_kills", + "clutch_1v1", "clutch_1v2", "clutch_1v3", "clutch_1v4", "clutch_1v5", + "flash_assists", "flash_duration", "jump_count", "damage_total", "damage_received", + "assisted_kill", "awp_kill", "benefit_kill", "day", "defused_bomb", "end_1v1", + "end_1v2", "end_1v3", "end_1v4", "end_1v5", "explode_bomb", "first_death", + "first_kill", "flash_enemy", "flash_team", "flash_team_time", "flash_time", + "game_mode", "group_id", "hold_total", "id", "is_highlight", "is_most_1v2", + "is_most_assist", "is_most_awp", "is_most_end", "is_most_first_kill", + "is_most_headshot", "is_most_jump", "is_svp", "is_tie", "kill_1", "kill_2", + "kill_3", "kill_4", "kill_5", "many_assists_cnt1", "many_assists_cnt2", + "many_assists_cnt3", "many_assists_cnt4", "many_assists_cnt5", "map", + "match_code", "match_mode", "match_team_id", "match_time", "per_headshot", + "perfect_kill", "planted_bomb", "revenge_kill", "round_total", "season", + "team_kill", "throw_harm", "throw_harm_enemy", "uid", "year" + ] + player_placeholders = ",".join(["?"] * len(player_columns)) + player_columns_sql = ",".join(player_columns) + + def player_values(sid, p): + return [ + m.match_id, sid, p.team_id, p.kills, p.deaths, p.assists, p.headshot_count, + p.kd_ratio, p.adr, p.rating, p.rating2, p.rating3, p.rws, p.mvp_count, + p.elo_change, p.rank_score, p.is_win, p.kast, p.entry_kills, p.entry_deaths, + p.awp_kills, p.clutch_1v1, p.clutch_1v2, p.clutch_1v3, p.clutch_1v4, + p.clutch_1v5, p.flash_assists, p.flash_duration, 
p.jump_count, p.damage_total, + p.damage_received, p.assisted_kill, p.awp_kill, p.benefit_kill, p.day, + p.defused_bomb, p.end_1v1, p.end_1v2, p.end_1v3, p.end_1v4, p.end_1v5, + p.explode_bomb, p.first_death, p.first_kill, p.flash_enemy, p.flash_team, + p.flash_team_time, p.flash_time, p.game_mode, p.group_id, p.hold_total, + p.id, p.is_highlight, p.is_most_1v2, p.is_most_assist, p.is_most_awp, + p.is_most_end, p.is_most_first_kill, p.is_most_headshot, p.is_most_jump, + p.is_svp, p.is_tie, p.kill_1, p.kill_2, p.kill_3, p.kill_4, p.kill_5, + p.many_assists_cnt1, p.many_assists_cnt2, p.many_assists_cnt3, p.many_assists_cnt4, + p.many_assists_cnt5, p.map, p.match_code, p.match_mode, p.match_team_id, + p.match_time, p.per_headshot, p.perfect_kill, p.planted_bomb, p.revenge_kill, + p.round_total, p.season, p.team_kill, p.throw_harm, p.throw_harm_enemy, + p.uid, p.year + ] + + for sid, p in m.players.items(): + cursor.execute( + f"INSERT OR REPLACE INTO fact_match_players ({player_columns_sql}) VALUES ({player_placeholders})", + player_values(sid, p) + ) + for sid, p in m.players_t.items(): + cursor.execute( + f"INSERT OR REPLACE INTO fact_match_players_t ({player_columns_sql}) VALUES ({player_placeholders})", + player_values(sid, p) + ) + for sid, p in m.players_ct.items(): + cursor.execute( + f"INSERT OR REPLACE INTO fact_match_players_ct ({player_columns_sql}) VALUES ({player_placeholders})", + player_values(sid, p) + ) + + # 5. Rounds & Events + for r in m.rounds: + cursor.execute(""" + INSERT OR REPLACE INTO fact_rounds + (match_id, round_num, winner_side, win_reason, win_reason_desc, duration, end_time_stamp, ct_score, t_score, ct_money_start, t_money_start) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, ( + m.match_id, r.round_num, r.winner_side, r.win_reason, r.win_reason_desc, + r.duration, r.end_time_stamp, r.ct_score, r.t_score, r.ct_money_start, r.t_money_start + )) + + for e in r.events: + # Handle Pos + ax, ay, az = e.attacker_pos if e.attacker_pos else (None, None, None) + vx, vy, vz = e.victim_pos if e.victim_pos else (None, None, None) + + # Use uuid for event_id to ensure uniqueness if logic fails + import uuid + if not e.event_id: + e.event_id = str(uuid.uuid4()) + + cursor.execute(""" + INSERT OR REPLACE INTO fact_round_events + (event_id, match_id, round_num, event_type, event_time, attacker_steam_id, victim_steam_id, + weapon, is_headshot, is_wallbang, is_blind, is_through_smoke, is_noscope, + trade_killer_steam_id, flash_assist_steam_id, score_change_attacker, score_change_victim, + attacker_pos_x, attacker_pos_y, attacker_pos_z, victim_pos_x, victim_pos_y, victim_pos_z) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, ( + e.event_id, m.match_id, r.round_num, e.event_type, e.event_time, e.attacker_steam_id, e.victim_steam_id, + e.weapon, e.is_headshot, e.is_wallbang, e.is_blind, e.is_through_smoke, e.is_noscope, + e.trade_killer_steam_id, e.flash_assist_steam_id, e.score_change_attacker, e.score_change_victim, + ax, ay, az, vx, vy, vz + )) + + for pe in r.economies: + cursor.execute(""" + INSERT OR REPLACE INTO fact_round_player_economy + (match_id, round_num, steam_id_64, side, start_money, equipment_value, main_weapon, has_helmet, has_defuser, round_performance_score) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
def _scalar(conn, sql, params=()):
    """Run *sql* and return the first column of the first result row."""
    return conn.execute(sql, params).fetchone()[0]


def _is_empty(s):
    """Return a boolean mask of values that are NULL/NaN or exactly zero."""
    return s.isna() | (s == 0)


def _is_nonzero(s):
    """Return a boolean mask of values that are present and not zero."""
    return s.notna() & (s != 0)


def _print_table_counts(conn):
    """Print the row count of every L2 table."""
    print("--- Counts ---")
    tables = [
        'dim_players',
        'dim_maps',
        'fact_matches',
        'fact_match_players',
        'fact_match_players_t',
        'fact_match_players_ct',
        'fact_rounds',
        'fact_round_events',
        'fact_round_player_economy',
    ]
    for t in tables:
        # Identifiers cannot be bound as parameters; the names come from the
        # fixed list above, so interpolating them is safe.
        count = _scalar(conn, f"SELECT COUNT(*) FROM {t}")
        print(f"{t}: {count}")


def _print_source_distribution(conn):
    """Print how many matches exist per data_source_type."""
    print("\n--- Data Source Distribution ---")
    dist = pd.read_sql(
        "SELECT data_source_type, COUNT(*) as cnt FROM fact_matches GROUP BY data_source_type",
        conn,
    )
    print(dist)


def _print_sample_events(conn):
    """Print one kill event from a leetify match and one from a classic match."""
    print("\n--- Sample Round Events (Leetify vs Classic) ---")
    shown = ['event_type', 'attacker_steam_id', 'trade_killer_steam_id',
             'attacker_pos_x', 'score_change_attacker']
    for source_type, label in (('leetify', 'Leetify'), ('classic', 'Classic')):
        match = conn.execute(
            "SELECT match_id FROM fact_matches WHERE data_source_type=? LIMIT 1",
            (source_type,),
        ).fetchone()
        if not match:
            continue
        mid = match[0]
        print(f"{label} Match: {mid}")
        # match_id is data read from the database: bind it instead of
        # formatting it into the statement.
        df = pd.read_sql(
            "SELECT * FROM fact_round_events WHERE match_id=? AND event_type='kill' LIMIT 1",
            conn,
            params=(mid,),
        )
        print(df[shown])


def _print_sample_player_stats(conn):
    """Print the first five fact_match_players rows for a handful of columns."""
    print("\n--- Sample Player Stats (New Fields) ---")
    df_players = pd.read_sql(
        "SELECT steam_id_64, rating, rating3, elo_change, rank_score, flash_duration, jump_count FROM fact_match_players LIMIT 5",
        conn,
    )
    print(df_players)


def _print_integrity_checks(conn):
    """Print orphan-row counts and the number of all-zero K/D/A side rows."""
    print("\n--- Integrity Checks ---")
    missing_players = _scalar(conn, """
        SELECT COUNT(*) FROM fact_match_players f
        LEFT JOIN dim_players d ON f.steam_id_64 = d.steam_id_64
        WHERE d.steam_id_64 IS NULL
    """)
    print(f"fact_match_players missing dim_players: {missing_players}")

    missing_round_matches = _scalar(conn, """
        SELECT COUNT(*) FROM fact_rounds r
        LEFT JOIN fact_matches m ON r.match_id = m.match_id
        WHERE m.match_id IS NULL
    """)
    print(f"fact_rounds missing fact_matches: {missing_round_matches}")

    missing_event_rounds = _scalar(conn, """
        SELECT COUNT(*) FROM fact_round_events e
        LEFT JOIN fact_rounds r ON e.match_id = r.match_id AND e.round_num = r.round_num
        WHERE r.match_id IS NULL
    """)
    print(f"fact_round_events missing fact_rounds: {missing_event_rounds}")

    side_zero_t = _scalar(conn, """
        SELECT COUNT(*) FROM fact_match_players_t
        WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0
    """)
    side_zero_ct = _scalar(conn, """
        SELECT COUNT(*) FROM fact_match_players_ct
        WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0
    """)
    print(f"fact_match_players_t zero K/D/A: {side_zero_t}")
    print(f"fact_match_players_ct zero K/D/A: {side_zero_ct}")


def _print_side_comparison(conn):
    """Compare fact_match_players with its _t / _ct counterparts column by column."""
    print("\n--- Full vs T/CT Comparison ---")
    cols = [
        'kills', 'deaths', 'assists', 'headshot_count', 'adr', 'rating', 'rating2',
        'rating3', 'rws', 'mvp_count', 'flash_duration', 'jump_count', 'is_win'
    ]
    keys = ['match_id', 'steam_id_64']
    select = "SELECT match_id, steam_id_64, " + ",".join(cols)

    def read_side(table, suffix):
        frame = pd.read_sql(select + f" FROM {table}", conn)
        return frame.rename(columns={c: f"{c}{suffix}" for c in cols})

    df = pd.read_sql(select + " FROM fact_match_players", conn)
    # Left joins keep every full-match row even when a side row is missing.
    df = df.merge(read_side("fact_match_players_t", "_t"), on=keys, how='left')
    df = df.merge(read_side("fact_match_players_ct", "_ct"), on=keys, how='left')

    for c in cols:
        empty_count = _is_empty(df[c]).sum()
        print(f"{c} empty: {empty_count}")

    additive = ['kills', 'deaths', 'assists', 'headshot_count', 'mvp_count', 'flash_duration', 'jump_count']
    for c in additive:
        t_sum = df[f"{c}_t"].fillna(0) + df[f"{c}_ct"].fillna(0)
        # flash_duration is REAL, so allow a small rounding tolerance.
        tol = 0.01 if c == 'flash_duration' else 0
        diff = (df[c].fillna(0) - t_sum).abs() > tol
        print(f"{c} full != t+ct: {diff.sum()}")

    non_additive = ['adr', 'rating', 'rating2', 'rating3', 'rws', 'is_win']
    for c in non_additive:
        side_nonempty = (~_is_empty(df[f"{c}_t"])) | (~_is_empty(df[f"{c}_ct"]))
        full_empty_side_nonempty = _is_empty(df[c]) & side_nonempty
        full_nonempty_side_empty = (~_is_empty(df[c])) & (~side_nonempty)
        print(f"{c} full empty but side has: {full_empty_side_nonempty.sum()}")
        print(f"{c} full has but side empty: {full_nonempty_side_empty.sum()}")

    print("\n--- Rating Detail ---")
    rating_cols = ['rating', 'rating2', 'rating3']
    for c in rating_cols:
        full_mask = _is_nonzero(df[c])
        t_mask = _is_nonzero(df[f"{c}_t"])
        ct_mask = _is_nonzero(df[f"{c}_ct"])
        side_any_nonzero = t_mask | ct_mask

        full_null = df[c].isna().sum()
        full_zero = (df[c] == 0).sum()
        full_nonzero = full_mask.sum()
        side_t_nonzero = t_mask.sum()
        side_ct_nonzero = ct_mask.sum()
        full_nonzero_side_zero = (full_mask & (~side_any_nonzero)).sum()
        full_zero_side_nonzero = (_is_empty(df[c]) & side_any_nonzero).sum()
        print(f"{c} full null: {full_null} full zero: {full_zero} full nonzero: {full_nonzero}")
        print(f"{c} side t nonzero: {side_t_nonzero} side ct nonzero: {side_ct_nonzero}")
        print(f"{c} full nonzero but side all zero: {full_nonzero_side_zero}")
        print(f"{c} full zero but side has: {full_zero_side_nonzero}")

    df_rating_src = pd.read_sql(
        "SELECT f.rating, f.rating2, f.rating3, m.data_source_type FROM fact_match_players f JOIN fact_matches m ON f.match_id = m.match_id",
        conn
    )
    for c in rating_cols:
        # NaN != 0 evaluates to True, so NULL ratings must be excluded
        # explicitly or they would be reported as "nonzero".
        grp = (
            df_rating_src.groupby('data_source_type')[c]
            .apply(lambda s: _is_nonzero(s).sum())
            .reset_index(name='nonzero')
        )
        print(f"{c} nonzero by source")
        print(grp)


def _load_schema_paths(schema_path):
    """Return the second column of every data row in the flattened schema CSV."""
    with open(schema_path, 'r', encoding='utf-8', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row
        return [row[1] for row in reader if len(row) >= 2]


def _print_fight_any_coverage(conn, paths):
    """Report which fight_any keys have a fact_match_players column and how often it is zero."""
    print("\n--- Schema Coverage (fight_any) ---")
    fight_keys = set()
    for p in paths:
        if 'data.group_N[].fight_any.' in p:
            key = p.split('fight_any.')[1].split('.')[0]
            fight_keys.add(key)
    l2_cols = set(pd.read_sql("PRAGMA table_info(fact_match_players)", conn)['name'].tolist())
    # L2 column name -> fight_any key, for columns that were renamed.
    alias = {
        'kills': 'kill',
        'deaths': 'death',
        'assists': 'assist',
        'headshot_count': 'headshot',
        'mvp_count': 'is_mvp',
        'flash_duration': 'flash_enemy_time',
        'jump_count': 'jump_total',
        'awp_kills': 'awp_kill'
    }
    covered = set()
    for c in l2_cols:
        if c in fight_keys:
            covered.add(c)
        elif c in alias and alias[c] in fight_keys:
            covered.add(alias[c])
    missing_keys = sorted(fight_keys - covered)
    print(f"fight_any keys: {len(fight_keys)}")
    print(f"covered by L2 columns: {len(covered)}")
    print(f"uncovered fight_any keys: {len(missing_keys)}")
    if missing_keys:
        print(missing_keys)

    print("\n--- Coverage Zero Rate (fight_any -> fact_match_players) ---")
    col_map = {}
    for k in fight_keys:
        if k in l2_cols:
            col_map[k] = k
            continue
        for l2k, src in alias.items():
            # Only map to an alias column that really exists in the table;
            # otherwise the SELECT below would fail on an unknown column.
            if src == k and l2k in l2_cols:
                col_map[k] = l2k
                break
    select_cols = ["steam_id_64"] + sorted(set(col_map.values()))
    df_fight = pd.read_sql(
        "SELECT " + ",".join(select_cols) + " FROM fact_match_players",
        conn
    )
    total_rows = len(df_fight)
    stats = []
    for fight_key, col in sorted(col_map.items()):
        s = df_fight[col]
        zeros = (s == 0).sum()
        nulls = s.isna().sum()
        nonzero = total_rows - zeros - nulls
        stats.append({
            "fight_key": fight_key,
            "column": col,
            "nonzero": nonzero,
            "zero": zeros,
            "null": nulls,
            "zero_rate": 0 if total_rows == 0 else round(zeros / total_rows, 4)
        })
    if not stats:
        # An empty DataFrame has no "zero_rate" column to sort by.
        print("no fight_any keys map to fact_match_players columns")
        return
    df_stats = pd.DataFrame(stats).sort_values(["zero_rate", "nonzero"], ascending=[False, True])
    print(df_stats.head(30))
    print("\n-- zero_rate top (most zeros) --")
    print(df_stats.head(10))
    print("\n-- zero_rate bottom (most nonzero) --")
    print(df_stats.tail(10))


def _print_economy_coverage(paths):
    """Print how many schema paths fall under each leetify economy prefix."""
    print("\n--- Schema Coverage (leetify economy) ---")
    econ_keys = [
        'data.leetify_data.round_stat[].bron_equipment.',
        'data.leetify_data.round_stat[].player_t_score.',
        'data.leetify_data.round_stat[].player_ct_score.',
        'data.leetify_data.round_stat[].player_bron_crash.'
    ]
    for k in econ_keys:
        count = sum(1 for p in paths if k in p)
        print(f"{k} paths: {count}")


def verify():
    """Print a verification report for the L2 SQLite database.

    Opens the database at the module-level ``db_path`` and prints, in order:
    table row counts, match counts per data source, sample kill events,
    sample player stats, integrity checks, the full vs T/CT comparison,
    rating details, and schema coverage against
    ``database/original_json_schema/schema_flat.csv``.

    Returns nothing; all output goes to stdout. The connection is closed
    even when a query or the schema CSV read raises.
    """
    conn = sqlite3.connect(db_path)
    try:
        _print_table_counts(conn)
        _print_source_distribution(conn)
        _print_sample_events(conn)
        _print_sample_player_stats(conn)
        _print_integrity_checks(conn)
        _print_side_comparison(conn)

        paths = _load_schema_paths('database/original_json_schema/schema_flat.csv')
        _print_fight_any_coverage(conn, paths)
        _print_economy_coverage(paths)
    finally:
        conn.close()
def check_nulls_zeros():
    """Print null/zero statistics for key L2 tables.

    Opens the database at the module-level ``db_path`` and prints three
    reports to stdout:

    1. Per-column null and zero counts for selected fact_match_players columns.
    2. Completeness of kill events (missing attacker/victim, self-kills).
    3. Per data source, how many kill events carry a position and a score change.

    Returns nothing. The connection is closed even when a query raises.
    """
    def pct(part, total):
        # An empty table would otherwise produce NaN percentages and a
        # divide-by-zero RuntimeWarning.
        return (part / total) * 100 if total else 0.0

    conn = sqlite3.connect(db_path)
    try:
        print("=== 1. Fact Match Players: 关键字段零值/空值检查 ===")
        df_players = pd.read_sql("""
            SELECT
                kills, deaths, assists, adr, rating, rating2,
                kast, awp_kills, flash_duration, jump_count,
                elo_change
            FROM fact_match_players
        """, conn)

        total = len(df_players)
        stats = []
        for col in df_players.columns:
            nulls = df_players[col].isnull().sum()
            zeros = (df_players[col] == 0).sum()
            stats.append({
                'Field': col,
                'Total': total,
                'Nulls': nulls,
                'Null%': pct(nulls, total),
                'Zeros': zeros,
                'Zero%': pct(zeros, total)
            })
        print(pd.DataFrame(stats))

        print("\n=== 2. Fact Round Events (Kills): 击杀完整性检查 ===")
        # Only inspect records with event_type = 'kill'.
        df_kills = pd.read_sql("""
            SELECT
                attacker_steam_id, victim_steam_id,
                event_time, weapon,
                attacker_pos_x, score_change_attacker
            FROM fact_round_events
            WHERE event_type = 'kill'
        """, conn)

        attacker = df_kills['attacker_steam_id']
        victim = df_kills['victim_steam_id']
        total_kills = len(df_kills)
        missing_attacker = attacker.isnull().sum() + (attacker == '').sum()
        missing_victim = victim.isnull().sum() + (victim == '').sum()

        # Attacker equal to victim means a self-kill. Rows where both ids are
        # blank are already counted as missing above, so exclude them here.
        has_attacker = attacker.notna() & (attacker != '')
        self_kills = (has_attacker & (attacker == victim)).sum()

        print(f"Total Kill Events: {total_kills}")
        print(f"Missing Attacker: {missing_attacker} ({pct(missing_attacker, total_kills):.2f}%)")
        print(f"Missing Victim: {missing_victim} ({pct(missing_victim, total_kills):.2f}%)")
        print(f"Self Kills (Suicide?): {self_kills}")

        print("\n=== 3. Fact Round Events: 坐标与评分覆盖率 ===")
        # Coordinates are expected for classic matches and may be empty for
        # leetify matches; score changes are expected for leetify matches.
        df_events = pd.read_sql("""
            SELECT
                m.data_source_type,
                COUNT(*) as total_events,
                SUM(CASE WHEN e.attacker_pos_x IS NOT NULL AND e.attacker_pos_x != 0 THEN 1 ELSE 0 END) as has_pos,
                SUM(CASE WHEN e.score_change_attacker IS NOT NULL AND e.score_change_attacker != 0 THEN 1 ELSE 0 END) as has_score
            FROM fact_round_events e
            JOIN fact_matches m ON e.match_id = m.match_id
            WHERE e.event_type = 'kill'
            GROUP BY m.data_source_type
        """, conn)
        print(df_events)
    finally:
        conn.close()
-- 1. Dimension: Players
-- Stores persistent player information, one row per steam_id_64.
-- Conflict resolution: UPSERT on steam_id_64.
CREATE TABLE IF NOT EXISTS dim_players (
    steam_id_64 TEXT PRIMARY KEY,
    uid INTEGER, -- 5E Platform ID
    username TEXT,
    avatar_url TEXT,
    domain TEXT,
    created_at INTEGER, -- Timestamp (presumably Unix epoch seconds; TODO confirm)
    updated_at INTEGER, -- Timestamp (presumably Unix epoch seconds; TODO confirm)
    last_seen_match_id TEXT -- To track when this info was last updated
);

-- Secondary index so players can be looked up by uid as well as steam_id_64.
CREATE INDEX IF NOT EXISTS idx_dim_players_uid ON dim_players(uid);

-- 2. Dimension: Maps
-- One row per map; map_name is required and unique, map_id is a surrogate key.
CREATE TABLE IF NOT EXISTS dim_maps (
    map_id INTEGER PRIMARY KEY AUTOINCREMENT,
    map_name TEXT UNIQUE NOT NULL,
    map_desc TEXT
);

-- 3. Fact: Matches
-- One row per match, keyed by match_id. The per-player and per-round fact
-- tables reference this table with ON DELETE CASCADE.
-- NOTE: map_name is stored as plain text; no foreign key to dim_maps is declared.
CREATE TABLE IF NOT EXISTS fact_matches (
    match_id TEXT PRIMARY KEY,
    match_code TEXT,
    map_name TEXT,
    start_time INTEGER,
    end_time INTEGER,
    duration INTEGER,
    winner_team INTEGER, -- 1 or 2
    score_team1 INTEGER,
    score_team2 INTEGER,
    server_ip TEXT,
    server_port INTEGER,
    location TEXT,
    data_source_type TEXT CHECK(data_source_type IN ('leetify', 'classic', 'unknown')), -- 'leetify' has economy data, 'classic' has detailed xyz
    processed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -- Filled in at insert time when the loader does not supply a value
);

-- Supports filtering and ordering matches by start time.
CREATE INDEX IF NOT EXISTS idx_fact_matches_time ON fact_matches(start_time);
-- 4. Fact: Match Player Stats (Wide Table)
-- Aggregated stats for a player in a specific match.
-- One row per (match_id, steam_id_64); rows are removed when the parent
-- fact_matches row is deleted (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS fact_match_players (
    match_id TEXT,
    steam_id_64 TEXT,
    team_id INTEGER, -- 1 or 2

    -- Basic Stats
    kills INTEGER DEFAULT 0,
    deaths INTEGER DEFAULT 0,
    assists INTEGER DEFAULT 0,
    headshot_count INTEGER DEFAULT 0,
    kd_ratio REAL,
    adr REAL,
    rating REAL, -- 5E Rating
    rating2 REAL,
    rating3 REAL,
    rws REAL,
    mvp_count INTEGER DEFAULT 0,
    elo_change REAL,
    rank_score INTEGER,
    is_win BOOLEAN,

    -- Advanced Stats (VIP/Plus)
    -- NOTE(review): awp_kills and awp_kill both exist in this table; confirm
    -- which one downstream queries should read.
    kast REAL,
    entry_kills INTEGER,
    entry_deaths INTEGER,
    awp_kills INTEGER,
    clutch_1v1 INTEGER,
    clutch_1v2 INTEGER,
    clutch_1v3 INTEGER,
    clutch_1v4 INTEGER,
    clutch_1v5 INTEGER,
    flash_assists INTEGER,
    flash_duration REAL,
    jump_count INTEGER,
    damage_total INTEGER,
    damage_received INTEGER,
    assisted_kill INTEGER,
    awp_kill INTEGER,
    benefit_kill INTEGER,
    day TEXT,
    defused_bomb INTEGER,
    end_1v1 INTEGER,
    end_1v2 INTEGER,
    end_1v3 INTEGER,
    end_1v4 INTEGER,
    end_1v5 INTEGER,
    explode_bomb INTEGER,
    first_death INTEGER,
    first_kill INTEGER,
    flash_enemy INTEGER,
    flash_team INTEGER,
    flash_team_time REAL,
    flash_time REAL,
    game_mode TEXT,
    group_id INTEGER,
    hold_total INTEGER,
    id INTEGER,
    is_highlight INTEGER,
    is_most_1v2 INTEGER,
    is_most_assist INTEGER,
    is_most_awp INTEGER,
    is_most_end INTEGER,
    is_most_first_kill INTEGER,
    is_most_headshot INTEGER,
    is_most_jump INTEGER,
    is_svp INTEGER,
    is_tie INTEGER,
    kill_1 INTEGER,
    kill_2 INTEGER,
    kill_3 INTEGER,
    kill_4 INTEGER,
    kill_5 INTEGER,
    many_assists_cnt1 INTEGER,
    many_assists_cnt2 INTEGER,
    many_assists_cnt3 INTEGER,
    many_assists_cnt4 INTEGER,
    many_assists_cnt5 INTEGER,
    map TEXT,
    match_code TEXT,
    match_mode TEXT,
    match_team_id INTEGER,
    match_time INTEGER,
    per_headshot REAL,
    perfect_kill INTEGER,
    planted_bomb INTEGER,
    revenge_kill INTEGER,
    round_total INTEGER,
    season TEXT,
    team_kill INTEGER,
    throw_harm INTEGER,
    throw_harm_enemy INTEGER,
    uid INTEGER,
    year TEXT,

    PRIMARY KEY (match_id, steam_id_64),
    FOREIGN KEY (match_id) REFERENCES fact_matches(match_id) ON DELETE CASCADE
    -- Intentionally not enforcing FK on steam_id_64 strictly to allow stats even if player dim missing, but ideally it should match.
);

-- 4a. Side-specific variant of fact_match_players.
-- The column list, primary key and foreign key are identical to
-- fact_match_players, so the same INSERT column list works for all three.
-- NOTE(review): presumably holds the stats a player accumulated on the
-- T side; confirm against the loader.
CREATE TABLE IF NOT EXISTS fact_match_players_t (
    match_id TEXT,
    steam_id_64 TEXT,
    team_id INTEGER,
    kills INTEGER DEFAULT 0,
    deaths INTEGER DEFAULT 0,
    assists INTEGER DEFAULT 0,
    headshot_count INTEGER DEFAULT 0,
    kd_ratio REAL,
    adr REAL,
    rating REAL,
    rating2 REAL,
    rating3 REAL,
    rws REAL,
    mvp_count INTEGER DEFAULT 0,
    elo_change REAL,
    rank_score INTEGER,
    is_win BOOLEAN,
    kast REAL,
    entry_kills INTEGER,
    entry_deaths INTEGER,
    awp_kills INTEGER,
    clutch_1v1 INTEGER,
    clutch_1v2 INTEGER,
    clutch_1v3 INTEGER,
    clutch_1v4 INTEGER,
    clutch_1v5 INTEGER,
    flash_assists INTEGER,
    flash_duration REAL,
    jump_count INTEGER,
    damage_total INTEGER,
    damage_received INTEGER,
    assisted_kill INTEGER,
    awp_kill INTEGER,
    benefit_kill INTEGER,
    day TEXT,
    defused_bomb INTEGER,
    end_1v1 INTEGER,
    end_1v2 INTEGER,
    end_1v3 INTEGER,
    end_1v4 INTEGER,
    end_1v5 INTEGER,
    explode_bomb INTEGER,
    first_death INTEGER,
    first_kill INTEGER,
    flash_enemy INTEGER,
    flash_team INTEGER,
    flash_team_time REAL,
    flash_time REAL,
    game_mode TEXT,
    group_id INTEGER,
    hold_total INTEGER,
    id INTEGER,
    is_highlight INTEGER,
    is_most_1v2 INTEGER,
    is_most_assist INTEGER,
    is_most_awp INTEGER,
    is_most_end INTEGER,
    is_most_first_kill INTEGER,
    is_most_headshot INTEGER,
    is_most_jump INTEGER,
    is_svp INTEGER,
    is_tie INTEGER,
    kill_1 INTEGER,
    kill_2 INTEGER,
    kill_3 INTEGER,
    kill_4 INTEGER,
    kill_5 INTEGER,
    many_assists_cnt1 INTEGER,
    many_assists_cnt2 INTEGER,
    many_assists_cnt3 INTEGER,
    many_assists_cnt4 INTEGER,
    many_assists_cnt5 INTEGER,
    map TEXT,
    match_code TEXT,
    match_mode TEXT,
    match_team_id INTEGER,
    match_time INTEGER,
    per_headshot REAL,
    perfect_kill INTEGER,
    planted_bomb INTEGER,
    revenge_kill INTEGER,
    round_total INTEGER,
    season TEXT,
    team_kill INTEGER,
    throw_harm INTEGER,
    throw_harm_enemy INTEGER,
    uid INTEGER,
    year TEXT,
    PRIMARY KEY (match_id, steam_id_64),
    FOREIGN KEY (match_id) REFERENCES fact_matches(match_id) ON DELETE CASCADE
);

-- 4b. Side-specific variant of fact_match_players.
-- The column list, primary key and foreign key are identical to
-- fact_match_players.
-- NOTE(review): presumably holds the stats a player accumulated on the
-- CT side; confirm against the loader.
CREATE TABLE IF NOT EXISTS fact_match_players_ct (
    match_id TEXT,
    steam_id_64 TEXT,
    team_id INTEGER,
    kills INTEGER DEFAULT 0,
    deaths INTEGER DEFAULT 0,
    assists INTEGER DEFAULT 0,
    headshot_count INTEGER DEFAULT 0,
    kd_ratio REAL,
    adr REAL,
    rating REAL,
    rating2 REAL,
    rating3 REAL,
    rws REAL,
    mvp_count INTEGER DEFAULT 0,
    elo_change REAL,
    rank_score INTEGER,
    is_win BOOLEAN,
    kast REAL,
    entry_kills INTEGER,
    entry_deaths INTEGER,
    awp_kills INTEGER,
    clutch_1v1 INTEGER,
    clutch_1v2 INTEGER,
    clutch_1v3 INTEGER,
    clutch_1v4 INTEGER,
    clutch_1v5 INTEGER,
    flash_assists INTEGER,
    flash_duration REAL,
    jump_count INTEGER,
    damage_total INTEGER,
    damage_received INTEGER,
    assisted_kill INTEGER,
    awp_kill INTEGER,
    benefit_kill INTEGER,
    day TEXT,
    defused_bomb INTEGER,
    end_1v1 INTEGER,
    end_1v2 INTEGER,
    end_1v3 INTEGER,
    end_1v4 INTEGER,
    end_1v5 INTEGER,
    explode_bomb INTEGER,
    first_death INTEGER,
    first_kill INTEGER,
    flash_enemy INTEGER,
    flash_team INTEGER,
    flash_team_time REAL,
    flash_time REAL,
    game_mode TEXT,
    group_id INTEGER,
    hold_total INTEGER,
    id INTEGER,
    is_highlight INTEGER,
    is_most_1v2 INTEGER,
    is_most_assist INTEGER,
    is_most_awp INTEGER,
    is_most_end INTEGER,
    is_most_first_kill INTEGER,
    is_most_headshot INTEGER,
    is_most_jump INTEGER,
    is_svp INTEGER,
    is_tie INTEGER,
    kill_1 INTEGER,
    kill_2 INTEGER,
    kill_3 INTEGER,
    kill_4 INTEGER,
    kill_5 INTEGER,
    many_assists_cnt1 INTEGER,
    many_assists_cnt2 INTEGER,
    many_assists_cnt3 INTEGER,
    many_assists_cnt4 INTEGER,
    many_assists_cnt5 INTEGER,
    map TEXT,
    match_code TEXT,
    match_mode TEXT,
    match_team_id INTEGER,
    match_time INTEGER,
    per_headshot REAL,
    perfect_kill INTEGER,
    planted_bomb INTEGER,
    revenge_kill INTEGER,
    round_total INTEGER,
    season TEXT,
    team_kill INTEGER,
    throw_harm INTEGER,
    throw_harm_enemy INTEGER,
    uid INTEGER,
    year TEXT,
    PRIMARY KEY (match_id, steam_id_64),
    FOREIGN KEY (match_id) REFERENCES fact_matches(match_id) ON DELETE CASCADE
);

-- 5. Fact: Rounds
-- One row per (match_id, round_num). Round events and round economy rows
-- reference this composite key.
CREATE TABLE IF NOT EXISTS fact_rounds (
    match_id TEXT,
    round_num INTEGER,

    -- 'None' here is the literal string, not SQL NULL.
    winner_side TEXT CHECK(winner_side IN ('CT', 'T', 'None')),
    win_reason INTEGER, -- Raw integer from source
    win_reason_desc TEXT, -- Mapped description (e.g. 'TargetBombed')
    duration REAL,
    end_time_stamp TEXT,

    ct_score INTEGER,
    t_score INTEGER,

    -- Leetify Specific
    ct_money_start INTEGER,
    t_money_start INTEGER,

    PRIMARY KEY (match_id, round_num),
    FOREIGN KEY (match_id) REFERENCES fact_matches(match_id) ON DELETE CASCADE
);
-- 6. Fact: Round Events (The largest table)
-- Unifies Kills, Bomb Events, etc.
-- One row per event, keyed by event_id; each row belongs to a fact_rounds
-- row through the composite (match_id, round_num) foreign key.
CREATE TABLE IF NOT EXISTS fact_round_events (
    event_id TEXT PRIMARY KEY, -- UUID
    match_id TEXT,
    round_num INTEGER,

    event_type TEXT CHECK(event_type IN ('kill', 'bomb_plant', 'bomb_defuse', 'suicide', 'unknown')),
    event_time INTEGER, -- Seconds from round start

    -- Participants
    -- NOTE(review): the fact_round_events INSERT in ETL/L2_Builder.py does not
    -- list assister_steam_id, so it stays NULL unless populated elsewhere.
    attacker_steam_id TEXT,
    victim_steam_id TEXT,
    assister_steam_id TEXT,
    flash_assist_steam_id TEXT,
    trade_killer_steam_id TEXT,

    -- Weapon & Context
    weapon TEXT,
    is_headshot BOOLEAN DEFAULT 0,
    is_wallbang BOOLEAN DEFAULT 0,
    is_blind BOOLEAN DEFAULT 0,
    is_through_smoke BOOLEAN DEFAULT 0,
    is_noscope BOOLEAN DEFAULT 0,

    -- Spatial Data (From RoundList)
    -- NOTE(review): declared INTEGER; confirm the source coordinates are
    -- whole numbers.
    attacker_pos_x INTEGER,
    attacker_pos_y INTEGER,
    attacker_pos_z INTEGER,
    victim_pos_x INTEGER,
    victim_pos_y INTEGER,
    victim_pos_z INTEGER,

    -- Economy/Score Impact (From Leetify)
    score_change_attacker REAL,
    score_change_victim REAL,

    FOREIGN KEY (match_id, round_num) REFERENCES fact_rounds(match_id, round_num) ON DELETE CASCADE
);

-- Lookup indexes: all events of a match, and all events by attacker.
CREATE INDEX IF NOT EXISTS idx_round_events_match ON fact_round_events(match_id);
CREATE INDEX IF NOT EXISTS idx_round_events_attacker ON fact_round_events(attacker_steam_id);

-- 7. Fact: Round Player Economy/Status
-- Snapshots of player state at round start/end
-- One row per (match_id, round_num, steam_id_64); rows are removed together
-- with their parent fact_rounds row.
CREATE TABLE IF NOT EXISTS fact_round_player_economy (
    match_id TEXT,
    round_num INTEGER,
    steam_id_64 TEXT,

    side TEXT CHECK(side IN ('CT', 'T')),
    start_money INTEGER,
    equipment_value INTEGER,

    -- Inventory Summary
    main_weapon TEXT,
    has_helmet BOOLEAN,
    has_defuser BOOLEAN,

    -- Round Performance Summary (Leetify)
    round_performance_score REAL,

    PRIMARY KEY (match_id, round_num, steam_id_64),
    FOREIGN KEY (match_id, round_num) REFERENCES fact_rounds(match_id, round_num) ON DELETE CASCADE
);