0.4 : L2 ver1 finished

This commit is contained in:
2026-01-23 23:02:15 +08:00
parent f7afb9cfd2
commit 879f63302c
5 changed files with 1625 additions and 0 deletions

879
ETL/L2_Builder.py Normal file
View File

@@ -0,0 +1,879 @@
import sqlite3
import json
import os
import sys
import logging
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Any, Tuple
from datetime import datetime
# Setup logging: configure the root logger at import time (INFO level,
# "time - level - message" format) and create this module's logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# Constants: relative paths to the L1A source database that is read, the L2
# database that is rebuilt on every run, and the SQL schema used to create it.
L1A_DB_PATH = 'database/L1A/L1A.sqlite'
L2_DB_PATH = 'database/L2/L2_Main.sqlite'
SCHEMA_PATH = 'database/L2/schema.sql'
# --- Data Structures for Unification ---
@dataclass
class PlayerStats:
    """Statistics of one player for one match (or for one side of a match).

    Only ``steam_id_64`` is required; every other field starts at a zero-like
    default (0, 0.0, "" or False) and is filled in by the parser. The field
    order is part of the generated ``__init__`` signature and must not change.
    """

    # --- Identity ---
    steam_id_64: str
    team_id: int = 0

    # --- Core scoreboard ---
    kills: int = 0
    deaths: int = 0
    assists: int = 0
    headshot_count: int = 0
    kd_ratio: float = 0.0
    adr: float = 0.0
    rating: float = 0.0
    rating2: float = 0.0
    rating3: float = 0.0
    rws: float = 0.0
    mvp_count: int = 0
    elo_change: float = 0.0
    rank_score: int = 0
    is_win: bool = False

    # --- VIP stats ---
    kast: float = 0.0
    entry_kills: int = 0
    entry_deaths: int = 0
    awp_kills: int = 0
    clutch_1v1: int = 0
    clutch_1v2: int = 0
    clutch_1v3: int = 0
    clutch_1v4: int = 0
    clutch_1v5: int = 0
    flash_assists: int = 0
    flash_duration: float = 0.0
    jump_count: int = 0
    damage_total: int = 0
    damage_received: int = 0

    # --- Fields named exactly like the keys of a player's `fight` payload ---
    assisted_kill: int = 0
    awp_kill: int = 0
    benefit_kill: int = 0
    day: str = ""
    defused_bomb: int = 0
    end_1v1: int = 0
    end_1v2: int = 0
    end_1v3: int = 0
    end_1v4: int = 0
    end_1v5: int = 0
    explode_bomb: int = 0
    first_death: int = 0
    first_kill: int = 0
    flash_enemy: int = 0
    flash_team: int = 0
    flash_team_time: float = 0.0
    flash_time: float = 0.0
    game_mode: str = ""
    group_id: int = 0
    hold_total: int = 0
    id: int = 0
    is_highlight: int = 0
    is_most_1v2: int = 0
    is_most_assist: int = 0
    is_most_awp: int = 0
    is_most_end: int = 0
    is_most_first_kill: int = 0
    is_most_headshot: int = 0
    is_most_jump: int = 0
    is_svp: int = 0
    is_tie: int = 0
    kill_1: int = 0
    kill_2: int = 0
    kill_3: int = 0
    kill_4: int = 0
    kill_5: int = 0
    many_assists_cnt1: int = 0
    many_assists_cnt2: int = 0
    many_assists_cnt3: int = 0
    many_assists_cnt4: int = 0
    many_assists_cnt5: int = 0
    map: str = ""
    match_code: str = ""
    match_mode: str = ""
    match_team_id: int = 0
    match_time: int = 0
    per_headshot: float = 0.0
    perfect_kill: int = 0
    planted_bomb: int = 0
    revenge_kill: int = 0
    round_total: int = 0
    season: str = ""
    team_kill: int = 0
    throw_harm: int = 0
    throw_harm_enemy: int = 0
    uid: int = 0
    year: str = ""
@dataclass
class RoundEvent:
    """A single in-round event (the parsers in this file only emit kills).

    The three leading fields are required; participants, weapon, flags,
    positions and score deltas are optional and default to "not available".
    """

    # --- Required ---
    event_id: str
    event_type: str  # e.g. 'kill', 'bomb_plant'
    event_time: int

    # --- Participants ---
    attacker_steam_id: Optional[str] = None
    victim_steam_id: Optional[str] = None
    assister_steam_id: Optional[str] = None
    flash_assist_steam_id: Optional[str] = None
    trade_killer_steam_id: Optional[str] = None

    # --- Kill details ---
    weapon: Optional[str] = None
    is_headshot: bool = False
    is_wallbang: bool = False
    is_blind: bool = False
    is_through_smoke: bool = False
    is_noscope: bool = False

    # --- Spatial: (x, y, z) triples, None when the source has no positions ---
    attacker_pos: Optional[Tuple[int, int, int]] = None
    victim_pos: Optional[Tuple[int, int, int]] = None

    # --- Score deltas attached to the event ---
    score_change_attacker: float = 0.0
    score_change_victim: float = 0.0
@dataclass
class PlayerEconomy:
    """Economy snapshot of one player for one round.

    ``steam_id_64`` and ``side`` are required; the remaining fields default to
    empty values and are filled in when the round source provides them.
    """

    # --- Required ---
    steam_id_64: str
    side: str

    # --- Money and loadout ---
    start_money: int = 0
    equipment_value: int = 0
    main_weapon: str = ""
    has_helmet: bool = False
    has_defuser: bool = False

    # --- Per-round score taken from the round source ---
    round_performance_score: float = 0.0
@dataclass
class RoundData:
    """Everything recorded for one round of a match.

    The outcome fields are required. Team start money defaults to 0, and the
    ``events`` / ``economies`` lists start empty and are owned by the
    instance (each instance gets its own fresh list).
    """

    # --- Outcome (required) ---
    round_num: int
    winner_side: str
    win_reason: int
    win_reason_desc: str
    duration: float
    end_time_stamp: str
    ct_score: int
    t_score: int

    # --- Team money at round start ---
    ct_money_start: int = 0
    t_money_start: int = 0

    # --- Per-round detail collections ---
    events: List[RoundEvent] = field(default_factory=list)
    economies: List[PlayerEconomy] = field(default_factory=list)
@dataclass
class MatchData:
    """Unified representation of one match, ready to be written to L2.

    Only ``match_id`` is required. Scalar fields default to empty values and
    ``data_source_type`` to "unknown"; every container field starts empty and
    is private to the instance.
    """

    # --- Identity ---
    match_id: str
    match_code: str = ""
    map_name: str = ""

    # --- Timing ---
    start_time: int = 0
    end_time: int = 0
    duration: int = 0

    # --- Result ---
    winner_team: int = 0
    score_team1: int = 0
    score_team2: int = 0

    # --- Server ---
    server_ip: str = ""
    server_port: int = 0
    location: str = ""

    # --- Which round source was used to fill `rounds` ---
    data_source_type: str = "unknown"

    # --- Player statistics, each keyed by steam_id_64 ---
    players: Dict[str, PlayerStats] = field(default_factory=dict)
    players_t: Dict[str, PlayerStats] = field(default_factory=dict)
    players_ct: Dict[str, PlayerStats] = field(default_factory=dict)

    # --- Rounds in the order they were parsed ---
    rounds: List[RoundData] = field(default_factory=list)

    # --- Profile info per player: steam_id -> {uid, username, avatar_url, ...} ---
    player_meta: Dict[str, Dict] = field(default_factory=dict)
# --- Database Helper ---
def init_db():
    """Recreate the L2 database from scratch using the schema file.

    An existing database file at ``L2_DB_PATH`` is deleted first, then a new
    database is created and the SQL script at ``SCHEMA_PATH`` is executed
    against it.

    Returns:
        bool: True once the schema has been applied; False when the existing
        database file could not be removed because of a ``PermissionError``.

    Raises:
        OSError: if the schema file cannot be read or the database directory
            cannot be created.
        sqlite3.Error: if the database cannot be opened or the schema script
            fails.
    """
    if os.path.exists(L2_DB_PATH):
        logger.info(f"Removing existing L2 DB at {L2_DB_PATH}")
        try:
            os.remove(L2_DB_PATH)
        except PermissionError:
            logger.error("Cannot remove L2 DB, it might be open.")
            return False

    # Read the schema before creating the database so that an unreadable
    # schema file does not leave an empty database file behind.
    with open(SCHEMA_PATH, 'r', encoding='utf-8') as f:
        schema_sql = f.read()

    # sqlite3.connect() creates the file but not its parent directory.
    db_dir = os.path.dirname(L2_DB_PATH)
    if db_dir:
        os.makedirs(db_dir, exist_ok=True)

    conn = sqlite3.connect(L2_DB_PATH)
    try:
        conn.executescript(schema_sql)
        conn.commit()
    finally:
        # Always release the handle, even when the schema script fails.
        conn.close()

    logger.info("L2 DB Initialized.")
    return True
# --- Parsers ---
class MatchParser:
    """Build a unified ``MatchData`` from the captured requests of one match.

    ``raw_requests`` is the decoded capture of a single match: an iterable of
    request records, each expected to be a dict with a ``url`` and a JSON
    ``body``. Four endpoints are recognised by URL fragment (base match data,
    VIP data, leetify rating, classic round list); the ``data`` member of each
    body is extracted in ``__init__`` and combined by ``parse()``.

    Malformed input is tolerated field by field: values that cannot be
    converted become 0 / 0.0 / "", and containers of an unexpected type are
    treated as empty, so one bad field does not discard the whole match.
    """

    # (URL fragment, attribute receiving the payload), checked in this order;
    # the first fragment found in the URL wins.
    _PAYLOAD_ROUTES = (
        ('crane/http/api/data/match/', 'data_match'),
        ('crane/http/api/data/vip_plus_match_data/', 'data_vip'),
        ('crane/http/api/match/leetify_rating/', 'data_leetify'),
        ('crane/http/api/match/round/', 'data_round_list'),
    )

    # Items that are only reported as "main weapon" when nothing else is held.
    _NON_MAIN_WEAPONS = frozenset((
        "weapon_knife",
        "weapon_knife_t",
        "weapon_knife_gg",
        "weapon_knife_ct",
        "weapon_c4",
        "weapon_flashbang",
        "weapon_hegrenade",
        "weapon_smokegrenade",
        "weapon_molotov",
        "weapon_incgrenade",
        "weapon_decoy",
    ))

    # Mapping tables from a `fight` payload to PlayerStats attributes.
    # (attribute, payload key) pairs for stats stored under a different name:
    _RENAMED_INT_STATS = (
        ('kills', 'kill'),
        ('deaths', 'death'),
        ('assists', 'assist'),
        ('headshot_count', 'headshot'),
        ('mvp_count', 'is_mvp'),  # may arrive as "1" or 1
        ('jump_count', 'jump_total'),
    )
    _RENAMED_FLOAT_STATS = (
        ('flash_duration', 'flash_enemy_time'),
    )
    # Stats whose attribute name equals the payload key, grouped by converter:
    _INT_STATS = (
        'assisted_kill', 'awp_kill', 'benefit_kill', 'defused_bomb',
        'end_1v1', 'end_1v2', 'end_1v3', 'end_1v4', 'end_1v5',
        'explode_bomb', 'first_death', 'first_kill', 'flash_enemy',
        'flash_team', 'group_id', 'hold_total', 'id', 'is_highlight',
        'is_most_1v2', 'is_most_assist', 'is_most_awp', 'is_most_end',
        'is_most_first_kill', 'is_most_headshot', 'is_most_jump', 'is_svp',
        'is_tie', 'kill_1', 'kill_2', 'kill_3', 'kill_4', 'kill_5',
        'many_assists_cnt1', 'many_assists_cnt2', 'many_assists_cnt3',
        'many_assists_cnt4', 'many_assists_cnt5', 'match_team_id',
        'match_time', 'perfect_kill', 'planted_bomb', 'revenge_kill',
        'round_total', 'team_kill', 'throw_harm', 'throw_harm_enemy', 'uid',
    )
    _FLOAT_STATS = (
        'adr', 'rating', 'rating2', 'rating3', 'rws',
        'flash_team_time', 'flash_time', 'per_headshot',
    )
    _TEXT_STATS = (
        'day', 'game_mode', 'map', 'match_code', 'match_mode', 'season', 'year',
    )

    def __init__(self, match_id, raw_requests):
        """Store the inputs and extract the four known payloads.

        Args:
            match_id: Identifier of the match; used for logging and as the
                prefix of generated event ids.
            raw_requests: Iterable of captured request records (dicts with
                ``url`` and ``body``).
        """
        self.match_id = match_id
        self.raw_requests = raw_requests
        self.match_data = MatchData(match_id=match_id)
        # Extracted JSON bodies (the `data` member of each response)
        self.data_match = None
        self.data_vip = None
        self.data_leetify = None
        self.data_round_list = None
        self._extract_payloads()

    # ----- small conversion helpers -------------------------------------

    @staticmethod
    def _safe_int(val):
        """Convert ``val`` to int, returning 0 when that is not possible.

        A direct ``int()`` is tried first so that large integers keep every
        digit; going through ``float`` (needed for strings such as "12.0")
        would round anything above 2**53.
        """
        if val is None:
            return 0
        for convert in (int, lambda v: int(float(v))):
            try:
                return convert(val)
            except (TypeError, ValueError, OverflowError):
                continue
        return 0

    @staticmethod
    def _safe_float(val):
        """Convert ``val`` to float, returning 0.0 for None or bad values."""
        if val is None:
            return 0.0
        try:
            return float(val)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    @staticmethod
    def _safe_text(val):
        """Return ``str(val)``, or "" when ``val`` is None."""
        return "" if val is None else str(val)

    @staticmethod
    def _as_dict(value):
        """Return ``value`` if it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _unique_event_id(base_id, seen):
        """Return ``base_id``, suffixed with ``#n`` if it was already issued.

        Generated ids are built from match, round, timestamp and attacker, so
        two kills by the same attacker at the same timestamp would otherwise
        share an id. ``save_match`` writes events with INSERT OR REPLACE, so
        a duplicated id may overwrite an earlier event instead of adding a
        row. The first occurrence keeps the plain id; later ones become
        ``base#2``, ``base#3``, ... ``seen`` is updated in place.
        """
        candidate = base_id
        serial = 1
        while candidate in seen:
            serial += 1
            candidate = f"{base_id}#{serial}"
        seen.add(candidate)
        return candidate

    @staticmethod
    def _first_key(mapping):
        """Return the first key of a dict, or None if empty / not a dict."""
        if not isinstance(mapping, dict):
            return None
        return next(iter(mapping), None)

    @staticmethod
    def _first_score(mapping):
        """Return ``score`` of the first value in ``{steamid: {'score': x}}``.

        Falls back to 0.0 when the mapping is missing, empty or malformed.
        """
        if not isinstance(mapping, dict):
            return 0.0
        for entry in mapping.values():
            return entry.get('score', 0) if isinstance(entry, dict) else 0.0
        return 0.0

    @classmethod
    def _pick_main_weapon(cls, items):
        """Return the first non-utility ``WeaponName`` in ``items``.

        If every named item is a knife/C4/grenade, the first named item is
        returned instead; "" when ``items`` is not a list or names nothing.
        """
        if not isinstance(items, list):
            return ""
        names = [it.get('WeaponName') for it in items if isinstance(it, dict)]
        names = [name for name in names if name]
        for name in names:
            if name not in cls._NON_MAIN_WEAPONS:
                return name
        return names[0] if names else ""

    @classmethod
    def _pick_money(cls, items):
        """Return the largest ``Money`` value found in ``items`` (0 if none).

        Values are converted to int before comparing so that numeric strings
        are not compared lexicographically.
        """
        if not isinstance(items, list):
            return 0
        amounts = [
            cls._safe_int(it.get('Money'))
            for it in items
            if isinstance(it, dict) and it.get('Money') is not None
        ]
        return max(amounts) if amounts else 0

    @classmethod
    def _xyz(cls, pos):
        """Return ``(x, y, z)`` from a position dict, 0 for missing axes."""
        pos = cls._as_dict(pos)
        return (pos.get('x', 0), pos.get('y', 0), pos.get('z', 0))

    @classmethod
    def _fill_fight_stats(cls, stats, fight):
        """Copy every stat of a ``fight`` payload dict onto ``stats``.

        Shared by the whole-match stats and the per-side (T / CT) stats so
        both are mapped identically. ``team_id`` is left to the caller.
        """
        for attr, key in cls._RENAMED_INT_STATS:
            setattr(stats, attr, cls._safe_int(fight.get(key)))
        for attr, key in cls._RENAMED_FLOAT_STATS:
            setattr(stats, attr, cls._safe_float(fight.get(key)))
        for name in cls._INT_STATS:
            setattr(stats, name, cls._safe_int(fight.get(name)))
        for name in cls._FLOAT_STATS:
            setattr(stats, name, cls._safe_float(fight.get(name)))
        for name in cls._TEXT_STATS:
            setattr(stats, name, cls._safe_text(fight.get(name)))
        stats.is_win = bool(cls._safe_int(fight.get('is_win')))

    # ----- payload extraction -------------------------------------------

    def _extract_payloads(self):
        """Route each request body's ``data`` to the matching attribute.

        Records that are not dicts, have an empty or non-dict body, or have a
        non-string URL are skipped. When several requests hit the same
        endpoint, the last one wins.
        """
        for req in self.raw_requests or []:
            if not isinstance(req, dict):
                continue
            body = req.get('body')
            if not isinstance(body, dict) or not body:
                continue
            url = req.get('url') or ''
            if not isinstance(url, str):
                continue
            # Check URLs
            for fragment, attr in self._PAYLOAD_ROUTES:
                if fragment in url:
                    setattr(self, attr, body.get('data', {}))
                    break

    # ----- public entry point -------------------------------------------

    def parse(self) -> "MatchData":
        """Parse all extracted payloads and return the filled ``MatchData``.

        Without base match data a warning is logged and the (otherwise empty)
        ``MatchData`` is returned. Rounds come from the leetify payload when
        it has ``leetify_data``, else from the classic round list; the choice
        is recorded in ``data_source_type`` ('leetify', 'classic', 'unknown').
        """
        if not isinstance(self.data_match, dict) or not self.data_match:
            logger.warning(f"No base match data found for {self.match_id}")
            return self.match_data

        self._parse_base_info()
        self._parse_players_base()
        self._parse_players_vip()

        # Decide which round source to use
        if self._as_dict(self.data_leetify).get('leetify_data'):
            self.match_data.data_source_type = 'leetify'
            self._parse_leetify_rounds()
        elif self._as_dict(self.data_round_list).get('round_list'):
            self.match_data.data_source_type = 'classic'
            self._parse_classic_rounds()
        else:
            self.match_data.data_source_type = 'unknown'
            logger.info(f"No round data found for {self.match_id}")
        return self.match_data

    # ----- match level --------------------------------------------------

    def _parse_base_info(self):
        """Fill the match-level fields from ``data_match['main']``."""
        main = self._as_dict(self.data_match.get('main'))
        md = self.match_data
        md.match_code = main.get('match_code', '')
        md.map_name = main.get('map', '')
        # Timestamps are coerced to int so the subtraction below cannot fail
        # on None or numeric strings.
        md.start_time = self._safe_int(main.get('start_time'))
        md.end_time = self._safe_int(main.get('end_time'))
        # A duration is only meaningful when both ends are known; with a
        # missing start it would otherwise equal the raw end timestamp.
        if md.start_time and md.end_time >= md.start_time:
            md.duration = md.end_time - md.start_time
        else:
            md.duration = 0
        md.winner_team = main.get('match_winner', 0)
        md.score_team1 = main.get('group1_all_score', 0)
        md.score_team2 = main.get('group2_all_score', 0)
        md.server_ip = main.get('server_ip', '')
        # Port is sometimes a string
        md.server_port = self._safe_int(main.get('server_port'))
        md.location = main.get('location', '')

    # ----- players ------------------------------------------------------

    def _parse_players_base(self):
        """Fill ``players``, ``players_t``, ``players_ct`` and ``player_meta``.

        Players are read from the ``group_1`` and ``group_2`` lists of the
        base payload. A player is keyed by the Steam id found under
        ``user_info -> user_data -> steam -> steamId``; when that is missing
        the placeholder ``5E:{uid}`` is used, and players with neither are
        skipped.
        """
        entries = []
        for group_key in ('group_1', 'group_2'):
            group = self.data_match.get(group_key)
            if isinstance(group, list):
                entries.extend(group)

        for p in entries:
            if not isinstance(p, dict):
                continue
            user_data = self._as_dict(self._as_dict(p.get('user_info')).get('user_data'))
            profile = self._as_dict(user_data.get('profile'))
            fight = self._as_dict(p.get('fight'))
            uid = fight.get('uid')

            # A null/empty steamId must not turn into the text "None".
            raw_steam_id = self._as_dict(user_data.get('steam')).get('steamId')
            steam_id = str(raw_steam_id) if raw_steam_id else ''
            # If steam_id is empty, use temporary placeholder '5E:{uid}'
            if not steam_id and uid:
                steam_id = f"5E:{uid}"
            if not steam_id:
                continue

            # Store meta for dim_players
            self.match_data.player_meta[steam_id] = {
                'uid': uid,
                'username': user_data.get('username', ''),
                'avatar_url': profile.get('avatarUrl', ''),
                'domain': profile.get('domain', ''),
                'created_at': user_data.get('createdAt', 0),
                'updated_at': user_data.get('updatedAt', 0)
            }

            # Whole-match stats
            team_id_value = self._safe_int(fight.get('match_team_id'))
            stats = PlayerStats(steam_id_64=steam_id)
            self._fill_fight_stats(stats, fight)
            stats.team_id = team_id_value
            sts = self._as_dict(p.get('sts'))
            stats.elo_change = self._safe_float(sts.get('change_elo'))
            stats.rank_score = self._safe_int(sts.get('rank'))
            self.match_data.players[steam_id] = stats

            # Per-side stats, only when the payload has a non-empty dict
            side_targets = (
                ('fight_t', self.match_data.players_t),
                ('fight_ct', self.match_data.players_ct),
            )
            for side_key, bucket in side_targets:
                side_fight = p.get(side_key)
                if not (isinstance(side_fight, dict) and side_fight):
                    continue
                side_stats = PlayerStats(steam_id_64=steam_id)
                self._fill_fight_stats(side_stats, side_fight)
                # Prefer the team id of the whole-match stats when known.
                side_stats.team_id = (
                    team_id_value or self._safe_int(side_fight.get('match_team_id'))
                )
                bucket[steam_id] = side_stats

    def _parse_players_vip(self):
        """Copy ``kast`` and ``awp_kill`` from the VIP payload onto players.

        The payload maps a player id to a dict of stats; entries whose id is
        not a known player, or whose value is not a dict, are ignored.
        """
        if not isinstance(self.data_vip, dict):
            return
        # Structure: data_vip -> steamid (key) -> dict
        for sid, vdata in self.data_vip.items():
            player = self.match_data.players.get(sid)
            if player is None or not isinstance(vdata, dict):
                continue
            player.kast = self._safe_float(vdata.get('kast'))
            player.awp_kills = self._safe_int(vdata.get('awp_kill'))

    # ----- rounds: leetify source ---------------------------------------

    def _parse_leetify_rounds(self):
        """Append one ``RoundData`` per entry of ``leetify_data.round_stat``."""
        leetify = self._as_dict(self._as_dict(self.data_leetify).get('leetify_data'))
        seen_ids = set()
        for idx, r in enumerate(leetify.get('round_stat') or []):
            if not isinstance(r, dict):
                continue
            sfui = self._as_dict(r.get('sfui_event'))
            rd = RoundData(
                round_num=r.get('round', idx + 1),
                # NOTE(review): approximate mapping carried over unchanged;
                # confirm the win_reason enum before relying on winner_side.
                winner_side='CT' if r.get('win_reason') in [7, 8, 9] else 'T',
                win_reason=r.get('win_reason', 0),
                win_reason_desc=str(r.get('win_reason', 0)),
                duration=0,  # not taken from the leetify payload
                end_time_stamp=r.get('end_ts', ''),
                ct_score=sfui.get('score_ct', 0),
                t_score=sfui.get('score_t', 0),
                ct_money_start=r.get('ct_money_group', 0),
                t_money_start=r.get('t_money_group', 0)
            )
            # Events: entries of 'show_event' that carry a 'kill_event'
            for evt in r.get('show_event') or []:
                event = self._leetify_kill_event(rd.round_num, evt, seen_ids)
                if event is not None:
                    rd.events.append(event)
            rd.economies.extend(self._leetify_economies(r))
            self.match_data.rounds.append(rd)

    def _leetify_kill_event(self, round_num, evt, seen_ids):
        """Build a kill ``RoundEvent`` from a leetify event, or return None."""
        if not isinstance(evt, dict):
            return None
        k = evt.get('kill_event')
        if not isinstance(k, dict) or not k:
            return None
        base_id = f"{self.match_id}_{round_num}_{k.get('Ts', '')}_{k.get('Killer')}"
        return RoundEvent(
            event_id=self._unique_event_id(base_id, seen_ids),
            event_type='kill',
            event_time=evt.get('ts', 0),
            attacker_steam_id=k.get('Killer'),
            victim_steam_id=k.get('Victim'),
            weapon=k.get('WeaponName'),
            is_headshot=k.get('Headshot', False),
            is_wallbang=k.get('Penetrated', False),
            is_blind=k.get('AttackerBlind', False),
            is_through_smoke=k.get('ThroughSmoke', False),
            is_noscope=k.get('NoScope', False),
            # The *_score_change members look like {'<steamid>': {'score': 17.0}}
            trade_killer_steam_id=self._first_key(evt.get('trade_score_change')),
            flash_assist_steam_id=self._first_key(
                evt.get('flash_assist_killer_score_change')
            ),
            score_change_attacker=self._first_score(evt.get('killer_score_change')),
            score_change_victim=self._first_score(evt.get('victim_score_change'))
        )

    def _leetify_economies(self, r):
        """Return the ``PlayerEconomy`` list of one leetify round dict.

        A player appears only if listed in ``player_t_score`` or
        ``player_ct_score`` (CT wins if listed in both). Money and main
        weapon come from ``bron_equipment``, the equipment value from
        ``player_bron_crash``.
        """
        equipment = self._as_dict(r.get('bron_equipment'))
        equipment_values = self._as_dict(r.get('player_bron_crash'))

        side_scores = {}
        for side, key in (("T", 'player_t_score'), ("CT", 'player_ct_score')):
            for sid, val in self._as_dict(r.get(key)).items():
                side_scores[str(sid)] = (side, self._safe_float(val))

        economies = []
        for sid, (side, score) in side_scores.items():
            items = equipment.get(sid) or []
            economies.append(PlayerEconomy(
                steam_id_64=sid,
                side=side,
                start_money=self._pick_money(items),
                equipment_value=self._safe_int(equipment_values.get(sid)),
                main_weapon=self._pick_main_weapon(items),
                round_performance_score=score
            ))
        return economies

    # ----- rounds: classic source ---------------------------------------

    def _parse_classic_rounds(self):
        """Append one ``RoundData`` per entry of the classic ``round_list``.

        The classic list carries no winner or win reason, so those fields
        keep their placeholders ('None', 0, ''); scores and duration are read
        from ``current_score`` and kills from ``all_kill``.
        """
        r_list = self._as_dict(self.data_round_list).get('round_list') or []
        seen_ids = set()
        for idx, r in enumerate(r_list):
            if not isinstance(r, dict):
                continue
            cur_score = self._as_dict(r.get('current_score'))
            rd = RoundData(
                round_num=idx + 1,
                winner_side='None',  # placeholder: unknown in this source
                win_reason=0,
                win_reason_desc='',
                duration=self._safe_float(cur_score.get('final_round_time')),
                end_time_stamp='',
                ct_score=cur_score.get('ct', 0),
                t_score=cur_score.get('t', 0)
            )
            for k in r.get('all_kill') or []:
                if not isinstance(k, dict):
                    continue
                attacker = self._as_dict(k.get('attacker'))
                victim = self._as_dict(k.get('victim'))
                base_id = (
                    f"{self.match_id}_{rd.round_num}_{k.get('pasttime')}"
                    f"_{attacker.get('steamid_64')}"
                )
                rd.events.append(RoundEvent(
                    event_id=self._unique_event_id(base_id, seen_ids),
                    event_type='kill',
                    event_time=k.get('pasttime', 0),
                    # _safe_text keeps a null id as "" instead of "None"
                    attacker_steam_id=self._safe_text(attacker.get('steamid_64')),
                    victim_steam_id=self._safe_text(victim.get('steamid_64')),
                    weapon=k.get('weapon', ''),
                    is_headshot=k.get('headshot', False),
                    is_wallbang=k.get('penetrated', False),
                    is_blind=k.get('attackerblind', False),
                    is_through_smoke=k.get('throughsmoke', False),
                    is_noscope=k.get('noscope', False),
                    attacker_pos=self._xyz(attacker.get('pos')),
                    victim_pos=self._xyz(victim.get('pos'))
                ))
            self.match_data.rounds.append(rd)
# --- Main Execution ---
def process_matches():
    """Rebuild the L2 database from every raw capture stored in L1A.

    The L2 database is re-initialised first; nothing is processed when that
    fails. Each row of ``raw_iframe_network`` is then decoded, parsed with
    ``MatchParser`` and written with ``save_match``.

    Every match is written in its own transaction: it is committed once
    ``save_match`` has finished and rolled back when anything fails, so a
    match that errors halfway leaves no partial rows behind. Failures are
    logged and do not stop the run. Both connections are closed on exit,
    including when an unexpected exception escapes.
    """
    if not init_db():
        return

    count = 0
    l2_conn = None
    l1_conn = sqlite3.connect(L1A_DB_PATH)
    try:
        l2_conn = sqlite3.connect(L2_DB_PATH)
        l1_cursor = l1_conn.cursor()
        l2_cursor = l2_conn.cursor()

        logger.info("Reading from L1A...")
        l1_cursor.execute("SELECT match_id, content FROM raw_iframe_network")

        while True:
            # Read in small batches so the raw captures are not all held in
            # memory at once.
            rows = l1_cursor.fetchmany(10)
            if not rows:
                break
            for match_id, content in rows:
                try:
                    raw_requests = json.loads(content)
                    match_data = MatchParser(match_id, raw_requests).parse()
                    save_match(l2_cursor, match_data)
                    l2_conn.commit()
                except Exception as e:
                    # Discard whatever this match wrote before it failed.
                    l2_conn.rollback()
                    logger.error(f"Error processing match {match_id}: {e}")
                    continue
                count += 1
                if count % 10 == 0:
                    print(f"Processed {count} matches...", end='\r')
    finally:
        if l2_conn is not None:
            l2_conn.close()
        l1_conn.close()

    logger.info(f"\nDone. Processed {count} matches.")
def save_match(cursor, m: "MatchData"):
    """Write one parsed match into the L2 tables through ``cursor``.

    Writes, in order: ``dim_players`` (upsert that refreshes username, avatar
    and last seen match), ``dim_maps`` (insert if new), ``fact_matches``, the
    three ``fact_match_players*`` tables, and per round ``fact_rounds``,
    ``fact_round_events`` and ``fact_round_player_economy``. Fact rows use
    INSERT OR REPLACE.

    The function does not commit; the caller owns the transaction.

    Args:
        cursor: An open sqlite3 cursor on the L2 database.
        m: The match to store. Events with an empty ``event_id`` get a random
            UUID assigned in place.
    """
    import uuid  # local import: only needed for the empty-event_id fallback

    # 1. Dim Players (Upsert)
    cursor.executemany("""
        INSERT INTO dim_players (steam_id_64, uid, username, avatar_url, domain, created_at, updated_at, last_seen_match_id)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(steam_id_64) DO UPDATE SET
        username=excluded.username,
        avatar_url=excluded.avatar_url,
        last_seen_match_id=excluded.last_seen_match_id
    """, [
        (
            sid, meta.get('uid'), meta.get('username'), meta.get('avatar_url'),
            meta.get('domain'), meta.get('created_at'), meta.get('updated_at'),
            m.match_id
        )
        for sid, meta in m.player_meta.items()
    ])

    # 2. Dim Maps (Ignore if exists)
    if m.map_name:
        cursor.execute("INSERT OR IGNORE INTO dim_maps (map_name) VALUES (?)", (m.map_name,))

    # 3. Fact Matches
    cursor.execute("""
        INSERT OR REPLACE INTO fact_matches
        (match_id, match_code, map_name, start_time, end_time, duration, winner_team, score_team1, score_team2, server_ip, server_port, location, data_source_type)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """, (
        m.match_id, m.match_code, m.map_name, m.start_time, m.end_time, m.duration,
        m.winner_team, m.score_team1, m.score_team2, m.server_ip, m.server_port, m.location, m.data_source_type
    ))

    # 4. Fact Match Players
    # Apart from the two leading key columns, every column has the same name
    # as the PlayerStats attribute it stores, so the row values are derived
    # from this list instead of being maintained as a second parallel list.
    player_columns = [
        "match_id", "steam_id_64", "team_id", "kills", "deaths", "assists", "headshot_count",
        "kd_ratio", "adr", "rating", "rating2", "rating3", "rws", "mvp_count", "elo_change",
        "rank_score", "is_win", "kast", "entry_kills", "entry_deaths", "awp_kills",
        "clutch_1v1", "clutch_1v2", "clutch_1v3", "clutch_1v4", "clutch_1v5",
        "flash_assists", "flash_duration", "jump_count", "damage_total", "damage_received",
        "assisted_kill", "awp_kill", "benefit_kill", "day", "defused_bomb", "end_1v1",
        "end_1v2", "end_1v3", "end_1v4", "end_1v5", "explode_bomb", "first_death",
        "first_kill", "flash_enemy", "flash_team", "flash_team_time", "flash_time",
        "game_mode", "group_id", "hold_total", "id", "is_highlight", "is_most_1v2",
        "is_most_assist", "is_most_awp", "is_most_end", "is_most_first_kill",
        "is_most_headshot", "is_most_jump", "is_svp", "is_tie", "kill_1", "kill_2",
        "kill_3", "kill_4", "kill_5", "many_assists_cnt1", "many_assists_cnt2",
        "many_assists_cnt3", "many_assists_cnt4", "many_assists_cnt5", "map",
        "match_code", "match_mode", "match_team_id", "match_time", "per_headshot",
        "perfect_kill", "planted_bomb", "revenge_kill", "round_total", "season",
        "team_kill", "throw_harm", "throw_harm_enemy", "uid", "year"
    ]
    stat_columns = player_columns[2:]
    player_placeholders = ",".join(["?"] * len(player_columns))
    player_columns_sql = ",".join(player_columns)

    def player_values(sid, p):
        # The dict key is used as steam_id_64, then one value per stat column.
        return [m.match_id, sid] + [getattr(p, column) for column in stat_columns]

    player_tables = (
        ("fact_match_players", m.players),
        ("fact_match_players_t", m.players_t),
        ("fact_match_players_ct", m.players_ct),
    )
    for table, side_players in player_tables:
        cursor.executemany(
            f"INSERT OR REPLACE INTO {table} ({player_columns_sql}) VALUES ({player_placeholders})",
            [player_values(sid, p) for sid, p in side_players.items()]
        )

    # 5. Rounds & Events
    no_position = (None, None, None)
    for r in m.rounds:
        cursor.execute("""
            INSERT OR REPLACE INTO fact_rounds
            (match_id, round_num, winner_side, win_reason, win_reason_desc, duration, end_time_stamp, ct_score, t_score, ct_money_start, t_money_start)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            m.match_id, r.round_num, r.winner_side, r.win_reason, r.win_reason_desc,
            r.duration, r.end_time_stamp, r.ct_score, r.t_score, r.ct_money_start, r.t_money_start
        ))

        for e in r.events:
            # Handle Pos: a missing position is stored as three NULLs
            ax, ay, az = e.attacker_pos if e.attacker_pos else no_position
            vx, vy, vz = e.victim_pos if e.victim_pos else no_position

            # Fall back to a random UUID when the parser produced no id
            if not e.event_id:
                e.event_id = str(uuid.uuid4())

            cursor.execute("""
                INSERT OR REPLACE INTO fact_round_events
                (event_id, match_id, round_num, event_type, event_time, attacker_steam_id, victim_steam_id,
                weapon, is_headshot, is_wallbang, is_blind, is_through_smoke, is_noscope,
                trade_killer_steam_id, flash_assist_steam_id, score_change_attacker, score_change_victim,
                attacker_pos_x, attacker_pos_y, attacker_pos_z, victim_pos_x, victim_pos_y, victim_pos_z)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                e.event_id, m.match_id, r.round_num, e.event_type, e.event_time, e.attacker_steam_id, e.victim_steam_id,
                e.weapon, e.is_headshot, e.is_wallbang, e.is_blind, e.is_through_smoke, e.is_noscope,
                e.trade_killer_steam_id, e.flash_assist_steam_id, e.score_change_attacker, e.score_change_victim,
                ax, ay, az, vx, vy, vz
            ))

        cursor.executemany("""
            INSERT OR REPLACE INTO fact_round_player_economy
            (match_id, round_num, steam_id_64, side, start_money, equipment_value, main_weapon, has_helmet, has_defuser, round_performance_score)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, [
            (
                m.match_id, r.round_num, pe.steam_id_64, pe.side, pe.start_money,
                pe.equipment_value, pe.main_weapon, pe.has_helmet, pe.has_defuser,
                pe.round_performance_score
            )
            for pe in r.economies
        ])
# Run the full L1A -> L2 build only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    process_matches()

245
ETL/verify_L2.py Normal file
View File

@@ -0,0 +1,245 @@
import sqlite3
import pandas as pd
import csv
# Show every column and allow wide lines when DataFrames are printed.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
# Relative path of the L2 database opened by verify().
db_path = 'database/L2/L2_Main.sqlite'
def _is_empty(s):
    """Return a boolean mask that is True where *s* is NULL/NaN or equal to 0."""
    return s.isna() | (s == 0)


def _scalar(conn, sql):
    """Execute *sql* on *conn* and return the first column of the first row."""
    return conn.execute(sql).fetchone()[0]


def _report_counts(conn):
    """Print per-table row counts and the number of matches per data source."""
    print("--- Counts ---")
    tables = [
        'dim_players',
        'dim_maps',
        'fact_matches',
        'fact_match_players',
        'fact_match_players_t',
        'fact_match_players_ct',
        'fact_rounds',
        'fact_round_events',
        'fact_round_player_economy'
    ]
    for t in tables:
        # Table names come from the fixed list above, so interpolating them
        # into the statement is safe (identifiers cannot be bound parameters).
        count = _scalar(conn, f"SELECT COUNT(*) FROM {t}")
        print(f"{t}: {count}")

    print("\n--- Data Source Distribution ---")
    dist = pd.read_sql(
        "SELECT data_source_type, COUNT(*) as cnt FROM fact_matches GROUP BY data_source_type",
        conn
    )
    print(dist)


def _report_samples(conn):
    """Print one sample kill event per data source and a few player rows."""
    print("\n--- Sample Round Events (Leetify vs Classic) ---")
    shown = ['event_type', 'attacker_steam_id', 'trade_killer_steam_id',
             'attacker_pos_x', 'score_change_attacker']
    for source, label in (('leetify', 'Leetify'), ('classic', 'Classic')):
        row = conn.execute(
            "SELECT match_id FROM fact_matches WHERE data_source_type=? LIMIT 1",
            (source,)
        ).fetchone()
        if not row:
            continue
        mid = row[0]
        print(f"{label} Match: {mid}")
        # Bind the match id as a parameter instead of splicing it into the SQL.
        df = pd.read_sql(
            "SELECT * FROM fact_round_events WHERE match_id=? AND event_type='kill' LIMIT 1",
            conn,
            params=(mid,)
        )
        print(df[shown])

    print("\n--- Sample Player Stats (New Fields) ---")
    df_players = pd.read_sql(
        "SELECT steam_id_64, rating, rating3, elo_change, rank_score, flash_duration, jump_count FROM fact_match_players LIMIT 5",
        conn
    )
    print(df_players)


def _report_integrity(conn):
    """Print counts of orphaned rows and of per-side rows with all-zero K/D/A."""
    print("\n--- Integrity Checks ---")
    missing_players = _scalar(conn, """
        SELECT COUNT(*) FROM fact_match_players f
        LEFT JOIN dim_players d ON f.steam_id_64 = d.steam_id_64
        WHERE d.steam_id_64 IS NULL
    """)
    print(f"fact_match_players missing dim_players: {missing_players}")

    missing_round_matches = _scalar(conn, """
        SELECT COUNT(*) FROM fact_rounds r
        LEFT JOIN fact_matches m ON r.match_id = m.match_id
        WHERE m.match_id IS NULL
    """)
    print(f"fact_rounds missing fact_matches: {missing_round_matches}")

    missing_event_rounds = _scalar(conn, """
        SELECT COUNT(*) FROM fact_round_events e
        LEFT JOIN fact_rounds r ON e.match_id = r.match_id AND e.round_num = r.round_num
        WHERE r.match_id IS NULL
    """)
    print(f"fact_round_events missing fact_rounds: {missing_event_rounds}")

    zero_kda = {}
    for side in ('t', 'ct'):
        zero_kda[side] = _scalar(conn, f"""
            SELECT COUNT(*) FROM fact_match_players_{side}
            WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0
        """)
    print(f"fact_match_players_t zero K/D/A: {zero_kda['t']}")
    print(f"fact_match_players_ct zero K/D/A: {zero_kda['ct']}")


def _report_side_comparison(conn):
    """Compare whole-match player stats with the per-side (T / CT) tables."""
    print("\n--- Full vs T/CT Comparison ---")
    cols = [
        'kills', 'deaths', 'assists', 'headshot_count', 'adr', 'rating', 'rating2',
        'rating3', 'rws', 'mvp_count', 'flash_duration', 'jump_count', 'is_win'
    ]
    keys = ['match_id', 'steam_id_64']
    select_list = ", ".join(keys) + ", " + ",".join(cols)

    def load(table, suffix=""):
        # Suffix the stat columns so the three tables can be merged side by side.
        frame = pd.read_sql(f"SELECT {select_list} FROM {table}", conn)
        return frame.rename(columns={c: f"{c}{suffix}" for c in cols})

    df = load("fact_match_players")
    df = df.merge(load("fact_match_players_t", "_t"), on=keys, how='left')
    df = df.merge(load("fact_match_players_ct", "_ct"), on=keys, how='left')

    for c in cols:
        print(f"{c} empty: {_is_empty(df[c]).sum()}")

    # Counters should equal the sum of both sides; flash_duration is a float,
    # so it is compared with a small tolerance.
    additive = ['kills', 'deaths', 'assists', 'headshot_count', 'mvp_count',
                'flash_duration', 'jump_count']
    for c in additive:
        side_total = df[f"{c}_t"].fillna(0) + df[f"{c}_ct"].fillna(0)
        tol = 0.01 if c == 'flash_duration' else 0
        mismatch = (df[c].fillna(0) - side_total).abs() > tol
        print(f"{c} full != t+ct: {mismatch.sum()}")

    # Ratios/ratings cannot be summed, so only presence is compared.
    non_additive = ['adr', 'rating', 'rating2', 'rating3', 'rws', 'is_win']
    for c in non_additive:
        full_empty = _is_empty(df[c])
        side_nonempty = (~_is_empty(df[f"{c}_t"])) | (~_is_empty(df[f"{c}_ct"]))
        print(f"{c} full empty but side has: {(full_empty & side_nonempty).sum()}")
        print(f"{c} full has but side empty: {(~full_empty & ~side_nonempty).sum()}")

    print("\n--- Rating Detail ---")
    rating_cols = ['rating', 'rating2', 'rating3']
    for c in rating_cols:
        full = df[c]
        full_has = ~_is_empty(full)
        t_has = ~_is_empty(df[f"{c}_t"])
        ct_has = ~_is_empty(df[f"{c}_ct"])
        side_has = t_has | ct_has
        print(f"{c} full null: {full.isna().sum()} full zero: {(full == 0).sum()} full nonzero: {full_has.sum()}")
        print(f"{c} side t nonzero: {t_has.sum()} side ct nonzero: {ct_has.sum()}")
        print(f"{c} full nonzero but side all zero: {(full_has & ~side_has).sum()}")
        print(f"{c} full zero but side has: {(~full_has & side_has).sum()}")

    df_rating_src = pd.read_sql(
        "SELECT f.rating, f.rating2, f.rating3, m.data_source_type FROM fact_match_players f JOIN fact_matches m ON f.match_id = m.match_id",
        conn
    )
    for c in rating_cols:
        # NULL must not count as "nonzero" (NaN != 0 evaluates to True), so
        # use the same emptiness test as the detail section above.
        grp = (
            df_rating_src.groupby('data_source_type')[c]
            .apply(lambda s: (~_is_empty(s)).sum())
            .reset_index(name='nonzero')
        )
        print(f"{c} nonzero by source")
        print(grp)


def _load_schema_paths(schema_path):
    """Return the second column of every data row of the flattened-schema CSV."""
    # newline='' lets the csv module handle line endings itself.
    with open(schema_path, 'r', encoding='utf-8', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row
        return [row[1] for row in reader if len(row) >= 2]


def _report_fight_any_coverage(conn, paths):
    """Print which fight_any keys map to fact_match_players columns and their zero rates."""
    marker = 'data.group_N[].fight_any.'
    fight_keys = {p.split('fight_any.')[1].split('.')[0] for p in paths if marker in p}

    l2_cols = set(pd.read_sql("PRAGMA table_info(fact_match_players)", conn)['name'])
    # L2 column name -> fight_any key, for columns that were renamed.
    alias = {
        'kills': 'kill',
        'deaths': 'death',
        'assists': 'assist',
        'headshot_count': 'headshot',
        'mvp_count': 'is_mvp',
        'flash_duration': 'flash_enemy_time',
        'jump_count': 'jump_total',
        'awp_kills': 'awp_kill'
    }

    covered = set()
    for c in l2_cols:
        if c in fight_keys:
            covered.add(c)
        elif alias.get(c) in fight_keys:
            covered.add(alias[c])
    missing_keys = sorted(fight_keys - covered)
    print(f"fight_any keys: {len(fight_keys)}")
    print(f"covered by L2 columns: {len(covered)}")
    print(f"uncovered fight_any keys: {len(missing_keys)}")
    if missing_keys:
        print(missing_keys)

    print("\n--- Coverage Zero Rate (fight_any -> fact_match_players) ---")
    key_to_col = {src: col for col, src in alias.items()}
    col_map = {}
    for k in fight_keys:
        if k in l2_cols:
            col_map[k] = k
        elif key_to_col.get(k) in l2_cols:
            # Only follow an alias when the aliased column really exists,
            # otherwise the SELECT below would fail with "no such column".
            col_map[k] = key_to_col[k]

    select_cols = ["steam_id_64"] + sorted(set(col_map.values()) - {"steam_id_64"})
    df_fight = pd.read_sql(
        "SELECT " + ",".join(select_cols) + " FROM fact_match_players",
        conn
    )
    total_rows = len(df_fight)
    stats = []
    for fight_key, col in sorted(col_map.items()):
        s = df_fight[col]
        zeros = (s == 0).sum()
        nulls = s.isna().sum()
        stats.append({
            "fight_key": fight_key,
            "column": col,
            "nonzero": total_rows - zeros - nulls,
            "zero": zeros,
            "null": nulls,
            "zero_rate": 0 if total_rows == 0 else round(zeros / total_rows, 4)
        })
    # Explicit columns keep sort_values working when nothing is mapped.
    df_stats = pd.DataFrame(
        stats,
        columns=["fight_key", "column", "nonzero", "zero", "null", "zero_rate"]
    ).sort_values(["zero_rate", "nonzero"], ascending=[False, True])
    print(df_stats.head(30))
    print("\n-- zero_rate top (most zeros) --")
    print(df_stats.head(10))
    print("\n-- zero_rate bottom (most nonzero) --")
    print(df_stats.tail(10))


def _report_economy_paths(paths):
    """Print how many schema paths fall under each leetify economy prefix."""
    print("\n--- Schema Coverage (leetify economy) ---")
    econ_keys = [
        'data.leetify_data.round_stat[].bron_equipment.',
        'data.leetify_data.round_stat[].player_t_score.',
        'data.leetify_data.round_stat[].player_ct_score.',
        'data.leetify_data.round_stat[].player_bron_crash.'
    ]
    for k in econ_keys:
        count = sum(1 for p in paths if k in p)
        print(f"{k} paths: {count}")


def verify(database=None, schema_path='database/original_json_schema/schema_flat.csv'):
    """Print a verification report for the L2 SQLite database.

    The report covers table row counts, sample rows, referential-integrity
    checks, a comparison of whole-match player stats against the per-side
    tables, and how the flattened JSON schema maps onto fact_match_players.

    Args:
        database: Path of the SQLite file to inspect. Defaults to the
            module-level ``db_path`` when omitted.
        schema_path: Path of the flattened-schema CSV whose second column
            holds the JSON paths.

    Raises:
        sqlite3.Error / pandas errors: if an expected table or column is
            missing.
        OSError: if the schema CSV cannot be opened.
    """
    conn = sqlite3.connect(db_path if database is None else database)
    try:
        _report_counts(conn)
        _report_samples(conn)
        _report_integrity(conn)
        _report_side_comparison(conn)

        print("\n--- Schema Coverage (fight_any) ---")
        paths = _load_schema_paths(schema_path)
        _report_fight_any_coverage(conn, paths)
        _report_economy_paths(paths)
    finally:
        # Release the connection even when a query or file read fails.
        conn.close()


if __name__ == "__main__":
    verify()

81
ETL/verify_deep.py Normal file
View File

@@ -0,0 +1,81 @@
import sqlite3
import pandas as pd
import numpy as np
# Console display: show every column, allow wide rows, and render floats with
# two decimals so the percentage tables stay readable.
pd.options.display.max_columns = None
pd.options.display.width = 1000
pd.options.display.float_format = '{:.2f}'.format

# SQLite database inspected by check_nulls_zeros(); the path is relative to
# the working directory.
db_path = 'database/L2/L2_Main.sqlite'
def _pct(part, total):
    """Return *part* as a percentage of *total*, or 0.0 when *total* is 0."""
    return (part / total) * 100 if total else 0.0


def check_nulls_zeros(database=None):
    """Print NULL/zero statistics for key L2 columns and kill events.

    Three sections are printed: the NULL and zero rates of selected
    fact_match_players columns, the completeness of kill events in
    fact_round_events, and per-data-source counts of kill events that carry
    an attacker position or an attacker score change.

    Args:
        database: Path of the SQLite file to inspect. Defaults to the
            module-level ``db_path`` when omitted.
    """
    conn = sqlite3.connect(db_path if database is None else database)
    try:
        print("=== 1. Fact Match Players: 关键字段零值/空值检查 ===")
        df_players = pd.read_sql("""
            SELECT
                kills, deaths, assists, adr, rating, rating2,
                kast, awp_kills, flash_duration, jump_count,
                elo_change
            FROM fact_match_players
        """, conn)
        total = len(df_players)
        stats = []
        for col in df_players.columns:
            nulls = df_players[col].isnull().sum()
            zeros = (df_players[col] == 0).sum()
            stats.append({
                'Field': col,
                'Total': total,
                'Nulls': nulls,
                # _pct avoids dividing by zero when the table is empty.
                'Null%': _pct(nulls, total),
                'Zeros': zeros,
                'Zero%': _pct(zeros, total)
            })
        print(pd.DataFrame(stats))

        print("\n=== 2. Fact Round Events (Kills): 击杀完整性检查 ===")
        # Only inspect records with event_type = 'kill'.
        df_kills = pd.read_sql("""
            SELECT
                attacker_steam_id, victim_steam_id,
                event_time, weapon,
                attacker_pos_x, score_change_attacker
            FROM fact_round_events
            WHERE event_type = 'kill'
        """, conn)
        total_kills = len(df_kills)
        attacker = df_kills['attacker_steam_id']
        victim = df_kills['victim_steam_id']
        # An id counts as missing when it is NULL or an empty string.
        missing_attacker = attacker.isnull().sum() + (attacker == '').sum()
        missing_victim = victim.isnull().sum() + (victim == '').sum()
        # Check whether attacker and victim are the same player (suicide).
        self_kills = (attacker == victim).sum()
        print(f"Total Kill Events: {total_kills}")
        print(f"Missing Attacker: {missing_attacker} ({_pct(missing_attacker, total_kills):.2f}%)")
        print(f"Missing Victim: {missing_victim} ({_pct(missing_victim, total_kills):.2f}%)")
        print(f"Self Kills (Suicide?): {self_kills}")

        print("\n=== 3. Fact Round Events: 坐标与评分覆盖率 ===")
        # Positions are expected for classic matches and may be empty for
        # leetify matches; score changes are expected for leetify matches.
        df_events = pd.read_sql("""
            SELECT
                m.data_source_type,
                COUNT(*) as total_events,
                SUM(CASE WHEN e.attacker_pos_x IS NOT NULL AND e.attacker_pos_x != 0 THEN 1 ELSE 0 END) as has_pos,
                SUM(CASE WHEN e.score_change_attacker IS NOT NULL AND e.score_change_attacker != 0 THEN 1 ELSE 0 END) as has_score
            FROM fact_round_events e
            JOIN fact_matches m ON e.match_id = m.match_id
            WHERE e.event_type = 'kill'
            GROUP BY m.data_source_type
        """, conn)
        print(df_events)
    finally:
        # Release the connection even when a query fails.
        conn.close()


if __name__ == "__main__":
    check_nulls_zeros()