"""Build per-player L3 feature rows from the L2 match database.

Run as a script, this module applies the L3 schema and then writes one
``dm_player_features`` row for every player found in ``fact_match_players``.
"""
import logging
import os
import sqlite3
from contextlib import closing
from datetime import datetime

import numpy as np
import pandas as pd

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Constants
L2_DB_PATH = 'database/L2/L2_Main.sqlite'
L3_DB_PATH = 'database/L3/L3_Features.sqlite'
SCHEMA_PATH = 'database/L3/schema.sql'

# Upper bound on bound parameters per IN (...) query. Older SQLite builds cap
# the number of host parameters at 999, so long match lists are fetched in
# slices well below that.
_SQL_IN_CHUNK = 500


def init_db():
    """Create the L3 database directory if needed and apply the schema script."""
    db_dir = os.path.dirname(L3_DB_PATH)
    if db_dir:
        os.makedirs(db_dir, exist_ok=True)

    # Read the schema first so a missing file does not leave an open handle.
    with open(SCHEMA_PATH, 'r', encoding='utf-8') as f:
        schema_sql = f.read()

    with closing(sqlite3.connect(L3_DB_PATH)) as conn:
        conn.executescript(schema_sql)
        conn.commit()
    logger.info("L3 DB Initialized.")


def get_db_connection(db_path):
    """Open and return a sqlite3 connection to ``db_path``."""
    return sqlite3.connect(db_path)


def safe_div(a, b, default=0.0):
    """Return ``a / b``, or ``default`` when ``b`` is falsy (0, 0.0, None)."""
    return a / b if b else default


def calculate_basic_features(df):
    """Aggregate per-match stat columns of ``df`` into ``basic_*`` features.

    Returns an empty dict for an empty frame; otherwise a dict of means and
    ratios over all rows (one row per match is assumed -- TODO confirm).
    """
    if df.empty:
        return {}

    count = len(df)
    headshots = df['headshot_count'].sum()
    first_kills = df['first_kill'].sum()
    first_deaths = df['first_death'].sum()
    opening_duels = first_kills + first_deaths

    feats = {
        'total_matches': count,
        'basic_avg_rating': df['rating'].mean(),
        'basic_avg_kd': df['kd_ratio'].mean(),
        'basic_avg_kast': df['kast'].mean(),
        'basic_avg_rws': df['rws'].mean(),
        'basic_avg_headshot_kills': headshots / count,
        'basic_headshot_rate': safe_div(headshots, df['kills'].sum()),
        'basic_avg_first_kill': df['first_kill'].mean(),
        'basic_avg_first_death': df['first_death'].mean(),
        'basic_first_kill_rate': safe_div(first_kills, opening_duels),
        'basic_first_death_rate': safe_div(first_deaths, opening_duels),
        'basic_avg_kill_2': df['kill_2'].mean(),
        'basic_avg_kill_3': df['kill_3'].mean(),
        'basic_avg_kill_4': df['kill_4'].mean(),
        'basic_avg_kill_5': df['kill_5'].mean(),
        'basic_avg_assisted_kill': df['assisted_kill'].mean(),
        'basic_avg_perfect_kill': df['perfect_kill'].mean(),
        'basic_avg_revenge_kill': df['revenge_kill'].mean(),
        'basic_avg_awp_kill': df['awp_kill'].mean(),
        'basic_avg_jump_count': df['jump_count'].mean(),
    }
    return feats


def calculate_sta_features(df):
    """Compute ``sta_*`` (stability) features from rating history.

    ``df`` needs ``match_time`` (epoch seconds), ``rating`` and ``is_win``.
    The caller's frame is not modified. Returns ``{}`` for an empty frame.
    """
    if df.empty:
        return {}

    ordered = df.sort_values('match_time')
    ratings = ordered['rating']
    last_10 = ratings.tail(10)
    win_ratings = ratings[ordered['is_win'] == 1]
    loss_ratings = ratings[ordered['is_win'] == 0]

    feats = {
        'sta_last_30_rating': ratings.tail(30).mean(),
        'sta_win_rating': win_ratings.mean() if not win_ratings.empty else 0.0,
        'sta_loss_rating': loss_ratings.mean() if not loss_ratings.empty else 0.0,
        'sta_rating_volatility': last_10.std() if len(last_10) > 1 else 0.0,
    }

    # Epoch seconds are converted without a timezone, so dates/hours are UTC.
    played_at = pd.to_datetime(ordered['match_time'], unit='s')

    # Fatigue: on days with at least 4 matches, compare the first three
    # matches of the day against everything played after them.
    early_ratings = []
    late_ratings = []
    for _, day_ratings in ratings.groupby(played_at.dt.date.values):
        if len(day_ratings) >= 4:
            early_ratings.extend(day_ratings.iloc[:3].tolist())
            late_ratings.extend(day_ratings.iloc[3:].tolist())
    if early_ratings and late_ratings:
        feats['sta_fatigue_decay'] = np.mean(early_ratings) - np.mean(late_ratings)
    else:
        feats['sta_fatigue_decay'] = 0.0

    # Hour-of-day vs rating correlation; needs more than 5 matches.
    if len(ordered) > 5:
        corr = played_at.dt.hour.corr(ratings)
        feats['sta_time_rating_corr'] = corr if not np.isnan(corr) else 0.0
    else:
        feats['sta_time_rating_corr'] = 0.0

    return feats


def _column_mean(df, column):
    """Return the mean of ``df[column]``, or 0.0 when the column is absent."""
    return df[column].mean() if column in df.columns else 0.0


def calculate_util_features(df):
    """Compute ``util_*`` (utility usage) features.

    Every source column is optional: a missing column contributes 0.0, both
    to its own average and to the combined ``util_usage_rate``.
    """
    if df.empty:
        return {}

    nade_dmg = _column_mean(df, 'throw_harm')
    flash_enemy = _column_mean(df, 'flash_enemy')

    feats = {
        'util_avg_nade_dmg': nade_dmg,
        'util_avg_flash_time': _column_mean(df, 'flash_duration'),
        'util_avg_flash_enemy': flash_enemy,
        'util_avg_flash_team': _column_mean(df, 'flash_team'),
        # Enemies flashed per match plus grenade damage scaled by 1/50.
        'util_usage_rate': flash_enemy + nade_dmg / 50.0,
    }
    return feats


def calculate_side_features(steam_id, l2_conn):
    """Compute ``side_*`` features from the per-side (CT / T) player tables.

    ``steam_id`` is bound as a text parameter instead of being spliced into
    the SQL string. A side with no rows gets zeroed features.
    """
    params = (str(steam_id),)
    df_ct = pd.read_sql_query(
        "SELECT * FROM fact_match_players_ct WHERE steam_id_64 = ?", l2_conn, params=params)
    df_t = pd.read_sql_query(
        "SELECT * FROM fact_match_players_t WHERE steam_id_64 = ?", l2_conn, params=params)

    feats = {}

    if not df_ct.empty:
        ct_fk = df_ct['first_kill'].sum()
        feats['side_rating_ct'] = df_ct['rating'].mean()
        feats['side_first_kill_rate_ct'] = safe_div(ct_fk, ct_fk + df_ct['first_death'].sum())
        feats['side_hold_success_rate_ct'] = 0.0  # Placeholder
        feats['side_defused_bomb_count'] = (
            df_ct['defused_bomb'].sum() if 'defused_bomb' in df_ct.columns else 0)
    else:
        feats.update({
            'side_rating_ct': 0.0,
            'side_first_kill_rate_ct': 0.0,
            'side_hold_success_rate_ct': 0.0,
            'side_defused_bomb_count': 0,
        })

    if not df_t.empty:
        t_fk = df_t['first_kill'].sum()
        feats['side_rating_t'] = df_t['rating'].mean()
        feats['side_first_kill_rate_t'] = safe_div(t_fk, t_fk + df_t['first_death'].sum())
        feats['side_entry_success_rate_t'] = 0.0  # Placeholder
        feats['side_planted_bomb_count'] = (
            df_t['planted_bomb'].sum() if 'planted_bomb' in df_t.columns else 0)
    else:
        feats.update({
            'side_rating_t': 0.0,
            'side_first_kill_rate_t': 0.0,
            'side_entry_success_rate_t': 0.0,
            'side_planted_bomb_count': 0,
        })

    ct_kd = df_ct['kd_ratio'].mean() if not df_ct.empty else 0
    t_kd = df_t['kd_ratio'].mean() if not df_t.empty else 0
    feats['side_kd_diff_ct_t'] = ct_kd - t_kd

    return feats


def _fetch_kill_events(steam_id, l2_conn):
    """Return kill events where the player is attacker or victim, with ranks.

    Query failures are logged and yield an empty frame (best effort).
    """
    q_events = """
        SELECT e.*,
               p_vic.rank_score as victim_rank,
               p_att.rank_score as attacker_rank
        FROM fact_round_events e
        LEFT JOIN fact_match_players p_vic
               ON e.match_id = p_vic.match_id AND e.victim_steam_id = p_vic.steam_id_64
        LEFT JOIN fact_match_players p_att
               ON e.match_id = p_att.match_id AND e.attacker_steam_id = p_att.steam_id_64
        WHERE (e.attacker_steam_id = ? OR e.victim_steam_id = ?)
          AND e.event_type = 'kill'
    """
    sid = str(steam_id)
    try:
        return pd.read_sql_query(q_events, l2_conn, params=(sid, sid))
    except Exception as e:
        logger.error("Error fetching events for %s: %s", steam_id, e)
        return pd.DataFrame()


def _fetch_rounds(match_ids, l2_conn):
    """Return ``fact_rounds`` rows for ``match_ids``, queried in bounded chunks.

    Ids are bound as text parameters, so non-string ids are accepted. Query
    failures are logged and yield an empty frame (best effort).
    """
    frames = []
    try:
        for start in range(0, len(match_ids), _SQL_IN_CHUNK):
            chunk = [str(m) for m in match_ids[start:start + _SQL_IN_CHUNK]]
            placeholders = ", ".join("?" * len(chunk))
            frames.append(pd.read_sql_query(
                f"SELECT * FROM fact_rounds WHERE match_id IN ({placeholders})",
                l2_conn, params=chunk))
    except Exception as e:
        logger.error("Error fetching rounds: %s", e)
        return pd.DataFrame()

    if not frames:
        return pd.DataFrame()
    if len(frames) == 1:
        return frames[0]
    return pd.concat(frames, ignore_index=True)


def calculate_complex_features(steam_id, match_df, l2_conn):
    """
    Calculates BAT, HPS, and PTL features using Round Events and Rounds.

    ``match_df`` holds the player's match rows (``match_id`` plus the
    ``clutch_1vX`` / ``end_1vX`` columns). The returned dict always contains
    the full set of ``hps_*``, ``bat_*`` and ``ptl_*`` keys; several of them
    are fixed placeholder values.
    """
    feats = {}

    # 1. HPS: Clutch from match stats
    # clutch_1vX are wins; end_1vX are treated as attempts (assumed mapping).
    feats['hps_clutch_win_rate_1v1'] = safe_div(
        match_df['clutch_1v1'].sum(), match_df['end_1v1'].sum())
    feats['hps_clutch_win_rate_1v2'] = safe_div(
        match_df['clutch_1v2'].sum(), match_df['end_1v2'].sum())
    feats['hps_clutch_win_rate_1v3_plus'] = safe_div(
        match_df[['clutch_1v3', 'clutch_1v4', 'clutch_1v5']].sum().sum(),
        match_df[['end_1v3', 'end_1v4', 'end_1v5']].sum().sum(),
    )

    # 2. BAT: duels derived from kill events involving the player
    events = _fetch_kill_events(steam_id, l2_conn)

    if not events.empty:
        kills = events[events['attacker_steam_id'] == steam_id].copy()
        deaths = events[events['victim_steam_id'] == steam_id].copy()

        # 'diff' is always enemy rank minus the player's own rank.
        kills['diff'] = kills['victim_rank'] - kills['attacker_rank']
        deaths['diff'] = deaths['attacker_rank'] - deaths['victim_rank']

        # High Elo = enemy rank above the player's; Low Elo = below.
        high_elo_kills = kills[kills['diff'] > 0].shape[0]
        high_elo_deaths = deaths[deaths['diff'] > 0].shape[0]
        low_elo_kills = kills[kills['diff'] < 0].shape[0]
        low_elo_deaths = deaths[deaths['diff'] < 0].shape[0]

        feats['bat_kd_diff_high_elo'] = high_elo_kills - high_elo_deaths
        feats['bat_kd_diff_low_elo'] = low_elo_kills - low_elo_deaths

        total_duels = len(kills) + len(deaths)
        feats['bat_win_rate_vs_all'] = safe_div(len(kills), total_duels)
        feats['bat_avg_duel_win_rate'] = feats['bat_win_rate_vs_all']  # Simplifying
        feats['bat_avg_duel_freq'] = safe_div(total_duels, len(match_df))
        feats['bat_win_rate_close'] = 0.0  # Placeholder for distance logic
        feats['bat_win_rate_mid'] = 0.0
        feats['bat_win_rate_far'] = 0.0
    else:
        feats.update({
            'bat_kd_diff_high_elo': 0,
            'bat_kd_diff_low_elo': 0,
            'bat_win_rate_vs_all': 0.0,
            'bat_avg_duel_win_rate': 0.0,
            'bat_avg_duel_freq': 0.0,
            'bat_win_rate_close': 0.0,
            'bat_win_rate_mid': 0.0,
            'bat_win_rate_far': 0.0,
        })

    # 3. PTL & Match Point (requires rounds of the matches played)
    match_ids = match_df['match_id'].unique().tolist()
    rounds = _fetch_rounds(match_ids, l2_conn) if match_ids else pd.DataFrame()

    if not rounds.empty and not events.empty:
        # PTL: rounds 1 and 13 (assuming MR12)
        pistol_rounds = rounds[(rounds['round_num'] == 1) | (rounds['round_num'] == 13)]

        # The player's kills that happened in a pistol round.
        pistol_events = pd.merge(
            events[events['attacker_steam_id'] == steam_id],
            pistol_rounds[['match_id', 'round_num']],
            on=['match_id', 'round_num'],
        )

        feats['ptl_pistol_kills'] = safe_div(len(pistol_events), len(match_df))  # Avg per match
        feats['ptl_pistol_multikills'] = 0.0  # Needs per-round grouping
        feats['ptl_pistol_win_rate'] = 0.5  # Placeholder (needs winner_team vs player_team)
        feats['ptl_pistol_kd'] = 1.0  # Placeholder
        feats['ptl_pistol_util_efficiency'] = 0.0

        # Match Point (HPS): not implemented yet. A v1 would look at rounds
        # where either score is 12 (MR12); overtime is not handled.
        feats['hps_match_point_win_rate'] = 0.5  # Placeholder
    else:
        feats.update({
            'ptl_pistol_kills': 0.0,
            'ptl_pistol_multikills': 0.0,
            'ptl_pistol_win_rate': 0.0,
            'ptl_pistol_kd': 0.0,
            'ptl_pistol_util_efficiency': 0.0,
            'hps_match_point_win_rate': 0.0,
        })

    # Fill remaining HPS placeholders
    feats['hps_undermanned_survival_time'] = 0.0
    feats['hps_pressure_entry_rate'] = 0.0
    feats['hps_momentum_multikill_rate'] = 0.0
    feats['hps_tilt_rating_drop'] = 0.0
    feats['hps_clutch_rating_rise'] = 0.0
    feats['hps_comeback_kd_diff'] = 0.0
    feats['hps_losing_streak_kd_diff'] = 0.0

    return feats


def _to_native(value):
    """Convert a numpy scalar to the matching Python scalar for sqlite3 binding."""
    return value.item() if isinstance(value, np.generic) else value


def process_players():
    """Compute all feature groups for every player and upsert them into L3.

    Commits every 10 players and once more at the end. Both connections are
    closed even if processing fails part-way.
    """
    l2_conn = get_db_connection(L2_DB_PATH)
    l3_conn = get_db_connection(L3_DB_PATH)
    try:
        logger.info("Fetching player list...")
        players = pd.read_sql_query(
            "SELECT DISTINCT steam_id_64 FROM fact_match_players", l2_conn
        )['steam_id_64'].tolist()

        logger.info(f"Found {len(players)} players. Processing...")

        player_query = (
            "SELECT * FROM fact_match_players WHERE steam_id_64 = ? ORDER BY match_time ASC")

        for idx, steam_id in enumerate(players):
            df = pd.read_sql_query(player_query, l2_conn, params=(str(steam_id),))
            if df.empty:
                continue

            feats = calculate_basic_features(df)
            feats.update(calculate_sta_features(df))
            feats.update(calculate_side_features(steam_id, l2_conn))
            feats.update(calculate_util_features(df))
            feats.update(calculate_complex_features(steam_id, df, l2_conn))

            # Insert. Column names are the feature keys produced above (not
            # external input); values are bound as parameters.
            cols = list(feats.keys())
            vals = [_to_native(v) for v in feats.values()]

            col_str = ", ".join(cols)
            q_marks = ", ".join(["?"] * len(cols))
            sql = (
                f"INSERT OR REPLACE INTO dm_player_features (steam_id_64, {col_str}) "
                f"VALUES (?, {q_marks})")
            l3_conn.execute(sql, [steam_id] + vals)

            if idx % 10 == 0:
                print(f"Processed {idx}/{len(players)} players...", end='\r')
                l3_conn.commit()

        l3_conn.commit()
    finally:
        l2_conn.close()
        l3_conn.close()
    logger.info("\nDone.")


if __name__ == "__main__":
    init_db()
    process_players()