"""Build the "ultimate" per-player feature set from the L2 SQLite database.

The pipeline loads per-player aggregates, enriches them with stability (STA),
battle (BAT), high-pressure (HPS), pistol (PTL), side (T/CT) and utility
(UTIL) features, and finally converts them into 0-100 dimension scores.
"""
import os
import sqlite3

import numpy as np
import pandas as pd

DB_L2_PATH = r'd:\Documents\trae_projects\yrtv\database\L2\L2_Main.sqlite'

# Kill-distance buckets in game units (1 unit ~ 1 inch):
# close < 500 (~12m), mid 500-1500 (~12m-38m), far > 1500.
_CLOSE_RANGE_MAX = 500
_FAR_RANGE_MIN = 1500

# Heuristic: a side score of 12 or more marks a match-point round (assumes MR12).
_MATCH_POINT_SCORE = 12
# Score gap from which a round counts as "pressure" (behind) or "momentum" (ahead).
_SCORE_GAP = 3

_DISTANCE_FLAGS = ['is_close', 'is_mid', 'is_far']

_QUERY_BASIC = """
    SELECT
        steam_id_64,
        COUNT(*) as matches_played,
        SUM(round_total) as rounds_played,
        AVG(rating) as basic_avg_rating,
        AVG(kd_ratio) as basic_avg_kd,
        AVG(adr) as basic_avg_adr,
        AVG(kast) as basic_avg_kast,
        AVG(rws) as basic_avg_rws,
        SUM(headshot_count) as sum_hs,
        SUM(kills) as sum_kills,
        SUM(deaths) as sum_deaths,
        SUM(first_kill) as sum_fk,
        SUM(first_death) as sum_fd,
        SUM(clutch_1v1) as sum_1v1,
        SUM(clutch_1v2) as sum_1v2,
        SUM(clutch_1v3) + SUM(clutch_1v4) + SUM(clutch_1v5) as sum_1v3p,
        SUM(kill_2) as sum_2k,
        SUM(kill_3) as sum_3k,
        SUM(kill_4) as sum_4k,
        SUM(kill_5) as sum_5k,
        SUM(assisted_kill) as sum_assist,
        SUM(perfect_kill) as sum_perfect,
        SUM(revenge_kill) as sum_revenge,
        SUM(awp_kill) as sum_awp,
        SUM(jump_count) as sum_jump,
        SUM(throw_harm) as sum_util_dmg,
        SUM(flash_time) as sum_flash_time,
        SUM(flash_enemy) as sum_flash_enemy,
        SUM(flash_team) as sum_flash_team
    FROM fact_match_players
    GROUP BY steam_id_64
    HAVING COUNT(*) >= ?
"""


def get_db_connection():
    """Open the L2 database and return a connection that yields sqlite3.Row rows."""
    conn = sqlite3.connect(DB_L2_PATH)
    conn.row_factory = sqlite3.Row
    return conn


def safe_div(a, b):
    """Return ``a / b``, or 0 when ``b`` is zero."""
    if b == 0:
        return 0
    return a / b


def _ratio(numerator, denominator):
    """Divide two Series, treating a zero denominator as 1 (so 0/0 becomes 0)."""
    return numerator / denominator.replace(0, 1)


def _merge_features(df, features):
    """Left-merge per-player ``features`` into ``df``; no-op when nothing was computed.

    Merging an empty ``pd.DataFrame([])`` would raise KeyError because it has
    no ``steam_id_64`` column, so empty/None results are skipped instead.
    """
    if features is None or features.empty:
        return df
    return df.merge(features, on='steam_id_64', how='left')


def _add_basic_derived(df):
    """Add rate and per-match averages derived from the basic aggregate sums."""
    matches = df['matches_played']
    opening_duels = df['sum_fk'] + df['sum_fd']

    df['basic_headshot_rate'] = _ratio(df['sum_hs'], df['sum_kills'])
    df['basic_avg_headshot_kills'] = df['sum_hs'] / matches
    df['basic_avg_first_kill'] = df['sum_fk'] / matches
    df['basic_avg_first_death'] = df['sum_fd'] / matches
    df['basic_first_kill_rate'] = _ratio(df['sum_fk'], opening_duels)
    df['basic_first_death_rate'] = _ratio(df['sum_fd'], opening_duels)

    per_match = {
        'basic_avg_kill_2': 'sum_2k',
        'basic_avg_kill_3': 'sum_3k',
        'basic_avg_kill_4': 'sum_4k',
        'basic_avg_kill_5': 'sum_5k',
        'basic_avg_assisted_kill': 'sum_assist',
        'basic_avg_perfect_kill': 'sum_perfect',
        'basic_avg_revenge_kill': 'sum_revenge',
        'basic_avg_awp_kill': 'sum_awp',
        'basic_avg_jump_count': 'sum_jump',
    }
    for target, source in per_match.items():
        df[target] = df[source] / matches
    return df


def _calc_sta(conn, placeholders, valid_ids):
    """Compute stability (STA) features from each player's match time series.

    Returns a DataFrame keyed by ``steam_id_64`` or None when no match rows
    were found.
    """
    query = f"""
        SELECT mp.steam_id_64, mp.rating, mp.is_win, m.start_time, m.duration
        FROM fact_match_players mp
        JOIN fact_matches m ON mp.match_id = m.match_id
        WHERE mp.steam_id_64 IN ({placeholders})
        ORDER BY mp.steam_id_64, m.start_time
    """
    matches = pd.read_sql_query(query, conn, params=valid_ids)
    if matches.empty:
        return None

    # Derived on the full frame (not on a groupby slice) to avoid
    # chained-assignment warnings. start_time is treated as epoch seconds.
    matches['date'] = pd.to_datetime(matches['start_time'], unit='s').dt.date

    rows = []
    for pid, group in matches.groupby('steam_id_64'):
        group = group.sort_values('start_time')
        rating = group['rating']

        # Fatigue: first-minus-last rating on days with at least 3 matches.
        daily = group.groupby('date')['rating'].agg(['first', 'last', 'count'])
        busy_days = daily[daily['count'] >= 3]
        if len(busy_days) > 0:
            fatigue_decay = (busy_days['first'] - busy_days['last']).mean()
        else:
            fatigue_decay = 0

        rows.append({
            'steam_id_64': pid,
            'sta_last_30_rating': rating.tail(30).mean(),
            'sta_win_rating': rating[group['is_win'] == 1].mean(),
            'sta_loss_rating': rating[group['is_win'] == 0].mean(),
            'sta_rating_volatility': rating.tail(10).std(),
            # Correlation needs more than two points to be meaningful.
            'sta_time_rating_corr': group['duration'].corr(rating) if len(group) > 2 else 0,
            'sta_fatigue_decay': fatigue_decay,
        })
    return pd.DataFrame(rows)


def _load_kill_distances(conn, id_column, placeholders, valid_ids):
    """Load kill events keyed by ``id_column`` and tag each with a distance bucket.

    ``id_column`` is an internal constant (attacker or victim id column), never
    user input, so interpolating it into the SQL text is safe.
    """
    query = f"""
        SELECT {id_column} as steam_id_64,
               attacker_pos_x, attacker_pos_y, attacker_pos_z,
               victim_pos_x, victim_pos_y, victim_pos_z
        FROM fact_round_events
        WHERE event_type = 'kill' AND {id_column} IN ({placeholders})
          AND attacker_pos_x IS NOT NULL AND victim_pos_x IS NOT NULL
    """
    # Note: this can be heavy. If memory becomes an issue, sample or chunk.
    events = pd.read_sql_query(query, conn, params=valid_ids)

    # Euclidean distance; astype(float) also keeps empty/object columns numeric.
    squared = sum(
        (events[f'attacker_pos_{axis}'].astype(float)
         - events[f'victim_pos_{axis}'].astype(float)) ** 2
        for axis in 'xyz'
    )
    events['dist'] = np.sqrt(squared)
    events['is_close'] = events['dist'] < _CLOSE_RANGE_MAX
    events['is_mid'] = (events['dist'] >= _CLOSE_RANGE_MAX) & (events['dist'] <= _FAR_RANGE_MIN)
    events['is_far'] = events['dist'] > _FAR_RANGE_MIN
    return events


def _calc_distance_stats(conn, placeholders, valid_ids):
    """Compute kill-share and duel win-rate features per distance bucket.

    Win rate is approximated as kills / (kills + deaths) at that distance.
    Returns a DataFrame keyed by ``steam_id_64`` or None when there are no
    positioned kills for the selected players.
    """
    kills = _load_kill_distances(conn, 'attacker_steam_id', placeholders, valid_ids)
    if kills.empty:
        print("No position data found.")
        return None
    deaths = _load_kill_distances(conn, 'victim_steam_id', placeholders, valid_ids)

    by_killer = kills.groupby('steam_id_64')[_DISTANCE_FLAGS]
    # Share of a player's kills per bucket: a proxy for range preference/style.
    shares = by_killer.mean().rename(columns={
        'is_close': 'bat_kill_share_close',
        'is_mid': 'bat_kill_share_mid',
        'is_far': 'bat_kill_share_far',
    })
    k_counts = by_killer.sum().rename(columns={
        'is_close': 'k_close', 'is_mid': 'k_mid', 'is_far': 'k_far',
    })
    d_counts = deaths.groupby('steam_id_64')[_DISTANCE_FLAGS].sum().rename(columns={
        'is_close': 'd_close', 'is_mid': 'd_mid', 'is_far': 'd_far',
    })

    rates = k_counts.join(d_counts, how='outer').fillna(0)
    for bucket in ('close', 'mid', 'far'):
        won, lost = rates[f'k_{bucket}'], rates[f'd_{bucket}']
        rates[f'bat_win_rate_{bucket}'] = _ratio(won, won + lost)
    all_kills = rates['k_close'] + rates['k_mid'] + rates['k_far']
    all_deaths = rates['d_close'] + rates['d_mid'] + rates['d_far']
    rates['bat_win_rate_vs_all'] = _ratio(all_kills, all_kills + all_deaths)

    rates = rates.join(shares, how='left')
    feature_cols = [
        'bat_win_rate_close', 'bat_win_rate_mid', 'bat_win_rate_far',
        'bat_win_rate_vs_all',
        'bat_kill_share_close', 'bat_kill_share_mid', 'bat_kill_share_far',
    ]
    return rates[feature_cols].rename_axis('steam_id_64').reset_index()


def _calc_elo_split(conn, placeholders, valid_ids):
    """Compute average KD in matches above / at-or-below the player's own mean ELO.

    Returns a DataFrame keyed by ``steam_id_64`` or None when no rows exist.
    """
    query = f"""
        SELECT mp.steam_id_64, mp.kd_ratio,
               (SELECT AVG(group_origin_elo) FROM fact_match_teams fmt
                WHERE fmt.match_id = mp.match_id AND group_origin_elo > 0) as elo
        FROM fact_match_players mp
        WHERE mp.steam_id_64 IN ({placeholders})
    """
    df_elo = pd.read_sql_query(query, conn, params=valid_ids)
    if df_elo.empty:
        return None
    # An all-NULL column arrives as object dtype; make it numeric (NaN).
    df_elo['elo'] = pd.to_numeric(df_elo['elo'], errors='coerce')

    rows = []
    for pid, group in df_elo.groupby('steam_id_64'):
        pivot = group['elo'].mean()
        if pd.isna(pivot):
            pivot = 1000  # fallback when the player has no ELO data at all
        rows.append({
            'steam_id_64': pid,
            'bat_kd_diff_high_elo': group.loc[group['elo'] > pivot, 'kd_ratio'].mean(),
            'bat_kd_diff_low_elo': group.loc[group['elo'] <= pivot, 'kd_ratio'].mean(),
        })
    return pd.DataFrame(rows)


def _kd(rounds):
    """Kills / deaths over ``rounds``; falls back to raw kills when deaths are 0."""
    kills = rounds['kills'].sum()
    deaths = rounds['deaths'].sum()
    return kills / deaths if deaths > 0 else kills


def _calc_hps(conn, placeholders, valid_ids):
    """Compute high-pressure (HPS) context features from round-level data.

    Returns a DataFrame keyed by ``steam_id_64`` or None when no rounds exist.
    """
    query = f"""
        SELECT r.match_id, r.round_num, r.ct_score, r.t_score, r.winner_side,
               m.score_team1, m.score_team2, m.winner_team,
               e.steam_id_64, e.side as player_side,
               (SELECT COUNT(*) FROM fact_round_events ev
                WHERE ev.match_id=r.match_id AND ev.round_num=r.round_num
                  AND ev.attacker_steam_id=e.steam_id_64 AND ev.event_type='kill') as kills,
               (SELECT COUNT(*) FROM fact_round_events ev
                WHERE ev.match_id=r.match_id AND ev.round_num=r.round_num
                  AND ev.victim_steam_id=e.steam_id_64 AND ev.event_type='kill') as deaths
        FROM fact_rounds r
        JOIN fact_matches m ON r.match_id = m.match_id
        JOIN fact_round_player_economy e
          ON r.match_id = e.match_id AND r.round_num = e.round_num
        WHERE e.steam_id_64 IN ({placeholders})
    """
    rounds = pd.read_sql_query(query, conn, params=valid_ids)
    if rounds.empty:
        return None

    # Orient the scoreboard from the player's point of view.
    on_ct = rounds['player_side'] == 'CT'
    rounds['my_score'] = np.where(on_ct, rounds['ct_score'], rounds['t_score'])
    rounds['enemy_score'] = np.where(on_ct, rounds['t_score'], rounds['ct_score'])
    rounds['won'] = rounds['winner_side'] == rounds['player_side']

    rows = []
    for pid, group in rounds.groupby('steam_id_64'):
        lead = group['my_score'] - group['enemy_score']

        # Match point: either team has reached the match-point score.
        match_point = group[(group['my_score'] >= _MATCH_POINT_SCORE)
                            | (group['enemy_score'] >= _MATCH_POINT_SCORE)]
        mp_win_rate = match_point['won'].mean() if len(match_point) > 0 else 0.5

        # Pressure: trailing by the gap or more. Kills per round is used as a
        # proxy for entry rate because first-kill data is not loaded here.
        pressure = group[lead <= -_SCORE_GAP]
        pressure_kpr = pressure['kills'].mean() if len(pressure) > 0 else 0

        # Momentum: leading by the gap or more; share of rounds with 2+ kills.
        momentum = group[lead >= _SCORE_GAP]
        momentum_mk_rate = (momentum['kills'] >= 2).mean() if len(momentum) > 0 else 0

        # KD while under pressure relative to the player's overall KD.
        comeback_diff = _kd(pressure) - _kd(group)

        rows.append({
            'steam_id_64': pid,
            'hps_match_point_win_rate': mp_win_rate,
            'hps_pressure_entry_rate': pressure_kpr,  # proxy
            'hps_momentum_multikill_rate': momentum_mk_rate,
            'hps_comeback_kd_diff': comeback_diff,
            'hps_losing_streak_kd_diff': comeback_diff,  # same metric
        })
    return pd.DataFrame(rows)


def _calc_ptl(conn, placeholders, valid_ids):
    """Compute pistol-round (rounds 1 and 13, i.e. MR12) features.

    ``ptl_pistol_kills`` is returned as a raw total; the caller converts it to
    a per-match average. Returns None when no pistol rounds exist.
    """
    query = f"""
        SELECT e.steam_id_64,
               (SELECT COUNT(*) FROM fact_round_events ev
                WHERE ev.match_id=e.match_id AND ev.round_num=e.round_num
                  AND ev.attacker_steam_id=e.steam_id_64 AND ev.event_type='kill') as kills,
               (SELECT COUNT(*) FROM fact_round_events ev
                WHERE ev.match_id=e.match_id AND ev.round_num=e.round_num
                  AND ev.victim_steam_id=e.steam_id_64 AND ev.event_type='kill') as deaths,
               r.winner_side, e.side as player_side, e.equipment_value
        FROM fact_round_player_economy e
        JOIN fact_rounds r ON e.match_id = r.match_id AND e.round_num = r.round_num
        WHERE e.steam_id_64 IN ({placeholders}) AND e.round_num IN (1, 13)
    """
    raw = pd.read_sql_query(query, conn, params=valid_ids)
    if raw.empty:
        return None

    rows = []
    for pid, group in raw.groupby('steam_id_64'):
        rows.append({
            'steam_id_64': pid,
            'ptl_pistol_kills': group['kills'].sum(),
            'ptl_pistol_kd': _kd(group),
            'ptl_pistol_win_rate': (group['winner_side'] == group['player_side']).mean(),
            'ptl_pistol_multikills': (group['kills'] >= 2).sum(),
        })
    return pd.DataFrame(rows)


def _load_side_stats(conn, table, prefix, placeholders, valid_ids):
    """Load per-player rating/KD/first-kill aggregates from a side-specific table.

    ``table`` and ``prefix`` are internal constants, never user input.
    """
    query = f"""
        SELECT steam_id_64,
               AVG(rating) as {prefix}_rating,
               AVG(kd_ratio) as {prefix}_kd,
               SUM(first_kill) as {prefix}_fk
        FROM {table}
        WHERE steam_id_64 IN ({placeholders})
        GROUP BY steam_id_64
    """
    return pd.read_sql_query(query, conn, params=valid_ids)


def load_and_calculate_ultimate(conn, min_matches=5):
    """Load all per-player features for players with at least ``min_matches`` matches.

    Args:
        conn: Open DB-API connection to the L2 SQLite database.
        min_matches: Minimum number of rows in ``fact_match_players`` a player
            needs to be included.

    Returns:
        A DataFrame with one row per qualifying player and NaNs replaced by 0,
        or None when no player qualifies.

    The BAT-distance, HPS, PTL and T/CT sections are best-effort: a failure is
    printed and the corresponding columns are simply left out. Every player id
    is bound as one SQL parameter, so very large player sets may exceed
    SQLite's bound-variable limit.
    """
    print("Loading Ultimate Data Set...")

    # 1. Basic stats
    df = pd.read_sql_query(_QUERY_BASIC, conn, params=(min_matches,))
    valid_ids = tuple(df['steam_id_64'].tolist())
    if not valid_ids:
        return None
    placeholders = ','.join(['?'] * len(valid_ids))
    df = _add_basic_derived(df)

    # 2. STA - detailed time series
    print("Calculating STA (Detailed)...")
    df = _merge_features(df, _calc_sta(conn, placeholders, valid_ids))

    # 3. BAT - distance & advanced
    print("Calculating BAT (Distance & Context)...")
    try:
        df = _merge_features(df, _calc_distance_stats(conn, placeholders, valid_ids))
    except Exception as e:  # best-effort section: report and continue
        print(f"Dist calculation error: {e}")

    df = _merge_features(df, _calc_elo_split(conn, placeholders, valid_ids))
    df['bat_avg_duel_freq'] = _ratio(df['sum_fk'] + df['sum_fd'], df['rounds_played'])

    # 4. HPS - high pressure contexts
    print("Calculating HPS (Contexts)...")
    try:
        df = _merge_features(df, _calc_hps(conn, placeholders, valid_ids))
    except Exception as e:  # best-effort section: report and continue
        print(f"HPS Error: {e}")

    # Clutch rates only need the basic sums, so they are computed even when the
    # round-context query above fails. Normalised per match for now; the ideal
    # denominator would be clutch opportunities.
    df['hps_clutch_win_rate_1v1'] = df['sum_1v1'] / df['matches_played']
    df['hps_clutch_win_rate_1v2'] = df['sum_1v2'] / df['matches_played']
    df['hps_clutch_win_rate_1v3_plus'] = df['sum_1v3p'] / df['matches_played']

    # 5. PTL - pistol rounds
    print("Calculating PTL...")
    try:
        ptl = _calc_ptl(conn, placeholders, valid_ids)
        if ptl is not None:
            df = df.merge(ptl, on='steam_id_64', how='left')
            # Average pistol kills per match, using each player's own match count.
            df['ptl_pistol_kills'] = df['ptl_pistol_kills'] / df['matches_played']
    except Exception as e:  # best-effort section: report and continue
        print(f"PTL Error: {e}")

    # 6. T/CT & UTIL
    print("Calculating T/CT & UTIL...")
    # NOTE(review): the T table name mirrors the CT one per the original
    # "fact_match_players_ct/t" note - confirm it exists in the schema.
    for table, prefix in (('fact_match_players_ct', 'ct'), ('fact_match_players_t', 't')):
        try:
            side = _load_side_stats(conn, table, prefix, placeholders, valid_ids)
            df = _merge_features(df, side)
        except Exception as e:  # best-effort section: report and continue
            print(f"T/CT Error ({table}): {e}")

    df['util_avg_nade_dmg'] = df['sum_util_dmg'] / df['matches_played']
    df['util_avg_flash_time'] = df['sum_flash_time'] / df['matches_played']
    df['util_avg_flash_enemy'] = df['sum_flash_enemy'] / df['matches_played']

    return df.fillna(0)


def _min_max_100(series):
    """Min-max scale ``series`` to 0-100; return a neutral 50 when it has no spread."""
    low = series.min()
    span = series.max() - low
    if not span > 0:  # constant, empty or all-NaN column
        return 50
    return (series - low) / span * 100


def calculate_ultimate_scores(df):
    """Return a copy of ``df`` with the 0-100 dimension scores added.

    Adds ``score_BAT``, ``score_STA``, ``score_HPS``, ``score_PTL``,
    ``score_TCT`` and ``score_UTIL``. Each input feature is min-max normalised
    across players; a feature that is missing or constant contributes a
    neutral 50.
    """
    df = df.copy()

    def n(col):
        if col not in df.columns:
            return 50
        return _min_max_100(df[col])

    def first_present(*candidates):
        # First candidate column that exists; the last one is the fallback.
        for col in candidates[:-1]:
            if col in df.columns:
                return col
        return candidates[-1]

    # 1. BAT: Battle (30%)
    # Weights: Rating(25), KD(20), ADR(15), Duel(10), HighELO(10), CloseRange(10), MultiKill(10)
    duel_col = first_present('bat_avg_duel_win_rate', 'bat_win_rate_vs_all')
    df['score_BAT'] = (
        0.25 * n('basic_avg_rating') +
        0.20 * n('basic_avg_kd') +
        0.15 * n('basic_avg_adr') +
        0.10 * n(duel_col) +
        0.10 * n('bat_kd_diff_high_elo') +
        0.10 * n('bat_win_rate_close') +
        0.10 * n('basic_avg_kill_3')  # multi-kill proxy
    )

    # 2. STA: Stability (15%)
    # Weights: Volatility(30), LossRating(30), WinRating(20), TimeCorr(10), Fatigue(10)
    # A correlation closer to 0 is better (rating independent of match
    # duration), so the magnitude is taken BEFORE normalising.
    if 'sta_time_rating_corr' in df.columns:
        corr_penalty = _min_max_100(df['sta_time_rating_corr'].abs())
    else:
        corr_penalty = 50
    df['score_STA'] = (
        0.30 * (100 - n('sta_rating_volatility')) +
        0.30 * n('sta_loss_rating') +
        0.20 * n('sta_win_rating') +
        0.10 * (100 - corr_penalty) +
        0.10 * (100 - n('sta_fatigue_decay'))
    )

    # 3. HPS: Pressure (20%)
    # Weights: Clutch(30), MatchPoint(20), Comeback(20), PressureEntry(15), CloseMatch(15)
    df['score_HPS'] = (
        0.30 * n('sum_1v3p') +  # using high tier clutches
        0.20 * n('hps_match_point_win_rate') +
        0.20 * n('hps_comeback_kd_diff') +
        0.15 * n('hps_pressure_entry_rate') +
        0.15 * n('basic_avg_rating')  # stand-in for a close-match rating
    )

    # 4. PTL: Pistol (10%)
    # Weights: Kills(40), WinRate(30), KD(30)
    df['score_PTL'] = (
        0.40 * n('ptl_pistol_kills') +
        0.30 * n('ptl_pistol_win_rate') +
        0.30 * n('ptl_pistol_kd')
    )

    # 5. T/CT (15%)
    # Weights: CT(50), T(50). Falls back to the overall rating when the
    # side-specific ratings were not loaded.
    ct_col = first_present('ct_rating', 'basic_avg_rating')
    t_col = first_present('t_rating', 'basic_avg_rating')
    df['score_TCT'] = 0.5 * n(ct_col) + 0.5 * n(t_col)

    # 6. UTIL (10%)
    # Weights: Dmg(50), Flash(30), EnemiesFlashed(20)
    df['score_UTIL'] = (
        0.50 * n('util_avg_nade_dmg') +
        0.30 * n('util_avg_flash_time') +
        0.20 * n('util_avg_flash_enemy')
    )

    return df


def main():
    """Run the pipeline against the L2 database and print a short report."""
    conn = get_db_connection()
    try:
        df = load_and_calculate_ultimate(conn)
        if df is None:
            return

        results = calculate_ultimate_scores(df)

        print("\n--- Ultimate Scores (Top 5 BAT) ---")
        cols = ['steam_id_64', 'score_BAT', 'score_STA', 'score_HPS', 'score_PTL', 'score_UTIL']
        print(results[cols].sort_values('score_BAT', ascending=False).head(5))

        # Verify coverage
        print("\n--- Feature Coverage ---")
        print(f"Total Columns: {len(results.columns)}")
        print("BAT Distances:", 'bat_win_rate_close' in results.columns)
        print("HPS Contexts:", 'hps_match_point_win_rate' in results.columns)
        print("PTL Detailed:", 'ptl_pistol_kd' in results.columns)
    finally:
        conn.close()


if __name__ == "__main__":
    main()