"""Build per-player feature tables from the L2 SQLite database and score them.

The loader aggregates match-level statistics per player, and the scorer
min-max normalizes selected features to 0-100 and combines them with fixed
weights into six dimension scores: BAT, STA, HPS, PTL, T/CT and UTIL.
"""
import os
import sqlite3

import numpy as np
import pandas as pd

DB_L2_PATH = r'd:\Documents\trae_projects\yrtv\database\L2\L2_Main.sqlite'

# Explicit column lists keep the per-player frames mergeable on 'steam_id_64'
# even when a query returns no rows (an empty DataFrame built from [] would
# otherwise have no columns at all and the merge would raise KeyError).
_STA_COLUMNS = [
    'steam_id_64',
    'sta_last_30_rating',
    'sta_win_rating',
    'sta_loss_rating',
    'sta_rating_volatility',
    'sta_fatigue_decay',
]
_BAT_COLUMNS = [
    'steam_id_64',
    'bat_kd_diff_high_elo',
    'bat_kd_diff_low_elo',
    'bat_avg_duel_win_rate',
]
_HPS_COLUMNS = ['steam_id_64', 'hps_close_match_rating']


def get_db_connection():
    """Open the database at DB_L2_PATH and return the connection.

    The connection uses sqlite3.Row as its row factory. The caller is
    responsible for closing it.
    """
    conn = sqlite3.connect(DB_L2_PATH)
    conn.row_factory = sqlite3.Row
    return conn


def _load_basic_stats(conn, min_matches):
    """Return one row per player with at least `min_matches` matches."""
    query_basic = """
    SELECT
        steam_id_64,
        COUNT(*) as total_matches,
        AVG(rating) as basic_avg_rating,
        AVG(kd_ratio) as basic_avg_kd,
        AVG(adr) as basic_avg_adr,
        AVG(kast) as basic_avg_kast,
        AVG(rws) as basic_avg_rws,
        SUM(headshot_count) as sum_headshot,
        SUM(kills) as sum_kills,
        SUM(deaths) as sum_deaths,
        SUM(first_kill) as sum_fk,
        SUM(first_death) as sum_fd,
        SUM(kill_2) as sum_2k,
        SUM(kill_3) as sum_3k,
        SUM(kill_4) as sum_4k,
        SUM(kill_5) as sum_5k,
        SUM(assisted_kill) as sum_assist,
        SUM(perfect_kill) as sum_perfect,
        SUM(revenge_kill) as sum_revenge,
        SUM(awp_kill) as sum_awp,
        SUM(jump_count) as sum_jump,
        SUM(clutch_1v1)+SUM(clutch_1v2)+SUM(clutch_1v3)+SUM(clutch_1v4)+SUM(clutch_1v5) as sum_clutches,
        SUM(throw_harm) as sum_util_dmg,
        SUM(flash_time) as sum_flash_time,
        SUM(flash_enemy) as sum_flash_enemy,
        SUM(flash_team) as sum_flash_team
    FROM fact_match_players
    GROUP BY steam_id_64
    HAVING COUNT(*) >= ?
    """
    return pd.read_sql_query(query_basic, conn, params=(min_matches,))


def _add_basic_derived_features(df):
    """Add the `basic_*` rate and per-match columns derived from the sums."""
    matches = df['total_matches']

    # Zero denominators are replaced by 1 so the rate becomes 0 instead of NaN/inf.
    df['basic_headshot_rate'] = df['sum_headshot'] / df['sum_kills'].replace(0, 1)
    for feature, total in (
        ('basic_avg_headshot_kills', 'sum_headshot'),
        ('basic_avg_first_kill', 'sum_fk'),
        ('basic_avg_first_death', 'sum_fd'),
    ):
        df[feature] = df[total] / matches

    # Opening success: share of opening duels (first kill + first death) won/lost.
    openings = (df['sum_fk'] + df['sum_fd']).replace(0, 1)
    df['basic_first_kill_rate'] = df['sum_fk'] / openings
    df['basic_first_death_rate'] = df['sum_fd'] / openings

    for feature, total in (
        ('basic_avg_kill_2', 'sum_2k'),
        ('basic_avg_kill_3', 'sum_3k'),
        ('basic_avg_kill_4', 'sum_4k'),
        ('basic_avg_kill_5', 'sum_5k'),
        ('basic_avg_assisted_kill', 'sum_assist'),
        ('basic_avg_perfect_kill', 'sum_perfect'),
        ('basic_avg_revenge_kill', 'sum_revenge'),
        ('basic_avg_awp_kill', 'sum_awp'),
        ('basic_avg_jump_count', 'sum_jump'),
    ):
        df[feature] = df[total] / matches
    return df


def _compute_sta(conn, valid_ids, placeholders):
    """Return per-player stability (STA) features from time-ordered matches."""
    query_sta = f"""
    SELECT mp.steam_id_64, mp.rating, mp.is_win, m.start_time
    FROM fact_match_players mp
    JOIN fact_matches m ON mp.match_id = m.match_id
    WHERE mp.steam_id_64 IN ({placeholders})
    ORDER BY mp.steam_id_64, m.start_time
    """
    df_matches = pd.read_sql_query(query_sta, conn, params=valid_ids)

    rows = []
    for pid, group in df_matches.groupby('steam_id_64'):
        ratings = group['rating']

        # Fatigue: rating in the first match of a calendar day versus the
        # third-or-later match of that day. `assign` builds a new frame so the
        # groupby slice is never mutated.
        dated = group.assign(
            date=pd.to_datetime(group['start_time'], unit='s').dt.date
        )
        daily_counts = dated.groupby('date').cumcount()
        early_ratings = ratings[daily_counts == 0]
        late_ratings = ratings[daily_counts >= 2]
        if len(late_ratings) > 0:
            # Positive means the rating drops later in the day.
            sta_fatigue = early_ratings.mean() - late_ratings.mean()
        else:
            sta_fatigue = 0

        rows.append({
            'steam_id_64': pid,
            'sta_last_30_rating': ratings.tail(30).mean(),
            'sta_win_rating': ratings[group['is_win'] == 1].mean(),
            'sta_loss_rating': ratings[group['is_win'] == 0].mean(),
            # Volatility: standard deviation over the last 10 matches.
            'sta_rating_volatility': ratings.tail(10).std(),
            'sta_fatigue_decay': sta_fatigue,
        })
    return pd.DataFrame(rows, columns=_STA_COLUMNS)


def _compute_bat(conn, valid_ids, placeholders):
    """Return per-player battle (BAT) features: K/D by match ELO, duel win rate."""
    # match_elo is the mean positive group_origin_elo of the teams in the match.
    query_bat = f"""
    SELECT mp.steam_id_64, mp.kd_ratio, mp.entry_kills, mp.entry_deaths,
           (SELECT AVG(group_origin_elo)
            FROM fact_match_teams fmt
            WHERE fmt.match_id = mp.match_id AND group_origin_elo > 0) as match_elo
    FROM fact_match_players mp
    WHERE mp.steam_id_64 IN ({placeholders})
    """
    df_bat_raw = pd.read_sql_query(query_bat, conn, params=valid_ids)

    rows = []
    for pid, group in df_bat_raw.groupby('steam_id_64'):
        avg_elo = group['match_elo'].mean()
        if pd.isna(avg_elo):
            avg_elo = 1500

        above_avg = group['match_elo'] > avg_elo
        below_or_equal = group['match_elo'] <= avg_elo

        sum_entry_k = group['entry_kills'].sum()
        sum_entry_d = group['entry_deaths'].sum()
        entry_total = sum_entry_k + sum_entry_d
        duel_win_rate = sum_entry_k / entry_total if entry_total > 0 else 0

        rows.append({
            'steam_id_64': pid,
            # Despite the "diff" in the names, these hold the mean K/D in
            # matches above / at-or-below the player's own average match ELO.
            'bat_kd_diff_high_elo': group.loc[above_avg, 'kd_ratio'].mean(),
            'bat_kd_diff_low_elo': group.loc[below_or_equal, 'kd_ratio'].mean(),
            'bat_avg_duel_win_rate': duel_win_rate,
        })
    return pd.DataFrame(rows, columns=_BAT_COLUMNS)


def _compute_hps(conn, valid_ids, placeholders):
    """Return per-player pressure (HPS) features from match-level scores.

    Only the final score is used, so the metric is the mean rating in close
    matches (final score difference <= 3); comebacks cannot be detected
    without round history.
    """
    query_hps_match = f"""
    SELECT mp.steam_id_64, mp.kd_ratio, mp.rating,
           m.score_team1, m.score_team2, mp.team_id, m.winner_team
    FROM fact_match_players mp
    JOIN fact_matches m ON mp.match_id = m.match_id
    WHERE mp.steam_id_64 IN ({placeholders})
    """
    df_hps_raw = pd.read_sql_query(query_hps_match, conn, params=valid_ids)

    rows = []
    for pid, group in df_hps_raw.groupby('steam_id_64'):
        # Local Series instead of a new column: the groupby slice stays untouched.
        score_diff = (group['score_team1'] - group['score_team2']).abs()
        rows.append({
            'steam_id_64': pid,
            'hps_close_match_rating': group.loc[score_diff <= 3, 'rating'].mean(),
        })
    return pd.DataFrame(rows, columns=_HPS_COLUMNS)


def _load_pistol_kills(conn, valid_ids, placeholders):
    """Return per-player kill counts in rounds 1 and 13."""
    query_ptl = f"""
    SELECT ev.attacker_steam_id as steam_id_64, COUNT(*) as pistol_kills
    FROM fact_round_events ev
    WHERE ev.event_type = 'kill' AND ev.round_num IN (1, 13)
      AND ev.attacker_steam_id IN ({placeholders})
    GROUP BY ev.attacker_steam_id
    """
    return pd.read_sql_query(query_ptl, conn, params=valid_ids)


def _load_side_stats(conn, valid_ids, placeholders, side):
    """Return per-player average rating and K/D for one side ('ct' or 't')."""
    query = (
        f"SELECT steam_id_64, AVG(rating) as {side}_rating, AVG(kd_ratio) as {side}_kd "
        f"FROM fact_match_players_{side} "
        f"WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64"
    )
    return pd.read_sql_query(query, conn, params=valid_ids)


def load_comprehensive_data(conn, min_matches=5):
    """Load the per-player feature table used by `calculate_full_scores`.

    Args:
        conn: An open sqlite3 connection to the L2 database.
        min_matches: Minimum number of rows in fact_match_players a player
            needs to be included.

    Returns:
        A DataFrame with one row per qualifying player, or None when no
        player meets `min_matches`.
    """
    print("Loading Comprehensive Data...")

    # 1. Base player list & basic stats
    df = _load_basic_stats(conn, min_matches)
    valid_ids = tuple(df['steam_id_64'].tolist())
    if not valid_ids:
        print("No players found.")
        return None

    # NOTE: one bound parameter is used per player id. SQLite caps the number
    # of bound parameters per statement (SQLITE_MAX_VARIABLE_NUMBER), so a very
    # large player set would need chunking or a temp table.
    placeholders = ','.join(['?'] * len(valid_ids))

    df = _add_basic_derived_features(df)

    # 2. STA (Stability)
    print("Calculating STA...")
    df = df.merge(
        _compute_sta(conn, valid_ids, placeholders), on='steam_id_64', how='left'
    )

    # 3. BAT (Battle)
    print("Calculating BAT...")
    df = df.merge(
        _compute_bat(conn, valid_ids, placeholders), on='steam_id_64', how='left'
    )

    # 4. HPS (Pressure)
    print("Calculating HPS...")
    df_hps = _compute_hps(conn, valid_ids, placeholders)
    df['hps_clutch_rate'] = df['sum_clutches'] / df['total_matches']
    df = df.merge(df_hps, on='steam_id_64', how='left')

    # 5. PTL (Pistol)
    print("Calculating PTL...")
    df_ptl = _load_pistol_kills(conn, valid_ids, placeholders)
    df = df.merge(df_ptl, on='steam_id_64', how='left')
    # Players without any pistol-round kill are absent from the COUNT(*) query,
    # so the left merge leaves NaN. Their true count is 0; leaving NaN would
    # make normalize() treat them as average instead of lowest.
    df['pistol_kills'] = df['pistol_kills'].fillna(0)
    df['ptl_pistol_kills_per_match'] = df['pistol_kills'] / df['total_matches']

    # 6. T/CT
    print("Calculating T/CT...")
    df_ct = _load_side_stats(conn, valid_ids, placeholders, 'ct')
    df_t = _load_side_stats(conn, valid_ids, placeholders, 't')
    df = df.merge(df_ct, on='steam_id_64', how='left').merge(
        df_t, on='steam_id_64', how='left'
    )

    # 7. UTIL
    print("Calculating UTIL...")
    df['util_avg_dmg'] = df['sum_util_dmg'] / df['total_matches']
    df['util_avg_flash_time'] = df['sum_flash_time'] / df['total_matches']

    return df


def normalize(series):
    """Min-max scale a numeric Series to the 0-100 range.

    Missing values are replaced by the Series mean before scaling. When the
    values carry no spread (all equal) or no information at all (all missing
    or empty), every entry gets the neutral score 50.

    Args:
        series: A numeric pandas Series.

    Returns:
        A Series with the same index holding values in [0, 100].
    """
    filled = series.fillna(series.mean())
    low, high = filled.min(), filled.max()
    # An all-NaN column has a NaN mean, so fillna cannot repair it, and
    # NaN == NaN is False; it must be checked explicitly.
    if filled.isna().all() or high == low:
        return pd.Series(50, index=filled.index)
    return (filled - low) / (high - low) * 100


def calculate_full_scores(df):
    """Add normalized components (`n_*`) and dimension scores (`score_*`).

    Args:
        df: The feature table produced by `load_comprehensive_data`.

    Returns:
        A copy of `df` with the extra columns; the input is not modified.
    """
    df = df.copy()

    # --- BAT ---
    # Weights: Rating(30), KD(20), ADR(15), KAST(10), Duel(15), HighELO(10)
    df['n_bat_rating'] = normalize(df['basic_avg_rating'])
    df['n_bat_kd'] = normalize(df['basic_avg_kd'])
    df['n_bat_adr'] = normalize(df['basic_avg_adr'])
    df['n_bat_kast'] = normalize(df['basic_avg_kast'])
    df['n_bat_duel'] = normalize(df['bat_avg_duel_win_rate'])
    df['n_bat_high'] = normalize(df['bat_kd_diff_high_elo'])
    df['score_BAT'] = (
        0.3 * df['n_bat_rating']
        + 0.2 * df['n_bat_kd']
        + 0.15 * df['n_bat_adr']
        + 0.1 * df['n_bat_kast']
        + 0.15 * df['n_bat_duel']
        + 0.1 * df['n_bat_high']
    )

    # --- STA ---
    # Weights: Consistency(40), WinPerf(20), LossPerf(30), Fatigue(10)
    # Volatility and fatigue are "lower is better", hence the 100 - x inversion.
    df['n_sta_vol'] = normalize(df['sta_rating_volatility'])
    df['n_sta_win'] = normalize(df['sta_win_rating'])
    df['n_sta_loss'] = normalize(df['sta_loss_rating'])
    df['n_sta_fat'] = normalize(df['sta_fatigue_decay'])
    df['score_STA'] = (
        0.4 * (100 - df['n_sta_vol'])
        + 0.2 * df['n_sta_win']
        + 0.3 * df['n_sta_loss']
        + 0.1 * (100 - df['n_sta_fat'])
    )

    # --- HPS --- clutch rate and close-match rating, equally weighted
    df['n_hps_clutch'] = normalize(df['hps_clutch_rate'])
    df['n_hps_close'] = normalize(df['hps_close_match_rating'])
    df['score_HPS'] = 0.5 * df['n_hps_clutch'] + 0.5 * df['n_hps_close']

    # --- PTL --- pistol kills per match
    df['score_PTL'] = normalize(df['ptl_pistol_kills_per_match'])

    # --- T/CT --- CT rating and T rating, equally weighted
    df['n_ct'] = normalize(df['ct_rating'])
    df['n_t'] = normalize(df['t_rating'])
    df['score_TCT'] = 0.5 * df['n_ct'] + 0.5 * df['n_t']

    # --- UTIL --- utility damage (60) and flash time (40)
    df['n_util_dmg'] = normalize(df['util_avg_dmg'])
    df['n_util_flash'] = normalize(df['util_avg_flash_time'])
    df['score_UTIL'] = 0.6 * df['n_util_dmg'] + 0.4 * df['n_util_flash']

    return df


def main():
    """Load features, score every player and print the top five by BAT."""
    conn = get_db_connection()
    try:
        df = load_comprehensive_data(conn)
        if df is None:
            return

        results = calculate_full_scores(df)

        print("\n--- Final Full Scores ---")
        cols = ['steam_id_64', 'score_BAT', 'score_STA', 'score_UTIL',
                'score_TCT', 'score_HPS', 'score_PTL']
        print(results[cols].sort_values('score_BAT', ascending=False).head(5))

        print("\n--- Available Features Used ---")
        print("BAT: Rating, KD, ADR, KAST, Duel Win Rate, High ELO Performance")
        print("STA: Volatility, Win Rating, Loss Rating, Fatigue Decay")
        print("HPS: Clutch Rate, Close Match Rating")
        print("PTL: Pistol Kills per Match")
        print("T/CT: CT Rating, T Rating")
        print("UTIL: Util Dmg, Flash Duration")
    finally:
        conn.close()


if __name__ == "__main__":
    main()