from web.database import query_db, get_db, execute_db import sqlite3 import pandas as pd import numpy as np class FeatureService: @staticmethod def get_player_features(steam_id): sql = "SELECT * FROM dm_player_features WHERE steam_id_64 = ?" return query_db('l3', sql, [steam_id], one=True) @staticmethod def get_players_list(page=1, per_page=20, sort_by='rating', search=None): offset = (page - 1) * per_page # Sort Mapping sort_map = { 'rating': 'basic_avg_rating', 'kd': 'basic_avg_kd', 'kast': 'basic_avg_kast', 'matches': 'matches_played' } order_col = sort_map.get(sort_by, 'basic_avg_rating') from web.services.stats_service import StatsService # Helper to attach match counts def attach_match_counts(player_list): if not player_list: return ids = [p['steam_id_64'] for p in player_list] # Batch query for counts from L2 placeholders = ','.join('?' for _ in ids) sql = f""" SELECT steam_id_64, COUNT(*) as cnt FROM fact_match_players WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64 """ counts = query_db('l2', sql, ids) cnt_dict = {r['steam_id_64']: r['cnt'] for r in counts} for p in player_list: p['matches_played'] = cnt_dict.get(p['steam_id_64'], 0) if search: # Get all matching players l2_players, _ = StatsService.get_players(page=1, per_page=100, search=search) if not l2_players: return [], 0 steam_ids = [p['steam_id_64'] for p in l2_players] placeholders = ','.join('?' 
for _ in steam_ids) sql = f"SELECT * FROM dm_player_features WHERE steam_id_64 IN ({placeholders})" features = query_db('l3', sql, steam_ids) f_dict = {f['steam_id_64']: f for f in features} # Get counts for sorting count_sql = f"SELECT steam_id_64, COUNT(*) as cnt FROM fact_match_players WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64" counts = query_db('l2', count_sql, steam_ids) cnt_dict = {r['steam_id_64']: r['cnt'] for r in counts} merged = [] for p in l2_players: f = f_dict.get(p['steam_id_64']) m = dict(p) if f: m.update(dict(f)) else: # Fallback Calc stats = StatsService.get_player_basic_stats(p['steam_id_64']) if stats: m['basic_avg_rating'] = stats['rating'] m['basic_avg_kd'] = stats['kd'] m['basic_avg_kast'] = stats['kast'] else: m['basic_avg_rating'] = 0 m['basic_avg_kd'] = 0 m['basic_avg_kast'] = 0 m['matches_played'] = cnt_dict.get(p['steam_id_64'], 0) merged.append(m) merged.sort(key=lambda x: x.get(order_col, 0) or 0, reverse=True) total = len(merged) start = (page - 1) * per_page end = start + per_page return merged[start:end], total else: # Browse mode l3_count = query_db('l3', "SELECT COUNT(*) as cnt FROM dm_player_features", one=True)['cnt'] if l3_count == 0 or sort_by == 'matches': if sort_by == 'matches': sql = """ SELECT steam_id_64, COUNT(*) as cnt FROM fact_match_players GROUP BY steam_id_64 ORDER BY cnt DESC LIMIT ? OFFSET ? """ top_ids = query_db('l2', sql, [per_page, offset]) if not top_ids: return [], 0 total = query_db('l2', "SELECT COUNT(DISTINCT steam_id_64) as cnt FROM fact_match_players", one=True)['cnt'] ids = [r['steam_id_64'] for r in top_ids] l2_players = StatsService.get_players_by_ids(ids) # Merge logic merged = [] p_ph = ','.join('?' 
for _ in ids) f_sql = f"SELECT * FROM dm_player_features WHERE steam_id_64 IN ({p_ph})" features = query_db('l3', f_sql, ids) f_dict = {f['steam_id_64']: f for f in features} p_dict = {p['steam_id_64']: p for p in l2_players} for r in top_ids: sid = r['steam_id_64'] p = p_dict.get(sid) if not p: continue m = dict(p) f = f_dict.get(sid) if f: m.update(dict(f)) else: stats = StatsService.get_player_basic_stats(sid) if stats: m['basic_avg_rating'] = stats['rating'] m['basic_avg_kd'] = stats['kd'] m['basic_avg_kast'] = stats['kast'] else: m['basic_avg_rating'] = 0 m['basic_avg_kd'] = 0 m['basic_avg_kast'] = 0 m['matches_played'] = r['cnt'] merged.append(m) return merged, total # L3 empty fallback l2_players, total = StatsService.get_players(page, per_page, sort_by=None) merged = [] attach_match_counts(l2_players) for p in l2_players: m = dict(p) stats = StatsService.get_player_basic_stats(p['steam_id_64']) if stats: m['basic_avg_rating'] = stats['rating'] m['basic_avg_kd'] = stats['kd'] m['basic_avg_kast'] = stats['kast'] else: m['basic_avg_rating'] = 0 m['basic_avg_kd'] = 0 m['basic_avg_kast'] = 0 m['matches_played'] = p.get('matches_played', 0) merged.append(m) if sort_by != 'rating': merged.sort(key=lambda x: x.get(order_col, 0) or 0, reverse=True) return merged, total # Normal L3 browse sql = f"SELECT * FROM dm_player_features ORDER BY {order_col} DESC LIMIT ? OFFSET ?" 
features = query_db('l3', sql, [per_page, offset]) total = query_db('l3', "SELECT COUNT(*) as cnt FROM dm_player_features", one=True)['cnt'] if not features: return [], total steam_ids = [f['steam_id_64'] for f in features] l2_players = StatsService.get_players_by_ids(steam_ids) p_dict = {p['steam_id_64']: p for p in l2_players} merged = [] for f in features: m = dict(f) p = p_dict.get(f['steam_id_64']) if p: m.update(dict(p)) else: m['username'] = f['steam_id_64'] m['avatar_url'] = None merged.append(m) return merged, total @staticmethod def rebuild_all_features(min_matches=5): """ Refreshes the L3 Data Mart with full feature calculations. """ from web.config import Config from web.services.web_service import WebService import json l3_db_path = Config.DB_L3_PATH l2_db_path = Config.DB_L2_PATH # Get Team Players lineups = WebService.get_lineups() team_player_ids = set() for lineup in lineups: if lineup['player_ids_json']: try: ids = json.loads(lineup['player_ids_json']) # Ensure IDs are strings team_player_ids.update([str(i) for i in ids]) except: pass if not team_player_ids: print("No players found in any team lineup. 
Skipping L3 rebuild.") return 0 conn_l2 = sqlite3.connect(l2_db_path) conn_l2.row_factory = sqlite3.Row try: print(f"Loading L2 data for {len(team_player_ids)} players...") df = FeatureService._load_and_calculate_dataframe(conn_l2, list(team_player_ids)) if df is None or df.empty: print("No data to process.") return 0 print("Calculating Scores...") df = FeatureService._calculate_ultimate_scores(df) print("Saving to L3...") conn_l3 = sqlite3.connect(l3_db_path) cursor = conn_l3.cursor() # Ensure columns exist in DataFrame match DB columns cursor.execute("PRAGMA table_info(dm_player_features)") valid_cols = [r[1] for r in cursor.fetchall()] # Filter DF columns df_cols = [c for c in df.columns if c in valid_cols] df_to_save = df[df_cols].copy() df_to_save['updated_at'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S') # Generate Insert SQL print(f"DEBUG: Saving {len(df_to_save.columns)} columns to L3. Sample side_kd_ct: {df_to_save.get('side_kd_ct', pd.Series([0])).iloc[0]}") placeholders = ','.join(['?'] * len(df_to_save.columns)) cols_str = ','.join(df_to_save.columns) sql = f"INSERT OR REPLACE INTO dm_player_features ({cols_str}) VALUES ({placeholders})" data = df_to_save.values.tolist() cursor.executemany(sql, data) conn_l3.commit() conn_l3.close() return len(df) except Exception as e: print(f"Rebuild Error: {e}") import traceback traceback.print_exc() return 0 finally: conn_l2.close() @staticmethod def _load_and_calculate_dataframe(conn, player_ids): if not player_ids: return None placeholders = ','.join(['?'] * len(player_ids)) # 1. 
Basic Stats query_basic = f""" SELECT steam_id_64, COUNT(*) as matches_played, SUM(round_total) as rounds_played, AVG(rating) as basic_avg_rating, AVG(kd_ratio) as basic_avg_kd, AVG(adr) as basic_avg_adr, AVG(kast) as basic_avg_kast, AVG(rws) as basic_avg_rws, SUM(headshot_count) as sum_hs, SUM(kills) as sum_kills, SUM(deaths) as sum_deaths, SUM(first_kill) as sum_fk, SUM(first_death) as sum_fd, SUM(clutch_1v1) as sum_1v1, SUM(clutch_1v2) as sum_1v2, SUM(clutch_1v3) as sum_1v3, SUM(clutch_1v4) as sum_1v4, SUM(clutch_1v5) as sum_1v5, SUM(kill_2) as sum_2k, SUM(kill_3) as sum_3k, SUM(kill_4) as sum_4k, SUM(kill_5) as sum_5k, SUM(assisted_kill) as sum_assist, SUM(perfect_kill) as sum_perfect, SUM(revenge_kill) as sum_revenge, SUM(awp_kill) as sum_awp, SUM(jump_count) as sum_jump, SUM(mvp_count) as sum_mvps, SUM(planted_bomb) as sum_plants, SUM(defused_bomb) as sum_defuses, SUM(CASE WHEN flash_assists > 0 THEN flash_assists WHEN assists > assisted_kill THEN assists - assisted_kill ELSE 0 END) as sum_flash_assists, SUM(throw_harm) as sum_util_dmg, SUM(flash_time) as sum_flash_time, SUM(flash_enemy) as sum_flash_enemy, SUM(flash_team) as sum_flash_team, SUM(util_flash_usage) as sum_util_flash, SUM(util_smoke_usage) as sum_util_smoke, SUM(util_molotov_usage) as sum_util_molotov, SUM(util_he_usage) as sum_util_he, SUM(util_decoy_usage) as sum_util_decoy FROM fact_match_players WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64 """ df = pd.read_sql_query(query_basic, conn, params=player_ids) if df.empty: return None # Basic Derived df['basic_headshot_rate'] = df['sum_hs'] / df['sum_kills'].replace(0, 1) df['basic_avg_headshot_kills'] = df['sum_hs'] / df['matches_played'] df['basic_avg_first_kill'] = df['sum_fk'] / df['matches_played'] df['basic_avg_first_death'] = df['sum_fd'] / df['matches_played'] df['basic_first_kill_rate'] = df['sum_fk'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1) df['basic_first_death_rate'] = df['sum_fd'] / (df['sum_fk'] + 
df['sum_fd']).replace(0, 1) df['basic_avg_kill_2'] = df['sum_2k'] / df['matches_played'] df['basic_avg_kill_3'] = df['sum_3k'] / df['matches_played'] df['basic_avg_kill_4'] = df['sum_4k'] / df['matches_played'] df['basic_avg_kill_5'] = df['sum_5k'] / df['matches_played'] # New Metrics df['basic_multi_kill_rate'] = (df['sum_2k'] + df['sum_3k'] + df['sum_4k'] + df['sum_5k']) / df['rounds_played'].replace(0, 1) df['basic_total_1v1'] = df['sum_1v1'] df['basic_total_1v2'] = df['sum_1v2'] df['basic_total_1v3'] = df['sum_1v3'] df['basic_total_1v4'] = df['sum_1v4'] df['basic_total_1v5'] = df['sum_1v5'] df['basic_avg_assisted_kill'] = df['sum_assist'] / df['matches_played'] df['basic_avg_perfect_kill'] = df['sum_perfect'] / df['matches_played'] df['basic_avg_revenge_kill'] = df['sum_revenge'] / df['matches_played'] df['basic_avg_awp_kill'] = df['sum_awp'] / df['matches_played'] df['basic_avg_jump_count'] = df['sum_jump'] / df['matches_played'] df['basic_avg_mvps'] = df['sum_mvps'] / df['matches_played'] df['basic_avg_plants'] = df['sum_plants'] / df['matches_played'] df['basic_avg_defuses'] = df['sum_defuses'] / df['matches_played'] df['basic_avg_flash_assists'] = df['sum_flash_assists'] / df['matches_played'] # UTIL Basic df['util_avg_nade_dmg'] = df['sum_util_dmg'] / df['matches_played'] df['util_avg_flash_time'] = df['sum_flash_time'] / df['matches_played'] df['util_avg_flash_enemy'] = df['sum_flash_enemy'] / df['matches_played'] valid_ids = tuple(df['steam_id_64'].tolist()) placeholders = ','.join(['?'] * len(valid_ids)) # 2. 
STA (Detailed) query_sta = f""" SELECT mp.steam_id_64, mp.rating, mp.is_win, m.start_time, m.duration FROM fact_match_players mp JOIN fact_matches m ON mp.match_id = m.match_id WHERE mp.steam_id_64 IN ({placeholders}) ORDER BY mp.steam_id_64, m.start_time """ df_matches = pd.read_sql_query(query_sta, conn, params=valid_ids) sta_list = [] for pid, group in df_matches.groupby('steam_id_64'): group = group.sort_values('start_time') last_30 = group.tail(30) # Fatigue Calc # Simple heuristic: split matches by day, compare early (first 3) vs late (rest) group['date'] = pd.to_datetime(group['start_time'], unit='s').dt.date day_counts = group.groupby('date').size() busy_days = day_counts[day_counts >= 4].index # Days with 4+ matches fatigue_decays = [] for day in busy_days: day_matches = group[group['date'] == day] if len(day_matches) >= 4: early_rating = day_matches.head(3)['rating'].mean() late_rating = day_matches.tail(len(day_matches) - 3)['rating'].mean() fatigue_decays.append(early_rating - late_rating) avg_fatigue = np.mean(fatigue_decays) if fatigue_decays else 0 sta_list.append({ 'steam_id_64': pid, 'sta_last_30_rating': last_30['rating'].mean(), 'sta_win_rating': group[group['is_win']==1]['rating'].mean(), 'sta_loss_rating': group[group['is_win']==0]['rating'].mean(), 'sta_rating_volatility': group.tail(10)['rating'].std() if len(group) > 1 else 0, 'sta_time_rating_corr': group['duration'].corr(group['rating']) if len(group)>2 and group['rating'].std() > 0 else 0, 'sta_fatigue_decay': avg_fatigue }) df = df.merge(pd.DataFrame(sta_list), on='steam_id_64', how='left') # 3. 
BAT (High ELO) query_elo = f""" SELECT mp.steam_id_64, mp.kd_ratio, (SELECT AVG(group_origin_elo) FROM fact_match_teams fmt WHERE fmt.match_id = mp.match_id AND group_origin_elo > 0) as elo FROM fact_match_players mp WHERE mp.steam_id_64 IN ({placeholders}) """ df_elo = pd.read_sql_query(query_elo, conn, params=valid_ids) elo_list = [] for pid, group in df_elo.groupby('steam_id_64'): avg = group['elo'].mean() or 1000 elo_list.append({ 'steam_id_64': pid, 'bat_kd_diff_high_elo': group[group['elo'] > avg]['kd_ratio'].mean(), 'bat_kd_diff_low_elo': group[group['elo'] <= avg]['kd_ratio'].mean() }) df = df.merge(pd.DataFrame(elo_list), on='steam_id_64', how='left') # Duel Win Rate query_duel = f""" SELECT steam_id_64, SUM(entry_kills) as ek, SUM(entry_deaths) as ed FROM fact_match_players WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64 """ df_duel = pd.read_sql_query(query_duel, conn, params=valid_ids) df_duel['bat_avg_duel_win_rate'] = df_duel['ek'] / (df_duel['ek'] + df_duel['ed']).replace(0, 1) df = df.merge(df_duel[['steam_id_64', 'bat_avg_duel_win_rate']], on='steam_id_64', how='left') # 4. HPS # Clutch Rate df['hps_clutch_win_rate_1v1'] = df['sum_1v1'] / df['matches_played'] df['hps_clutch_win_rate_1v3_plus'] = df['sum_1v3p'] / df['matches_played'] # Prepare Detailed Event Data for HPS (Comeback), PTL (KD), and T/CT # A. Determine Side Info using fact_match_teams # 1. Get Match Teams query_teams = f""" SELECT match_id, group_fh_role, group_uids FROM fact_match_teams WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders})) """ df_teams = pd.read_sql_query(query_teams, conn, params=valid_ids) # 2. Get Player UIDs query_uids = f"SELECT match_id, steam_id_64, uid FROM fact_match_players WHERE steam_id_64 IN ({placeholders})" df_uids = pd.read_sql_query(query_uids, conn, params=valid_ids) # 3. 
Get Match Meta (Start Time for MR12/MR15) query_meta = f"SELECT match_id, start_time FROM fact_matches WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))" df_meta = pd.read_sql_query(query_meta, conn, params=valid_ids) df_meta['halftime_round'] = np.where(df_meta['start_time'] > 1695772800, 12, 15) # CS2 Release Date approx # 4. Build FH Side DataFrame fh_rows = [] if not df_teams.empty and not df_uids.empty: match_teams = {} # match_id -> [(role, [uids])] for _, row in df_teams.iterrows(): mid = row['match_id'] role = row['group_fh_role'] # 1=CT, 0=T try: uids = str(row['group_uids']).split(',') uids = [u.strip() for u in uids if u.strip()] except: uids = [] if mid not in match_teams: match_teams[mid] = [] match_teams[mid].append((role, uids)) for _, row in df_uids.iterrows(): mid = row['match_id'] sid = row['steam_id_64'] uid = str(row['uid']) if mid in match_teams: for role, uids in match_teams[mid]: if uid in uids: fh_rows.append({ 'match_id': mid, 'steam_id_64': sid, 'fh_side': 'CT' if role == 1 else 'T' }) break df_fh_sides = pd.DataFrame(fh_rows) if not df_fh_sides.empty: df_fh_sides = df_fh_sides.merge(df_meta[['match_id', 'halftime_round']], on='match_id', how='left') # B. Get Kill Events query_events = f""" SELECT match_id, round_num, attacker_steam_id, victim_steam_id, event_type, is_headshot, event_time FROM fact_round_events WHERE event_type='kill' AND (attacker_steam_id IN ({placeholders}) OR victim_steam_id IN ({placeholders})) """ df_events = pd.read_sql_query(query_events, conn, params=valid_ids + valid_ids) # C. 
Get Round Scores query_rounds = f""" SELECT match_id, round_num, ct_score, t_score, winner_side FROM fact_rounds WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders})) """ df_rounds = pd.read_sql_query(query_rounds, conn, params=valid_ids) # Fix missing winner_side by calculating from score changes if not df_rounds.empty: df_rounds = df_rounds.sort_values(['match_id', 'round_num']).reset_index(drop=True) df_rounds['prev_ct'] = df_rounds.groupby('match_id')['ct_score'].shift(1).fillna(0) df_rounds['prev_t'] = df_rounds.groupby('match_id')['t_score'].shift(1).fillna(0) # Determine winner based on score increment df_rounds['ct_win'] = (df_rounds['ct_score'] > df_rounds['prev_ct']) df_rounds['t_win'] = (df_rounds['t_score'] > df_rounds['prev_t']) df_rounds['calculated_winner'] = np.where(df_rounds['ct_win'], 'CT', np.where(df_rounds['t_win'], 'T', None)) # Force overwrite winner_side with calculated winner since DB data is unreliable (mostly NULL) df_rounds['winner_side'] = df_rounds['calculated_winner'] # Ensure winner_side is string type to match side ('CT', 'T') df_rounds['winner_side'] = df_rounds['winner_side'].astype(str) # Fallback for Round 1 if still None (e.g. if prev is 0 and score is 1) # Logic above handles Round 1 correctly (prev is 0). # --- Process Logic --- # Logic above handles Round 1 correctly (prev is 0). # --- Process Logic --- has_events = not df_events.empty has_sides = not df_fh_sides.empty if has_events and has_sides: # 1. Attacker Side df_events = df_events.merge(df_fh_sides, left_on=['match_id', 'attacker_steam_id'], right_on=['match_id', 'steam_id_64'], how='left') df_events.rename(columns={'fh_side': 'att_fh_side'}, inplace=True) df_events.drop(columns=['steam_id_64'], inplace=True) # 2. 
Victim Side df_events = df_events.merge(df_fh_sides, left_on=['match_id', 'victim_steam_id'], right_on=['match_id', 'steam_id_64'], how='left', suffixes=('', '_vic')) df_events.rename(columns={'fh_side': 'vic_fh_side'}, inplace=True) df_events.drop(columns=['steam_id_64'], inplace=True) # 3. Determine Actual Side (CT/T) # Logic: If round <= halftime -> FH Side. Else -> Opposite. def calc_side(fh_side, round_num, halftime): if pd.isna(fh_side): return None if round_num <= halftime: return fh_side return 'T' if fh_side == 'CT' else 'CT' # Vectorized approach # Attacker mask_fh_att = df_events['round_num'] <= df_events['halftime_round'] df_events['attacker_side'] = np.where(mask_fh_att, df_events['att_fh_side'], np.where(df_events['att_fh_side'] == 'CT', 'T', 'CT')) # Victim mask_fh_vic = df_events['round_num'] <= df_events['halftime_round'] df_events['victim_side'] = np.where(mask_fh_vic, df_events['vic_fh_side'], np.where(df_events['vic_fh_side'] == 'CT', 'T', 'CT')) # Merge Scores df_events = df_events.merge(df_rounds, on=['match_id', 'round_num'], how='left') # --- BAT: Win Rate vs All --- # Removed as per request (Difficult to calculate / All Zeros) df['bat_win_rate_vs_all'] = 0 # --- HPS: Match Point & Comeback --- # Match Point Win Rate mp_rounds = df_rounds[((df_rounds['ct_score'] == 12) | (df_rounds['t_score'] == 12) | (df_rounds['ct_score'] == 15) | (df_rounds['t_score'] == 15))] if not mp_rounds.empty and has_sides: # Need player side for these rounds # Expand sides for all rounds q_all_rounds = f"SELECT match_id, round_num FROM fact_rounds WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))" df_all_rounds = pd.read_sql_query(q_all_rounds, conn, params=valid_ids) df_player_rounds = df_all_rounds.merge(df_fh_sides, on='match_id') mask_fh = df_player_rounds['round_num'] <= df_player_rounds['halftime_round'] df_player_rounds['side'] = np.where(mask_fh, df_player_rounds['fh_side'], 
np.where(df_player_rounds['fh_side'] == 'CT', 'T', 'CT')) # Filter for MP rounds # Join mp_rounds with df_player_rounds mp_player = df_player_rounds.merge(mp_rounds[['match_id', 'round_num', 'winner_side']], on=['match_id', 'round_num']) mp_player['is_win'] = (mp_player['side'] == mp_player['winner_side']).astype(int) hps_mp = mp_player.groupby('steam_id_64')['is_win'].mean().reset_index() hps_mp.rename(columns={'is_win': 'hps_match_point_win_rate'}, inplace=True) df = df.merge(hps_mp, on='steam_id_64', how='left') else: df['hps_match_point_win_rate'] = 0.5 # Comeback KD Diff # Attacker Context df_events['att_team_score'] = np.where(df_events['attacker_side'] == 'CT', df_events['ct_score'], df_events['t_score']) df_events['att_opp_score'] = np.where(df_events['attacker_side'] == 'CT', df_events['t_score'], df_events['ct_score']) df_events['is_comeback_att'] = (df_events['att_team_score'] + 4 <= df_events['att_opp_score']) # Victim Context df_events['vic_team_score'] = np.where(df_events['victim_side'] == 'CT', df_events['ct_score'], df_events['t_score']) df_events['vic_opp_score'] = np.where(df_events['victim_side'] == 'CT', df_events['t_score'], df_events['ct_score']) df_events['is_comeback_vic'] = (df_events['vic_team_score'] + 4 <= df_events['vic_opp_score']) att_k = df_events.groupby('attacker_steam_id').size() vic_d = df_events.groupby('victim_steam_id').size() cb_k = df_events[df_events['is_comeback_att']].groupby('attacker_steam_id').size() cb_d = df_events[df_events['is_comeback_vic']].groupby('victim_steam_id').size() kd_stats = pd.DataFrame({'k': att_k, 'd': vic_d, 'cb_k': cb_k, 'cb_d': cb_d}).fillna(0) kd_stats['kd'] = kd_stats['k'] / kd_stats['d'].replace(0, 1) kd_stats['cb_kd'] = kd_stats['cb_k'] / kd_stats['cb_d'].replace(0, 1) kd_stats['hps_comeback_kd_diff'] = kd_stats['cb_kd'] - kd_stats['kd'] kd_stats.index.name = 'steam_id_64' df = df.merge(kd_stats[['hps_comeback_kd_diff']], on='steam_id_64', how='left') # HPS: Losing Streak KD Diff # Logic: KD 
in rounds where team has lost >= 3 consecutive rounds vs Global KD # 1. Identify Streak Rounds if not df_rounds.empty: # Ensure sorted df_rounds = df_rounds.sort_values(['match_id', 'round_num']) # Shift to check previous results # We need to handle match boundaries. Groupby match_id is safer. # CT Loss Streak g = df_rounds.groupby('match_id') df_rounds['ct_lost_1'] = g['t_win'].shift(1).fillna(False) df_rounds['ct_lost_2'] = g['t_win'].shift(2).fillna(False) df_rounds['ct_lost_3'] = g['t_win'].shift(3).fillna(False) df_rounds['ct_in_loss_streak'] = (df_rounds['ct_lost_1'] & df_rounds['ct_lost_2'] & df_rounds['ct_lost_3']) # T Loss Streak df_rounds['t_lost_1'] = g['ct_win'].shift(1).fillna(False) df_rounds['t_lost_2'] = g['ct_win'].shift(2).fillna(False) df_rounds['t_lost_3'] = g['ct_win'].shift(3).fillna(False) df_rounds['t_in_loss_streak'] = (df_rounds['t_lost_1'] & df_rounds['t_lost_2'] & df_rounds['t_lost_3']) # Merge into events # df_events already has 'match_id', 'round_num', 'attacker_side' # We need to merge streak info streak_cols = df_rounds[['match_id', 'round_num', 'ct_in_loss_streak', 't_in_loss_streak']] df_events = df_events.merge(streak_cols, on=['match_id', 'round_num'], how='left') # Determine if attacker is in streak df_events['att_is_loss_streak'] = np.where( df_events['attacker_side'] == 'CT', df_events['ct_in_loss_streak'], np.where(df_events['attacker_side'] == 'T', df_events['t_in_loss_streak'], False) ) # Determine if victim is in streak (for deaths) df_events['vic_is_loss_streak'] = np.where( df_events['victim_side'] == 'CT', df_events['ct_in_loss_streak'], np.where(df_events['victim_side'] == 'T', df_events['t_in_loss_streak'], False) ) # Calculate KD in Streak ls_k = df_events[df_events['att_is_loss_streak']].groupby('attacker_steam_id').size() ls_d = df_events[df_events['vic_is_loss_streak']].groupby('victim_steam_id').size() ls_stats = pd.DataFrame({'ls_k': ls_k, 'ls_d': ls_d}).fillna(0) ls_stats['ls_kd'] = ls_stats['ls_k'] / 
ls_stats['ls_d'].replace(0, 1) # Compare with Global KD (from df_sides or recomputed) # Recompute global KD from events to be consistent g_k = df_events.groupby('attacker_steam_id').size() g_d = df_events.groupby('victim_steam_id').size() g_stats = pd.DataFrame({'g_k': g_k, 'g_d': g_d}).fillna(0) g_stats['g_kd'] = g_stats['g_k'] / g_stats['g_d'].replace(0, 1) ls_stats = ls_stats.join(g_stats[['g_kd']], how='outer').fillna(0) ls_stats['hps_losing_streak_kd_diff'] = ls_stats['ls_kd'] - ls_stats['g_kd'] ls_stats.index.name = 'steam_id_64' df = df.merge(ls_stats[['hps_losing_streak_kd_diff']], on='steam_id_64', how='left') else: df['hps_losing_streak_kd_diff'] = 0 # HPS: Momentum Multi-kill Rate # Team won 3+ rounds -> 2+ kills # Need sequential win info. # Hard to vectorise fully without accurate round sequence reconstruction including missing rounds. # Placeholder: 0 df['hps_momentum_multikill_rate'] = 0 # HPS: Tilt Rating Drop df['hps_tilt_rating_drop'] = 0 # HPS: Clutch Rating Rise df['hps_clutch_rating_rise'] = 0 # HPS: Undermanned Survival df['hps_undermanned_survival_time'] = 0 # --- PTL: Pistol Stats --- pistol_rounds = [1, 13] df_pistol = df_events[df_events['round_num'].isin(pistol_rounds)] if not df_pistol.empty: pk = df_pistol.groupby('attacker_steam_id').size() pd_death = df_pistol.groupby('victim_steam_id').size() p_stats = pd.DataFrame({'pk': pk, 'pd': pd_death}).fillna(0) p_stats['ptl_pistol_kd'] = p_stats['pk'] / p_stats['pd'].replace(0, 1) phs = df_pistol[df_pistol['is_headshot'] == 1].groupby('attacker_steam_id').size() p_stats['phs'] = phs p_stats['phs'] = p_stats['phs'].fillna(0) p_stats['ptl_pistol_util_efficiency'] = p_stats['phs'] / p_stats['pk'].replace(0, 1) p_stats.index.name = 'steam_id_64' df = df.merge(p_stats[['ptl_pistol_kd', 'ptl_pistol_util_efficiency']], on='steam_id_64', how='left') else: df['ptl_pistol_kd'] = 1.0 df['ptl_pistol_util_efficiency'] = 0.0 # --- T/CT Stats (Directly from L2 Side Tables) --- query_sides_l2 = f""" SELECT 
steam_id_64, 'CT' as side, COUNT(*) as matches, SUM(round_total) as rounds, AVG(rating2) as rating, SUM(kills) as kills, SUM(deaths) as deaths, SUM(assists) as assists, AVG(CAST(is_win as FLOAT)) as win_rate, SUM(first_kill) as fk, SUM(first_death) as fd, AVG(kast) as kast, AVG(rws) as rws, SUM(kill_2 + kill_3 + kill_4 + kill_5) as multi_kill_rounds, SUM(headshot_count) as hs FROM fact_match_players_ct WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64 UNION ALL SELECT steam_id_64, 'T' as side, COUNT(*) as matches, SUM(round_total) as rounds, AVG(rating2) as rating, SUM(kills) as kills, SUM(deaths) as deaths, SUM(assists) as assists, AVG(CAST(is_win as FLOAT)) as win_rate, SUM(first_kill) as fk, SUM(first_death) as fd, AVG(kast) as kast, AVG(rws) as rws, SUM(kill_2 + kill_3 + kill_4 + kill_5) as multi_kill_rounds, SUM(headshot_count) as hs FROM fact_match_players_t WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64 """ df_sides = pd.read_sql_query(query_sides_l2, conn, params=valid_ids + valid_ids) if not df_sides.empty: # Calculate Derived Rates per row before pivoting df_sides['rounds'] = df_sides['rounds'].replace(0, 1) # Avoid div by zero # KD Calculation (Sum of Kills / Sum of Deaths) df_sides['kd'] = df_sides['kills'] / df_sides['deaths'].replace(0, 1) # KAST Proxy (if KAST is 0) # KAST ~= (Kills + Assists + Survived) / Rounds # Survived = Rounds - Deaths if df_sides['kast'].mean() == 0: df_sides['survived'] = df_sides['rounds'] - df_sides['deaths'] df_sides['kast'] = (df_sides['kills'] + df_sides['assists'] + df_sides['survived']) / df_sides['rounds'] df_sides['fk_rate'] = df_sides['fk'] / df_sides['rounds'] df_sides['fd_rate'] = df_sides['fd'] / df_sides['rounds'] df_sides['mk_rate'] = df_sides['multi_kill_rounds'] / df_sides['rounds'] df_sides['hs_rate'] = df_sides['hs'] / df_sides['kills'].replace(0, 1) # Pivot # We want columns like side_rating_ct, side_rating_t, etc. 
pivoted = df_sides.pivot(index='steam_id_64', columns='side').reset_index() # Flatten MultiIndex columns new_cols = ['steam_id_64'] for col_name, side in pivoted.columns[1:]: # Map L2 column names to Feature names # rating -> side_rating_{side} # kd -> side_kd_{side} # win_rate -> side_win_rate_{side} # fk_rate -> side_first_kill_rate_{side} # fd_rate -> side_first_death_rate_{side} # kast -> side_kast_{side} # rws -> side_rws_{side} # mk_rate -> side_multikill_rate_{side} # hs_rate -> side_headshot_rate_{side} target_map = { 'rating': 'side_rating', 'kd': 'side_kd', 'win_rate': 'side_win_rate', 'fk_rate': 'side_first_kill_rate', 'fd_rate': 'side_first_death_rate', 'kast': 'side_kast', 'rws': 'side_rws', 'mk_rate': 'side_multikill_rate', 'hs_rate': 'side_headshot_rate' } if col_name in target_map: new_cols.append(f"{target_map[col_name]}_{side.lower()}") else: new_cols.append(f"{col_name}_{side.lower()}") # Fallback for intermediate cols if needed pivoted.columns = new_cols # Select only relevant columns to merge cols_to_merge = [c for c in new_cols if c.startswith('side_')] cols_to_merge.append('steam_id_64') df = df.merge(pivoted[cols_to_merge], on='steam_id_64', how='left') # Fill NaN with 0 for side stats for c in cols_to_merge: if c != 'steam_id_64': df[c] = df[c].fillna(0) # Add calculated diffs for scoring/display if needed (or just let template handle it) # KD Diff for L3 Score calculation if 'side_rating_ct' in df.columns and 'side_rating_t' in df.columns: df['side_kd_diff_ct_t'] = df['side_rating_ct'] - df['side_rating_t'] else: df['side_kd_diff_ct_t'] = 0 # --- Obj Override from Main Table (sum_plants, sum_defuses) --- # side_obj_t = sum_plants / matches_played # side_obj_ct = sum_defuses / matches_played df['side_obj_t'] = df['sum_plants'] / df['matches_played'].replace(0, 1) df['side_obj_ct'] = df['sum_defuses'] / df['matches_played'].replace(0, 1) df['side_obj_t'] = df['side_obj_t'].fillna(0) df['side_obj_ct'] = df['side_obj_ct'].fillna(0) else: # 
Fallbacks cols = ['hps_match_point_win_rate', 'hps_comeback_kd_diff', 'ptl_pistol_kd', 'ptl_pistol_util_efficiency', 'side_rating_ct', 'side_rating_t', 'side_first_kill_rate_ct', 'side_first_kill_rate_t', 'side_kd_diff_ct_t', 'bat_win_rate_vs_all', 'hps_losing_streak_kd_diff', 'hps_momentum_multikill_rate', 'hps_tilt_rating_drop', 'hps_clutch_rating_rise', 'hps_undermanned_survival_time', 'side_win_rate_ct', 'side_win_rate_t', 'side_kd_ct', 'side_kd_t', 'side_kast_ct', 'side_kast_t', 'side_rws_ct', 'side_rws_t', 'side_first_death_rate_ct', 'side_first_death_rate_t', 'side_multikill_rate_ct', 'side_multikill_rate_t', 'side_headshot_rate_ct', 'side_headshot_rate_t', 'side_obj_ct', 'side_obj_t'] for c in cols: df[c] = 0 df['hps_match_point_win_rate'] = df['hps_match_point_win_rate'].fillna(0.5) df['bat_win_rate_vs_all'] = df['bat_win_rate_vs_all'].fillna(0.5) df['hps_losing_streak_kd_diff'] = df['hps_losing_streak_kd_diff'].fillna(0) # HPS Pressure Entry Rate (Entry Kills per Round in Losing Matches) q_mp_team = f"SELECT match_id, steam_id_64, is_win, entry_kills, round_total FROM fact_match_players WHERE steam_id_64 IN ({placeholders})" df_mp_team = pd.read_sql_query(q_mp_team, conn, params=valid_ids) if not df_mp_team.empty: losing_matches = df_mp_team[df_mp_team['is_win'] == 0] if not losing_matches.empty: # Sum Entry Kills / Sum Rounds pressure_entry = losing_matches.groupby('steam_id_64')[['entry_kills', 'round_total']].sum().reset_index() pressure_entry['hps_pressure_entry_rate'] = pressure_entry['entry_kills'] / pressure_entry['round_total'].replace(0, 1) df = df.merge(pressure_entry[['steam_id_64', 'hps_pressure_entry_rate']], on='steam_id_64', how='left') if 'hps_pressure_entry_rate' not in df.columns: df['hps_pressure_entry_rate'] = 0 df['hps_pressure_entry_rate'] = df['hps_pressure_entry_rate'].fillna(0) # 5. 
PTL (Additional Features: Kills & Multi) query_ptl = f""" SELECT ev.attacker_steam_id as steam_id_64, COUNT(*) as pistol_kills FROM fact_round_events ev WHERE ev.event_type = 'kill' AND ev.round_num IN (1, 13) AND ev.attacker_steam_id IN ({placeholders}) GROUP BY ev.attacker_steam_id """ df_ptl = pd.read_sql_query(query_ptl, conn, params=valid_ids) if not df_ptl.empty: df = df.merge(df_ptl, on='steam_id_64', how='left') df['ptl_pistol_kills'] = df['pistol_kills'] / df['matches_played'] else: df['ptl_pistol_kills'] = 0 query_ptl_multi = f""" SELECT attacker_steam_id as steam_id_64, COUNT(*) as multi_cnt FROM ( SELECT match_id, round_num, attacker_steam_id, COUNT(*) as k FROM fact_round_events WHERE event_type = 'kill' AND round_num IN (1, 13) AND attacker_steam_id IN ({placeholders}) GROUP BY match_id, round_num, attacker_steam_id HAVING k >= 2 ) GROUP BY attacker_steam_id """ df_ptl_multi = pd.read_sql_query(query_ptl_multi, conn, params=valid_ids) if not df_ptl_multi.empty: df = df.merge(df_ptl_multi, on='steam_id_64', how='left') df['ptl_pistol_multikills'] = df['multi_cnt'] / df['matches_played'] else: df['ptl_pistol_multikills'] = 0 # PTL Win Rate (Pandas Logic using fixed winner_side) if not df_rounds.empty and has_sides: # Ensure df_player_rounds exists if 'df_player_rounds' not in locals(): q_all_rounds = f"SELECT match_id, round_num FROM fact_rounds WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))" df_all_rounds = pd.read_sql_query(q_all_rounds, conn, params=valid_ids) df_player_rounds = df_all_rounds.merge(df_fh_sides, on='match_id') mask_fh = df_player_rounds['round_num'] <= df_player_rounds['halftime_round'] df_player_rounds['side'] = np.where(mask_fh, df_player_rounds['fh_side'], np.where(df_player_rounds['fh_side'] == 'CT', 'T', 'CT')) # Filter for Pistol Rounds (1 and after halftime) # Use halftime_round logic (MR12: 13, MR15: 16) player_pistol = df_player_rounds[ (df_player_rounds['round_num'] == 1) | 
(df_player_rounds['round_num'] == df_player_rounds['halftime_round'] + 1) ].copy() # Merge with df_rounds to get calculated winner_side df_rounds['winner_side'] = df_rounds['winner_side'].astype(str) # Ensure string for merge safety player_pistol = player_pistol.merge(df_rounds[['match_id', 'round_num', 'winner_side']], on=['match_id', 'round_num'], how='left') # Calculate Win # Ensure winner_side is in player_pistol columns after merge if 'winner_side' in player_pistol.columns: player_pistol['is_win'] = (player_pistol['side'] == player_pistol['winner_side']).astype(int) else: player_pistol['is_win'] = 0 ptl_wins = player_pistol.groupby('steam_id_64')['is_win'].agg(['sum', 'count']).reset_index() ptl_wins.rename(columns={'sum': 'pistol_wins', 'count': 'pistol_rounds'}, inplace=True) ptl_wins['ptl_pistol_win_rate'] = ptl_wins['pistol_wins'] / ptl_wins['pistol_rounds'].replace(0, 1) df = df.merge(ptl_wins[['steam_id_64', 'ptl_pistol_win_rate']], on='steam_id_64', how='left') else: df['ptl_pistol_win_rate'] = 0.5 df['ptl_pistol_multikills'] = df['ptl_pistol_multikills'].fillna(0) df['ptl_pistol_win_rate'] = df['ptl_pistol_win_rate'].fillna(0.5) # 7. UTIL (Enhanced with Prop Frequency) # Usage Rate: Average number of grenades purchased per round df['util_usage_rate'] = ( df['sum_util_flash'] + df['sum_util_smoke'] + df['sum_util_molotov'] + df['sum_util_he'] + df['sum_util_decoy'] ) / df['rounds_played'].replace(0, 1) * 100 # Multiply by 100 to make it comparable to other metrics (e.g. 
@staticmethod
def _calculate_ultimate_scores(df):
    """Add the six composite ``score_*`` columns to a copy of ``df``.

    Every input feature is min-max normalised to 0..100 across the rows of
    ``df`` and then combined with fixed weights into ``score_bat``,
    ``score_sta``, ``score_hps``, ``score_ptl``, ``score_tct`` and
    ``score_util``.

    Args:
        df: DataFrame with one row per player and the feature columns
            referenced below. Missing columns are tolerated.

    Returns:
        A new DataFrame (the input is not modified) with the score columns
        appended.
    """
    NEUTRAL = 50  # value used whenever a feature carries no information

    df = df.copy()

    def n(col, magnitude=False):
        """Min-max normalise ``df[col]`` to 0..100.

        Returns the scalar ``NEUTRAL`` when the column is absent, constant
        or entirely NaN. Individual NaN entries are also mapped to
        ``NEUTRAL`` so one missing feature cannot turn a player's whole
        composite score into NaN. With ``magnitude=True`` the absolute
        value of the column is normalised instead of the signed value.
        """
        if col not in df.columns:
            return NEUTRAL
        s = df[col]
        if magnitude:
            s = s.abs()
        lo, hi = s.min(), s.max()
        span = hi - lo
        # NaN span: empty or all-NaN column; zero span: constant column.
        if pd.isna(span) or span == 0:
            return NEUTRAL
        return ((s - lo) / span * 100).fillna(NEUTRAL)

    # NOTE(review): the BAT and STA weights below sum to 0.90 while the other
    # four dimensions sum to 1.00, so those two scores top out at 90.
    # Left unchanged here - confirm whether that is intended.

    # BAT (30%)
    df['score_bat'] = (
        0.25 * n('basic_avg_rating') +
        0.20 * n('basic_avg_kd') +
        0.15 * n('basic_avg_adr') +
        0.10 * n('bat_avg_duel_win_rate') +
        0.10 * n('bat_kd_diff_high_elo') +
        0.10 * n('basic_avg_kill_3')
    )

    # STA (15%)
    # Lower volatility is better, hence the inversion. For the time/rating
    # correlation the magnitude is normalised *before* inverting, so a
    # correlation near zero scores highest. Taking abs() of the already
    # normalised 0..100 value (as was done previously) had no effect.
    df['score_sta'] = (
        0.30 * (100 - n('sta_rating_volatility')) +
        0.30 * n('sta_loss_rating') +
        0.20 * n('sta_win_rating') +
        0.10 * (100 - n('sta_time_rating_corr', magnitude=True))
    )

    # HPS (20%)
    df['score_hps'] = (
        0.25 * n('sum_1v3p') +
        0.20 * n('hps_match_point_win_rate') +
        0.20 * n('hps_comeback_kd_diff') +
        0.15 * n('hps_pressure_entry_rate') +
        0.20 * n('basic_avg_rating')
    )

    # PTL (10%)
    df['score_ptl'] = (
        0.30 * n('ptl_pistol_kills') +
        0.30 * n('ptl_pistol_win_rate') +
        0.20 * n('ptl_pistol_kd') +
        0.20 * n('ptl_pistol_util_efficiency')
    )

    # T/CT (10%)
    df['score_tct'] = (
        0.35 * n('side_rating_ct') +
        0.35 * n('side_rating_t') +
        0.15 * n('side_first_kill_rate_ct') +
        0.15 * n('side_first_kill_rate_t')
    )

    # UTIL (10%)
    # Emphasize prop frequency (usage_rate)
    df['score_util'] = (
        0.35 * n('util_usage_rate') +
        0.25 * n('util_avg_nade_dmg') +
        0.20 * n('util_avg_flash_time') +
        0.20 * n('util_avg_flash_enemy')
    )

    return df


@staticmethod
def get_roster_features_distribution(target_steam_id):
    """Rank the target player's L3 scores within the active roster.

    The roster is taken from the first lineup returned by
    ``WebService.get_lineups()``; its ``player_ids_json`` field is parsed
    as a JSON list of ids.

    Args:
        target_steam_id: Steam id of the player to rank (compared as str).

    Returns:
        ``None`` when no roster ids can be read or no feature rows exist.
        Otherwise a dict keyed by score name (``score_bat`` ... ``score_util``)
        whose values are dicts with ``val`` (target's score), ``rank``
        (1 = best, ties share the best rank), ``total``, ``min``, ``max``
        and ``avg`` over the compared players.
    """
    import json

    from web.services.web_service import WebService

    metrics = ['score_bat', 'score_sta', 'score_hps',
               'score_ptl', 'score_tct', 'score_util']

    # 1. Get Active Roster IDs
    lineups = WebService.get_lineups()
    active_roster_ids = []
    if lineups:
        try:
            raw_ids = json.loads(lineups[0]['player_ids_json'])
            active_roster_ids = [str(uid) for uid in raw_ids]
        except (ValueError, TypeError, KeyError, IndexError):
            # Best effort: a missing, null or malformed roster payload is
            # treated the same as "no roster" rather than raising.
            active_roster_ids = []

    if not active_roster_ids:
        return None

    # 2. Fetch L3 features for all roster members
    placeholders = ','.join('?' for _ in active_roster_ids)
    sql = f"""
        SELECT steam_id_64, score_bat, score_sta, score_hps, score_ptl, score_tct, score_util
        FROM dm_player_features
        WHERE steam_id_64 IN ({placeholders})
    """
    rows = query_db('l3', sql, active_roster_ids)
    if not rows:
        return None

    # Keys are normalised to str so the lookup below works whether the
    # database hands the id back as text or as an integer.
    stats_map = {str(row['steam_id_64']): dict(row) for row in rows}
    target_steam_id = str(target_steam_id)

    # If target not in map (maybe no L3 data yet), default to 0
    if target_steam_id not in stats_map:
        stats_map[target_steam_id] = dict.fromkeys(metrics, 0)

    # 3. Calculate Distribution
    result = {}
    for m in metrics:
        # NULL / missing scores count as 0.
        values = sorted(
            (p.get(m, 0) or 0 for p in stats_map.values()),
            reverse=True,
        )
        target_val = stats_map[target_steam_id].get(m, 0) or 0
        result[m] = {
            'val': target_val,
            # The target is always present in stats_map, so index() cannot
            # fail; the first match gives tied players the best rank.
            'rank': values.index(target_val) + 1,
            'total': len(values),
            'min': min(values),
            'max': max(values),
            'avg': sum(values) / len(values),
        }

    return result