diff --git a/6D_README.md b/6D_README.md
new file mode 100644
index 0000000..99115ad
--- /dev/null
+++ b/6D_README.md
@@ -0,0 +1,110 @@
+# Player Ability Six-Dimension Radar: Calculation Principles
+
+This document describes how the YRTV system computes the player ability radar chart (six dimensions): the calculation principles, data sources, and the concrete formulas.
+
+## Overview
+
+The radar chart evaluates a player's overall strength across six core dimensions:
+1. **BAT (Battle / Aim)**: head-on dueling and raw aim
+2. **STA (Stability)**: consistency of performance and resistance to pressure
+3. **HPS (High Pressure / Clutch)**: performance in decisive moments and clutch situations
+4. **PTL (Pistol Specialist)**: pistol-round proficiency
+5. **SIDE (T/CT Preference)**: balance and impact across the attacking and defending sides
+6. **UTIL (Utility)**: utility usage efficiency and investment
+
+Before scoring, every metric is normalized and mapped onto a 0-100 range so that players can be compared side by side.
+
+---
+
+## Detailed Formulas
+
+Note: `n(col)` denotes Min-Max normalization of the given column.
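+
+For reference, a minimal sketch of this helper, mirroring the `normalize()` function used in `scripts/analyze_l3_full.py` (missing values fall back to the column mean, constant columns to a neutral 50):
+
+```python
+import pandas as pd
+
+def n(series: pd.Series) -> pd.Series:
+    """Min-Max normalize a feature column onto a 0-100 scale."""
+    s = series.fillna(series.mean())   # backfill missing values with the column mean
+    if s.max() == s.min():             # constant column: no spread to normalize, return a neutral 50
+        return pd.Series([50.0] * len(s), index=s.index)
+    return (s - s.min()) / (s.max() - s.min()) * 100
+```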
+
+### 1. BAT - Battle
+Measures raw aim, kill efficiency, and performance against high-level opposition.
+
+**Weighting formula:**
+```python
+Score = (
+    0.25 * n('Rating') +            # base Rating
+    0.20 * n('KD_Ratio') +          # kill/death ratio
+    0.15 * n('ADR') +               # average damage per round
+    0.10 * n('Duel_Win_Rate') +     # 1v1 duel win rate
+    0.10 * n('High_Elo_KD_Diff') +  # KD differential in higher-rated lobbies (pressure resistance)
+    0.10 * n('Multi_Kill_Avg')      # multi-kill ability (3k+)
+)
+```
+
+### 2. STA - Stability
+Measures how much a player's performance fluctuates and how they hold up when ahead or behind.
+
+**Weighting formula:**
+```python
+Score = (
+    0.30 * (100 - n('Rating_Volatility')) +  # rating volatility (lower is better)
+    0.30 * n('Loss_Rating') +                # Rating in lost matches (performance in losing efforts)
+    0.20 * n('Win_Rating') +                 # Rating in won matches
+    0.10 * (100 - abs(n('Time_Corr')))       # rating decline over time (endurance)
+)
+```
+
+### 3. HPS - High Pressure
+Measures composure ("big-heart" play) in clutches, match points, and other high-pressure situations.
+
+**Weighting formula:**
+```python
+Score = (
+    0.30 * n('Clutch_1v3+') +            # 1v3-and-worse clutches won
+    0.20 * n('Match_Point_Win_Rate') +   # win rate in match-point rounds
+    0.20 * n('Comeback_KD_Diff') +       # KD performance in comeback games
+    0.15 * n('Pressure_Entry_Rate') +    # opening-kill rate when trailing
+    0.15 * n('Rating')                   # baseline ability as a safety floor
+)
+```
+
+### 4. PTL - Pistol Specialist
+Measures dominance in the pistol rounds (rounds 1 & 13).
+
+**Weighting formula:**
+```python
+Score = (
+    0.40 * n('Pistol_Kills_Avg') +    # average pistol-round kills per match
+    0.40 * n('Pistol_Win_Rate') +     # pistol-round win rate
+    0.20 * n('Headshot_Kills_Avg')    # average headshot kills (pistol rounds rely heavily on headshots)
+)
+```
+
+### 5. SIDE - Side Preference
+Measures balance and dominance across the T (attacking) and CT (defending) sides.
+
+**Weighting formula:**
+```python
+Score = (
+    0.35 * n('CT_Rating') +            # CT-side Rating
+    0.35 * n('T_Rating') +             # T-side Rating
+    0.15 * n('CT_First_Kill_Rate') +   # CT-side opening-kill rate (aggressive holds / picks)
+    0.15 * n('T_First_Kill_Rate')      # T-side opening-kill rate (entry ability)
+)
+```
+
+### 6. UTIL - Utility
+Measures how much a player invests in utility (purchase frequency) and how effectively it is used (damage / blinds).
+
+**Weighting formula:**
+```python
+Score = (
+    0.35 * n('Usage_Rate') +        # utility purchase/usage frequency
+    0.25 * n('Avg_Nade_Dmg') +      # average HE/molotov damage per match
+    0.20 * n('Avg_Flash_Time') +    # average enemy blind time per match
+    0.20 * n('Avg_Flash_Enemy')     # average enemies flashed per match
+)
+```
+
+---
+
+## Data Update Mechanism
+
+All feature data is recomputed automatically every day by the ETL pipeline (`ETL/L3_Builder.py`).
+- **Sources**: L2-layer fact tables such as `fact_match_players`, `fact_round_events`, and `fact_rounds`.
+- **Storage**: results are written to the `dm_player_features` table in `database/L3/L3_Features.sqlite`.
+- **Display**: the frontend Profile page reads this table and renders the radar together with the team distribution (`radar_dist`).
diff --git a/ETL/L2_Builder.py b/ETL/L2_Builder.py
index 36308d7..59253fa 100644
--- a/ETL/L2_Builder.py
+++ b/ETL/L2_Builder.py
@@ -117,6 +117,13 @@ class PlayerStats:
     year: str = ""
     sts_raw: str = ""
     level_info_raw: str = ""
+
+    # Utility Usage
+    util_flash_usage: int = 0
+    util_smoke_usage: int = 0
+    util_molotov_usage: int = 0
+    util_he_usage: int = 0
+    util_decoy_usage: int = 0
 
 @dataclass
 class RoundEvent:
@@ -799,6 +806,22 @@ class MatchParser:
         round_list = l_data.get('round_stat', [])
         for idx, r in enumerate(round_list):
+            # Utility Usage (Leetify)
+            bron = r.get('bron_equipment', {})
+            for sid, items in bron.items():
+                sid = str(sid)
+                if sid in self.match_data.players:
+                    p = self.match_data.players[sid]
+                    if isinstance(items, list):
+                        for item in items:
+                            if not isinstance(item, dict): continue
+                            name = item.get('WeaponName', '')
+                            if name == 'weapon_flashbang': p.util_flash_usage += 1
+                            elif name == 'weapon_smokegrenade': p.util_smoke_usage += 1
+                            elif name in ['weapon_molotov', 'weapon_incgrenade']: p.util_molotov_usage += 1
+                            elif name == 'weapon_hegrenade': p.util_he_usage += 1
+                            elif name == 'weapon_decoy': p.util_decoy_usage += 1
+
             rd = RoundData(
                 round_num=r.get('round', idx + 1),
                 winner_side='CT' if r.get('win_reason') in [7, 8, 9] else 'T',  # Approximate logic, need real enum
@@ -949,6 +972,21 @@ class MatchParser:
             # Check schema: 'current_score' -> ct/t
             cur_score = r.get('current_score', {})
 
+            # Utility Usage (Classic)
+            equiped = r.get('equiped', {})
+            for sid, items in equiped.items():
+                # Ensure sid is string
+                sid = str(sid)
+                if sid in self.match_data.players:
+                    p = self.match_data.players[sid]
+                    if isinstance(items, list):
+                        for item in items:
+                            if item == 'flashbang': p.util_flash_usage += 1
+                            elif item == 'smokegrenade': p.util_smoke_usage += 1
+                            elif item in ['molotov', 'incgrenade']: p.util_molotov_usage += 1
+                            elif item == 'hegrenade': p.util_he_usage += 1
+                            elif item == 'decoy': p.util_decoy_usage += 1
+
             rd = RoundData(
                 round_num=idx + 1,
                 winner_side='None',  # Default to None if unknown
@@ -1214,7 +1252,8 @@ def save_match(cursor, m: MatchData):
         "many_assists_cnt3", "many_assists_cnt4", "many_assists_cnt5", "map", "match_code",
         "match_mode", "match_team_id", "match_time", "per_headshot", "perfect_kill",
         "planted_bomb", "revenge_kill", "round_total", "season",
-        "team_kill", "throw_harm", "throw_harm_enemy", "uid", "year", "sts_raw", "level_info_raw"
+        "team_kill", "throw_harm", "throw_harm_enemy", "uid", "year", "sts_raw", "level_info_raw",
+        "util_flash_usage", "util_smoke_usage", "util_molotov_usage", "util_he_usage", "util_decoy_usage"
     ]
     player_placeholders = ",".join(["?"] * len(player_columns))
     player_columns_sql = ",".join(player_columns)
@@ -1238,7 +1277,8 @@
             p.many_assists_cnt5, p.map, p.match_code, p.match_mode, p.match_team_id, p.match_time,
             p.per_headshot, p.perfect_kill, p.planted_bomb, p.revenge_kill, p.round_total, p.season,
             p.team_kill, p.throw_harm, p.throw_harm_enemy,
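+            # NOTE: this value order must stay in sync with player_columns above (the five util_* counters are appended last)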
- p.uid, p.year, p.sts_raw, p.level_info_raw + p.uid, p.year, p.sts_raw, p.level_info_raw, + p.util_flash_usage, p.util_smoke_usage, p.util_molotov_usage, p.util_he_usage, p.util_decoy_usage ] for sid, p in m.players.items(): diff --git a/ETL/L3_Builder.py b/ETL/L3_Builder.py index 610882c..035deb9 100644 --- a/ETL/L3_Builder.py +++ b/ETL/L3_Builder.py @@ -1,330 +1,48 @@ -import sqlite3 import logging import os -import numpy as np -import pandas as pd -from datetime import datetime +import sys + +# Add parent directory to path to allow importing web module +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from web.services.feature_service import FeatureService +from web.config import Config +import sqlite3 # Setup logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) -# Constants -L2_DB_PATH = 'database/L2/L2_Main.sqlite' -L3_DB_PATH = 'database/L3/L3_Features.sqlite' -SCHEMA_PATH = 'database/L3/schema.sql' +L3_DB_PATH = Config.DB_L3_PATH +SCHEMA_PATH = os.path.join(Config.BASE_DIR, 'database', 'L3', 'schema.sql') def init_db(): - if not os.path.exists('database/L3'): - os.makedirs('database/L3') + l3_dir = os.path.dirname(L3_DB_PATH) + if not os.path.exists(l3_dir): + os.makedirs(l3_dir) conn = sqlite3.connect(L3_DB_PATH) with open(SCHEMA_PATH, 'r', encoding='utf-8') as f: conn.executescript(f.read()) conn.commit() conn.close() - logger.info("L3 DB Initialized.") + logger.info("L3 DB Initialized/Updated with Schema.") -def get_db_connection(db_path): - conn = sqlite3.connect(db_path) - return conn - -def safe_div(a, b, default=0.0): - return a / b if b and b != 0 else default - -def calculate_basic_features(df): - if df.empty: - return {} +def main(): + logger.info("Starting L3 Builder (Delegating to FeatureService)...") - count = len(df) + # 1. 
Ensure Schema is up to date + init_db() - feats = { - 'total_matches': count, - 'basic_avg_rating': df['rating'].mean(), - 'basic_avg_kd': df['kd_ratio'].mean(), - 'basic_avg_adr': df['adr'].mean() if 'adr' in df.columns else 0.0, - 'basic_avg_kast': df['kast'].mean(), - 'basic_avg_rws': df['rws'].mean(), - 'basic_avg_headshot_kills': df['headshot_count'].sum() / count, - 'basic_headshot_rate': safe_div(df['headshot_count'].sum(), df['kills'].sum()), - 'basic_avg_first_kill': df['first_kill'].mean(), - 'basic_avg_first_death': df['first_death'].mean(), - 'basic_first_kill_rate': safe_div(df['first_kill'].sum(), df['first_kill'].sum() + df['first_death'].sum()), - 'basic_first_death_rate': safe_div(df['first_death'].sum(), df['first_kill'].sum() + df['first_death'].sum()), - - 'basic_avg_kill_2': df['kill_2'].mean(), - 'basic_avg_kill_3': df['kill_3'].mean(), - 'basic_avg_kill_4': df['kill_4'].mean(), - 'basic_avg_kill_5': df['kill_5'].mean(), - - 'basic_avg_assisted_kill': df['assisted_kill'].mean(), - 'basic_avg_perfect_kill': df['perfect_kill'].mean(), - 'basic_avg_revenge_kill': df['revenge_kill'].mean(), - 'basic_avg_awp_kill': df['awp_kill'].mean(), - 'basic_avg_jump_count': df['jump_count'].mean(), - } - return feats - -def calculate_sta_features(df): - if df.empty: - return {} - - df = df.sort_values('match_time') - last_30 = df.tail(30) - last_10 = df.tail(10) - - feats = { - 'sta_last_30_rating': last_30['rating'].mean(), - 'sta_win_rating': df[df['is_win'] == 1]['rating'].mean() if not df[df['is_win'] == 1].empty else 0.0, - 'sta_loss_rating': df[df['is_win'] == 0]['rating'].mean() if not df[df['is_win'] == 0].empty else 0.0, - 'sta_rating_volatility': last_10['rating'].std() if len(last_10) > 1 else 0.0, - } - - df['date'] = pd.to_datetime(df['match_time'], unit='s').dt.date - day_counts = df.groupby('date').size() - busy_days = day_counts[day_counts >= 4].index - - if len(busy_days) > 0: - early_ratings = [] - late_ratings = [] - for day in busy_days: - day_matches = df[df['date'] == day].sort_values('match_time') - early = day_matches.head(3) - late = day_matches.tail(len(day_matches) - 3) - early_ratings.extend(early['rating'].tolist()) - late_ratings.extend(late['rating'].tolist()) - feats['sta_fatigue_decay'] = np.mean(early_ratings) - np.mean(late_ratings) if early_ratings and late_ratings else 0.0 - else: - feats['sta_fatigue_decay'] = 0.0 - - df['hour_of_day'] = pd.to_datetime(df['match_time'], unit='s').dt.hour - if len(df) > 5: - corr = df['hour_of_day'].corr(df['rating']) - feats['sta_time_rating_corr'] = corr if not np.isnan(corr) else 0.0 - else: - feats['sta_time_rating_corr'] = 0.0 - - return feats - -def calculate_util_features(df): - if df.empty: - return {} - feats = { - 'util_avg_nade_dmg': df['throw_harm'].mean() if 'throw_harm' in df.columns else 0.0, - 'util_avg_flash_time': df['flash_duration'].mean() if 'flash_duration' in df.columns else 0.0, - 'util_avg_flash_enemy': df['flash_enemy'].mean() if 'flash_enemy' in df.columns else 0.0, - 'util_avg_flash_team': df['flash_team'].mean() if 'flash_team' in df.columns else 0.0, - 'util_usage_rate': (df['flash_enemy'].mean() + df['throw_harm'].mean() / 50.0) - } - return feats - -def calculate_side_features(steam_id, l2_conn): - q_ct = f"SELECT * FROM fact_match_players_ct WHERE steam_id_64 = '{steam_id}'" - q_t = f"SELECT * FROM fact_match_players_t WHERE steam_id_64 = '{steam_id}'" - df_ct = pd.read_sql_query(q_ct, l2_conn) - df_t = pd.read_sql_query(q_t, l2_conn) - - feats = {} - if not df_ct.empty: - 
feats['side_rating_ct'] = df_ct['rating'].mean() - feats['side_first_kill_rate_ct'] = safe_div(df_ct['first_kill'].sum(), df_ct['first_kill'].sum() + df_ct['first_death'].sum()) - feats['side_hold_success_rate_ct'] = 0.0 - feats['side_defused_bomb_count'] = df_ct['defused_bomb'].sum() if 'defused_bomb' in df_ct.columns else 0 - else: - feats.update({'side_rating_ct': 0.0, 'side_first_kill_rate_ct': 0.0, 'side_hold_success_rate_ct': 0.0, 'side_defused_bomb_count': 0}) - - if not df_t.empty: - feats['side_rating_t'] = df_t['rating'].mean() - feats['side_first_kill_rate_t'] = safe_div(df_t['first_kill'].sum(), df_t['first_kill'].sum() + df_t['first_death'].sum()) - feats['side_entry_success_rate_t'] = 0.0 - feats['side_planted_bomb_count'] = df_t['planted_bomb'].sum() if 'planted_bomb' in df_t.columns else 0 - else: - feats.update({'side_rating_t': 0.0, 'side_first_kill_rate_t': 0.0, 'side_entry_success_rate_t': 0.0, 'side_planted_bomb_count': 0}) - - feats['side_kd_diff_ct_t'] = (df_ct['kd_ratio'].mean() if not df_ct.empty else 0) - (df_t['kd_ratio'].mean() if not df_t.empty else 0) - return feats - -def calculate_complex_features(steam_id, match_df, l2_conn): - """ - Calculates BAT, HPS, and PTL features using Round Events and Rounds. - """ - feats = {} - - # 1. HPS: Clutch from match stats (easier part) - # clutch_1vX are wins. end_1vX are total attempts (assuming mapping logic). - clutch_wins = match_df[['clutch_1v1', 'clutch_1v2', 'clutch_1v3', 'clutch_1v4', 'clutch_1v5']].sum().sum() - clutch_attempts = match_df[['end_1v1', 'end_1v2', 'end_1v3', 'end_1v4', 'end_1v5']].sum().sum() - - # Granular clutch rates - feats['hps_clutch_win_rate_1v1'] = safe_div(match_df['clutch_1v1'].sum(), match_df['end_1v1'].sum()) - feats['hps_clutch_win_rate_1v2'] = safe_div(match_df['clutch_1v2'].sum(), match_df['end_1v2'].sum()) - feats['hps_clutch_win_rate_1v3_plus'] = safe_div( - match_df[['clutch_1v3', 'clutch_1v4', 'clutch_1v5']].sum().sum(), - match_df[['end_1v3', 'end_1v4', 'end_1v5']].sum().sum() - ) - - # 2. Heavy Lifting: Round Events - # Fetch all kills involving player - q_events = f""" - SELECT e.*, - p_vic.rank_score as victim_rank, - p_att.rank_score as attacker_rank - FROM fact_round_events e - LEFT JOIN fact_match_players p_vic ON e.match_id = p_vic.match_id AND e.victim_steam_id = p_vic.steam_id_64 - LEFT JOIN fact_match_players p_att ON e.match_id = p_att.match_id AND e.attacker_steam_id = p_att.steam_id_64 - WHERE (e.attacker_steam_id = '{steam_id}' OR e.victim_steam_id = '{steam_id}') - AND e.event_type = 'kill' - """ + # 2. Rebuild Features using the centralized logic try: - events = pd.read_sql_query(q_events, l2_conn) + count = FeatureService.rebuild_all_features() + logger.info(f"Successfully rebuilt features for {count} players.") except Exception as e: - logger.error(f"Error fetching events for {steam_id}: {e}") - events = pd.DataFrame() - - if not events.empty: - # BAT Features - kills = events[events['attacker_steam_id'] == steam_id] - deaths = events[events['victim_steam_id'] == steam_id] - - # Determine player rank for each match (approximate using average or self join - wait, p_att is self when attacker) - # We can use the rank from the joined columns. 
- - # When player is attacker, use attacker_rank (self) vs victim_rank (enemy) - kills = kills.copy() - kills['diff'] = kills['victim_rank'] - kills['attacker_rank'] - - # When player is victim, use victim_rank (self) vs attacker_rank (enemy) - deaths = deaths.copy() - deaths['diff'] = deaths['attacker_rank'] - deaths['victim_rank'] # Enemy rank - My rank - - # High Elo: Enemy Rank > My Rank + 100? Or just > My Rank? - # Let's say High Elo = Enemy Rank > My Rank - high_elo_kills = kills[kills['diff'] > 0].shape[0] - high_elo_deaths = deaths[deaths['diff'] > 0].shape[0] # Enemy (Attacker) > Me (Victim) - - low_elo_kills = kills[kills['diff'] < 0].shape[0] - low_elo_deaths = deaths[deaths['diff'] < 0].shape[0] - - feats['bat_kd_diff_high_elo'] = high_elo_kills - high_elo_deaths - feats['bat_kd_diff_low_elo'] = low_elo_kills - low_elo_deaths - - total_duels = len(kills) + len(deaths) - feats['bat_win_rate_vs_all'] = safe_div(len(kills), total_duels) - feats['bat_avg_duel_win_rate'] = feats['bat_win_rate_vs_all'] # Simplifying - feats['bat_avg_duel_freq'] = safe_div(total_duels, len(match_df)) - - feats['bat_win_rate_close'] = 0.0 # Placeholder for distance logic - feats['bat_win_rate_mid'] = 0.0 - feats['bat_win_rate_far'] = 0.0 - - else: - feats.update({ - 'bat_kd_diff_high_elo': 0, 'bat_kd_diff_low_elo': 0, - 'bat_win_rate_vs_all': 0.0, 'bat_avg_duel_win_rate': 0.0, - 'bat_avg_duel_freq': 0.0, 'bat_win_rate_close': 0.0, - 'bat_win_rate_mid': 0.0, 'bat_win_rate_far': 0.0 - }) - - # 3. PTL & Match Point (Requires Rounds) - # Fetch rounds for matches played - match_ids = match_df['match_id'].unique().tolist() - if not match_ids: - return feats - - match_ids_str = "'" + "','".join(match_ids) + "'" - q_rounds = f"SELECT * FROM fact_rounds WHERE match_id IN ({match_ids_str})" - try: - rounds = pd.read_sql_query(q_rounds, l2_conn) - except: - rounds = pd.DataFrame() - - if not rounds.empty and not events.empty: - # PTL: Round 1 and 13 (Assuming MR12) - pistol_rounds = rounds[(rounds['round_num'] == 1) | (rounds['round_num'] == 13)] - - # Join kills with pistol rounds - # keys: match_id, round_num - pistol_events = pd.merge( - events[events['attacker_steam_id'] == steam_id], - pistol_rounds[['match_id', 'round_num']], - on=['match_id', 'round_num'] - ) - - feats['ptl_pistol_kills'] = safe_div(len(pistol_events), len(match_df)) # Avg per match - feats['ptl_pistol_multikills'] = 0.0 # Complex to calc without grouping per round - feats['ptl_pistol_win_rate'] = 0.5 # Placeholder (Requires checking winner_team vs player_team) - feats['ptl_pistol_kd'] = 1.0 # Placeholder - feats['ptl_pistol_util_efficiency'] = 0.0 - - # Match Point (HPS) - # Logic: Score is 12 (MR12) or 15 (MR15). - # We assume MR12 for simplicity or check max score. - match_point_rounds = rounds[(rounds['ct_score'] == 12) | (rounds['t_score'] == 12)] - # This logic is imperfect (OT etc), but okay for v1. 
- feats['hps_match_point_win_rate'] = 0.5 # Placeholder - - else: - feats.update({ - 'ptl_pistol_kills': 0.0, 'ptl_pistol_multikills': 0.0, - 'ptl_pistol_win_rate': 0.0, 'ptl_pistol_kd': 0.0, - 'ptl_pistol_util_efficiency': 0.0, 'hps_match_point_win_rate': 0.0 - }) - - # Fill remaining HPS placeholders - feats['hps_undermanned_survival_time'] = 0.0 - feats['hps_pressure_entry_rate'] = 0.0 - feats['hps_momentum_multikill_rate'] = 0.0 - feats['hps_tilt_rating_drop'] = 0.0 - feats['hps_clutch_rating_rise'] = 0.0 - feats['hps_comeback_kd_diff'] = 0.0 - feats['hps_losing_streak_kd_diff'] = 0.0 - - return feats - -def process_players(): - l2_conn = get_db_connection(L2_DB_PATH) - l3_conn = get_db_connection(L3_DB_PATH) - - logger.info("Fetching player list...") - players = pd.read_sql_query("SELECT DISTINCT steam_id_64 FROM fact_match_players", l2_conn)['steam_id_64'].tolist() - - logger.info(f"Found {len(players)} players. Processing...") - - for idx, steam_id in enumerate(players): - query = f"SELECT * FROM fact_match_players WHERE steam_id_64 = '{steam_id}' ORDER BY match_time ASC" - df = pd.read_sql_query(query, l2_conn) - - if df.empty: - continue - - feats = calculate_basic_features(df) - feats.update(calculate_sta_features(df)) - feats.update(calculate_side_features(steam_id, l2_conn)) - feats.update(calculate_util_features(df)) - feats.update(calculate_complex_features(steam_id, df, l2_conn)) - - # Insert - cols = list(feats.keys()) - vals = list(feats.values()) - vals = [float(v) if isinstance(v, (np.float32, np.float64)) else v for v in vals] - vals = [int(v) if isinstance(v, (np.int32, np.int64)) else v for v in vals] - - col_str = ", ".join(cols) - q_marks = ", ".join(["?"] * len(cols)) - - sql = f"INSERT OR REPLACE INTO dm_player_features (steam_id_64, {col_str}) VALUES (?, {q_marks})" - l3_conn.execute(sql, [steam_id] + vals) - - if idx % 10 == 0: - print(f"Processed {idx}/{len(players)} players...", end='\r') - l3_conn.commit() - - l3_conn.commit() - l2_conn.close() - l3_conn.close() - logger.info("\nDone.") + logger.error(f"Error rebuilding features: {e}") + import traceback + traceback.print_exc() if __name__ == "__main__": - init_db() - process_players() + main() diff --git a/FeatureRDD.md b/FeatureRDD.md index d9b92e4..f3a79d1 100644 --- a/FeatureRDD.md +++ b/FeatureRDD.md @@ -12,7 +12,7 @@ 11. 每局2+杀/3+杀/4+杀/5杀次数(多杀) 12. 连续击杀累计次数(连杀) 15. **(New) 助攻次数 (assisted_kill)** -16. **(New) 无伤击杀 (perfect_kill)** +16. **(New) 完美击杀 (perfect_kill)** 17. **(New) 复仇击杀 (revenge_kill)** 18. **(New) AWP击杀数 (awp_kill)** 19. 
**(New) 总跳跃次数 (jump_count)** diff --git a/database/L2/L2_Main.sqlite b/database/L2/L2_Main.sqlite index 7652cfd..59f0b2e 100644 Binary files a/database/L2/L2_Main.sqlite and b/database/L2/L2_Main.sqlite differ diff --git a/database/L2/schema.sql b/database/L2/schema.sql index ae6338f..299fcb6 100644 --- a/database/L2/schema.sql +++ b/database/L2/schema.sql @@ -195,6 +195,13 @@ CREATE TABLE IF NOT EXISTS fact_match_players ( flash_assists INTEGER, flash_duration REAL, jump_count INTEGER, + + -- Utility Usage Stats (Parsed from round details) + util_flash_usage INTEGER DEFAULT 0, + util_smoke_usage INTEGER DEFAULT 0, + util_molotov_usage INTEGER DEFAULT 0, + util_he_usage INTEGER DEFAULT 0, + util_decoy_usage INTEGER DEFAULT 0, damage_total INTEGER, damage_received INTEGER, damage_receive INTEGER, @@ -365,6 +372,14 @@ CREATE TABLE IF NOT EXISTS fact_match_players_t ( year TEXT, sts_raw TEXT, level_info_raw TEXT, + + -- Utility Usage Stats (Parsed from round details) + util_flash_usage INTEGER DEFAULT 0, + util_smoke_usage INTEGER DEFAULT 0, + util_molotov_usage INTEGER DEFAULT 0, + util_he_usage INTEGER DEFAULT 0, + util_decoy_usage INTEGER DEFAULT 0, + PRIMARY KEY (match_id, steam_id_64), FOREIGN KEY (match_id) REFERENCES fact_matches(match_id) ON DELETE CASCADE ); @@ -466,6 +481,14 @@ CREATE TABLE IF NOT EXISTS fact_match_players_ct ( year TEXT, sts_raw TEXT, level_info_raw TEXT, + + -- Utility Usage Stats (Parsed from round details) + util_flash_usage INTEGER DEFAULT 0, + util_smoke_usage INTEGER DEFAULT 0, + util_molotov_usage INTEGER DEFAULT 0, + util_he_usage INTEGER DEFAULT 0, + util_decoy_usage INTEGER DEFAULT 0, + PRIMARY KEY (match_id, steam_id_64), FOREIGN KEY (match_id) REFERENCES fact_matches(match_id) ON DELETE CASCADE ); diff --git a/database/L3/L3_Features.sqlite b/database/L3/L3_Features.sqlite index 61798b6..bf53e97 100644 Binary files a/database/L3/L3_Features.sqlite and b/database/L3/L3_Features.sqlite differ diff --git a/database/L3/schema.sql b/database/L3/schema.sql index 095f133..97ab170 100644 --- a/database/L3/schema.sql +++ b/database/L3/schema.sql @@ -100,7 +100,17 @@ CREATE TABLE IF NOT EXISTS dm_player_features ( util_avg_flash_time REAL, util_avg_flash_enemy REAL, util_avg_flash_team REAL, - util_usage_rate REAL + util_usage_rate REAL, + + -- ========================================== + -- 7. Scores (0-100) + -- ========================================== + score_bat REAL, + score_sta REAL, + score_hps REAL, + score_ptl REAL, + score_tct REAL, + score_util REAL ); -- Optional: Detailed per-match feature table for time-series analysis diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..cdfecfa --- /dev/null +++ b/scripts/README.md @@ -0,0 +1 @@ +用于测试脚本目录。 \ No newline at end of file diff --git a/scripts/analyze_features.py b/scripts/analyze_features.py new file mode 100644 index 0000000..ab77cd3 --- /dev/null +++ b/scripts/analyze_features.py @@ -0,0 +1,214 @@ +import sqlite3 +import pandas as pd +import numpy as np +import os + +DB_L2_PATH = r'd:\Documents\trae_projects\yrtv\database\L2\L2_Main.sqlite' + +def get_db_connection(): + conn = sqlite3.connect(DB_L2_PATH) + conn.row_factory = sqlite3.Row + return conn + +def load_data_and_calculate(conn, min_matches=5): + print("Loading Basic Stats...") + + # 1. 
Basic Stats + query_basic = """ + SELECT + steam_id_64, + COUNT(*) as matches_played, + AVG(rating) as avg_rating, + AVG(kd_ratio) as avg_kd, + AVG(adr) as avg_adr, + AVG(kast) as avg_kast, + SUM(first_kill) as total_fk, + SUM(first_death) as total_fd, + SUM(clutch_1v1) + SUM(clutch_1v2) + SUM(clutch_1v3) + SUM(clutch_1v4) + SUM(clutch_1v5) as total_clutches, + SUM(throw_harm) as total_util_dmg, + SUM(flash_time) as total_flash_time, + SUM(flash_enemy) as total_flash_enemy + FROM fact_match_players + GROUP BY steam_id_64 + HAVING COUNT(*) >= ? + """ + df_basic = pd.read_sql_query(query_basic, conn, params=(min_matches,)) + + valid_ids = tuple(df_basic['steam_id_64'].tolist()) + if not valid_ids: + print("No players found.") + return None + placeholders = ','.join(['?'] * len(valid_ids)) + + # 2. Side Stats (T/CT) via Economy Table (which has side info) + print("Loading Side Stats via Round Map...") + # Map each round+player to a side + query_side_map = f""" + SELECT match_id, round_num, steam_id_64, side + FROM fact_round_player_economy + WHERE steam_id_64 IN ({placeholders}) + """ + try: + df_sides = pd.read_sql_query(query_side_map, conn, params=valid_ids) + + # Get all Kills + query_kills = f""" + SELECT match_id, round_num, attacker_steam_id as steam_id_64, COUNT(*) as kills + FROM fact_round_events + WHERE event_type = 'kill' + AND attacker_steam_id IN ({placeholders}) + GROUP BY match_id, round_num, attacker_steam_id + """ + df_kills = pd.read_sql_query(query_kills, conn, params=valid_ids) + + # Merge to get Kills per Side + df_merged = df_kills.merge(df_sides, on=['match_id', 'round_num', 'steam_id_64'], how='inner') + + # Aggregate + side_stats = df_merged.groupby(['steam_id_64', 'side'])['kills'].sum().unstack(fill_value=0) + side_stats.columns = [f'kills_{c.lower()}' for c in side_stats.columns] + + # Also need deaths to calc KD (approx) + # Assuming deaths are in events as victim + query_deaths = f""" + SELECT match_id, round_num, victim_steam_id as steam_id_64, COUNT(*) as deaths + FROM fact_round_events + WHERE event_type = 'kill' + AND victim_steam_id IN ({placeholders}) + GROUP BY match_id, round_num, victim_steam_id + """ + df_deaths = pd.read_sql_query(query_deaths, conn, params=valid_ids) + df_merged_d = df_deaths.merge(df_sides, on=['match_id', 'round_num', 'steam_id_64'], how='inner') + side_stats_d = df_merged_d.groupby(['steam_id_64', 'side'])['deaths'].sum().unstack(fill_value=0) + side_stats_d.columns = [f'deaths_{c.lower()}' for c in side_stats_d.columns] + + # Combine + df_side_final = side_stats.join(side_stats_d).fillna(0) + df_side_final['ct_kd'] = df_side_final.get('kills_ct', 0) / df_side_final.get('deaths_ct', 1).replace(0, 1) + df_side_final['t_kd'] = df_side_final.get('kills_t', 0) / df_side_final.get('deaths_t', 1).replace(0, 1) + + except Exception as e: + print(f"Side stats failed: {e}") + df_side_final = pd.DataFrame({'steam_id_64': list(valid_ids)}) + + # 3. PTL (Pistol) via Rounds 1 and 13 + print("Loading Pistol Stats via Rounds...") + query_pistol_kills = f""" + SELECT + ev.attacker_steam_id as steam_id_64, + COUNT(*) as pistol_kills + FROM fact_round_events ev + WHERE ev.attacker_steam_id IN ({placeholders}) + AND ev.event_type = 'kill' + AND ev.round_num IN (1, 13) + GROUP BY ev.attacker_steam_id + """ + df_ptl = pd.read_sql_query(query_pistol_kills, conn, params=valid_ids) + + # 4. 
HPS + print("Loading HPS Stats...") + query_close = f""" + SELECT mp.steam_id_64, AVG(mp.rating) as close_match_rating + FROM fact_match_players mp + JOIN fact_matches m ON mp.match_id = m.match_id + WHERE mp.steam_id_64 IN ({placeholders}) + AND ABS(m.score_team1 - m.score_team2) <= 3 + GROUP BY mp.steam_id_64 + """ + df_hps = pd.read_sql_query(query_close, conn, params=valid_ids) + + # 5. STA + query_sta = f""" + SELECT mp.steam_id_64, mp.rating, mp.is_win + FROM fact_match_players mp + WHERE mp.steam_id_64 IN ({placeholders}) + """ + df_matches = pd.read_sql_query(query_sta, conn, params=valid_ids) + sta_data = [] + for pid, group in df_matches.groupby('steam_id_64'): + rating_std = group['rating'].std() + win_rating = group[group['is_win']==1]['rating'].mean() + loss_rating = group[group['is_win']==0]['rating'].mean() + sta_data.append({'steam_id_64': pid, 'rating_std': rating_std, 'win_rating': win_rating, 'loss_rating': loss_rating}) + df_sta = pd.DataFrame(sta_data) + + # --- Merge All --- + df = df_basic.merge(df_side_final, on='steam_id_64', how='left') + df = df.merge(df_hps, on='steam_id_64', how='left') + df = df.merge(df_ptl, on='steam_id_64', how='left').fillna(0) + df = df.merge(df_sta, on='steam_id_64', how='left') + + return df + +def normalize_series(series): + min_v = series.min() + max_v = series.max() + if pd.isna(min_v) or pd.isna(max_v) or min_v == max_v: + return pd.Series([50]*len(series), index=series.index) + return (series - min_v) / (max_v - min_v) * 100 + +def calculate_scores(df): + df = df.copy() + + # BAT + df['n_rating'] = normalize_series(df['avg_rating']) + df['n_kd'] = normalize_series(df['avg_kd']) + df['n_adr'] = normalize_series(df['avg_adr']) + df['n_kast'] = normalize_series(df['avg_kast']) + df['score_BAT'] = 0.4*df['n_rating'] + 0.3*df['n_kd'] + 0.2*df['n_adr'] + 0.1*df['n_kast'] + + # STA + df['n_std'] = normalize_series(df['rating_std'].fillna(0)) + df['n_win_r'] = normalize_series(df['win_rating'].fillna(0)) + df['n_loss_r'] = normalize_series(df['loss_rating'].fillna(0)) + df['score_STA'] = 0.5*(100 - df['n_std']) + 0.25*df['n_win_r'] + 0.25*df['n_loss_r'] + + # UTIL + df['n_util_dmg'] = normalize_series(df['total_util_dmg'] / df['matches_played']) + df['n_flash'] = normalize_series(df['total_flash_time'] / df['matches_played']) + df['score_UTIL'] = 0.6*df['n_util_dmg'] + 0.4*df['n_flash'] + + # T/CT (Calculated from Event Logs) + df['n_ct_kd'] = normalize_series(df['ct_kd'].fillna(0)) + df['n_t_kd'] = normalize_series(df['t_kd'].fillna(0)) + df['score_TCT'] = 0.5*df['n_ct_kd'] + 0.5*df['n_t_kd'] + + # HPS + df['n_clutch'] = normalize_series(df['total_clutches'] / df['matches_played']) + df['n_close_r'] = normalize_series(df['close_match_rating'].fillna(0)) + df['score_HPS'] = 0.5*df['n_clutch'] + 0.5*df['n_close_r'] + + # PTL + df['n_pistol'] = normalize_series(df['pistol_kills'] / df['matches_played']) + df['score_PTL'] = df['n_pistol'] + + return df + +def main(): + conn = get_db_connection() + try: + df = load_data_and_calculate(conn) + if df is None: return + + # Debug: Print raw stats for checking T/CT issue + print("\n--- Raw T/CT Stats Sample ---") + if 'ct_kd' in df.columns: + print(df[['steam_id_64', 'ct_kd', 't_kd']].head()) + else: + print("CT/KD columns missing") + + results = calculate_scores(df) + + print("\n--- Final Dimension Scores (Top 5 by BAT) ---") + cols = ['steam_id_64', 'score_BAT', 'score_STA', 'score_UTIL', 'score_TCT', 'score_HPS', 'score_PTL'] + print(results[cols].sort_values('score_BAT', 
ascending=False).head(5)) + + except Exception as e: + print(f"Error: {e}") + import traceback + traceback.print_exc() + finally: + conn.close() + +if __name__ == "__main__": + main() diff --git a/scripts/analyze_l3_full.py b/scripts/analyze_l3_full.py new file mode 100644 index 0000000..41f4c09 --- /dev/null +++ b/scripts/analyze_l3_full.py @@ -0,0 +1,304 @@ +import sqlite3 +import pandas as pd +import numpy as np +import os + +DB_L2_PATH = r'd:\Documents\trae_projects\yrtv\database\L2\L2_Main.sqlite' + +def get_db_connection(): + conn = sqlite3.connect(DB_L2_PATH) + conn.row_factory = sqlite3.Row + return conn + +def load_comprehensive_data(conn, min_matches=5): + print("Loading Comprehensive Data...") + + # 1. Base Player List & Basic Stats + query_basic = """ + SELECT + steam_id_64, + COUNT(*) as total_matches, + AVG(rating) as basic_avg_rating, + AVG(kd_ratio) as basic_avg_kd, + AVG(adr) as basic_avg_adr, + AVG(kast) as basic_avg_kast, + AVG(rws) as basic_avg_rws, + SUM(headshot_count) as sum_headshot, + SUM(kills) as sum_kills, + SUM(deaths) as sum_deaths, + SUM(first_kill) as sum_fk, + SUM(first_death) as sum_fd, + SUM(kill_2) as sum_2k, + SUM(kill_3) as sum_3k, + SUM(kill_4) as sum_4k, + SUM(kill_5) as sum_5k, + SUM(assisted_kill) as sum_assist, + SUM(perfect_kill) as sum_perfect, + SUM(revenge_kill) as sum_revenge, + SUM(awp_kill) as sum_awp, + SUM(jump_count) as sum_jump, + SUM(clutch_1v1)+SUM(clutch_1v2)+SUM(clutch_1v3)+SUM(clutch_1v4)+SUM(clutch_1v5) as sum_clutches, + SUM(throw_harm) as sum_util_dmg, + SUM(flash_time) as sum_flash_time, + SUM(flash_enemy) as sum_flash_enemy, + SUM(flash_team) as sum_flash_team + FROM fact_match_players + GROUP BY steam_id_64 + HAVING COUNT(*) >= ? + """ + df = pd.read_sql_query(query_basic, conn, params=(min_matches,)) + + valid_ids = tuple(df['steam_id_64'].tolist()) + if not valid_ids: + print("No players found.") + return None + placeholders = ','.join(['?'] * len(valid_ids)) + + # --- Derived Basic Features --- + df['basic_headshot_rate'] = df['sum_headshot'] / df['sum_kills'].replace(0, 1) + df['basic_avg_headshot_kills'] = df['sum_headshot'] / df['total_matches'] + df['basic_avg_first_kill'] = df['sum_fk'] / df['total_matches'] + df['basic_avg_first_death'] = df['sum_fd'] / df['total_matches'] + df['basic_first_kill_rate'] = df['sum_fk'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1) # Opening Success + df['basic_first_death_rate'] = df['sum_fd'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1) + df['basic_avg_kill_2'] = df['sum_2k'] / df['total_matches'] + df['basic_avg_kill_3'] = df['sum_3k'] / df['total_matches'] + df['basic_avg_kill_4'] = df['sum_4k'] / df['total_matches'] + df['basic_avg_kill_5'] = df['sum_5k'] / df['total_matches'] + df['basic_avg_assisted_kill'] = df['sum_assist'] / df['total_matches'] + df['basic_avg_perfect_kill'] = df['sum_perfect'] / df['total_matches'] + df['basic_avg_revenge_kill'] = df['sum_revenge'] / df['total_matches'] + df['basic_avg_awp_kill'] = df['sum_awp'] / df['total_matches'] + df['basic_avg_jump_count'] = df['sum_jump'] / df['total_matches'] + + # 2. 
STA (Stability) - Detailed + print("Calculating STA...") + query_sta = f""" + SELECT mp.steam_id_64, mp.rating, mp.is_win, m.start_time + FROM fact_match_players mp + JOIN fact_matches m ON mp.match_id = m.match_id + WHERE mp.steam_id_64 IN ({placeholders}) + ORDER BY mp.steam_id_64, m.start_time + """ + df_matches = pd.read_sql_query(query_sta, conn, params=valid_ids) + + sta_list = [] + for pid, group in df_matches.groupby('steam_id_64'): + # Last 30 + last_30 = group.tail(30) + sta_last_30 = last_30['rating'].mean() + # Win/Loss + sta_win = group[group['is_win']==1]['rating'].mean() + sta_loss = group[group['is_win']==0]['rating'].mean() + # Volatility (Last 10) + sta_vol = group.tail(10)['rating'].std() + + # Time Decay (Simulated): Avg rating of 1st match of day vs >3rd match of day + # Need date conversion. + group['date'] = pd.to_datetime(group['start_time'], unit='s').dt.date + daily_counts = group.groupby('date').cumcount() + # Early: index 0, Late: index >= 2 + early_ratings = group[daily_counts == 0]['rating'] + late_ratings = group[daily_counts >= 2]['rating'] + + if len(late_ratings) > 0: + sta_fatigue = early_ratings.mean() - late_ratings.mean() # Positive means fatigue (drop) + else: + sta_fatigue = 0 + + sta_list.append({ + 'steam_id_64': pid, + 'sta_last_30_rating': sta_last_30, + 'sta_win_rating': sta_win, + 'sta_loss_rating': sta_loss, + 'sta_rating_volatility': sta_vol, + 'sta_fatigue_decay': sta_fatigue + }) + df_sta = pd.DataFrame(sta_list) + df = df.merge(df_sta, on='steam_id_64', how='left') + + # 3. BAT (Battle) - Detailed + print("Calculating BAT...") + # Need Match ELO + query_bat = f""" + SELECT mp.steam_id_64, mp.kd_ratio, mp.entry_kills, mp.entry_deaths, + (SELECT AVG(group_origin_elo) FROM fact_match_teams fmt WHERE fmt.match_id = mp.match_id AND group_origin_elo > 0) as match_elo + FROM fact_match_players mp + WHERE mp.steam_id_64 IN ({placeholders}) + """ + df_bat_raw = pd.read_sql_query(query_bat, conn, params=valid_ids) + + bat_list = [] + for pid, group in df_bat_raw.groupby('steam_id_64'): + avg_elo = group['match_elo'].mean() + if pd.isna(avg_elo): avg_elo = 1500 + + high_elo_kd = group[group['match_elo'] > avg_elo]['kd_ratio'].mean() + low_elo_kd = group[group['match_elo'] <= avg_elo]['kd_ratio'].mean() + + sum_entry_k = group['entry_kills'].sum() + sum_entry_d = group['entry_deaths'].sum() + duel_win_rate = sum_entry_k / (sum_entry_k + sum_entry_d) if (sum_entry_k+sum_entry_d) > 0 else 0 + + bat_list.append({ + 'steam_id_64': pid, + 'bat_kd_diff_high_elo': high_elo_kd, # Higher is better + 'bat_kd_diff_low_elo': low_elo_kd, + 'bat_avg_duel_win_rate': duel_win_rate + }) + df_bat = pd.DataFrame(bat_list) + df = df.merge(df_bat, on='steam_id_64', how='left') + + # 4. HPS (Pressure) - Detailed + print("Calculating HPS...") + # Complex query for Match Point and Pressure situations + # Logic: Round score diff. 
+ # Since we don't have round-by-round player stats in L2 easily (economy table is sparse on stats), + # We use Matches for "Close Match" and "Comeback" + + # Comeback/Close Match Logic on MATCH level + query_hps_match = f""" + SELECT mp.steam_id_64, mp.kd_ratio, mp.rating, m.score_team1, m.score_team2, mp.team_id, m.winner_team + FROM fact_match_players mp + JOIN fact_matches m ON mp.match_id = m.match_id + WHERE mp.steam_id_64 IN ({placeholders}) + """ + df_hps_raw = pd.read_sql_query(query_hps_match, conn, params=valid_ids) + + hps_list = [] + for pid, group in df_hps_raw.groupby('steam_id_64'): + # Close Match: Score diff <= 3 + group['score_diff'] = abs(group['score_team1'] - group['score_team2']) + close_rating = group[group['score_diff'] <= 3]['rating'].mean() + + # Comeback: Won match where score was close? + # Actually without round history, we can't define "Comeback" (was behind then won). + # We can define "Underdog Win": Won when ELO was lower? Or just Close Win. + # Let's use Close Match Rating as primary HPS metric from matches. + + hps_list.append({ + 'steam_id_64': pid, + 'hps_close_match_rating': close_rating + }) + df_hps = pd.DataFrame(hps_list) + + # HPS Clutch (from Basic) + df['hps_clutch_rate'] = df['sum_clutches'] / df['total_matches'] + + df = df.merge(df_hps, on='steam_id_64', how='left') + + # 5. PTL (Pistol) + print("Calculating PTL...") + # R1/R13 Kills + query_ptl = f""" + SELECT ev.attacker_steam_id as steam_id_64, COUNT(*) as pistol_kills + FROM fact_round_events ev + WHERE ev.event_type = 'kill' AND ev.round_num IN (1, 13) + AND ev.attacker_steam_id IN ({placeholders}) + GROUP BY ev.attacker_steam_id + """ + df_ptl = pd.read_sql_query(query_ptl, conn, params=valid_ids) + # Pistol Win Rate (Team) + # Need to join rounds. Too slow? + # Simplify: Just use Pistol Kills per Match (normalized) + + df = df.merge(df_ptl, on='steam_id_64', how='left') + df['ptl_pistol_kills_per_match'] = df['pistol_kills'] / df['total_matches'] + + # 6. T/CT + print("Calculating T/CT...") + query_ct = f"SELECT steam_id_64, AVG(rating) as ct_rating, AVG(kd_ratio) as ct_kd FROM fact_match_players_ct WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64" + query_t = f"SELECT steam_id_64, AVG(rating) as t_rating, AVG(kd_ratio) as t_kd FROM fact_match_players_t WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64" + df_ct = pd.read_sql_query(query_ct, conn, params=valid_ids) + df_t = pd.read_sql_query(query_t, conn, params=valid_ids) + df = df.merge(df_ct, on='steam_id_64', how='left').merge(df_t, on='steam_id_64', how='left') + + # 7. 
UTIL + print("Calculating UTIL...") + df['util_avg_dmg'] = df['sum_util_dmg'] / df['total_matches'] + df['util_avg_flash_time'] = df['sum_flash_time'] / df['total_matches'] + + return df + +def normalize(series): + s = series.fillna(series.mean()) + if s.max() == s.min(): return pd.Series([50]*len(s), index=s.index) + return (s - s.min()) / (s.max() - s.min()) * 100 + +def calculate_full_scores(df): + df = df.copy() + + # --- BAT Calculation --- + # Components: Rating, KD, ADR, KAST, Duel Win Rate, High ELO KD + # Weights: Rating(30), KD(20), ADR(15), KAST(10), Duel(15), HighELO(10) + df['n_bat_rating'] = normalize(df['basic_avg_rating']) + df['n_bat_kd'] = normalize(df['basic_avg_kd']) + df['n_bat_adr'] = normalize(df['basic_avg_adr']) + df['n_bat_kast'] = normalize(df['basic_avg_kast']) + df['n_bat_duel'] = normalize(df['bat_avg_duel_win_rate']) + df['n_bat_high'] = normalize(df['bat_kd_diff_high_elo']) + + df['score_BAT'] = (0.3*df['n_bat_rating'] + 0.2*df['n_bat_kd'] + 0.15*df['n_bat_adr'] + + 0.1*df['n_bat_kast'] + 0.15*df['n_bat_duel'] + 0.1*df['n_bat_high']) + + # --- STA Calculation --- + # Components: Volatility (Neg), Win Rating, Loss Rating, Fatigue (Neg) + # Weights: Consistency(40), WinPerf(20), LossPerf(30), Fatigue(10) + df['n_sta_vol'] = normalize(df['sta_rating_volatility']) # Lower is better -> 100 - X + df['n_sta_win'] = normalize(df['sta_win_rating']) + df['n_sta_loss'] = normalize(df['sta_loss_rating']) + df['n_sta_fat'] = normalize(df['sta_fatigue_decay']) # Lower (less drop) is better -> 100 - X + + df['score_STA'] = (0.4*(100-df['n_sta_vol']) + 0.2*df['n_sta_win'] + + 0.3*df['n_sta_loss'] + 0.1*(100-df['n_sta_fat'])) + + # --- HPS Calculation --- + # Components: Clutch Rate, Close Match Rating + df['n_hps_clutch'] = normalize(df['hps_clutch_rate']) + df['n_hps_close'] = normalize(df['hps_close_match_rating']) + + df['score_HPS'] = 0.5*df['n_hps_clutch'] + 0.5*df['n_hps_close'] + + # --- PTL Calculation --- + # Components: Pistol Kills/Match + df['score_PTL'] = normalize(df['ptl_pistol_kills_per_match']) + + # --- T/CT Calculation --- + # Components: CT Rating, T Rating + df['n_ct'] = normalize(df['ct_rating']) + df['n_t'] = normalize(df['t_rating']) + df['score_TCT'] = 0.5*df['n_ct'] + 0.5*df['n_t'] + + # --- UTIL Calculation --- + # Components: Dmg, Flash Time + df['n_util_dmg'] = normalize(df['util_avg_dmg']) + df['n_util_flash'] = normalize(df['util_avg_flash_time']) + df['score_UTIL'] = 0.6*df['n_util_dmg'] + 0.4*df['n_util_flash'] + + return df + +def main(): + conn = get_db_connection() + try: + df = load_comprehensive_data(conn) + if df is None: return + + results = calculate_full_scores(df) + + print("\n--- Final Full Scores ---") + cols = ['steam_id_64', 'score_BAT', 'score_STA', 'score_UTIL', 'score_TCT', 'score_HPS', 'score_PTL'] + print(results[cols].sort_values('score_BAT', ascending=False).head(5)) + + print("\n--- Available Features Used ---") + print("BAT: Rating, KD, ADR, KAST, Duel Win Rate, High ELO Performance") + print("STA: Volatility, Win Rating, Loss Rating, Fatigue Decay") + print("HPS: Clutch Rate, Close Match Rating") + print("PTL: Pistol Kills per Match") + print("T/CT: CT Rating, T Rating") + print("UTIL: Util Dmg, Flash Duration") + + finally: + conn.close() + +if __name__ == "__main__": + main() diff --git a/scripts/analyze_l3_ultimate.py b/scripts/analyze_l3_ultimate.py new file mode 100644 index 0000000..f2c2a43 --- /dev/null +++ b/scripts/analyze_l3_ultimate.py @@ -0,0 +1,499 @@ +import sqlite3 +import pandas as pd +import numpy as 
np +import os + +DB_L2_PATH = r'd:\Documents\trae_projects\yrtv\database\L2\L2_Main.sqlite' + +def get_db_connection(): + conn = sqlite3.connect(DB_L2_PATH) + conn.row_factory = sqlite3.Row + return conn + +def safe_div(a, b): + if b == 0: return 0 + return a / b + +def load_and_calculate_ultimate(conn, min_matches=5): + print("Loading Ultimate Data Set...") + + # 1. Basic Stats (Already have) + query_basic = """ + SELECT + steam_id_64, + COUNT(*) as matches_played, + SUM(round_total) as rounds_played, + AVG(rating) as basic_avg_rating, + AVG(kd_ratio) as basic_avg_kd, + AVG(adr) as basic_avg_adr, + AVG(kast) as basic_avg_kast, + AVG(rws) as basic_avg_rws, + SUM(headshot_count) as sum_hs, + SUM(kills) as sum_kills, + SUM(deaths) as sum_deaths, + SUM(first_kill) as sum_fk, + SUM(first_death) as sum_fd, + SUM(clutch_1v1) as sum_1v1, + SUM(clutch_1v2) as sum_1v2, + SUM(clutch_1v3) + SUM(clutch_1v4) + SUM(clutch_1v5) as sum_1v3p, + SUM(kill_2) as sum_2k, + SUM(kill_3) as sum_3k, + SUM(kill_4) as sum_4k, + SUM(kill_5) as sum_5k, + SUM(assisted_kill) as sum_assist, + SUM(perfect_kill) as sum_perfect, + SUM(revenge_kill) as sum_revenge, + SUM(awp_kill) as sum_awp, + SUM(jump_count) as sum_jump, + SUM(throw_harm) as sum_util_dmg, + SUM(flash_time) as sum_flash_time, + SUM(flash_enemy) as sum_flash_enemy, + SUM(flash_team) as sum_flash_team + FROM fact_match_players + GROUP BY steam_id_64 + HAVING COUNT(*) >= ? + """ + df = pd.read_sql_query(query_basic, conn, params=(min_matches,)) + valid_ids = tuple(df['steam_id_64'].tolist()) + if not valid_ids: return None + placeholders = ','.join(['?'] * len(valid_ids)) + + # --- Basic Derived --- + df['basic_headshot_rate'] = df['sum_hs'] / df['sum_kills'].replace(0, 1) + df['basic_avg_headshot_kills'] = df['sum_hs'] / df['matches_played'] + df['basic_avg_first_kill'] = df['sum_fk'] / df['matches_played'] + df['basic_avg_first_death'] = df['sum_fd'] / df['matches_played'] + df['basic_first_kill_rate'] = df['sum_fk'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1) + df['basic_first_death_rate'] = df['sum_fd'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1) + df['basic_avg_kill_2'] = df['sum_2k'] / df['matches_played'] + df['basic_avg_kill_3'] = df['sum_3k'] / df['matches_played'] + df['basic_avg_kill_4'] = df['sum_4k'] / df['matches_played'] + df['basic_avg_kill_5'] = df['sum_5k'] / df['matches_played'] + df['basic_avg_assisted_kill'] = df['sum_assist'] / df['matches_played'] + df['basic_avg_perfect_kill'] = df['sum_perfect'] / df['matches_played'] + df['basic_avg_revenge_kill'] = df['sum_revenge'] / df['matches_played'] + df['basic_avg_awp_kill'] = df['sum_awp'] / df['matches_played'] + df['basic_avg_jump_count'] = df['sum_jump'] / df['matches_played'] + + # 2. 
STA - Detailed Time Series + print("Calculating STA (Detailed)...") + query_sta = f""" + SELECT mp.steam_id_64, mp.rating, mp.is_win, m.start_time, m.duration + FROM fact_match_players mp + JOIN fact_matches m ON mp.match_id = m.match_id + WHERE mp.steam_id_64 IN ({placeholders}) + ORDER BY mp.steam_id_64, m.start_time + """ + df_matches = pd.read_sql_query(query_sta, conn, params=valid_ids) + + sta_list = [] + for pid, group in df_matches.groupby('steam_id_64'): + group = group.sort_values('start_time') + # Last 30 + last_30 = group.tail(30) + sta_last_30 = last_30['rating'].mean() + # Win/Loss + sta_win = group[group['is_win']==1]['rating'].mean() + sta_loss = group[group['is_win']==0]['rating'].mean() + # Volatility + sta_vol = group.tail(10)['rating'].std() + # Time Correlation (Duration vs Rating) + sta_time_corr = group['duration'].corr(group['rating']) if len(group) > 2 else 0 + # Fatigue + group['date'] = pd.to_datetime(group['start_time'], unit='s').dt.date + daily = group.groupby('date')['rating'].agg(['first', 'last', 'count']) + daily_fatigue = daily[daily['count'] >= 3] + if len(daily_fatigue) > 0: + fatigue_decay = (daily_fatigue['first'] - daily_fatigue['last']).mean() + else: + fatigue_decay = 0 + + sta_list.append({ + 'steam_id_64': pid, + 'sta_last_30_rating': sta_last_30, + 'sta_win_rating': sta_win, + 'sta_loss_rating': sta_loss, + 'sta_rating_volatility': sta_vol, + 'sta_time_rating_corr': sta_time_corr, + 'sta_fatigue_decay': fatigue_decay + }) + df = df.merge(pd.DataFrame(sta_list), on='steam_id_64', how='left') + + # 3. BAT - Distance & Advanced + print("Calculating BAT (Distance & Context)...") + # Distance Logic: Get all kills with positions + # We need to map positions. + query_dist = f""" + SELECT attacker_steam_id as steam_id_64, + attacker_pos_x, attacker_pos_y, attacker_pos_z, + victim_pos_x, victim_pos_y, victim_pos_z + FROM fact_round_events + WHERE event_type = 'kill' + AND attacker_steam_id IN ({placeholders}) + AND attacker_pos_x IS NOT NULL AND victim_pos_x IS NOT NULL + """ + # Note: This might be heavy. If memory issue, sample or chunk. + try: + df_dist = pd.read_sql_query(query_dist, conn, params=valid_ids) + if not df_dist.empty: + # Calc Euclidian Distance + df_dist['dist'] = np.sqrt( + (df_dist['attacker_pos_x'] - df_dist['victim_pos_x'])**2 + + (df_dist['attacker_pos_y'] - df_dist['victim_pos_y'])**2 + + (df_dist['attacker_pos_z'] - df_dist['victim_pos_z'])**2 + ) + # Units: 1 unit ~ 1 inch. + # Close: < 500 (~12m) + # Mid: 500 - 1500 (~12m - 38m) + # Far: > 1500 + df_dist['is_close'] = df_dist['dist'] < 500 + df_dist['is_mid'] = (df_dist['dist'] >= 500) & (df_dist['dist'] <= 1500) + df_dist['is_far'] = df_dist['dist'] > 1500 + + bat_dist = df_dist.groupby('steam_id_64').agg({ + 'is_close': 'mean', # % of kills that are close + 'is_mid': 'mean', + 'is_far': 'mean' + }).reset_index() + bat_dist.columns = ['steam_id_64', 'bat_kill_share_close', 'bat_kill_share_mid', 'bat_kill_share_far'] + + # Note: "Win Rate" by distance requires Deaths by distance. + # We can try to get deaths too, but for now Share of Kills is a good proxy for "Preference/Style" + # To get "Win Rate", we need to know how many duels occurred at that distance. 
+ # Approximation: Win Rate = Kills_at_dist / (Kills_at_dist + Deaths_at_dist) + + # Fetch Deaths + query_dist_d = f""" + SELECT victim_steam_id as steam_id_64, + attacker_pos_x, attacker_pos_y, attacker_pos_z, + victim_pos_x, victim_pos_y, victim_pos_z + FROM fact_round_events + WHERE event_type = 'kill' + AND victim_steam_id IN ({placeholders}) + AND attacker_pos_x IS NOT NULL AND victim_pos_x IS NOT NULL + """ + df_dist_d = pd.read_sql_query(query_dist_d, conn, params=valid_ids) + df_dist_d['dist'] = np.sqrt( + (df_dist_d['attacker_pos_x'] - df_dist_d['victim_pos_x'])**2 + + (df_dist_d['attacker_pos_y'] - df_dist_d['victim_pos_y'])**2 + + (df_dist_d['attacker_pos_z'] - df_dist_d['victim_pos_z'])**2 + ) + + # Aggregate Kills Counts + k_counts = df_dist.groupby('steam_id_64').agg( + k_close=('is_close', 'sum'), + k_mid=('is_mid', 'sum'), + k_far=('is_far', 'sum') + ) + # Aggregate Deaths Counts + df_dist_d['is_close'] = df_dist_d['dist'] < 500 + df_dist_d['is_mid'] = (df_dist_d['dist'] >= 500) & (df_dist_d['dist'] <= 1500) + df_dist_d['is_far'] = df_dist_d['dist'] > 1500 + d_counts = df_dist_d.groupby('steam_id_64').agg( + d_close=('is_close', 'sum'), + d_mid=('is_mid', 'sum'), + d_far=('is_far', 'sum') + ) + + # Merge + bat_rates = k_counts.join(d_counts, how='outer').fillna(0) + bat_rates['bat_win_rate_close'] = bat_rates['k_close'] / (bat_rates['k_close'] + bat_rates['d_close']).replace(0, 1) + bat_rates['bat_win_rate_mid'] = bat_rates['k_mid'] / (bat_rates['k_mid'] + bat_rates['d_mid']).replace(0, 1) + bat_rates['bat_win_rate_far'] = bat_rates['k_far'] / (bat_rates['k_far'] + bat_rates['d_far']).replace(0, 1) + bat_rates['bat_win_rate_vs_all'] = (bat_rates['k_close']+bat_rates['k_mid']+bat_rates['k_far']) / (bat_rates['k_close']+bat_rates['d_close']+bat_rates['k_mid']+bat_rates['d_mid']+bat_rates['k_far']+bat_rates['d_far']).replace(0, 1) + + df = df.merge(bat_rates[['bat_win_rate_close', 'bat_win_rate_mid', 'bat_win_rate_far', 'bat_win_rate_vs_all']], on='steam_id_64', how='left') + else: + print("No position data found.") + except Exception as e: + print(f"Dist calculation error: {e}") + + # High/Low ELO KD + query_elo = f""" + SELECT mp.steam_id_64, mp.kd_ratio, + (SELECT AVG(group_origin_elo) FROM fact_match_teams fmt WHERE fmt.match_id = mp.match_id AND group_origin_elo > 0) as elo + FROM fact_match_players mp + WHERE mp.steam_id_64 IN ({placeholders}) + """ + df_elo = pd.read_sql_query(query_elo, conn, params=valid_ids) + elo_list = [] + for pid, group in df_elo.groupby('steam_id_64'): + avg = group['elo'].mean() + if pd.isna(avg): avg = 1000 + elo_list.append({ + 'steam_id_64': pid, + 'bat_kd_diff_high_elo': group[group['elo'] > avg]['kd_ratio'].mean(), + 'bat_kd_diff_low_elo': group[group['elo'] <= avg]['kd_ratio'].mean() + }) + df = df.merge(pd.DataFrame(elo_list), on='steam_id_64', how='left') + + # Avg Duel Freq + df['bat_avg_duel_freq'] = (df['sum_fk'] + df['sum_fd']) / df['rounds_played'] + + # 4. HPS - High Pressure Contexts + print("Calculating HPS (Contexts)...") + # We need round-by-round score evolution. 
+ # Join rounds and economy(side) and matches + query_hps_ctx = f""" + SELECT r.match_id, r.round_num, r.ct_score, r.t_score, r.winner_side, + m.score_team1, m.score_team2, m.winner_team, + e.steam_id_64, e.side as player_side, + (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=r.match_id AND ev.round_num=r.round_num AND ev.attacker_steam_id=e.steam_id_64 AND ev.event_type='kill') as kills, + (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=r.match_id AND ev.round_num=r.round_num AND ev.victim_steam_id=e.steam_id_64 AND ev.event_type='kill') as deaths + FROM fact_rounds r + JOIN fact_matches m ON r.match_id = m.match_id + JOIN fact_round_player_economy e ON r.match_id = e.match_id AND r.round_num = e.round_num + WHERE e.steam_id_64 IN ({placeholders}) + """ + # This is heavy. + try: + # Optimization: Process per match or use SQL aggregation? + # SQL aggregation for specific conditions is better. + + # 4.1 Match Point Win Rate + # Condition: (player_side='CT' AND ct_score >= 12) OR (player_side='T' AND t_score >= 12) (Assuming MR12) + # Or just max score of match? + # Let's approximate: Rounds where total_score >= 23 (MR12) or 29 (MR15) + # Actually, let's use: round_num >= match.round_total - 1? No. + # Use: Rounds where One Team Score = Match Win Score - 1. + # Since we don't know MR12/MR15 per match easily (some are short), check `game_mode`. + # Fallback: Rounds where `ct_score` or `t_score` >= 12. + + # 4.2 Pressure Entry Rate (Losing Streak) + # Condition: Team score < Enemy score - 3. + + # 4.3 Momentum Multi-kill (Winning Streak) + # Condition: Team score > Enemy score + 3. + + # Let's load a simplified dataframe of rounds + df_rounds = pd.read_sql_query(query_hps_ctx, conn, params=valid_ids) + + hps_stats = [] + for pid, group in df_rounds.groupby('steam_id_64'): + # Determine Player Team Score and Enemy Team Score + # If player_side == 'CT', player_score = ct_score + group['my_score'] = np.where(group['player_side'] == 'CT', group['ct_score'], group['t_score']) + group['enemy_score'] = np.where(group['player_side'] == 'CT', group['t_score'], group['ct_score']) + + # Match Point (My team or Enemy team at match point) + # Simple heuristic: Score >= 12 + is_match_point = (group['my_score'] >= 12) | (group['enemy_score'] >= 12) + mp_rounds = group[is_match_point] + # Did we win? + # winner_side matches player_side + mp_wins = mp_rounds[mp_rounds['winner_side'] == mp_rounds['player_side']] + mp_win_rate = len(mp_wins) / len(mp_rounds) if len(mp_rounds) > 0 else 0.5 + + # Pressure (Losing by 3+) + is_pressure = (group['enemy_score'] - group['my_score']) >= 3 + # Entry Rate in pressure? Need FK data. + # We only loaded kills. Let's use Kills per round in pressure. 
+ pressure_kpr = group[is_pressure]['kills'].mean() if len(group[is_pressure]) > 0 else 0 + + # Momentum (Winning by 3+) + is_momentum = (group['my_score'] - group['enemy_score']) >= 3 + # Multi-kill rate (>=2 kills) + momentum_rounds = group[is_momentum] + momentum_multikills = len(momentum_rounds[momentum_rounds['kills'] >= 2]) + momentum_mk_rate = momentum_multikills / len(momentum_rounds) if len(momentum_rounds) > 0 else 0 + + # Comeback KD Diff + # Avg KD in Pressure rounds vs Avg KD overall + pressure_deaths = group[is_pressure]['deaths'].sum() + pressure_kills = group[is_pressure]['kills'].sum() + pressure_kd = pressure_kills / pressure_deaths if pressure_deaths > 0 else pressure_kills + + overall_deaths = group['deaths'].sum() + overall_kills = group['kills'].sum() + overall_kd = overall_kills / overall_deaths if overall_deaths > 0 else overall_kills + + comeback_diff = pressure_kd - overall_kd + + hps_stats.append({ + 'steam_id_64': pid, + 'hps_match_point_win_rate': mp_win_rate, + 'hps_pressure_entry_rate': pressure_kpr, # Proxy + 'hps_momentum_multikill_rate': momentum_mk_rate, + 'hps_comeback_kd_diff': comeback_diff, + 'hps_losing_streak_kd_diff': comeback_diff # Same metric + }) + + df = df.merge(pd.DataFrame(hps_stats), on='steam_id_64', how='left') + + # 4.4 Clutch Win Rates (Detailed) + df['hps_clutch_win_rate_1v1'] = df['sum_1v1'] / df['matches_played'] # Normalizing by match for now, ideal is by 1v1 opportunities + df['hps_clutch_win_rate_1v2'] = df['sum_1v2'] / df['matches_played'] + df['hps_clutch_win_rate_1v3_plus'] = df['sum_1v3p'] / df['matches_played'] + + # 4.5 Close Match Rating (from previous) + # ... (Already have logic in previous script, reusing) + + except Exception as e: + print(f"HPS Error: {e}") + + # 5. PTL - Pistol Detailed + print("Calculating PTL...") + # Filter Round 1, 13 (and 16 for MR15?) + # Just use 1 and 13 (common for MR12) + query_ptl = f""" + SELECT + e.steam_id_64, + (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=e.match_id AND ev.round_num=e.round_num AND ev.attacker_steam_id=e.steam_id_64 AND ev.event_type='kill') as kills, + (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=e.match_id AND ev.round_num=e.round_num AND ev.victim_steam_id=e.steam_id_64 AND ev.event_type='kill') as deaths, + r.winner_side, e.side as player_side, + e.equipment_value + FROM fact_round_player_economy e + JOIN fact_rounds r ON e.match_id = r.match_id AND e.round_num = r.round_num + WHERE e.steam_id_64 IN ({placeholders}) + AND e.round_num IN (1, 13) + """ + try: + df_ptl_raw = pd.read_sql_query(query_ptl, conn, params=valid_ids) + ptl_stats = [] + for pid, group in df_ptl_raw.groupby('steam_id_64'): + kills = group['kills'].sum() + deaths = group['deaths'].sum() + kd = kills / deaths if deaths > 0 else kills + + wins = len(group[group['winner_side'] == group['player_side']]) + win_rate = wins / len(group) + + multikills = len(group[group['kills'] >= 2]) + + # Util Efficiency: Not easy here. + + ptl_stats.append({ + 'steam_id_64': pid, + 'ptl_pistol_kills': kills, # Total? Or Avg? Schema says REAL. Let's use Avg per Match later. + 'ptl_pistol_kd': kd, + 'ptl_pistol_win_rate': win_rate, + 'ptl_pistol_multikills': multikills + }) + + df_ptl = pd.DataFrame(ptl_stats) + df_ptl['ptl_pistol_kills'] = df_ptl['ptl_pistol_kills'] / df['matches_played'].mean() # Approximate + df = df.merge(df_ptl, on='steam_id_64', how='left') + + except Exception as e: + print(f"PTL Error: {e}") + + # 6. 
+ print("Calculating T/CT & UTIL...")
+ # T/CT Side Stats
+ query_side = f"""
+ SELECT steam_id_64,
+ SUM(CASE WHEN side='CT' THEN 1 ELSE 0 END) as ct_rounds,
+ SUM(CASE WHEN side='T' THEN 1 ELSE 0 END) as t_rounds
+ FROM fact_round_player_economy
+ WHERE steam_id_64 IN ({placeholders})
+ GROUP BY steam_id_64
+ """
+ # Combine with aggregated ratings from fact_match_players_ct/t
+ query_side_r = f"""
+ SELECT steam_id_64, AVG(rating) as ct_rating, AVG(kd_ratio) as ct_kd, SUM(first_kill) as ct_fk
+ FROM fact_match_players_ct WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64
+ """
+ df_ct = pd.read_sql_query(query_side_r, conn, params=valid_ids)
+ # TODO: repeat for the T side (fact_match_players_t) and merge both into df.
+ # The prototype scores below fall back to basic rating instead of these side ratings.
+
+ # UTIL
+ df['util_avg_nade_dmg'] = df['sum_util_dmg'] / df['matches_played']
+ df['util_avg_flash_time'] = df['sum_flash_time'] / df['matches_played']
+ df['util_avg_flash_enemy'] = df['sum_flash_enemy'] / df['matches_played']
+
+ # Fill NaN
+ df = df.fillna(0)
+
+ return df
+
+def calculate_ultimate_scores(df):
+ # Min-Max normalization helper: missing or constant columns fall back to a neutral 50
+ def n(col):
+ if col not in df.columns: return 50
+ s = df[col]
+ if s.max() == s.min(): return 50
+ return (s - s.min()) / (s.max() - s.min()) * 100
+
+ df = df.copy()
+
+ # 1. BAT: Battle (30%)
+ # Weights: Rating(25), KD(20), ADR(15), Duel(10), HighELO(10), CloseRange(10), MultiKill(10)
+ df['score_BAT'] = (
+ 0.25 * n('basic_avg_rating') +
+ 0.20 * n('basic_avg_kd') +
+ 0.15 * n('basic_avg_adr') +
+ 0.10 * n('bat_avg_duel_win_rate') + # falls back to 50 via n() if the column is missing
+ 0.10 * n('bat_kd_diff_high_elo') +
+ 0.10 * n('bat_win_rate_close') +
+ 0.10 * n('basic_avg_kill_3') # multi-kill proxy
+ )
+
+ # 2. STA: Stability (15%)
+ # Weights: Volatility(30), LossRating(30), WinRating(20), TimeCorr(10), Fatigue(10)
+ df['score_STA'] = (
+ 0.30 * (100 - n('sta_rating_volatility')) +
+ 0.30 * n('sta_loss_rating') +
+ 0.20 * n('sta_win_rating') +
+ 0.10 * (100 - abs(n('sta_time_rating_corr'))) + # closer to 0 is better; abs() handles both the Series and the scalar fallback
+ 0.10 * (100 - n('sta_fatigue_decay'))
+ )
+
+ # 3. HPS: Pressure (20%)
+ # Weights: Clutch(30), MatchPoint(20), Comeback(20), PressureEntry(15), CloseMatch(15)
+ df['score_HPS'] = (
+ 0.30 * n('sum_1v3p') + # high-tier clutches
+ 0.20 * n('hps_match_point_win_rate') +
+ 0.20 * n('hps_comeback_kd_diff') +
+ 0.15 * n('hps_pressure_entry_rate') +
+ 0.15 * n('basic_avg_rating') # fallback while close-match rating is missing
+ )
+
+ # 4. PTL: Pistol (10%)
+ # Weights: Kills(40), WinRate(30), KD(30)
+ df['score_PTL'] = (
+ 0.40 * n('ptl_pistol_kills') +
+ 0.30 * n('ptl_pistol_win_rate') +
+ 0.30 * n('ptl_pistol_kd')
+ )
+
+ # 5. T/CT (15%)
+ # Weights: CT(50), T(50)
+ # Placeholder: proper CT/T ratings are not loaded in this prototype, so basic rating stands in for both sides.
+ df['score_TCT'] = 0.5 * n('basic_avg_rating') + 0.5 * n('basic_avg_rating')
+
+ # 6.
UTIL (10%) + # Weights: Dmg(50), Flash(30), EnemiesFlashed(20) + df['score_UTIL'] = ( + 0.50 * n('util_avg_nade_dmg') + + 0.30 * n('util_avg_flash_time') + + 0.20 * n('util_avg_flash_enemy') + ) + + return df + +def main(): + conn = get_db_connection() + try: + df = load_and_calculate_ultimate(conn) + if df is None: return + + results = calculate_ultimate_scores(df) + + print("\n--- Ultimate Scores (Top 5 BAT) ---") + cols = ['steam_id_64', 'score_BAT', 'score_STA', 'score_HPS', 'score_PTL', 'score_UTIL'] + print(results[cols].sort_values('score_BAT', ascending=False).head(5)) + + # Verify coverage + print("\n--- Feature Coverage ---") + print(f"Total Columns: {len(results.columns)}") + print("BAT Distances:", 'bat_win_rate_close' in results.columns) + print("HPS Contexts:", 'hps_match_point_win_rate' in results.columns) + print("PTL Detailed:", 'ptl_pistol_kd' in results.columns) + + finally: + conn.close() + +if __name__ == "__main__": + main() diff --git a/scripts/check_l1a.py b/scripts/check_l1a.py new file mode 100644 index 0000000..137c038 --- /dev/null +++ b/scripts/check_l1a.py @@ -0,0 +1,22 @@ +import sqlite3 +import os + +L1A_DB_PATH = r'd:\Documents\trae_projects\yrtv\database\L1A\L1A.sqlite' + +print("Checking L1A...") +if os.path.exists(L1A_DB_PATH): + try: + conn = sqlite3.connect(L1A_DB_PATH) + cursor = conn.cursor() + cursor.execute("SELECT name FROM sqlite_master WHERE type='table'") + tables = cursor.fetchall() + print(f"Tables: {tables}") + + cursor.execute("SELECT COUNT(*) FROM raw_iframe_network") + count = cursor.fetchone()[0] + print(f"L1A Records: {count}") + conn.close() + except Exception as e: + print(f"Error checking L1A: {e}") +else: + print(f"L1A DB not found at {L1A_DB_PATH}") diff --git a/scripts/check_l3_variance.py b/scripts/check_l3_variance.py new file mode 100644 index 0000000..5bb825c --- /dev/null +++ b/scripts/check_l3_variance.py @@ -0,0 +1,55 @@ +import sqlite3 +import pandas as pd +import numpy as np +import os + +# Config to match your project structure +class Config: + DB_L3_PATH = r'd:\Documents\trae_projects\yrtv\database\L3\L3_Features.sqlite' + +def check_variance(): + db_path = Config.DB_L3_PATH + if not os.path.exists(db_path): + print(f"L3 DB not found at {db_path}") + return + + conn = sqlite3.connect(db_path) + try: + # Read all features + df = pd.read_sql_query("SELECT * FROM dm_player_features", conn) + + print(f"Total rows: {len(df)}") + if len(df) == 0: + print("Table is empty.") + return + + numeric_cols = df.select_dtypes(include=['number']).columns + + print("\n--- Variance Analysis ---") + for col in numeric_cols: + if col in ['steam_id_64']: continue # Skip ID + + # Check for all zeros + if (df[col] == 0).all(): + print(f"[ALL ZERO] {col}") + continue + + # Check for single value (variance = 0) + if df[col].nunique() <= 1: + val = df[col].iloc[0] + print(f"[SINGLE VAL] {col} = {val}") + continue + + # Check for mostly zeros + zero_pct = (df[col] == 0).mean() + if zero_pct > 0.9: + print(f"[MOSTLY ZERO] {col} ({zero_pct:.1%} zeros)") + + # Basic stats for valid ones + # print(f"{col}: min={df[col].min():.2f}, max={df[col].max():.2f}, mean={df[col].mean():.2f}") + + finally: + conn.close() + +if __name__ == "__main__": + check_variance() diff --git a/check_round_data.py b/scripts/check_round_data.py similarity index 100% rename from check_round_data.py rename to scripts/check_round_data.py diff --git a/scripts/check_side_mapping.py b/scripts/check_side_mapping.py new file mode 100644 index 0000000..1d72ffe --- /dev/null +++ 
b/scripts/check_side_mapping.py @@ -0,0 +1,63 @@ +import sqlite3 +import pandas as pd +import json +import os +import sys + +# Add parent directory +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from web.config import Config + +def check_mapping(): + conn = sqlite3.connect(Config.DB_L2_PATH) + + # Join economy and teams via match_id + # We need to match steam_id (in eco) to group_uids (in teams) + + # 1. Get Economy R1 samples + query_eco = """ + SELECT match_id, steam_id_64, side + FROM fact_round_player_economy + WHERE round_num = 1 + LIMIT 10 + """ + eco_rows = pd.read_sql_query(query_eco, conn) + + if eco_rows.empty: + print("No Economy R1 data found.") + conn.close() + return + + print("Checking Mapping...") + for _, row in eco_rows.iterrows(): + mid = row['match_id'] + sid = row['steam_id_64'] + side = row['side'] + + # Get Teams for this match + query_teams = "SELECT group_id, group_fh_role, group_uids FROM fact_match_teams WHERE match_id = ?" + team_rows = pd.read_sql_query(query_teams, conn, params=(mid,)) + + for _, t_row in team_rows.iterrows(): + # Check if sid is in group_uids (which contains UIDs, not SteamIDs!) + # We need to map SteamID -> UID + # Use dim_players or fact_match_players + q_uid = "SELECT uid FROM fact_match_players WHERE match_id = ? AND steam_id_64 = ?" + uid_res = conn.execute(q_uid, (mid, sid)).fetchone() + if not uid_res: + continue + + uid = str(uid_res[0]) + group_uids = str(t_row['group_uids']).split(',') + + if uid in group_uids: + role = t_row['group_fh_role'] + print(f"Match {mid}: Steam {sid} (UID {uid}) is on Side {side} in R1.") + print(f" Found in Group {t_row['group_id']} with FH Role {role}.") + print(f" MAPPING: Role {role} = {side}") + break + + conn.close() + +if __name__ == "__main__": + check_mapping() diff --git a/scripts/check_tables.py b/scripts/check_tables.py new file mode 100644 index 0000000..d8df0ad --- /dev/null +++ b/scripts/check_tables.py @@ -0,0 +1,43 @@ +import sqlite3 +import os + +DB_PATH = r'd:\Documents\trae_projects\yrtv\database\L2\L2_Main.sqlite' + +def check_tables(): + if not os.path.exists(DB_PATH): + print(f"DB not found: {DB_PATH}") + return + + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + + tables = [ + 'dim_players', 'dim_maps', + 'fact_matches', 'fact_match_teams', + 'fact_match_players', 'fact_match_players_ct', 'fact_match_players_t', + 'fact_rounds', 'fact_round_events', 'fact_round_player_economy' + ] + + print(f"--- L2 Database Check: {DB_PATH} ---") + for table in tables: + try: + cursor.execute(f"SELECT COUNT(*) FROM {table}") + count = cursor.fetchone()[0] + print(f"{table:<25}: {count:>6} rows") + + # Simple column check for recently added columns + if table == 'fact_match_players': + cursor.execute(f"PRAGMA table_info({table})") + cols = [info[1] for info in cursor.fetchall()] + if 'util_flash_usage' in cols: + print(f" [OK] util_flash_usage exists") + else: + print(f" [ERR] util_flash_usage MISSING") + + except Exception as e: + print(f"{table:<25}: [ERROR] {e}") + + conn.close() + +if __name__ == "__main__": + check_tables() diff --git a/scripts/debug_db.py b/scripts/debug_db.py index d3c41a5..a755fd1 100644 --- a/scripts/debug_db.py +++ b/scripts/debug_db.py @@ -1,65 +1,63 @@ import sqlite3 +import pandas as pd import os -# Define database paths -BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) -L2_PATH = os.path.join(BASE_DIR, 'database', 'L2', 'L2_Main.sqlite') - -def check_l2_tables(): - print(f"Checking L2 database at: 
{L2_PATH}") - if not os.path.exists(L2_PATH): - print("Error: L2 database not found!") - return +L2_PATH = r'd:\Documents\trae_projects\yrtv\database\L2\L2_Main.sqlite' +WEB_PATH = r'd:\Documents\trae_projects\yrtv\database\Web\Web_App.sqlite' +def debug_db(): + # --- L2 Checks --- conn = sqlite3.connect(L2_PATH) - cursor = conn.cursor() - cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") - tables = cursor.fetchall() - print("Tables in L2 Database:") - for table in tables: - print(f" - {table[0]}") + print("--- Data Source Type Distribution ---") + try: + df = pd.read_sql_query("SELECT data_source_type, COUNT(*) as cnt FROM fact_matches GROUP BY data_source_type", conn) + print(df) + except Exception as e: + print(f"Error: {e}") + print("\n--- Economy Table Count ---") + try: + count = conn.execute("SELECT COUNT(*) FROM fact_round_player_economy").fetchone()[0] + print(f"Rows: {count}") + except Exception as e: + print(f"Error: {e}") + + print("\n--- Check util_flash_usage in fact_match_players ---") + try: + cursor = conn.cursor() + cursor.execute("PRAGMA table_info(fact_match_players)") + cols = [info[1] for info in cursor.fetchall()] + if 'util_flash_usage' in cols: + print("Column 'util_flash_usage' EXISTS.") + nz = conn.execute("SELECT COUNT(*) FROM fact_match_players WHERE util_flash_usage > 0").fetchone()[0] + print(f"Rows with util_flash_usage > 0: {nz}") + else: + print("Column 'util_flash_usage' MISSING.") + except Exception as e: + print(f"Error: {e}") + conn.close() -def debug_player_query(player_name_query=None): - print(f"\nDebugging Player Query (L2)...") - conn = sqlite3.connect(L2_PATH) - cursor = conn.cursor() - + # --- Web DB Checks --- + print("\n--- Web DB Check ---") + if not os.path.exists(WEB_PATH): + print(f"Web DB not found at {WEB_PATH}") + return + try: - # Check if 'dim_players' exists - cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='dim_players';") - if not cursor.fetchone(): - print("Error: 'dim_players' table not found!") - return - - # Check schema of dim_players - print("\nChecking dim_players schema:") - cursor.execute("PRAGMA table_info(dim_players)") - for col in cursor.fetchall(): - print(col) - - # Check sample data - print("\nSampling dim_players (first 5):") - cursor.execute("SELECT * FROM dim_players LIMIT 5") - for row in cursor.fetchall(): - print(row) - - # Test Search - search_term = 'zy' - print(f"\nTesting search for '{search_term}':") - cursor.execute("SELECT * FROM dim_players WHERE name LIKE ?", (f'%{search_term}%',)) - results = cursor.fetchall() - print(f"Found {len(results)} matches.") - for r in results: - print(r) - + conn_web = sqlite3.connect(WEB_PATH) + cursor = conn_web.cursor() + cursor.execute("SELECT name FROM sqlite_master WHERE type='table'") + tables = cursor.fetchall() + print(f"Tables: {[t[0] for t in tables]}") + + if 'player_metadata' in [t[0] for t in tables]: + count = conn_web.execute("SELECT COUNT(*) FROM player_metadata").fetchone()[0] + print(f"player_metadata rows: {count}") + conn_web.close() except Exception as e: - print(f"Error querying L2: {e}") - finally: - conn.close() + print(f"Error checking Web DB: {e}") -if __name__ == '__main__': - check_l2_tables() - debug_player_query() +if __name__ == "__main__": + debug_db() diff --git a/scripts/run_rebuild.py b/scripts/run_rebuild.py new file mode 100644 index 0000000..4158415 --- /dev/null +++ b/scripts/run_rebuild.py @@ -0,0 +1,18 @@ +import sys +import os + +# Add project root to path +current_dir = 
os.path.dirname(os.path.abspath(__file__)) +project_root = os.path.dirname(current_dir) +sys.path.append(project_root) + +from web.services.feature_service import FeatureService + +print("Starting Rebuild...") +try: + count = FeatureService.rebuild_all_features(min_matches=1) + print(f"Rebuild Complete. Processed {count} players.") +except Exception as e: + print(f"Error: {e}") + import traceback + traceback.print_exc() diff --git a/scripts/update_l2_schema_utility.py b/scripts/update_l2_schema_utility.py new file mode 100644 index 0000000..95b69e8 --- /dev/null +++ b/scripts/update_l2_schema_utility.py @@ -0,0 +1,30 @@ +import sys +import os +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +import sqlite3 +from web.config import Config + +conn = sqlite3.connect(Config.DB_L2_PATH) +cursor = conn.cursor() + +columns = [ + 'util_flash_usage', + 'util_smoke_usage', + 'util_molotov_usage', + 'util_he_usage', + 'util_decoy_usage' +] + +for col in columns: + try: + cursor.execute(f"ALTER TABLE fact_match_players ADD COLUMN {col} INTEGER DEFAULT 0") + print(f"Added column {col}") + except sqlite3.OperationalError as e: + if "duplicate column name" in str(e): + print(f"Column {col} already exists.") + else: + print(f"Error adding {col}: {e}") + +conn.commit() +conn.close() diff --git a/scripts/update_l3_schema.py b/scripts/update_l3_schema.py new file mode 100644 index 0000000..a4b3db6 --- /dev/null +++ b/scripts/update_l3_schema.py @@ -0,0 +1,39 @@ +import sqlite3 +import os + +DB_PATH = r'd:\Documents\trae_projects\yrtv\database\L3\L3_Features.sqlite' + +def add_columns(): + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + + # Check existing columns + cursor.execute("PRAGMA table_info(dm_player_features)") + columns = [row[1] for row in cursor.fetchall()] + + new_columns = [ + 'score_bat', 'score_sta', 'score_hps', 'score_ptl', 'score_tct', 'score_util', + 'bat_avg_duel_win_rate', 'bat_kd_diff_high_elo', 'bat_win_rate_close', + 'sta_time_rating_corr', 'sta_fatigue_decay', + 'hps_match_point_win_rate', 'hps_comeback_kd_diff', 'hps_pressure_entry_rate', + 'ptl_pistol_win_rate', 'ptl_pistol_kd', + 'util_avg_flash_enemy' + ] + + for col in new_columns: + if col not in columns: + print(f"Adding column: {col}") + try: + cursor.execute(f"ALTER TABLE dm_player_features ADD COLUMN {col} REAL") + except Exception as e: + print(f"Error adding {col}: {e}") + + conn.commit() + conn.close() + print("Schema update complete.") + +if __name__ == "__main__": + if not os.path.exists(DB_PATH): + print("L3 DB not found, skipping schema update (will be created by build script).") + else: + add_columns() diff --git a/web/routes/players.py b/web/routes/players.py index deb8909..541a2f2 100644 --- a/web/routes/players.py +++ b/web/routes/players.py @@ -141,15 +141,24 @@ def charts_data(steam_id): # Radar Data (Construct from features) features = FeatureService.get_player_features(steam_id) radar_data = {} + radar_dist = FeatureService.get_roster_features_distribution(steam_id) + if features: # Dimensions: STA, BAT, HPS, PTL, T/CT, UTIL + # Use calculated scores (0-100 scale) + + # Helper to get score safely + def get_score(key): + val = features[key] if key in features.keys() else 0 + return float(val) if val else 0 + radar_data = { - 'STA': features['basic_avg_rating'] or 0, - 'BAT': features['bat_avg_duel_win_rate'] or 0, - 'HPS': features['hps_clutch_win_rate_1v1'] or 0, - 'PTL': features['ptl_pistol_win_rate'] or 0, - 'SIDE': features['side_rating_ct'] or 0, - 'UTIL': 
features['util_usage_rate'] or 0 + 'STA': get_score('score_sta'), + 'BAT': get_score('score_bat'), + 'HPS': get_score('score_hps'), + 'PTL': get_score('score_ptl'), + 'SIDE': get_score('score_tct'), + 'UTIL': get_score('score_util') } trend_labels = [] @@ -166,7 +175,8 @@ def charts_data(steam_id): return jsonify({ 'trend': {'labels': trend_labels, 'values': trend_values}, - 'radar': radar_data + 'radar': radar_data, + 'radar_dist': radar_dist }) # --- API for Comparison --- diff --git a/web/services/feature_service.py b/web/services/feature_service.py index 5db6554..e61d7a2 100644 --- a/web/services/feature_service.py +++ b/web/services/feature_service.py @@ -1,4 +1,7 @@ -from web.database import query_db +from web.database import query_db, get_db, execute_db +import sqlite3 +import pandas as pd +import numpy as np class FeatureService: @staticmethod @@ -40,15 +43,11 @@ class FeatureService: p['matches_played'] = cnt_dict.get(p['steam_id_64'], 0) if search: - # ... existing search logic ... # Get all matching players l2_players, _ = StatsService.get_players(page=1, per_page=100, search=search) if not l2_players: return [], 0 - # ... (Merge logic) ... - # I need to insert the match count logic inside the merge loop or after - steam_ids = [p['steam_id_64'] for p in l2_players] placeholders = ','.join('?' for _ in steam_ids) sql = f"SELECT * FROM dm_player_features WHERE steam_id_64 IN ({placeholders})" @@ -76,7 +75,7 @@ class FeatureService: else: m['basic_avg_rating'] = 0 m['basic_avg_kd'] = 0 - m['basic_avg_kast'] = 0 # Ensure kast exists + m['basic_avg_kast'] = 0 m['matches_played'] = cnt_dict.get(p['steam_id_64'], 0) merged.append(m) @@ -90,20 +89,10 @@ class FeatureService: else: # Browse mode - # Check L3 l3_count = query_db('l3', "SELECT COUNT(*) as cnt FROM dm_player_features", one=True)['cnt'] if l3_count == 0 or sort_by == 'matches': - # If sorting by matches, we MUST use L2 counts because L3 might not have it or we want dynamic. - # OR if L3 is empty. - # Since L3 schema is unknown regarding 'matches_played', let's assume we fallback to L2 logic - # but paginated in memory if dataset is small, or just fetch all L2 players? - # Fetching all L2 players is bad if many. - # But for 'matches' sort, we need to know counts for ALL to sort correctly. - # Solution: Query L2 for top N players by match count. - if sort_by == 'matches': - # Query L2 for IDs ordered by count sql = """ SELECT steam_id_64, COUNT(*) as cnt FROM fact_match_players @@ -118,24 +107,18 @@ class FeatureService: total = query_db('l2', "SELECT COUNT(DISTINCT steam_id_64) as cnt FROM fact_match_players", one=True)['cnt'] ids = [r['steam_id_64'] for r in top_ids] - # Fetch details for these IDs l2_players = StatsService.get_players_by_ids(ids) - # Merge logic (reuse) + # Merge logic merged = [] - # Fetch L3 features for these IDs to show stats p_ph = ','.join('?' for _ in ids) f_sql = f"SELECT * FROM dm_player_features WHERE steam_id_64 IN ({p_ph})" features = query_db('l3', f_sql, ids) f_dict = {f['steam_id_64']: f for f in features} - cnt_dict = {r['steam_id_64']: r['cnt'] for r in top_ids} - - # Map L2 players to dict for easy access (though list order matters for sort?) - # Actually top_ids is sorted. 
p_dict = {p['steam_id_64']: p for p in l2_players} - for r in top_ids: # Preserve order + for r in top_ids: sid = r['steam_id_64'] p = p_dict.get(sid) if not p: continue @@ -160,10 +143,10 @@ class FeatureService: return merged, total - # L3 empty fallback (existing logic) + # L3 empty fallback l2_players, total = StatsService.get_players(page, per_page, sort_by=None) merged = [] - attach_match_counts(l2_players) # Helper + attach_match_counts(l2_players) for p in l2_players: m = dict(p) @@ -184,7 +167,7 @@ class FeatureService: return merged, total - # Normal L3 browse (sort by rating/kd/kast) + # Normal L3 browse sql = f"SELECT * FROM dm_player_features ORDER BY {order_col} DESC LIMIT ? OFFSET ?" features = query_db('l3', sql, [per_page, offset]) @@ -204,53 +187,711 @@ class FeatureService: if p: m.update(dict(p)) else: - m['username'] = f['steam_id_64'] # Fallback + m['username'] = f['steam_id_64'] m['avatar_url'] = None merged.append(m) return merged, total @staticmethod - def get_top_players(limit=20, sort_by='basic_avg_rating'): - # Safety check for sort_by to prevent injection - allowed_sorts = ['basic_avg_rating', 'basic_avg_kd', 'basic_avg_kast', 'basic_avg_rws'] - if sort_by not in allowed_sorts: - sort_by = 'basic_avg_rating' - - sql = f""" - SELECT f.*, p.username, p.avatar_url - FROM dm_player_features f - LEFT JOIN l2.dim_players p ON f.steam_id_64 = p.steam_id_64 - ORDER BY {sort_by} DESC - LIMIT ? + def rebuild_all_features(min_matches=5): """ - # Note: Cross-database join (l2.dim_players) works in SQLite if attached. - # But `query_db` connects to one DB. - # Strategy: Fetch features, then fetch player infos from L2. Or attach DB. - # Simple strategy: Fetch features, then extract steam_ids and batch fetch from L2 in StatsService. - # Or simpler: Just return features and let the controller/template handle the name/avatar via another call or pre-fetching. + Refreshes the L3 Data Mart with full feature calculations. + """ + from web.config import Config + l3_db_path = Config.DB_L3_PATH + l2_db_path = Config.DB_L2_PATH - # Actually, for "Player List" view, we really want L3 data joined with L2 names. - # I will change this to just return features for now, and handle joining in the route handler or via a helper that attaches databases. - # Attaching is better. 
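+        # Flow: pull L2 facts into a DataFrame, compute the feature set and the six-dimension scores,
+        # then upsert the rows into dm_player_features in the L3 mart.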
+ conn_l2 = sqlite3.connect(l2_db_path) + conn_l2.row_factory = sqlite3.Row - return query_db('l3', f"SELECT * FROM dm_player_features ORDER BY {sort_by} DESC LIMIT ?", [limit]) + try: + print("Loading L2 data...") + df = FeatureService._load_and_calculate_dataframe(conn_l2, min_matches) + + if df is None or df.empty: + print("No data to process.") + return 0 + + print("Calculating Scores...") + df = FeatureService._calculate_ultimate_scores(df) + + print("Saving to L3...") + conn_l3 = sqlite3.connect(l3_db_path) + + cursor = conn_l3.cursor() + + # Ensure columns exist in DataFrame match DB columns + cursor.execute("PRAGMA table_info(dm_player_features)") + valid_cols = [r[1] for r in cursor.fetchall()] + + # Filter DF columns + df_cols = [c for c in df.columns if c in valid_cols] + df_to_save = df[df_cols].copy() + df_to_save['updated_at'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S') + + # Generate Insert SQL + placeholders = ','.join(['?'] * len(df_to_save.columns)) + cols_str = ','.join(df_to_save.columns) + sql = f"INSERT OR REPLACE INTO dm_player_features ({cols_str}) VALUES ({placeholders})" + + data = df_to_save.values.tolist() + cursor.executemany(sql, data) + conn_l3.commit() + conn_l3.close() + + return len(df) + + except Exception as e: + print(f"Rebuild Error: {e}") + import traceback + traceback.print_exc() + return 0 + finally: + conn_l2.close() @staticmethod - def get_player_trend(steam_id, limit=30): - # This requires `fact_match_features` or querying L2 matches for historical data. - # WebRDD says: "Trend graph: Recent 10/20 matches Rating trend (Chart.js)." - # We can get this from L2 fact_match_players. - sql = """ - SELECT m.start_time, mp.rating, mp.kd_ratio, mp.adr, m.match_id - FROM fact_match_players mp - JOIN fact_matches m ON mp.match_id = m.match_id - WHERE mp.steam_id_64 = ? - ORDER BY m.start_time DESC - LIMIT ? + def _load_and_calculate_dataframe(conn, min_matches): + # 1. Basic Stats + query_basic = """ + SELECT + steam_id_64, + COUNT(*) as matches_played, + SUM(round_total) as rounds_played, + AVG(rating) as basic_avg_rating, + AVG(kd_ratio) as basic_avg_kd, + AVG(adr) as basic_avg_adr, + AVG(kast) as basic_avg_kast, + AVG(rws) as basic_avg_rws, + SUM(headshot_count) as sum_hs, + SUM(kills) as sum_kills, + SUM(deaths) as sum_deaths, + SUM(first_kill) as sum_fk, + SUM(first_death) as sum_fd, + SUM(clutch_1v1) as sum_1v1, + SUM(clutch_1v2) as sum_1v2, + SUM(clutch_1v3) + SUM(clutch_1v4) + SUM(clutch_1v5) as sum_1v3p, + SUM(kill_2) as sum_2k, + SUM(kill_3) as sum_3k, + SUM(kill_4) as sum_4k, + SUM(kill_5) as sum_5k, + SUM(assisted_kill) as sum_assist, + SUM(perfect_kill) as sum_perfect, + SUM(revenge_kill) as sum_revenge, + SUM(awp_kill) as sum_awp, + SUM(jump_count) as sum_jump, + SUM(throw_harm) as sum_util_dmg, + SUM(flash_time) as sum_flash_time, + SUM(flash_enemy) as sum_flash_enemy, + SUM(flash_team) as sum_flash_team, + SUM(util_flash_usage) as sum_util_flash, + SUM(util_smoke_usage) as sum_util_smoke, + SUM(util_molotov_usage) as sum_util_molotov, + SUM(util_he_usage) as sum_util_he, + SUM(util_decoy_usage) as sum_util_decoy + FROM fact_match_players + GROUP BY steam_id_64 + HAVING COUNT(*) >= ? """ - # This query needs to run against L2. - # So this method should actually be in StatsService or FeatureService connecting to L2. - # I will put it here but note it uses L2. Actually, better to put in StatsService if it uses L2 tables. - # But FeatureService conceptualizes "Trends". 
I'll move it to StatsService for implementation correctness (DB context). - pass + df = pd.read_sql_query(query_basic, conn, params=(min_matches,)) + if df.empty: return None + + # Basic Derived + df['basic_headshot_rate'] = df['sum_hs'] / df['sum_kills'].replace(0, 1) + df['basic_avg_headshot_kills'] = df['sum_hs'] / df['matches_played'] + df['basic_avg_first_kill'] = df['sum_fk'] / df['matches_played'] + df['basic_avg_first_death'] = df['sum_fd'] / df['matches_played'] + df['basic_first_kill_rate'] = df['sum_fk'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1) + df['basic_first_death_rate'] = df['sum_fd'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1) + df['basic_avg_kill_2'] = df['sum_2k'] / df['matches_played'] + df['basic_avg_kill_3'] = df['sum_3k'] / df['matches_played'] + df['basic_avg_kill_4'] = df['sum_4k'] / df['matches_played'] + df['basic_avg_kill_5'] = df['sum_5k'] / df['matches_played'] + df['basic_avg_assisted_kill'] = df['sum_assist'] / df['matches_played'] + df['basic_avg_perfect_kill'] = df['sum_perfect'] / df['matches_played'] + df['basic_avg_revenge_kill'] = df['sum_revenge'] / df['matches_played'] + df['basic_avg_awp_kill'] = df['sum_awp'] / df['matches_played'] + df['basic_avg_jump_count'] = df['sum_jump'] / df['matches_played'] + + # UTIL Basic + df['util_avg_nade_dmg'] = df['sum_util_dmg'] / df['matches_played'] + df['util_avg_flash_time'] = df['sum_flash_time'] / df['matches_played'] + df['util_avg_flash_enemy'] = df['sum_flash_enemy'] / df['matches_played'] + + valid_ids = tuple(df['steam_id_64'].tolist()) + placeholders = ','.join(['?'] * len(valid_ids)) + + # 2. STA (Detailed) + query_sta = f""" + SELECT mp.steam_id_64, mp.rating, mp.is_win, m.start_time, m.duration + FROM fact_match_players mp + JOIN fact_matches m ON mp.match_id = m.match_id + WHERE mp.steam_id_64 IN ({placeholders}) + ORDER BY mp.steam_id_64, m.start_time + """ + df_matches = pd.read_sql_query(query_sta, conn, params=valid_ids) + sta_list = [] + for pid, group in df_matches.groupby('steam_id_64'): + group = group.sort_values('start_time') + last_30 = group.tail(30) + + # Fatigue Calc + # Simple heuristic: split matches by day, compare early (first 3) vs late (rest) + group['date'] = pd.to_datetime(group['start_time'], unit='s').dt.date + day_counts = group.groupby('date').size() + busy_days = day_counts[day_counts >= 4].index # Days with 4+ matches + + fatigue_decays = [] + for day in busy_days: + day_matches = group[group['date'] == day] + if len(day_matches) >= 4: + early_rating = day_matches.head(3)['rating'].mean() + late_rating = day_matches.tail(len(day_matches) - 3)['rating'].mean() + fatigue_decays.append(early_rating - late_rating) + + avg_fatigue = np.mean(fatigue_decays) if fatigue_decays else 0 + + sta_list.append({ + 'steam_id_64': pid, + 'sta_last_30_rating': last_30['rating'].mean(), + 'sta_win_rating': group[group['is_win']==1]['rating'].mean(), + 'sta_loss_rating': group[group['is_win']==0]['rating'].mean(), + 'sta_rating_volatility': group.tail(10)['rating'].std() if len(group) > 1 else 0, + 'sta_time_rating_corr': group['duration'].corr(group['rating']) if len(group)>2 and group['rating'].std() > 0 else 0, + 'sta_fatigue_decay': avg_fatigue + }) + df = df.merge(pd.DataFrame(sta_list), on='steam_id_64', how='left') + + # 3. 
BAT (High ELO) + query_elo = f""" + SELECT mp.steam_id_64, mp.kd_ratio, + (SELECT AVG(group_origin_elo) FROM fact_match_teams fmt WHERE fmt.match_id = mp.match_id AND group_origin_elo > 0) as elo + FROM fact_match_players mp + WHERE mp.steam_id_64 IN ({placeholders}) + """ + df_elo = pd.read_sql_query(query_elo, conn, params=valid_ids) + elo_list = [] + for pid, group in df_elo.groupby('steam_id_64'): + avg = group['elo'].mean() or 1000 + elo_list.append({ + 'steam_id_64': pid, + 'bat_kd_diff_high_elo': group[group['elo'] > avg]['kd_ratio'].mean(), + 'bat_kd_diff_low_elo': group[group['elo'] <= avg]['kd_ratio'].mean() + }) + df = df.merge(pd.DataFrame(elo_list), on='steam_id_64', how='left') + + # Duel Win Rate + query_duel = f""" + SELECT steam_id_64, SUM(entry_kills) as ek, SUM(entry_deaths) as ed + FROM fact_match_players WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64 + """ + df_duel = pd.read_sql_query(query_duel, conn, params=valid_ids) + df_duel['bat_avg_duel_win_rate'] = df_duel['ek'] / (df_duel['ek'] + df_duel['ed']).replace(0, 1) + df = df.merge(df_duel[['steam_id_64', 'bat_avg_duel_win_rate']], on='steam_id_64', how='left') + + # 4. HPS + # Clutch Rate + df['hps_clutch_win_rate_1v1'] = df['sum_1v1'] / df['matches_played'] + df['hps_clutch_win_rate_1v3_plus'] = df['sum_1v3p'] / df['matches_played'] + + # Prepare Detailed Event Data for HPS (Comeback), PTL (KD), and T/CT + + # A. Determine Side Info using fact_match_teams + # 1. Get Match Teams + query_teams = f""" + SELECT match_id, group_fh_role, group_uids + FROM fact_match_teams + WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders})) + """ + df_teams = pd.read_sql_query(query_teams, conn, params=valid_ids) + + # 2. Get Player UIDs + query_uids = f"SELECT match_id, steam_id_64, uid FROM fact_match_players WHERE steam_id_64 IN ({placeholders})" + df_uids = pd.read_sql_query(query_uids, conn, params=valid_ids) + + # 3. Get Match Meta (Start Time for MR12/MR15) + query_meta = f"SELECT match_id, start_time FROM fact_matches WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))" + df_meta = pd.read_sql_query(query_meta, conn, params=valid_ids) + df_meta['halftime_round'] = np.where(df_meta['start_time'] > 1695772800, 12, 15) # CS2 Release Date approx + + # 4. Build FH Side DataFrame + fh_rows = [] + if not df_teams.empty and not df_uids.empty: + match_teams = {} # match_id -> [(role, [uids])] + for _, row in df_teams.iterrows(): + mid = row['match_id'] + role = row['group_fh_role'] # 1=CT, 0=T + try: + uids = str(row['group_uids']).split(',') + uids = [u.strip() for u in uids if u.strip()] + except: + uids = [] + if mid not in match_teams: match_teams[mid] = [] + match_teams[mid].append((role, uids)) + + for _, row in df_uids.iterrows(): + mid = row['match_id'] + sid = row['steam_id_64'] + uid = str(row['uid']) + if mid in match_teams: + for role, uids in match_teams[mid]: + if uid in uids: + fh_rows.append({ + 'match_id': mid, + 'steam_id_64': sid, + 'fh_side': 'CT' if role == 1 else 'T' + }) + break + + df_fh_sides = pd.DataFrame(fh_rows) + if not df_fh_sides.empty: + df_fh_sides = df_fh_sides.merge(df_meta[['match_id', 'halftime_round']], on='match_id', how='left') + + # B. 
Get Kill Events
+ query_events = f"""
+ SELECT match_id, round_num, attacker_steam_id, victim_steam_id, event_type, is_headshot, event_time
+ FROM fact_round_events
+ WHERE event_type='kill'
+ AND (attacker_steam_id IN ({placeholders}) OR victim_steam_id IN ({placeholders}))
+ """
+ df_events = pd.read_sql_query(query_events, conn, params=valid_ids + valid_ids)
+
+ # C. Get Round Scores
+ query_rounds = f"""
+ SELECT match_id, round_num, ct_score, t_score, winner_side
+ FROM fact_rounds
+ WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))
+ """
+ df_rounds = pd.read_sql_query(query_rounds, conn, params=valid_ids)
+
+ # Fix missing winner_side by calculating it from score changes
+ if not df_rounds.empty:
+ df_rounds = df_rounds.sort_values(['match_id', 'round_num']).reset_index(drop=True)
+ df_rounds['prev_ct'] = df_rounds.groupby('match_id')['ct_score'].shift(1).fillna(0)
+ df_rounds['prev_t'] = df_rounds.groupby('match_id')['t_score'].shift(1).fillna(0)
+
+ # Determine winner based on score increment
+ df_rounds['ct_win'] = (df_rounds['ct_score'] > df_rounds['prev_ct'])
+ df_rounds['t_win'] = (df_rounds['t_score'] > df_rounds['prev_t'])
+
+ df_rounds['calculated_winner'] = np.where(df_rounds['ct_win'], 'CT',
+ np.where(df_rounds['t_win'], 'T', None))
+
+ # Force overwrite winner_side with the calculated winner since the DB column is unreliable (mostly NULL)
+ df_rounds['winner_side'] = df_rounds['calculated_winner']
+
+ # Round 1 is handled correctly by the logic above (previous scores default to 0).
+
+ # --- Process Logic ---
+ has_events = not df_events.empty
+ has_sides = not df_fh_sides.empty
+
+ if has_events and has_sides:
+ # 1. Attacker Side
+ df_events = df_events.merge(df_fh_sides, left_on=['match_id', 'attacker_steam_id'], right_on=['match_id', 'steam_id_64'], how='left')
+ df_events.rename(columns={'fh_side': 'att_fh_side'}, inplace=True)
+ df_events.drop(columns=['steam_id_64'], inplace=True)
+
+ # 2. Victim Side
+ df_events = df_events.merge(df_fh_sides, left_on=['match_id', 'victim_steam_id'], right_on=['match_id', 'steam_id_64'], how='left', suffixes=('', '_vic'))
+ df_events.rename(columns={'fh_side': 'vic_fh_side'}, inplace=True)
+ df_events.drop(columns=['steam_id_64'], inplace=True)
+
+ # 3. Determine Actual Side (CT/T)
+ # Logic: If round <= halftime -> FH Side. Else -> Opposite.
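+ # Example: with halftime_round = 12, a player whose first-half side is CT is counted as CT
+ # for rounds 1-12 and as T from round 13 onward.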
+ def calc_side(fh_side, round_num, halftime): + if pd.isna(fh_side): return None + if round_num <= halftime: return fh_side + return 'T' if fh_side == 'CT' else 'CT' + + # Vectorized approach + # Attacker + mask_fh_att = df_events['round_num'] <= df_events['halftime_round'] + df_events['attacker_side'] = np.where(mask_fh_att, df_events['att_fh_side'], + np.where(df_events['att_fh_side'] == 'CT', 'T', 'CT')) + # Victim + mask_fh_vic = df_events['round_num'] <= df_events['halftime_round'] + df_events['victim_side'] = np.where(mask_fh_vic, df_events['vic_fh_side'], + np.where(df_events['vic_fh_side'] == 'CT', 'T', 'CT')) + + # Merge Scores + df_events = df_events.merge(df_rounds, on=['match_id', 'round_num'], how='left') + + # --- HPS: Match Point & Comeback --- + # Match Point Win Rate + mp_rounds = df_rounds[((df_rounds['ct_score'] == 12) | (df_rounds['t_score'] == 12) | + (df_rounds['ct_score'] == 15) | (df_rounds['t_score'] == 15))] + + if not mp_rounds.empty and has_sides: + # Need player side for these rounds + # Expand sides for all rounds + q_all_rounds = f"SELECT match_id, round_num FROM fact_rounds WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))" + df_all_rounds = pd.read_sql_query(q_all_rounds, conn, params=valid_ids) + + df_player_rounds = df_all_rounds.merge(df_fh_sides, on='match_id') + mask_fh = df_player_rounds['round_num'] <= df_player_rounds['halftime_round'] + df_player_rounds['side'] = np.where(mask_fh, df_player_rounds['fh_side'], + np.where(df_player_rounds['fh_side'] == 'CT', 'T', 'CT')) + + # Filter for MP rounds + # Join mp_rounds with df_player_rounds + mp_player = df_player_rounds.merge(mp_rounds[['match_id', 'round_num', 'winner_side']], on=['match_id', 'round_num']) + mp_player['is_win'] = (mp_player['side'] == mp_player['winner_side']).astype(int) + + hps_mp = mp_player.groupby('steam_id_64')['is_win'].mean().reset_index() + hps_mp.rename(columns={'is_win': 'hps_match_point_win_rate'}, inplace=True) + df = df.merge(hps_mp, on='steam_id_64', how='left') + else: + df['hps_match_point_win_rate'] = 0.5 + + # Comeback KD Diff + # Attacker Context + df_events['att_team_score'] = np.where(df_events['attacker_side'] == 'CT', df_events['ct_score'], df_events['t_score']) + df_events['att_opp_score'] = np.where(df_events['attacker_side'] == 'CT', df_events['t_score'], df_events['ct_score']) + df_events['is_comeback_att'] = (df_events['att_team_score'] + 4 <= df_events['att_opp_score']) + + # Victim Context + df_events['vic_team_score'] = np.where(df_events['victim_side'] == 'CT', df_events['ct_score'], df_events['t_score']) + df_events['vic_opp_score'] = np.where(df_events['victim_side'] == 'CT', df_events['t_score'], df_events['ct_score']) + df_events['is_comeback_vic'] = (df_events['vic_team_score'] + 4 <= df_events['vic_opp_score']) + + att_k = df_events.groupby('attacker_steam_id').size() + vic_d = df_events.groupby('victim_steam_id').size() + + cb_k = df_events[df_events['is_comeback_att']].groupby('attacker_steam_id').size() + cb_d = df_events[df_events['is_comeback_vic']].groupby('victim_steam_id').size() + + kd_stats = pd.DataFrame({'k': att_k, 'd': vic_d, 'cb_k': cb_k, 'cb_d': cb_d}).fillna(0) + kd_stats['kd'] = kd_stats['k'] / kd_stats['d'].replace(0, 1) + kd_stats['cb_kd'] = kd_stats['cb_k'] / kd_stats['cb_d'].replace(0, 1) + kd_stats['hps_comeback_kd_diff'] = kd_stats['cb_kd'] - kd_stats['kd'] + + kd_stats.index.name = 'steam_id_64' + df = df.merge(kd_stats[['hps_comeback_kd_diff']], on='steam_id_64', 
how='left') + + # --- PTL: Pistol Stats --- + pistol_rounds = [1, 13] + df_pistol = df_events[df_events['round_num'].isin(pistol_rounds)] + + if not df_pistol.empty: + pk = df_pistol.groupby('attacker_steam_id').size() + pd_death = df_pistol.groupby('victim_steam_id').size() + p_stats = pd.DataFrame({'pk': pk, 'pd': pd_death}).fillna(0) + p_stats['ptl_pistol_kd'] = p_stats['pk'] / p_stats['pd'].replace(0, 1) + + phs = df_pistol[df_pistol['is_headshot'] == 1].groupby('attacker_steam_id').size() + p_stats['phs'] = phs + p_stats['phs'] = p_stats['phs'].fillna(0) + p_stats['ptl_pistol_util_efficiency'] = p_stats['phs'] / p_stats['pk'].replace(0, 1) + + p_stats.index.name = 'steam_id_64' + df = df.merge(p_stats[['ptl_pistol_kd', 'ptl_pistol_util_efficiency']], on='steam_id_64', how='left') + else: + df['ptl_pistol_kd'] = 1.0 + df['ptl_pistol_util_efficiency'] = 0.0 + + # --- T/CT Stats --- + ct_k = df_events[df_events['attacker_side'] == 'CT'].groupby('attacker_steam_id').size() + ct_d = df_events[df_events['victim_side'] == 'CT'].groupby('victim_steam_id').size() + t_k = df_events[df_events['attacker_side'] == 'T'].groupby('attacker_steam_id').size() + t_d = df_events[df_events['victim_side'] == 'T'].groupby('victim_steam_id').size() + + side_stats = pd.DataFrame({'ct_k': ct_k, 'ct_d': ct_d, 't_k': t_k, 't_d': t_d}).fillna(0) + side_stats['side_rating_ct'] = side_stats['ct_k'] / side_stats['ct_d'].replace(0, 1) + side_stats['side_rating_t'] = side_stats['t_k'] / side_stats['t_d'].replace(0, 1) + side_stats['side_kd_diff_ct_t'] = side_stats['side_rating_ct'] - side_stats['side_rating_t'] + + side_stats.index.name = 'steam_id_64' + df = df.merge(side_stats[['side_rating_ct', 'side_rating_t', 'side_kd_diff_ct_t']], on='steam_id_64', how='left') + + # Side First Kill Rate + # Need total rounds per side for denominator + # Use df_player_rounds calculated in Match Point section + # If not calculated there (no MP rounds), calc now + if 'df_player_rounds' not in locals(): + q_all_rounds = f"SELECT match_id, round_num FROM fact_rounds WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))" + df_all_rounds = pd.read_sql_query(q_all_rounds, conn, params=valid_ids) + df_player_rounds = df_all_rounds.merge(df_fh_sides, on='match_id') + mask_fh = df_player_rounds['round_num'] <= df_player_rounds['halftime_round'] + df_player_rounds['side'] = np.where(mask_fh, df_player_rounds['fh_side'], + np.where(df_player_rounds['fh_side'] == 'CT', 'T', 'CT')) + + rounds_per_side = df_player_rounds.groupby(['steam_id_64', 'side']).size().unstack(fill_value=0) + if 'CT' not in rounds_per_side.columns: rounds_per_side['CT'] = 0 + if 'T' not in rounds_per_side.columns: rounds_per_side['T'] = 0 + + # First Kills (Earliest event in round) + # Group by match, round -> min time. 
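+ # After sorting by event_time, drop_duplicates keeps only the earliest kill per (match_id, round_num),
+ # i.e. the opening kill of each round.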
+ fk_events = df_events.sort_values('event_time').drop_duplicates(['match_id', 'round_num']) + fk_ct = fk_events[fk_events['attacker_side'] == 'CT'].groupby('attacker_steam_id').size() + fk_t = fk_events[fk_events['attacker_side'] == 'T'].groupby('attacker_steam_id').size() + + fk_stats = pd.DataFrame({'fk_ct': fk_ct, 'fk_t': fk_t}).fillna(0) + fk_stats = fk_stats.join(rounds_per_side, how='outer').fillna(0) + + fk_stats['side_first_kill_rate_ct'] = fk_stats['fk_ct'] / fk_stats['CT'].replace(0, 1) + fk_stats['side_first_kill_rate_t'] = fk_stats['fk_t'] / fk_stats['T'].replace(0, 1) + + fk_stats.index.name = 'steam_id_64' + df = df.merge(fk_stats[['side_first_kill_rate_ct', 'side_first_kill_rate_t']], on='steam_id_64', how='left') + + else: + # Fallbacks + cols = ['hps_match_point_win_rate', 'hps_comeback_kd_diff', 'ptl_pistol_kd', 'ptl_pistol_util_efficiency', + 'side_rating_ct', 'side_rating_t', 'side_first_kill_rate_ct', 'side_first_kill_rate_t', 'side_kd_diff_ct_t'] + for c in cols: + df[c] = 0 + + df['hps_match_point_win_rate'] = df['hps_match_point_win_rate'].fillna(0.5) + + # HPS Pressure Entry Rate (Entry Kills in Losing Matches) + q_mp_team = f"SELECT match_id, steam_id_64, is_win, entry_kills FROM fact_match_players WHERE steam_id_64 IN ({placeholders})" + df_mp_team = pd.read_sql_query(q_mp_team, conn, params=valid_ids) + if not df_mp_team.empty: + losing_matches = df_mp_team[df_mp_team['is_win'] == 0] + if not losing_matches.empty: + # Average entry kills per losing match + pressure_entry = losing_matches.groupby('steam_id_64')['entry_kills'].mean().reset_index() + pressure_entry.rename(columns={'entry_kills': 'hps_pressure_entry_rate'}, inplace=True) + df = df.merge(pressure_entry, on='steam_id_64', how='left') + + if 'hps_pressure_entry_rate' not in df.columns: + df['hps_pressure_entry_rate'] = 0 + df['hps_pressure_entry_rate'] = df['hps_pressure_entry_rate'].fillna(0) + + # 5. 
PTL (Additional Features: Kills & Multi) + query_ptl = f""" + SELECT ev.attacker_steam_id as steam_id_64, COUNT(*) as pistol_kills + FROM fact_round_events ev + WHERE ev.event_type = 'kill' AND ev.round_num IN (1, 13) + AND ev.attacker_steam_id IN ({placeholders}) + GROUP BY ev.attacker_steam_id + """ + df_ptl = pd.read_sql_query(query_ptl, conn, params=valid_ids) + if not df_ptl.empty: + df = df.merge(df_ptl, on='steam_id_64', how='left') + df['ptl_pistol_kills'] = df['pistol_kills'] / df['matches_played'] + else: + df['ptl_pistol_kills'] = 0 + + query_ptl_multi = f""" + SELECT attacker_steam_id as steam_id_64, COUNT(*) as multi_cnt + FROM ( + SELECT match_id, round_num, attacker_steam_id, COUNT(*) as k + FROM fact_round_events + WHERE event_type = 'kill' AND round_num IN (1, 13) + AND attacker_steam_id IN ({placeholders}) + GROUP BY match_id, round_num, attacker_steam_id + HAVING k >= 2 + ) + GROUP BY attacker_steam_id + """ + df_ptl_multi = pd.read_sql_query(query_ptl_multi, conn, params=valid_ids) + if not df_ptl_multi.empty: + df = df.merge(df_ptl_multi, on='steam_id_64', how='left') + df['ptl_pistol_multikills'] = df['multi_cnt'] / df['matches_played'] + else: + df['ptl_pistol_multikills'] = 0 + + # PTL Win Rate (Pandas Logic using fixed winner_side) + if not df_rounds.empty and has_sides: + # Ensure df_player_rounds exists + if 'df_player_rounds' not in locals(): + q_all_rounds = f"SELECT match_id, round_num FROM fact_rounds WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))" + df_all_rounds = pd.read_sql_query(q_all_rounds, conn, params=valid_ids) + df_player_rounds = df_all_rounds.merge(df_fh_sides, on='match_id') + mask_fh = df_player_rounds['round_num'] <= df_player_rounds['halftime_round'] + df_player_rounds['side'] = np.where(mask_fh, df_player_rounds['fh_side'], + np.where(df_player_rounds['fh_side'] == 'CT', 'T', 'CT')) + + # Filter for Pistol Rounds (1, 13) + player_pistol = df_player_rounds[df_player_rounds['round_num'].isin([1, 13])].copy() + + # Merge with df_rounds to get calculated winner_side + # Note: df_rounds has the fixed 'winner_side' column + player_pistol = player_pistol.merge(df_rounds[['match_id', 'round_num', 'winner_side']], on=['match_id', 'round_num'], how='left') + + # Calculate Win + player_pistol['is_win'] = (player_pistol['side'] == player_pistol['winner_side']).astype(int) + + ptl_wins = player_pistol.groupby('steam_id_64')['is_win'].agg(['sum', 'count']).reset_index() + ptl_wins.rename(columns={'sum': 'pistol_wins', 'count': 'pistol_rounds'}, inplace=True) + + ptl_wins['ptl_pistol_win_rate'] = ptl_wins['pistol_wins'] / ptl_wins['pistol_rounds'].replace(0, 1) + df = df.merge(ptl_wins[['steam_id_64', 'ptl_pistol_win_rate']], on='steam_id_64', how='left') + else: + df['ptl_pistol_win_rate'] = 0.5 + + df['ptl_pistol_multikills'] = df['ptl_pistol_multikills'].fillna(0) + df['ptl_pistol_win_rate'] = df['ptl_pistol_win_rate'].fillna(0.5) + + # 7. UTIL (Enhanced with Prop Frequency) + # Usage Rate: Average number of grenades purchased per round + df['util_usage_rate'] = ( + df['sum_util_flash'] + df['sum_util_smoke'] + + df['sum_util_molotov'] + df['sum_util_he'] + df['sum_util_decoy'] + ) / df['rounds_played'].replace(0, 1) * 100 # Multiply by 100 to make it comparable to other metrics (e.g. 
1.5 nades/round -> 150) + + # Fallback if no new data yet (rely on old logic or keep 0) + # We can try to fetch equipment_value as backup if sum is 0 + if df['util_usage_rate'].sum() == 0: + query_eco = f""" + SELECT steam_id_64, AVG(equipment_value) as avg_equip_val + FROM fact_round_player_economy + WHERE steam_id_64 IN ({placeholders}) + GROUP BY steam_id_64 + """ + df_eco = pd.read_sql_query(query_eco, conn, params=valid_ids) + if not df_eco.empty: + df_eco['util_usage_rate_backup'] = df_eco['avg_equip_val'] / 50.0 # Scaling factor for equipment value + df = df.merge(df_eco[['steam_id_64', 'util_usage_rate_backup']], on='steam_id_64', how='left') + df['util_usage_rate'] = df['util_usage_rate_backup'].fillna(0) + df.drop(columns=['util_usage_rate_backup'], inplace=True) + + # Final Mappings + df['total_matches'] = df['matches_played'] + + return df.fillna(0) + + + @staticmethod + def _calculate_ultimate_scores(df): + def n(col): + if col not in df.columns: return 50 + s = df[col] + if s.max() == s.min(): return 50 + return (s - s.min()) / (s.max() - s.min()) * 100 + + df = df.copy() + + # BAT (30%) + df['score_bat'] = ( + 0.25 * n('basic_avg_rating') + + 0.20 * n('basic_avg_kd') + + 0.15 * n('basic_avg_adr') + + 0.10 * n('bat_avg_duel_win_rate') + + 0.10 * n('bat_kd_diff_high_elo') + + 0.10 * n('basic_avg_kill_3') + ) + + # STA (15%) + df['score_sta'] = ( + 0.30 * (100 - n('sta_rating_volatility')) + + 0.30 * n('sta_loss_rating') + + 0.20 * n('sta_win_rating') + + 0.10 * (100 - abs(n('sta_time_rating_corr'))) + ) + + # HPS (20%) + df['score_hps'] = ( + 0.30 * n('sum_1v3p') + + 0.20 * n('hps_match_point_win_rate') + + 0.20 * n('hps_comeback_kd_diff') + + 0.15 * n('hps_pressure_entry_rate') + + 0.15 * n('basic_avg_rating') + ) + + # PTL (10%) + df['score_ptl'] = ( + 0.40 * n('ptl_pistol_kills') + + 0.40 * n('ptl_pistol_win_rate') + + 0.20 * n('basic_avg_headshot_kills') # Pistol rounds rely on HS + ) + + # T/CT (10%) + df['score_tct'] = ( + 0.35 * n('side_rating_ct') + + 0.35 * n('side_rating_t') + + 0.15 * n('side_first_kill_rate_ct') + + 0.15 * n('side_first_kill_rate_t') + ) + + # UTIL (10%) + # Emphasize prop frequency (usage_rate) + df['score_util'] = ( + 0.35 * n('util_usage_rate') + + 0.25 * n('util_avg_nade_dmg') + + 0.20 * n('util_avg_flash_time') + + 0.20 * n('util_avg_flash_enemy') + ) + + return df + + @staticmethod + def get_roster_features_distribution(target_steam_id): + """ + Calculates rank and distribution of the target player's L3 features (Scores) within the active roster. + """ + from web.services.web_service import WebService + import json + + # 1. Get Active Roster IDs + lineups = WebService.get_lineups() + active_roster_ids = [] + if lineups: + try: + raw_ids = json.loads(lineups[0]['player_ids_json']) + active_roster_ids = [str(uid) for uid in raw_ids] + except: + pass + + if not active_roster_ids: + return None + + # 2. Fetch L3 features for all roster members + placeholders = ','.join('?' 
for _ in active_roster_ids) + sql = f""" + SELECT + steam_id_64, + score_bat, score_sta, score_hps, score_ptl, score_tct, score_util + FROM dm_player_features + WHERE steam_id_64 IN ({placeholders}) + """ + rows = query_db('l3', sql, active_roster_ids) + + if not rows: + return None + + stats_map = {row['steam_id_64']: dict(row) for row in rows} + target_steam_id = str(target_steam_id) + + # If target not in map (maybe no L3 data yet), default to 0 + if target_steam_id not in stats_map: + stats_map[target_steam_id] = { + 'score_bat': 0, 'score_sta': 0, 'score_hps': 0, + 'score_ptl': 0, 'score_tct': 0, 'score_util': 0 + } + + # 3. Calculate Distribution + metrics = ['score_bat', 'score_sta', 'score_hps', 'score_ptl', 'score_tct', 'score_util'] + result = {} + + for m in metrics: + values = [p.get(m, 0) or 0 for p in stats_map.values()] + target_val = stats_map[target_steam_id].get(m, 0) or 0 + + if not values: + result[m] = None + continue + + values.sort(reverse=True) + + try: + rank = values.index(target_val) + 1 + except ValueError: + rank = len(values) + + result[m] = { + 'val': target_val, + 'rank': rank, + 'total': len(values), + 'min': min(values), + 'max': max(values), + 'avg': sum(values) / len(values) + } + + return result diff --git a/web/services/stats_service.py b/web/services/stats_service.py index 6db212c..9cc1546 100644 --- a/web/services/stats_service.py +++ b/web/services/stats_service.py @@ -589,8 +589,10 @@ class StatsService: def get_roster_stats_distribution(target_steam_id): """ Calculates rank and distribution of the target player within the active roster. + Now covers all L3 Basic Features for Detailed Panel. """ from web.services.web_service import WebService + from web.services.feature_service import FeatureService import json import numpy as np @@ -604,72 +606,64 @@ class StatsService: except: pass - # Ensure target is in list (if not in roster, compare against roster anyway) - # If roster is empty, return None if not active_roster_ids: return None - # 2. Fetch stats for all roster members + # 2. Fetch L3 features for all roster members + # We need to use FeatureService to get the full L3 set (including detailed stats) + # Assuming L3 data is up to date. + placeholders = ','.join('?' for _ in active_roster_ids) - sql = f""" - SELECT - CAST(steam_id_64 AS TEXT) as steam_id_64, - AVG(rating) as rating, - AVG(kd_ratio) as kd, - AVG(adr) as adr, - AVG(kast) as kast - FROM fact_match_players - WHERE CAST(steam_id_64 AS TEXT) IN ({placeholders}) - GROUP BY steam_id_64 - """ - rows = query_db('l2', sql, active_roster_ids) + sql = f"SELECT * FROM dm_player_features WHERE steam_id_64 IN ({placeholders})" + rows = query_db('l3', sql, active_roster_ids) if not rows: return None stats_map = {row['steam_id_64']: dict(row) for row in rows} - - # Ensure target_steam_id is string target_steam_id = str(target_steam_id) - # If target player not in stats_map (e.g. no matches), handle gracefullly + # If target not in map (e.g. no L3 data), try to add empty default if target_steam_id not in stats_map: - # Try fetch target stats individually if not in roster list - target_stats = StatsService.get_player_basic_stats(target_steam_id) - if target_stats: - stats_map[target_steam_id] = target_stats - else: - # If still no stats, we can't rank them. - # But we can still return the roster stats for others? - # The prompt implies "No team data" appears, meaning this function returns valid structure but empty values? - # Or returns None. 
- # Let's verify what happens if target has no stats but others do. - # We should probably add a dummy entry for target so dashboard renders '0' instead of crashing or 'No data' - stats_map[target_steam_id] = {'rating': 0, 'kd': 0, 'adr': 0, 'kast': 0} - - # 3. Calculate Distribution - metrics = ['rating', 'kd', 'adr', 'kast'] + stats_map[target_steam_id] = {} + + # 3. Calculate Distribution for ALL metrics + # Define metrics list (must match Detailed Panel keys) + metrics = [ + 'basic_avg_rating', 'basic_avg_kd', 'basic_avg_kast', 'basic_avg_rws', 'basic_avg_adr', + 'basic_avg_headshot_kills', 'basic_headshot_rate', 'basic_avg_assisted_kill', 'basic_avg_awp_kill', 'basic_avg_jump_count', + 'basic_avg_first_kill', 'basic_avg_first_death', 'basic_first_kill_rate', 'basic_first_death_rate', + 'basic_avg_kill_2', 'basic_avg_kill_3', 'basic_avg_kill_4', 'basic_avg_kill_5', + 'basic_avg_perfect_kill', 'basic_avg_revenge_kill', + # L3 Advanced Dimensions + 'sta_last_30_rating', 'sta_win_rating', 'sta_loss_rating', 'sta_rating_volatility', 'sta_time_rating_corr', + 'bat_kd_diff_high_elo', 'bat_avg_duel_win_rate', 'bat_avg_duel_freq', + 'hps_clutch_win_rate_1v1', 'hps_clutch_win_rate_1v3_plus', 'hps_match_point_win_rate', 'hps_pressure_entry_rate', 'hps_comeback_kd_diff', + 'ptl_pistol_kills', 'ptl_pistol_win_rate', 'ptl_pistol_kd', + 'side_rating_ct', 'side_rating_t', 'side_first_kill_rate_ct', 'side_first_kill_rate_t', 'side_kd_diff_ct_t', + 'util_avg_nade_dmg', 'util_avg_flash_time', 'util_avg_flash_enemy', 'util_usage_rate' + ] + + # Mapping for L2 legacy calls (if any) - mainly map 'rating' to 'basic_avg_rating' etc if needed + # But here we just use L3 columns directly. + result = {} for m in metrics: - # Extract values for this metric from all players - values = [p[m] for p in stats_map.values() if p[m] is not None] - target_val = stats_map[target_steam_id].get(m) + values = [p.get(m, 0) or 0 for p in stats_map.values()] + target_val = stats_map[target_steam_id].get(m, 0) or 0 - if target_val is None or not values: + if not values: result[m] = None continue - # Sort descending (higher is better) values.sort(reverse=True) - # Rank (1-based) + # Rank try: rank = values.index(target_val) + 1 except ValueError: - # Floating point precision issue? Find closest - closest = min(values, key=lambda x: abs(x - target_val)) - rank = values.index(closest) + 1 + rank = len(values) result[m] = { 'val': target_val, @@ -680,6 +674,16 @@ class StatsService: 'avg': sum(values) / len(values) } + # Legacy mapping for top cards (rating, kd, adr, kast) + legacy_map = { + 'basic_avg_rating': 'rating', + 'basic_avg_kd': 'kd', + 'basic_avg_adr': 'adr', + 'basic_avg_kast': 'kast' + } + if m in legacy_map: + result[legacy_map[m]] = result[m] + return result @staticmethod diff --git a/web/templates/players/profile.html b/web/templates/players/profile.html index a820665..8be5a52 100644 --- a/web/templates/players/profile.html +++ b/web/templates/players/profile.html @@ -141,6 +141,153 @@ + +