from web.database import query_db, get_db, execute_db
import sqlite3
import pandas as pd
import numpy as np
class FeatureService:
    @staticmethod
    def get_player_features(steam_id):
        sql = "SELECT * FROM dm_player_features WHERE steam_id_64 = ?"
        return query_db('l3', sql, [steam_id], one=True)
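    # Minimal caller sketch (the steam id below is a hypothetical example):
    #   row = FeatureService.get_player_features('76561198000000000')
    #   rating = row['basic_avg_rating'] if row else None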
    @staticmethod
    def get_players_list(page=1, per_page=20, sort_by='rating', search=None):
        offset = (page - 1) * per_page
        # Map public sort keys to L3 feature columns
        sort_map = {
            'rating': 'basic_avg_rating',
            'kd': 'basic_avg_kd',
            'kast': 'basic_avg_kast',
            'matches': 'matches_played'
        }
        order_col = sort_map.get(sort_by, 'basic_avg_rating')
        from web.services.stats_service import StatsService
        # Helper to attach match counts from the L2 fact table
        def attach_match_counts(player_list):
            if not player_list:
                return
            ids = [p['steam_id_64'] for p in player_list]
            # Batch query for counts from L2
            placeholders = ','.join('?' for _ in ids)
            sql = f"""
                SELECT steam_id_64, COUNT(*) as cnt
                FROM fact_match_players
                WHERE steam_id_64 IN ({placeholders})
                GROUP BY steam_id_64
            """
            counts = query_db('l2', sql, ids)
            cnt_dict = {r['steam_id_64']: r['cnt'] for r in counts}
            for p in player_list:
                p['matches_played'] = cnt_dict.get(p['steam_id_64'], 0)
        if search:
            # Search mode: fetch up to 100 matching players from L2, then enrich with L3 features
            l2_players, _ = StatsService.get_players(page=1, per_page=100, search=search)
            if not l2_players:
                return [], 0
            steam_ids = [p['steam_id_64'] for p in l2_players]
            placeholders = ','.join('?' for _ in steam_ids)
            sql = f"SELECT * FROM dm_player_features WHERE steam_id_64 IN ({placeholders})"
            features = query_db('l3', sql, steam_ids)
            f_dict = {f['steam_id_64']: f for f in features}
            # Get match counts for sorting
            count_sql = f"SELECT steam_id_64, COUNT(*) as cnt FROM fact_match_players WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64"
            counts = query_db('l2', count_sql, steam_ids)
            cnt_dict = {r['steam_id_64']: r['cnt'] for r in counts}
            merged = []
            for p in l2_players:
                f = f_dict.get(p['steam_id_64'])
                m = dict(p)
                if f:
                    m.update(dict(f))
                else:
                    # Fallback: compute basic stats on the fly when L3 has no row
                    stats = StatsService.get_player_basic_stats(p['steam_id_64'])
                    if stats:
                        m['basic_avg_rating'] = stats['rating']
                        m['basic_avg_kd'] = stats['kd']
                        m['basic_avg_kast'] = stats['kast']
                    else:
                        m['basic_avg_rating'] = 0
                        m['basic_avg_kd'] = 0
                        m['basic_avg_kast'] = 0
                m['matches_played'] = cnt_dict.get(p['steam_id_64'], 0)
                merged.append(m)
            merged.sort(key=lambda x: x.get(order_col, 0) or 0, reverse=True)
            total = len(merged)
            start = (page - 1) * per_page
            end = start + per_page
            return merged[start:end], total
        else:
            # Browse mode
            l3_count = query_db('l3', "SELECT COUNT(*) as cnt FROM dm_player_features", one=True)['cnt']
            if l3_count == 0 or sort_by == 'matches':
                if sort_by == 'matches':
                    sql = """
                        SELECT steam_id_64, COUNT(*) as cnt
                        FROM fact_match_players
                        GROUP BY steam_id_64
                        ORDER BY cnt DESC
                        LIMIT ? OFFSET ?
                    """
                    top_ids = query_db('l2', sql, [per_page, offset])
                    if not top_ids:
                        return [], 0
                    total = query_db('l2', "SELECT COUNT(DISTINCT steam_id_64) as cnt FROM fact_match_players", one=True)['cnt']
                    ids = [r['steam_id_64'] for r in top_ids]
                    l2_players = StatsService.get_players_by_ids(ids)
                    # Merge L2 profiles with L3 features, preserving the count-based order
                    merged = []
                    p_ph = ','.join('?' for _ in ids)
                    f_sql = f"SELECT * FROM dm_player_features WHERE steam_id_64 IN ({p_ph})"
                    features = query_db('l3', f_sql, ids)
                    f_dict = {f['steam_id_64']: f for f in features}
                    p_dict = {p['steam_id_64']: p for p in l2_players}
                    for r in top_ids:
                        sid = r['steam_id_64']
                        p = p_dict.get(sid)
                        if not p:
                            continue
                        m = dict(p)
                        f = f_dict.get(sid)
                        if f:
                            m.update(dict(f))
                        else:
                            stats = StatsService.get_player_basic_stats(sid)
                            if stats:
                                m['basic_avg_rating'] = stats['rating']
                                m['basic_avg_kd'] = stats['kd']
                                m['basic_avg_kast'] = stats['kast']
                            else:
                                m['basic_avg_rating'] = 0
                                m['basic_avg_kd'] = 0
                                m['basic_avg_kast'] = 0
                        m['matches_played'] = r['cnt']
                        merged.append(m)
                    return merged, total
                # L3 empty fallback: serve L2 profiles with on-the-fly basic stats
                l2_players, total = StatsService.get_players(page, per_page, sort_by=None)
                merged = []
                attach_match_counts(l2_players)
                for p in l2_players:
                    m = dict(p)
                    stats = StatsService.get_player_basic_stats(p['steam_id_64'])
                    if stats:
                        m['basic_avg_rating'] = stats['rating']
                        m['basic_avg_kd'] = stats['kd']
                        m['basic_avg_kast'] = stats['kast']
                    else:
                        m['basic_avg_rating'] = 0
                        m['basic_avg_kd'] = 0
                        m['basic_avg_kast'] = 0
                    m['matches_played'] = p.get('matches_played', 0)
                    merged.append(m)
                if sort_by != 'rating':
                    merged.sort(key=lambda x: x.get(order_col, 0) or 0, reverse=True)
                return merged, total
            # Normal L3 browse: sort and paginate directly in SQL
            sql = f"SELECT * FROM dm_player_features ORDER BY {order_col} DESC LIMIT ? OFFSET ?"
            features = query_db('l3', sql, [per_page, offset])
            total = query_db('l3', "SELECT COUNT(*) as cnt FROM dm_player_features", one=True)['cnt']
            if not features:
                return [], total
            steam_ids = [f['steam_id_64'] for f in features]
            l2_players = StatsService.get_players_by_ids(steam_ids)
            p_dict = {p['steam_id_64']: p for p in l2_players}
            merged = []
            for f in features:
                m = dict(f)
                p = p_dict.get(f['steam_id_64'])
                if p:
                    m.update(dict(p))
                else:
                    # No L2 profile: fall back to the raw id as display name
                    m['username'] = f['steam_id_64']
                    m['avatar_url'] = None
                merged.append(m)
            return merged, total
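    # Minimal caller sketch (hypothetical pagination values; the web layer is
    # assumed to drive a pager from `total`):
    #   players, total = FeatureService.get_players_list(page=2, per_page=20, sort_by='kd')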
    @staticmethod
    def rebuild_all_features(min_matches=5):
        """
        Refreshes the L3 data mart with full feature calculations.

        NOTE: min_matches is accepted for API compatibility but is not
        currently applied as a filter in this implementation.
        """
        from web.config import Config
        from web.services.web_service import WebService
        import json

        l3_db_path = Config.DB_L3_PATH
        l2_db_path = Config.DB_L2_PATH

        # Collect the players that appear in any team lineup
        lineups = WebService.get_lineups()
        team_player_ids = set()
        for lineup in lineups:
            if lineup['player_ids_json']:
                try:
                    ids = json.loads(lineup['player_ids_json'])
                    # Ensure IDs are strings
                    team_player_ids.update([str(i) for i in ids])
                except (json.JSONDecodeError, TypeError):
                    # Malformed lineup JSON: skip this lineup
                    pass
        if not team_player_ids:
            print("No players found in any team lineup. Skipping L3 rebuild.")
            return 0
        conn_l2 = sqlite3.connect(l2_db_path)
        conn_l2.row_factory = sqlite3.Row
        try:
            print(f"Loading L2 data for {len(team_player_ids)} players...")
            df = FeatureService._load_and_calculate_dataframe(conn_l2, list(team_player_ids))
            if df is None or df.empty:
                print("No data to process.")
                return 0
            print("Calculating scores...")
            df = FeatureService._calculate_ultimate_scores(df)
            print("Saving to L3...")
            conn_l3 = sqlite3.connect(l3_db_path)
            try:
                cursor = conn_l3.cursor()
                # Keep only the DataFrame columns that exist in the target table
                cursor.execute("PRAGMA table_info(dm_player_features)")
                valid_cols = [r[1] for r in cursor.fetchall()]
                df_cols = [c for c in df.columns if c in valid_cols]
                df_to_save = df[df_cols].copy()
                df_to_save['updated_at'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
                print(f"DEBUG: Saving {len(df_to_save.columns)} columns to L3. Sample side_kd_ct: {df_to_save.get('side_kd_ct', pd.Series([0])).iloc[0]}")
                placeholders = ','.join(['?'] * len(df_to_save.columns))
                cols_str = ','.join(df_to_save.columns)
                sql = f"INSERT OR REPLACE INTO dm_player_features ({cols_str}) VALUES ({placeholders})"
                data = df_to_save.values.tolist()
                cursor.executemany(sql, data)
                conn_l3.commit()
            finally:
                # Close the L3 connection even if the save fails
                conn_l3.close()
            return len(df)
        except Exception as e:
            print(f"Rebuild Error: {e}")
            import traceback
            traceback.print_exc()
            return 0
        finally:
            conn_l2.close()
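    # Minimal rebuild sketch (assumes this runs where web.config.Config is
    # importable, e.g. a management script or shell):
    #   n = FeatureService.rebuild_all_features()
    #   print(f"Rebuilt L3 features for {n} players")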
    @staticmethod
    def _load_and_calculate_dataframe(conn, player_ids):
        if not player_ids:
            return None
        placeholders = ','.join(['?'] * len(player_ids))
        # 1. Basic stats, aggregated per player over all matches
        query_basic = f"""
            SELECT
                steam_id_64,
                COUNT(*) as matches_played,
                SUM(round_total) as rounds_played,
                AVG(rating) as basic_avg_rating,
                AVG(kd_ratio) as basic_avg_kd,
                AVG(adr) as basic_avg_adr,
                AVG(kast) as basic_avg_kast,
                AVG(rws) as basic_avg_rws,
                SUM(headshot_count) as sum_hs,
                SUM(kills) as sum_kills,
                SUM(deaths) as sum_deaths,
                SUM(first_kill) as sum_fk,
                SUM(first_death) as sum_fd,
                SUM(clutch_1v1) as sum_1v1,
                SUM(clutch_1v2) as sum_1v2,
                SUM(clutch_1v3) + SUM(clutch_1v4) + SUM(clutch_1v5) as sum_1v3p,
                SUM(kill_2) as sum_2k,
                SUM(kill_3) as sum_3k,
                SUM(kill_4) as sum_4k,
                SUM(kill_5) as sum_5k,
                SUM(assisted_kill) as sum_assist,
                SUM(perfect_kill) as sum_perfect,
                SUM(revenge_kill) as sum_revenge,
                SUM(awp_kill) as sum_awp,
                SUM(jump_count) as sum_jump,
                SUM(mvp_count) as sum_mvps,
                SUM(planted_bomb) as sum_plants,
                SUM(defused_bomb) as sum_defuses,
                SUM(CASE
                    WHEN flash_assists > 0 THEN flash_assists
                    WHEN assists > assisted_kill THEN assists - assisted_kill
                    ELSE 0
                END) as sum_flash_assists,
                SUM(throw_harm) as sum_util_dmg,
                SUM(flash_time) as sum_flash_time,
                SUM(flash_enemy) as sum_flash_enemy,
                SUM(flash_team) as sum_flash_team,
                SUM(util_flash_usage) as sum_util_flash,
                SUM(util_smoke_usage) as sum_util_smoke,
                SUM(util_molotov_usage) as sum_util_molotov,
                SUM(util_he_usage) as sum_util_he,
                SUM(util_decoy_usage) as sum_util_decoy
            FROM fact_match_players
            WHERE steam_id_64 IN ({placeholders})
            GROUP BY steam_id_64
        """
        df = pd.read_sql_query(query_basic, conn, params=player_ids)
        if df.empty:
            return None
        # Basic derived rates (denominators guarded against zero)
        df['basic_headshot_rate'] = df['sum_hs'] / df['sum_kills'].replace(0, 1)
        df['basic_avg_headshot_kills'] = df['sum_hs'] / df['matches_played']
        df['basic_avg_first_kill'] = df['sum_fk'] / df['matches_played']
        df['basic_avg_first_death'] = df['sum_fd'] / df['matches_played']
        df['basic_first_kill_rate'] = df['sum_fk'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1)
        df['basic_first_death_rate'] = df['sum_fd'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1)
        df['basic_avg_kill_2'] = df['sum_2k'] / df['matches_played']
        df['basic_avg_kill_3'] = df['sum_3k'] / df['matches_played']
        df['basic_avg_kill_4'] = df['sum_4k'] / df['matches_played']
        df['basic_avg_kill_5'] = df['sum_5k'] / df['matches_played']
        df['basic_avg_assisted_kill'] = df['sum_assist'] / df['matches_played']
        df['basic_avg_perfect_kill'] = df['sum_perfect'] / df['matches_played']
        df['basic_avg_revenge_kill'] = df['sum_revenge'] / df['matches_played']
        df['basic_avg_awp_kill'] = df['sum_awp'] / df['matches_played']
        df['basic_avg_jump_count'] = df['sum_jump'] / df['matches_played']
        df['basic_avg_mvps'] = df['sum_mvps'] / df['matches_played']
        df['basic_avg_plants'] = df['sum_plants'] / df['matches_played']
        df['basic_avg_defuses'] = df['sum_defuses'] / df['matches_played']
        df['basic_avg_flash_assists'] = df['sum_flash_assists'] / df['matches_played']
        # UTIL basics
        df['util_avg_nade_dmg'] = df['sum_util_dmg'] / df['matches_played']
        df['util_avg_flash_time'] = df['sum_flash_time'] / df['matches_played']
        df['util_avg_flash_enemy'] = df['sum_flash_enemy'] / df['matches_played']
        valid_ids = tuple(df['steam_id_64'].tolist())
        placeholders = ','.join(['?'] * len(valid_ids))
        # 2. STA (stability): per-match rating series per player
        query_sta = f"""
            SELECT mp.steam_id_64, mp.rating, mp.is_win, m.start_time, m.duration
            FROM fact_match_players mp
            JOIN fact_matches m ON mp.match_id = m.match_id
            WHERE mp.steam_id_64 IN ({placeholders})
            ORDER BY mp.steam_id_64, m.start_time
        """
        df_matches = pd.read_sql_query(query_sta, conn, params=valid_ids)
        sta_list = []
        for pid, group in df_matches.groupby('steam_id_64'):
            group = group.sort_values('start_time')
            last_30 = group.tail(30)
            # Fatigue heuristic: on days with 4+ matches, compare the rating of
            # the first 3 matches against the rest of that day
            group['date'] = pd.to_datetime(group['start_time'], unit='s').dt.date
            day_counts = group.groupby('date').size()
            busy_days = day_counts[day_counts >= 4].index  # days with 4+ matches
            fatigue_decays = []
            for day in busy_days:
                day_matches = group[group['date'] == day]
                if len(day_matches) >= 4:
                    early_rating = day_matches.head(3)['rating'].mean()
                    late_rating = day_matches.tail(len(day_matches) - 3)['rating'].mean()
                    fatigue_decays.append(early_rating - late_rating)
            avg_fatigue = np.mean(fatigue_decays) if fatigue_decays else 0
            sta_list.append({
                'steam_id_64': pid,
                'sta_last_30_rating': last_30['rating'].mean(),
                'sta_win_rating': group[group['is_win'] == 1]['rating'].mean(),
                'sta_loss_rating': group[group['is_win'] == 0]['rating'].mean(),
                'sta_rating_volatility': group.tail(10)['rating'].std() if len(group) > 1 else 0,
                'sta_time_rating_corr': group['duration'].corr(group['rating']) if len(group) > 2 and group['rating'].std() > 0 else 0,
                'sta_fatigue_decay': avg_fatigue
            })
        df = df.merge(pd.DataFrame(sta_list), on='steam_id_64', how='left')
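        # Worked example of the fatigue heuristic (illustrative numbers): a day
        # with 5 matches rated [1.2, 1.1, 1.3, 0.9, 0.8] compares mean(first 3)
        # = 1.20 against mean(rest) = 0.85, contributing a decay of +0.35;
        # sta_fatigue_decay averages this over all busy days.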
        # 3. BAT (performance vs high-ELO opposition)
        query_elo = f"""
            SELECT mp.steam_id_64, mp.kd_ratio,
                (SELECT AVG(group_origin_elo) FROM fact_match_teams fmt WHERE fmt.match_id = mp.match_id AND group_origin_elo > 0) as elo
            FROM fact_match_players mp
            WHERE mp.steam_id_64 IN ({placeholders})
        """
        df_elo = pd.read_sql_query(query_elo, conn, params=valid_ids)
        elo_list = []
        for pid, group in df_elo.groupby('steam_id_64'):
            avg = group['elo'].mean()
            if pd.isna(avg):
                # No ELO recorded for any match ("NaN or 1000" would not fire,
                # since NaN is truthy): fall back to a neutral split point
                avg = 1000
            elo_list.append({
                'steam_id_64': pid,
                'bat_kd_diff_high_elo': group[group['elo'] > avg]['kd_ratio'].mean(),
                'bat_kd_diff_low_elo': group[group['elo'] <= avg]['kd_ratio'].mean()
            })
        df = df.merge(pd.DataFrame(elo_list), on='steam_id_64', how='left')
        # Duel win rate (entry kills vs entry deaths)
        query_duel = f"""
            SELECT steam_id_64, SUM(entry_kills) as ek, SUM(entry_deaths) as ed
            FROM fact_match_players WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64
        """
        df_duel = pd.read_sql_query(query_duel, conn, params=valid_ids)
        df_duel['bat_avg_duel_win_rate'] = df_duel['ek'] / (df_duel['ek'] + df_duel['ed']).replace(0, 1)
        df = df.merge(df_duel[['steam_id_64', 'bat_avg_duel_win_rate']], on='steam_id_64', how='left')
        # 4. HPS (high-pressure situations)
        # Clutch rates per match
        df['hps_clutch_win_rate_1v1'] = df['sum_1v1'] / df['matches_played']
        df['hps_clutch_win_rate_1v3_plus'] = df['sum_1v3p'] / df['matches_played']
        # Prepare detailed event data for HPS (comeback), PTL (KD) and T/CT sides
        # A. Determine side info using fact_match_teams
        # 1. Get match teams
        query_teams = f"""
            SELECT match_id, group_fh_role, group_uids
            FROM fact_match_teams
            WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))
        """
        df_teams = pd.read_sql_query(query_teams, conn, params=valid_ids)
        # 2. Get player UIDs
        query_uids = f"SELECT match_id, steam_id_64, uid FROM fact_match_players WHERE steam_id_64 IN ({placeholders})"
        df_uids = pd.read_sql_query(query_uids, conn, params=valid_ids)
        # 3. Get match meta (start time decides MR12 vs MR15)
        query_meta = f"SELECT match_id, start_time FROM fact_matches WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))"
        df_meta = pd.read_sql_query(query_meta, conn, params=valid_ids)
        # 1695772800 is roughly the CS2 release date (2023-09-27 UTC): matches after
        # it are assumed MR12 (halftime after round 12), earlier ones MR15
        df_meta['halftime_round'] = np.where(df_meta['start_time'] > 1695772800, 12, 15)
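        # e.g. pd.Timestamp(1695772800, unit='s') is 2023-09-27 00:00:00 UTC, so a
        # match started in 2024 gets halftime_round = 12 and one from early 2023 gets 15.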
        # 4. Build first-half side DataFrame (which side each player started on)
        fh_rows = []
        if not df_teams.empty and not df_uids.empty:
            match_teams = {}  # match_id -> [(role, [uids])]
            for _, row in df_teams.iterrows():
                mid = row['match_id']
                role = row['group_fh_role']  # 1 = CT, 0 = T
                try:
                    uids = str(row['group_uids']).split(',')
                    uids = [u.strip() for u in uids if u.strip()]
                except Exception:
                    uids = []
                if mid not in match_teams:
                    match_teams[mid] = []
                match_teams[mid].append((role, uids))
            for _, row in df_uids.iterrows():
                mid = row['match_id']
                sid = row['steam_id_64']
                uid = str(row['uid'])
                if mid in match_teams:
                    for role, uids in match_teams[mid]:
                        if uid in uids:
                            fh_rows.append({
                                'match_id': mid,
                                'steam_id_64': sid,
                                'fh_side': 'CT' if role == 1 else 'T'
                            })
                            break
        df_fh_sides = pd.DataFrame(fh_rows)
        if not df_fh_sides.empty:
            df_fh_sides = df_fh_sides.merge(df_meta[['match_id', 'halftime_round']], on='match_id', how='left')
        # B. Get kill events involving tracked players
        query_events = f"""
            SELECT match_id, round_num, attacker_steam_id, victim_steam_id, event_type, is_headshot, event_time
            FROM fact_round_events
            WHERE event_type = 'kill'
              AND (attacker_steam_id IN ({placeholders}) OR victim_steam_id IN ({placeholders}))
        """
        df_events = pd.read_sql_query(query_events, conn, params=valid_ids + valid_ids)
        # C. Get round scores
        query_rounds = f"""
            SELECT match_id, round_num, ct_score, t_score, winner_side
            FROM fact_rounds
            WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))
        """
        df_rounds = pd.read_sql_query(query_rounds, conn, params=valid_ids)
        # Reconstruct winner_side from score increments: the stored column is
        # mostly NULL, so the calculated value overwrites it unconditionally
        if not df_rounds.empty:
            df_rounds = df_rounds.sort_values(['match_id', 'round_num']).reset_index(drop=True)
            df_rounds['prev_ct'] = df_rounds.groupby('match_id')['ct_score'].shift(1).fillna(0)
            df_rounds['prev_t'] = df_rounds.groupby('match_id')['t_score'].shift(1).fillna(0)
            # The side whose score increased won the round (round 1 works too: prev is 0)
            df_rounds['ct_win'] = (df_rounds['ct_score'] > df_rounds['prev_ct'])
            df_rounds['t_win'] = (df_rounds['t_score'] > df_rounds['prev_t'])
            df_rounds['calculated_winner'] = np.where(df_rounds['ct_win'], 'CT',
                                                      np.where(df_rounds['t_win'], 'T', None))
            df_rounds['winner_side'] = df_rounds['calculated_winner']
            # Cast to string so comparisons against side labels ('CT', 'T') are safe
            df_rounds['winner_side'] = df_rounds['winner_side'].astype(str)
        # --- Process logic ---
        has_events = not df_events.empty
        has_sides = not df_fh_sides.empty
        if has_events and has_sides:
            # 1. Attacker first-half side
            df_events = df_events.merge(df_fh_sides, left_on=['match_id', 'attacker_steam_id'], right_on=['match_id', 'steam_id_64'], how='left')
            df_events.rename(columns={'fh_side': 'att_fh_side'}, inplace=True)
            df_events.drop(columns=['steam_id_64'], inplace=True)
            # 2. Victim first-half side
            df_events = df_events.merge(df_fh_sides, left_on=['match_id', 'victim_steam_id'], right_on=['match_id', 'steam_id_64'], how='left', suffixes=('', '_vic'))
            df_events.rename(columns={'fh_side': 'vic_fh_side'}, inplace=True)
            df_events.drop(columns=['steam_id_64'], inplace=True)
            # 3. Determine the actual side (CT/T) per kill: a player plays the
            # first-half side up to the halftime round, then the opposite side.
            # Unknown first-half sides (NaN) stay NaN instead of being mislabelled.
            def actual_side(fh_side, in_first_half):
                flipped = fh_side.map({'CT': 'T', 'T': 'CT'})
                return fh_side.where(in_first_half, flipped)

            mask_fh = df_events['round_num'] <= df_events['halftime_round']
            df_events['attacker_side'] = actual_side(df_events['att_fh_side'], mask_fh)
            df_events['victim_side'] = actual_side(df_events['vic_fh_side'], mask_fh)
            # Merge round scores onto each kill event
            df_events = df_events.merge(df_rounds, on=['match_id', 'round_num'], how='left')
            # --- BAT: win rate vs all ---
            # Removed as per request (hard to calculate reliably; was all zeros)
            df['bat_win_rate_vs_all'] = 0
            # --- HPS: match point & comeback ---
            # Match point win rate: rounds where either side sits at 12 (MR12) or 15 (MR15)
            mp_rounds = df_rounds[((df_rounds['ct_score'] == 12) | (df_rounds['t_score'] == 12) |
                                   (df_rounds['ct_score'] == 15) | (df_rounds['t_score'] == 15))]
            if not mp_rounds.empty and has_sides:
                # Expand player sides to every round they played
                q_all_rounds = f"SELECT match_id, round_num FROM fact_rounds WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))"
                df_all_rounds = pd.read_sql_query(q_all_rounds, conn, params=valid_ids)
                df_player_rounds = df_all_rounds.merge(df_fh_sides, on='match_id')
                mask_fh = df_player_rounds['round_num'] <= df_player_rounds['halftime_round']
                df_player_rounds['side'] = np.where(mask_fh, df_player_rounds['fh_side'],
                                                    np.where(df_player_rounds['fh_side'] == 'CT', 'T', 'CT'))
                # Join match-point rounds against per-player sides
                mp_player = df_player_rounds.merge(mp_rounds[['match_id', 'round_num', 'winner_side']], on=['match_id', 'round_num'])
                mp_player['is_win'] = (mp_player['side'] == mp_player['winner_side']).astype(int)
                hps_mp = mp_player.groupby('steam_id_64')['is_win'].mean().reset_index()
                hps_mp.rename(columns={'is_win': 'hps_match_point_win_rate'}, inplace=True)
                df = df.merge(hps_mp, on='steam_id_64', how='left')
            else:
                df['hps_match_point_win_rate'] = 0.5
            # Comeback KD diff: KD while trailing by 4+ rounds minus overall KD
            # Attacker context
            df_events['att_team_score'] = np.where(df_events['attacker_side'] == 'CT', df_events['ct_score'], df_events['t_score'])
            df_events['att_opp_score'] = np.where(df_events['attacker_side'] == 'CT', df_events['t_score'], df_events['ct_score'])
            df_events['is_comeback_att'] = (df_events['att_team_score'] + 4 <= df_events['att_opp_score'])
            # Victim context
            df_events['vic_team_score'] = np.where(df_events['victim_side'] == 'CT', df_events['ct_score'], df_events['t_score'])
            df_events['vic_opp_score'] = np.where(df_events['victim_side'] == 'CT', df_events['t_score'], df_events['ct_score'])
            df_events['is_comeback_vic'] = (df_events['vic_team_score'] + 4 <= df_events['vic_opp_score'])
            att_k = df_events.groupby('attacker_steam_id').size()
            vic_d = df_events.groupby('victim_steam_id').size()
            cb_k = df_events[df_events['is_comeback_att']].groupby('attacker_steam_id').size()
            cb_d = df_events[df_events['is_comeback_vic']].groupby('victim_steam_id').size()
            kd_stats = pd.DataFrame({'k': att_k, 'd': vic_d, 'cb_k': cb_k, 'cb_d': cb_d}).fillna(0)
            kd_stats['kd'] = kd_stats['k'] / kd_stats['d'].replace(0, 1)
            kd_stats['cb_kd'] = kd_stats['cb_k'] / kd_stats['cb_d'].replace(0, 1)
            kd_stats['hps_comeback_kd_diff'] = kd_stats['cb_kd'] - kd_stats['kd']
            kd_stats.index.name = 'steam_id_64'
            df = df.merge(kd_stats[['hps_comeback_kd_diff']], on='steam_id_64', how='left')
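            # Worked example (illustrative numbers): a player with 100 kills and
            # 100 deaths overall (kd 1.0) who records 15 kills / 10 deaths while
            # trailing by 4+ rounds gets cb_kd 1.5 and hps_comeback_kd_diff +0.5.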
            # HPS: losing-streak KD diff
            # KD in rounds where the team has lost the previous 3+ rounds, vs global KD
            if not df_rounds.empty:
                df_rounds = df_rounds.sort_values(['match_id', 'round_num'])
                # Shift within each match so streaks never cross match boundaries
                g = df_rounds.groupby('match_id')
                # CT loss streak: T won each of the previous three rounds
                df_rounds['ct_lost_1'] = g['t_win'].shift(1).fillna(False)
                df_rounds['ct_lost_2'] = g['t_win'].shift(2).fillna(False)
                df_rounds['ct_lost_3'] = g['t_win'].shift(3).fillna(False)
                df_rounds['ct_in_loss_streak'] = (df_rounds['ct_lost_1'] & df_rounds['ct_lost_2'] & df_rounds['ct_lost_3'])
                # T loss streak: CT won each of the previous three rounds
                df_rounds['t_lost_1'] = g['ct_win'].shift(1).fillna(False)
                df_rounds['t_lost_2'] = g['ct_win'].shift(2).fillna(False)
                df_rounds['t_lost_3'] = g['ct_win'].shift(3).fillna(False)
                df_rounds['t_in_loss_streak'] = (df_rounds['t_lost_1'] & df_rounds['t_lost_2'] & df_rounds['t_lost_3'])
                # Merge streak flags into the kill events
                streak_cols = df_rounds[['match_id', 'round_num', 'ct_in_loss_streak', 't_in_loss_streak']]
                df_events = df_events.merge(streak_cols, on=['match_id', 'round_num'], how='left')
                # Rounds missing from fact_rounds would yield NaN flags; treat as no streak
                df_events['ct_in_loss_streak'] = df_events['ct_in_loss_streak'].fillna(False)
                df_events['t_in_loss_streak'] = df_events['t_in_loss_streak'].fillna(False)
                # Flag whether the attacker's team is in a losing streak
                df_events['att_is_loss_streak'] = np.where(
                    df_events['attacker_side'] == 'CT', df_events['ct_in_loss_streak'],
                    np.where(df_events['attacker_side'] == 'T', df_events['t_in_loss_streak'], False)
                )
                # Same for the victim (to count deaths during a streak)
                df_events['vic_is_loss_streak'] = np.where(
                    df_events['victim_side'] == 'CT', df_events['ct_in_loss_streak'],
                    np.where(df_events['victim_side'] == 'T', df_events['t_in_loss_streak'], False)
                )
                # KD during losing streaks
                ls_k = df_events[df_events['att_is_loss_streak']].groupby('attacker_steam_id').size()
                ls_d = df_events[df_events['vic_is_loss_streak']].groupby('victim_steam_id').size()
                ls_stats = pd.DataFrame({'ls_k': ls_k, 'ls_d': ls_d}).fillna(0)
                ls_stats['ls_kd'] = ls_stats['ls_k'] / ls_stats['ls_d'].replace(0, 1)
                # Recompute global KD from the same events so the diff is consistent
                g_k = df_events.groupby('attacker_steam_id').size()
                g_d = df_events.groupby('victim_steam_id').size()
                g_stats = pd.DataFrame({'g_k': g_k, 'g_d': g_d}).fillna(0)
                g_stats['g_kd'] = g_stats['g_k'] / g_stats['g_d'].replace(0, 1)
                ls_stats = ls_stats.join(g_stats[['g_kd']], how='outer').fillna(0)
                ls_stats['hps_losing_streak_kd_diff'] = ls_stats['ls_kd'] - ls_stats['g_kd']
                ls_stats.index.name = 'steam_id_64'
                df = df.merge(ls_stats[['hps_losing_streak_kd_diff']], on='steam_id_64', how='left')
            else:
                df['hps_losing_streak_kd_diff'] = 0
            # HPS: momentum multi-kill rate (2+ kills after the team won 3+ rounds in a row).
            # Hard to vectorise without full round-sequence reconstruction; left at 0 for now.
            df['hps_momentum_multikill_rate'] = 0
            # Remaining HPS placeholders, pending reliable source data
            df['hps_tilt_rating_drop'] = 0
            df['hps_clutch_rating_rise'] = 0
            df['hps_undermanned_survival_time'] = 0
            # --- PTL: pistol stats ---
            # NOTE: rounds (1, 13) assume MR12; in MR15 matches the second pistol
            # falls in round 16, which this fixed list does not cover
            pistol_rounds = [1, 13]
            df_pistol = df_events[df_events['round_num'].isin(pistol_rounds)]
            if not df_pistol.empty:
                pk = df_pistol.groupby('attacker_steam_id').size()
                pd_death = df_pistol.groupby('victim_steam_id').size()
                p_stats = pd.DataFrame({'pk': pk, 'pd': pd_death}).fillna(0)
                p_stats['ptl_pistol_kd'] = p_stats['pk'] / p_stats['pd'].replace(0, 1)
                phs = df_pistol[df_pistol['is_headshot'] == 1].groupby('attacker_steam_id').size()
                p_stats['phs'] = phs
                p_stats['phs'] = p_stats['phs'].fillna(0)
                p_stats['ptl_pistol_util_efficiency'] = p_stats['phs'] / p_stats['pk'].replace(0, 1)
                p_stats.index.name = 'steam_id_64'
                df = df.merge(p_stats[['ptl_pistol_kd', 'ptl_pistol_util_efficiency']], on='steam_id_64', how='left')
            else:
                df['ptl_pistol_kd'] = 1.0
                df['ptl_pistol_util_efficiency'] = 0.0
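            # Worked example (illustrative): 10 pistol-round kills and 5 pistol-round
            # deaths give ptl_pistol_kd 2.0; if 4 of those 10 kills were headshots,
            # ptl_pistol_util_efficiency comes out at 0.4.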
            # --- T/CT stats (directly from the L2 per-side tables) ---
            query_sides_l2 = f"""
                SELECT
                    steam_id_64,
                    'CT' as side,
                    COUNT(*) as matches,
                    SUM(round_total) as rounds,
                    AVG(rating2) as rating,
                    SUM(kills) as kills,
                    SUM(deaths) as deaths,
                    SUM(assists) as assists,
                    AVG(CAST(is_win as FLOAT)) as win_rate,
                    SUM(first_kill) as fk,
                    SUM(first_death) as fd,
                    AVG(kast) as kast,
                    AVG(rws) as rws,
                    SUM(kill_2 + kill_3 + kill_4 + kill_5) as multi_kill_rounds,
                    SUM(headshot_count) as hs
                FROM fact_match_players_ct
                WHERE steam_id_64 IN ({placeholders})
                GROUP BY steam_id_64

                UNION ALL

                SELECT
                    steam_id_64,
                    'T' as side,
                    COUNT(*) as matches,
                    SUM(round_total) as rounds,
                    AVG(rating2) as rating,
                    SUM(kills) as kills,
                    SUM(deaths) as deaths,
                    SUM(assists) as assists,
                    AVG(CAST(is_win as FLOAT)) as win_rate,
                    SUM(first_kill) as fk,
                    SUM(first_death) as fd,
                    AVG(kast) as kast,
                    AVG(rws) as rws,
                    SUM(kill_2 + kill_3 + kill_4 + kill_5) as multi_kill_rounds,
                    SUM(headshot_count) as hs
                FROM fact_match_players_t
                WHERE steam_id_64 IN ({placeholders})
                GROUP BY steam_id_64
            """
            df_sides = pd.read_sql_query(query_sides_l2, conn, params=valid_ids + valid_ids)
            if not df_sides.empty:
                # Derived per-side rates, computed before pivoting
                df_sides['rounds'] = df_sides['rounds'].replace(0, 1)  # avoid division by zero
                # KD = total kills / total deaths per side
                df_sides['kd'] = df_sides['kills'] / df_sides['deaths'].replace(0, 1)
                # KAST proxy when the source column is all zeros:
                # KAST ~= (kills + assists + survived) / rounds, survived = rounds - deaths
                if df_sides['kast'].mean() == 0:
                    df_sides['survived'] = df_sides['rounds'] - df_sides['deaths']
                    df_sides['kast'] = (df_sides['kills'] + df_sides['assists'] + df_sides['survived']) / df_sides['rounds']
                df_sides['fk_rate'] = df_sides['fk'] / df_sides['rounds']
                df_sides['fd_rate'] = df_sides['fd'] / df_sides['rounds']
                df_sides['mk_rate'] = df_sides['multi_kill_rounds'] / df_sides['rounds']
                df_sides['hs_rate'] = df_sides['hs'] / df_sides['kills'].replace(0, 1)
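                # Worked example of the KAST proxy (illustrative): 100 rounds with
                # 70 kills, 30 assists and 40 deaths gives survived = 60 and
                # kast = (70 + 30 + 60) / 100 = 1.6. This can exceed 1 because one
                # round may contribute a kill, an assist and a survival at once;
                # it is only a stand-in for when the real KAST column is empty.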
                # Pivot so each metric becomes side-suffixed columns
                # (side_rating_ct, side_rating_t, ...)
                pivoted = df_sides.pivot(index='steam_id_64', columns='side').reset_index()
                # Flatten the MultiIndex columns, mapping L2 metric names to feature names
                target_map = {
                    'rating': 'side_rating',
                    'kd': 'side_kd',
                    'win_rate': 'side_win_rate',
                    'fk_rate': 'side_first_kill_rate',
                    'fd_rate': 'side_first_death_rate',
                    'kast': 'side_kast',
                    'rws': 'side_rws',
                    'mk_rate': 'side_multikill_rate',
                    'hs_rate': 'side_headshot_rate'
                }
                new_cols = ['steam_id_64']
                for col_name, side in pivoted.columns[1:]:
                    if col_name in target_map:
                        new_cols.append(f"{target_map[col_name]}_{side.lower()}")
                    else:
                        new_cols.append(f"{col_name}_{side.lower()}")  # fallback for intermediate cols
                pivoted.columns = new_cols
                # Merge only the side_* feature columns
                cols_to_merge = [c for c in new_cols if c.startswith('side_')]
                cols_to_merge.append('steam_id_64')
                df = df.merge(pivoted[cols_to_merge], on='steam_id_64', how='left')
                # Fill NaN with 0 for side stats
                for c in cols_to_merge:
                    if c != 'steam_id_64':
                        df[c] = df[c] .fillna(0)
                # CT/T differential used by the L3 score calculation.
                # NOTE: despite the column name, this is a rating differential, not KD.
                if 'side_rating_ct' in df.columns and 'side_rating_t' in df.columns:
                    df['side_kd_diff_ct_t'] = df['side_rating_ct'] - df['side_rating_t']
                else:
                    df['side_kd_diff_ct_t'] = 0
                # --- Objective stats overridden from the main table ---
                # side_obj_t = plants per match, side_obj_ct = defuses per match
                df['side_obj_t'] = df['sum_plants'] / df['matches_played'].replace(0, 1)
                df['side_obj_ct'] = df['sum_defuses'] / df['matches_played'].replace(0, 1)
                df['side_obj_t'] = df['side_obj_t'].fillna(0)
                df['side_obj_ct'] = df['side_obj_ct'].fillna(0)
        else:
            # No event/side data at all: zero-fill every dependent feature
            cols = ['hps_match_point_win_rate', 'hps_comeback_kd_diff', 'ptl_pistol_kd', 'ptl_pistol_util_efficiency',
                    'side_rating_ct', 'side_rating_t', 'side_first_kill_rate_ct', 'side_first_kill_rate_t', 'side_kd_diff_ct_t',
                    'bat_win_rate_vs_all', 'hps_losing_streak_kd_diff', 'hps_momentum_multikill_rate',
                    'hps_tilt_rating_drop', 'hps_clutch_rating_rise', 'hps_undermanned_survival_time',
                    'side_win_rate_ct', 'side_win_rate_t', 'side_kd_ct', 'side_kd_t',
                    'side_kast_ct', 'side_kast_t', 'side_rws_ct', 'side_rws_t',
                    'side_first_death_rate_ct', 'side_first_death_rate_t',
                    'side_multikill_rate_ct', 'side_multikill_rate_t',
                    'side_headshot_rate_ct', 'side_headshot_rate_t',
                    'side_obj_ct', 'side_obj_t']
            for c in cols:
                df[c] = 0
        df['hps_match_point_win_rate'] = df['hps_match_point_win_rate'].fillna(0.5)
        df['bat_win_rate_vs_all'] = df['bat_win_rate_vs_all'].fillna(0.5)
        df['hps_losing_streak_kd_diff'] = df['hps_losing_streak_kd_diff'].fillna(0)
        # HPS: pressure entry rate (entry kills per round in lost matches)
        q_mp_team = f"SELECT match_id, steam_id_64, is_win, entry_kills, round_total FROM fact_match_players WHERE steam_id_64 IN ({placeholders})"
        df_mp_team = pd.read_sql_query(q_mp_team, conn, params=valid_ids)
        if not df_mp_team.empty:
            losing_matches = df_mp_team[df_mp_team['is_win'] == 0]
            if not losing_matches.empty:
                # Sum of entry kills / sum of rounds, over losing matches only
                pressure_entry = losing_matches.groupby('steam_id_64')[['entry_kills', 'round_total']].sum().reset_index()
                pressure_entry['hps_pressure_entry_rate'] = pressure_entry['entry_kills'] / pressure_entry['round_total'].replace(0, 1)
                df = df.merge(pressure_entry[['steam_id_64', 'hps_pressure_entry_rate']], on='steam_id_64', how='left')
        if 'hps_pressure_entry_rate' not in df.columns:
            df['hps_pressure_entry_rate'] = 0
        df['hps_pressure_entry_rate'] = df['hps_pressure_entry_rate'].fillna(0)
        # 5. PTL extras: pistol kills per match and pistol multi-kills
        # (rounds (1, 13): same MR12 assumption as above)
        query_ptl = f"""
            SELECT ev.attacker_steam_id as steam_id_64, COUNT(*) as pistol_kills
            FROM fact_round_events ev
            WHERE ev.event_type = 'kill' AND ev.round_num IN (1, 13)
              AND ev.attacker_steam_id IN ({placeholders})
            GROUP BY ev.attacker_steam_id
        """
        df_ptl = pd.read_sql_query(query_ptl, conn, params=valid_ids)
        if not df_ptl.empty:
            df = df.merge(df_ptl, on='steam_id_64', how='left')
            df['ptl_pistol_kills'] = df['pistol_kills'] / df['matches_played']
        else:
            df['ptl_pistol_kills'] = 0
        query_ptl_multi = f"""
            SELECT attacker_steam_id as steam_id_64, COUNT(*) as multi_cnt
            FROM (
                SELECT match_id, round_num, attacker_steam_id, COUNT(*) as k
                FROM fact_round_events
                WHERE event_type = 'kill' AND round_num IN (1, 13)
                  AND attacker_steam_id IN ({placeholders})
                GROUP BY match_id, round_num, attacker_steam_id
                HAVING k >= 2
            )
            GROUP BY attacker_steam_id
        """
        df_ptl_multi = pd.read_sql_query(query_ptl_multi, conn, params=valid_ids)
        if not df_ptl_multi.empty:
            df = df.merge(df_ptl_multi, on='steam_id_64', how='left')
            df['ptl_pistol_multikills'] = df['multi_cnt'] / df['matches_played']
        else:
            df['ptl_pistol_multikills'] = 0
        # PTL win rate (pandas logic using the reconstructed winner_side)
        if not df_rounds.empty and has_sides:
            # df_player_rounds may not exist if the match-point branch was skipped
            if 'df_player_rounds' not in locals():
                q_all_rounds = f"SELECT match_id, round_num FROM fact_rounds WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))"
                df_all_rounds = pd.read_sql_query(q_all_rounds, conn, params=valid_ids)
                df_player_rounds = df_all_rounds.merge(df_fh_sides, on='match_id')
                mask_fh = df_player_rounds['round_num'] <= df_player_rounds['halftime_round']
                df_player_rounds['side'] = np.where(mask_fh, df_player_rounds['fh_side'],
                                                    np.where(df_player_rounds['fh_side'] == 'CT', 'T', 'CT'))
            # Pistol rounds: round 1 and the first round after halftime
            # (MR12: round 13, MR15: round 16)
            player_pistol = df_player_rounds[
                (df_player_rounds['round_num'] == 1) |
                (df_player_rounds['round_num'] == df_player_rounds['halftime_round'] + 1)
            ].copy()
            # Merge with df_rounds to get the calculated winner_side
            df_rounds['winner_side'] = df_rounds['winner_side'].astype(str)  # ensure string for the comparison below
            player_pistol = player_pistol.merge(df_rounds[['match_id', 'round_num', 'winner_side']], on=['match_id', 'round_num'], how='left')
            # Guard: winner_side must survive the merge before computing wins
            if 'winner_side' in player_pistol.columns:
                player_pistol['is_win'] = (player_pistol['side'] == player_pistol['winner_side']).astype(int)
            else:
                player_pistol['is_win'] = 0
            ptl_wins = player_pistol.groupby('steam_id_64')['is_win'].agg(['sum', 'count']).reset_index()
            ptl_wins.rename(columns={'sum': 'pistol_wins', 'count': 'pistol_rounds'}, inplace=True)
            ptl_wins['ptl_pistol_win_rate'] = ptl_wins['pistol_wins'] / ptl_wins['pistol_rounds'].replace(0, 1)
            df = df.merge(ptl_wins[['steam_id_64', 'ptl_pistol_win_rate']], on='steam_id_64', how='left')
        else:
            df['ptl_pistol_win_rate'] = 0.5
        df['ptl_pistol_multikills'] = df['ptl_pistol_multikills'].fillna(0)
        df['ptl_pistol_win_rate'] = df['ptl_pistol_win_rate'].fillna(0.5)
        # 7. UTIL (enhanced with grenade purchase frequency)
        # Usage rate: grenades purchased per round, scaled x100 so it sits in the
        # same range as other metrics (e.g. 1.5 nades/round -> 150)
        df['util_usage_rate'] = (
            df['sum_util_flash'] + df['sum_util_smoke'] +
            df['sum_util_molotov'] + df['sum_util_he'] + df['sum_util_decoy']
        ) / df['rounds_played'].replace(0, 1) * 100
        # Fallback when the usage columns are not populated yet: approximate
        # from average equipment value
        if df['util_usage_rate'].sum() == 0:
            query_eco = f"""
                SELECT steam_id_64, AVG(equipment_value) as avg_equip_val
                FROM fact_round_player_economy
                WHERE steam_id_64 IN ({placeholders})
                GROUP BY steam_id_64
            """
            df_eco = pd.read_sql_query(query_eco, conn, params=valid_ids)
            if not df_eco.empty:
                df_eco['util_usage_rate_backup'] = df_eco['avg_equip_val'] / 50.0  # heuristic scaling for equipment value
                df = df.merge(df_eco[['steam_id_64', 'util_usage_rate_backup']], on='steam_id_64', how='left')
                df['util_usage_rate'] = df['util_usage_rate_backup'].fillna(0)
                df.drop(columns=['util_usage_rate_backup'], inplace=True)
        # --- 8. New feature dimensions (party size, rating distribution, ELO) ---
        # Base rows for these calculations
        q_new_feats = f"""
            SELECT mp.steam_id_64, mp.match_id, mp.match_team_id, mp.team_id,
                   mp.rating, mp.adr, mp.is_win
            FROM fact_match_players mp
            WHERE mp.steam_id_64 IN ({placeholders})
        """
        df_base = pd.read_sql_query(q_new_feats, conn, params=valid_ids)
        if not df_base.empty:
            # 8.1 Party size stats: performance by how many teammates queued together
            match_ids = df_base['match_id'].unique()
            if len(match_ids) > 0:
                match_id_ph = ','.join(['?'] * len(match_ids))
                q_party_size = f"""
                    SELECT match_id, match_team_id, COUNT(*) as party_size
                    FROM fact_match_players
                    WHERE match_id IN ({match_id_ph}) AND match_team_id > 0
                    GROUP BY match_id, match_team_id
                """
                # Chunk match_ids to stay under SQLite's bound-parameter limit
                chunk_size = 900
                party_sizes_list = []
                for i in range(0, len(match_ids), chunk_size):
                    chunk = match_ids[i:i + chunk_size]
                    chunk_ph = ','.join(['?'] * len(chunk))
                    q_chunk = q_party_size.replace(match_id_ph, chunk_ph)
                    party_sizes_list.append(pd.read_sql_query(q_chunk, conn, params=list(chunk)))
                if party_sizes_list:
                    df_party_sizes = pd.concat(party_sizes_list)
                    # Attach each player's party size to their match rows
                    df_base_party = df_base.merge(df_party_sizes, on=['match_id', 'match_team_id'], how='left')
                    # Aggregate win rate / rating / ADR per party size (1-5)
                    party_stats = df_base_party.groupby(['steam_id_64', 'party_size']).agg({
                        'is_win': 'mean',
                        'rating': 'mean',
                        'adr': 'mean'
                    }).reset_index()
                    # Pivot to one row per player, then flatten the MultiIndex
                    # into party_{size}_{metric} columns
                    pivoted_party = party_stats.pivot(index='steam_id_64', columns='party_size').reset_index()
                    flat_data = {'steam_id_64': pivoted_party['steam_id_64']}
                    for size in [1, 2, 3, 4, 5]:
                        if size in pivoted_party['is_win'].columns:
                            flat_data[f"party_{size}_win_rate"] = pivoted_party['is_win'][size]
                        if size in pivoted_party['rating'].columns:
                            flat_data[f"party_{size}_rating"] = pivoted_party['rating'][size]
                        if size in pivoted_party['adr'].columns:
                            flat_data[f"party_{size}_adr"] = pivoted_party['adr'][size]
                    df_party_flat = pd.DataFrame(flat_data)
                    df = df.merge(df_party_flat, on='steam_id_64', how='left')
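                    # For example, a player who only ever queued solo or as a duo
                    # gets party_1_win_rate / party_1_rating / party_1_adr and the
                    # party_2_* columns; sizes nobody in the batch played stay
                    # absent and fall to 0 in the final fillna.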
            # 8.2 Rating distribution
            # Tier shares per player: carry (>= 1.5), normal (1.0-1.5),
            # sacrifice (0.6-1.0), sleeping (< 0.6). right=False makes the bins
            # lower-inclusive, i.e. [0.6, 1.0), [1.0, 1.5), [1.5, inf)
            df_base['rating_tier'] = pd.cut(df_base['rating'],
                                            bins=[-float('inf'), 0.6, 1.0, 1.5, float('inf')],
                                            labels=['sleeping', 'sacrifice', 'normal', 'carry'],
                                            right=False)
            dist_stats = df_base.groupby(['steam_id_64', 'rating_tier']).size().unstack(fill_value=0)
            # Convert counts to shares of matches
            dist_stats = dist_stats.div(dist_stats.sum(axis=1), axis=0)
            dist_stats.columns = [f"rating_dist_{c}_rate" for c in dist_stats.columns]
            dist_stats = dist_stats.reset_index()
            df = df.merge(dist_stats, on='steam_id_64', how='left')
            # 8.3 ELO stratification: average rating vs opponent-team ELO bands
            if len(match_ids) > 0:
                q_elo = f"""
                    SELECT match_id, group_id, group_origin_elo
                    FROM fact_match_teams
                    WHERE match_id IN ({match_id_ph})
                """
                # Chunked again to respect the parameter limit
                elo_list = []
                for i in range(0, len(match_ids), chunk_size):
                    chunk = match_ids[i:i + chunk_size]
                    chunk_ph = ','.join(['?'] * len(chunk))
                    q_chunk = q_elo.replace(match_id_ph, chunk_ph)
                    elo_list.append(pd.read_sql_query(q_chunk, conn, params=list(chunk)))
                if elo_list:
                    df_elo_teams = pd.concat(elo_list)
                    # Join on match_id and keep only rows where the team is not the
                    # player's own (group_id != team_id), i.e. the opponent's ELO
                    df_merged_elo = df_base.merge(df_elo_teams, on='match_id', how='left')
                    df_merged_elo = df_merged_elo[df_merged_elo['group_id'] != df_merged_elo['team_id']]
                    # Bands: <1200, 1200-1400, 1400-1600, 1600-1800, 1800-2000, >2000
                    elo_bins = [-float('inf'), 1200, 1400, 1600, 1800, 2000, float('inf')]
                    elo_labels = ['lt1200', '1200_1400', '1400_1600', '1600_1800', '1800_2000', 'gt2000']
                    df_merged_elo['elo_bin'] = pd.cut(df_merged_elo['group_origin_elo'], bins=elo_bins, labels=elo_labels, right=False)
                    elo_stats = df_merged_elo.groupby(['steam_id_64', 'elo_bin']).agg({
                        'rating': 'mean'
                    }).unstack(fill_value=0)  # only rating is needed for now
                    # Flatten the (rating, bin) MultiIndex into elo_{bin}_rating columns
                    flat_elo_data = {'steam_id_64': elo_stats.index}
                    for bin_label in elo_labels:
                        if bin_label in elo_stats['rating'].columns:
                            flat_elo_data[f"elo_{bin_label}_rating"] = elo_stats['rating'][bin_label].values
                    df_elo_flat = pd.DataFrame(flat_elo_data)
                    df = df.merge(df_elo_flat, on='steam_id_64', how='left')
        # Final mappings
        df['total_matches'] = df['matches_played']
        return df.fillna(0)
    @staticmethod
    def _calculate_ultimate_scores(df):
        # Min-max normalize a column to 0-100; missing or constant columns get a
        # neutral 50 so they neither reward nor punish anyone
        def n(col):
            if col not in df.columns:
                return 50
            s = df[col]
            if s.max() == s.min():
                return 50
            return (s - s.min()) / (s.max() - s.min()) * 100
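        # e.g. a column with values [0.8, 1.0, 1.2] normalizes to [0, 50, 100];
        # the scale is relative to the players in this rebuild batch only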
        df = df.copy()
        # BAT (30%)
        df['score_bat'] = (
            0.25 * n('basic_avg_rating') +
            0.20 * n('basic_avg_kd') +
            0.15 * n('basic_avg_adr') +
            0.10 * n('bat_avg_duel_win_rate') +
            0.10 * n('bat_kd_diff_high_elo') +
            0.10 * n('basic_avg_kill_3')
        )
        # STA (15%)
        df['score_sta'] = (
            0.30 * (100 - n('sta_rating_volatility')) +
            0.30 * n('sta_loss_rating') +
            0.20 * n('sta_win_rating') +
            0.10 * (100 - abs(n('sta_time_rating_corr')))
        )
        # HPS (20%)
        df['score_hps'] = (
            0.25 * n('sum_1v3p') +
            0.20 * n('hps_match_point_win_rate') +
            0.20 * n('hps_comeback_kd_diff') +
            0.15 * n('hps_pressure_entry_rate') +
            0.20 * n('basic_avg_rating')
        )
        # PTL (10%)
        df['score_ptl'] = (
            0.30 * n('ptl_pistol_kills') +
            0.30 * n('ptl_pistol_win_rate') +
            0.20 * n('ptl_pistol_kd') +
            0.20 * n('ptl_pistol_util_efficiency')
        )
        # T/CT (10%)
        df['score_tct'] = (
            0.35 * n('side_rating_ct') +
            0.35 * n('side_rating_t') +
            0.15 * n('side_first_kill_rate_ct') +
            0.15 * n('side_first_kill_rate_t')
        )
        # UTIL (10%), emphasizing grenade usage frequency
        df['score_util'] = (
            0.35 * n('util_usage_rate') +
            0.25 * n('util_avg_nade_dmg') +
            0.20 * n('util_avg_flash_time') +
            0.20 * n('util_avg_flash_enemy')
        )
        return df
    @staticmethod
    def get_roster_features_distribution(target_steam_id):
        """
        Calculates the rank and distribution of the target player's L3 feature
        scores within the active roster.
        """
        from web.services.web_service import WebService
        import json
        # 1. Get active roster IDs
        lineups = WebService.get_lineups()
        active_roster_ids = []
        if lineups:
            try:
                raw_ids = json.loads(lineups[0]['player_ids_json'])
                active_roster_ids = [str(uid) for uid in raw_ids]
            except Exception:
                # Malformed or missing lineup JSON: treat as an empty roster
                pass
        if not active_roster_ids:
            return None
        # 2. Fetch L3 features for all roster members
        placeholders = ','.join('?' for _ in active_roster_ids)
        sql = f"""
            SELECT
                steam_id_64,
                score_bat, score_sta, score_hps, score_ptl, score_tct, score_util
            FROM dm_player_features
            WHERE steam_id_64 IN ({placeholders})
        """
        rows = query_db('l3', sql, active_roster_ids)
        if not rows:
            return None
        stats_map = {row['steam_id_64']: dict(row) for row in rows}
        target_steam_id = str(target_steam_id)
        # If the target has no L3 row yet, default all scores to 0
        if target_steam_id not in stats_map:
            stats_map[target_steam_id] = {
                'score_bat': 0, 'score_sta': 0, 'score_hps': 0,
                'score_ptl': 0, 'score_tct': 0, 'score_util': 0
            }
        # 3. Calculate the distribution per metric
        metrics = ['score_bat', 'score_sta', 'score_hps', 'score_ptl', 'score_tct', 'score_util']
        result = {}
        for m in metrics:
            values = [p.get(m, 0) or 0 for p in stats_map.values()]
            target_val = stats_map[target_steam_id].get(m, 0) or 0
            if not values:
                result[m] = None
                continue
            values.sort(reverse=True)
            try:
                # Rank = 1-based position of the target value in the sorted list
                rank = values.index(target_val) + 1
            except ValueError:
                rank = len(values)
            result[m] = {
                'val': target_val,
                'rank': rank,
                'total': len(values),
                'min': min(values),
                'max': max(values),
                'avg': sum(values) / len(values)
            }
        return result
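    # Example result shape (illustrative values for a 5-player roster):
    #   {'score_bat': {'val': 72.5, 'rank': 2, 'total': 5,
    #                  'min': 40.1, 'max': 88.0, 'avg': 63.2}, ...}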