import sqlite3
import pandas as pd
import numpy as np
import os

# Absolute path of the L2 SQLite database opened by get_db_connection().
# NOTE(review): hard-coded, machine-specific Windows path - confirm before running elsewhere.
DB_L2_PATH = r'd:\Documents\trae_projects\yrtv\database\L2\L2_Main.sqlite'

def get_db_connection():
    """Open the L2 database at ``DB_L2_PATH``.

    Returns:
        A ``sqlite3.Connection`` whose rows are produced as ``sqlite3.Row``
        objects. The caller is responsible for closing it.
    """
    connection = sqlite3.connect(DB_L2_PATH)
    connection.row_factory = sqlite3.Row
    return connection

def safe_div(a, b):
    """Return ``a / b``, or ``0`` when the divisor ``b`` equals zero."""
    return a / b if b != 0 else 0

# Kill-distance buckets in game units (1 unit ~ 1 inch):
#   close: < 500 (~12m), mid: 500 - 1500 (~12m - 38m), far: > 1500
_CLOSE_RANGE_MAX = 500
_MID_RANGE_MAX = 1500


def _merge_player_rows(df, rows):
    """Left-merge a list of per-player dicts onto ``df`` by ``steam_id_64``.

    An empty ``rows`` list leaves ``df`` unchanged: ``pd.DataFrame([])`` has no
    ``steam_id_64`` column, so merging it would raise ``KeyError``.
    """
    if not rows:
        return df
    return df.merge(pd.DataFrame(rows), on='steam_id_64', how='left')


def _with_distance_buckets(events):
    """Add the attacker-to-victim distance and close/mid/far flags to a kill-event frame."""
    dist = np.sqrt(
        (events['attacker_pos_x'] - events['victim_pos_x'])**2 +
        (events['attacker_pos_y'] - events['victim_pos_y'])**2 +
        (events['attacker_pos_z'] - events['victim_pos_z'])**2
    )
    events['dist'] = dist
    events['is_close'] = dist < _CLOSE_RANGE_MAX
    events['is_mid'] = (dist >= _CLOSE_RANGE_MAX) & (dist <= _MID_RANGE_MAX)
    events['is_far'] = dist > _MID_RANGE_MAX
    return events


def load_and_calculate_ultimate(conn, min_matches=5):
    """Build the per-player feature table used for the "ultimate" scores.

    Reads match, round and kill-event tables through ``conn`` and derives the
    ``basic_*``, ``sta_*``, ``bat_*``, ``hps_*``, ``ptl_*`` and ``util_*``
    feature columns. The BAT distance, HPS context and PTL sections are
    best-effort: if one of them fails, the error is printed and its columns
    are simply absent from the result.

    Args:
        conn: Open SQLite connection to the L2 database.
        min_matches: Minimum number of ``fact_match_players`` rows a player
            needs in order to be included.

    Returns:
        A DataFrame with one row per qualifying ``steam_id_64`` and every
        remaining NaN replaced by 0, or ``None`` when no player qualifies.
    """
    print("Loading Ultimate Data Set...")

    # 1. Basic Stats
    query_basic = """
    SELECT 
        steam_id_64,
        COUNT(*) as matches_played,
        SUM(round_total) as rounds_played,
        AVG(rating) as basic_avg_rating,
        AVG(kd_ratio) as basic_avg_kd,
        AVG(adr) as basic_avg_adr,
        AVG(kast) as basic_avg_kast,
        AVG(rws) as basic_avg_rws,
        SUM(headshot_count) as sum_hs,
        SUM(kills) as sum_kills,
        SUM(deaths) as sum_deaths,
        SUM(first_kill) as sum_fk,
        SUM(first_death) as sum_fd,
        SUM(clutch_1v1) as sum_1v1,
        SUM(clutch_1v2) as sum_1v2,
        SUM(clutch_1v3) + SUM(clutch_1v4) + SUM(clutch_1v5) as sum_1v3p,
        SUM(kill_2) as sum_2k,
        SUM(kill_3) as sum_3k,
        SUM(kill_4) as sum_4k,
        SUM(kill_5) as sum_5k,
        SUM(assisted_kill) as sum_assist,
        SUM(perfect_kill) as sum_perfect,
        SUM(revenge_kill) as sum_revenge,
        SUM(awp_kill) as sum_awp,
        SUM(jump_count) as sum_jump,
        SUM(throw_harm) as sum_util_dmg,
        SUM(flash_time) as sum_flash_time,
        SUM(flash_enemy) as sum_flash_enemy,
        SUM(flash_team) as sum_flash_team
    FROM fact_match_players
    GROUP BY steam_id_64
    HAVING COUNT(*) >= ?
    """
    df = pd.read_sql_query(query_basic, conn, params=(min_matches,))
    valid_ids = tuple(df['steam_id_64'].tolist())
    if not valid_ids:
        return None
    # One bound parameter per qualifying player.
    # NOTE: SQLite caps the number of bound parameters per statement
    # (SQLITE_MAX_VARIABLE_NUMBER); a very large player set would need chunking.
    placeholders = ','.join(['?'] * len(valid_ids))

    # --- Basic Derived ---
    # .replace(0, 1) on a denominator avoids division by zero; the numerator is 0 in those rows.
    df['basic_headshot_rate'] = df['sum_hs'] / df['sum_kills'].replace(0, 1)
    df['basic_avg_headshot_kills'] = df['sum_hs'] / df['matches_played']
    df['basic_avg_first_kill'] = df['sum_fk'] / df['matches_played']
    df['basic_avg_first_death'] = df['sum_fd'] / df['matches_played']
    df['basic_first_kill_rate'] = df['sum_fk'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1)
    df['basic_first_death_rate'] = df['sum_fd'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1)
    df['basic_avg_kill_2'] = df['sum_2k'] / df['matches_played']
    df['basic_avg_kill_3'] = df['sum_3k'] / df['matches_played']
    df['basic_avg_kill_4'] = df['sum_4k'] / df['matches_played']
    df['basic_avg_kill_5'] = df['sum_5k'] / df['matches_played']
    df['basic_avg_assisted_kill'] = df['sum_assist'] / df['matches_played']
    df['basic_avg_perfect_kill'] = df['sum_perfect'] / df['matches_played']
    df['basic_avg_revenge_kill'] = df['sum_revenge'] / df['matches_played']
    df['basic_avg_awp_kill'] = df['sum_awp'] / df['matches_played']
    df['basic_avg_jump_count'] = df['sum_jump'] / df['matches_played']

    # 2. STA - Detailed Time Series
    print("Calculating STA (Detailed)...")
    query_sta = f"""
    SELECT mp.steam_id_64, mp.rating, mp.is_win, m.start_time, m.duration
    FROM fact_match_players mp
    JOIN fact_matches m ON mp.match_id = m.match_id
    WHERE mp.steam_id_64 IN ({placeholders})
    ORDER BY mp.steam_id_64, m.start_time
    """
    df_matches = pd.read_sql_query(query_sta, conn, params=valid_ids)

    sta_list = []
    for pid, group in df_matches.groupby('steam_id_64'):
        group = group.sort_values('start_time')
        # Mean rating over the most recent 30 matches
        sta_last_30 = group.tail(30)['rating'].mean()
        # Mean rating in wins / losses
        sta_win = group[group['is_win'] == 1]['rating'].mean()
        sta_loss = group[group['is_win'] == 0]['rating'].mean()
        # Volatility: rating standard deviation over the last 10 matches
        sta_vol = group.tail(10)['rating'].std()
        # Time Correlation (Duration vs Rating)
        sta_time_corr = group['duration'].corr(group['rating']) if len(group) > 2 else 0
        # Fatigue: first-minus-last rating on days with at least 3 matches.
        # start_time is parsed as epoch seconds.
        group = group.assign(date=pd.to_datetime(group['start_time'], unit='s').dt.date)
        daily = group.groupby('date')['rating'].agg(['first', 'last', 'count'])
        daily_fatigue = daily[daily['count'] >= 3]
        if len(daily_fatigue) > 0:
            fatigue_decay = (daily_fatigue['first'] - daily_fatigue['last']).mean()
        else:
            fatigue_decay = 0

        sta_list.append({
            'steam_id_64': pid,
            'sta_last_30_rating': sta_last_30,
            'sta_win_rating': sta_win,
            'sta_loss_rating': sta_loss,
            'sta_rating_volatility': sta_vol,
            'sta_time_rating_corr': sta_time_corr,
            'sta_fatigue_decay': fatigue_decay
        })
    df = _merge_player_rows(df, sta_list)

    # 3. BAT - Distance & Advanced
    print("Calculating BAT (Distance & Context)...")
    # Kills by the selected players that carry position data.
    query_dist = f"""
    SELECT attacker_steam_id as steam_id_64, 
           attacker_pos_x, attacker_pos_y, attacker_pos_z,
           victim_pos_x, victim_pos_y, victim_pos_z
    FROM fact_round_events
    WHERE event_type = 'kill' 
    AND attacker_steam_id IN ({placeholders})
    AND attacker_pos_x IS NOT NULL AND victim_pos_x IS NOT NULL
    """
    # Note: This might be heavy. If memory issue, sample or chunk.
    try:
        df_dist = pd.read_sql_query(query_dist, conn, params=valid_ids)
        if not df_dist.empty:
            df_dist = _with_distance_buckets(df_dist)

            # Share of each player's kills per distance bucket (preference/style).
            bat_dist = df_dist.groupby('steam_id_64').agg({
                'is_close': 'mean',
                'is_mid': 'mean',
                'is_far': 'mean'
            }).reset_index()
            bat_dist.columns = ['steam_id_64', 'bat_kill_share_close', 'bat_kill_share_mid', 'bat_kill_share_far']

            # "Win rate" per distance is approximated as
            # kills_at_dist / (kills_at_dist + deaths_at_dist), so deaths are needed too.
            query_dist_d = f"""
            SELECT victim_steam_id as steam_id_64, 
                   attacker_pos_x, attacker_pos_y, attacker_pos_z,
                   victim_pos_x, victim_pos_y, victim_pos_z
            FROM fact_round_events
            WHERE event_type = 'kill' 
            AND victim_steam_id IN ({placeholders})
            AND attacker_pos_x IS NOT NULL AND victim_pos_x IS NOT NULL
            """
            df_dist_d = _with_distance_buckets(
                pd.read_sql_query(query_dist_d, conn, params=valid_ids)
            )

            k_counts = df_dist.groupby('steam_id_64').agg(
                k_close=('is_close', 'sum'),
                k_mid=('is_mid', 'sum'),
                k_far=('is_far', 'sum')
            )
            d_counts = df_dist_d.groupby('steam_id_64').agg(
                d_close=('is_close', 'sum'),
                d_mid=('is_mid', 'sum'),
                d_far=('is_far', 'sum')
            )

            # Outer join keeps players that only have kills or only have deaths.
            bat_rates = k_counts.join(d_counts, how='outer').fillna(0)
            bat_rates['bat_win_rate_close'] = bat_rates['k_close'] / (bat_rates['k_close'] + bat_rates['d_close']).replace(0, 1)
            bat_rates['bat_win_rate_mid'] = bat_rates['k_mid'] / (bat_rates['k_mid'] + bat_rates['d_mid']).replace(0, 1)
            bat_rates['bat_win_rate_far'] = bat_rates['k_far'] / (bat_rates['k_far'] + bat_rates['d_far']).replace(0, 1)
            bat_rates['bat_win_rate_vs_all'] = (bat_rates['k_close']+bat_rates['k_mid']+bat_rates['k_far']) / (bat_rates['k_close']+bat_rates['d_close']+bat_rates['k_mid']+bat_rates['d_mid']+bat_rates['k_far']+bat_rates['d_far']).replace(0, 1)
            bat_rates = bat_rates.rename_axis('steam_id_64').reset_index()

            rate_cols = ['bat_win_rate_close', 'bat_win_rate_mid', 'bat_win_rate_far', 'bat_win_rate_vs_all']
            # Merge only after everything above succeeded, so a failure leaves df untouched.
            df = df.merge(bat_dist, on='steam_id_64', how='left')
            df = df.merge(bat_rates[['steam_id_64'] + rate_cols], on='steam_id_64', how='left')
        else:
             print("No position data found.")
    except Exception as e:
        print(f"Dist calculation error: {e}")

    # High/Low ELO KD: split each player's matches at their own mean match ELO.
    query_elo = f"""
    SELECT mp.steam_id_64, mp.kd_ratio, 
           (SELECT AVG(group_origin_elo) FROM fact_match_teams fmt WHERE fmt.match_id = mp.match_id AND group_origin_elo > 0) as elo
    FROM fact_match_players mp
    WHERE mp.steam_id_64 IN ({placeholders})
    """
    df_elo = pd.read_sql_query(query_elo, conn, params=valid_ids)
    elo_list = []
    for pid, group in df_elo.groupby('steam_id_64'):
        avg = group['elo'].mean()
        if pd.isna(avg):
            avg = 1000  # fallback threshold when the player has no ELO data at all
        elo_list.append({
            'steam_id_64': pid,
            'bat_kd_diff_high_elo': group[group['elo'] > avg]['kd_ratio'].mean(),
            'bat_kd_diff_low_elo': group[group['elo'] <= avg]['kd_ratio'].mean()
        })
    df = _merge_player_rows(df, elo_list)

    # Avg Duel Freq: opening duels (first kills + first deaths) per round played
    df['bat_avg_duel_freq'] = (df['sum_fk'] + df['sum_fd']) / df['rounds_played']

    # 4. HPS - High Pressure Contexts
    print("Calculating HPS (Contexts)...")
    # One row per (player, round) with the round score, the player's side and
    # the player's kills/deaths in that round.
    query_hps_ctx = f"""
    SELECT r.match_id, r.round_num, r.ct_score, r.t_score, r.winner_side, 
           m.score_team1, m.score_team2, m.winner_team,
           e.steam_id_64, e.side as player_side,
           (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=r.match_id AND ev.round_num=r.round_num AND ev.attacker_steam_id=e.steam_id_64 AND ev.event_type='kill') as kills,
           (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=r.match_id AND ev.round_num=r.round_num AND ev.victim_steam_id=e.steam_id_64 AND ev.event_type='kill') as deaths
    FROM fact_rounds r
    JOIN fact_matches m ON r.match_id = m.match_id
    JOIN fact_round_player_economy e ON r.match_id = e.match_id AND r.round_num = e.round_num
    WHERE e.steam_id_64 IN ({placeholders})
    """
    # This is heavy.
    try:
        df_rounds = pd.read_sql_query(query_hps_ctx, conn, params=valid_ids)

        hps_stats = []
        for pid, group in df_rounds.groupby('steam_id_64'):
            # Team / enemy score from the player's side in that round. Kept as
            # local Series so the groupby slice is never mutated.
            on_ct = group['player_side'] == 'CT'
            my_score = group['ct_score'].where(on_ct, group['t_score'])
            enemy_score = group['t_score'].where(on_ct, group['ct_score'])

            # Match point heuristic (assumes MR12): either team has >= 12 rounds.
            mp_rounds = group[(my_score >= 12) | (enemy_score >= 12)]
            mp_wins = mp_rounds[mp_rounds['winner_side'] == mp_rounds['player_side']]
            mp_win_rate = len(mp_wins) / len(mp_rounds) if len(mp_rounds) > 0 else 0.5

            # Pressure: trailing by 3+ rounds. Kills per round is used as a
            # proxy for entry rate because first-kill data is not loaded here.
            pressure_rounds = group[(enemy_score - my_score) >= 3]
            pressure_kpr = pressure_rounds['kills'].mean() if len(pressure_rounds) > 0 else 0

            # Momentum: leading by 3+ rounds; share of those rounds with >= 2 kills.
            momentum_rounds = group[(my_score - enemy_score) >= 3]
            momentum_multikills = len(momentum_rounds[momentum_rounds['kills'] >= 2])
            momentum_mk_rate = momentum_multikills / len(momentum_rounds) if len(momentum_rounds) > 0 else 0

            # Comeback KD diff: K/D in pressure rounds minus overall K/D.
            # With zero deaths the kill count itself stands in for the ratio.
            pressure_deaths = pressure_rounds['deaths'].sum()
            pressure_kills = pressure_rounds['kills'].sum()
            pressure_kd = pressure_kills / pressure_deaths if pressure_deaths > 0 else pressure_kills

            overall_deaths = group['deaths'].sum()
            overall_kills = group['kills'].sum()
            overall_kd = overall_kills / overall_deaths if overall_deaths > 0 else overall_kills

            comeback_diff = pressure_kd - overall_kd

            hps_stats.append({
                'steam_id_64': pid,
                'hps_match_point_win_rate': mp_win_rate,
                'hps_pressure_entry_rate': pressure_kpr, # Proxy
                'hps_momentum_multikill_rate': momentum_mk_rate,
                'hps_comeback_kd_diff': comeback_diff,
                'hps_losing_streak_kd_diff': comeback_diff # Same metric
            })

        df = _merge_player_rows(df, hps_stats)
    except Exception as e:
        print(f"HPS Error: {e}")

    # 4.4 Clutch Win Rates (Detailed)
    # These only use the basic aggregates, so they are computed outside the
    # try block and survive a failure of the round-context query.
    # Normalized per match for now; ideally per clutch opportunity.
    df['hps_clutch_win_rate_1v1'] = df['sum_1v1'] / df['matches_played']
    df['hps_clutch_win_rate_1v2'] = df['sum_1v2'] / df['matches_played']
    df['hps_clutch_win_rate_1v3_plus'] = df['sum_1v3p'] / df['matches_played']

    # 5. PTL - Pistol Detailed
    print("Calculating PTL...")
    # Pistol rounds are taken to be rounds 1 and 13 (common for MR12).
    query_ptl = f"""
    SELECT 
        e.steam_id_64,
        (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=e.match_id AND ev.round_num=e.round_num AND ev.attacker_steam_id=e.steam_id_64 AND ev.event_type='kill') as kills,
        (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=e.match_id AND ev.round_num=e.round_num AND ev.victim_steam_id=e.steam_id_64 AND ev.event_type='kill') as deaths,
        r.winner_side, e.side as player_side,
        e.equipment_value
    FROM fact_round_player_economy e
    JOIN fact_rounds r ON e.match_id = r.match_id AND e.round_num = r.round_num
    WHERE e.steam_id_64 IN ({placeholders})
    AND e.round_num IN (1, 13)
    """
    try:
        df_ptl_raw = pd.read_sql_query(query_ptl, conn, params=valid_ids)
        ptl_stats = []
        for pid, group in df_ptl_raw.groupby('steam_id_64'):
            kills = group['kills'].sum()
            deaths = group['deaths'].sum()
            kd = kills / deaths if deaths > 0 else kills

            wins = len(group[group['winner_side'] == group['player_side']])
            win_rate = wins / len(group)

            multikills = len(group[group['kills'] >= 2])

            ptl_stats.append({
                'steam_id_64': pid,
                'ptl_pistol_kills': kills, # total here; converted to per-match below
                'ptl_pistol_kd': kd,
                'ptl_pistol_win_rate': win_rate,
                'ptl_pistol_multikills': multikills
            })

        if ptl_stats:
            df = df.merge(pd.DataFrame(ptl_stats), on='steam_id_64', how='left')
            # Average pistol-round kills per match, using each player's own
            # match count rather than the mean match count of all players.
            df['ptl_pistol_kills'] = df['ptl_pistol_kills'] / df['matches_played']

    except Exception as e:
        print(f"PTL Error: {e}")

    # 6. T/CT & UTIL (Straightforward)
    print("Calculating T/CT & UTIL...")
    # TODO: per-side (CT/T) aggregates are not produced yet; the scoring step
    # uses basic_avg_rating as a placeholder for both sides.

    # UTIL
    df['util_avg_nade_dmg'] = df['sum_util_dmg'] / df['matches_played']
    df['util_avg_flash_time'] = df['sum_flash_time'] / df['matches_played']
    df['util_avg_flash_enemy'] = df['sum_flash_enemy'] / df['matches_played']

    # Fill NaN
    df = df.fillna(0)

    return df

def calculate_ultimate_scores(df):
    """Add the six 0-100 dimension scores to a copy of the feature table.

    Every input feature is min-max scaled to 0..100 across the rows of ``df``
    and combined with fixed weights into ``score_BAT``, ``score_STA``,
    ``score_HPS``, ``score_PTL``, ``score_TCT`` and ``score_UTIL``. A feature
    that is missing, constant or entirely NaN contributes a neutral 50.

    Args:
        df: Per-player feature DataFrame; it is not modified.

    Returns:
        A copy of ``df`` with the ``score_*`` columns added.
    """
    df = df.copy()

    def n(col, transform=None):
        """Min-max scale ``col`` to 0..100, optionally transforming the raw values first.

        Always returns a Series aligned with ``df`` so callers can chain
        Series methods even on the neutral fallback.
        """
        neutral = pd.Series(50.0, index=df.index)
        if col not in df.columns:
            return neutral
        s = df[col]
        if transform is not None:
            s = transform(s)
        lo, hi = s.min(), s.max()
        if pd.isna(lo) or hi == lo:
            return neutral
        return (s - lo) / (hi - lo) * 100

    # 1. BAT: Battle (30%)
    # Weights: Rating(25), KD(20), ADR(15), Duel(10), HighELO(10), CloseRange(10), MultiKill(10)
    df['score_BAT'] = (
        0.25 * n('basic_avg_rating') +
        0.20 * n('basic_avg_kd') +
        0.15 * n('basic_avg_adr') +
        0.10 * n('bat_avg_duel_win_rate') + # neutral 50 unless the input carries this column
        0.10 * n('bat_kd_diff_high_elo') +
        0.10 * n('bat_win_rate_close') +
        0.10 * n('basic_avg_kill_3') # Multi-kill proxy
    )

    # 2. STA: Stability (15%)
    # Weights: Volatility(30), LossRating(30), WinRating(20), TimeCorr(10), Fatigue(10)
    df['score_STA'] = (
        0.30 * (100 - n('sta_rating_volatility')) +
        0.30 * n('sta_loss_rating') +
        0.20 * n('sta_win_rating') +
        # Closer to 0 is better (independent of duration): take |corr| BEFORE
        # scaling, since after scaling to 0..100 an abs() would be a no-op.
        0.10 * (100 - n('sta_time_rating_corr', transform=pd.Series.abs)) +
        0.10 * (100 - n('sta_fatigue_decay'))
    )

    # 3. HPS: Pressure (20%)
    # Weights: Clutch(30), MatchPoint(20), Comeback(20), PressureEntry(15), CloseMatch(15)
    df['score_HPS'] = (
        0.30 * n('sum_1v3p') + # Using high tier clutches
        0.20 * n('hps_match_point_win_rate') +
        0.20 * n('hps_comeback_kd_diff') +
        0.15 * n('hps_pressure_entry_rate') +
        0.15 * n('basic_avg_rating') # Fallback if close match rating missing
    )

    # 4. PTL: Pistol (10%)
    # Weights: Kills(40), WinRate(30), KD(30)
    df['score_PTL'] = (
        0.40 * n('ptl_pistol_kills') +
        0.30 * n('ptl_pistol_win_rate') +
        0.30 * n('ptl_pistol_kd')
    )

    # 5. T/CT (15%)
    # Weights: CT(50), T(50)
    # Placeholder: the overall rating stands in for both the CT and the T rating.
    df['score_TCT'] = 0.5 * n('basic_avg_rating') + 0.5 * n('basic_avg_rating')

    # 6. UTIL (10%)
    # Weights: Dmg(50), Flash(30), EnemiesFlashed(20)
    df['score_UTIL'] = (
        0.50 * n('util_avg_nade_dmg') +
        0.30 * n('util_avg_flash_time') +
        0.20 * n('util_avg_flash_enemy')
    )

    return df

def main():
    """Run the pipeline end to end and print a short report.

    Loads the feature table, computes the dimension scores, prints the five
    players with the highest ``score_BAT`` and a few feature-coverage checks.
    Nothing is printed when no player qualifies. The database connection is
    always closed.
    """
    connection = get_db_connection()
    try:
        features = load_and_calculate_ultimate(connection)
        if features is None:
            return

        scored = calculate_ultimate_scores(features)

        print("\n--- Ultimate Scores (Top 5 BAT) ---")
        shown = ['steam_id_64', 'score_BAT', 'score_STA', 'score_HPS', 'score_PTL', 'score_UTIL']
        top_five = scored[shown].sort_values('score_BAT', ascending=False).head(5)
        print(top_five)

        # Verify coverage: report whether one representative column per section exists.
        print("\n--- Feature Coverage ---")
        print(f"Total Columns: {len(scored.columns)}")
        coverage_checks = (
            ("BAT Distances:", 'bat_win_rate_close'),
            ("HPS Contexts:", 'hps_match_point_win_rate'),
            ("PTL Detailed:", 'ptl_pistol_kd'),
        )
        for label, column in coverage_checks:
            print(label, column in scored.columns)

    finally:
        connection.close()

# Run the pipeline only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()