500 lines
22 KiB
Python
500 lines
22 KiB
Python
|
|
import sqlite3
|
||
|
|
import pandas as pd
|
||
|
|
import numpy as np
|
||
|
|
import os
|
||
|
|
|
||
|
|
# Absolute path of the L2 SQLite database opened by get_db_connection().
# NOTE(review): this is a machine-specific Windows path — confirm it before
# running the script on another machine.
DB_L2_PATH = r'd:\Documents\trae_projects\yrtv\database\L2\L2_Main.sqlite'
|
||
|
|
|
||
|
|
def get_db_connection(db_path=None):
    """Open a SQLite connection whose rows can be read by column name.

    Args:
        db_path: Path of the database to open. When omitted, the module-level
            ``DB_L2_PATH`` is used, which keeps existing zero-argument callers
            working unchanged.

    Returns:
        A ``sqlite3.Connection`` with ``row_factory`` set to ``sqlite3.Row``.
        The caller is responsible for closing it.
    """
    # Resolve the default at call time so the constant can be overridden.
    target = DB_L2_PATH if db_path is None else db_path
    conn = sqlite3.connect(target)
    conn.row_factory = sqlite3.Row
    return conn
|
||
|
|
|
||
|
|
def safe_div(a, b):
    """Return ``a / b``, or ``0`` when the divisor ``b`` equals zero."""
    return 0 if b == 0 else a / b
|
||
|
|
|
||
|
|
def _merge_player_features(df, features):
    """Left-join per-player feature rows onto ``df``; no-op when there are none."""
    frame = pd.DataFrame(features)
    if frame.empty:
        # Merging an empty, column-less frame would raise KeyError on the key.
        return df
    return df.merge(frame, on='steam_id_64', how='left')


def _with_distance_buckets(events):
    """Return a copy of a kill-event frame with distance and range flags added."""
    events = events.copy()
    events['dist'] = np.sqrt(
        (events['attacker_pos_x'] - events['victim_pos_x']) ** 2 +
        (events['attacker_pos_y'] - events['victim_pos_y']) ** 2 +
        (events['attacker_pos_z'] - events['victim_pos_z']) ** 2
    )
    # Units: 1 unit ~ 1 inch.
    # Close: < 500 (~12m), Mid: 500 - 1500 (~12m - 38m), Far: > 1500
    events['is_close'] = events['dist'] < 500
    events['is_mid'] = (events['dist'] >= 500) & (events['dist'] <= 1500)
    events['is_far'] = events['dist'] > 1500
    return events


def _stability_rows(conn, placeholders, valid_ids):
    """Return one dict of STA (rating stability) features per player."""
    query_sta = f"""
    SELECT mp.steam_id_64, mp.rating, mp.is_win, m.start_time, m.duration
    FROM fact_match_players mp
    JOIN fact_matches m ON mp.match_id = m.match_id
    WHERE mp.steam_id_64 IN ({placeholders})
    ORDER BY mp.steam_id_64, m.start_time
    """
    df_matches = pd.read_sql_query(query_sta, conn, params=valid_ids)

    rows = []
    for pid, group in df_matches.groupby('steam_id_64'):
        group = group.sort_values('start_time')
        ratings = group['rating']

        # Fatigue: on days with 3+ matches, first-match rating minus
        # last-match rating, averaged over those days.
        play_date = pd.to_datetime(group['start_time'], unit='s').dt.date
        daily = ratings.groupby(play_date).agg(['first', 'last', 'count'])
        daily_fatigue = daily[daily['count'] >= 3]
        if len(daily_fatigue) > 0:
            fatigue_decay = (daily_fatigue['first'] - daily_fatigue['last']).mean()
        else:
            fatigue_decay = 0

        rows.append({
            'steam_id_64': pid,
            # Mean rating over the 30 most recent matches.
            'sta_last_30_rating': group.tail(30)['rating'].mean(),
            'sta_win_rating': group[group['is_win'] == 1]['rating'].mean(),
            'sta_loss_rating': group[group['is_win'] == 0]['rating'].mean(),
            # Volatility: standard deviation over the 10 most recent matches.
            'sta_rating_volatility': group.tail(10)['rating'].std(),
            # Correlation between match duration and rating (needs 3+ matches).
            'sta_time_rating_corr': (
                group['duration'].corr(ratings) if len(group) > 2 else 0
            ),
            'sta_fatigue_decay': fatigue_decay,
        })
    return rows


def _distance_features(conn, placeholders, valid_ids):
    """Return (kill-share frame, duel win-rate frame) by range, or None without data."""
    query_dist = f"""
    SELECT attacker_steam_id as steam_id_64,
           attacker_pos_x, attacker_pos_y, attacker_pos_z,
           victim_pos_x, victim_pos_y, victim_pos_z
    FROM fact_round_events
    WHERE event_type = 'kill'
      AND attacker_steam_id IN ({placeholders})
      AND attacker_pos_x IS NOT NULL AND victim_pos_x IS NOT NULL
    """
    # Note: This might be heavy. If memory issue, sample or chunk.
    df_dist = pd.read_sql_query(query_dist, conn, params=valid_ids)
    if df_dist.empty:
        return None
    df_dist = _with_distance_buckets(df_dist)

    # Share of each player's kills per range bucket ("preference/style").
    bat_dist = df_dist.groupby('steam_id_64').agg({
        'is_close': 'mean',
        'is_mid': 'mean',
        'is_far': 'mean'
    }).reset_index()
    bat_dist.columns = ['steam_id_64', 'bat_kill_share_close',
                        'bat_kill_share_mid', 'bat_kill_share_far']

    # Approximation: Win Rate = Kills_at_dist / (Kills_at_dist + Deaths_at_dist)
    query_dist_d = f"""
    SELECT victim_steam_id as steam_id_64,
           attacker_pos_x, attacker_pos_y, attacker_pos_z,
           victim_pos_x, victim_pos_y, victim_pos_z
    FROM fact_round_events
    WHERE event_type = 'kill'
      AND victim_steam_id IN ({placeholders})
      AND attacker_pos_x IS NOT NULL AND victim_pos_x IS NOT NULL
    """
    df_dist_d = _with_distance_buckets(
        pd.read_sql_query(query_dist_d, conn, params=valid_ids)
    )

    k_counts = df_dist.groupby('steam_id_64').agg(
        k_close=('is_close', 'sum'),
        k_mid=('is_mid', 'sum'),
        k_far=('is_far', 'sum')
    )
    d_counts = df_dist_d.groupby('steam_id_64').agg(
        d_close=('is_close', 'sum'),
        d_mid=('is_mid', 'sum'),
        d_far=('is_far', 'sum')
    )

    # Outer join keeps players who only have kills or only have deaths.
    bat_rates = k_counts.join(d_counts, how='outer').fillna(0)
    for bucket in ('close', 'mid', 'far'):
        kills = bat_rates[f'k_{bucket}']
        duels = kills + bat_rates[f'd_{bucket}']
        # replace(0, 1) avoids 0/0; the numerator is 0 there so the rate is 0.
        bat_rates[f'bat_win_rate_{bucket}'] = kills / duels.replace(0, 1)
    all_kills = bat_rates['k_close'] + bat_rates['k_mid'] + bat_rates['k_far']
    all_deaths = bat_rates['d_close'] + bat_rates['d_mid'] + bat_rates['d_far']
    bat_rates['bat_win_rate_vs_all'] = all_kills / (all_kills + all_deaths).replace(0, 1)

    rate_cols = ['bat_win_rate_close', 'bat_win_rate_mid',
                 'bat_win_rate_far', 'bat_win_rate_vs_all']
    # Expose the player id as a regular column so it can be merged on.
    bat_rates = bat_rates[rate_cols].rename_axis('steam_id_64').reset_index()
    return bat_dist, bat_rates


def _elo_split_rows(conn, placeholders, valid_ids):
    """Return per-player mean K/D in matches above vs. at-or-below their mean ELO."""
    query_elo = f"""
    SELECT mp.steam_id_64, mp.kd_ratio,
           (SELECT AVG(group_origin_elo) FROM fact_match_teams fmt WHERE fmt.match_id = mp.match_id AND group_origin_elo > 0) as elo
    FROM fact_match_players mp
    WHERE mp.steam_id_64 IN ({placeholders})
    """
    df_elo = pd.read_sql_query(query_elo, conn, params=valid_ids)

    rows = []
    for pid, group in df_elo.groupby('steam_id_64'):
        avg = group['elo'].mean()
        if pd.isna(avg):
            # No ELO recorded for any of the player's matches.
            avg = 1000
        rows.append({
            'steam_id_64': pid,
            'bat_kd_diff_high_elo': group[group['elo'] > avg]['kd_ratio'].mean(),
            'bat_kd_diff_low_elo': group[group['elo'] <= avg]['kd_ratio'].mean()
        })
    return rows


def _pressure_rows(conn, placeholders, valid_ids):
    """Return one dict of HPS (high-pressure round context) features per player."""
    # Round-by-round scores joined with the player's side and per-round
    # kill/death counts. This is heavy (two correlated subqueries per row).
    query_hps_ctx = f"""
    SELECT r.match_id, r.round_num, r.ct_score, r.t_score, r.winner_side,
           m.score_team1, m.score_team2, m.winner_team,
           e.steam_id_64, e.side as player_side,
           (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=r.match_id AND ev.round_num=r.round_num AND ev.attacker_steam_id=e.steam_id_64 AND ev.event_type='kill') as kills,
           (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=r.match_id AND ev.round_num=r.round_num AND ev.victim_steam_id=e.steam_id_64 AND ev.event_type='kill') as deaths
    FROM fact_rounds r
    JOIN fact_matches m ON r.match_id = m.match_id
    JOIN fact_round_player_economy e ON r.match_id = e.match_id AND r.round_num = e.round_num
    WHERE e.steam_id_64 IN ({placeholders})
    """
    df_rounds = pd.read_sql_query(query_hps_ctx, conn, params=valid_ids)

    rows = []
    for pid, group in df_rounds.groupby('steam_id_64'):
        # assign() builds a new frame instead of writing into the groupby slice.
        on_ct = group['player_side'] == 'CT'
        group = group.assign(
            my_score=np.where(on_ct, group['ct_score'], group['t_score']),
            enemy_score=np.where(on_ct, group['t_score'], group['ct_score']),
        )

        # Match point heuristic: either team already has >= 12 rounds
        # (assumes MR12; the match format is not checked per match).
        mp_rounds = group[(group['my_score'] >= 12) | (group['enemy_score'] >= 12)]
        mp_wins = mp_rounds[mp_rounds['winner_side'] == mp_rounds['player_side']]
        mp_win_rate = len(mp_wins) / len(mp_rounds) if len(mp_rounds) > 0 else 0.5

        # Pressure: trailing by 3+ rounds. First-kill data is not loaded here,
        # so kills per round is used as a proxy for the entry rate.
        pressure_rounds = group[(group['enemy_score'] - group['my_score']) >= 3]
        pressure_kpr = pressure_rounds['kills'].mean() if len(pressure_rounds) > 0 else 0

        # Momentum: leading by 3+ rounds; share of those rounds with 2+ kills.
        momentum_rounds = group[(group['my_score'] - group['enemy_score']) >= 3]
        momentum_multikills = len(momentum_rounds[momentum_rounds['kills'] >= 2])
        momentum_mk_rate = (
            momentum_multikills / len(momentum_rounds) if len(momentum_rounds) > 0 else 0
        )

        # Comeback: K/D in pressure rounds minus overall K/D.
        pressure_deaths = pressure_rounds['deaths'].sum()
        pressure_kills = pressure_rounds['kills'].sum()
        pressure_kd = pressure_kills / pressure_deaths if pressure_deaths > 0 else pressure_kills

        overall_deaths = group['deaths'].sum()
        overall_kills = group['kills'].sum()
        overall_kd = overall_kills / overall_deaths if overall_deaths > 0 else overall_kills

        comeback_diff = pressure_kd - overall_kd

        rows.append({
            'steam_id_64': pid,
            'hps_match_point_win_rate': mp_win_rate,
            'hps_pressure_entry_rate': pressure_kpr,  # Proxy
            'hps_momentum_multikill_rate': momentum_mk_rate,
            'hps_comeback_kd_diff': comeback_diff,
            'hps_losing_streak_kd_diff': comeback_diff  # Same metric
        })
    return rows


def _pistol_rows(conn, placeholders, valid_ids):
    """Return one dict of raw PTL (pistol round) totals and rates per player."""
    # Pistol rounds are taken to be rounds 1 and 13 (common for MR12).
    query_ptl = f"""
    SELECT
        e.steam_id_64,
        (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=e.match_id AND ev.round_num=e.round_num AND ev.attacker_steam_id=e.steam_id_64 AND ev.event_type='kill') as kills,
        (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=e.match_id AND ev.round_num=e.round_num AND ev.victim_steam_id=e.steam_id_64 AND ev.event_type='kill') as deaths,
        r.winner_side, e.side as player_side,
        e.equipment_value
    FROM fact_round_player_economy e
    JOIN fact_rounds r ON e.match_id = r.match_id AND e.round_num = r.round_num
    WHERE e.steam_id_64 IN ({placeholders})
      AND e.round_num IN (1, 13)
    """
    df_ptl_raw = pd.read_sql_query(query_ptl, conn, params=valid_ids)

    rows = []
    for pid, group in df_ptl_raw.groupby('steam_id_64'):
        kills = group['kills'].sum()
        deaths = group['deaths'].sum()
        wins = len(group[group['winner_side'] == group['player_side']])
        rows.append({
            'steam_id_64': pid,
            'ptl_pistol_kills': kills,  # total; the caller converts to per-match
            'ptl_pistol_kd': kills / deaths if deaths > 0 else kills,
            'ptl_pistol_win_rate': wins / len(group),
            'ptl_pistol_multikills': len(group[group['kills'] >= 2])
        })
    return rows


def load_and_calculate_ultimate(conn, min_matches=5):
    """Build the per-player "ultimate" feature table from the L2 database.

    Aggregates ``fact_match_players`` per ``steam_id_64`` and then adds the
    feature families STA (stability), BAT (battle: kill distance, ELO split,
    duel frequency), HPS (high-pressure contexts, clutches), PTL (pistol
    rounds), CT-side ratings and UTIL (utility usage).

    The distance, HPS and PTL sections are best-effort: if one of them raises,
    the error is printed and its columns are simply absent from the result.

    NOTE(review): every ``IN (...)`` list binds one parameter per qualifying
    player, so a very large player set may exceed SQLite's bound-variable
    limit — confirm against the expected data volume.

    Args:
        conn: Database connection accepted by ``pandas.read_sql_query`` and
            using ``?`` placeholders (e.g. a ``sqlite3`` connection).
        min_matches: Minimum number of ``fact_match_players`` rows a player
            needs to be included.

    Returns:
        A DataFrame with one row per qualifying player and all missing values
        filled with 0, or ``None`` when no player qualifies.
    """
    print("Loading Ultimate Data Set...")

    # 1. Basic Stats
    query_basic = """
    SELECT
        steam_id_64,
        COUNT(*) as matches_played,
        SUM(round_total) as rounds_played,
        AVG(rating) as basic_avg_rating,
        AVG(kd_ratio) as basic_avg_kd,
        AVG(adr) as basic_avg_adr,
        AVG(kast) as basic_avg_kast,
        AVG(rws) as basic_avg_rws,
        SUM(headshot_count) as sum_hs,
        SUM(kills) as sum_kills,
        SUM(deaths) as sum_deaths,
        SUM(first_kill) as sum_fk,
        SUM(first_death) as sum_fd,
        SUM(clutch_1v1) as sum_1v1,
        SUM(clutch_1v2) as sum_1v2,
        SUM(clutch_1v3) + SUM(clutch_1v4) + SUM(clutch_1v5) as sum_1v3p,
        SUM(kill_2) as sum_2k,
        SUM(kill_3) as sum_3k,
        SUM(kill_4) as sum_4k,
        SUM(kill_5) as sum_5k,
        SUM(assisted_kill) as sum_assist,
        SUM(perfect_kill) as sum_perfect,
        SUM(revenge_kill) as sum_revenge,
        SUM(awp_kill) as sum_awp,
        SUM(jump_count) as sum_jump,
        SUM(throw_harm) as sum_util_dmg,
        SUM(flash_time) as sum_flash_time,
        SUM(flash_enemy) as sum_flash_enemy,
        SUM(flash_team) as sum_flash_team
    FROM fact_match_players
    GROUP BY steam_id_64
    HAVING COUNT(*) >= ?
    """
    df = pd.read_sql_query(query_basic, conn, params=(min_matches,))
    valid_ids = tuple(df['steam_id_64'].tolist())
    if not valid_ids:
        return None
    placeholders = ','.join(['?'] * len(valid_ids))

    # --- Basic Derived ---
    # replace(0, 1) guards the ratio denominators; the numerator is 0 there.
    matches = df['matches_played']
    openings = (df['sum_fk'] + df['sum_fd']).replace(0, 1)
    df['basic_headshot_rate'] = df['sum_hs'] / df['sum_kills'].replace(0, 1)
    df['basic_avg_headshot_kills'] = df['sum_hs'] / matches
    df['basic_avg_first_kill'] = df['sum_fk'] / matches
    df['basic_avg_first_death'] = df['sum_fd'] / matches
    df['basic_first_kill_rate'] = df['sum_fk'] / openings
    df['basic_first_death_rate'] = df['sum_fd'] / openings
    df['basic_avg_kill_2'] = df['sum_2k'] / matches
    df['basic_avg_kill_3'] = df['sum_3k'] / matches
    df['basic_avg_kill_4'] = df['sum_4k'] / matches
    df['basic_avg_kill_5'] = df['sum_5k'] / matches
    df['basic_avg_assisted_kill'] = df['sum_assist'] / matches
    df['basic_avg_perfect_kill'] = df['sum_perfect'] / matches
    df['basic_avg_revenge_kill'] = df['sum_revenge'] / matches
    df['basic_avg_awp_kill'] = df['sum_awp'] / matches
    df['basic_avg_jump_count'] = df['sum_jump'] / matches

    # 2. STA - Detailed Time Series
    print("Calculating STA (Detailed)...")
    df = _merge_player_features(df, _stability_rows(conn, placeholders, valid_ids))

    # 3. BAT - Distance & Advanced
    print("Calculating BAT (Distance & Context)...")
    try:
        distance_frames = _distance_features(conn, placeholders, valid_ids)
        if distance_frames is None:
            print("No position data found.")
        else:
            # Merge into a scratch frame so a failure leaves df untouched.
            merged = df
            for frame in distance_frames:
                merged = _merge_player_features(merged, frame)
            df = merged
    except Exception as e:
        print(f"Dist calculation error: {e}")

    # High/Low ELO KD
    df = _merge_player_features(df, _elo_split_rows(conn, placeholders, valid_ids))

    # Avg Duel Freq
    df['bat_avg_duel_freq'] = (df['sum_fk'] + df['sum_fd']) / df['rounds_played']

    # 4. HPS - High Pressure Contexts
    print("Calculating HPS (Contexts)...")
    try:
        df = _merge_player_features(df, _pressure_rows(conn, placeholders, valid_ids))
    except Exception as e:
        print(f"HPS Error: {e}")

    # 4.4 Clutch Win Rates (Detailed)
    # Normalized per match for now; ideal would be per clutch opportunity.
    # Kept outside the try above: these only need the basic aggregates, so a
    # failure of the heavy round-context query must not drop them.
    df['hps_clutch_win_rate_1v1'] = df['sum_1v1'] / df['matches_played']
    df['hps_clutch_win_rate_1v2'] = df['sum_1v2'] / df['matches_played']
    df['hps_clutch_win_rate_1v3_plus'] = df['sum_1v3p'] / df['matches_played']

    # 5. PTL - Pistol Detailed
    print("Calculating PTL...")
    try:
        ptl_rows = _pistol_rows(conn, placeholders, valid_ids)
        if ptl_rows:
            with_ptl = df.merge(pd.DataFrame(ptl_rows), on='steam_id_64', how='left')
            # Average pistol kills per match, using each player's own match
            # count rather than the population mean.
            with_ptl['ptl_pistol_kills'] = (
                with_ptl['ptl_pistol_kills'] / with_ptl['matches_played']
            )
            df = with_ptl
    except Exception as e:
        print(f"PTL Error: {e}")

    # 6. T/CT & UTIL
    print("Calculating T/CT & UTIL...")
    # TODO: per-side round counts (fact_round_player_economy) and T-side
    # ratings are not computed yet; only CT-side aggregates are loaded.
    query_side_r = f"""
    SELECT steam_id_64, AVG(rating) as ct_rating, AVG(kd_ratio) as ct_kd, SUM(first_kill) as ct_fk
    FROM fact_match_players_ct WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64
    """
    df_ct = pd.read_sql_query(query_side_r, conn, params=valid_ids)
    df = _merge_player_features(df, df_ct)

    # UTIL
    df['util_avg_nade_dmg'] = df['sum_util_dmg'] / df['matches_played']
    df['util_avg_flash_time'] = df['sum_flash_time'] / df['matches_played']
    df['util_avg_flash_enemy'] = df['sum_flash_enemy'] / df['matches_played']

    # Players missing from a feature family get 0 for its columns.
    df = df.fillna(0)

    return df
|
||
|
|
|
||
|
|
def calculate_ultimate_scores(df):
    """Add the six 0-100 dimension scores to a copy of the feature table.

    Each input feature is min-max scaled to 0-100 across all rows and combined
    with fixed weights into ``score_BAT``, ``score_STA``, ``score_HPS``,
    ``score_PTL``, ``score_TCT`` and ``score_UTIL``. A feature that is missing
    from ``df``, constant, or entirely NaN contributes a neutral 50, so the
    function works on partially populated feature tables.

    Args:
        df: Per-player feature DataFrame (as produced by
            ``load_and_calculate_ultimate``). It is not modified.

    Returns:
        A copy of ``df`` with the ``score_*`` columns added.
    """
    df = df.copy()

    def neutral():
        # Always a Series, so callers can chain Series methods and the
        # arithmetic below stays aligned on df's index.
        return pd.Series(50.0, index=df.index)

    def scale(s):
        """Min-max scale a Series to 0-100; neutral 50 when it has no spread."""
        lo, hi = s.min(), s.max()
        if not hi > lo:  # constant, empty or all-NaN
            return neutral()
        return (s - lo) / (hi - lo) * 100

    def n(col):
        """Scaled value of ``df[col]``, or neutral 50 when the column is missing."""
        if col not in df.columns:
            return neutral()
        return scale(df[col])

    def n_abs(col):
        """Like ``n`` but scales the magnitude of the column, ignoring sign."""
        if col not in df.columns:
            return neutral()
        return scale(df[col].abs())

    # 1. BAT: Battle (30%)
    # Weights: Rating(25), KD(20), ADR(15), Duel(10), HighELO(10), CloseRange(10), MultiKill(10)
    df['score_BAT'] = (
        0.25 * n('basic_avg_rating') +
        0.20 * n('basic_avg_kd') +
        0.15 * n('basic_avg_adr') +
        0.10 * n('bat_avg_duel_win_rate') +  # neutral 50 unless the column exists
        0.10 * n('bat_kd_diff_high_elo') +
        0.10 * n('bat_win_rate_close') +
        0.10 * n('basic_avg_kill_3')  # Multi-kill proxy
    )

    # 2. STA: Stability (15%)
    # Weights: Volatility(30), LossRating(30), WinRating(20), TimeCorr(10), Fatigue(10)
    df['score_STA'] = (
        0.30 * (100 - n('sta_rating_volatility')) +
        0.30 * n('sta_loss_rating') +
        0.20 * n('sta_win_rating') +
        # Closer to 0 is better (rating independent of duration), so the
        # absolute value must be taken before scaling, not after.
        0.10 * (100 - n_abs('sta_time_rating_corr')) +
        0.10 * (100 - n('sta_fatigue_decay'))
    )

    # 3. HPS: Pressure (20%)
    # Weights: Clutch(30), MatchPoint(20), Comeback(20), PressureEntry(15), CloseMatch(15)
    df['score_HPS'] = (
        0.30 * n('sum_1v3p') +  # Using high tier clutches
        0.20 * n('hps_match_point_win_rate') +
        0.20 * n('hps_comeback_kd_diff') +
        0.15 * n('hps_pressure_entry_rate') +
        0.15 * n('basic_avg_rating')  # Fallback if close match rating missing
    )

    # 4. PTL: Pistol (10%)
    # Weights: Kills(40), WinRate(30), KD(30)
    df['score_PTL'] = (
        0.40 * n('ptl_pistol_kills') +
        0.30 * n('ptl_pistol_win_rate') +
        0.30 * n('ptl_pistol_kd')
    )

    # 5. T/CT (15%)
    # Weights: CT(50), T(50)
    # Placeholder: per-side ratings are not wired in yet, so the overall
    # rating stands in for both halves.
    df['score_TCT'] = 0.5 * n('basic_avg_rating') + 0.5 * n('basic_avg_rating')

    # 6. UTIL (10%)
    # Weights: Dmg(50), Flash(30), EnemiesFlashed(20)
    df['score_UTIL'] = (
        0.50 * n('util_avg_nade_dmg') +
        0.30 * n('util_avg_flash_time') +
        0.20 * n('util_avg_flash_enemy')
    )

    return df
|
||
|
|
|
||
|
|
def main():
    """Run the pipeline end to end and print a short report.

    Opens the database, builds the feature table, scores it, prints the five
    highest ``score_BAT`` rows and a few feature-coverage checks. Nothing is
    printed when no player qualifies. The connection is always closed.
    """
    connection = get_db_connection()
    try:
        features = load_and_calculate_ultimate(connection)
        if features is not None:
            scored = calculate_ultimate_scores(features)

            print("\n--- Ultimate Scores (Top 5 BAT) ---")
            report_columns = [
                'steam_id_64',
                'score_BAT',
                'score_STA',
                'score_HPS',
                'score_PTL',
                'score_UTIL',
            ]
            ranked = scored[report_columns].sort_values('score_BAT', ascending=False)
            print(ranked.head(5))

            # Verify coverage: one marker column per feature family.
            print("\n--- Feature Coverage ---")
            print(f"Total Columns: {len(scored.columns)}")
            coverage_checks = (
                ("BAT Distances:", 'bat_win_rate_close'),
                ("HPS Contexts:", 'hps_match_point_win_rate'),
                ("PTL Detailed:", 'ptl_pistol_kd'),
            )
            for label, marker in coverage_checks:
                print(label, marker in scored.columns)
    finally:
        connection.close()
|
||
|
|
|
||
|
|
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|