2257 lines
115 KiB
Python
2257 lines
115 KiB
Python
|
|
from web.database import query_db, get_db, execute_db
|
||
|
|
import sqlite3
|
||
|
|
import pandas as pd
|
||
|
|
import numpy as np
|
||
|
|
from web.services.weapon_service import get_weapon_info
|
||
|
|
|
||
|
|
class FeatureService:
|
||
|
|
@staticmethod
|
||
|
|
def get_player_features(steam_id):
|
||
|
|
sql = "SELECT * FROM dm_player_features WHERE steam_id_64 = ?"
|
||
|
|
return query_db('l3', sql, [steam_id], one=True)
|
||
|
|
|
||
|
|
@staticmethod
|
||
|
|
def get_players_list(page=1, per_page=20, sort_by='rating', search=None):
|
||
|
|
offset = (page - 1) * per_page
|
||
|
|
|
||
|
|
# Sort Mapping
|
||
|
|
sort_map = {
|
||
|
|
'rating': 'basic_avg_rating',
|
||
|
|
'kd': 'basic_avg_kd',
|
||
|
|
'kast': 'basic_avg_kast',
|
||
|
|
'matches': 'matches_played'
|
||
|
|
}
|
||
|
|
order_col = sort_map.get(sort_by, 'basic_avg_rating')
|
||
|
|
|
||
|
|
from web.services.stats_service import StatsService
|
||
|
|
|
||
|
|
# Helper to attach match counts
|
||
|
|
def attach_match_counts(player_list):
|
||
|
|
if not player_list:
|
||
|
|
return
|
||
|
|
ids = [p['steam_id_64'] for p in player_list]
|
||
|
|
# Batch query for counts from L2
|
||
|
|
placeholders = ','.join('?' for _ in ids)
|
||
|
|
sql = f"""
|
||
|
|
SELECT steam_id_64, COUNT(*) as cnt
|
||
|
|
FROM fact_match_players
|
||
|
|
WHERE steam_id_64 IN ({placeholders})
|
||
|
|
GROUP BY steam_id_64
|
||
|
|
"""
|
||
|
|
counts = query_db('l2', sql, ids)
|
||
|
|
cnt_dict = {r['steam_id_64']: r['cnt'] for r in counts}
|
||
|
|
for p in player_list:
|
||
|
|
p['matches_played'] = cnt_dict.get(p['steam_id_64'], 0)
|
||
|
|
|
||
|
|
if search:
|
||
|
|
# Get all matching players
|
||
|
|
l2_players, _ = StatsService.get_players(page=1, per_page=100, search=search)
|
||
|
|
if not l2_players:
|
||
|
|
return [], 0
|
||
|
|
|
||
|
|
steam_ids = [p['steam_id_64'] for p in l2_players]
|
||
|
|
placeholders = ','.join('?' for _ in steam_ids)
|
||
|
|
sql = f"SELECT * FROM dm_player_features WHERE steam_id_64 IN ({placeholders})"
|
||
|
|
features = query_db('l3', sql, steam_ids)
|
||
|
|
f_dict = {f['steam_id_64']: f for f in features}
|
||
|
|
|
||
|
|
# Get counts for sorting
|
||
|
|
count_sql = f"SELECT steam_id_64, COUNT(*) as cnt FROM fact_match_players WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64"
|
||
|
|
counts = query_db('l2', count_sql, steam_ids)
|
||
|
|
cnt_dict = {r['steam_id_64']: r['cnt'] for r in counts}
|
||
|
|
|
||
|
|
merged = []
|
||
|
|
for p in l2_players:
|
||
|
|
f = f_dict.get(p['steam_id_64'])
|
||
|
|
m = dict(p)
|
||
|
|
if f:
|
||
|
|
m.update(dict(f))
|
||
|
|
else:
|
||
|
|
# Fallback Calc
|
||
|
|
stats = StatsService.get_player_basic_stats(p['steam_id_64'])
|
||
|
|
if stats:
|
||
|
|
m['basic_avg_rating'] = stats['rating']
|
||
|
|
m['basic_avg_kd'] = stats['kd']
|
||
|
|
m['basic_avg_kast'] = stats['kast']
|
||
|
|
else:
|
||
|
|
m['basic_avg_rating'] = 0
|
||
|
|
m['basic_avg_kd'] = 0
|
||
|
|
m['basic_avg_kast'] = 0
|
||
|
|
|
||
|
|
m['matches_played'] = cnt_dict.get(p['steam_id_64'], 0)
|
||
|
|
merged.append(m)
|
||
|
|
|
||
|
|
merged.sort(key=lambda x: x.get(order_col, 0) or 0, reverse=True)
|
||
|
|
|
||
|
|
total = len(merged)
|
||
|
|
start = (page - 1) * per_page
|
||
|
|
end = start + per_page
|
||
|
|
return merged[start:end], total
|
||
|
|
|
||
|
|
else:
|
||
|
|
# Browse mode
|
||
|
|
l3_count = query_db('l3', "SELECT COUNT(*) as cnt FROM dm_player_features", one=True)['cnt']
|
||
|
|
|
||
|
|
if l3_count == 0 or sort_by == 'matches':
|
||
|
|
if sort_by == 'matches':
|
||
|
|
sql = """
|
||
|
|
SELECT steam_id_64, COUNT(*) as cnt
|
||
|
|
FROM fact_match_players
|
||
|
|
GROUP BY steam_id_64
|
||
|
|
ORDER BY cnt DESC
|
||
|
|
LIMIT ? OFFSET ?
|
||
|
|
"""
|
||
|
|
top_ids = query_db('l2', sql, [per_page, offset])
|
||
|
|
if not top_ids:
|
||
|
|
return [], 0
|
||
|
|
|
||
|
|
total = query_db('l2', "SELECT COUNT(DISTINCT steam_id_64) as cnt FROM fact_match_players", one=True)['cnt']
|
||
|
|
|
||
|
|
ids = [r['steam_id_64'] for r in top_ids]
|
||
|
|
l2_players = StatsService.get_players_by_ids(ids)
|
||
|
|
|
||
|
|
# Merge logic
|
||
|
|
merged = []
|
||
|
|
p_ph = ','.join('?' for _ in ids)
|
||
|
|
f_sql = f"SELECT * FROM dm_player_features WHERE steam_id_64 IN ({p_ph})"
|
||
|
|
features = query_db('l3', f_sql, ids)
|
||
|
|
f_dict = {f['steam_id_64']: f for f in features}
|
||
|
|
|
||
|
|
p_dict = {p['steam_id_64']: p for p in l2_players}
|
||
|
|
|
||
|
|
for r in top_ids:
|
||
|
|
sid = r['steam_id_64']
|
||
|
|
p = p_dict.get(sid)
|
||
|
|
if not p: continue
|
||
|
|
|
||
|
|
m = dict(p)
|
||
|
|
f = f_dict.get(sid)
|
||
|
|
if f:
|
||
|
|
m.update(dict(f))
|
||
|
|
else:
|
||
|
|
stats = StatsService.get_player_basic_stats(sid)
|
||
|
|
if stats:
|
||
|
|
m['basic_avg_rating'] = stats['rating']
|
||
|
|
m['basic_avg_kd'] = stats['kd']
|
||
|
|
m['basic_avg_kast'] = stats['kast']
|
||
|
|
else:
|
||
|
|
m['basic_avg_rating'] = 0
|
||
|
|
m['basic_avg_kd'] = 0
|
||
|
|
m['basic_avg_kast'] = 0
|
||
|
|
|
||
|
|
m['matches_played'] = r['cnt']
|
||
|
|
merged.append(m)
|
||
|
|
|
||
|
|
return merged, total
|
||
|
|
|
||
|
|
# L3 empty fallback
|
||
|
|
l2_players, total = StatsService.get_players(page, per_page, sort_by=None)
|
||
|
|
merged = []
|
||
|
|
attach_match_counts(l2_players)
|
||
|
|
|
||
|
|
for p in l2_players:
|
||
|
|
m = dict(p)
|
||
|
|
stats = StatsService.get_player_basic_stats(p['steam_id_64'])
|
||
|
|
if stats:
|
||
|
|
m['basic_avg_rating'] = stats['rating']
|
||
|
|
m['basic_avg_kd'] = stats['kd']
|
||
|
|
m['basic_avg_kast'] = stats['kast']
|
||
|
|
else:
|
||
|
|
m['basic_avg_rating'] = 0
|
||
|
|
m['basic_avg_kd'] = 0
|
||
|
|
m['basic_avg_kast'] = 0
|
||
|
|
m['matches_played'] = p.get('matches_played', 0)
|
||
|
|
merged.append(m)
|
||
|
|
|
||
|
|
if sort_by != 'rating':
|
||
|
|
merged.sort(key=lambda x: x.get(order_col, 0) or 0, reverse=True)
|
||
|
|
|
||
|
|
return merged, total
|
||
|
|
|
||
|
|
# Normal L3 browse
|
||
|
|
sql = f"SELECT * FROM dm_player_features ORDER BY {order_col} DESC LIMIT ? OFFSET ?"
|
||
|
|
features = query_db('l3', sql, [per_page, offset])
|
||
|
|
|
||
|
|
total = query_db('l3', "SELECT COUNT(*) as cnt FROM dm_player_features", one=True)['cnt']
|
||
|
|
|
||
|
|
if not features:
|
||
|
|
return [], total
|
||
|
|
|
||
|
|
steam_ids = [f['steam_id_64'] for f in features]
|
||
|
|
l2_players = StatsService.get_players_by_ids(steam_ids)
|
||
|
|
p_dict = {p['steam_id_64']: p for p in l2_players}
|
||
|
|
|
||
|
|
merged = []
|
||
|
|
for f in features:
|
||
|
|
m = dict(f)
|
||
|
|
p = p_dict.get(f['steam_id_64'])
|
||
|
|
if p:
|
||
|
|
m.update(dict(p))
|
||
|
|
else:
|
||
|
|
m['username'] = f['steam_id_64']
|
||
|
|
m['avatar_url'] = None
|
||
|
|
merged.append(m)
|
||
|
|
|
||
|
|
return merged, total
|
||
|
|
|
||
|
|
@staticmethod
|
||
|
|
def rebuild_all_features(min_matches=5):
|
||
|
|
"""
|
||
|
|
Refreshes the L3 Data Mart with full feature calculations.
|
||
|
|
"""
|
||
|
|
from web.config import Config
|
||
|
|
from web.services.web_service import WebService
|
||
|
|
import json
|
||
|
|
|
||
|
|
l3_db_path = Config.DB_L3_PATH
|
||
|
|
l2_db_path = Config.DB_L2_PATH
|
||
|
|
|
||
|
|
# Get Team Players
|
||
|
|
lineups = WebService.get_lineups()
|
||
|
|
team_player_ids = set()
|
||
|
|
for lineup in lineups:
|
||
|
|
if lineup['player_ids_json']:
|
||
|
|
try:
|
||
|
|
ids = json.loads(lineup['player_ids_json'])
|
||
|
|
# Ensure IDs are strings
|
||
|
|
team_player_ids.update([str(i) for i in ids])
|
||
|
|
except:
|
||
|
|
pass
|
||
|
|
|
||
|
|
if not team_player_ids:
|
||
|
|
print("No players found in any team lineup. Skipping L3 rebuild.")
|
||
|
|
return 0
|
||
|
|
|
||
|
|
conn_l2 = sqlite3.connect(l2_db_path)
|
||
|
|
conn_l2.row_factory = sqlite3.Row
|
||
|
|
|
||
|
|
try:
|
||
|
|
print(f"Loading L2 data for {len(team_player_ids)} players...")
|
||
|
|
df = FeatureService._load_and_calculate_dataframe(conn_l2, list(team_player_ids))
|
||
|
|
|
||
|
|
if df is None or df.empty:
|
||
|
|
print("No data to process.")
|
||
|
|
return 0
|
||
|
|
|
||
|
|
print("Calculating Scores...")
|
||
|
|
df = FeatureService._calculate_ultimate_scores(df)
|
||
|
|
|
||
|
|
print("Saving to L3...")
|
||
|
|
conn_l3 = sqlite3.connect(l3_db_path)
|
||
|
|
|
||
|
|
cursor = conn_l3.cursor()
|
||
|
|
|
||
|
|
# Ensure columns exist in DataFrame match DB columns
|
||
|
|
cursor.execute("PRAGMA table_info(dm_player_features)")
|
||
|
|
valid_cols = [r[1] for r in cursor.fetchall()]
|
||
|
|
|
||
|
|
# Filter DF columns
|
||
|
|
df_cols = [c for c in df.columns if c in valid_cols]
|
||
|
|
df_to_save = df[df_cols].copy()
|
||
|
|
df_to_save['updated_at'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
|
||
|
|
|
||
|
|
# Generate Insert SQL
|
||
|
|
print(f"DEBUG: Saving {len(df_to_save.columns)} columns to L3. Sample side_kd_ct: {df_to_save.get('side_kd_ct', pd.Series([0])).iloc[0]}")
|
||
|
|
placeholders = ','.join(['?'] * len(df_to_save.columns))
|
||
|
|
cols_str = ','.join(df_to_save.columns)
|
||
|
|
sql = f"INSERT OR REPLACE INTO dm_player_features ({cols_str}) VALUES ({placeholders})"
|
||
|
|
|
||
|
|
data = df_to_save.values.tolist()
|
||
|
|
cursor.executemany(sql, data)
|
||
|
|
conn_l3.commit()
|
||
|
|
conn_l3.close()
|
||
|
|
|
||
|
|
return len(df)
|
||
|
|
|
||
|
|
except Exception as e:
|
||
|
|
print(f"Rebuild Error: {e}")
|
||
|
|
import traceback
|
||
|
|
traceback.print_exc()
|
||
|
|
return 0
|
||
|
|
finally:
|
||
|
|
conn_l2.close()
|
||
|
|
|
||
|
|
@staticmethod
|
||
|
|
def _load_and_calculate_dataframe(conn, player_ids):
|
||
|
|
if not player_ids:
|
||
|
|
return None
|
||
|
|
|
||
|
|
placeholders = ','.join(['?'] * len(player_ids))
|
||
|
|
|
||
|
|
# 1. Basic Stats
|
||
|
|
query_basic = f"""
|
||
|
|
SELECT
|
||
|
|
steam_id_64,
|
||
|
|
COUNT(*) as matches_played,
|
||
|
|
SUM(round_total) as rounds_played,
|
||
|
|
AVG(rating) as basic_avg_rating,
|
||
|
|
AVG(kd_ratio) as basic_avg_kd,
|
||
|
|
AVG(adr) as basic_avg_adr,
|
||
|
|
AVG(kast) as basic_avg_kast,
|
||
|
|
AVG(rws) as basic_avg_rws,
|
||
|
|
SUM(headshot_count) as sum_hs,
|
||
|
|
SUM(kills) as sum_kills,
|
||
|
|
SUM(deaths) as sum_deaths,
|
||
|
|
SUM(first_kill) as sum_fk,
|
||
|
|
SUM(first_death) as sum_fd,
|
||
|
|
SUM(clutch_1v1) as sum_1v1,
|
||
|
|
SUM(clutch_1v2) as sum_1v2,
|
||
|
|
SUM(clutch_1v3) + SUM(clutch_1v4) + SUM(clutch_1v5) as sum_1v3p,
|
||
|
|
SUM(kill_2) as sum_2k,
|
||
|
|
SUM(kill_3) as sum_3k,
|
||
|
|
SUM(kill_4) as sum_4k,
|
||
|
|
SUM(kill_5) as sum_5k,
|
||
|
|
SUM(assisted_kill) as sum_assist,
|
||
|
|
SUM(perfect_kill) as sum_perfect,
|
||
|
|
SUM(revenge_kill) as sum_revenge,
|
||
|
|
SUM(awp_kill) as sum_awp,
|
||
|
|
SUM(jump_count) as sum_jump,
|
||
|
|
SUM(mvp_count) as sum_mvps,
|
||
|
|
SUM(planted_bomb) as sum_plants,
|
||
|
|
SUM(defused_bomb) as sum_defuses,
|
||
|
|
SUM(CASE
|
||
|
|
WHEN flash_assists > 0 THEN flash_assists
|
||
|
|
WHEN assists > assisted_kill THEN assists - assisted_kill
|
||
|
|
ELSE 0
|
||
|
|
END) as sum_flash_assists,
|
||
|
|
SUM(throw_harm) as sum_util_dmg,
|
||
|
|
SUM(flash_time) as sum_flash_time,
|
||
|
|
SUM(flash_enemy) as sum_flash_enemy,
|
||
|
|
SUM(flash_team) as sum_flash_team,
|
||
|
|
SUM(util_flash_usage) as sum_util_flash,
|
||
|
|
SUM(util_smoke_usage) as sum_util_smoke,
|
||
|
|
SUM(util_molotov_usage) as sum_util_molotov,
|
||
|
|
SUM(util_he_usage) as sum_util_he,
|
||
|
|
SUM(util_decoy_usage) as sum_util_decoy
|
||
|
|
FROM fact_match_players
|
||
|
|
WHERE steam_id_64 IN ({placeholders})
|
||
|
|
GROUP BY steam_id_64
|
||
|
|
"""
|
||
|
|
df = pd.read_sql_query(query_basic, conn, params=player_ids)
|
||
|
|
if df.empty: return None
|
||
|
|
|
||
|
|
# Basic Derived
|
||
|
|
df['basic_headshot_rate'] = df['sum_hs'] / df['sum_kills'].replace(0, 1)
|
||
|
|
df['basic_avg_headshot_kills'] = df['sum_hs'] / df['matches_played']
|
||
|
|
df['basic_avg_first_kill'] = df['sum_fk'] / df['matches_played']
|
||
|
|
df['basic_avg_first_death'] = df['sum_fd'] / df['matches_played']
|
||
|
|
df['basic_first_kill_rate'] = df['sum_fk'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1)
|
||
|
|
df['basic_first_death_rate'] = df['sum_fd'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1)
|
||
|
|
df['basic_avg_kill_2'] = df['sum_2k'] / df['matches_played']
|
||
|
|
df['basic_avg_kill_3'] = df['sum_3k'] / df['matches_played']
|
||
|
|
df['basic_avg_kill_4'] = df['sum_4k'] / df['matches_played']
|
||
|
|
df['basic_avg_kill_5'] = df['sum_5k'] / df['matches_played']
|
||
|
|
df['basic_avg_assisted_kill'] = df['sum_assist'] / df['matches_played']
|
||
|
|
df['basic_avg_perfect_kill'] = df['sum_perfect'] / df['matches_played']
|
||
|
|
df['basic_avg_revenge_kill'] = df['sum_revenge'] / df['matches_played']
|
||
|
|
df['basic_avg_awp_kill'] = df['sum_awp'] / df['matches_played']
|
||
|
|
df['basic_avg_jump_count'] = df['sum_jump'] / df['matches_played']
|
||
|
|
df['basic_avg_mvps'] = df['sum_mvps'] / df['matches_played']
|
||
|
|
df['basic_avg_plants'] = df['sum_plants'] / df['matches_played']
|
||
|
|
df['basic_avg_defuses'] = df['sum_defuses'] / df['matches_played']
|
||
|
|
df['basic_avg_flash_assists'] = df['sum_flash_assists'] / df['matches_played']
|
||
|
|
|
||
|
|
# UTIL Basic
|
||
|
|
df['util_avg_nade_dmg'] = df['sum_util_dmg'] / df['matches_played']
|
||
|
|
df['util_avg_flash_time'] = df['sum_flash_time'] / df['matches_played']
|
||
|
|
df['util_avg_flash_enemy'] = df['sum_flash_enemy'] / df['matches_played']
|
||
|
|
|
||
|
|
valid_ids = tuple(df['steam_id_64'].tolist())
|
||
|
|
placeholders = ','.join(['?'] * len(valid_ids))
|
||
|
|
|
||
|
|
try:
|
||
|
|
query_weapon_kills = f"""
|
||
|
|
SELECT attacker_steam_id as steam_id_64,
|
||
|
|
SUM(CASE WHEN lower(weapon) LIKE '%knife%' OR lower(weapon) LIKE '%bayonet%' THEN 1 ELSE 0 END) as knife_kills,
|
||
|
|
SUM(CASE WHEN lower(weapon) LIKE '%taser%' OR lower(weapon) LIKE '%zeus%' THEN 1 ELSE 0 END) as zeus_kills
|
||
|
|
FROM fact_round_events
|
||
|
|
WHERE event_type = 'kill'
|
||
|
|
AND attacker_steam_id IN ({placeholders})
|
||
|
|
GROUP BY attacker_steam_id
|
||
|
|
"""
|
||
|
|
df_weapon_kills = pd.read_sql_query(query_weapon_kills, conn, params=valid_ids)
|
||
|
|
if not df_weapon_kills.empty:
|
||
|
|
df = df.merge(df_weapon_kills, on='steam_id_64', how='left')
|
||
|
|
else:
|
||
|
|
df['knife_kills'] = 0
|
||
|
|
df['zeus_kills'] = 0
|
||
|
|
except Exception:
|
||
|
|
df['knife_kills'] = 0
|
||
|
|
df['zeus_kills'] = 0
|
||
|
|
|
||
|
|
df['basic_avg_knife_kill'] = df['knife_kills'].fillna(0) / df['matches_played'].replace(0, 1)
|
||
|
|
df['basic_avg_zeus_kill'] = df['zeus_kills'].fillna(0) / df['matches_played'].replace(0, 1)
|
||
|
|
|
||
|
|
try:
|
||
|
|
query_zeus_pick = f"""
|
||
|
|
SELECT steam_id_64,
|
||
|
|
AVG(CASE WHEN has_zeus = 1 THEN 1.0 ELSE 0.0 END) as basic_zeus_pick_rate
|
||
|
|
FROM fact_round_player_economy
|
||
|
|
WHERE steam_id_64 IN ({placeholders})
|
||
|
|
GROUP BY steam_id_64
|
||
|
|
"""
|
||
|
|
df_zeus_pick = pd.read_sql_query(query_zeus_pick, conn, params=valid_ids)
|
||
|
|
if not df_zeus_pick.empty:
|
||
|
|
df = df.merge(df_zeus_pick, on='steam_id_64', how='left')
|
||
|
|
except Exception:
|
||
|
|
df['basic_zeus_pick_rate'] = 0.0
|
||
|
|
|
||
|
|
df['basic_zeus_pick_rate'] = df.get('basic_zeus_pick_rate', 0.0)
|
||
|
|
df['basic_zeus_pick_rate'] = pd.to_numeric(df['basic_zeus_pick_rate'], errors='coerce').fillna(0.0)
|
||
|
|
|
||
|
|
# 2. STA (Detailed)
|
||
|
|
query_sta = f"""
|
||
|
|
SELECT mp.steam_id_64, mp.rating, mp.is_win, m.start_time, m.duration
|
||
|
|
FROM fact_match_players mp
|
||
|
|
JOIN fact_matches m ON mp.match_id = m.match_id
|
||
|
|
WHERE mp.steam_id_64 IN ({placeholders})
|
||
|
|
ORDER BY mp.steam_id_64, m.start_time
|
||
|
|
"""
|
||
|
|
df_matches = pd.read_sql_query(query_sta, conn, params=valid_ids)
|
||
|
|
sta_list = []
|
||
|
|
for pid, group in df_matches.groupby('steam_id_64'):
|
||
|
|
group = group.sort_values('start_time')
|
||
|
|
last_30 = group.tail(30)
|
||
|
|
|
||
|
|
# Fatigue Calc
|
||
|
|
# Simple heuristic: split matches by day, compare early (first 3) vs late (rest)
|
||
|
|
group['date'] = pd.to_datetime(group['start_time'], unit='s').dt.date
|
||
|
|
day_counts = group.groupby('date').size()
|
||
|
|
busy_days = day_counts[day_counts >= 4].index # Days with 4+ matches
|
||
|
|
|
||
|
|
fatigue_decays = []
|
||
|
|
for day in busy_days:
|
||
|
|
day_matches = group[group['date'] == day]
|
||
|
|
if len(day_matches) >= 4:
|
||
|
|
early_rating = day_matches.head(3)['rating'].mean()
|
||
|
|
late_rating = day_matches.tail(len(day_matches) - 3)['rating'].mean()
|
||
|
|
fatigue_decays.append(early_rating - late_rating)
|
||
|
|
|
||
|
|
avg_fatigue = np.mean(fatigue_decays) if fatigue_decays else 0
|
||
|
|
|
||
|
|
sta_list.append({
|
||
|
|
'steam_id_64': pid,
|
||
|
|
'sta_last_30_rating': last_30['rating'].mean(),
|
||
|
|
'sta_win_rating': group[group['is_win']==1]['rating'].mean(),
|
||
|
|
'sta_loss_rating': group[group['is_win']==0]['rating'].mean(),
|
||
|
|
'sta_rating_volatility': group.tail(10)['rating'].std() if len(group) > 1 else 0,
|
||
|
|
'sta_time_rating_corr': group['duration'].corr(group['rating']) if len(group)>2 and group['rating'].std() > 0 else 0,
|
||
|
|
'sta_fatigue_decay': avg_fatigue
|
||
|
|
})
|
||
|
|
df = df.merge(pd.DataFrame(sta_list), on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
# 3. BAT (High ELO)
|
||
|
|
query_elo = f"""
|
||
|
|
SELECT mp.steam_id_64, mp.kd_ratio,
|
||
|
|
(SELECT AVG(group_origin_elo) FROM fact_match_teams fmt WHERE fmt.match_id = mp.match_id AND group_origin_elo > 0) as elo
|
||
|
|
FROM fact_match_players mp
|
||
|
|
WHERE mp.steam_id_64 IN ({placeholders})
|
||
|
|
"""
|
||
|
|
df_elo = pd.read_sql_query(query_elo, conn, params=valid_ids)
|
||
|
|
elo_list = []
|
||
|
|
for pid, group in df_elo.groupby('steam_id_64'):
|
||
|
|
avg = group['elo'].mean() or 1000
|
||
|
|
elo_list.append({
|
||
|
|
'steam_id_64': pid,
|
||
|
|
'bat_kd_diff_high_elo': group[group['elo'] > avg]['kd_ratio'].mean(),
|
||
|
|
'bat_kd_diff_low_elo': group[group['elo'] <= avg]['kd_ratio'].mean()
|
||
|
|
})
|
||
|
|
df = df.merge(pd.DataFrame(elo_list), on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
# Duel Win Rate
|
||
|
|
query_duel = f"""
|
||
|
|
SELECT steam_id_64, SUM(entry_kills) as ek, SUM(entry_deaths) as ed
|
||
|
|
FROM fact_match_players WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64
|
||
|
|
"""
|
||
|
|
df_duel = pd.read_sql_query(query_duel, conn, params=valid_ids)
|
||
|
|
df_duel['bat_avg_duel_win_rate'] = df_duel['ek'] / (df_duel['ek'] + df_duel['ed']).replace(0, 1)
|
||
|
|
df = df.merge(df_duel[['steam_id_64', 'bat_avg_duel_win_rate']], on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
# 4. HPS
|
||
|
|
# Clutch Rate
|
||
|
|
df['hps_clutch_win_rate_1v1'] = df['sum_1v1'] / df['matches_played']
|
||
|
|
df['hps_clutch_win_rate_1v3_plus'] = df['sum_1v3p'] / df['matches_played']
|
||
|
|
|
||
|
|
# Prepare Detailed Event Data for HPS (Comeback), PTL (KD), and T/CT
|
||
|
|
|
||
|
|
# A. Determine Side Info using fact_match_teams
|
||
|
|
# 1. Get Match Teams
|
||
|
|
query_teams = f"""
|
||
|
|
SELECT match_id, group_fh_role, group_uids
|
||
|
|
FROM fact_match_teams
|
||
|
|
WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))
|
||
|
|
"""
|
||
|
|
df_teams = pd.read_sql_query(query_teams, conn, params=valid_ids)
|
||
|
|
|
||
|
|
# 2. Get Player UIDs
|
||
|
|
query_uids = f"SELECT match_id, steam_id_64, uid FROM fact_match_players WHERE steam_id_64 IN ({placeholders})"
|
||
|
|
df_uids = pd.read_sql_query(query_uids, conn, params=valid_ids)
|
||
|
|
|
||
|
|
# 3. Get Match Meta (Start Time for MR12/MR15)
|
||
|
|
query_meta = f"SELECT match_id, start_time FROM fact_matches WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))"
|
||
|
|
df_meta = pd.read_sql_query(query_meta, conn, params=valid_ids)
|
||
|
|
df_meta['halftime_round'] = np.where(df_meta['start_time'] > 1695772800, 12, 15) # CS2 Release Date approx
|
||
|
|
|
||
|
|
# 4. Build FH Side DataFrame
|
||
|
|
fh_rows = []
|
||
|
|
if not df_teams.empty and not df_uids.empty:
|
||
|
|
match_teams = {} # match_id -> [(role, [uids])]
|
||
|
|
for _, row in df_teams.iterrows():
|
||
|
|
mid = row['match_id']
|
||
|
|
role = row['group_fh_role'] # 1=CT, 0=T
|
||
|
|
try:
|
||
|
|
uids = str(row['group_uids']).split(',')
|
||
|
|
uids = [u.strip() for u in uids if u.strip()]
|
||
|
|
except:
|
||
|
|
uids = []
|
||
|
|
if mid not in match_teams: match_teams[mid] = []
|
||
|
|
match_teams[mid].append((role, uids))
|
||
|
|
|
||
|
|
for _, row in df_uids.iterrows():
|
||
|
|
mid = row['match_id']
|
||
|
|
sid = row['steam_id_64']
|
||
|
|
uid = str(row['uid'])
|
||
|
|
if mid in match_teams:
|
||
|
|
for role, uids in match_teams[mid]:
|
||
|
|
if uid in uids:
|
||
|
|
fh_rows.append({
|
||
|
|
'match_id': mid,
|
||
|
|
'steam_id_64': sid,
|
||
|
|
'fh_side': 'CT' if role == 1 else 'T'
|
||
|
|
})
|
||
|
|
break
|
||
|
|
|
||
|
|
df_fh_sides = pd.DataFrame(fh_rows)
|
||
|
|
if df_fh_sides.empty:
|
||
|
|
df_fh_sides = pd.DataFrame(columns=['match_id', 'steam_id_64', 'fh_side', 'halftime_round'])
|
||
|
|
else:
|
||
|
|
df_fh_sides = df_fh_sides.merge(df_meta[['match_id', 'halftime_round']], on='match_id', how='left')
|
||
|
|
if 'halftime_round' not in df_fh_sides.columns:
|
||
|
|
df_fh_sides['halftime_round'] = 15
|
||
|
|
df_fh_sides['halftime_round'] = df_fh_sides['halftime_round'].fillna(15).astype(int)
|
||
|
|
|
||
|
|
# B. Get Kill Events
|
||
|
|
query_events = f"""
|
||
|
|
SELECT match_id, round_num, attacker_steam_id, victim_steam_id, event_type, is_headshot, event_time,
|
||
|
|
weapon, trade_killer_steam_id, flash_assist_steam_id
|
||
|
|
FROM fact_round_events
|
||
|
|
WHERE event_type='kill'
|
||
|
|
AND (attacker_steam_id IN ({placeholders}) OR victim_steam_id IN ({placeholders}))
|
||
|
|
"""
|
||
|
|
df_events = pd.read_sql_query(query_events, conn, params=valid_ids + valid_ids)
|
||
|
|
|
||
|
|
# C. Get Round Scores
|
||
|
|
query_rounds = f"""
|
||
|
|
SELECT match_id, round_num, ct_score, t_score, winner_side, duration
|
||
|
|
FROM fact_rounds
|
||
|
|
WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))
|
||
|
|
"""
|
||
|
|
df_rounds = pd.read_sql_query(query_rounds, conn, params=valid_ids)
|
||
|
|
|
||
|
|
# Fix missing winner_side by calculating from score changes
|
||
|
|
if not df_rounds.empty:
|
||
|
|
df_rounds = df_rounds.sort_values(['match_id', 'round_num']).reset_index(drop=True)
|
||
|
|
df_rounds['prev_ct'] = df_rounds.groupby('match_id')['ct_score'].shift(1).fillna(0)
|
||
|
|
df_rounds['prev_t'] = df_rounds.groupby('match_id')['t_score'].shift(1).fillna(0)
|
||
|
|
|
||
|
|
# Determine winner based on score increment
|
||
|
|
df_rounds['ct_win'] = (df_rounds['ct_score'] > df_rounds['prev_ct'])
|
||
|
|
df_rounds['t_win'] = (df_rounds['t_score'] > df_rounds['prev_t'])
|
||
|
|
|
||
|
|
df_rounds['calculated_winner'] = np.where(df_rounds['ct_win'], 'CT',
|
||
|
|
np.where(df_rounds['t_win'], 'T', None))
|
||
|
|
|
||
|
|
# Force overwrite winner_side with calculated winner since DB data is unreliable (mostly NULL)
|
||
|
|
df_rounds['winner_side'] = df_rounds['calculated_winner']
|
||
|
|
|
||
|
|
# Ensure winner_side is string type to match side ('CT', 'T')
|
||
|
|
df_rounds['winner_side'] = df_rounds['winner_side'].astype(str)
|
||
|
|
|
||
|
|
# Fallback for Round 1 if still None (e.g. if prev is 0 and score is 1)
|
||
|
|
# Logic above handles Round 1 correctly (prev is 0).
|
||
|
|
|
||
|
|
# --- Process Logic ---
|
||
|
|
# Logic above handles Round 1 correctly (prev is 0).
|
||
|
|
|
||
|
|
# --- Process Logic ---
|
||
|
|
has_events = not df_events.empty
|
||
|
|
has_sides = not df_fh_sides.empty
|
||
|
|
|
||
|
|
if has_events and has_sides:
|
||
|
|
# 1. Attacker Side
|
||
|
|
df_events = df_events.merge(df_fh_sides, left_on=['match_id', 'attacker_steam_id'], right_on=['match_id', 'steam_id_64'], how='left')
|
||
|
|
df_events.rename(columns={'fh_side': 'att_fh_side'}, inplace=True)
|
||
|
|
df_events.drop(columns=['steam_id_64'], inplace=True)
|
||
|
|
|
||
|
|
# 2. Victim Side
|
||
|
|
df_events = df_events.merge(df_fh_sides, left_on=['match_id', 'victim_steam_id'], right_on=['match_id', 'steam_id_64'], how='left', suffixes=('', '_vic'))
|
||
|
|
df_events.rename(columns={'fh_side': 'vic_fh_side'}, inplace=True)
|
||
|
|
df_events.drop(columns=['steam_id_64'], inplace=True)
|
||
|
|
|
||
|
|
# 3. Determine Actual Side (CT/T)
|
||
|
|
# Logic: If round <= halftime -> FH Side. Else -> Opposite.
|
||
|
|
def calc_side(fh_side, round_num, halftime):
    """Return the actual side ('CT'/'T') played in a given round.

    fh_side is the side the player started on in the first half;
    teams swap sides after `halftime` rounds. Returns None when the
    first-half side is missing (NaN).
    """
    # Unknown first-half side -> side cannot be determined.
    if pd.isna(fh_side):
        return None
    # Rounds up to and including halftime are played on the recorded side.
    if round_num <= halftime:
        return fh_side
    # After halftime the teams have swapped.
    if fh_side == 'CT':
        return 'T'
    return 'CT'
|
||
|
|
|
||
|
|
# Vectorized approach
|
||
|
|
# Attacker
|
||
|
|
mask_fh_att = df_events['round_num'] <= df_events['halftime_round']
|
||
|
|
df_events['attacker_side'] = np.where(mask_fh_att, df_events['att_fh_side'],
|
||
|
|
np.where(df_events['att_fh_side'] == 'CT', 'T', 'CT'))
|
||
|
|
# Victim
|
||
|
|
mask_fh_vic = df_events['round_num'] <= df_events['halftime_round']
|
||
|
|
df_events['victim_side'] = np.where(mask_fh_vic, df_events['vic_fh_side'],
|
||
|
|
np.where(df_events['vic_fh_side'] == 'CT', 'T', 'CT'))
|
||
|
|
|
||
|
|
# Merge Scores
|
||
|
|
df_events = df_events.merge(df_rounds, on=['match_id', 'round_num'], how='left')
|
||
|
|
|
||
|
|
# --- BAT: Win Rate vs All ---
|
||
|
|
# Removed as per request (Difficult to calculate / All Zeros)
|
||
|
|
df['bat_win_rate_vs_all'] = 0
|
||
|
|
|
||
|
|
# --- HPS: Match Point & Comeback ---
|
||
|
|
# Match Point Win Rate
|
||
|
|
mp_rounds = df_rounds[((df_rounds['ct_score'] == 12) | (df_rounds['t_score'] == 12) |
|
||
|
|
(df_rounds['ct_score'] == 15) | (df_rounds['t_score'] == 15))]
|
||
|
|
|
||
|
|
if not mp_rounds.empty and has_sides:
|
||
|
|
# Need player side for these rounds
|
||
|
|
# Expand sides for all rounds
|
||
|
|
q_all_rounds = f"SELECT match_id, round_num FROM fact_rounds WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))"
|
||
|
|
df_all_rounds = pd.read_sql_query(q_all_rounds, conn, params=valid_ids)
|
||
|
|
|
||
|
|
df_player_rounds = df_all_rounds.merge(df_fh_sides, on='match_id')
|
||
|
|
mask_fh = df_player_rounds['round_num'] <= df_player_rounds['halftime_round']
|
||
|
|
df_player_rounds['side'] = np.where(mask_fh, df_player_rounds['fh_side'],
|
||
|
|
np.where(df_player_rounds['fh_side'] == 'CT', 'T', 'CT'))
|
||
|
|
|
||
|
|
# Filter for MP rounds
|
||
|
|
# Join mp_rounds with df_player_rounds
|
||
|
|
mp_player = df_player_rounds.merge(mp_rounds[['match_id', 'round_num', 'winner_side']], on=['match_id', 'round_num'])
|
||
|
|
mp_player['is_win'] = (mp_player['side'] == mp_player['winner_side']).astype(int)
|
||
|
|
|
||
|
|
hps_mp = mp_player.groupby('steam_id_64')['is_win'].mean().reset_index()
|
||
|
|
hps_mp.rename(columns={'is_win': 'hps_match_point_win_rate'}, inplace=True)
|
||
|
|
df = df.merge(hps_mp, on='steam_id_64', how='left')
|
||
|
|
else:
|
||
|
|
df['hps_match_point_win_rate'] = 0.5
|
||
|
|
|
||
|
|
# Comeback KD Diff
|
||
|
|
# Attacker Context
|
||
|
|
df_events['att_team_score'] = np.where(df_events['attacker_side'] == 'CT', df_events['ct_score'], df_events['t_score'])
|
||
|
|
df_events['att_opp_score'] = np.where(df_events['attacker_side'] == 'CT', df_events['t_score'], df_events['ct_score'])
|
||
|
|
df_events['is_comeback_att'] = (df_events['att_team_score'] + 4 <= df_events['att_opp_score'])
|
||
|
|
|
||
|
|
# Victim Context
|
||
|
|
df_events['vic_team_score'] = np.where(df_events['victim_side'] == 'CT', df_events['ct_score'], df_events['t_score'])
|
||
|
|
df_events['vic_opp_score'] = np.where(df_events['victim_side'] == 'CT', df_events['t_score'], df_events['ct_score'])
|
||
|
|
df_events['is_comeback_vic'] = (df_events['vic_team_score'] + 4 <= df_events['vic_opp_score'])
|
||
|
|
|
||
|
|
att_k = df_events.groupby('attacker_steam_id').size()
|
||
|
|
vic_d = df_events.groupby('victim_steam_id').size()
|
||
|
|
|
||
|
|
cb_k = df_events[df_events['is_comeback_att']].groupby('attacker_steam_id').size()
|
||
|
|
cb_d = df_events[df_events['is_comeback_vic']].groupby('victim_steam_id').size()
|
||
|
|
|
||
|
|
kd_stats = pd.DataFrame({'k': att_k, 'd': vic_d, 'cb_k': cb_k, 'cb_d': cb_d}).fillna(0)
|
||
|
|
kd_stats['kd'] = kd_stats['k'] / kd_stats['d'].replace(0, 1)
|
||
|
|
kd_stats['cb_kd'] = kd_stats['cb_k'] / kd_stats['cb_d'].replace(0, 1)
|
||
|
|
kd_stats['hps_comeback_kd_diff'] = kd_stats['cb_kd'] - kd_stats['kd']
|
||
|
|
|
||
|
|
kd_stats.index.name = 'steam_id_64'
|
||
|
|
df = df.merge(kd_stats[['hps_comeback_kd_diff']], on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
# HPS: Losing Streak KD Diff
|
||
|
|
# Logic: KD in rounds where team has lost >= 3 consecutive rounds vs Global KD
|
||
|
|
# 1. Identify Streak Rounds
|
||
|
|
if not df_rounds.empty:
|
||
|
|
# Ensure sorted
|
||
|
|
df_rounds = df_rounds.sort_values(['match_id', 'round_num'])
|
||
|
|
|
||
|
|
# Shift to check previous results
|
||
|
|
# We need to handle match boundaries. Groupby match_id is safer.
|
||
|
|
# CT Loss Streak
|
||
|
|
g = df_rounds.groupby('match_id')
|
||
|
|
df_rounds['ct_lost_1'] = g['t_win'].shift(1).fillna(False)
|
||
|
|
df_rounds['ct_lost_2'] = g['t_win'].shift(2).fillna(False)
|
||
|
|
df_rounds['ct_lost_3'] = g['t_win'].shift(3).fillna(False)
|
||
|
|
df_rounds['ct_in_loss_streak'] = (df_rounds['ct_lost_1'] & df_rounds['ct_lost_2'] & df_rounds['ct_lost_3'])
|
||
|
|
|
||
|
|
# T Loss Streak
|
||
|
|
df_rounds['t_lost_1'] = g['ct_win'].shift(1).fillna(False)
|
||
|
|
df_rounds['t_lost_2'] = g['ct_win'].shift(2).fillna(False)
|
||
|
|
df_rounds['t_lost_3'] = g['ct_win'].shift(3).fillna(False)
|
||
|
|
df_rounds['t_in_loss_streak'] = (df_rounds['t_lost_1'] & df_rounds['t_lost_2'] & df_rounds['t_lost_3'])
|
||
|
|
|
||
|
|
# Merge into events
|
||
|
|
# df_events already has 'match_id', 'round_num', 'attacker_side'
|
||
|
|
# We need to merge streak info
|
||
|
|
streak_cols = df_rounds[['match_id', 'round_num', 'ct_in_loss_streak', 't_in_loss_streak']]
|
||
|
|
df_events = df_events.merge(streak_cols, on=['match_id', 'round_num'], how='left')
|
||
|
|
|
||
|
|
# Determine if attacker is in streak
|
||
|
|
df_events['att_is_loss_streak'] = np.where(
|
||
|
|
df_events['attacker_side'] == 'CT', df_events['ct_in_loss_streak'],
|
||
|
|
np.where(df_events['attacker_side'] == 'T', df_events['t_in_loss_streak'], False)
|
||
|
|
)
|
||
|
|
|
||
|
|
# Determine if victim is in streak (for deaths)
|
||
|
|
df_events['vic_is_loss_streak'] = np.where(
|
||
|
|
df_events['victim_side'] == 'CT', df_events['ct_in_loss_streak'],
|
||
|
|
np.where(df_events['victim_side'] == 'T', df_events['t_in_loss_streak'], False)
|
||
|
|
)
|
||
|
|
|
||
|
|
# Calculate KD in Streak
|
||
|
|
ls_k = df_events[df_events['att_is_loss_streak']].groupby('attacker_steam_id').size()
|
||
|
|
ls_d = df_events[df_events['vic_is_loss_streak']].groupby('victim_steam_id').size()
|
||
|
|
|
||
|
|
ls_stats = pd.DataFrame({'ls_k': ls_k, 'ls_d': ls_d}).fillna(0)
|
||
|
|
ls_stats['ls_kd'] = ls_stats['ls_k'] / ls_stats['ls_d'].replace(0, 1)
|
||
|
|
|
||
|
|
# Compare with Global KD (from df_sides or recomputed)
|
||
|
|
# Recompute global KD from events to be consistent
|
||
|
|
g_k = df_events.groupby('attacker_steam_id').size()
|
||
|
|
g_d = df_events.groupby('victim_steam_id').size()
|
||
|
|
g_stats = pd.DataFrame({'g_k': g_k, 'g_d': g_d}).fillna(0)
|
||
|
|
g_stats['g_kd'] = g_stats['g_k'] / g_stats['g_d'].replace(0, 1)
|
||
|
|
|
||
|
|
ls_stats = ls_stats.join(g_stats[['g_kd']], how='outer').fillna(0)
|
||
|
|
ls_stats['hps_losing_streak_kd_diff'] = ls_stats['ls_kd'] - ls_stats['g_kd']
|
||
|
|
|
||
|
|
ls_stats.index.name = 'steam_id_64'
|
||
|
|
df = df.merge(ls_stats[['hps_losing_streak_kd_diff']], on='steam_id_64', how='left')
|
||
|
|
else:
|
||
|
|
df['hps_losing_streak_kd_diff'] = 0
|
||
|
|
|
||
|
|
|
||
|
|
# HPS: Momentum Multi-kill Rate
|
||
|
|
# Team won 3+ rounds -> 2+ kills
|
||
|
|
# Need sequential win info.
|
||
|
|
# Hard to vectorise fully without accurate round sequence reconstruction including missing rounds.
|
||
|
|
# Placeholder: 0
|
||
|
|
df['hps_momentum_multikill_rate'] = 0
|
||
|
|
|
||
|
|
# HPS: Tilt Rating Drop
|
||
|
|
df['hps_tilt_rating_drop'] = 0
|
||
|
|
|
||
|
|
# HPS: Clutch Rating Rise
|
||
|
|
df['hps_clutch_rating_rise'] = 0
|
||
|
|
|
||
|
|
# HPS: Undermanned Survival
|
||
|
|
df['hps_undermanned_survival_time'] = 0
|
||
|
|
|
||
|
|
# --- PTL: Pistol Stats ---
|
||
|
|
pistol_rounds = [1, 13]
|
||
|
|
df_pistol = df_events[df_events['round_num'].isin(pistol_rounds)]
|
||
|
|
|
||
|
|
if not df_pistol.empty:
|
||
|
|
pk = df_pistol.groupby('attacker_steam_id').size()
|
||
|
|
pd_death = df_pistol.groupby('victim_steam_id').size()
|
||
|
|
p_stats = pd.DataFrame({'pk': pk, 'pd': pd_death}).fillna(0)
|
||
|
|
p_stats['ptl_pistol_kd'] = p_stats['pk'] / p_stats['pd'].replace(0, 1)
|
||
|
|
|
||
|
|
phs = df_pistol[df_pistol['is_headshot'] == 1].groupby('attacker_steam_id').size()
|
||
|
|
p_stats['phs'] = phs
|
||
|
|
p_stats['phs'] = p_stats['phs'].fillna(0)
|
||
|
|
p_stats['ptl_pistol_util_efficiency'] = p_stats['phs'] / p_stats['pk'].replace(0, 1)
|
||
|
|
|
||
|
|
p_stats.index.name = 'steam_id_64'
|
||
|
|
df = df.merge(p_stats[['ptl_pistol_kd', 'ptl_pistol_util_efficiency']], on='steam_id_64', how='left')
|
||
|
|
else:
|
||
|
|
df['ptl_pistol_kd'] = 1.0
|
||
|
|
df['ptl_pistol_util_efficiency'] = 0.0
|
||
|
|
|
||
|
|
# --- T/CT Stats (Directly from L2 Side Tables) ---
|
||
|
|
query_sides_l2 = f"""
|
||
|
|
SELECT
|
||
|
|
steam_id_64,
|
||
|
|
'CT' as side,
|
||
|
|
COUNT(*) as matches,
|
||
|
|
SUM(round_total) as rounds,
|
||
|
|
AVG(rating2) as rating,
|
||
|
|
SUM(kills) as kills,
|
||
|
|
SUM(deaths) as deaths,
|
||
|
|
SUM(assists) as assists,
|
||
|
|
AVG(CAST(is_win as FLOAT)) as win_rate,
|
||
|
|
SUM(first_kill) as fk,
|
||
|
|
SUM(first_death) as fd,
|
||
|
|
AVG(kast) as kast,
|
||
|
|
AVG(rws) as rws,
|
||
|
|
SUM(kill_2 + kill_3 + kill_4 + kill_5) as multi_kill_rounds,
|
||
|
|
SUM(headshot_count) as hs
|
||
|
|
FROM fact_match_players_ct
|
||
|
|
WHERE steam_id_64 IN ({placeholders})
|
||
|
|
GROUP BY steam_id_64
|
||
|
|
|
||
|
|
UNION ALL
|
||
|
|
|
||
|
|
SELECT
|
||
|
|
steam_id_64,
|
||
|
|
'T' as side,
|
||
|
|
COUNT(*) as matches,
|
||
|
|
SUM(round_total) as rounds,
|
||
|
|
AVG(rating2) as rating,
|
||
|
|
SUM(kills) as kills,
|
||
|
|
SUM(deaths) as deaths,
|
||
|
|
SUM(assists) as assists,
|
||
|
|
AVG(CAST(is_win as FLOAT)) as win_rate,
|
||
|
|
SUM(first_kill) as fk,
|
||
|
|
SUM(first_death) as fd,
|
||
|
|
AVG(kast) as kast,
|
||
|
|
AVG(rws) as rws,
|
||
|
|
SUM(kill_2 + kill_3 + kill_4 + kill_5) as multi_kill_rounds,
|
||
|
|
SUM(headshot_count) as hs
|
||
|
|
FROM fact_match_players_t
|
||
|
|
WHERE steam_id_64 IN ({placeholders})
|
||
|
|
GROUP BY steam_id_64
|
||
|
|
"""
|
||
|
|
|
||
|
|
df_sides = pd.read_sql_query(query_sides_l2, conn, params=valid_ids + valid_ids)
|
||
|
|
|
||
|
|
if not df_sides.empty:
|
||
|
|
# Calculate Derived Rates per row before pivoting
|
||
|
|
df_sides['rounds'] = df_sides['rounds'].replace(0, 1) # Avoid div by zero
|
||
|
|
|
||
|
|
# KD Calculation (Sum of Kills / Sum of Deaths)
|
||
|
|
df_sides['kd'] = df_sides['kills'] / df_sides['deaths'].replace(0, 1)
|
||
|
|
|
||
|
|
# KAST Proxy (if KAST is 0)
|
||
|
|
# KAST ~= (Kills + Assists + Survived) / Rounds
|
||
|
|
# Survived = Rounds - Deaths
|
||
|
|
if df_sides['kast'].mean() == 0:
|
||
|
|
df_sides['survived'] = df_sides['rounds'] - df_sides['deaths']
|
||
|
|
df_sides['kast'] = (df_sides['kills'] + df_sides['assists'] + df_sides['survived']) / df_sides['rounds']
|
||
|
|
|
||
|
|
|
||
|
|
df_sides['fk_rate'] = df_sides['fk'] / df_sides['rounds']
|
||
|
|
df_sides['fd_rate'] = df_sides['fd'] / df_sides['rounds']
|
||
|
|
df_sides['mk_rate'] = df_sides['multi_kill_rounds'] / df_sides['rounds']
|
||
|
|
df_sides['hs_rate'] = df_sides['hs'] / df_sides['kills'].replace(0, 1)
|
||
|
|
|
||
|
|
# Pivot
|
||
|
|
# We want columns like side_rating_ct, side_rating_t, etc.
|
||
|
|
pivoted = df_sides.pivot(index='steam_id_64', columns='side').reset_index()
|
||
|
|
|
||
|
|
# Flatten MultiIndex columns
|
||
|
|
new_cols = ['steam_id_64']
|
||
|
|
for col_name, side in pivoted.columns[1:]:
|
||
|
|
# Map L2 column names to Feature names
|
||
|
|
# rating -> side_rating_{side}
|
||
|
|
# kd -> side_kd_{side}
|
||
|
|
# win_rate -> side_win_rate_{side}
|
||
|
|
# fk_rate -> side_first_kill_rate_{side}
|
||
|
|
# fd_rate -> side_first_death_rate_{side}
|
||
|
|
# kast -> side_kast_{side}
|
||
|
|
# rws -> side_rws_{side}
|
||
|
|
# mk_rate -> side_multikill_rate_{side}
|
||
|
|
# hs_rate -> side_headshot_rate_{side}
|
||
|
|
|
||
|
|
target_map = {
|
||
|
|
'rating': 'side_rating',
|
||
|
|
'kd': 'side_kd',
|
||
|
|
'win_rate': 'side_win_rate',
|
||
|
|
'fk_rate': 'side_first_kill_rate',
|
||
|
|
'fd_rate': 'side_first_death_rate',
|
||
|
|
'kast': 'side_kast',
|
||
|
|
'rws': 'side_rws',
|
||
|
|
'mk_rate': 'side_multikill_rate',
|
||
|
|
'hs_rate': 'side_headshot_rate'
|
||
|
|
}
|
||
|
|
|
||
|
|
if col_name in target_map:
|
||
|
|
new_cols.append(f"{target_map[col_name]}_{side.lower()}")
|
||
|
|
else:
|
||
|
|
new_cols.append(f"{col_name}_{side.lower()}") # Fallback for intermediate cols if needed
|
||
|
|
|
||
|
|
pivoted.columns = new_cols
|
||
|
|
|
||
|
|
# Select only relevant columns to merge
|
||
|
|
cols_to_merge = [c for c in new_cols if c.startswith('side_')]
|
||
|
|
cols_to_merge.append('steam_id_64')
|
||
|
|
|
||
|
|
df = df.merge(pivoted[cols_to_merge], on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
# Fill NaN with 0 for side stats
|
||
|
|
for c in cols_to_merge:
|
||
|
|
if c != 'steam_id_64':
|
||
|
|
df[c] = df[c].fillna(0)
|
||
|
|
|
||
|
|
# Add calculated diffs for scoring/display if needed (or just let template handle it)
|
||
|
|
# KD Diff for L3 Score calculation
|
||
|
|
if 'side_rating_ct' in df.columns and 'side_rating_t' in df.columns:
|
||
|
|
df['side_kd_diff_ct_t'] = df['side_rating_ct'] - df['side_rating_t']
|
||
|
|
else:
|
||
|
|
df['side_kd_diff_ct_t'] = 0
|
||
|
|
|
||
|
|
# --- Obj Override from Main Table (sum_plants, sum_defuses) ---
|
||
|
|
# side_obj_t = sum_plants / matches_played
|
||
|
|
# side_obj_ct = sum_defuses / matches_played
|
||
|
|
df['side_obj_t'] = df['sum_plants'] / df['matches_played'].replace(0, 1)
|
||
|
|
df['side_obj_ct'] = df['sum_defuses'] / df['matches_played'].replace(0, 1)
|
||
|
|
df['side_obj_t'] = df['side_obj_t'].fillna(0)
|
||
|
|
df['side_obj_ct'] = df['side_obj_ct'].fillna(0)
|
||
|
|
|
||
|
|
else:
|
||
|
|
# Fallbacks
|
||
|
|
cols = ['hps_match_point_win_rate', 'hps_comeback_kd_diff', 'ptl_pistol_kd', 'ptl_pistol_util_efficiency',
|
||
|
|
'side_rating_ct', 'side_rating_t', 'side_first_kill_rate_ct', 'side_first_kill_rate_t', 'side_kd_diff_ct_t',
|
||
|
|
'bat_win_rate_vs_all', 'hps_losing_streak_kd_diff', 'hps_momentum_multikill_rate',
|
||
|
|
'hps_tilt_rating_drop', 'hps_clutch_rating_rise', 'hps_undermanned_survival_time',
|
||
|
|
'side_win_rate_ct', 'side_win_rate_t', 'side_kd_ct', 'side_kd_t',
|
||
|
|
'side_kast_ct', 'side_kast_t', 'side_rws_ct', 'side_rws_t',
|
||
|
|
'side_first_death_rate_ct', 'side_first_death_rate_t',
|
||
|
|
'side_multikill_rate_ct', 'side_multikill_rate_t',
|
||
|
|
'side_headshot_rate_ct', 'side_headshot_rate_t',
|
||
|
|
'side_obj_ct', 'side_obj_t']
|
||
|
|
for c in cols:
|
||
|
|
df[c] = 0
|
||
|
|
|
||
|
|
df['hps_match_point_win_rate'] = df['hps_match_point_win_rate'].fillna(0.5)
|
||
|
|
df['bat_win_rate_vs_all'] = df['bat_win_rate_vs_all'].fillna(0.5)
|
||
|
|
df['hps_losing_streak_kd_diff'] = df['hps_losing_streak_kd_diff'].fillna(0)
|
||
|
|
|
||
|
|
# HPS Pressure Entry Rate (Entry Kills per Round in Losing Matches)
|
||
|
|
q_mp_team = f"SELECT match_id, steam_id_64, is_win, entry_kills, round_total FROM fact_match_players WHERE steam_id_64 IN ({placeholders})"
|
||
|
|
df_mp_team = pd.read_sql_query(q_mp_team, conn, params=valid_ids)
|
||
|
|
if not df_mp_team.empty:
|
||
|
|
losing_matches = df_mp_team[df_mp_team['is_win'] == 0]
|
||
|
|
if not losing_matches.empty:
|
||
|
|
# Sum Entry Kills / Sum Rounds
|
||
|
|
pressure_entry = losing_matches.groupby('steam_id_64')[['entry_kills', 'round_total']].sum().reset_index()
|
||
|
|
pressure_entry['hps_pressure_entry_rate'] = pressure_entry['entry_kills'] / pressure_entry['round_total'].replace(0, 1)
|
||
|
|
df = df.merge(pressure_entry[['steam_id_64', 'hps_pressure_entry_rate']], on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
if 'hps_pressure_entry_rate' not in df.columns:
|
||
|
|
df['hps_pressure_entry_rate'] = 0
|
||
|
|
df['hps_pressure_entry_rate'] = df['hps_pressure_entry_rate'].fillna(0)
|
||
|
|
|
||
|
|
# 5. PTL (Additional Features: Kills & Multi)
|
||
|
|
query_ptl = f"""
|
||
|
|
SELECT ev.attacker_steam_id as steam_id_64, COUNT(*) as pistol_kills
|
||
|
|
FROM fact_round_events ev
|
||
|
|
WHERE ev.event_type = 'kill' AND ev.round_num IN (1, 13)
|
||
|
|
AND ev.attacker_steam_id IN ({placeholders})
|
||
|
|
GROUP BY ev.attacker_steam_id
|
||
|
|
"""
|
||
|
|
df_ptl = pd.read_sql_query(query_ptl, conn, params=valid_ids)
|
||
|
|
if not df_ptl.empty:
|
||
|
|
df = df.merge(df_ptl, on='steam_id_64', how='left')
|
||
|
|
df['ptl_pistol_kills'] = df['pistol_kills'] / df['matches_played']
|
||
|
|
else:
|
||
|
|
df['ptl_pistol_kills'] = 0
|
||
|
|
|
||
|
|
query_ptl_multi = f"""
|
||
|
|
SELECT attacker_steam_id as steam_id_64, COUNT(*) as multi_cnt
|
||
|
|
FROM (
|
||
|
|
SELECT match_id, round_num, attacker_steam_id, COUNT(*) as k
|
||
|
|
FROM fact_round_events
|
||
|
|
WHERE event_type = 'kill' AND round_num IN (1, 13)
|
||
|
|
AND attacker_steam_id IN ({placeholders})
|
||
|
|
GROUP BY match_id, round_num, attacker_steam_id
|
||
|
|
HAVING k >= 2
|
||
|
|
)
|
||
|
|
GROUP BY attacker_steam_id
|
||
|
|
"""
|
||
|
|
df_ptl_multi = pd.read_sql_query(query_ptl_multi, conn, params=valid_ids)
|
||
|
|
if not df_ptl_multi.empty:
|
||
|
|
df = df.merge(df_ptl_multi, on='steam_id_64', how='left')
|
||
|
|
df['ptl_pistol_multikills'] = df['multi_cnt'] / df['matches_played']
|
||
|
|
else:
|
||
|
|
df['ptl_pistol_multikills'] = 0
|
||
|
|
|
||
|
|
# PTL Win Rate (Pandas Logic using fixed winner_side)
|
||
|
|
if not df_rounds.empty and has_sides:
|
||
|
|
# Ensure df_player_rounds exists
|
||
|
|
if 'df_player_rounds' not in locals():
|
||
|
|
q_all_rounds = f"SELECT match_id, round_num FROM fact_rounds WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))"
|
||
|
|
df_all_rounds = pd.read_sql_query(q_all_rounds, conn, params=valid_ids)
|
||
|
|
df_player_rounds = df_all_rounds.merge(df_fh_sides, on='match_id')
|
||
|
|
mask_fh = df_player_rounds['round_num'] <= df_player_rounds['halftime_round']
|
||
|
|
df_player_rounds['side'] = np.where(mask_fh, df_player_rounds['fh_side'],
|
||
|
|
np.where(df_player_rounds['fh_side'] == 'CT', 'T', 'CT'))
|
||
|
|
|
||
|
|
# Filter for Pistol Rounds (1 and after halftime)
|
||
|
|
# Use halftime_round logic (MR12: 13, MR15: 16)
|
||
|
|
player_pistol = df_player_rounds[
|
||
|
|
(df_player_rounds['round_num'] == 1) |
|
||
|
|
(df_player_rounds['round_num'] == df_player_rounds['halftime_round'] + 1)
|
||
|
|
].copy()
|
||
|
|
|
||
|
|
# Merge with df_rounds to get calculated winner_side
|
||
|
|
df_rounds['winner_side'] = df_rounds['winner_side'].astype(str) # Ensure string for merge safety
|
||
|
|
player_pistol = player_pistol.merge(df_rounds[['match_id', 'round_num', 'winner_side']], on=['match_id', 'round_num'], how='left')
|
||
|
|
|
||
|
|
# Calculate Win
|
||
|
|
# Ensure winner_side is in player_pistol columns after merge
|
||
|
|
if 'winner_side' in player_pistol.columns:
|
||
|
|
player_pistol['is_win'] = (player_pistol['side'] == player_pistol['winner_side']).astype(int)
|
||
|
|
else:
|
||
|
|
player_pistol['is_win'] = 0
|
||
|
|
|
||
|
|
ptl_wins = player_pistol.groupby('steam_id_64')['is_win'].agg(['sum', 'count']).reset_index()
|
||
|
|
ptl_wins.rename(columns={'sum': 'pistol_wins', 'count': 'pistol_rounds'}, inplace=True)
|
||
|
|
|
||
|
|
ptl_wins['ptl_pistol_win_rate'] = ptl_wins['pistol_wins'] / ptl_wins['pistol_rounds'].replace(0, 1)
|
||
|
|
df = df.merge(ptl_wins[['steam_id_64', 'ptl_pistol_win_rate']], on='steam_id_64', how='left')
|
||
|
|
else:
|
||
|
|
df['ptl_pistol_win_rate'] = 0.5
|
||
|
|
|
||
|
|
df['ptl_pistol_multikills'] = df['ptl_pistol_multikills'].fillna(0)
|
||
|
|
df['ptl_pistol_win_rate'] = df['ptl_pistol_win_rate'].fillna(0.5)
|
||
|
|
|
||
|
|
# 7. UTIL (Enhanced with Prop Frequency)
|
||
|
|
# Usage Rate: Average number of grenades purchased per round
|
||
|
|
df['util_usage_rate'] = (
|
||
|
|
df['sum_util_flash'] + df['sum_util_smoke'] +
|
||
|
|
df['sum_util_molotov'] + df['sum_util_he'] + df['sum_util_decoy']
|
||
|
|
) / df['rounds_played'].replace(0, 1) * 100 # Multiply by 100 to make it comparable to other metrics (e.g. 1.5 nades/round -> 150)
|
||
|
|
|
||
|
|
# Fallback if no new data yet (rely on old logic or keep 0)
|
||
|
|
# We can try to fetch equipment_value as backup if sum is 0
|
||
|
|
if df['util_usage_rate'].sum() == 0:
|
||
|
|
query_eco = f"""
|
||
|
|
SELECT steam_id_64, AVG(equipment_value) as avg_equip_val
|
||
|
|
FROM fact_round_player_economy
|
||
|
|
WHERE steam_id_64 IN ({placeholders})
|
||
|
|
GROUP BY steam_id_64
|
||
|
|
"""
|
||
|
|
df_eco = pd.read_sql_query(query_eco, conn, params=valid_ids)
|
||
|
|
if not df_eco.empty:
|
||
|
|
df_eco['util_usage_rate_backup'] = df_eco['avg_equip_val'] / 50.0 # Scaling factor for equipment value
|
||
|
|
df = df.merge(df_eco[['steam_id_64', 'util_usage_rate_backup']], on='steam_id_64', how='left')
|
||
|
|
df['util_usage_rate'] = df['util_usage_rate_backup'].fillna(0)
|
||
|
|
df.drop(columns=['util_usage_rate_backup'], inplace=True)
|
||
|
|
|
||
|
|
# --- 8. New Feature Dimensions (Party, Rating Dist, ELO) ---
|
||
|
|
# Fetch Base Data for Calculation
|
||
|
|
q_new_feats = f"""
|
||
|
|
SELECT mp.steam_id_64, mp.match_id, mp.match_team_id, mp.team_id,
|
||
|
|
mp.rating, mp.adr, mp.is_win, mp.map as map_name
|
||
|
|
FROM fact_match_players mp
|
||
|
|
WHERE mp.steam_id_64 IN ({placeholders})
|
||
|
|
"""
|
||
|
|
df_base = pd.read_sql_query(q_new_feats, conn, params=valid_ids)
|
||
|
|
|
||
|
|
if not df_base.empty:
|
||
|
|
# 8.1 Party Size Stats
|
||
|
|
# Get party sizes for these matches
|
||
|
|
# We need to query party sizes for ALL matches involved
|
||
|
|
match_ids = df_base['match_id'].unique()
|
||
|
|
if len(match_ids) > 0:
|
||
|
|
match_id_ph = ','.join(['?'] * len(match_ids))
|
||
|
|
q_party_size = f"""
|
||
|
|
SELECT match_id, match_team_id, COUNT(*) as party_size
|
||
|
|
FROM fact_match_players
|
||
|
|
WHERE match_id IN ({match_id_ph}) AND match_team_id > 0
|
||
|
|
GROUP BY match_id, match_team_id
|
||
|
|
"""
|
||
|
|
chunk_size = 900
|
||
|
|
party_sizes_list = []
|
||
|
|
for i in range(0, len(match_ids), chunk_size):
|
||
|
|
chunk = match_ids[i:i+chunk_size]
|
||
|
|
chunk_ph = ','.join(['?'] * len(chunk))
|
||
|
|
q_chunk = q_party_size.replace(match_id_ph, chunk_ph)
|
||
|
|
party_sizes_list.append(pd.read_sql_query(q_chunk, conn, params=list(chunk)))
|
||
|
|
|
||
|
|
if party_sizes_list:
|
||
|
|
df_party_sizes = pd.concat(party_sizes_list)
|
||
|
|
df_base_party = df_base.merge(df_party_sizes, on=['match_id', 'match_team_id'], how='left')
|
||
|
|
else:
|
||
|
|
df_base_party = df_base.copy()
|
||
|
|
|
||
|
|
df_base_party['party_size'] = df_base_party['party_size'].fillna(1)
|
||
|
|
df_base_party = df_base_party[df_base_party['party_size'].isin([1, 2, 3, 4, 5])]
|
||
|
|
|
||
|
|
party_stats = df_base_party.groupby(['steam_id_64', 'party_size']).agg({
|
||
|
|
'is_win': 'mean',
|
||
|
|
'rating': 'mean',
|
||
|
|
'adr': 'mean'
|
||
|
|
}).reset_index()
|
||
|
|
|
||
|
|
pivoted_party = party_stats.pivot(index='steam_id_64', columns='party_size').reset_index()
|
||
|
|
|
||
|
|
new_party_cols = ['steam_id_64']
|
||
|
|
for col in pivoted_party.columns:
|
||
|
|
if col[0] == 'steam_id_64': continue
|
||
|
|
metric, size = col
|
||
|
|
if size in [1, 2, 3, 4, 5]:
|
||
|
|
metric_name = 'win_rate' if metric == 'is_win' else metric
|
||
|
|
new_party_cols.append(f"party_{int(size)}_{metric_name}")
|
||
|
|
|
||
|
|
flat_data = {'steam_id_64': pivoted_party['steam_id_64']}
|
||
|
|
for size in [1, 2, 3, 4, 5]:
|
||
|
|
if size in pivoted_party['is_win'].columns:
|
||
|
|
flat_data[f"party_{size}_win_rate"] = pivoted_party['is_win'][size]
|
||
|
|
if size in pivoted_party['rating'].columns:
|
||
|
|
flat_data[f"party_{size}_rating"] = pivoted_party['rating'][size]
|
||
|
|
if size in pivoted_party['adr'].columns:
|
||
|
|
flat_data[f"party_{size}_adr"] = pivoted_party['adr'][size]
|
||
|
|
|
||
|
|
df_party_flat = pd.DataFrame(flat_data)
|
||
|
|
df = df.merge(df_party_flat, on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
# 8.2 Rating Distribution
|
||
|
|
# rating_dist_carry_rate (>1.5), normal (1.0-1.5), sacrifice (0.6-1.0), sleeping (<0.6)
|
||
|
|
df_base['rating_tier'] = pd.cut(df_base['rating'],
|
||
|
|
bins=[-1, 0.6, 1.0, 1.5, 100],
|
||
|
|
labels=['sleeping', 'sacrifice', 'normal', 'carry'],
|
||
|
|
right=False) # <0.6, 0.6-<1.0, 1.0-<1.5, >=1.5 (wait, cut behavior)
|
||
|
|
# Standard cut: right=True by default (a, b]. We want:
|
||
|
|
# < 0.6
|
||
|
|
# 0.6 <= x < 1.0
|
||
|
|
# 1.0 <= x < 1.5
|
||
|
|
# >= 1.5
|
||
|
|
# So bins=[-inf, 0.6, 1.0, 1.5, inf], right=False -> [a, b)
|
||
|
|
df_base['rating_tier'] = pd.cut(df_base['rating'],
|
||
|
|
bins=[-float('inf'), 0.6, 1.0, 1.5, float('inf')],
|
||
|
|
labels=['sleeping', 'sacrifice', 'normal', 'carry'],
|
||
|
|
right=False)
|
||
|
|
|
||
|
|
# Wait, 1.5 should be Normal or Carry?
|
||
|
|
# User: >1.5 Carry, 1.0~1.5 Normal. So 1.5 is Normal? Or Carry?
|
||
|
|
# Usually inclusive on lower bound.
|
||
|
|
# 1.5 -> Carry (>1.5 usually means >= 1.5 or strictly >).
|
||
|
|
# "1.0~1.5 正常" implies [1.0, 1.5]. ">1.5 Carry" implies (1.5, inf).
|
||
|
|
# Let's assume >= 1.5 is Carry.
|
||
|
|
# So bins: (-inf, 0.6), [0.6, 1.0), [1.0, 1.5), [1.5, inf)
|
||
|
|
# right=False gives [a, b).
|
||
|
|
# So [1.5, inf) is correct for Carry.
|
||
|
|
|
||
|
|
dist_stats = df_base.groupby(['steam_id_64', 'rating_tier']).size().unstack(fill_value=0)
|
||
|
|
# Calculate rates
|
||
|
|
dist_stats = dist_stats.div(dist_stats.sum(axis=1), axis=0)
|
||
|
|
dist_stats.columns = [f"rating_dist_{c}_rate" for c in dist_stats.columns]
|
||
|
|
dist_stats = dist_stats.reset_index()
|
||
|
|
|
||
|
|
df = df.merge(dist_stats, on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
# 8.3 ELO Stratification
|
||
|
|
# Fetch Match Teams ELO
|
||
|
|
if len(match_ids) > 0:
|
||
|
|
q_elo = f"""
|
||
|
|
SELECT match_id, group_id, group_origin_elo
|
||
|
|
FROM fact_match_teams
|
||
|
|
WHERE match_id IN ({match_id_ph})
|
||
|
|
"""
|
||
|
|
# Use chunking again
|
||
|
|
elo_list = []
|
||
|
|
for i in range(0, len(match_ids), chunk_size):
|
||
|
|
chunk = match_ids[i:i+chunk_size]
|
||
|
|
chunk_ph = ','.join(['?'] * len(chunk))
|
||
|
|
q_chunk = q_elo.replace(match_id_ph, chunk_ph)
|
||
|
|
elo_list.append(pd.read_sql_query(q_chunk, conn, params=list(chunk)))
|
||
|
|
|
||
|
|
if elo_list:
|
||
|
|
df_elo_teams = pd.concat(elo_list)
|
||
|
|
|
||
|
|
# Merge to get Opponent ELO
|
||
|
|
# Player has match_id, team_id.
|
||
|
|
# Join on match_id.
|
||
|
|
# Filter where group_id != team_id
|
||
|
|
df_merged_elo = df_base.merge(df_elo_teams, on='match_id', how='left')
|
||
|
|
df_merged_elo = df_merged_elo[df_merged_elo['group_id'] != df_merged_elo['team_id']]
|
||
|
|
|
||
|
|
# Now df_merged_elo has 'group_origin_elo' which is Opponent ELO
|
||
|
|
# Binning: <1200, 1200-1400, 1400-1600, 1600-1800, 1800-2000, >2000
|
||
|
|
# bins: [-inf, 1200, 1400, 1600, 1800, 2000, inf]
|
||
|
|
elo_bins = [-float('inf'), 1200, 1400, 1600, 1800, 2000, float('inf')]
|
||
|
|
elo_labels = ['lt1200', '1200_1400', '1400_1600', '1600_1800', '1800_2000', 'gt2000']
|
||
|
|
|
||
|
|
df_merged_elo['elo_bin'] = pd.cut(df_merged_elo['group_origin_elo'], bins=elo_bins, labels=elo_labels, right=False)
|
||
|
|
|
||
|
|
elo_stats = df_merged_elo.groupby(['steam_id_64', 'elo_bin']).agg({
|
||
|
|
'rating': 'mean'
|
||
|
|
}).unstack(fill_value=0) # We only need rating for now
|
||
|
|
|
||
|
|
# Rename columns
|
||
|
|
# elo_stats columns are MultiIndex (rating, bin).
|
||
|
|
# We want: elo_{bin}_rating
|
||
|
|
flat_elo_data = {'steam_id_64': elo_stats.index}
|
||
|
|
for bin_label in elo_labels:
|
||
|
|
if bin_label in elo_stats['rating'].columns:
|
||
|
|
flat_elo_data[f"elo_{bin_label}_rating"] = elo_stats['rating'][bin_label].values
|
||
|
|
|
||
|
|
df_elo_flat = pd.DataFrame(flat_elo_data)
|
||
|
|
df = df.merge(df_elo_flat, on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
# 9. New Features: Economy & Pace
|
||
|
|
df_eco = FeatureService._calculate_economy_features(conn, valid_ids)
|
||
|
|
if df_eco is not None:
|
||
|
|
df = df.merge(df_eco, on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
df_pace = FeatureService._calculate_pace_features(conn, valid_ids)
|
||
|
|
if df_pace is not None:
|
||
|
|
df = df.merge(df_pace, on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
if not df_base.empty:
|
||
|
|
player_mean = df_base.groupby('steam_id_64', as_index=False)['rating'].mean().rename(columns={'rating': 'player_mean_rating'})
|
||
|
|
map_mean = df_base.groupby(['steam_id_64', 'map_name'], as_index=False)['rating'].mean().rename(columns={'rating': 'map_mean_rating'})
|
||
|
|
map_dev = map_mean.merge(player_mean, on='steam_id_64', how='left')
|
||
|
|
map_dev['abs_dev'] = (map_dev['map_mean_rating'] - map_dev['player_mean_rating']).abs()
|
||
|
|
map_coef = map_dev.groupby('steam_id_64', as_index=False)['abs_dev'].mean().rename(columns={'abs_dev': 'map_stability_coef'})
|
||
|
|
df = df.merge(map_coef, on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
import json
|
||
|
|
|
||
|
|
df['rd_phase_kill_early_share'] = 0.0
|
||
|
|
df['rd_phase_kill_mid_share'] = 0.0
|
||
|
|
df['rd_phase_kill_late_share'] = 0.0
|
||
|
|
df['rd_phase_death_early_share'] = 0.0
|
||
|
|
df['rd_phase_death_mid_share'] = 0.0
|
||
|
|
df['rd_phase_death_late_share'] = 0.0
|
||
|
|
df['rd_phase_kill_early_share_t'] = 0.0
|
||
|
|
df['rd_phase_kill_mid_share_t'] = 0.0
|
||
|
|
df['rd_phase_kill_late_share_t'] = 0.0
|
||
|
|
df['rd_phase_kill_early_share_ct'] = 0.0
|
||
|
|
df['rd_phase_kill_mid_share_ct'] = 0.0
|
||
|
|
df['rd_phase_kill_late_share_ct'] = 0.0
|
||
|
|
df['rd_phase_death_early_share_t'] = 0.0
|
||
|
|
df['rd_phase_death_mid_share_t'] = 0.0
|
||
|
|
df['rd_phase_death_late_share_t'] = 0.0
|
||
|
|
df['rd_phase_death_early_share_ct'] = 0.0
|
||
|
|
df['rd_phase_death_mid_share_ct'] = 0.0
|
||
|
|
df['rd_phase_death_late_share_ct'] = 0.0
|
||
|
|
df['rd_firstdeath_team_first_death_rounds'] = 0
|
||
|
|
df['rd_firstdeath_team_first_death_win_rate'] = 0.0
|
||
|
|
df['rd_invalid_death_rounds'] = 0
|
||
|
|
df['rd_invalid_death_rate'] = 0.0
|
||
|
|
df['rd_pressure_kpr_ratio'] = 0.0
|
||
|
|
df['rd_pressure_perf_ratio'] = 0.0
|
||
|
|
df['rd_pressure_rounds_down3'] = 0
|
||
|
|
df['rd_pressure_rounds_normal'] = 0
|
||
|
|
df['rd_matchpoint_kpr_ratio'] = 0.0
|
||
|
|
df['rd_matchpoint_perf_ratio'] = 0.0
|
||
|
|
df['rd_matchpoint_rounds'] = 0
|
||
|
|
df['rd_comeback_kill_share'] = 0.0
|
||
|
|
df['rd_comeback_rounds'] = 0
|
||
|
|
df['rd_trade_response_10s_rate'] = 0.0
|
||
|
|
df['rd_weapon_top_json'] = "[]"
|
||
|
|
df['rd_roundtype_split_json'] = "{}"
|
||
|
|
|
||
|
|
if not df_events.empty:
|
||
|
|
df_events['event_time'] = pd.to_numeric(df_events['event_time'], errors='coerce').fillna(0).astype(int)
|
||
|
|
|
||
|
|
df_events['phase_bucket'] = pd.cut(
|
||
|
|
df_events['event_time'],
|
||
|
|
bins=[-1, 30, 60, float('inf')],
|
||
|
|
labels=['early', 'mid', 'late']
|
||
|
|
)
|
||
|
|
|
||
|
|
k_cnt = df_events.groupby(['attacker_steam_id', 'phase_bucket']).size().unstack(fill_value=0)
|
||
|
|
k_tot = k_cnt.sum(axis=1).replace(0, 1)
|
||
|
|
k_share = k_cnt.div(k_tot, axis=0)
|
||
|
|
k_share.index.name = 'steam_id_64'
|
||
|
|
k_share = k_share.reset_index().rename(columns={
|
||
|
|
'early': 'rd_phase_kill_early_share',
|
||
|
|
'mid': 'rd_phase_kill_mid_share',
|
||
|
|
'late': 'rd_phase_kill_late_share'
|
||
|
|
})
|
||
|
|
df = df.merge(
|
||
|
|
k_share[['steam_id_64', 'rd_phase_kill_early_share', 'rd_phase_kill_mid_share', 'rd_phase_kill_late_share']],
|
||
|
|
on='steam_id_64',
|
||
|
|
how='left',
|
||
|
|
suffixes=('', '_calc')
|
||
|
|
)
|
||
|
|
for c in ['rd_phase_kill_early_share', 'rd_phase_kill_mid_share', 'rd_phase_kill_late_share']:
|
||
|
|
if f'{c}_calc' in df.columns:
|
||
|
|
df[c] = df[f'{c}_calc'].fillna(df[c])
|
||
|
|
df.drop(columns=[f'{c}_calc'], inplace=True)
|
||
|
|
|
||
|
|
d_cnt = df_events.groupby(['victim_steam_id', 'phase_bucket']).size().unstack(fill_value=0)
|
||
|
|
d_tot = d_cnt.sum(axis=1).replace(0, 1)
|
||
|
|
d_share = d_cnt.div(d_tot, axis=0)
|
||
|
|
d_share.index.name = 'steam_id_64'
|
||
|
|
d_share = d_share.reset_index().rename(columns={
|
||
|
|
'early': 'rd_phase_death_early_share',
|
||
|
|
'mid': 'rd_phase_death_mid_share',
|
||
|
|
'late': 'rd_phase_death_late_share'
|
||
|
|
})
|
||
|
|
df = df.merge(
|
||
|
|
d_share[['steam_id_64', 'rd_phase_death_early_share', 'rd_phase_death_mid_share', 'rd_phase_death_late_share']],
|
||
|
|
on='steam_id_64',
|
||
|
|
how='left',
|
||
|
|
suffixes=('', '_calc')
|
||
|
|
)
|
||
|
|
for c in ['rd_phase_death_early_share', 'rd_phase_death_mid_share', 'rd_phase_death_late_share']:
|
||
|
|
if f'{c}_calc' in df.columns:
|
||
|
|
df[c] = df[f'{c}_calc'].fillna(df[c])
|
||
|
|
df.drop(columns=[f'{c}_calc'], inplace=True)
|
||
|
|
|
||
|
|
if 'attacker_side' in df_events.columns:
|
||
|
|
k_side = df_events[df_events['attacker_side'].isin(['CT', 'T'])].copy()
|
||
|
|
if not k_side.empty:
|
||
|
|
k_cnt_side = k_side.groupby(['attacker_steam_id', 'attacker_side', 'phase_bucket']).size().reset_index(name='cnt')
|
||
|
|
k_piv = k_cnt_side.pivot_table(index=['attacker_steam_id', 'attacker_side'], columns='phase_bucket', values='cnt', fill_value=0)
|
||
|
|
k_piv['tot'] = k_piv.sum(axis=1).replace(0, 1)
|
||
|
|
k_piv = k_piv.div(k_piv['tot'], axis=0).drop(columns=['tot'])
|
||
|
|
k_piv = k_piv.reset_index().rename(columns={'attacker_steam_id': 'steam_id_64'})
|
||
|
|
|
||
|
|
for side, suffix in [('T', '_t'), ('CT', '_ct')]:
|
||
|
|
tmp = k_piv[k_piv['attacker_side'] == side].copy()
|
||
|
|
if not tmp.empty:
|
||
|
|
tmp = tmp.rename(columns={
|
||
|
|
'early': f'rd_phase_kill_early_share{suffix}',
|
||
|
|
'mid': f'rd_phase_kill_mid_share{suffix}',
|
||
|
|
'late': f'rd_phase_kill_late_share{suffix}',
|
||
|
|
})
|
||
|
|
df = df.merge(
|
||
|
|
tmp[['steam_id_64', f'rd_phase_kill_early_share{suffix}', f'rd_phase_kill_mid_share{suffix}', f'rd_phase_kill_late_share{suffix}']],
|
||
|
|
on='steam_id_64',
|
||
|
|
how='left',
|
||
|
|
suffixes=('', '_calc')
|
||
|
|
)
|
||
|
|
for c in [f'rd_phase_kill_early_share{suffix}', f'rd_phase_kill_mid_share{suffix}', f'rd_phase_kill_late_share{suffix}']:
|
||
|
|
if f'{c}_calc' in df.columns:
|
||
|
|
df[c] = df[f'{c}_calc'].fillna(df[c])
|
||
|
|
df.drop(columns=[f'{c}_calc'], inplace=True)
|
||
|
|
|
||
|
|
if 'victim_side' in df_events.columns:
|
||
|
|
d_side = df_events[df_events['victim_side'].isin(['CT', 'T'])].copy()
|
||
|
|
if not d_side.empty:
|
||
|
|
d_cnt_side = d_side.groupby(['victim_steam_id', 'victim_side', 'phase_bucket']).size().reset_index(name='cnt')
|
||
|
|
d_piv = d_cnt_side.pivot_table(index=['victim_steam_id', 'victim_side'], columns='phase_bucket', values='cnt', fill_value=0)
|
||
|
|
d_piv['tot'] = d_piv.sum(axis=1).replace(0, 1)
|
||
|
|
d_piv = d_piv.div(d_piv['tot'], axis=0).drop(columns=['tot'])
|
||
|
|
d_piv = d_piv.reset_index().rename(columns={'victim_steam_id': 'steam_id_64'})
|
||
|
|
|
||
|
|
for side, suffix in [('T', '_t'), ('CT', '_ct')]:
|
||
|
|
tmp = d_piv[d_piv['victim_side'] == side].copy()
|
||
|
|
if not tmp.empty:
|
||
|
|
tmp = tmp.rename(columns={
|
||
|
|
'early': f'rd_phase_death_early_share{suffix}',
|
||
|
|
'mid': f'rd_phase_death_mid_share{suffix}',
|
||
|
|
'late': f'rd_phase_death_late_share{suffix}',
|
||
|
|
})
|
||
|
|
df = df.merge(
|
||
|
|
tmp[['steam_id_64', f'rd_phase_death_early_share{suffix}', f'rd_phase_death_mid_share{suffix}', f'rd_phase_death_late_share{suffix}']],
|
||
|
|
on='steam_id_64',
|
||
|
|
how='left',
|
||
|
|
suffixes=('', '_calc')
|
||
|
|
)
|
||
|
|
for c in [f'rd_phase_death_early_share{suffix}', f'rd_phase_death_mid_share{suffix}', f'rd_phase_death_late_share{suffix}']:
|
||
|
|
if f'{c}_calc' in df.columns:
|
||
|
|
df[c] = df[f'{c}_calc'].fillna(df[c])
|
||
|
|
df.drop(columns=[f'{c}_calc'], inplace=True)
|
||
|
|
|
||
|
|
if 'victim_side' in df_events.columns and 'winner_side' in df_events.columns:
|
||
|
|
death_rows = df_events[['match_id', 'round_num', 'event_time', 'victim_steam_id', 'victim_side', 'winner_side']].copy()
|
||
|
|
death_rows = death_rows[death_rows['victim_side'].isin(['CT', 'T']) & death_rows['winner_side'].isin(['CT', 'T'])]
|
||
|
|
if not death_rows.empty:
|
||
|
|
min_death = death_rows.groupby(['match_id', 'round_num', 'victim_side'], as_index=False)['event_time'].min().rename(columns={'event_time': 'min_time'})
|
||
|
|
first_deaths = death_rows.merge(min_death, on=['match_id', 'round_num', 'victim_side'], how='inner')
|
||
|
|
first_deaths = first_deaths[first_deaths['event_time'] == first_deaths['min_time']]
|
||
|
|
first_deaths['is_win'] = (first_deaths['victim_side'] == first_deaths['winner_side']).astype(int)
|
||
|
|
fd_agg = first_deaths.groupby('victim_steam_id')['is_win'].agg(['count', 'mean']).reset_index()
|
||
|
|
fd_agg.rename(columns={
|
||
|
|
'victim_steam_id': 'steam_id_64',
|
||
|
|
'count': 'rd_firstdeath_team_first_death_rounds',
|
||
|
|
'mean': 'rd_firstdeath_team_first_death_win_rate'
|
||
|
|
}, inplace=True)
|
||
|
|
df = df.merge(fd_agg, on='steam_id_64', how='left', suffixes=('', '_calc'))
|
||
|
|
for c in ['rd_firstdeath_team_first_death_rounds', 'rd_firstdeath_team_first_death_win_rate']:
|
||
|
|
if f'{c}_calc' in df.columns:
|
||
|
|
df[c] = df[f'{c}_calc'].fillna(df[c])
|
||
|
|
df.drop(columns=[f'{c}_calc'], inplace=True)
|
||
|
|
|
||
|
|
kills_per_round = df_events.groupby(['match_id', 'round_num', 'attacker_steam_id']).size().reset_index(name='kills')
|
||
|
|
flash_round = df_events[df_events['flash_assist_steam_id'].notna() & (df_events['flash_assist_steam_id'] != '')] \
|
||
|
|
.groupby(['match_id', 'round_num', 'flash_assist_steam_id']).size().reset_index(name='flash_assists')
|
||
|
|
death_round = df_events.groupby(['match_id', 'round_num', 'victim_steam_id']).size().reset_index(name='deaths')
|
||
|
|
|
||
|
|
death_eval = death_round.rename(columns={'victim_steam_id': 'steam_id_64'}).merge(
|
||
|
|
kills_per_round.rename(columns={'attacker_steam_id': 'steam_id_64'})[['match_id', 'round_num', 'steam_id_64', 'kills']],
|
||
|
|
on=['match_id', 'round_num', 'steam_id_64'],
|
||
|
|
how='left'
|
||
|
|
).merge(
|
||
|
|
flash_round.rename(columns={'flash_assist_steam_id': 'steam_id_64'})[['match_id', 'round_num', 'steam_id_64', 'flash_assists']],
|
||
|
|
on=['match_id', 'round_num', 'steam_id_64'],
|
||
|
|
how='left'
|
||
|
|
).fillna({'kills': 0, 'flash_assists': 0})
|
||
|
|
death_eval['is_invalid'] = ((death_eval['kills'] <= 0) & (death_eval['flash_assists'] <= 0)).astype(int)
|
||
|
|
invalid_agg = death_eval.groupby('steam_id_64')['is_invalid'].agg(['sum', 'count']).reset_index()
|
||
|
|
invalid_agg.rename(columns={'sum': 'rd_invalid_death_rounds', 'count': 'death_rounds'}, inplace=True)
|
||
|
|
invalid_agg['rd_invalid_death_rate'] = invalid_agg['rd_invalid_death_rounds'] / invalid_agg['death_rounds'].replace(0, 1)
|
||
|
|
df = df.merge(
|
||
|
|
invalid_agg[['steam_id_64', 'rd_invalid_death_rounds', 'rd_invalid_death_rate']],
|
||
|
|
on='steam_id_64',
|
||
|
|
how='left',
|
||
|
|
suffixes=('', '_calc')
|
||
|
|
)
|
||
|
|
for c in ['rd_invalid_death_rounds', 'rd_invalid_death_rate']:
|
||
|
|
if f'{c}_calc' in df.columns:
|
||
|
|
df[c] = df[f'{c}_calc'].fillna(df[c])
|
||
|
|
df.drop(columns=[f'{c}_calc'], inplace=True)
|
||
|
|
|
||
|
|
if 'weapon' in df_events.columns:
|
||
|
|
w = df_events.copy()
|
||
|
|
w['weapon'] = w['weapon'].fillna('').astype(str)
|
||
|
|
w = w[w['weapon'] != '']
|
||
|
|
if not w.empty:
|
||
|
|
w_agg = w.groupby(['attacker_steam_id', 'weapon']).agg(
|
||
|
|
kills=('weapon', 'size'),
|
||
|
|
hs=('is_headshot', 'sum'),
|
||
|
|
).reset_index()
|
||
|
|
top_json = {}
|
||
|
|
for pid, g in w_agg.groupby('attacker_steam_id'):
|
||
|
|
g = g.sort_values('kills', ascending=False)
|
||
|
|
total = float(g['kills'].sum()) if g['kills'].sum() else 1.0
|
||
|
|
top = g.head(5)
|
||
|
|
items = []
|
||
|
|
for _, r in top.iterrows():
|
||
|
|
k = float(r['kills'])
|
||
|
|
hs = float(r['hs'])
|
||
|
|
wi = get_weapon_info(r['weapon'])
|
||
|
|
items.append({
|
||
|
|
'weapon': r['weapon'],
|
||
|
|
'kills': int(k),
|
||
|
|
'share': k / total,
|
||
|
|
'hs_rate': hs / k if k else 0.0,
|
||
|
|
'price': wi.price if wi else None,
|
||
|
|
'side': wi.side if wi else None,
|
||
|
|
'category': wi.category if wi else None,
|
||
|
|
})
|
||
|
|
top_json[str(pid)] = json.dumps(items, ensure_ascii=False)
|
||
|
|
if top_json:
|
||
|
|
df['rd_weapon_top_json'] = df['steam_id_64'].map(top_json).fillna("[]")
|
||
|
|
|
||
|
|
if not df_rounds.empty and not df_fh_sides.empty and not df_events.empty:
|
||
|
|
df_rounds2 = df_rounds.copy()
|
||
|
|
if not df_meta.empty:
|
||
|
|
df_rounds2 = df_rounds2.merge(df_meta[['match_id', 'halftime_round']], on='match_id', how='left')
|
||
|
|
df_rounds2 = df_rounds2.sort_values(['match_id', 'round_num'])
|
||
|
|
df_rounds2['prev_ct'] = df_rounds2.groupby('match_id')['ct_score'].shift(1).fillna(0)
|
||
|
|
df_rounds2['prev_t'] = df_rounds2.groupby('match_id')['t_score'].shift(1).fillna(0)
|
||
|
|
df_rounds2['ct_deficit'] = df_rounds2['prev_t'] - df_rounds2['prev_ct']
|
||
|
|
df_rounds2['t_deficit'] = df_rounds2['prev_ct'] - df_rounds2['prev_t']
|
||
|
|
df_rounds2['mp_score'] = df_rounds2['halftime_round'].fillna(15)
|
||
|
|
df_rounds2['is_match_point_round'] = (df_rounds2['prev_ct'] == df_rounds2['mp_score']) | (df_rounds2['prev_t'] == df_rounds2['mp_score'])
|
||
|
|
df_rounds2['reg_rounds'] = (df_rounds2['halftime_round'].fillna(15) * 2).astype(int)
|
||
|
|
df_rounds2['is_overtime_round'] = df_rounds2['round_num'] > df_rounds2['reg_rounds']
|
||
|
|
|
||
|
|
all_rounds = df_rounds2[['match_id', 'round_num']].drop_duplicates()
|
||
|
|
df_player_rounds = all_rounds.merge(df_fh_sides, on='match_id', how='inner')
|
||
|
|
if 'halftime_round' not in df_player_rounds.columns:
|
||
|
|
df_player_rounds['halftime_round'] = 15
|
||
|
|
df_player_rounds['halftime_round'] = pd.to_numeric(df_player_rounds['halftime_round'], errors='coerce').fillna(15).astype(int)
|
||
|
|
mask_fh = df_player_rounds['round_num'] <= df_player_rounds['halftime_round']
|
||
|
|
df_player_rounds['side'] = np.where(mask_fh, df_player_rounds['fh_side'], np.where(df_player_rounds['fh_side'] == 'CT', 'T', 'CT'))
|
||
|
|
df_player_rounds = df_player_rounds.merge(
|
||
|
|
df_rounds2[['match_id', 'round_num', 'ct_deficit', 't_deficit', 'is_match_point_round', 'is_overtime_round', 'reg_rounds']],
|
||
|
|
on=['match_id', 'round_num'],
|
||
|
|
how='left'
|
||
|
|
)
|
||
|
|
df_player_rounds['deficit'] = np.where(
|
||
|
|
df_player_rounds['side'] == 'CT',
|
||
|
|
df_player_rounds['ct_deficit'],
|
||
|
|
np.where(df_player_rounds['side'] == 'T', df_player_rounds['t_deficit'], 0)
|
||
|
|
)
|
||
|
|
df_player_rounds['is_pressure_round'] = (df_player_rounds['deficit'] >= 3).astype(int)
|
||
|
|
df_player_rounds['is_pistol_round'] = (
|
||
|
|
(df_player_rounds['round_num'] == 1) |
|
||
|
|
(df_player_rounds['round_num'] == df_player_rounds['halftime_round'] + 1)
|
||
|
|
).astype(int)
|
||
|
|
|
||
|
|
kills_per_round = df_events.groupby(['match_id', 'round_num', 'attacker_steam_id']).size().reset_index(name='kills')
|
||
|
|
df_player_rounds = df_player_rounds.merge(
|
||
|
|
kills_per_round.rename(columns={'attacker_steam_id': 'steam_id_64'}),
|
||
|
|
on=['match_id', 'round_num', 'steam_id_64'],
|
||
|
|
how='left'
|
||
|
|
)
|
||
|
|
df_player_rounds['kills'] = df_player_rounds['kills'].fillna(0)
|
||
|
|
|
||
|
|
grp = df_player_rounds.groupby(['steam_id_64', 'is_pressure_round'])['kills'].agg(['mean', 'count']).reset_index()
|
||
|
|
pressure = grp.pivot(index='steam_id_64', columns='is_pressure_round').fillna(0)
|
||
|
|
if ('mean', 1) in pressure.columns and ('mean', 0) in pressure.columns:
|
||
|
|
pressure_kpr_ratio = (pressure[('mean', 1)] / pressure[('mean', 0)].replace(0, 1)).reset_index()
|
||
|
|
pressure_kpr_ratio.columns = ['steam_id_64', 'rd_pressure_kpr_ratio']
|
||
|
|
df = df.merge(pressure_kpr_ratio, on='steam_id_64', how='left', suffixes=('', '_calc'))
|
||
|
|
if 'rd_pressure_kpr_ratio_calc' in df.columns:
|
||
|
|
df['rd_pressure_kpr_ratio'] = df['rd_pressure_kpr_ratio_calc'].fillna(df['rd_pressure_kpr_ratio'])
|
||
|
|
df.drop(columns=['rd_pressure_kpr_ratio_calc'], inplace=True)
|
||
|
|
if ('count', 1) in pressure.columns:
|
||
|
|
pr_cnt = pressure[('count', 1)].reset_index()
|
||
|
|
pr_cnt.columns = ['steam_id_64', 'rd_pressure_rounds_down3']
|
||
|
|
df = df.merge(pr_cnt, on='steam_id_64', how='left', suffixes=('', '_calc'))
|
||
|
|
if 'rd_pressure_rounds_down3_calc' in df.columns:
|
||
|
|
df['rd_pressure_rounds_down3'] = df['rd_pressure_rounds_down3_calc'].fillna(df['rd_pressure_rounds_down3'])
|
||
|
|
df.drop(columns=['rd_pressure_rounds_down3_calc'], inplace=True)
|
||
|
|
if ('count', 0) in pressure.columns:
|
||
|
|
nr_cnt = pressure[('count', 0)].reset_index()
|
||
|
|
nr_cnt.columns = ['steam_id_64', 'rd_pressure_rounds_normal']
|
||
|
|
df = df.merge(nr_cnt, on='steam_id_64', how='left', suffixes=('', '_calc'))
|
||
|
|
if 'rd_pressure_rounds_normal_calc' in df.columns:
|
||
|
|
df['rd_pressure_rounds_normal'] = df['rd_pressure_rounds_normal_calc'].fillna(df['rd_pressure_rounds_normal'])
|
||
|
|
df.drop(columns=['rd_pressure_rounds_normal_calc'], inplace=True)
|
||
|
|
|
||
|
|
mp_grp = df_player_rounds.groupby(['steam_id_64', 'is_match_point_round'])['kills'].agg(['mean', 'count']).reset_index()
|
||
|
|
mp = mp_grp.pivot(index='steam_id_64', columns='is_match_point_round').fillna(0)
|
||
|
|
if ('mean', 1) in mp.columns and ('mean', 0) in mp.columns:
|
||
|
|
mp_ratio = (mp[('mean', 1)] / mp[('mean', 0)].replace(0, 1)).reset_index()
|
||
|
|
mp_ratio.columns = ['steam_id_64', 'rd_matchpoint_kpr_ratio']
|
||
|
|
df = df.merge(mp_ratio, on='steam_id_64', how='left', suffixes=('', '_calc'))
|
||
|
|
if 'rd_matchpoint_kpr_ratio_calc' in df.columns:
|
||
|
|
df['rd_matchpoint_kpr_ratio'] = df['rd_matchpoint_kpr_ratio_calc'].fillna(df['rd_matchpoint_kpr_ratio'])
|
||
|
|
df.drop(columns=['rd_matchpoint_kpr_ratio_calc'], inplace=True)
|
||
|
|
if ('count', 1) in mp.columns:
|
||
|
|
mp_cnt = mp[('count', 1)].reset_index()
|
||
|
|
mp_cnt.columns = ['steam_id_64', 'rd_matchpoint_rounds']
|
||
|
|
df = df.merge(mp_cnt, on='steam_id_64', how='left', suffixes=('', '_calc'))
|
||
|
|
if 'rd_matchpoint_rounds_calc' in df.columns:
|
||
|
|
df['rd_matchpoint_rounds'] = df['rd_matchpoint_rounds_calc'].fillna(df['rd_matchpoint_rounds'])
|
||
|
|
df.drop(columns=['rd_matchpoint_rounds_calc'], inplace=True)
|
||
|
|
|
||
|
|
try:
|
||
|
|
q_player_team = f"SELECT match_id, steam_id_64, team_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders})"
|
||
|
|
df_player_team = pd.read_sql_query(q_player_team, conn, params=valid_ids)
|
||
|
|
except Exception:
|
||
|
|
df_player_team = pd.DataFrame()
|
||
|
|
|
||
|
|
if not df_player_team.empty:
|
||
|
|
try:
|
||
|
|
q_team_roles = f"""
|
||
|
|
SELECT match_id, group_id as team_id, group_fh_role
|
||
|
|
FROM fact_match_teams
|
||
|
|
WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))
|
||
|
|
"""
|
||
|
|
df_team_roles = pd.read_sql_query(q_team_roles, conn, params=valid_ids)
|
||
|
|
except Exception:
|
||
|
|
df_team_roles = pd.DataFrame()
|
||
|
|
|
||
|
|
if not df_team_roles.empty:
|
||
|
|
team_round = df_rounds2[['match_id', 'round_num', 'ct_score', 't_score', 'prev_ct', 'prev_t', 'halftime_round']].merge(df_team_roles, on='match_id', how='inner')
|
||
|
|
fh_ct = team_round['group_fh_role'] == 1
|
||
|
|
mask_fh = team_round['round_num'] <= team_round['halftime_round']
|
||
|
|
team_round['team_side'] = np.where(mask_fh, np.where(fh_ct, 'CT', 'T'), np.where(fh_ct, 'T', 'CT'))
|
||
|
|
team_round['team_prev_score'] = np.where(team_round['team_side'] == 'CT', team_round['prev_ct'], team_round['prev_t'])
|
||
|
|
team_round['team_score_after'] = np.where(team_round['team_side'] == 'CT', team_round['ct_score'], team_round['t_score'])
|
||
|
|
team_round['opp_prev_score'] = np.where(team_round['team_side'] == 'CT', team_round['prev_t'], team_round['prev_ct'])
|
||
|
|
team_round['opp_score_after'] = np.where(team_round['team_side'] == 'CT', team_round['t_score'], team_round['ct_score'])
|
||
|
|
team_round['deficit_before'] = team_round['opp_prev_score'] - team_round['team_prev_score']
|
||
|
|
team_round['deficit_after'] = team_round['opp_score_after'] - team_round['team_score_after']
|
||
|
|
team_round['is_comeback_round'] = ((team_round['deficit_before'] > 0) & (team_round['deficit_after'] < team_round['deficit_before'])).astype(int)
|
||
|
|
comeback_keys = team_round[team_round['is_comeback_round'] == 1][['match_id', 'round_num', 'team_id']].drop_duplicates()
|
||
|
|
|
||
|
|
if not comeback_keys.empty:
|
||
|
|
ev_att = df_events[['match_id', 'round_num', 'attacker_steam_id', 'event_time']].merge(
|
||
|
|
df_player_team.rename(columns={'steam_id_64': 'attacker_steam_id', 'team_id': 'att_team_id'}),
|
||
|
|
on=['match_id', 'attacker_steam_id'],
|
||
|
|
how='left'
|
||
|
|
)
|
||
|
|
team_kills = ev_att[ev_att['att_team_id'].notna()].groupby(['match_id', 'round_num', 'att_team_id']).size().reset_index(name='team_kills')
|
||
|
|
player_kills = ev_att.groupby(['match_id', 'round_num', 'attacker_steam_id', 'att_team_id']).size().reset_index(name='player_kills')
|
||
|
|
|
||
|
|
player_kills = player_kills.merge(
|
||
|
|
comeback_keys.rename(columns={'team_id': 'att_team_id'}),
|
||
|
|
on=['match_id', 'round_num', 'att_team_id'],
|
||
|
|
how='inner'
|
||
|
|
)
|
||
|
|
if not player_kills.empty:
|
||
|
|
player_kills = player_kills.merge(team_kills, on=['match_id', 'round_num', 'att_team_id'], how='left').fillna({'team_kills': 0})
|
||
|
|
player_kills['share'] = player_kills['player_kills'] / player_kills['team_kills'].replace(0, 1)
|
||
|
|
cb_share = player_kills.groupby('attacker_steam_id')['share'].mean().reset_index()
|
||
|
|
cb_share.rename(columns={'attacker_steam_id': 'steam_id_64', 'share': 'rd_comeback_kill_share'}, inplace=True)
|
||
|
|
df = df.merge(cb_share, on='steam_id_64', how='left', suffixes=('', '_calc'))
|
||
|
|
if 'rd_comeback_kill_share_calc' in df.columns:
|
||
|
|
df['rd_comeback_kill_share'] = df['rd_comeback_kill_share_calc'].fillna(df['rd_comeback_kill_share'])
|
||
|
|
df.drop(columns=['rd_comeback_kill_share_calc'], inplace=True)
|
||
|
|
|
||
|
|
cb_rounds = comeback_keys.merge(df_player_team, left_on=['match_id', 'team_id'], right_on=['match_id', 'team_id'], how='inner')
|
||
|
|
cb_cnt = cb_rounds.groupby('steam_id_64').size().reset_index(name='rd_comeback_rounds')
|
||
|
|
df = df.merge(cb_cnt, on='steam_id_64', how='left', suffixes=('', '_calc'))
|
||
|
|
if 'rd_comeback_rounds_calc' in df.columns:
|
||
|
|
df['rd_comeback_rounds'] = df['rd_comeback_rounds_calc'].fillna(df['rd_comeback_rounds'])
|
||
|
|
df.drop(columns=['rd_comeback_rounds_calc'], inplace=True)
|
||
|
|
|
||
|
|
death_team = df_events[['match_id', 'round_num', 'event_time', 'victim_steam_id']].merge(
|
||
|
|
df_player_team.rename(columns={'steam_id_64': 'victim_steam_id', 'team_id': 'team_id'}),
|
||
|
|
on=['match_id', 'victim_steam_id'],
|
||
|
|
how='left'
|
||
|
|
)
|
||
|
|
death_team = death_team[death_team['team_id'].notna()]
|
||
|
|
if not death_team.empty:
|
||
|
|
roster = df_player_team.rename(columns={'steam_id_64': 'steam_id_64', 'team_id': 'team_id'})[['match_id', 'team_id', 'steam_id_64']].drop_duplicates()
|
||
|
|
opp = death_team.merge(roster, on=['match_id', 'team_id'], how='inner', suffixes=('', '_teammate'))
|
||
|
|
opp = opp[opp['steam_id_64'] != opp['victim_steam_id']]
|
||
|
|
opp_time = opp.groupby(['match_id', 'round_num', 'steam_id_64'], as_index=False)['event_time'].min().rename(columns={'event_time': 'teammate_death_time'})
|
||
|
|
|
||
|
|
kills_time = df_events[['match_id', 'round_num', 'event_time', 'attacker_steam_id']].rename(columns={'attacker_steam_id': 'steam_id_64', 'event_time': 'kill_time'})
|
||
|
|
m = opp_time.merge(kills_time, on=['match_id', 'round_num', 'steam_id_64'], how='left')
|
||
|
|
m['in_window'] = ((m['kill_time'] >= m['teammate_death_time']) & (m['kill_time'] <= m['teammate_death_time'] + 10)).astype(int)
|
||
|
|
success = m.groupby(['match_id', 'round_num', 'steam_id_64'], as_index=False)['in_window'].max()
|
||
|
|
rate = success.groupby('steam_id_64')['in_window'].mean().reset_index()
|
||
|
|
rate.rename(columns={'in_window': 'rd_trade_response_10s_rate'}, inplace=True)
|
||
|
|
df = df.merge(rate, on='steam_id_64', how='left', suffixes=('', '_calc'))
|
||
|
|
if 'rd_trade_response_10s_rate_calc' in df.columns:
|
||
|
|
df['rd_trade_response_10s_rate'] = df['rd_trade_response_10s_rate_calc'].fillna(df['rd_trade_response_10s_rate'])
|
||
|
|
df.drop(columns=['rd_trade_response_10s_rate_calc'], inplace=True)
|
||
|
|
|
||
|
|
eco_rows = []
|
||
|
|
try:
|
||
|
|
q_econ = f"""
|
||
|
|
SELECT match_id, round_num, steam_id_64, equipment_value, round_performance_score
|
||
|
|
FROM fact_round_player_economy
|
||
|
|
WHERE steam_id_64 IN ({placeholders})
|
||
|
|
"""
|
||
|
|
df_econ = pd.read_sql_query(q_econ, conn, params=valid_ids)
|
||
|
|
except Exception:
|
||
|
|
df_econ = pd.DataFrame()
|
||
|
|
|
||
|
|
if not df_econ.empty:
|
||
|
|
df_econ['equipment_value'] = pd.to_numeric(df_econ['equipment_value'], errors='coerce').fillna(0).astype(int)
|
||
|
|
df_econ['round_performance_score'] = pd.to_numeric(df_econ['round_performance_score'], errors='coerce').fillna(0.0)
|
||
|
|
df_econ = df_econ.merge(df_rounds2[['match_id', 'round_num', 'is_overtime_round', 'is_match_point_round', 'ct_deficit', 't_deficit', 'prev_ct', 'prev_t']], on=['match_id', 'round_num'], how='left')
|
||
|
|
df_econ = df_econ.merge(df_fh_sides[['match_id', 'steam_id_64', 'fh_side', 'halftime_round']], on=['match_id', 'steam_id_64'], how='left')
|
||
|
|
mask_fh = df_econ['round_num'] <= df_econ['halftime_round']
|
||
|
|
df_econ['side'] = np.where(mask_fh, df_econ['fh_side'], np.where(df_econ['fh_side'] == 'CT', 'T', 'CT'))
|
||
|
|
df_econ['deficit'] = np.where(df_econ['side'] == 'CT', df_econ['ct_deficit'], df_econ['t_deficit'])
|
||
|
|
df_econ['is_pressure_round'] = (df_econ['deficit'] >= 3).astype(int)
|
||
|
|
|
||
|
|
perf_grp = df_econ.groupby(['steam_id_64', 'is_pressure_round'])['round_performance_score'].agg(['mean', 'count']).reset_index()
|
||
|
|
perf = perf_grp.pivot(index='steam_id_64', columns='is_pressure_round').fillna(0)
|
||
|
|
if ('mean', 1) in perf.columns and ('mean', 0) in perf.columns:
|
||
|
|
perf_ratio = (perf[('mean', 1)] / perf[('mean', 0)].replace(0, 1)).reset_index()
|
||
|
|
perf_ratio.columns = ['steam_id_64', 'rd_pressure_perf_ratio']
|
||
|
|
df = df.merge(perf_ratio, on='steam_id_64', how='left', suffixes=('', '_calc'))
|
||
|
|
if 'rd_pressure_perf_ratio_calc' in df.columns:
|
||
|
|
df['rd_pressure_perf_ratio'] = df['rd_pressure_perf_ratio_calc'].fillna(df['rd_pressure_perf_ratio'])
|
||
|
|
df.drop(columns=['rd_pressure_perf_ratio_calc'], inplace=True)
|
||
|
|
|
||
|
|
mp_perf_grp = df_econ.groupby(['steam_id_64', 'is_match_point_round'])['round_performance_score'].agg(['mean', 'count']).reset_index()
|
||
|
|
mp_perf = mp_perf_grp.pivot(index='steam_id_64', columns='is_match_point_round').fillna(0)
|
||
|
|
if ('mean', 1) in mp_perf.columns and ('mean', 0) in mp_perf.columns:
|
||
|
|
mp_perf_ratio = (mp_perf[('mean', 1)] / mp_perf[('mean', 0)].replace(0, 1)).reset_index()
|
||
|
|
mp_perf_ratio.columns = ['steam_id_64', 'rd_matchpoint_perf_ratio']
|
||
|
|
df = df.merge(mp_perf_ratio, on='steam_id_64', how='left', suffixes=('', '_calc'))
|
||
|
|
if 'rd_matchpoint_perf_ratio_calc' in df.columns:
|
||
|
|
df['rd_matchpoint_perf_ratio'] = df['rd_matchpoint_perf_ratio_calc'].fillna(df['rd_matchpoint_perf_ratio'])
|
||
|
|
df.drop(columns=['rd_matchpoint_perf_ratio_calc'], inplace=True)
|
||
|
|
|
||
|
|
eco = df_econ.copy()
|
||
|
|
eco['round_type'] = np.select(
|
||
|
|
[
|
||
|
|
eco['is_overtime_round'] == 1,
|
||
|
|
eco['equipment_value'] < 2000,
|
||
|
|
eco['equipment_value'] >= 4000,
|
||
|
|
],
|
||
|
|
[
|
||
|
|
'overtime',
|
||
|
|
'eco',
|
||
|
|
'fullbuy',
|
||
|
|
],
|
||
|
|
default='rifle'
|
||
|
|
)
|
||
|
|
eco_rounds = eco.groupby(['steam_id_64', 'round_type']).size().reset_index(name='rounds')
|
||
|
|
perf_mean = eco.groupby(['steam_id_64', 'round_type'])['round_performance_score'].mean().reset_index(name='perf')
|
||
|
|
eco_rows = eco_rounds.merge(perf_mean, on=['steam_id_64', 'round_type'], how='left')
|
||
|
|
|
||
|
|
if eco_rows is not None and len(eco_rows) > 0:
|
||
|
|
kpr_rounds = df_player_rounds[['match_id', 'round_num', 'steam_id_64', 'kills', 'is_pistol_round', 'is_overtime_round']].copy()
|
||
|
|
kpr_rounds['round_type'] = np.select(
|
||
|
|
[
|
||
|
|
kpr_rounds['is_overtime_round'] == 1,
|
||
|
|
kpr_rounds['is_pistol_round'] == 1,
|
||
|
|
],
|
||
|
|
[
|
||
|
|
'overtime',
|
||
|
|
'pistol',
|
||
|
|
],
|
||
|
|
default='reg'
|
||
|
|
)
|
||
|
|
kpr = kpr_rounds.groupby(['steam_id_64', 'round_type']).agg(kpr=('kills', 'mean'), rounds=('kills', 'size')).reset_index()
|
||
|
|
kpr_dict = {}
|
||
|
|
for pid, g in kpr.groupby('steam_id_64'):
|
||
|
|
d = {}
|
||
|
|
for _, r in g.iterrows():
|
||
|
|
d[r['round_type']] = {'kpr': float(r['kpr']), 'rounds': int(r['rounds'])}
|
||
|
|
kpr_dict[str(pid)] = d
|
||
|
|
|
||
|
|
econ_dict = {}
|
||
|
|
if isinstance(eco_rows, pd.DataFrame) and not eco_rows.empty:
|
||
|
|
for pid, g in eco_rows.groupby('steam_id_64'):
|
||
|
|
d = {}
|
||
|
|
for _, r in g.iterrows():
|
||
|
|
d[r['round_type']] = {'perf': float(r['perf']) if r['perf'] is not None else 0.0, 'rounds': int(r['rounds'])}
|
||
|
|
econ_dict[str(pid)] = d
|
||
|
|
|
||
|
|
out = {}
|
||
|
|
for pid in df['steam_id_64'].astype(str).tolist():
|
||
|
|
merged = {}
|
||
|
|
if pid in kpr_dict:
|
||
|
|
merged.update(kpr_dict[pid])
|
||
|
|
if pid in econ_dict:
|
||
|
|
for k, v in econ_dict[pid].items():
|
||
|
|
merged.setdefault(k, {}).update(v)
|
||
|
|
out[pid] = json.dumps(merged, ensure_ascii=False)
|
||
|
|
df['rd_roundtype_split_json'] = df['steam_id_64'].astype(str).map(out).fillna("{}")
|
||
|
|
|
||
|
|
# Final Mappings
|
||
|
|
df['total_matches'] = df['matches_played']
|
||
|
|
|
||
|
|
for c in df.columns:
|
||
|
|
if df[c].dtype.kind in "biufc":
|
||
|
|
df[c] = df[c].fillna(0)
|
||
|
|
else:
|
||
|
|
df[c] = df[c].fillna("")
|
||
|
|
return df
|
||
|
|
|
||
|
|
@staticmethod
|
||
|
|
def _calculate_economy_features(conn, player_ids):
|
||
|
|
if not player_ids: return None
|
||
|
|
placeholders = ','.join(['?'] * len(player_ids))
|
||
|
|
|
||
|
|
# 1. Investment Efficiency (Damage / Equipment Value)
|
||
|
|
# We need total damage and total equipment value
|
||
|
|
# fact_match_players has sum_util_dmg (only nade damage), but we need total damage.
|
||
|
|
# fact_match_players has 'basic_avg_adr' * rounds.
|
||
|
|
# Better to query fact_round_player_economy for equipment value sum.
|
||
|
|
|
||
|
|
q_eco_val = f"""
|
||
|
|
SELECT steam_id_64, SUM(equipment_value) as total_spend, COUNT(*) as rounds_tracked
|
||
|
|
FROM fact_round_player_economy
|
||
|
|
WHERE steam_id_64 IN ({placeholders})
|
||
|
|
GROUP BY steam_id_64
|
||
|
|
"""
|
||
|
|
df_spend = pd.read_sql_query(q_eco_val, conn, params=player_ids)
|
||
|
|
|
||
|
|
# Get Total Damage from fact_match_players (derived from ADR * Rounds)
|
||
|
|
# MUST filter by matches that actually have economy data to ensure consistency
|
||
|
|
q_dmg = f"""
|
||
|
|
SELECT mp.steam_id_64, SUM(mp.adr * mp.round_total) as total_damage
|
||
|
|
FROM fact_match_players mp
|
||
|
|
JOIN (
|
||
|
|
SELECT DISTINCT match_id, steam_id_64
|
||
|
|
FROM fact_round_player_economy
|
||
|
|
WHERE steam_id_64 IN ({placeholders})
|
||
|
|
) eco ON mp.match_id = eco.match_id AND mp.steam_id_64 = eco.steam_id_64
|
||
|
|
WHERE mp.steam_id_64 IN ({placeholders})
|
||
|
|
GROUP BY mp.steam_id_64
|
||
|
|
"""
|
||
|
|
df_dmg = pd.read_sql_query(q_dmg, conn, params=player_ids + player_ids)
|
||
|
|
|
||
|
|
df = df_spend.merge(df_dmg, on='steam_id_64', how='inner')
|
||
|
|
|
||
|
|
# Metric 1: Damage per 1000$
|
||
|
|
# Avoid div by zero
|
||
|
|
df['eco_avg_damage_per_1k'] = df['total_damage'] / (df['total_spend'] / 1000.0).replace(0, 1)
|
||
|
|
|
||
|
|
# 2. Eco Round Performance (Equipment < 2000)
|
||
|
|
# We need kills in these rounds.
|
||
|
|
# Join economy with events? That's heavy.
|
||
|
|
# Alternative: Approximate.
|
||
|
|
# Let's do it properly: Get rounds where equip < 2000, count kills.
|
||
|
|
|
||
|
|
# Subquery for Eco Rounds keys: (match_id, round_num, steam_id_64)
|
||
|
|
# Then join with events.
|
||
|
|
|
||
|
|
q_eco_perf = f"""
|
||
|
|
SELECT
|
||
|
|
e.attacker_steam_id as steam_id_64,
|
||
|
|
COUNT(*) as eco_kills,
|
||
|
|
SUM(CASE WHEN e.event_type='death' THEN 1 ELSE 0 END) as eco_deaths
|
||
|
|
FROM fact_round_events e
|
||
|
|
JOIN fact_round_player_economy eco
|
||
|
|
ON e.match_id = eco.match_id
|
||
|
|
AND e.round_num = eco.round_num
|
||
|
|
AND (e.attacker_steam_id = eco.steam_id_64 OR e.victim_steam_id = eco.steam_id_64)
|
||
|
|
WHERE (e.event_type = 'kill' AND e.attacker_steam_id = eco.steam_id_64)
|
||
|
|
OR (e.event_type = 'kill' AND e.victim_steam_id = eco.steam_id_64) -- Count deaths properly
|
||
|
|
AND eco.equipment_value < 2000
|
||
|
|
AND eco.steam_id_64 IN ({placeholders})
|
||
|
|
GROUP BY eco.steam_id_64
|
||
|
|
"""
|
||
|
|
# Wait, the join condition OR is tricky for grouping.
|
||
|
|
# Let's separate Kills and Deaths or do two queries.
|
||
|
|
# Simpler:
|
||
|
|
|
||
|
|
# Eco Kills
|
||
|
|
q_eco_kills = f"""
|
||
|
|
SELECT
|
||
|
|
e.attacker_steam_id as steam_id_64,
|
||
|
|
COUNT(*) as eco_kills
|
||
|
|
FROM fact_round_events e
|
||
|
|
JOIN fact_round_player_economy eco
|
||
|
|
ON e.match_id = eco.match_id
|
||
|
|
AND e.round_num = eco.round_num
|
||
|
|
AND e.attacker_steam_id = eco.steam_id_64
|
||
|
|
WHERE e.event_type = 'kill'
|
||
|
|
AND eco.equipment_value < 2000
|
||
|
|
AND eco.steam_id_64 IN ({placeholders})
|
||
|
|
GROUP BY e.attacker_steam_id
|
||
|
|
"""
|
||
|
|
df_eco_kills = pd.read_sql_query(q_eco_kills, conn, params=player_ids)
|
||
|
|
|
||
|
|
# Eco Deaths
|
||
|
|
q_eco_deaths = f"""
|
||
|
|
SELECT
|
||
|
|
e.victim_steam_id as steam_id_64,
|
||
|
|
COUNT(*) as eco_deaths
|
||
|
|
FROM fact_round_events e
|
||
|
|
JOIN fact_round_player_economy eco
|
||
|
|
ON e.match_id = eco.match_id
|
||
|
|
AND e.round_num = eco.round_num
|
||
|
|
AND e.victim_steam_id = eco.steam_id_64
|
||
|
|
WHERE e.event_type = 'kill'
|
||
|
|
AND eco.equipment_value < 2000
|
||
|
|
AND eco.steam_id_64 IN ({placeholders})
|
||
|
|
GROUP BY e.victim_steam_id
|
||
|
|
"""
|
||
|
|
df_eco_deaths = pd.read_sql_query(q_eco_deaths, conn, params=player_ids)
|
||
|
|
|
||
|
|
# Get count of eco rounds
|
||
|
|
q_eco_rounds = f"""
|
||
|
|
SELECT steam_id_64, COUNT(*) as eco_round_count
|
||
|
|
FROM fact_round_player_economy
|
||
|
|
WHERE equipment_value < 2000 AND steam_id_64 IN ({placeholders})
|
||
|
|
GROUP BY steam_id_64
|
||
|
|
"""
|
||
|
|
df_eco_cnt = pd.read_sql_query(q_eco_rounds, conn, params=player_ids)
|
||
|
|
|
||
|
|
df_perf = df_eco_cnt.merge(df_eco_kills, on='steam_id_64', how='left').merge(df_eco_deaths, on='steam_id_64', how='left').fillna(0)
|
||
|
|
|
||
|
|
# Eco Rating (KPR)
|
||
|
|
df_perf['eco_rating_eco_rounds'] = df_perf['eco_kills'] / df_perf['eco_round_count'].replace(0, 1)
|
||
|
|
|
||
|
|
# Eco KD
|
||
|
|
df_perf['eco_kd_ratio'] = df_perf['eco_kills'] / df_perf['eco_deaths'].replace(0, 1)
|
||
|
|
|
||
|
|
# Eco Rounds per Match
|
||
|
|
# We need total matches WHERE economy data exists.
|
||
|
|
# Otherwise, if we have 100 matches but only 10 with eco data, the avg will be diluted.
|
||
|
|
q_matches = f"""
|
||
|
|
SELECT steam_id_64, COUNT(DISTINCT match_id) as matches_tracked
|
||
|
|
FROM fact_round_player_economy
|
||
|
|
WHERE steam_id_64 IN ({placeholders})
|
||
|
|
GROUP BY steam_id_64
|
||
|
|
"""
|
||
|
|
df_matches = pd.read_sql_query(q_matches, conn, params=player_ids)
|
||
|
|
|
||
|
|
df_perf = df_perf.merge(df_matches, on='steam_id_64', how='left')
|
||
|
|
df_perf['eco_avg_rounds'] = df_perf['eco_round_count'] / df_perf['matches_tracked'].replace(0, 1)
|
||
|
|
|
||
|
|
# Merge all
|
||
|
|
df_final = df.merge(df_perf[['steam_id_64', 'eco_rating_eco_rounds', 'eco_kd_ratio', 'eco_avg_rounds']], on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
return df_final[['steam_id_64', 'eco_avg_damage_per_1k', 'eco_rating_eco_rounds', 'eco_kd_ratio', 'eco_avg_rounds']]
|
||
|
|
|
||
|
|
@staticmethod
|
||
|
|
def _calculate_pace_features(conn, player_ids):
|
||
|
|
if not player_ids: return None
|
||
|
|
placeholders = ','.join(['?'] * len(player_ids))
|
||
|
|
|
||
|
|
# 1. Avg Time to First Contact
|
||
|
|
# Find min(event_time) per round per player (Attacker or Victim)
|
||
|
|
q_first_contact = f"""
|
||
|
|
SELECT
|
||
|
|
player_id as steam_id_64,
|
||
|
|
AVG(first_time) as pace_avg_time_to_first_contact
|
||
|
|
FROM (
|
||
|
|
SELECT
|
||
|
|
match_id, round_num,
|
||
|
|
CASE
|
||
|
|
WHEN attacker_steam_id IN ({placeholders}) THEN attacker_steam_id
|
||
|
|
ELSE victim_steam_id
|
||
|
|
END as player_id,
|
||
|
|
MIN(event_time) as first_time
|
||
|
|
FROM fact_round_events
|
||
|
|
WHERE (attacker_steam_id IN ({placeholders}) OR victim_steam_id IN ({placeholders}))
|
||
|
|
AND event_type IN ('kill', 'death') -- focus on combat
|
||
|
|
GROUP BY match_id, round_num, player_id
|
||
|
|
) sub
|
||
|
|
GROUP BY player_id
|
||
|
|
"""
|
||
|
|
# Note: 'death' isn't an event_type, it's 'kill'.
|
||
|
|
# We check if player is attacker or victim in 'kill' event.
|
||
|
|
|
||
|
|
# Corrected Query:
|
||
|
|
q_first_contact = f"""
|
||
|
|
SELECT
|
||
|
|
player_id as steam_id_64,
|
||
|
|
AVG(first_time) as pace_avg_time_to_first_contact
|
||
|
|
FROM (
|
||
|
|
SELECT
|
||
|
|
match_id, round_num,
|
||
|
|
p_id as player_id,
|
||
|
|
MIN(event_time) as first_time
|
||
|
|
FROM (
|
||
|
|
SELECT match_id, round_num, event_time, attacker_steam_id as p_id FROM fact_round_events WHERE event_type='kill'
|
||
|
|
UNION ALL
|
||
|
|
SELECT match_id, round_num, event_time, victim_steam_id as p_id FROM fact_round_events WHERE event_type='kill'
|
||
|
|
) raw
|
||
|
|
WHERE p_id IN ({placeholders})
|
||
|
|
GROUP BY match_id, round_num, p_id
|
||
|
|
) sub
|
||
|
|
GROUP BY player_id
|
||
|
|
"""
|
||
|
|
df_time = pd.read_sql_query(q_first_contact, conn, params=player_ids)
|
||
|
|
# Wait, params=player_ids won't work with f-string placeholders if I use ? inside.
|
||
|
|
# My placeholders variable is literal string "?,?,?".
|
||
|
|
# So params should be player_ids.
|
||
|
|
# But in UNION ALL, I have two WHERE clauses.
|
||
|
|
# Actually I can optimize:
|
||
|
|
# WHERE attacker_steam_id IN (...) OR victim_steam_id IN (...)
|
||
|
|
# Then unpivot in python or SQL.
|
||
|
|
|
||
|
|
# Let's use Python for unpivoting to be safe and clear.
|
||
|
|
q_events = f"""
|
||
|
|
SELECT match_id, round_num, event_time, attacker_steam_id, victim_steam_id
|
||
|
|
FROM fact_round_events
|
||
|
|
WHERE event_type='kill'
|
||
|
|
AND (attacker_steam_id IN ({placeholders}) OR victim_steam_id IN ({placeholders}))
|
||
|
|
"""
|
||
|
|
# This params needs player_ids * 2
|
||
|
|
df_ev = pd.read_sql_query(q_events, conn, params=list(player_ids) + list(player_ids))
|
||
|
|
|
||
|
|
pace_list = []
|
||
|
|
if not df_ev.empty:
|
||
|
|
# Unpivot
|
||
|
|
att = df_ev[df_ev['attacker_steam_id'].isin(player_ids)][['match_id', 'round_num', 'event_time', 'attacker_steam_id']].rename(columns={'attacker_steam_id': 'steam_id_64'})
|
||
|
|
vic = df_ev[df_ev['victim_steam_id'].isin(player_ids)][['match_id', 'round_num', 'event_time', 'victim_steam_id']].rename(columns={'victim_steam_id': 'steam_id_64'})
|
||
|
|
combined = pd.concat([att, vic])
|
||
|
|
|
||
|
|
# Group by round, get min time
|
||
|
|
first_contacts = combined.groupby(['match_id', 'round_num', 'steam_id_64'])['event_time'].min().reset_index()
|
||
|
|
|
||
|
|
# Average per player
|
||
|
|
avg_time = first_contacts.groupby('steam_id_64')['event_time'].mean().reset_index()
|
||
|
|
avg_time.rename(columns={'event_time': 'pace_avg_time_to_first_contact'}, inplace=True)
|
||
|
|
pace_list.append(avg_time)
|
||
|
|
|
||
|
|
# 2. Trade Kill Rate
|
||
|
|
# "Kill a killer within 5s of teammate death"
|
||
|
|
# We need to reconstruct the flow.
|
||
|
|
# Iterate matches? Vectorized is hard.
|
||
|
|
# Let's try a simplified approach:
|
||
|
|
# For each match, sort events by time.
|
||
|
|
# If (Kill A->B) at T1, and (Kill C->A) at T2, and T2-T1 <= 5, and C & B are same team.
|
||
|
|
# We don't have team info in events easily (we have side logic elsewhere).
|
||
|
|
# Assuming Side logic: If A->B (A=CT, B=T). Then C->A (C=T).
|
||
|
|
# So B and C are T.
|
||
|
|
|
||
|
|
# Let's fetch basic trade info using self-join in SQL?
|
||
|
|
# A kills B at T1.
|
||
|
|
# C kills A at T2.
|
||
|
|
# T2 > T1 and T2 - T1 <= 5.
|
||
|
|
# C is the Trader. B is the Victim (Teammate).
|
||
|
|
# We want C's Trade Rate.
|
||
|
|
|
||
|
|
q_trades = f"""
|
||
|
|
SELECT
|
||
|
|
t2.attacker_steam_id as trader_id,
|
||
|
|
COUNT(*) as trade_count
|
||
|
|
FROM fact_round_events t1
|
||
|
|
JOIN fact_round_events t2
|
||
|
|
ON t1.match_id = t2.match_id
|
||
|
|
AND t1.round_num = t2.round_num
|
||
|
|
WHERE t1.event_type = 'kill' AND t2.event_type = 'kill'
|
||
|
|
AND t1.attacker_steam_id = t2.victim_steam_id -- Avenger kills the Killer
|
||
|
|
AND t2.event_time > t1.event_time
|
||
|
|
AND t2.event_time - t1.event_time <= 5
|
||
|
|
AND t2.attacker_steam_id IN ({placeholders})
|
||
|
|
GROUP BY t2.attacker_steam_id
|
||
|
|
"""
|
||
|
|
df_trades = pd.read_sql_query(q_trades, conn, params=player_ids)
|
||
|
|
|
||
|
|
# Denominator: Opportunities? Or just Total Kills?
|
||
|
|
# Trade Kill Rate usually means % of Kills that were Trades.
|
||
|
|
# Let's use that.
|
||
|
|
|
||
|
|
# Get Total Kills
|
||
|
|
q_kills = f"""
|
||
|
|
SELECT attacker_steam_id as steam_id_64, COUNT(*) as total_kills
|
||
|
|
FROM fact_round_events
|
||
|
|
WHERE event_type='kill' AND attacker_steam_id IN ({placeholders})
|
||
|
|
GROUP BY attacker_steam_id
|
||
|
|
"""
|
||
|
|
df_tot_kills = pd.read_sql_query(q_kills, conn, params=player_ids)
|
||
|
|
|
||
|
|
if not df_trades.empty:
|
||
|
|
df_trades = df_trades.merge(df_tot_kills, left_on='trader_id', right_on='steam_id_64', how='right').fillna(0)
|
||
|
|
df_trades['pace_trade_kill_rate'] = df_trades['trade_count'] / df_trades['total_kills'].replace(0, 1)
|
||
|
|
else:
|
||
|
|
df_trades = df_tot_kills.copy()
|
||
|
|
df_trades['pace_trade_kill_rate'] = 0
|
||
|
|
|
||
|
|
df_final = pd.DataFrame({'steam_id_64': list(player_ids)})
|
||
|
|
|
||
|
|
if pace_list:
|
||
|
|
df_final = df_final.merge(pace_list[0], on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
# Merge Trade Rate
|
||
|
|
if not df_trades.empty:
|
||
|
|
df_final = df_final.merge(df_trades[['steam_id_64', 'pace_trade_kill_rate']], on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
# 3. New Pace Metrics
|
||
|
|
# pace_opening_kill_time: Avg time of Opening Kills (where attacker_steam_id = player AND is_first_kill = 1?)
|
||
|
|
# Wait, fact_round_events doesn't store 'is_first_kill' directly? It stores 'first_kill' in fact_match_players but that's aggregate.
|
||
|
|
# It stores 'event_type'. We need to check if it was the FIRST kill of the round.
|
||
|
|
# Query: For each round, find the FIRST kill event. Check if attacker is our player. Get time.
|
||
|
|
|
||
|
|
q_opening_time = f"""
|
||
|
|
SELECT
|
||
|
|
attacker_steam_id as steam_id_64,
|
||
|
|
AVG(event_time) as pace_opening_kill_time
|
||
|
|
FROM (
|
||
|
|
SELECT
|
||
|
|
match_id, round_num,
|
||
|
|
attacker_steam_id,
|
||
|
|
MIN(event_time) as event_time
|
||
|
|
FROM fact_round_events
|
||
|
|
WHERE event_type='kill'
|
||
|
|
GROUP BY match_id, round_num
|
||
|
|
) first_kills
|
||
|
|
WHERE attacker_steam_id IN ({placeholders})
|
||
|
|
GROUP BY attacker_steam_id
|
||
|
|
"""
|
||
|
|
df_opening_time = pd.read_sql_query(q_opening_time, conn, params=player_ids)
|
||
|
|
|
||
|
|
# pace_avg_life_time: Avg time alive per round
|
||
|
|
# Logic: Round Duration - Death Time (if died). Else Round Duration.
|
||
|
|
# We need Round Duration (fact_rounds doesn't have duration? fact_matches has match duration).
|
||
|
|
# Usually round duration is fixed or we use last event time.
|
||
|
|
# Let's approximate: If died, time = death_time. If survived, time = max_event_time_of_round.
|
||
|
|
# Better: survival time.
|
||
|
|
|
||
|
|
q_survival = f"""
|
||
|
|
SELECT
|
||
|
|
p.steam_id_64,
|
||
|
|
AVG(
|
||
|
|
CASE
|
||
|
|
WHEN d.death_time IS NOT NULL THEN d.death_time
|
||
|
|
ELSE r.round_end_time -- Use max event time as proxy for round end
|
||
|
|
END
|
||
|
|
) as pace_avg_life_time
|
||
|
|
FROM fact_match_players p
|
||
|
|
JOIN (
|
||
|
|
SELECT match_id, round_num, MAX(event_time) as round_end_time
|
||
|
|
FROM fact_round_events
|
||
|
|
GROUP BY match_id, round_num
|
||
|
|
) r ON p.match_id = r.match_id
|
||
|
|
LEFT JOIN (
|
||
|
|
SELECT match_id, round_num, victim_steam_id, MIN(event_time) as death_time
|
||
|
|
FROM fact_round_events
|
||
|
|
WHERE event_type='kill'
|
||
|
|
GROUP BY match_id, round_num, victim_steam_id
|
||
|
|
) d ON p.match_id = d.match_id AND p.steam_id_64 = d.victim_steam_id
|
||
|
|
-- We need to join rounds to ensure we track every round the player played?
|
||
|
|
-- fact_match_players is per match. We need per round.
|
||
|
|
-- We can use fact_round_player_economy to get all rounds a player played.
|
||
|
|
JOIN fact_round_player_economy e ON p.match_id = e.match_id AND p.steam_id_64 = e.steam_id_64 AND r.round_num = e.round_num
|
||
|
|
WHERE p.steam_id_64 IN ({placeholders})
|
||
|
|
GROUP BY p.steam_id_64
|
||
|
|
"""
|
||
|
|
# This join is heavy. Let's simplify.
|
||
|
|
# Just use death events for "Time of Death".
|
||
|
|
# And for rounds without death, use 115s (avg round length)? Or max event time?
|
||
|
|
# Let's stick to what we have.
|
||
|
|
|
||
|
|
df_survival = pd.read_sql_query(q_survival, conn, params=player_ids)
|
||
|
|
|
||
|
|
if not df_opening_time.empty:
|
||
|
|
df_final = df_final.merge(df_opening_time, on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
if not df_survival.empty:
|
||
|
|
df_final = df_final.merge(df_survival, on='steam_id_64', how='left')
|
||
|
|
|
||
|
|
return df_final.fillna(0)
|
||
|
|
|
||
|
|
|
||
|
|
@staticmethod
|
||
|
|
def _calculate_ultimate_scores(df):
|
||
|
|
def n(col):
|
||
|
|
if col not in df.columns: return 50
|
||
|
|
s = df[col]
|
||
|
|
if s.max() == s.min(): return 50
|
||
|
|
return (s - s.min()) / (s.max() - s.min()) * 100
|
||
|
|
|
||
|
|
df = df.copy()
|
||
|
|
|
||
|
|
# BAT (30%)
|
||
|
|
df['score_bat'] = (
|
||
|
|
0.25 * n('basic_avg_rating') +
|
||
|
|
0.20 * n('basic_avg_kd') +
|
||
|
|
0.15 * n('basic_avg_adr') +
|
||
|
|
0.10 * n('bat_avg_duel_win_rate') +
|
||
|
|
0.10 * n('bat_kd_diff_high_elo') +
|
||
|
|
0.10 * n('basic_avg_kill_3')
|
||
|
|
)
|
||
|
|
|
||
|
|
# STA (15%)
|
||
|
|
df['score_sta'] = (
|
||
|
|
0.30 * (100 - n('sta_rating_volatility')) +
|
||
|
|
0.30 * n('sta_loss_rating') +
|
||
|
|
0.20 * n('sta_win_rating') +
|
||
|
|
0.10 * (100 - abs(n('sta_time_rating_corr')))
|
||
|
|
)
|
||
|
|
|
||
|
|
# HPS (20%)
|
||
|
|
df['score_hps'] = (
|
||
|
|
0.25 * n('sum_1v3p') +
|
||
|
|
0.20 * n('hps_match_point_win_rate') +
|
||
|
|
0.20 * n('hps_comeback_kd_diff') +
|
||
|
|
0.15 * n('hps_pressure_entry_rate') +
|
||
|
|
0.20 * n('basic_avg_rating')
|
||
|
|
)
|
||
|
|
|
||
|
|
# PTL (10%)
|
||
|
|
df['score_ptl'] = (
|
||
|
|
0.30 * n('ptl_pistol_kills') +
|
||
|
|
0.30 * n('ptl_pistol_win_rate') +
|
||
|
|
0.20 * n('ptl_pistol_kd') +
|
||
|
|
0.20 * n('ptl_pistol_util_efficiency')
|
||
|
|
)
|
||
|
|
|
||
|
|
# T/CT (10%)
|
||
|
|
df['score_tct'] = (
|
||
|
|
0.35 * n('side_rating_ct') +
|
||
|
|
0.35 * n('side_rating_t') +
|
||
|
|
0.15 * n('side_first_kill_rate_ct') +
|
||
|
|
0.15 * n('side_first_kill_rate_t')
|
||
|
|
)
|
||
|
|
|
||
|
|
# UTIL (10%)
|
||
|
|
# Emphasize prop frequency (usage_rate)
|
||
|
|
df['score_util'] = (
|
||
|
|
0.35 * n('util_usage_rate') +
|
||
|
|
0.25 * n('util_avg_nade_dmg') +
|
||
|
|
0.20 * n('util_avg_flash_time') +
|
||
|
|
0.20 * n('util_avg_flash_enemy')
|
||
|
|
)
|
||
|
|
|
||
|
|
# ECO (New)
|
||
|
|
df['score_eco'] = (
|
||
|
|
0.50 * n('eco_avg_damage_per_1k') +
|
||
|
|
0.50 * n('eco_rating_eco_rounds')
|
||
|
|
)
|
||
|
|
|
||
|
|
# PACE (New)
|
||
|
|
# Aggression Score: Faster first contact (lower time) -> higher score
|
||
|
|
df['score_pace'] = (
|
||
|
|
0.50 * (100 - n('pace_avg_time_to_first_contact')) +
|
||
|
|
0.50 * n('pace_trade_kill_rate')
|
||
|
|
)
|
||
|
|
|
||
|
|
return df
|
||
|
|
|
||
|
|
@staticmethod
def get_roster_features_distribution(target_steam_id):
    """Rank the target player's L3 feature values within the active roster.

    Args:
        target_steam_id: steam_id_64 of the player to rank (any type
            coercible to str).

    Returns:
        Dict mapping metric name -> {val, rank, total, min, max, avg},
        where rank 1 is the highest value among roster members, or None
        when there is no active roster or no L3 feature data.
    """
    from web.services.web_service import WebService
    import json

    # 1. Resolve the active roster from the most recent lineup.
    lineups = WebService.get_lineups()
    active_roster_ids = []
    if lineups:
        try:
            raw_ids = json.loads(lineups[0]['player_ids_json'])
            active_roster_ids = [str(uid) for uid in raw_ids]
        except (KeyError, TypeError, ValueError):
            # Malformed or missing lineup JSON -> treat as no roster.
            # (json.JSONDecodeError is a ValueError subclass.)
            pass

    if not active_roster_ids:
        return None

    # 2. Fetch L3 feature rows for every roster member.
    placeholders = ','.join('?' for _ in active_roster_ids)
    sql = f"SELECT * FROM dm_player_features WHERE steam_id_64 IN ({placeholders})"
    rows = query_db('l3', sql, active_roster_ids)

    if not rows:
        return None

    stats_map = {row['steam_id_64']: dict(row) for row in rows}
    target_steam_id = str(target_steam_id)

    # Target may have no L3 row yet; its metrics then fall back to 0
    # in the loop below.
    if target_steam_id not in stats_map:
        stats_map[target_steam_id] = {}

    # 3. Metrics to rank: composite scores plus the raw metrics shown
    # on the profile page.
    metrics = [
        # Scores
        'score_bat', 'score_sta', 'score_hps', 'score_ptl', 'score_tct', 'score_util', 'score_eco', 'score_pace',
        # Core
        'basic_avg_rating', 'basic_avg_kd', 'basic_avg_adr', 'basic_avg_kast', 'basic_avg_rws',
        # Combat
        'basic_avg_headshot_kills', 'basic_headshot_rate', 'basic_avg_assisted_kill', 'basic_avg_awp_kill', 'basic_avg_jump_count',
        # Obj
        'basic_avg_mvps', 'basic_avg_plants', 'basic_avg_defuses', 'basic_avg_flash_assists',
        # Opening
        'basic_avg_first_kill', 'basic_avg_first_death', 'basic_first_kill_rate', 'basic_first_death_rate',
        # Multi
        'basic_avg_kill_2', 'basic_avg_kill_3', 'basic_avg_kill_4', 'basic_avg_kill_5',
        'basic_avg_perfect_kill', 'basic_avg_revenge_kill',
        # STA & BAT Details
        'sta_last_30_rating', 'sta_win_rating', 'sta_loss_rating', 'sta_rating_volatility', 'sta_time_rating_corr',
        'bat_kd_diff_high_elo', 'bat_avg_duel_win_rate',
        # HPS & PTL Details
        'hps_clutch_win_rate_1v1', 'hps_clutch_win_rate_1v3_plus', 'hps_match_point_win_rate', 'hps_pressure_entry_rate',
        'hps_comeback_kd_diff', 'hps_losing_streak_kd_diff',
        'ptl_pistol_kills', 'ptl_pistol_win_rate', 'ptl_pistol_kd', 'ptl_pistol_util_efficiency',
        # UTIL Details
        'util_usage_rate', 'util_avg_nade_dmg', 'util_avg_flash_time', 'util_avg_flash_enemy',
        # ECO & PACE (New)
        'eco_avg_damage_per_1k', 'eco_rating_eco_rounds', 'eco_kd_ratio', 'eco_avg_rounds',
        'pace_avg_time_to_first_contact', 'pace_trade_kill_rate', 'pace_opening_kill_time', 'pace_avg_life_time',
        # Party
        'party_1_win_rate', 'party_1_rating', 'party_1_adr',
        'party_2_win_rate', 'party_2_rating', 'party_2_adr',
        'party_3_win_rate', 'party_3_rating', 'party_3_adr',
        'party_4_win_rate', 'party_4_rating', 'party_4_adr',
        'party_5_win_rate', 'party_5_rating', 'party_5_adr',
        # Rating Dist
        'rating_dist_carry_rate', 'rating_dist_normal_rate', 'rating_dist_sacrifice_rate', 'rating_dist_sleeping_rate',
        # ELO
        'elo_lt1200_rating', 'elo_1200_1400_rating', 'elo_1400_1600_rating', 'elo_1600_1800_rating', 'elo_1800_2000_rating', 'elo_gt2000_rating'
    ]

    result = {}

    for m in metrics:
        # Missing columns / NULLs count as 0 so every roster member has
        # a value to rank against.
        values = []
        for p in stats_map.values():
            val = p.get(m)
            if val is None: val = 0
            values.append(float(val))

        target_val = stats_map[target_steam_id].get(m)
        if target_val is None: target_val = 0
        target_val = float(target_val)

        if not values:
            result[m] = None
            continue

        # Rank 1 = highest value. For "lower is better" raw metrics
        # (e.g. pace times) the caller interprets the ordering; the
        # derived score_* columns already invert those.
        values.sort(reverse=True)

        try:
            rank = values.index(target_val) + 1
        except ValueError:
            # Defensive: target_val is built from stats_map so it should
            # always be present; fall back to last place.
            rank = len(values)

        result[m] = {
            'val': target_val,
            'rank': rank,
            'total': len(values),
            'min': min(values),
            'max': max(values),
            'avg': sum(values) / len(values)
        }

    return result
|