910 lines
42 KiB
Python
910 lines
42 KiB
Python
from web.database import query_db, get_db, execute_db
|
|
import sqlite3
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
class FeatureService:
    """Query and rebuild the per-player feature table ``dm_player_features`` (L3).

    All methods are static. Read paths go through ``query_db`` against the
    ``'l3'`` and ``'l2'`` databases; the rebuild path opens its own SQLite
    connections from the paths in ``web.config.Config``.
    """

    # Whitelist mapping the public ``sort_by`` values to column names. Only these
    # values are ever interpolated into an ORDER BY clause.
    _SORT_COLUMNS = {
        'rating': 'basic_avg_rating',
        'kd': 'basic_avg_kd',
        'kast': 'basic_avg_kast',
        'matches': 'matches_played',
    }

    # ------------------------------------------------------------------
    # Read API
    # ------------------------------------------------------------------

    @staticmethod
    def get_player_features(steam_id):
        """Return the ``dm_player_features`` row for ``steam_id``.

        The lookup is delegated to ``query_db(..., one=True)``; whatever that
        helper yields for "no row" is passed through unchanged.
        """
        sql = "SELECT * FROM dm_player_features WHERE steam_id_64 = ?"
        return query_db('l3', sql, [steam_id], one=True)

    @staticmethod
    def _match_counts(steam_ids):
        """Return ``{steam_id_64: number of fact_match_players rows}`` for ``steam_ids``."""
        if not steam_ids:
            return {}
        placeholders = ','.join('?' for _ in steam_ids)
        sql = f"""
            SELECT steam_id_64, COUNT(*) as cnt
            FROM fact_match_players
            WHERE steam_id_64 IN ({placeholders})
            GROUP BY steam_id_64
        """
        return {r['steam_id_64']: r['cnt'] for r in query_db('l2', sql, list(steam_ids))}

    @staticmethod
    def _features_by_id(steam_ids):
        """Return ``{steam_id_64: dm_player_features row}`` for ``steam_ids``."""
        placeholders = ','.join('?' for _ in steam_ids)
        sql = f"SELECT * FROM dm_player_features WHERE steam_id_64 IN ({placeholders})"
        return {f['steam_id_64']: f for f in query_db('l3', sql, steam_ids)}

    @staticmethod
    def _with_basic_stats(player, features):
        """Copy ``player`` into a dict and attach rating/kd/kast columns.

        When an L3 ``features`` row is available its columns are merged over the
        player's. Otherwise the three ``basic_avg_*`` values are taken from
        ``StatsService.get_player_basic_stats`` (one call per player), or set to
        0 when that returns nothing.
        """
        # Imported here, as in the original code, rather than at module level.
        from web.services.stats_service import StatsService

        merged = dict(player)
        if features:
            merged.update(dict(features))
            return merged

        stats = StatsService.get_player_basic_stats(player['steam_id_64'])
        if stats:
            merged['basic_avg_rating'] = stats['rating']
            merged['basic_avg_kd'] = stats['kd']
            merged['basic_avg_kast'] = stats['kast']
        else:
            merged['basic_avg_rating'] = 0
            merged['basic_avg_kd'] = 0
            merged['basic_avg_kast'] = 0
        return merged

    @staticmethod
    def get_players_list(page=1, per_page=20, sort_by='rating', search=None):
        """Return one page of players merged with their features.

        Args:
            page: 1-based page number.
            per_page: page size.
            sort_by: one of ``'rating'``, ``'kd'``, ``'kast'``, ``'matches'``;
                anything else falls back to rating.
            search: optional search string forwarded to ``StatsService.get_players``.

        Returns:
            ``(players, total)`` where ``players`` is a list of dicts and
            ``total`` is the number of players across all pages.

        Modes:
            * search: up to 100 L2 matches are fetched, enriched, sorted in
              Python and sliced to the requested page.
            * ``sort_by='matches'``: ordering and paging are done in L2 by match count.
            * L3 empty: L2 players are paged by ``StatsService`` and enriched
              with per-player basic stats.
            * otherwise: ordering and paging are done in L3.
        """
        from web.services.stats_service import StatsService

        offset = (page - 1) * per_page
        order_col = FeatureService._SORT_COLUMNS.get(sort_by, 'basic_avg_rating')

        def sort_key(player):
            # Missing or NULL values sort as 0.
            return player.get(order_col, 0) or 0

        if search:
            l2_players, _ = StatsService.get_players(page=1, per_page=100, search=search)
            if not l2_players:
                return [], 0

            steam_ids = [p['steam_id_64'] for p in l2_players]
            features = FeatureService._features_by_id(steam_ids)
            counts = FeatureService._match_counts(steam_ids)

            merged = []
            for p in l2_players:
                sid = p['steam_id_64']
                m = FeatureService._with_basic_stats(p, features.get(sid))
                m['matches_played'] = counts.get(sid, 0)
                merged.append(m)

            merged.sort(key=sort_key, reverse=True)
            return merged[offset:offset + per_page], len(merged)

        # ---- Browse mode ----
        l3_count = query_db('l3', "SELECT COUNT(*) as cnt FROM dm_player_features", one=True)['cnt']

        if sort_by == 'matches':
            total = query_db(
                'l2',
                "SELECT COUNT(DISTINCT steam_id_64) as cnt FROM fact_match_players",
                one=True,
            )['cnt']
            sql = """
                SELECT steam_id_64, COUNT(*) as cnt
                FROM fact_match_players
                GROUP BY steam_id_64
                ORDER BY cnt DESC
                LIMIT ? OFFSET ?
            """
            top_rows = query_db('l2', sql, [per_page, offset])
            if not top_rows:
                # Out-of-range page: report the real total, like the L3 browse path.
                return [], total

            ids = [r['steam_id_64'] for r in top_rows]
            players = {p['steam_id_64']: p for p in StatsService.get_players_by_ids(ids)}
            features = FeatureService._features_by_id(ids)

            merged = []
            for r in top_rows:
                sid = r['steam_id_64']
                p = players.get(sid)
                if not p:
                    continue
                m = FeatureService._with_basic_stats(p, features.get(sid))
                m['matches_played'] = r['cnt']
                merged.append(m)
            return merged, total

        if l3_count == 0:
            # L3 has not been built yet: serve L2 players with on-the-fly stats.
            l2_players, total = StatsService.get_players(page, per_page, sort_by=None)
            counts = FeatureService._match_counts([p['steam_id_64'] for p in l2_players])

            merged = []
            for p in l2_players:
                m = FeatureService._with_basic_stats(p, None)
                # Written on the copy so the source rows need not be mutable.
                m['matches_played'] = counts.get(p['steam_id_64'], 0)
                merged.append(m)

            if sort_by != 'rating':
                merged.sort(key=sort_key, reverse=True)
            return merged, total

        # ---- Normal L3 browse ----
        # order_col comes from the _SORT_COLUMNS whitelist, never from user input.
        sql = f"SELECT * FROM dm_player_features ORDER BY {order_col} DESC LIMIT ? OFFSET ?"
        features = query_db('l3', sql, [per_page, offset])
        total = l3_count
        if not features:
            return [], total

        steam_ids = [f['steam_id_64'] for f in features]
        players = {p['steam_id_64']: p for p in StatsService.get_players_by_ids(steam_ids)}

        merged = []
        for f in features:
            m = dict(f)
            p = players.get(f['steam_id_64'])
            if p:
                m.update(dict(p))
            else:
                # No L2 profile: show the id as the name.
                m['username'] = f['steam_id_64']
                m['avatar_url'] = None
            merged.append(m)
        return merged, total

    # ------------------------------------------------------------------
    # Rebuild
    # ------------------------------------------------------------------

    @staticmethod
    def rebuild_all_features(min_matches=5):
        """Recompute all features from L2 and upsert them into ``dm_player_features``.

        Args:
            min_matches: players with fewer ``fact_match_players`` rows are skipped.

        Returns:
            Number of players processed, or 0 when there is no data or an error
            occurred (the error and traceback are printed, not raised).
        """
        from web.config import Config
        import traceback

        l3_db_path = Config.DB_L3_PATH
        l2_db_path = Config.DB_L2_PATH

        conn_l2 = sqlite3.connect(l2_db_path)
        conn_l2.row_factory = sqlite3.Row

        try:
            print("Loading L2 data...")
            df = FeatureService._load_and_calculate_dataframe(conn_l2, min_matches)

            if df is None or df.empty:
                print("No data to process.")
                return 0

            print("Calculating Scores...")
            df = FeatureService._calculate_ultimate_scores(df)

            print("Saving to L3...")
            conn_l3 = sqlite3.connect(l3_db_path)
            try:
                cursor = conn_l3.cursor()

                # Only persist DataFrame columns that exist in the target table.
                cursor.execute("PRAGMA table_info(dm_player_features)")
                valid_cols = {r[1] for r in cursor.fetchall()}
                df_to_save = df[[c for c in df.columns if c in valid_cols]].copy()
                df_to_save['updated_at'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')

                placeholders = ','.join(['?'] * len(df_to_save.columns))
                cols_str = ','.join(df_to_save.columns)
                sql = f"INSERT OR REPLACE INTO dm_player_features ({cols_str}) VALUES ({placeholders})"

                cursor.executemany(sql, df_to_save.values.tolist())
                conn_l3.commit()
            finally:
                # Close even when the insert fails, so the handle is not leaked.
                conn_l3.close()

            return len(df)

        except Exception as e:
            print(f"Rebuild Error: {e}")
            traceback.print_exc()
            return 0
        finally:
            conn_l2.close()

    # ------------------------------------------------------------------
    # Feature calculation helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _safe_ratio(numerator, denominator):
        """Element-wise ``numerator / denominator`` with zero denominators replaced by 1."""
        return numerator / denominator.replace(0, 1)

    @staticmethod
    def _left_join_by_player(df, stats, columns):
        """Left-join ``columns`` of ``stats`` (indexed by player id) onto ``df``."""
        right = stats[columns].rename_axis('steam_id_64').reset_index()
        return df.merge(right, on='steam_id_64', how='left')

    @staticmethod
    def _side_for_round(fh_side, round_num, halftime_round):
        """Return the side played in each round given the first-half side.

        Rounds with ``round_num <= halftime_round`` keep ``fh_side``; later rounds
        get the opposite side. A missing ``fh_side`` stays missing.
        """
        swapped = fh_side.map({'CT': 'T', 'T': 'CT'})
        return fh_side.where(round_num <= halftime_round, swapped)

    @staticmethod
    def _stability_features(df_matches):
        """Compute the ``sta_*`` features per player from per-match rows.

        ``df_matches`` needs columns steam_id_64, rating, is_win, start_time
        (epoch seconds) and duration.
        """
        columns = [
            'steam_id_64', 'sta_last_30_rating', 'sta_win_rating', 'sta_loss_rating',
            'sta_rating_volatility', 'sta_time_rating_corr', 'sta_fatigue_decay',
        ]
        records = []
        for pid, group in df_matches.groupby('steam_id_64'):
            group = group.sort_values('start_time')
            # assign() returns a new frame, so the groupby slice is not mutated.
            group = group.assign(date=pd.to_datetime(group['start_time'], unit='s').dt.date)

            # Fatigue heuristic: on days with 4+ matches, compare the mean rating
            # of the first three matches with the mean of the rest.
            fatigue_decays = []
            for _, day_matches in group.groupby('date'):
                if len(day_matches) >= 4:
                    day_ratings = day_matches['rating']
                    fatigue_decays.append(day_ratings.iloc[:3].mean() - day_ratings.iloc[3:].mean())
            avg_fatigue = np.mean(fatigue_decays) if fatigue_decays else 0

            records.append({
                'steam_id_64': pid,
                'sta_last_30_rating': group.tail(30)['rating'].mean(),
                'sta_win_rating': group[group['is_win'] == 1]['rating'].mean(),
                'sta_loss_rating': group[group['is_win'] == 0]['rating'].mean(),
                'sta_rating_volatility': group.tail(10)['rating'].std() if len(group) > 1 else 0,
                'sta_time_rating_corr': (
                    group['duration'].corr(group['rating'])
                    if len(group) > 2 and group['rating'].std() > 0 else 0
                ),
                'sta_fatigue_decay': avg_fatigue,
            })
        # Explicit columns keep the later merge valid even when there are no rows.
        return pd.DataFrame(records, columns=columns)

    @staticmethod
    def _elo_split_features(df_elo):
        """Compute mean K/D in matches above vs. at-or-below the player's mean match ELO."""
        columns = ['steam_id_64', 'bat_kd_diff_high_elo', 'bat_kd_diff_low_elo']
        # An all-NULL column arrives as object dtype; coerce so comparisons work.
        df_elo = df_elo.assign(elo=pd.to_numeric(df_elo['elo'], errors='coerce'))

        records = []
        for pid, group in df_elo.groupby('steam_id_64'):
            avg = group['elo'].mean()
            if pd.isna(avg) or not avg:
                avg = 1000
            records.append({
                'steam_id_64': pid,
                'bat_kd_diff_high_elo': group[group['elo'] > avg]['kd_ratio'].mean(),
                'bat_kd_diff_low_elo': group[group['elo'] <= avg]['kd_ratio'].mean(),
            })
        return pd.DataFrame(records, columns=columns)

    @staticmethod
    def _first_half_sides(df_teams, df_uids):
        """Map each (match, player) to the side played in the first half.

        ``df_teams.group_uids`` is parsed as a comma-separated uid list; a player
        belongs to the first team of the match whose list contains ``str(uid)``.
        ``group_fh_role == 1`` is treated as CT, anything else as T.
        """
        columns = ['match_id', 'steam_id_64', 'fh_side']
        if df_teams.empty or df_uids.empty:
            return pd.DataFrame(columns=columns)

        teams_by_match = {}
        for mid, role, raw_uids in zip(df_teams['match_id'], df_teams['group_fh_role'], df_teams['group_uids']):
            uids = {u.strip() for u in str(raw_uids).split(',') if u.strip()}
            teams_by_match.setdefault(mid, []).append((role, uids))

        rows = []
        for mid, sid, uid in zip(df_uids['match_id'], df_uids['steam_id_64'], df_uids['uid']):
            for role, uids in teams_by_match.get(mid, ()):
                if str(uid) in uids:
                    rows.append({
                        'match_id': mid,
                        'steam_id_64': sid,
                        'fh_side': 'CT' if role == 1 else 'T',
                    })
                    break
        return pd.DataFrame(rows, columns=columns)

    @staticmethod
    def _derive_round_winners(df_rounds):
        """Return ``df_rounds`` sorted by match/round with ``winner_side`` recomputed.

        The stored ``winner_side`` is overwritten with the side whose score
        increased relative to the previous round of the same match (previous
        score is 0 for the first round). The original author notes the stored
        value is mostly NULL.
        """
        df_rounds = df_rounds.sort_values(['match_id', 'round_num']).reset_index(drop=True)
        by_match = df_rounds.groupby('match_id')
        prev_ct = by_match['ct_score'].shift(1).fillna(0)
        prev_t = by_match['t_score'].shift(1).fillna(0)

        ct_won = df_rounds['ct_score'] > prev_ct
        t_won = df_rounds['t_score'] > prev_t
        df_rounds['winner_side'] = np.where(ct_won, 'CT', np.where(t_won, 'T', None))
        return df_rounds

    @staticmethod
    def _attach_event_sides(df_events, df_fh_sides, halftimes):
        """Add attacker/victim first-half side and actual side to each kill event.

        The halftime round is joined per match (not per player), so events whose
        attacker is not a tracked player still resolve the victim's side correctly.
        """
        sides = df_fh_sides[['match_id', 'steam_id_64', 'fh_side']]
        roles = (('attacker', 'att'), ('victim', 'vic'))

        for role, prefix in roles:
            id_col = f'{role}_steam_id'
            df_events = df_events.merge(
                sides.rename(columns={'steam_id_64': id_col, 'fh_side': f'{prefix}_fh_side'}),
                on=['match_id', id_col],
                how='left',
            )
        df_events = df_events.merge(halftimes, on='match_id', how='left')

        for role, prefix in roles:
            df_events[f'{role}_side'] = FeatureService._side_for_round(
                df_events[f'{prefix}_fh_side'], df_events['round_num'], df_events['halftime_round']
            )
        return df_events

    @staticmethod
    def _load_and_calculate_dataframe(conn, min_matches):
        """Load L2 data through ``conn`` and compute the raw feature columns.

        Args:
            conn: open SQLite connection to the L2 database.
            min_matches: minimum number of ``fact_match_players`` rows a player
                needs to be included.

        Returns:
            A DataFrame with one row per qualifying player and NaN replaced by 0,
            or ``None`` when no player qualifies.
        """
        ratio = FeatureService._safe_ratio
        join_stats = FeatureService._left_join_by_player

        # ---- 1. Basic per-player aggregates ----
        query_basic = """
            SELECT
                steam_id_64,
                COUNT(*) as matches_played,
                SUM(round_total) as rounds_played,
                AVG(rating) as basic_avg_rating,
                AVG(kd_ratio) as basic_avg_kd,
                AVG(adr) as basic_avg_adr,
                AVG(kast) as basic_avg_kast,
                AVG(rws) as basic_avg_rws,
                SUM(headshot_count) as sum_hs,
                SUM(kills) as sum_kills,
                SUM(deaths) as sum_deaths,
                SUM(first_kill) as sum_fk,
                SUM(first_death) as sum_fd,
                SUM(clutch_1v1) as sum_1v1,
                SUM(clutch_1v2) as sum_1v2,
                SUM(clutch_1v3) + SUM(clutch_1v4) + SUM(clutch_1v5) as sum_1v3p,
                SUM(kill_2) as sum_2k,
                SUM(kill_3) as sum_3k,
                SUM(kill_4) as sum_4k,
                SUM(kill_5) as sum_5k,
                SUM(assisted_kill) as sum_assist,
                SUM(perfect_kill) as sum_perfect,
                SUM(revenge_kill) as sum_revenge,
                SUM(awp_kill) as sum_awp,
                SUM(jump_count) as sum_jump,
                SUM(mvp_count) as sum_mvps,
                SUM(planted_bomb) as sum_plants,
                SUM(defused_bomb) as sum_defuses,
                SUM(CASE
                    WHEN flash_assists > 0 THEN flash_assists
                    WHEN assists > assisted_kill THEN assists - assisted_kill
                    ELSE 0
                END) as sum_flash_assists,
                SUM(throw_harm) as sum_util_dmg,
                SUM(flash_time) as sum_flash_time,
                SUM(flash_enemy) as sum_flash_enemy,
                SUM(flash_team) as sum_flash_team,
                SUM(util_flash_usage) as sum_util_flash,
                SUM(util_smoke_usage) as sum_util_smoke,
                SUM(util_molotov_usage) as sum_util_molotov,
                SUM(util_he_usage) as sum_util_he,
                SUM(util_decoy_usage) as sum_util_decoy
            FROM fact_match_players
            GROUP BY steam_id_64
            HAVING COUNT(*) >= ?
        """
        df = pd.read_sql_query(query_basic, conn, params=(min_matches,))
        if df.empty:
            return None

        df['basic_headshot_rate'] = ratio(df['sum_hs'], df['sum_kills'])
        opening_duels = df['sum_fk'] + df['sum_fd']
        df['basic_first_kill_rate'] = ratio(df['sum_fk'], opening_duels)
        df['basic_first_death_rate'] = ratio(df['sum_fd'], opening_duels)

        # Features that are simply "total / matches played".
        per_match_averages = {
            'basic_avg_headshot_kills': 'sum_hs',
            'basic_avg_first_kill': 'sum_fk',
            'basic_avg_first_death': 'sum_fd',
            'basic_avg_kill_2': 'sum_2k',
            'basic_avg_kill_3': 'sum_3k',
            'basic_avg_kill_4': 'sum_4k',
            'basic_avg_kill_5': 'sum_5k',
            'basic_avg_assisted_kill': 'sum_assist',
            'basic_avg_perfect_kill': 'sum_perfect',
            'basic_avg_revenge_kill': 'sum_revenge',
            'basic_avg_awp_kill': 'sum_awp',
            'basic_avg_jump_count': 'sum_jump',
            'basic_avg_mvps': 'sum_mvps',
            'basic_avg_plants': 'sum_plants',
            'basic_avg_defuses': 'sum_defuses',
            'basic_avg_flash_assists': 'sum_flash_assists',
            'util_avg_nade_dmg': 'sum_util_dmg',
            'util_avg_flash_time': 'sum_flash_time',
            'util_avg_flash_enemy': 'sum_flash_enemy',
            'hps_clutch_win_rate_1v1': 'sum_1v1',
            'hps_clutch_win_rate_1v3_plus': 'sum_1v3p',
        }
        for feature, total_col in per_match_averages.items():
            df[feature] = df[total_col] / df['matches_played']

        valid_ids = tuple(df['steam_id_64'].tolist())
        placeholders = ','.join(['?'] * len(valid_ids))
        # Sub-select reused by every "all matches of the tracked players" query.
        tracked_matches = f"SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders})"

        # ---- 2. STA (stability) ----
        query_sta = f"""
            SELECT mp.steam_id_64, mp.rating, mp.is_win, m.start_time, m.duration
            FROM fact_match_players mp
            JOIN fact_matches m ON mp.match_id = m.match_id
            WHERE mp.steam_id_64 IN ({placeholders})
            ORDER BY mp.steam_id_64, m.start_time
        """
        df_matches = pd.read_sql_query(query_sta, conn, params=valid_ids)
        df = df.merge(FeatureService._stability_features(df_matches), on='steam_id_64', how='left')

        # ---- 3. BAT (K/D vs. match ELO, duel win rate) ----
        query_elo = f"""
            SELECT mp.steam_id_64, mp.kd_ratio,
                   (SELECT AVG(group_origin_elo) FROM fact_match_teams fmt
                    WHERE fmt.match_id = mp.match_id AND group_origin_elo > 0) as elo
            FROM fact_match_players mp
            WHERE mp.steam_id_64 IN ({placeholders})
        """
        df_elo = pd.read_sql_query(query_elo, conn, params=valid_ids)
        df = df.merge(FeatureService._elo_split_features(df_elo), on='steam_id_64', how='left')

        query_duel = f"""
            SELECT steam_id_64, SUM(entry_kills) as ek, SUM(entry_deaths) as ed
            FROM fact_match_players WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64
        """
        df_duel = pd.read_sql_query(query_duel, conn, params=valid_ids)
        df_duel['bat_avg_duel_win_rate'] = ratio(df_duel['ek'], df_duel['ek'] + df_duel['ed'])
        df = df.merge(df_duel[['steam_id_64', 'bat_avg_duel_win_rate']], on='steam_id_64', how='left')

        # ---- 4. Side, round and kill-event inputs for HPS / PTL / T-CT ----
        query_teams = f"""
            SELECT match_id, group_fh_role, group_uids
            FROM fact_match_teams
            WHERE match_id IN ({tracked_matches})
        """
        df_teams = pd.read_sql_query(query_teams, conn, params=valid_ids)

        query_uids = f"SELECT match_id, steam_id_64, uid FROM fact_match_players WHERE steam_id_64 IN ({placeholders})"
        df_uids = pd.read_sql_query(query_uids, conn, params=valid_ids)

        query_meta = f"SELECT match_id, start_time FROM fact_matches WHERE match_id IN ({tracked_matches})"
        df_meta = pd.read_sql_query(query_meta, conn, params=valid_ids)
        # Matches after this timestamp switch sides after round 12, older ones
        # after round 15 (original note: "CS2 Release Date approx").
        cs2_cutoff_ts = 1695772800
        df_meta['halftime_round'] = np.where(df_meta['start_time'] > cs2_cutoff_ts, 12, 15)
        halftimes = df_meta[['match_id', 'halftime_round']].drop_duplicates('match_id')

        df_fh_sides = FeatureService._first_half_sides(df_teams, df_uids)
        if not df_fh_sides.empty:
            df_fh_sides = df_fh_sides.merge(halftimes, on='match_id', how='left')

        query_events = f"""
            SELECT match_id, round_num, attacker_steam_id, victim_steam_id, event_type, is_headshot, event_time
            FROM fact_round_events
            WHERE event_type='kill'
            AND (attacker_steam_id IN ({placeholders}) OR victim_steam_id IN ({placeholders}))
        """
        df_events = pd.read_sql_query(query_events, conn, params=valid_ids + valid_ids)

        query_rounds = f"""
            SELECT match_id, round_num, ct_score, t_score, winner_side
            FROM fact_rounds
            WHERE match_id IN ({tracked_matches})
        """
        df_rounds = pd.read_sql_query(query_rounds, conn, params=valid_ids)
        if not df_rounds.empty:
            df_rounds = FeatureService._derive_round_winners(df_rounds)

        has_events = not df_events.empty
        has_sides = not df_fh_sides.empty

        # One row per (round, tracked player) with the side played in that round.
        # Built once here; both the side stats and the pistol win rate use it.
        df_player_rounds = None
        if has_sides and (has_events or not df_rounds.empty):
            df_player_rounds = df_rounds[['match_id', 'round_num']].merge(df_fh_sides, on='match_id')
            df_player_rounds['side'] = FeatureService._side_for_round(
                df_player_rounds['fh_side'], df_player_rounds['round_num'], df_player_rounds['halftime_round']
            )

        # NOTE(review): pistol rounds are hard-coded as 1 and 13 throughout, while
        # halftime_round may be 15 for older matches - confirm whether round 16
        # should count for those.
        pistol_rounds = [1, 13]

        if has_events and has_sides:
            df_events = FeatureService._attach_event_sides(df_events, df_fh_sides, halftimes)
            df_events = df_events.merge(df_rounds, on=['match_id', 'round_num'], how='left')

            # --- HPS: match-point win rate ---
            # "Match point" rounds are those whose recorded score shows 12 or 15 for either side.
            mp_rounds = df_rounds[df_rounds['ct_score'].isin([12, 15]) | df_rounds['t_score'].isin([12, 15])]
            if not mp_rounds.empty:
                mp_player = df_player_rounds.merge(
                    mp_rounds[['match_id', 'round_num', 'winner_side']], on=['match_id', 'round_num']
                )
                mp_player['is_win'] = (mp_player['side'] == mp_player['winner_side']).astype(int)
                hps_mp = mp_player.groupby('steam_id_64')['is_win'].mean().reset_index()
                hps_mp.rename(columns={'is_win': 'hps_match_point_win_rate'}, inplace=True)
                df = df.merge(hps_mp, on='steam_id_64', how='left')
            else:
                df['hps_match_point_win_rate'] = 0.5

            # --- HPS: K/D while trailing by 4+ rounds minus overall K/D ---
            att_is_ct = df_events['attacker_side'] == 'CT'
            vic_is_ct = df_events['victim_side'] == 'CT'
            att_team_score = np.where(att_is_ct, df_events['ct_score'], df_events['t_score'])
            att_opp_score = np.where(att_is_ct, df_events['t_score'], df_events['ct_score'])
            vic_team_score = np.where(vic_is_ct, df_events['ct_score'], df_events['t_score'])
            vic_opp_score = np.where(vic_is_ct, df_events['t_score'], df_events['ct_score'])
            df_events['is_comeback_att'] = att_team_score + 4 <= att_opp_score
            df_events['is_comeback_vic'] = vic_team_score + 4 <= vic_opp_score

            kd_stats = pd.DataFrame({
                'k': df_events.groupby('attacker_steam_id').size(),
                'd': df_events.groupby('victim_steam_id').size(),
                'cb_k': df_events[df_events['is_comeback_att']].groupby('attacker_steam_id').size(),
                'cb_d': df_events[df_events['is_comeback_vic']].groupby('victim_steam_id').size(),
            }).fillna(0)
            kd_stats['kd'] = ratio(kd_stats['k'], kd_stats['d'])
            kd_stats['cb_kd'] = ratio(kd_stats['cb_k'], kd_stats['cb_d'])
            kd_stats['hps_comeback_kd_diff'] = kd_stats['cb_kd'] - kd_stats['kd']
            df = join_stats(df, kd_stats, ['hps_comeback_kd_diff'])

            # --- PTL: pistol-round K/D and headshot share ---
            df_pistol = df_events[df_events['round_num'].isin(pistol_rounds)]
            if not df_pistol.empty:
                p_stats = pd.DataFrame({
                    'pk': df_pistol.groupby('attacker_steam_id').size(),
                    'pd': df_pistol.groupby('victim_steam_id').size(),
                }).fillna(0)
                p_stats['ptl_pistol_kd'] = ratio(p_stats['pk'], p_stats['pd'])

                p_stats['phs'] = df_pistol[df_pistol['is_headshot'] == 1].groupby('attacker_steam_id').size()
                p_stats['phs'] = p_stats['phs'].fillna(0)
                # Despite its name this column is headshot kills / kills in pistol rounds.
                p_stats['ptl_pistol_util_efficiency'] = ratio(p_stats['phs'], p_stats['pk'])
                df = join_stats(df, p_stats, ['ptl_pistol_kd', 'ptl_pistol_util_efficiency'])
            else:
                df['ptl_pistol_kd'] = 1.0
                df['ptl_pistol_util_efficiency'] = 0.0

            # --- T/CT: kills per death on each side ---
            att_side = df_events['attacker_side']
            vic_side = df_events['victim_side']
            side_stats = pd.DataFrame({
                'ct_k': df_events[att_side == 'CT'].groupby('attacker_steam_id').size(),
                'ct_d': df_events[vic_side == 'CT'].groupby('victim_steam_id').size(),
                't_k': df_events[att_side == 'T'].groupby('attacker_steam_id').size(),
                't_d': df_events[vic_side == 'T'].groupby('victim_steam_id').size(),
            }).fillna(0)
            side_stats['side_rating_ct'] = ratio(side_stats['ct_k'], side_stats['ct_d'])
            side_stats['side_rating_t'] = ratio(side_stats['t_k'], side_stats['t_d'])
            side_stats['side_kd_diff_ct_t'] = side_stats['side_rating_ct'] - side_stats['side_rating_t']
            df = join_stats(df, side_stats, ['side_rating_ct', 'side_rating_t', 'side_kd_diff_ct_t'])

            # --- T/CT: first kills per round played on each side ---
            rounds_per_side = df_player_rounds.groupby(['steam_id_64', 'side']).size().unstack(fill_value=0)
            for side in ('CT', 'T'):
                if side not in rounds_per_side.columns:
                    rounds_per_side[side] = 0

            # First kill of a round = earliest kill event of that (match, round).
            fk_events = df_events.sort_values('event_time').drop_duplicates(['match_id', 'round_num'])
            fk_stats = pd.DataFrame({
                'fk_ct': fk_events[fk_events['attacker_side'] == 'CT'].groupby('attacker_steam_id').size(),
                'fk_t': fk_events[fk_events['attacker_side'] == 'T'].groupby('attacker_steam_id').size(),
            }).fillna(0)
            fk_stats = fk_stats.join(rounds_per_side, how='outer').fillna(0)
            fk_stats['side_first_kill_rate_ct'] = ratio(fk_stats['fk_ct'], fk_stats['CT'])
            fk_stats['side_first_kill_rate_t'] = ratio(fk_stats['fk_t'], fk_stats['T'])
            df = join_stats(df, fk_stats, ['side_first_kill_rate_ct', 'side_first_kill_rate_t'])

        else:
            # No kill events or no side information: neutral zeros for every event-based feature.
            fallback_cols = [
                'hps_match_point_win_rate', 'hps_comeback_kd_diff', 'ptl_pistol_kd',
                'ptl_pistol_util_efficiency', 'side_rating_ct', 'side_rating_t',
                'side_first_kill_rate_ct', 'side_first_kill_rate_t', 'side_kd_diff_ct_t',
            ]
            for c in fallback_cols:
                df[c] = 0

        df['hps_match_point_win_rate'] = df['hps_match_point_win_rate'].fillna(0.5)

        # --- HPS: average entry kills in lost matches ---
        q_mp_team = f"SELECT match_id, steam_id_64, is_win, entry_kills FROM fact_match_players WHERE steam_id_64 IN ({placeholders})"
        df_mp_team = pd.read_sql_query(q_mp_team, conn, params=valid_ids)
        if not df_mp_team.empty:
            losing_matches = df_mp_team[df_mp_team['is_win'] == 0]
            if not losing_matches.empty:
                pressure_entry = losing_matches.groupby('steam_id_64')['entry_kills'].mean().reset_index()
                pressure_entry.rename(columns={'entry_kills': 'hps_pressure_entry_rate'}, inplace=True)
                df = df.merge(pressure_entry, on='steam_id_64', how='left')

        if 'hps_pressure_entry_rate' not in df.columns:
            df['hps_pressure_entry_rate'] = 0
        df['hps_pressure_entry_rate'] = df['hps_pressure_entry_rate'].fillna(0)

        # ---- 5. PTL: pistol-round kills and multi-kills per match ----
        query_ptl = f"""
            SELECT ev.attacker_steam_id as steam_id_64, COUNT(*) as pistol_kills
            FROM fact_round_events ev
            WHERE ev.event_type = 'kill' AND ev.round_num IN (1, 13)
            AND ev.attacker_steam_id IN ({placeholders})
            GROUP BY ev.attacker_steam_id
        """
        df_ptl = pd.read_sql_query(query_ptl, conn, params=valid_ids)
        if not df_ptl.empty:
            df = df.merge(df_ptl, on='steam_id_64', how='left')
            df['ptl_pistol_kills'] = df['pistol_kills'] / df['matches_played']
        else:
            df['ptl_pistol_kills'] = 0

        query_ptl_multi = f"""
            SELECT attacker_steam_id as steam_id_64, COUNT(*) as multi_cnt
            FROM (
                SELECT match_id, round_num, attacker_steam_id, COUNT(*) as k
                FROM fact_round_events
                WHERE event_type = 'kill' AND round_num IN (1, 13)
                AND attacker_steam_id IN ({placeholders})
                GROUP BY match_id, round_num, attacker_steam_id
                HAVING k >= 2
            )
            GROUP BY attacker_steam_id
        """
        df_ptl_multi = pd.read_sql_query(query_ptl_multi, conn, params=valid_ids)
        if not df_ptl_multi.empty:
            df = df.merge(df_ptl_multi, on='steam_id_64', how='left')
            df['ptl_pistol_multikills'] = df['multi_cnt'] / df['matches_played']
        else:
            df['ptl_pistol_multikills'] = 0

        # --- PTL: pistol-round win rate, using the recomputed winner_side ---
        if not df_rounds.empty and has_sides:
            player_pistol = df_player_rounds[df_player_rounds['round_num'].isin(pistol_rounds)].copy()
            player_pistol = player_pistol.merge(
                df_rounds[['match_id', 'round_num', 'winner_side']], on=['match_id', 'round_num'], how='left'
            )
            player_pistol['is_win'] = (player_pistol['side'] == player_pistol['winner_side']).astype(int)

            ptl_wins = player_pistol.groupby('steam_id_64')['is_win'].agg(['sum', 'count']).reset_index()
            ptl_wins.rename(columns={'sum': 'pistol_wins', 'count': 'pistol_rounds'}, inplace=True)
            ptl_wins['ptl_pistol_win_rate'] = ratio(ptl_wins['pistol_wins'], ptl_wins['pistol_rounds'])
            df = df.merge(ptl_wins[['steam_id_64', 'ptl_pistol_win_rate']], on='steam_id_64', how='left')
        else:
            df['ptl_pistol_win_rate'] = 0.5

        df['ptl_pistol_multikills'] = df['ptl_pistol_multikills'].fillna(0)
        df['ptl_pistol_win_rate'] = df['ptl_pistol_win_rate'].fillna(0.5)

        # ---- 7. UTIL: utility usage per round, scaled by 100 ----
        df['util_usage_rate'] = (
            df['sum_util_flash'] + df['sum_util_smoke'] +
            df['sum_util_molotov'] + df['sum_util_he'] + df['sum_util_decoy']
        ) / df['rounds_played'].replace(0, 1) * 100

        # When no usage counts exist at all, approximate from average equipment value.
        if df['util_usage_rate'].sum() == 0:
            query_eco = f"""
                SELECT steam_id_64, AVG(equipment_value) as avg_equip_val
                FROM fact_round_player_economy
                WHERE steam_id_64 IN ({placeholders})
                GROUP BY steam_id_64
            """
            df_eco = pd.read_sql_query(query_eco, conn, params=valid_ids)
            if not df_eco.empty:
                df_eco['util_usage_rate_backup'] = df_eco['avg_equip_val'] / 50.0  # scaling factor
                df = df.merge(df_eco[['steam_id_64', 'util_usage_rate_backup']], on='steam_id_64', how='left')
                df['util_usage_rate'] = df['util_usage_rate_backup'].fillna(0)
                df.drop(columns=['util_usage_rate_backup'], inplace=True)

        # Final mappings
        df['total_matches'] = df['matches_played']

        return df.fillna(0)

    @staticmethod
    def _calculate_ultimate_scores(df):
        """Return a copy of ``df`` with the six ``score_*`` columns added.

        Each input column is min-max normalised to 0-100 across the rows of
        ``df``; a missing or constant column contributes a flat 50. Scores are
        weighted sums of those normalised values.
        """
        def n(col):
            if col not in df.columns:
                return 50
            s = df[col]
            lo, hi = s.min(), s.max()
            if hi == lo:
                return 50
            return (s - lo) / (hi - lo) * 100

        df = df.copy()

        # BAT
        # NOTE(review): these weights sum to 0.90, not 1.0 - confirm whether intended.
        df['score_bat'] = (
            0.25 * n('basic_avg_rating') +
            0.20 * n('basic_avg_kd') +
            0.15 * n('basic_avg_adr') +
            0.10 * n('bat_avg_duel_win_rate') +
            0.10 * n('bat_kd_diff_high_elo') +
            0.10 * n('basic_avg_kill_3')
        )

        # STA
        # NOTE(review): weights sum to 0.90; abs() is applied after normalisation
        # to 0-100, where it has no effect - confirm the intended formula.
        df['score_sta'] = (
            0.30 * (100 - n('sta_rating_volatility')) +
            0.30 * n('sta_loss_rating') +
            0.20 * n('sta_win_rating') +
            0.10 * (100 - abs(n('sta_time_rating_corr')))
        )

        # HPS
        df['score_hps'] = (
            0.30 * n('sum_1v3p') +
            0.20 * n('hps_match_point_win_rate') +
            0.20 * n('hps_comeback_kd_diff') +
            0.15 * n('hps_pressure_entry_rate') +
            0.15 * n('basic_avg_rating')
        )

        # PTL
        df['score_ptl'] = (
            0.40 * n('ptl_pistol_kills') +
            0.40 * n('ptl_pistol_win_rate') +
            0.20 * n('basic_avg_headshot_kills')  # Pistol rounds rely on HS
        )

        # T/CT
        df['score_tct'] = (
            0.35 * n('side_rating_ct') +
            0.35 * n('side_rating_t') +
            0.15 * n('side_first_kill_rate_ct') +
            0.15 * n('side_first_kill_rate_t')
        )

        # UTIL - emphasises usage frequency
        df['score_util'] = (
            0.35 * n('util_usage_rate') +
            0.25 * n('util_avg_nade_dmg') +
            0.20 * n('util_avg_flash_time') +
            0.20 * n('util_avg_flash_enemy')
        )

        return df

    @staticmethod
    def get_roster_features_distribution(target_steam_id):
        """Rank the target player's ``score_*`` values within the active roster.

        The roster is the id list stored as JSON in ``player_ids_json`` of the
        first lineup returned by ``WebService.get_lineups()``.

        Returns:
            ``None`` when there is no roster or no roster member has L3 data.
            Otherwise a dict keyed by metric name with ``val``, ``rank``
            (1 = highest; ties share the best rank), ``total``, ``min``,
            ``max`` and ``avg``. A target without L3 data is included with
            all-zero scores.
        """
        from web.services.web_service import WebService
        import json

        # 1. Active roster ids
        active_roster_ids = []
        lineups = WebService.get_lineups()
        if lineups:
            try:
                raw_ids = json.loads(lineups[0]['player_ids_json'])
                active_roster_ids = [str(uid) for uid in raw_ids]
            except (ValueError, TypeError, KeyError, IndexError):
                # Missing or malformed roster JSON is treated as "no roster".
                active_roster_ids = []

        if not active_roster_ids:
            return None

        # 2. L3 scores of every roster member
        placeholders = ','.join('?' for _ in active_roster_ids)
        sql = f"""
            SELECT
                steam_id_64,
                score_bat, score_sta, score_hps, score_ptl, score_tct, score_util
            FROM dm_player_features
            WHERE steam_id_64 IN ({placeholders})
        """
        rows = query_db('l3', sql, active_roster_ids)
        if not rows:
            return None

        metrics = ['score_bat', 'score_sta', 'score_hps', 'score_ptl', 'score_tct', 'score_util']
        stats_map = {row['steam_id_64']: dict(row) for row in rows}
        target_steam_id = str(target_steam_id)

        # Target without L3 data yet: rank it with zeros.
        if target_steam_id not in stats_map:
            stats_map[target_steam_id] = {m: 0 for m in metrics}

        # 3. Distribution per metric
        result = {}
        for m in metrics:
            values = sorted((p.get(m, 0) or 0 for p in stats_map.values()), reverse=True)
            target_val = stats_map[target_steam_id].get(m, 0) or 0

            try:
                rank = values.index(target_val) + 1
            except ValueError:
                rank = len(values)

            result[m] = {
                'val': target_val,
                'rank': rank,
                'total': len(values),
                'min': min(values),
                'max': max(values),
                'avg': sum(values) / len(values),
            }

        return result