from web.database import query_db, get_db, execute_db
import sqlite3
import pandas as pd
import numpy as np
class FeatureService:
    @staticmethod
    def get_player_features(steam_id):
        sql = "SELECT * FROM dm_player_features WHERE steam_id_64 = ?"
        return query_db('l3', sql, [steam_id], one=True)
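    # Minimal caller sketch (the steam id below is a hypothetical example):
    #   row = FeatureService.get_player_features('76561198000000000')
    #   rating = row['basic_avg_rating'] if row else None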
    @staticmethod
    def get_players_list(page=1, per_page=20, sort_by='rating', search=None):
        offset = (page - 1) * per_page
        # Map public sort keys to L3 feature columns
        sort_map = {
            'rating': 'basic_avg_rating',
            'kd': 'basic_avg_kd',
            'kast': 'basic_avg_kast',
            'matches': 'matches_played'
        }
        order_col = sort_map.get(sort_by, 'basic_avg_rating')
        from web.services.stats_service import StatsService
        # Helper to attach match counts from the L2 fact table
        def attach_match_counts(player_list):
            if not player_list:
                return
            ids = [p['steam_id_64'] for p in player_list]
            # Batch query for counts from L2
            placeholders = ','.join('?' for _ in ids)
            sql = f"""
                SELECT steam_id_64, COUNT(*) as cnt
                FROM fact_match_players
                WHERE steam_id_64 IN ({placeholders})
                GROUP BY steam_id_64
            """
            counts = query_db('l2', sql, ids)
            cnt_dict = {r['steam_id_64']: r['cnt'] for r in counts}
            for p in player_list:
                p['matches_played'] = cnt_dict.get(p['steam_id_64'], 0)
        if search:
            # Search mode: fetch up to 100 matching players from L2, then enrich with L3 features
            l2_players, _ = StatsService.get_players(page=1, per_page=100, search=search)
            if not l2_players:
                return [], 0
            steam_ids = [p['steam_id_64'] for p in l2_players]
            placeholders = ','.join('?' for _ in steam_ids)
            sql = f"SELECT * FROM dm_player_features WHERE steam_id_64 IN ({placeholders})"
            features = query_db('l3', sql, steam_ids)
            f_dict = {f['steam_id_64']: f for f in features}
            # Get match counts for sorting
            count_sql = f"SELECT steam_id_64, COUNT(*) as cnt FROM fact_match_players WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64"
            counts = query_db('l2', count_sql, steam_ids)
            cnt_dict = {r['steam_id_64']: r['cnt'] for r in counts}
            merged = []
            for p in l2_players:
                f = f_dict.get(p['steam_id_64'])
                m = dict(p)
                if f:
                    m.update(dict(f))
                else:
                    # Fallback: compute basic stats on the fly when L3 has no row
                    stats = StatsService.get_player_basic_stats(p['steam_id_64'])
                    if stats:
                        m['basic_avg_rating'] = stats['rating']
                        m['basic_avg_kd'] = stats['kd']
                        m['basic_avg_kast'] = stats['kast']
                    else:
                        m['basic_avg_rating'] = 0
                        m['basic_avg_kd'] = 0
                        m['basic_avg_kast'] = 0
                m['matches_played'] = cnt_dict.get(p['steam_id_64'], 0)
                merged.append(m)
            merged.sort(key=lambda x: x.get(order_col, 0) or 0, reverse=True)
            total = len(merged)
            start = (page - 1) * per_page
            end = start + per_page
            return merged[start:end], total
        else:
            # Browse mode
            l3_count = query_db('l3', "SELECT COUNT(*) as cnt FROM dm_player_features", one=True)['cnt']
            if l3_count == 0 or sort_by == 'matches':
                if sort_by == 'matches':
                    sql = """
                        SELECT steam_id_64, COUNT(*) as cnt
                        FROM fact_match_players
                        GROUP BY steam_id_64
                        ORDER BY cnt DESC
                        LIMIT ? OFFSET ?
                    """
                    top_ids = query_db('l2', sql, [per_page, offset])
                    if not top_ids:
                        return [], 0
                    total = query_db('l2', "SELECT COUNT(DISTINCT steam_id_64) as cnt FROM fact_match_players", one=True)['cnt']
                    ids = [r['steam_id_64'] for r in top_ids]
                    l2_players = StatsService.get_players_by_ids(ids)
                    # Merge L2 profiles with L3 features, preserving the count-based order
                    merged = []
                    p_ph = ','.join('?' for _ in ids)
                    f_sql = f"SELECT * FROM dm_player_features WHERE steam_id_64 IN ({p_ph})"
                    features = query_db('l3', f_sql, ids)
                    f_dict = {f['steam_id_64']: f for f in features}
                    p_dict = {p['steam_id_64']: p for p in l2_players}
                    for r in top_ids:
                        sid = r['steam_id_64']
                        p = p_dict.get(sid)
                        if not p:
                            continue
                        m = dict(p)
                        f = f_dict.get(sid)
                        if f:
                            m.update(dict(f))
                        else:
                            stats = StatsService.get_player_basic_stats(sid)
                            if stats:
                                m['basic_avg_rating'] = stats['rating']
                                m['basic_avg_kd'] = stats['kd']
                                m['basic_avg_kast'] = stats['kast']
                            else:
                                m['basic_avg_rating'] = 0
                                m['basic_avg_kd'] = 0
                                m['basic_avg_kast'] = 0
                        m['matches_played'] = r['cnt']
                        merged.append(m)
                    return merged, total
                # L3 empty fallback: serve L2 profiles with on-the-fly basic stats
                l2_players, total = StatsService.get_players(page, per_page, sort_by=None)
                merged = []
                attach_match_counts(l2_players)
                for p in l2_players:
                    m = dict(p)
                    stats = StatsService.get_player_basic_stats(p['steam_id_64'])
                    if stats:
                        m['basic_avg_rating'] = stats['rating']
                        m['basic_avg_kd'] = stats['kd']
                        m['basic_avg_kast'] = stats['kast']
                    else:
                        m['basic_avg_rating'] = 0
                        m['basic_avg_kd'] = 0
                        m['basic_avg_kast'] = 0
                    m['matches_played'] = p.get('matches_played', 0)
                    merged.append(m)
                if sort_by != 'rating':
                    merged.sort(key=lambda x: x.get(order_col, 0) or 0, reverse=True)
                return merged, total
            # Normal L3 browse: sort and paginate directly in SQL
            sql = f"SELECT * FROM dm_player_features ORDER BY {order_col} DESC LIMIT ? OFFSET ?"
            features = query_db('l3', sql, [per_page, offset])
            total = query_db('l3', "SELECT COUNT(*) as cnt FROM dm_player_features", one=True)['cnt']
            if not features:
                return [], total
            steam_ids = [f['steam_id_64'] for f in features]
            l2_players = StatsService.get_players_by_ids(steam_ids)
            p_dict = {p['steam_id_64']: p for p in l2_players}
            merged = []
            for f in features:
                m = dict(f)
                p = p_dict.get(f['steam_id_64'])
                if p:
                    m.update(dict(p))
                else:
                    # No L2 profile: fall back to the raw id as display name
                    m['username'] = f['steam_id_64']
                    m['avatar_url'] = None
                merged.append(m)
            return merged, total
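    # Minimal caller sketch (hypothetical pagination values; the web layer is
    # assumed to drive a pager from `total`):
    #   players, total = FeatureService.get_players_list(page=2, per_page=20, sort_by='kd')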
    @staticmethod
    def rebuild_all_features(min_matches=5):
        """
        Refreshes the L3 data mart with full feature calculations.

        NOTE: min_matches is accepted for API compatibility but is not
        currently applied as a filter in this implementation.
        """
        from web.config import Config
        from web.services.web_service import WebService
        import json

        l3_db_path = Config.DB_L3_PATH
        l2_db_path = Config.DB_L2_PATH

        # Collect the players that appear in any team lineup
        lineups = WebService.get_lineups()
        team_player_ids = set()
        for lineup in lineups:
            if lineup['player_ids_json']:
                try:
                    ids = json.loads(lineup['player_ids_json'])
                    # Ensure IDs are strings
                    team_player_ids.update([str(i) for i in ids])
                except (json.JSONDecodeError, TypeError):
                    # Malformed lineup JSON: skip this lineup
                    pass
        if not team_player_ids:
            print("No players found in any team lineup. Skipping L3 rebuild.")
            return 0
        conn_l2 = sqlite3.connect(l2_db_path)
        conn_l2.row_factory = sqlite3.Row
        try:
            print(f"Loading L2 data for {len(team_player_ids)} players...")
            df = FeatureService._load_and_calculate_dataframe(conn_l2, list(team_player_ids))
            if df is None or df.empty:
                print("No data to process.")
                return 0
            print("Calculating scores...")
            df = FeatureService._calculate_ultimate_scores(df)
            print("Saving to L3...")
            conn_l3 = sqlite3.connect(l3_db_path)
            try:
                cursor = conn_l3.cursor()
                # Keep only the DataFrame columns that exist in the target table
                cursor.execute("PRAGMA table_info(dm_player_features)")
                valid_cols = [r[1] for r in cursor.fetchall()]
                df_cols = [c for c in df.columns if c in valid_cols]
                df_to_save = df[df_cols].copy()
                df_to_save['updated_at'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
                print(f"DEBUG: Saving {len(df_to_save.columns)} columns to L3. Sample side_kd_ct: {df_to_save.get('side_kd_ct', pd.Series([0])).iloc[0]}")
                placeholders = ','.join(['?'] * len(df_to_save.columns))
                cols_str = ','.join(df_to_save.columns)
                sql = f"INSERT OR REPLACE INTO dm_player_features ({cols_str}) VALUES ({placeholders})"
                data = df_to_save.values.tolist()
                cursor.executemany(sql, data)
                conn_l3.commit()
            finally:
                # Close the L3 connection even if the save fails
                conn_l3.close()
            return len(df)
        except Exception as e:
            print(f"Rebuild Error: {e}")
            import traceback
            traceback.print_exc()
            return 0
        finally:
            conn_l2.close()
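    # Minimal rebuild sketch (assumes this runs where web.config.Config is
    # importable, e.g. a management script or shell):
    #   n = FeatureService.rebuild_all_features()
    #   print(f"Rebuilt L3 features for {n} players")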
    @staticmethod
    def _load_and_calculate_dataframe(conn, player_ids):
        if not player_ids:
            return None
        placeholders = ','.join(['?'] * len(player_ids))
        # 1. Basic stats, aggregated per player over all matches
        query_basic = f"""
            SELECT
                steam_id_64,
                COUNT(*) as matches_played,
                SUM(round_total) as rounds_played,
                AVG(rating) as basic_avg_rating,
                AVG(kd_ratio) as basic_avg_kd,
                AVG(adr) as basic_avg_adr,
                AVG(kast) as basic_avg_kast,
                AVG(rws) as basic_avg_rws,
                SUM(headshot_count) as sum_hs,
                SUM(kills) as sum_kills,
                SUM(deaths) as sum_deaths,
                SUM(first_kill) as sum_fk,
                SUM(first_death) as sum_fd,
                SUM(clutch_1v1) as sum_1v1,
                SUM(clutch_1v2) as sum_1v2,
                SUM(clutch_1v3) + SUM(clutch_1v4) + SUM(clutch_1v5) as sum_1v3p,
                SUM(kill_2) as sum_2k,
                SUM(kill_3) as sum_3k,
                SUM(kill_4) as sum_4k,
                SUM(kill_5) as sum_5k,
                SUM(assisted_kill) as sum_assist,
                SUM(perfect_kill) as sum_perfect,
                SUM(revenge_kill) as sum_revenge,
                SUM(awp_kill) as sum_awp,
                SUM(jump_count) as sum_jump,
                SUM(mvp_count) as sum_mvps,
                SUM(planted_bomb) as sum_plants,
                SUM(defused_bomb) as sum_defuses,
                SUM(CASE
                    WHEN flash_assists > 0 THEN flash_assists
                    WHEN assists > assisted_kill THEN assists - assisted_kill
                    ELSE 0
                END) as sum_flash_assists,
                SUM(throw_harm) as sum_util_dmg,
                SUM(flash_time) as sum_flash_time,
                SUM(flash_enemy) as sum_flash_enemy,
                SUM(flash_team) as sum_flash_team,
                SUM(util_flash_usage) as sum_util_flash,
                SUM(util_smoke_usage) as sum_util_smoke,
                SUM(util_molotov_usage) as sum_util_molotov,
                SUM(util_he_usage) as sum_util_he,
                SUM(util_decoy_usage) as sum_util_decoy
            FROM fact_match_players
            WHERE steam_id_64 IN ({placeholders})
            GROUP BY steam_id_64
        """
        df = pd.read_sql_query(query_basic, conn, params=player_ids)
        if df.empty:
            return None
        # Basic derived rates (denominators guarded against zero)
        df['basic_headshot_rate'] = df['sum_hs'] / df['sum_kills'].replace(0, 1)
        df['basic_avg_headshot_kills'] = df['sum_hs'] / df['matches_played']
        df['basic_avg_first_kill'] = df['sum_fk'] / df['matches_played']
        df['basic_avg_first_death'] = df['sum_fd'] / df['matches_played']
        df['basic_first_kill_rate'] = df['sum_fk'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1)
        df['basic_first_death_rate'] = df['sum_fd'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1)
        df['basic_avg_kill_2'] = df['sum_2k'] / df['matches_played']
        df['basic_avg_kill_3'] = df['sum_3k'] / df['matches_played']
        df['basic_avg_kill_4'] = df['sum_4k'] / df['matches_played']
        df['basic_avg_kill_5'] = df['sum_5k'] / df['matches_played']
        df['basic_avg_assisted_kill'] = df['sum_assist'] / df['matches_played']
        df['basic_avg_perfect_kill'] = df['sum_perfect'] / df['matches_played']
        df['basic_avg_revenge_kill'] = df['sum_revenge'] / df['matches_played']
        df['basic_avg_awp_kill'] = df['sum_awp'] / df['matches_played']
        df['basic_avg_jump_count'] = df['sum_jump'] / df['matches_played']
        df['basic_avg_mvps'] = df['sum_mvps'] / df['matches_played']
        df['basic_avg_plants'] = df['sum_plants'] / df['matches_played']
        df['basic_avg_defuses'] = df['sum_defuses'] / df['matches_played']
        df['basic_avg_flash_assists'] = df['sum_flash_assists'] / df['matches_played']
        # UTIL basics
        df['util_avg_nade_dmg'] = df['sum_util_dmg'] / df['matches_played']
        df['util_avg_flash_time'] = df['sum_flash_time'] / df['matches_played']
        df['util_avg_flash_enemy'] = df['sum_flash_enemy'] / df['matches_played']
        valid_ids = tuple(df['steam_id_64'].tolist())
        placeholders = ','.join(['?'] * len(valid_ids))
        # 2. STA (stability): per-match rating series per player
        query_sta = f"""
            SELECT mp.steam_id_64, mp.rating, mp.is_win, m.start_time, m.duration
            FROM fact_match_players mp
            JOIN fact_matches m ON mp.match_id = m.match_id
            WHERE mp.steam_id_64 IN ({placeholders})
            ORDER BY mp.steam_id_64, m.start_time
        """
        df_matches = pd.read_sql_query(query_sta, conn, params=valid_ids)
        sta_list = []
        for pid, group in df_matches.groupby('steam_id_64'):
            group = group.sort_values('start_time')
            last_30 = group.tail(30)
            # Fatigue heuristic: on days with 4+ matches, compare the rating of
            # the first 3 matches against the rest of that day
            group['date'] = pd.to_datetime(group['start_time'], unit='s').dt.date
            day_counts = group.groupby('date').size()
            busy_days = day_counts[day_counts >= 4].index  # days with 4+ matches
            fatigue_decays = []
            for day in busy_days:
                day_matches = group[group['date'] == day]
                if len(day_matches) >= 4:
                    early_rating = day_matches.head(3)['rating'].mean()
                    late_rating = day_matches.tail(len(day_matches) - 3)['rating'].mean()
                    fatigue_decays.append(early_rating - late_rating)
            avg_fatigue = np.mean(fatigue_decays) if fatigue_decays else 0
            sta_list.append({
                'steam_id_64': pid,
                'sta_last_30_rating': last_30['rating'].mean(),
                'sta_win_rating': group[group['is_win'] == 1]['rating'].mean(),
                'sta_loss_rating': group[group['is_win'] == 0]['rating'].mean(),
                'sta_rating_volatility': group.tail(10)['rating'].std() if len(group) > 1 else 0,
                'sta_time_rating_corr': group['duration'].corr(group['rating']) if len(group) > 2 and group['rating'].std() > 0 else 0,
                'sta_fatigue_decay': avg_fatigue
            })
        df = df.merge(pd.DataFrame(sta_list), on='steam_id_64', how='left')
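        # Worked example of the fatigue heuristic (illustrative numbers): a day
        # with 5 matches rated [1.2, 1.1, 1.3, 0.9, 0.8] compares mean(first 3)
        # = 1.20 against mean(rest) = 0.85, contributing a decay of +0.35;
        # sta_fatigue_decay averages this over all busy days.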
        # 3. BAT (performance vs high-ELO opposition)
        query_elo = f"""
            SELECT mp.steam_id_64, mp.kd_ratio,
                (SELECT AVG(group_origin_elo) FROM fact_match_teams fmt WHERE fmt.match_id = mp.match_id AND group_origin_elo > 0) as elo
            FROM fact_match_players mp
            WHERE mp.steam_id_64 IN ({placeholders})
        """
        df_elo = pd.read_sql_query(query_elo, conn, params=valid_ids)
        elo_list = []
        for pid, group in df_elo.groupby('steam_id_64'):
            avg = group['elo'].mean()
            if pd.isna(avg):
                # No ELO recorded for any match ("NaN or 1000" would not fire,
                # since NaN is truthy): fall back to a neutral split point
                avg = 1000
            elo_list.append({
                'steam_id_64': pid,
                'bat_kd_diff_high_elo': group[group['elo'] > avg]['kd_ratio'].mean(),
                'bat_kd_diff_low_elo': group[group['elo'] <= avg]['kd_ratio'].mean()
            })
        df = df.merge(pd.DataFrame(elo_list), on='steam_id_64', how='left')
        # Duel win rate (entry kills vs entry deaths)
        query_duel = f"""
            SELECT steam_id_64, SUM(entry_kills) as ek, SUM(entry_deaths) as ed
            FROM fact_match_players WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64
        """
        df_duel = pd.read_sql_query(query_duel, conn, params=valid_ids)
        df_duel['bat_avg_duel_win_rate'] = df_duel['ek'] / (df_duel['ek'] + df_duel['ed']).replace(0, 1)
        df = df.merge(df_duel[['steam_id_64', 'bat_avg_duel_win_rate']], on='steam_id_64', how='left')
        # 4. HPS (high-pressure situations)
        # Clutch rates per match
        df['hps_clutch_win_rate_1v1'] = df['sum_1v1'] / df['matches_played']
        df['hps_clutch_win_rate_1v3_plus'] = df['sum_1v3p'] / df['matches_played']
        # Prepare detailed event data for HPS (comeback), PTL (KD) and T/CT sides
        # A. Determine side info using fact_match_teams
        # 1. Get match teams
        query_teams = f"""
            SELECT match_id, group_fh_role, group_uids
            FROM fact_match_teams
            WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))
        """
        df_teams = pd.read_sql_query(query_teams, conn, params=valid_ids)
        # 2. Get player UIDs
        query_uids = f"SELECT match_id, steam_id_64, uid FROM fact_match_players WHERE steam_id_64 IN ({placeholders})"
        df_uids = pd.read_sql_query(query_uids, conn, params=valid_ids)
        # 3. Get match meta (start time decides MR12 vs MR15)
        query_meta = f"SELECT match_id, start_time FROM fact_matches WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))"
        df_meta = pd.read_sql_query(query_meta, conn, params=valid_ids)
        # 1695772800 is roughly the CS2 release date (2023-09-27 UTC): matches after
        # it are assumed MR12 (halftime after round 12), earlier ones MR15
        df_meta['halftime_round'] = np.where(df_meta['start_time'] > 1695772800, 12, 15)
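        # e.g. pd.Timestamp(1695772800, unit='s') is 2023-09-27 00:00:00 UTC, so a
        # match started in 2024 gets halftime_round = 12 and one from early 2023 gets 15.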
        # 4. Build first-half side DataFrame (which side each player started on)
        fh_rows = []
        if not df_teams.empty and not df_uids.empty:
            match_teams = {}  # match_id -> [(role, [uids])]
            for _, row in df_teams.iterrows():
                mid = row['match_id']
                role = row['group_fh_role']  # 1 = CT, 0 = T
                try:
                    uids = str(row['group_uids']).split(',')
                    uids = [u.strip() for u in uids if u.strip()]
                except Exception:
                    uids = []
                if mid not in match_teams:
                    match_teams[mid] = []
                match_teams[mid].append((role, uids))
            for _, row in df_uids.iterrows():
                mid = row['match_id']
                sid = row['steam_id_64']
                uid = str(row['uid'])
                if mid in match_teams:
                    for role, uids in match_teams[mid]:
                        if uid in uids:
                            fh_rows.append({
                                'match_id': mid,
                                'steam_id_64': sid,
                                'fh_side': 'CT' if role == 1 else 'T'
                            })
                            break
        df_fh_sides = pd.DataFrame(fh_rows)
        if not df_fh_sides.empty:
            df_fh_sides = df_fh_sides.merge(df_meta[['match_id', 'halftime_round']], on='match_id', how='left')
        # B. Get kill events involving tracked players
        query_events = f"""
            SELECT match_id, round_num, attacker_steam_id, victim_steam_id, event_type, is_headshot, event_time
            FROM fact_round_events
            WHERE event_type = 'kill'
              AND (attacker_steam_id IN ({placeholders}) OR victim_steam_id IN ({placeholders}))
        """
        df_events = pd.read_sql_query(query_events, conn, params=valid_ids + valid_ids)
        # C. Get round scores
        query_rounds = f"""
            SELECT match_id, round_num, ct_score, t_score, winner_side
            FROM fact_rounds
            WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))
        """
        df_rounds = pd.read_sql_query(query_rounds, conn, params=valid_ids)
        # Reconstruct winner_side from score increments: the stored column is
        # mostly NULL, so the calculated value overwrites it unconditionally
        if not df_rounds.empty:
            df_rounds = df_rounds.sort_values(['match_id', 'round_num']).reset_index(drop=True)
            df_rounds['prev_ct'] = df_rounds.groupby('match_id')['ct_score'].shift(1).fillna(0)
            df_rounds['prev_t'] = df_rounds.groupby('match_id')['t_score'].shift(1).fillna(0)
            # The side whose score increased won the round (round 1 works too: prev is 0)
            df_rounds['ct_win'] = (df_rounds['ct_score'] > df_rounds['prev_ct'])
            df_rounds['t_win'] = (df_rounds['t_score'] > df_rounds['prev_t'])
            df_rounds['calculated_winner'] = np.where(df_rounds['ct_win'], 'CT',
                                                      np.where(df_rounds['t_win'], 'T', None))
            df_rounds['winner_side'] = df_rounds['calculated_winner']
            # Cast to string so comparisons against side labels ('CT', 'T') are safe
            df_rounds['winner_side'] = df_rounds['winner_side'].astype(str)
        # --- Process logic ---
        has_events = not df_events.empty
        has_sides = not df_fh_sides.empty
        if has_events and has_sides:
            # 1. Attacker first-half side
            df_events = df_events.merge(df_fh_sides, left_on=['match_id', 'attacker_steam_id'], right_on=['match_id', 'steam_id_64'], how='left')
            df_events.rename(columns={'fh_side': 'att_fh_side'}, inplace=True)
            df_events.drop(columns=['steam_id_64'], inplace=True)
            # 2. Victim first-half side
            df_events = df_events.merge(df_fh_sides, left_on=['match_id', 'victim_steam_id'], right_on=['match_id', 'steam_id_64'], how='left', suffixes=('', '_vic'))
            df_events.rename(columns={'fh_side': 'vic_fh_side'}, inplace=True)
            df_events.drop(columns=['steam_id_64'], inplace=True)
            # 3. Determine the actual side (CT/T) per kill: a player plays the
            # first-half side up to the halftime round, then the opposite side.
            # Unknown first-half sides (NaN) stay NaN instead of being mislabelled.
            def actual_side(fh_side, in_first_half):
                flipped = fh_side.map({'CT': 'T', 'T': 'CT'})
                return fh_side.where(in_first_half, flipped)

            mask_fh = df_events['round_num'] <= df_events['halftime_round']
            df_events['attacker_side'] = actual_side(df_events['att_fh_side'], mask_fh)
            df_events['victim_side'] = actual_side(df_events['vic_fh_side'], mask_fh)
            # Merge round scores onto each kill event
            df_events = df_events.merge(df_rounds, on=['match_id', 'round_num'], how='left')
            # --- BAT: win rate vs all ---
            # Removed as per request (hard to calculate reliably; was all zeros)
            df['bat_win_rate_vs_all'] = 0
            # --- HPS: match point & comeback ---
            # Match point win rate: rounds where either side sits at 12 (MR12) or 15 (MR15)
            mp_rounds = df_rounds[((df_rounds['ct_score'] == 12) | (df_rounds['t_score'] == 12) |
                                   (df_rounds['ct_score'] == 15) | (df_rounds['t_score'] == 15))]
            if not mp_rounds.empty and has_sides:
                # Expand player sides to every round they played
                q_all_rounds = f"SELECT match_id, round_num FROM fact_rounds WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))"
                df_all_rounds = pd.read_sql_query(q_all_rounds, conn, params=valid_ids)
                df_player_rounds = df_all_rounds.merge(df_fh_sides, on='match_id')
                mask_fh = df_player_rounds['round_num'] <= df_player_rounds['halftime_round']
                df_player_rounds['side'] = np.where(mask_fh, df_player_rounds['fh_side'],
                                                    np.where(df_player_rounds['fh_side'] == 'CT', 'T', 'CT'))
                # Join match-point rounds against per-player sides
                mp_player = df_player_rounds.merge(mp_rounds[['match_id', 'round_num', 'winner_side']], on=['match_id', 'round_num'])
                mp_player['is_win'] = (mp_player['side'] == mp_player['winner_side']).astype(int)
                hps_mp = mp_player.groupby('steam_id_64')['is_win'].mean().reset_index()
                hps_mp.rename(columns={'is_win': 'hps_match_point_win_rate'}, inplace=True)
                df = df.merge(hps_mp, on='steam_id_64', how='left')
            else:
                df['hps_match_point_win_rate'] = 0.5
            # Comeback KD diff: KD while trailing by 4+ rounds minus overall KD
            # Attacker context
            df_events['att_team_score'] = np.where(df_events['attacker_side'] == 'CT', df_events['ct_score'], df_events['t_score'])
            df_events['att_opp_score'] = np.where(df_events['attacker_side'] == 'CT', df_events['t_score'], df_events['ct_score'])
            df_events['is_comeback_att'] = (df_events['att_team_score'] + 4 <= df_events['att_opp_score'])
            # Victim context
            df_events['vic_team_score'] = np.where(df_events['victim_side'] == 'CT', df_events['ct_score'], df_events['t_score'])
            df_events['vic_opp_score'] = np.where(df_events['victim_side'] == 'CT', df_events['t_score'], df_events['ct_score'])
            df_events['is_comeback_vic'] = (df_events['vic_team_score'] + 4 <= df_events['vic_opp_score'])
            att_k = df_events.groupby('attacker_steam_id').size()
            vic_d = df_events.groupby('victim_steam_id').size()
            cb_k = df_events[df_events['is_comeback_att']].groupby('attacker_steam_id').size()
            cb_d = df_events[df_events['is_comeback_vic']].groupby('victim_steam_id').size()
            kd_stats = pd.DataFrame({'k': att_k, 'd': vic_d, 'cb_k': cb_k, 'cb_d': cb_d}).fillna(0)
            kd_stats['kd'] = kd_stats['k'] / kd_stats['d'].replace(0, 1)
            kd_stats['cb_kd'] = kd_stats['cb_k'] / kd_stats['cb_d'].replace(0, 1)
            kd_stats['hps_comeback_kd_diff'] = kd_stats['cb_kd'] - kd_stats['kd']
            kd_stats.index.name = 'steam_id_64'
            df = df.merge(kd_stats[['hps_comeback_kd_diff']], on='steam_id_64', how='left')
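            # Worked example (illustrative numbers): a player with 100 kills and
            # 100 deaths overall (kd 1.0) who records 15 kills / 10 deaths while
            # trailing by 4+ rounds gets cb_kd 1.5 and hps_comeback_kd_diff +0.5.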
            # HPS: losing-streak KD diff
            # KD in rounds where the team has lost the previous 3+ rounds, vs global KD
            if not df_rounds.empty:
                df_rounds = df_rounds.sort_values(['match_id', 'round_num'])
                # Shift within each match so streaks never cross match boundaries
                g = df_rounds.groupby('match_id')
                # CT loss streak: T won each of the previous three rounds
                df_rounds['ct_lost_1'] = g['t_win'].shift(1).fillna(False)
                df_rounds['ct_lost_2'] = g['t_win'].shift(2).fillna(False)
                df_rounds['ct_lost_3'] = g['t_win'].shift(3).fillna(False)
                df_rounds['ct_in_loss_streak'] = (df_rounds['ct_lost_1'] & df_rounds['ct_lost_2'] & df_rounds['ct_lost_3'])
                # T loss streak: CT won each of the previous three rounds
                df_rounds['t_lost_1'] = g['ct_win'].shift(1).fillna(False)
                df_rounds['t_lost_2'] = g['ct_win'].shift(2).fillna(False)
                df_rounds['t_lost_3'] = g['ct_win'].shift(3).fillna(False)
                df_rounds['t_in_loss_streak'] = (df_rounds['t_lost_1'] & df_rounds['t_lost_2'] & df_rounds['t_lost_3'])
                # Merge streak flags into the kill events
                streak_cols = df_rounds[['match_id', 'round_num', 'ct_in_loss_streak', 't_in_loss_streak']]
                df_events = df_events.merge(streak_cols, on=['match_id', 'round_num'], how='left')
                # Rounds missing from fact_rounds would yield NaN flags; treat as no streak
                df_events['ct_in_loss_streak'] = df_events['ct_in_loss_streak'].fillna(False)
                df_events['t_in_loss_streak'] = df_events['t_in_loss_streak'].fillna(False)
                # Flag whether the attacker's team is in a losing streak
                df_events['att_is_loss_streak'] = np.where(
                    df_events['attacker_side'] == 'CT', df_events['ct_in_loss_streak'],
                    np.where(df_events['attacker_side'] == 'T', df_events['t_in_loss_streak'], False)
                )
                # Same for the victim (to count deaths during a streak)
                df_events['vic_is_loss_streak'] = np.where(
                    df_events['victim_side'] == 'CT', df_events['ct_in_loss_streak'],
                    np.where(df_events['victim_side'] == 'T', df_events['t_in_loss_streak'], False)
                )
                # KD during losing streaks
                ls_k = df_events[df_events['att_is_loss_streak']].groupby('attacker_steam_id').size()
                ls_d = df_events[df_events['vic_is_loss_streak']].groupby('victim_steam_id').size()
                ls_stats = pd.DataFrame({'ls_k': ls_k, 'ls_d': ls_d}).fillna(0)
                ls_stats['ls_kd'] = ls_stats['ls_k'] / ls_stats['ls_d'].replace(0, 1)
                # Recompute global KD from the same events so the diff is consistent
                g_k = df_events.groupby('attacker_steam_id').size()
                g_d = df_events.groupby('victim_steam_id').size()
                g_stats = pd.DataFrame({'g_k': g_k, 'g_d': g_d}).fillna(0)
                g_stats['g_kd'] = g_stats['g_k'] / g_stats['g_d'].replace(0, 1)
                ls_stats = ls_stats.join(g_stats[['g_kd']], how='outer').fillna(0)
                ls_stats['hps_losing_streak_kd_diff'] = ls_stats['ls_kd'] - ls_stats['g_kd']
                ls_stats.index.name = 'steam_id_64'
                df = df.merge(ls_stats[['hps_losing_streak_kd_diff']], on='steam_id_64', how='left')
            else:
                df['hps_losing_streak_kd_diff'] = 0
            # HPS: momentum multi-kill rate (2+ kills after the team won 3+ rounds in a row).
            # Hard to vectorise without full round-sequence reconstruction; left at 0 for now.
            df['hps_momentum_multikill_rate'] = 0
            # Remaining HPS placeholders, pending reliable source data
            df['hps_tilt_rating_drop'] = 0
            df['hps_clutch_rating_rise'] = 0
            df['hps_undermanned_survival_time'] = 0
            # --- PTL: pistol stats ---
            # NOTE: rounds (1, 13) assume MR12; in MR15 matches the second pistol
            # falls in round 16, which this fixed list does not cover
            pistol_rounds = [1, 13]
            df_pistol = df_events[df_events['round_num'].isin(pistol_rounds)]
            if not df_pistol.empty:
                pk = df_pistol.groupby('attacker_steam_id').size()
                pd_death = df_pistol.groupby('victim_steam_id').size()
                p_stats = pd.DataFrame({'pk': pk, 'pd': pd_death}).fillna(0)
                p_stats['ptl_pistol_kd'] = p_stats['pk'] / p_stats['pd'].replace(0, 1)
                phs = df_pistol[df_pistol['is_headshot'] == 1].groupby('attacker_steam_id').size()
                p_stats['phs'] = phs
                p_stats['phs'] = p_stats['phs'].fillna(0)
                p_stats['ptl_pistol_util_efficiency'] = p_stats['phs'] / p_stats['pk'].replace(0, 1)
                p_stats.index.name = 'steam_id_64'
                df = df.merge(p_stats[['ptl_pistol_kd', 'ptl_pistol_util_efficiency']], on='steam_id_64', how='left')
            else:
                df['ptl_pistol_kd'] = 1.0
                df['ptl_pistol_util_efficiency'] = 0.0
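            # Worked example (illustrative): 10 pistol-round kills and 5 pistol-round
            # deaths give ptl_pistol_kd 2.0; if 4 of those 10 kills were headshots,
            # ptl_pistol_util_efficiency comes out at 0.4.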
            # --- T/CT stats (directly from the L2 per-side tables) ---
            query_sides_l2 = f"""
                SELECT
                    steam_id_64,
                    'CT' as side,
                    COUNT(*) as matches,
                    SUM(round_total) as rounds,
                    AVG(rating2) as rating,
                    SUM(kills) as kills,
                    SUM(deaths) as deaths,
                    SUM(assists) as assists,
                    AVG(CAST(is_win as FLOAT)) as win_rate,
                    SUM(first_kill) as fk,
                    SUM(first_death) as fd,
                    AVG(kast) as kast,
                    AVG(rws) as rws,
                    SUM(kill_2 + kill_3 + kill_4 + kill_5) as multi_kill_rounds,
                    SUM(headshot_count) as hs
                FROM fact_match_players_ct
                WHERE steam_id_64 IN ({placeholders})
                GROUP BY steam_id_64

                UNION ALL

                SELECT
                    steam_id_64,
                    'T' as side,
                    COUNT(*) as matches,
                    SUM(round_total) as rounds,
                    AVG(rating2) as rating,
                    SUM(kills) as kills,
                    SUM(deaths) as deaths,
                    SUM(assists) as assists,
                    AVG(CAST(is_win as FLOAT)) as win_rate,
                    SUM(first_kill) as fk,
                    SUM(first_death) as fd,
                    AVG(kast) as kast,
                    AVG(rws) as rws,
                    SUM(kill_2 + kill_3 + kill_4 + kill_5) as multi_kill_rounds,
                    SUM(headshot_count) as hs
                FROM fact_match_players_t
                WHERE steam_id_64 IN ({placeholders})
                GROUP BY steam_id_64
            """
            df_sides = pd.read_sql_query(query_sides_l2, conn, params=valid_ids + valid_ids)
            if not df_sides.empty:
                # Derived per-side rates, computed before pivoting
                df_sides['rounds'] = df_sides['rounds'].replace(0, 1)  # avoid division by zero
                # KD = total kills / total deaths per side
                df_sides['kd'] = df_sides['kills'] / df_sides['deaths'].replace(0, 1)
                # KAST proxy when the source column is all zeros:
                # KAST ~= (kills + assists + survived) / rounds, survived = rounds - deaths
                if df_sides['kast'].mean() == 0:
                    df_sides['survived'] = df_sides['rounds'] - df_sides['deaths']
                    df_sides['kast'] = (df_sides['kills'] + df_sides['assists'] + df_sides['survived']) / df_sides['rounds']
                df_sides['fk_rate'] = df_sides['fk'] / df_sides['rounds']
                df_sides['fd_rate'] = df_sides['fd'] / df_sides['rounds']
                df_sides['mk_rate'] = df_sides['multi_kill_rounds'] / df_sides['rounds']
                df_sides['hs_rate'] = df_sides['hs'] / df_sides['kills'].replace(0, 1)
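                # Worked example of the KAST proxy (illustrative): 100 rounds with
                # 70 kills, 30 assists and 40 deaths gives survived = 60 and
                # kast = (70 + 30 + 60) / 100 = 1.6. This can exceed 1 because one
                # round may contribute a kill, an assist and a survival at once;
                # it is only a stand-in for when the real KAST column is empty.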
                # Pivot so each metric becomes side-suffixed columns
                # (side_rating_ct, side_rating_t, ...)
                pivoted = df_sides.pivot(index='steam_id_64', columns='side').reset_index()
                # Flatten the MultiIndex columns, mapping L2 metric names to feature names
                target_map = {
                    'rating': 'side_rating',
                    'kd': 'side_kd',
                    'win_rate': 'side_win_rate',
                    'fk_rate': 'side_first_kill_rate',
                    'fd_rate': 'side_first_death_rate',
                    'kast': 'side_kast',
                    'rws': 'side_rws',
                    'mk_rate': 'side_multikill_rate',
                    'hs_rate': 'side_headshot_rate'
                }
                new_cols = ['steam_id_64']
                for col_name, side in pivoted.columns[1:]:
                    if col_name in target_map:
                        new_cols.append(f"{target_map[col_name]}_{side.lower()}")
                    else:
                        new_cols.append(f"{col_name}_{side.lower()}")  # fallback for intermediate cols
                pivoted.columns = new_cols
                # Merge only the side_* feature columns
                cols_to_merge = [c for c in new_cols if c.startswith('side_')]
                cols_to_merge.append('steam_id_64')
                df = df.merge(pivoted[cols_to_merge], on='steam_id_64', how='left')
                # Fill NaN with 0 for side stats
                for c in cols_to_merge:
                    if c != 'steam_id_64':
                        df[c] = df[c] .fillna(0)
                # CT/T differential used by the L3 score calculation.
                # NOTE: despite the column name, this is a rating differential, not KD.
                if 'side_rating_ct' in df.columns and 'side_rating_t' in df.columns:
                    df['side_kd_diff_ct_t'] = df['side_rating_ct'] - df['side_rating_t']
                else:
                    df['side_kd_diff_ct_t'] = 0
                # --- Objective stats overridden from the main table ---
                # side_obj_t = plants per match, side_obj_ct = defuses per match
                df['side_obj_t'] = df['sum_plants'] / df['matches_played'].replace(0, 1)
                df['side_obj_ct'] = df['sum_defuses'] / df['matches_played'].replace(0, 1)
                df['side_obj_t'] = df['side_obj_t'].fillna(0)
                df['side_obj_ct'] = df['side_obj_ct'].fillna(0)
        else:
            # No event/side data at all: zero-fill every dependent feature
            cols = ['hps_match_point_win_rate', 'hps_comeback_kd_diff', 'ptl_pistol_kd', 'ptl_pistol_util_efficiency',
                    'side_rating_ct', 'side_rating_t', 'side_first_kill_rate_ct', 'side_first_kill_rate_t', 'side_kd_diff_ct_t',
                    'bat_win_rate_vs_all', 'hps_losing_streak_kd_diff', 'hps_momentum_multikill_rate',
                    'hps_tilt_rating_drop', 'hps_clutch_rating_rise', 'hps_undermanned_survival_time',
                    'side_win_rate_ct', 'side_win_rate_t', 'side_kd_ct', 'side_kd_t',
                    'side_kast_ct', 'side_kast_t', 'side_rws_ct', 'side_rws_t',
                    'side_first_death_rate_ct', 'side_first_death_rate_t',
                    'side_multikill_rate_ct', 'side_multikill_rate_t',
                    'side_headshot_rate_ct', 'side_headshot_rate_t',
                    'side_obj_ct', 'side_obj_t']
            for c in cols:
                df[c] = 0
        df['hps_match_point_win_rate'] = df['hps_match_point_win_rate'].fillna(0.5)
        df['bat_win_rate_vs_all'] = df['bat_win_rate_vs_all'].fillna(0.5)
        df['hps_losing_streak_kd_diff'] = df['hps_losing_streak_kd_diff'].fillna(0)
        # HPS: pressure entry rate (entry kills per round in lost matches)
        q_mp_team = f"SELECT match_id, steam_id_64, is_win, entry_kills, round_total FROM fact_match_players WHERE steam_id_64 IN ({placeholders})"
        df_mp_team = pd.read_sql_query(q_mp_team, conn, params=valid_ids)
        if not df_mp_team.empty:
            losing_matches = df_mp_team[df_mp_team['is_win'] == 0]
            if not losing_matches.empty:
                # Sum of entry kills / sum of rounds, over losing matches only
                pressure_entry = losing_matches.groupby('steam_id_64')[['entry_kills', 'round_total']].sum().reset_index()
                pressure_entry['hps_pressure_entry_rate'] = pressure_entry['entry_kills'] / pressure_entry['round_total'].replace(0, 1)
                df = df.merge(pressure_entry[['steam_id_64', 'hps_pressure_entry_rate']], on='steam_id_64', how='left')
        if 'hps_pressure_entry_rate' not in df.columns:
            df['hps_pressure_entry_rate'] = 0
        df['hps_pressure_entry_rate'] = df['hps_pressure_entry_rate'].fillna(0)
        # 5. PTL extras: pistol kills per match and pistol multi-kills
        # (rounds (1, 13): same MR12 assumption as above)
        query_ptl = f"""
            SELECT ev.attacker_steam_id as steam_id_64, COUNT(*) as pistol_kills
            FROM fact_round_events ev
            WHERE ev.event_type = 'kill' AND ev.round_num IN (1, 13)
              AND ev.attacker_steam_id IN ({placeholders})
            GROUP BY ev.attacker_steam_id
        """
        df_ptl = pd.read_sql_query(query_ptl, conn, params=valid_ids)
        if not df_ptl.empty:
            df = df.merge(df_ptl, on='steam_id_64', how='left')
            df['ptl_pistol_kills'] = df['pistol_kills'] / df['matches_played']
        else:
            df['ptl_pistol_kills'] = 0
        query_ptl_multi = f"""
            SELECT attacker_steam_id as steam_id_64, COUNT(*) as multi_cnt
            FROM (
                SELECT match_id, round_num, attacker_steam_id, COUNT(*) as k
                FROM fact_round_events
                WHERE event_type = 'kill' AND round_num IN (1, 13)
                  AND attacker_steam_id IN ({placeholders})
                GROUP BY match_id, round_num, attacker_steam_id
                HAVING k >= 2
            )
            GROUP BY attacker_steam_id
        """
        df_ptl_multi = pd.read_sql_query(query_ptl_multi, conn, params=valid_ids)
        if not df_ptl_multi.empty:
            df = df.merge(df_ptl_multi, on='steam_id_64', how='left')
            df['ptl_pistol_multikills'] = df['multi_cnt'] / df['matches_played']
        else:
            df['ptl_pistol_multikills'] = 0
        # PTL win rate (pandas logic using the reconstructed winner_side)
        if not df_rounds.empty and has_sides:
            # df_player_rounds may not exist if the match-point branch was skipped
            if 'df_player_rounds' not in locals():
                q_all_rounds = f"SELECT match_id, round_num FROM fact_rounds WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ({placeholders}))"
                df_all_rounds = pd.read_sql_query(q_all_rounds, conn, params=valid_ids)
                df_player_rounds = df_all_rounds.merge(df_fh_sides, on='match_id')
                mask_fh = df_player_rounds['round_num'] <= df_player_rounds['halftime_round']
                df_player_rounds['side'] = np.where(mask_fh, df_player_rounds['fh_side'],
                                                    np.where(df_player_rounds['fh_side'] == 'CT', 'T', 'CT'))
            # Pistol rounds: round 1 and the first round after halftime
            # (MR12: round 13, MR15: round 16)
            player_pistol = df_player_rounds[
                (df_player_rounds['round_num'] == 1) |
                (df_player_rounds['round_num'] == df_player_rounds['halftime_round'] + 1)
            ].copy()
            # Merge with df_rounds to get the calculated winner_side
            df_rounds['winner_side'] = df_rounds['winner_side'].astype(str)  # ensure string for the comparison below
            player_pistol = player_pistol.merge(df_rounds[['match_id', 'round_num', 'winner_side']], on=['match_id', 'round_num'], how='left')
            # Guard: winner_side must survive the merge before computing wins
            if 'winner_side' in player_pistol.columns:
                player_pistol['is_win'] = (player_pistol['side'] == player_pistol['winner_side']).astype(int)
            else:
                player_pistol['is_win'] = 0
            ptl_wins = player_pistol.groupby('steam_id_64')['is_win'].agg(['sum', 'count']).reset_index()
            ptl_wins.rename(columns={'sum': 'pistol_wins', 'count': 'pistol_rounds'}, inplace=True)
            ptl_wins['ptl_pistol_win_rate'] = ptl_wins['pistol_wins'] / ptl_wins['pistol_rounds'].replace(0, 1)
            df = df.merge(ptl_wins[['steam_id_64', 'ptl_pistol_win_rate']], on='steam_id_64', how='left')
        else:
            df['ptl_pistol_win_rate'] = 0.5
        df['ptl_pistol_multikills'] = df['ptl_pistol_multikills'].fillna(0)
        df['ptl_pistol_win_rate'] = df['ptl_pistol_win_rate'].fillna(0.5)
        # 7. UTIL (enhanced with grenade purchase frequency)
        # Usage rate: grenades purchased per round, scaled x100 so it sits in the
        # same range as other metrics (e.g. 1.5 nades/round -> 150)
        df['util_usage_rate'] = (
            df['sum_util_flash'] + df['sum_util_smoke'] +
            df['sum_util_molotov'] + df['sum_util_he'] + df['sum_util_decoy']
        ) / df['rounds_played'].replace(0, 1) * 100
        # Fallback when the usage columns are not populated yet: approximate
        # from average equipment value
        if df['util_usage_rate'].sum() == 0:
            query_eco = f"""
                SELECT steam_id_64, AVG(equipment_value) as avg_equip_val
                FROM fact_round_player_economy
                WHERE steam_id_64 IN ({placeholders})
                GROUP BY steam_id_64
            """
            df_eco = pd.read_sql_query(query_eco, conn, params=valid_ids)
            if not df_eco.empty:
                df_eco['util_usage_rate_backup'] = df_eco['avg_equip_val'] / 50.0  # heuristic scaling for equipment value
                df = df.merge(df_eco[['steam_id_64', 'util_usage_rate_backup']], on='steam_id_64', how='left')
                df['util_usage_rate'] = df['util_usage_rate_backup'].fillna(0)
                df.drop(columns=['util_usage_rate_backup'], inplace=True)
        # --- 8. New feature dimensions (party size, rating distribution, ELO) ---
        # Base rows for these calculations
        q_new_feats = f"""
            SELECT mp.steam_id_64, mp.match_id, mp.match_team_id, mp.team_id,
                   mp.rating, mp.adr, mp.is_win
            FROM fact_match_players mp
            WHERE mp.steam_id_64 IN ({placeholders})
        """
        df_base = pd.read_sql_query(q_new_feats, conn, params=valid_ids)
        if not df_base.empty:
            # 8.1 Party size stats: performance by how many teammates queued together
            match_ids = df_base['match_id'].unique()
            if len(match_ids) > 0:
                match_id_ph = ','.join(['?'] * len(match_ids))
                q_party_size = f"""
                    SELECT match_id, match_team_id, COUNT(*) as party_size
                    FROM fact_match_players
                    WHERE match_id IN ({match_id_ph}) AND match_team_id > 0
                    GROUP BY match_id, match_team_id
                """
                # Chunk match_ids to stay under SQLite's bound-parameter limit
                chunk_size = 900
                party_sizes_list = []
                for i in range(0, len(match_ids), chunk_size):
                    chunk = match_ids[i:i + chunk_size]
                    chunk_ph = ','.join(['?'] * len(chunk))
                    q_chunk = q_party_size.replace(match_id_ph, chunk_ph)
                    party_sizes_list.append(pd.read_sql_query(q_chunk, conn, params=list(chunk)))
                if party_sizes_list:
                    df_party_sizes = pd.concat(party_sizes_list)
                    # Attach each player's party size to their match rows
                    df_base_party = df_base.merge(df_party_sizes, on=['match_id', 'match_team_id'], how='left')
                    # Aggregate win rate / rating / ADR per party size (1-5)
                    party_stats = df_base_party.groupby(['steam_id_64', 'party_size']).agg({
                        'is_win': 'mean',
                        'rating': 'mean',
                        'adr': 'mean'
                    }).reset_index()
                    # Pivot to one row per player, then flatten the MultiIndex
                    # into party_{size}_{metric} columns
                    pivoted_party = party_stats.pivot(index='steam_id_64', columns='party_size').reset_index()
                    flat_data = {'steam_id_64': pivoted_party['steam_id_64']}
                    for size in [1, 2, 3, 4, 5]:
                        if size in pivoted_party['is_win'].columns:
                            flat_data[f"party_{size}_win_rate"] = pivoted_party['is_win'][size]
                        if size in pivoted_party['rating'].columns:
                            flat_data[f"party_{size}_rating"] = pivoted_party['rating'][size]
                        if size in pivoted_party['adr'].columns:
                            flat_data[f"party_{size}_adr"] = pivoted_party['adr'][size]
                    df_party_flat = pd.DataFrame(flat_data)
                    df = df.merge(df_party_flat, on='steam_id_64', how='left')
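                    # For example, a player who only ever queued solo or as a duo
                    # gets party_1_win_rate / party_1_rating / party_1_adr and the
                    # party_2_* columns; sizes nobody in the batch played stay
                    # absent and fall to 0 in the final fillna.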
            # 8.2 Rating distribution
            # Tier shares per player: carry (>= 1.5), normal (1.0-1.5),
            # sacrifice (0.6-1.0), sleeping (< 0.6). right=False makes the bins
            # lower-inclusive, i.e. [0.6, 1.0), [1.0, 1.5), [1.5, inf)
            df_base['rating_tier'] = pd.cut(df_base['rating'],
                                            bins=[-float('inf'), 0.6, 1.0, 1.5, float('inf')],
                                            labels=['sleeping', 'sacrifice', 'normal', 'carry'],
                                            right=False)
            dist_stats = df_base.groupby(['steam_id_64', 'rating_tier']).size().unstack(fill_value=0)
            # Convert counts to shares of matches
            dist_stats = dist_stats.div(dist_stats.sum(axis=1), axis=0)
            dist_stats.columns = [f"rating_dist_{c}_rate" for c in dist_stats.columns]
            dist_stats = dist_stats.reset_index()
            df = df.merge(dist_stats, on='steam_id_64', how='left')
            # 8.3 ELO stratification: average rating vs opponent-team ELO bands
            if len(match_ids) > 0:
                q_elo = f"""
                    SELECT match_id, group_id, group_origin_elo
                    FROM fact_match_teams
                    WHERE match_id IN ({match_id_ph})
                """
                # Chunked again to respect the parameter limit
                elo_list = []
                for i in range(0, len(match_ids), chunk_size):
                    chunk = match_ids[i:i + chunk_size]
                    chunk_ph = ','.join(['?'] * len(chunk))
                    q_chunk = q_elo.replace(match_id_ph, chunk_ph)
                    elo_list.append(pd.read_sql_query(q_chunk, conn, params=list(chunk)))
                if elo_list:
                    df_elo_teams = pd.concat(elo_list)
                    # Join on match_id and keep only rows where the team is not the
                    # player's own (group_id != team_id), i.e. the opponent's ELO
                    df_merged_elo = df_base.merge(df_elo_teams, on='match_id', how='left')
                    df_merged_elo = df_merged_elo[df_merged_elo['group_id'] != df_merged_elo['team_id']]
                    # Bands: <1200, 1200-1400, 1400-1600, 1600-1800, 1800-2000, >2000
                    elo_bins = [-float('inf'), 1200, 1400, 1600, 1800, 2000, float('inf')]
                    elo_labels = ['lt1200', '1200_1400', '1400_1600', '1600_1800', '1800_2000', 'gt2000']
                    df_merged_elo['elo_bin'] = pd.cut(df_merged_elo['group_origin_elo'], bins=elo_bins, labels=elo_labels, right=False)
                    elo_stats = df_merged_elo.groupby(['steam_id_64', 'elo_bin']).agg({
                        'rating': 'mean'
                    }).unstack(fill_value=0)  # only rating is needed for now
                    # Flatten the (rating, bin) MultiIndex into elo_{bin}_rating columns
                    flat_elo_data = {'steam_id_64': elo_stats.index}
                    for bin_label in elo_labels:
                        if bin_label in elo_stats['rating'].columns:
                            flat_elo_data[f"elo_{bin_label}_rating"] = elo_stats['rating'][bin_label].values
                    df_elo_flat = pd.DataFrame(flat_elo_data)
                    df = df.merge(df_elo_flat, on='steam_id_64', how='left')
        # Final mappings
        df['total_matches'] = df['matches_played']
        return df.fillna(0)
    @staticmethod
    def _calculate_ultimate_scores(df):
        # Min-max normalize a column to 0-100; missing or constant columns get a
        # neutral 50 so they neither reward nor punish anyone
        def n(col):
            if col not in df.columns:
                return 50
            s = df[col]
            if s.max() == s.min():
                return 50
            return (s - s.min()) / (s.max() - s.min()) * 100
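        # e.g. a column with values [0.8, 1.0, 1.2] normalizes to [0, 50, 100];
        # the scale is relative to the players in this rebuild batch only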
        df = df.copy()
        # BAT (30%)
        df['score_bat'] = (
            0.25 * n('basic_avg_rating') +
            0.20 * n('basic_avg_kd') +
            0.15 * n('basic_avg_adr') +
            0.10 * n('bat_avg_duel_win_rate') +
            0.10 * n('bat_kd_diff_high_elo') +
            0.10 * n('basic_avg_kill_3')
        )
        # STA (15%)
        df['score_sta'] = (
            0.30 * (100 - n('sta_rating_volatility')) +
            0.30 * n('sta_loss_rating') +
            0.20 * n('sta_win_rating') +
            0.10 * (100 - abs(n('sta_time_rating_corr')))
        )
        # HPS (20%)
        df['score_hps'] = (
            0.25 * n('sum_1v3p') +
            0.20 * n('hps_match_point_win_rate') +
            0.20 * n('hps_comeback_kd_diff') +
            0.15 * n('hps_pressure_entry_rate') +
            0.20 * n('basic_avg_rating')
        )
        # PTL (10%)
        df['score_ptl'] = (
            0.30 * n('ptl_pistol_kills') +
            0.30 * n('ptl_pistol_win_rate') +
            0.20 * n('ptl_pistol_kd') +
            0.20 * n('ptl_pistol_util_efficiency')
        )
        # T/CT (10%)
        df['score_tct'] = (
            0.35 * n('side_rating_ct') +
            0.35 * n('side_rating_t') +
            0.15 * n('side_first_kill_rate_ct') +
            0.15 * n('side_first_kill_rate_t')
        )
        # UTIL (10%), emphasizing grenade usage frequency
        df['score_util'] = (
            0.35 * n('util_usage_rate') +
            0.25 * n('util_avg_nade_dmg') +
            0.20 * n('util_avg_flash_time') +
            0.20 * n('util_avg_flash_enemy')
        )
        return df
    @staticmethod
    def get_roster_features_distribution(target_steam_id):
        """
        Calculates the rank and distribution of the target player's L3 feature
        scores within the active roster.
        """
        from web.services.web_service import WebService
        import json
        # 1. Get active roster IDs
        lineups = WebService.get_lineups()
        active_roster_ids = []
        if lineups:
            try:
                raw_ids = json.loads(lineups[0]['player_ids_json'])
                active_roster_ids = [str(uid) for uid in raw_ids]
            except Exception:
                # Malformed or missing lineup JSON: treat as an empty roster
                pass
        if not active_roster_ids:
            return None
        # 2. Fetch L3 features for all roster members
        placeholders = ','.join('?' for _ in active_roster_ids)
        sql = f"""
            SELECT
                steam_id_64,
                score_bat, score_sta, score_hps, score_ptl, score_tct, score_util
            FROM dm_player_features
            WHERE steam_id_64 IN ({placeholders})
        """
        rows = query_db('l3', sql, active_roster_ids)
        if not rows:
            return None
        stats_map = {row['steam_id_64']: dict(row) for row in rows}
        target_steam_id = str(target_steam_id)
        # If the target has no L3 row yet, default all scores to 0
        if target_steam_id not in stats_map:
            stats_map[target_steam_id] = {
                'score_bat': 0, 'score_sta': 0, 'score_hps': 0,
                'score_ptl': 0, 'score_tct': 0, 'score_util': 0
            }
        # 3. Calculate the distribution per metric
        metrics = ['score_bat', 'score_sta', 'score_hps', 'score_ptl', 'score_tct', 'score_util']
        result = {}
        for m in metrics:
            values = [p.get(m, 0) or 0 for p in stats_map.values()]
            target_val = stats_map[target_steam_id].get(m, 0) or 0
            if not values:
                result[m] = None
                continue
            values.sort(reverse=True)
            try:
                # Rank = 1-based position of the target value in the sorted list
                rank = values.index(target_val) + 1
            except ValueError:
                rank = len(values)
            result[m] = {
                'val': target_val,
                'rank': rank,
                'total': len(values),
                'min': min(values),
                'max': max(values),
                'avg': sum(values) / len(values)
            }
        return result
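    # Example result shape (illustrative values for a 5-player roster):
    #   {'score_bat': {'val': 72.5, 'rank': 2, 'total': 5,
    #                  'min': 40.1, 'max': 88.0, 'avg': 63.2}, ...}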