2026-01-26 21:10:42 +08:00
from web . database import query_db , get_db , execute_db
import sqlite3
import pandas as pd
import numpy as np
2026-01-28 01:20:26 +08:00
from web . services . weapon_service import get_weapon_info
2026-01-26 02:13:06 +08:00
class FeatureService :
@staticmethod
def get_player_features ( steam_id ) :
sql = " SELECT * FROM dm_player_features WHERE steam_id_64 = ? "
return query_db ( ' l3 ' , sql , [ steam_id ] , one = True )
@staticmethod
def get_players_list ( page = 1 , per_page = 20 , sort_by = ' rating ' , search = None ) :
offset = ( page - 1 ) * per_page
# Sort Mapping
sort_map = {
' rating ' : ' basic_avg_rating ' ,
' kd ' : ' basic_avg_kd ' ,
' kast ' : ' basic_avg_kast ' ,
' matches ' : ' matches_played '
}
order_col = sort_map . get ( sort_by , ' basic_avg_rating ' )
from web . services . stats_service import StatsService
# Helper to attach match counts
def attach_match_counts ( player_list ) :
if not player_list :
return
ids = [ p [ ' steam_id_64 ' ] for p in player_list ]
# Batch query for counts from L2
placeholders = ' , ' . join ( ' ? ' for _ in ids )
sql = f """
SELECT steam_id_64 , COUNT ( * ) as cnt
FROM fact_match_players
WHERE steam_id_64 IN ( { placeholders } )
GROUP BY steam_id_64
"""
counts = query_db ( ' l2 ' , sql , ids )
cnt_dict = { r [ ' steam_id_64 ' ] : r [ ' cnt ' ] for r in counts }
for p in player_list :
p [ ' matches_played ' ] = cnt_dict . get ( p [ ' steam_id_64 ' ] , 0 )
if search :
# Get all matching players
l2_players , _ = StatsService . get_players ( page = 1 , per_page = 100 , search = search )
if not l2_players :
return [ ] , 0
steam_ids = [ p [ ' steam_id_64 ' ] for p in l2_players ]
placeholders = ' , ' . join ( ' ? ' for _ in steam_ids )
sql = f " SELECT * FROM dm_player_features WHERE steam_id_64 IN ( { placeholders } ) "
features = query_db ( ' l3 ' , sql , steam_ids )
f_dict = { f [ ' steam_id_64 ' ] : f for f in features }
# Get counts for sorting
count_sql = f " SELECT steam_id_64, COUNT(*) as cnt FROM fact_match_players WHERE steam_id_64 IN ( { placeholders } ) GROUP BY steam_id_64 "
counts = query_db ( ' l2 ' , count_sql , steam_ids )
cnt_dict = { r [ ' steam_id_64 ' ] : r [ ' cnt ' ] for r in counts }
merged = [ ]
for p in l2_players :
f = f_dict . get ( p [ ' steam_id_64 ' ] )
m = dict ( p )
if f :
m . update ( dict ( f ) )
else :
# Fallback Calc
stats = StatsService . get_player_basic_stats ( p [ ' steam_id_64 ' ] )
if stats :
m [ ' basic_avg_rating ' ] = stats [ ' rating ' ]
m [ ' basic_avg_kd ' ] = stats [ ' kd ' ]
m [ ' basic_avg_kast ' ] = stats [ ' kast ' ]
else :
m [ ' basic_avg_rating ' ] = 0
m [ ' basic_avg_kd ' ] = 0
2026-01-26 21:10:42 +08:00
m [ ' basic_avg_kast ' ] = 0
2026-01-26 02:13:06 +08:00
m [ ' matches_played ' ] = cnt_dict . get ( p [ ' steam_id_64 ' ] , 0 )
merged . append ( m )
merged . sort ( key = lambda x : x . get ( order_col , 0 ) or 0 , reverse = True )
total = len ( merged )
start = ( page - 1 ) * per_page
end = start + per_page
return merged [ start : end ] , total
else :
# Browse mode
l3_count = query_db ( ' l3 ' , " SELECT COUNT(*) as cnt FROM dm_player_features " , one = True ) [ ' cnt ' ]
if l3_count == 0 or sort_by == ' matches ' :
if sort_by == ' matches ' :
sql = """
SELECT steam_id_64 , COUNT ( * ) as cnt
FROM fact_match_players
GROUP BY steam_id_64
ORDER BY cnt DESC
LIMIT ? OFFSET ?
"""
top_ids = query_db ( ' l2 ' , sql , [ per_page , offset ] )
if not top_ids :
return [ ] , 0
total = query_db ( ' l2 ' , " SELECT COUNT(DISTINCT steam_id_64) as cnt FROM fact_match_players " , one = True ) [ ' cnt ' ]
ids = [ r [ ' steam_id_64 ' ] for r in top_ids ]
l2_players = StatsService . get_players_by_ids ( ids )
2026-01-26 21:10:42 +08:00
# Merge logic
2026-01-26 02:13:06 +08:00
merged = [ ]
p_ph = ' , ' . join ( ' ? ' for _ in ids )
f_sql = f " SELECT * FROM dm_player_features WHERE steam_id_64 IN ( { p_ph } ) "
features = query_db ( ' l3 ' , f_sql , ids )
f_dict = { f [ ' steam_id_64 ' ] : f for f in features }
p_dict = { p [ ' steam_id_64 ' ] : p for p in l2_players }
2026-01-26 21:10:42 +08:00
for r in top_ids :
2026-01-26 02:13:06 +08:00
sid = r [ ' steam_id_64 ' ]
p = p_dict . get ( sid )
if not p : continue
m = dict ( p )
f = f_dict . get ( sid )
if f :
m . update ( dict ( f ) )
else :
stats = StatsService . get_player_basic_stats ( sid )
if stats :
m [ ' basic_avg_rating ' ] = stats [ ' rating ' ]
m [ ' basic_avg_kd ' ] = stats [ ' kd ' ]
m [ ' basic_avg_kast ' ] = stats [ ' kast ' ]
else :
m [ ' basic_avg_rating ' ] = 0
m [ ' basic_avg_kd ' ] = 0
m [ ' basic_avg_kast ' ] = 0
m [ ' matches_played ' ] = r [ ' cnt ' ]
merged . append ( m )
return merged , total
2026-01-26 21:10:42 +08:00
# L3 empty fallback
2026-01-26 02:13:06 +08:00
l2_players , total = StatsService . get_players ( page , per_page , sort_by = None )
merged = [ ]
2026-01-26 21:10:42 +08:00
attach_match_counts ( l2_players )
2026-01-26 02:13:06 +08:00
for p in l2_players :
m = dict ( p )
stats = StatsService . get_player_basic_stats ( p [ ' steam_id_64 ' ] )
if stats :
m [ ' basic_avg_rating ' ] = stats [ ' rating ' ]
m [ ' basic_avg_kd ' ] = stats [ ' kd ' ]
m [ ' basic_avg_kast ' ] = stats [ ' kast ' ]
else :
m [ ' basic_avg_rating ' ] = 0
m [ ' basic_avg_kd ' ] = 0
m [ ' basic_avg_kast ' ] = 0
m [ ' matches_played ' ] = p . get ( ' matches_played ' , 0 )
merged . append ( m )
if sort_by != ' rating ' :
merged . sort ( key = lambda x : x . get ( order_col , 0 ) or 0 , reverse = True )
return merged , total
2026-01-27 00:57:35 +08:00
2026-01-26 21:10:42 +08:00
# Normal L3 browse
2026-01-26 02:13:06 +08:00
sql = f " SELECT * FROM dm_player_features ORDER BY { order_col } DESC LIMIT ? OFFSET ? "
features = query_db ( ' l3 ' , sql , [ per_page , offset ] )
total = query_db ( ' l3 ' , " SELECT COUNT(*) as cnt FROM dm_player_features " , one = True ) [ ' cnt ' ]
if not features :
return [ ] , total
steam_ids = [ f [ ' steam_id_64 ' ] for f in features ]
l2_players = StatsService . get_players_by_ids ( steam_ids )
p_dict = { p [ ' steam_id_64 ' ] : p for p in l2_players }
merged = [ ]
for f in features :
m = dict ( f )
p = p_dict . get ( f [ ' steam_id_64 ' ] )
if p :
m . update ( dict ( p ) )
else :
2026-01-26 21:10:42 +08:00
m [ ' username ' ] = f [ ' steam_id_64 ' ]
2026-01-26 02:13:06 +08:00
m [ ' avatar_url ' ] = None
merged . append ( m )
return merged , total
@staticmethod
2026-01-26 21:10:42 +08:00
def rebuild_all_features ( min_matches = 5 ) :
"""
Refreshes the L3 Data Mart with full feature calculations .
"""
from web . config import Config
2026-01-27 00:57:35 +08:00
from web . services . web_service import WebService
import json
2026-01-26 21:10:42 +08:00
l3_db_path = Config . DB_L3_PATH
l2_db_path = Config . DB_L2_PATH
2026-01-27 00:57:35 +08:00
# Get Team Players
lineups = WebService . get_lineups ( )
team_player_ids = set ( )
for lineup in lineups :
if lineup [ ' player_ids_json ' ] :
try :
ids = json . loads ( lineup [ ' player_ids_json ' ] )
# Ensure IDs are strings
team_player_ids . update ( [ str ( i ) for i in ids ] )
except :
pass
if not team_player_ids :
print ( " No players found in any team lineup. Skipping L3 rebuild. " )
return 0
2026-01-26 21:10:42 +08:00
conn_l2 = sqlite3 . connect ( l2_db_path )
conn_l2 . row_factory = sqlite3 . Row
try :
2026-01-27 00:57:35 +08:00
print ( f " Loading L2 data for { len ( team_player_ids ) } players... " )
df = FeatureService . _load_and_calculate_dataframe ( conn_l2 , list ( team_player_ids ) )
2026-01-26 02:13:06 +08:00
2026-01-26 21:10:42 +08:00
if df is None or df . empty :
print ( " No data to process. " )
return 0
print ( " Calculating Scores... " )
df = FeatureService . _calculate_ultimate_scores ( df )
print ( " Saving to L3... " )
conn_l3 = sqlite3 . connect ( l3_db_path )
cursor = conn_l3 . cursor ( )
# Ensure columns exist in DataFrame match DB columns
cursor . execute ( " PRAGMA table_info(dm_player_features) " )
valid_cols = [ r [ 1 ] for r in cursor . fetchall ( ) ]
# Filter DF columns
df_cols = [ c for c in df . columns if c in valid_cols ]
df_to_save = df [ df_cols ] . copy ( )
df_to_save [ ' updated_at ' ] = pd . Timestamp . now ( ) . strftime ( ' % Y- % m- %d % H: % M: % S ' )
# Generate Insert SQL
2026-01-27 00:57:35 +08:00
print ( f " DEBUG: Saving { len ( df_to_save . columns ) } columns to L3. Sample side_kd_ct: { df_to_save . get ( ' side_kd_ct ' , pd . Series ( [ 0 ] ) ) . iloc [ 0 ] } " )
2026-01-26 21:10:42 +08:00
placeholders = ' , ' . join ( [ ' ? ' ] * len ( df_to_save . columns ) )
cols_str = ' , ' . join ( df_to_save . columns )
sql = f " INSERT OR REPLACE INTO dm_player_features ( { cols_str } ) VALUES ( { placeholders } ) "
data = df_to_save . values . tolist ( )
cursor . executemany ( sql , data )
conn_l3 . commit ( )
conn_l3 . close ( )
return len ( df )
except Exception as e :
print ( f " Rebuild Error: { e } " )
import traceback
traceback . print_exc ( )
return 0
finally :
conn_l2 . close ( )
@staticmethod
2026-01-27 00:57:35 +08:00
def _load_and_calculate_dataframe ( conn , player_ids ) :
if not player_ids :
return None
placeholders = ' , ' . join ( [ ' ? ' ] * len ( player_ids ) )
2026-01-26 21:10:42 +08:00
# 1. Basic Stats
2026-01-27 00:57:35 +08:00
query_basic = f """
2026-01-26 21:10:42 +08:00
SELECT
steam_id_64 ,
COUNT ( * ) as matches_played ,
SUM ( round_total ) as rounds_played ,
AVG ( rating ) as basic_avg_rating ,
AVG ( kd_ratio ) as basic_avg_kd ,
AVG ( adr ) as basic_avg_adr ,
AVG ( kast ) as basic_avg_kast ,
AVG ( rws ) as basic_avg_rws ,
SUM ( headshot_count ) as sum_hs ,
SUM ( kills ) as sum_kills ,
SUM ( deaths ) as sum_deaths ,
SUM ( first_kill ) as sum_fk ,
SUM ( first_death ) as sum_fd ,
SUM ( clutch_1v1 ) as sum_1v1 ,
SUM ( clutch_1v2 ) as sum_1v2 ,
2026-01-27 03:11:17 +08:00
SUM ( clutch_1v3 ) + SUM ( clutch_1v4 ) + SUM ( clutch_1v5 ) as sum_1v3p ,
2026-01-26 21:10:42 +08:00
SUM ( kill_2 ) as sum_2k ,
SUM ( kill_3 ) as sum_3k ,
SUM ( kill_4 ) as sum_4k ,
SUM ( kill_5 ) as sum_5k ,
SUM ( assisted_kill ) as sum_assist ,
SUM ( perfect_kill ) as sum_perfect ,
SUM ( revenge_kill ) as sum_revenge ,
SUM ( awp_kill ) as sum_awp ,
SUM ( jump_count ) as sum_jump ,
2026-01-26 22:04:29 +08:00
SUM ( mvp_count ) as sum_mvps ,
SUM ( planted_bomb ) as sum_plants ,
SUM ( defused_bomb ) as sum_defuses ,
SUM ( CASE
WHEN flash_assists > 0 THEN flash_assists
WHEN assists > assisted_kill THEN assists - assisted_kill
ELSE 0
END ) as sum_flash_assists ,
2026-01-26 21:10:42 +08:00
SUM ( throw_harm ) as sum_util_dmg ,
SUM ( flash_time ) as sum_flash_time ,
SUM ( flash_enemy ) as sum_flash_enemy ,
SUM ( flash_team ) as sum_flash_team ,
SUM ( util_flash_usage ) as sum_util_flash ,
SUM ( util_smoke_usage ) as sum_util_smoke ,
SUM ( util_molotov_usage ) as sum_util_molotov ,
SUM ( util_he_usage ) as sum_util_he ,
SUM ( util_decoy_usage ) as sum_util_decoy
FROM fact_match_players
2026-01-27 00:57:35 +08:00
WHERE steam_id_64 IN ( { placeholders } )
2026-01-26 21:10:42 +08:00
GROUP BY steam_id_64
"""
2026-01-27 00:57:35 +08:00
df = pd . read_sql_query ( query_basic , conn , params = player_ids )
2026-01-26 21:10:42 +08:00
if df . empty : return None
# Basic Derived
df [ ' basic_headshot_rate ' ] = df [ ' sum_hs ' ] / df [ ' sum_kills ' ] . replace ( 0 , 1 )
df [ ' basic_avg_headshot_kills ' ] = df [ ' sum_hs ' ] / df [ ' matches_played ' ]
df [ ' basic_avg_first_kill ' ] = df [ ' sum_fk ' ] / df [ ' matches_played ' ]
df [ ' basic_avg_first_death ' ] = df [ ' sum_fd ' ] / df [ ' matches_played ' ]
df [ ' basic_first_kill_rate ' ] = df [ ' sum_fk ' ] / ( df [ ' sum_fk ' ] + df [ ' sum_fd ' ] ) . replace ( 0 , 1 )
df [ ' basic_first_death_rate ' ] = df [ ' sum_fd ' ] / ( df [ ' sum_fk ' ] + df [ ' sum_fd ' ] ) . replace ( 0 , 1 )
df [ ' basic_avg_kill_2 ' ] = df [ ' sum_2k ' ] / df [ ' matches_played ' ]
df [ ' basic_avg_kill_3 ' ] = df [ ' sum_3k ' ] / df [ ' matches_played ' ]
df [ ' basic_avg_kill_4 ' ] = df [ ' sum_4k ' ] / df [ ' matches_played ' ]
df [ ' basic_avg_kill_5 ' ] = df [ ' sum_5k ' ] / df [ ' matches_played ' ]
df [ ' basic_avg_assisted_kill ' ] = df [ ' sum_assist ' ] / df [ ' matches_played ' ]
df [ ' basic_avg_perfect_kill ' ] = df [ ' sum_perfect ' ] / df [ ' matches_played ' ]
df [ ' basic_avg_revenge_kill ' ] = df [ ' sum_revenge ' ] / df [ ' matches_played ' ]
df [ ' basic_avg_awp_kill ' ] = df [ ' sum_awp ' ] / df [ ' matches_played ' ]
df [ ' basic_avg_jump_count ' ] = df [ ' sum_jump ' ] / df [ ' matches_played ' ]
2026-01-26 22:04:29 +08:00
df [ ' basic_avg_mvps ' ] = df [ ' sum_mvps ' ] / df [ ' matches_played ' ]
df [ ' basic_avg_plants ' ] = df [ ' sum_plants ' ] / df [ ' matches_played ' ]
df [ ' basic_avg_defuses ' ] = df [ ' sum_defuses ' ] / df [ ' matches_played ' ]
df [ ' basic_avg_flash_assists ' ] = df [ ' sum_flash_assists ' ] / df [ ' matches_played ' ]
2026-01-26 21:10:42 +08:00
# UTIL Basic
df [ ' util_avg_nade_dmg ' ] = df [ ' sum_util_dmg ' ] / df [ ' matches_played ' ]
df [ ' util_avg_flash_time ' ] = df [ ' sum_flash_time ' ] / df [ ' matches_played ' ]
df [ ' util_avg_flash_enemy ' ] = df [ ' sum_flash_enemy ' ] / df [ ' matches_played ' ]
valid_ids = tuple ( df [ ' steam_id_64 ' ] . tolist ( ) )
placeholders = ' , ' . join ( [ ' ? ' ] * len ( valid_ids ) )
2026-01-28 01:20:26 +08:00
try :
query_weapon_kills = f """
SELECT attacker_steam_id as steam_id_64 ,
SUM ( CASE WHEN lower ( weapon ) LIKE ' % knife % ' OR lower ( weapon ) LIKE ' % bayonet % ' THEN 1 ELSE 0 END ) as knife_kills ,
SUM ( CASE WHEN lower ( weapon ) LIKE ' % taser % ' OR lower ( weapon ) LIKE ' % zeus % ' THEN 1 ELSE 0 END ) as zeus_kills
FROM fact_round_events
WHERE event_type = ' kill '
AND attacker_steam_id IN ( { placeholders } )
GROUP BY attacker_steam_id
"""
df_weapon_kills = pd . read_sql_query ( query_weapon_kills , conn , params = valid_ids )
if not df_weapon_kills . empty :
df = df . merge ( df_weapon_kills , on = ' steam_id_64 ' , how = ' left ' )
else :
df [ ' knife_kills ' ] = 0
df [ ' zeus_kills ' ] = 0
except Exception :
df [ ' knife_kills ' ] = 0
df [ ' zeus_kills ' ] = 0
df [ ' basic_avg_knife_kill ' ] = df [ ' knife_kills ' ] . fillna ( 0 ) / df [ ' matches_played ' ] . replace ( 0 , 1 )
df [ ' basic_avg_zeus_kill ' ] = df [ ' zeus_kills ' ] . fillna ( 0 ) / df [ ' matches_played ' ] . replace ( 0 , 1 )
try :
query_zeus_pick = f """
SELECT steam_id_64 ,
AVG ( CASE WHEN has_zeus = 1 THEN 1.0 ELSE 0.0 END ) as basic_zeus_pick_rate
FROM fact_round_player_economy
WHERE steam_id_64 IN ( { placeholders } )
GROUP BY steam_id_64
"""
df_zeus_pick = pd . read_sql_query ( query_zeus_pick , conn , params = valid_ids )
if not df_zeus_pick . empty :
df = df . merge ( df_zeus_pick , on = ' steam_id_64 ' , how = ' left ' )
except Exception :
df [ ' basic_zeus_pick_rate ' ] = 0.0
df [ ' basic_zeus_pick_rate ' ] = df . get ( ' basic_zeus_pick_rate ' , 0.0 )
df [ ' basic_zeus_pick_rate ' ] = pd . to_numeric ( df [ ' basic_zeus_pick_rate ' ] , errors = ' coerce ' ) . fillna ( 0.0 )
2026-01-26 21:10:42 +08:00
# 2. STA (Detailed)
query_sta = f """
SELECT mp . steam_id_64 , mp . rating , mp . is_win , m . start_time , m . duration
FROM fact_match_players mp
JOIN fact_matches m ON mp . match_id = m . match_id
WHERE mp . steam_id_64 IN ( { placeholders } )
ORDER BY mp . steam_id_64 , m . start_time
"""
df_matches = pd . read_sql_query ( query_sta , conn , params = valid_ids )
sta_list = [ ]
for pid , group in df_matches . groupby ( ' steam_id_64 ' ) :
group = group . sort_values ( ' start_time ' )
last_30 = group . tail ( 30 )
# Fatigue Calc
# Simple heuristic: split matches by day, compare early (first 3) vs late (rest)
group [ ' date ' ] = pd . to_datetime ( group [ ' start_time ' ] , unit = ' s ' ) . dt . date
day_counts = group . groupby ( ' date ' ) . size ( )
busy_days = day_counts [ day_counts > = 4 ] . index # Days with 4+ matches
fatigue_decays = [ ]
for day in busy_days :
day_matches = group [ group [ ' date ' ] == day ]
if len ( day_matches ) > = 4 :
early_rating = day_matches . head ( 3 ) [ ' rating ' ] . mean ( )
late_rating = day_matches . tail ( len ( day_matches ) - 3 ) [ ' rating ' ] . mean ( )
fatigue_decays . append ( early_rating - late_rating )
avg_fatigue = np . mean ( fatigue_decays ) if fatigue_decays else 0
sta_list . append ( {
' steam_id_64 ' : pid ,
' sta_last_30_rating ' : last_30 [ ' rating ' ] . mean ( ) ,
' sta_win_rating ' : group [ group [ ' is_win ' ] == 1 ] [ ' rating ' ] . mean ( ) ,
' sta_loss_rating ' : group [ group [ ' is_win ' ] == 0 ] [ ' rating ' ] . mean ( ) ,
' sta_rating_volatility ' : group . tail ( 10 ) [ ' rating ' ] . std ( ) if len ( group ) > 1 else 0 ,
' sta_time_rating_corr ' : group [ ' duration ' ] . corr ( group [ ' rating ' ] ) if len ( group ) > 2 and group [ ' rating ' ] . std ( ) > 0 else 0 ,
' sta_fatigue_decay ' : avg_fatigue
} )
df = df . merge ( pd . DataFrame ( sta_list ) , on = ' steam_id_64 ' , how = ' left ' )
# 3. BAT (High ELO)
query_elo = f """
SELECT mp . steam_id_64 , mp . kd_ratio ,
( SELECT AVG ( group_origin_elo ) FROM fact_match_teams fmt WHERE fmt . match_id = mp . match_id AND group_origin_elo > 0 ) as elo
FROM fact_match_players mp
WHERE mp . steam_id_64 IN ( { placeholders } )
"""
df_elo = pd . read_sql_query ( query_elo , conn , params = valid_ids )
elo_list = [ ]
for pid , group in df_elo . groupby ( ' steam_id_64 ' ) :
avg = group [ ' elo ' ] . mean ( ) or 1000
elo_list . append ( {
' steam_id_64 ' : pid ,
' bat_kd_diff_high_elo ' : group [ group [ ' elo ' ] > avg ] [ ' kd_ratio ' ] . mean ( ) ,
' bat_kd_diff_low_elo ' : group [ group [ ' elo ' ] < = avg ] [ ' kd_ratio ' ] . mean ( )
} )
df = df . merge ( pd . DataFrame ( elo_list ) , on = ' steam_id_64 ' , how = ' left ' )
# Duel Win Rate
query_duel = f """
SELECT steam_id_64 , SUM ( entry_kills ) as ek , SUM ( entry_deaths ) as ed
FROM fact_match_players WHERE steam_id_64 IN ( { placeholders } ) GROUP BY steam_id_64
"""
df_duel = pd . read_sql_query ( query_duel , conn , params = valid_ids )
df_duel [ ' bat_avg_duel_win_rate ' ] = df_duel [ ' ek ' ] / ( df_duel [ ' ek ' ] + df_duel [ ' ed ' ] ) . replace ( 0 , 1 )
df = df . merge ( df_duel [ [ ' steam_id_64 ' , ' bat_avg_duel_win_rate ' ] ] , on = ' steam_id_64 ' , how = ' left ' )
# 4. HPS
# Clutch Rate
df [ ' hps_clutch_win_rate_1v1 ' ] = df [ ' sum_1v1 ' ] / df [ ' matches_played ' ]
df [ ' hps_clutch_win_rate_1v3_plus ' ] = df [ ' sum_1v3p ' ] / df [ ' matches_played ' ]
# Prepare Detailed Event Data for HPS (Comeback), PTL (KD), and T/CT
# A. Determine Side Info using fact_match_teams
# 1. Get Match Teams
query_teams = f """
SELECT match_id , group_fh_role , group_uids
FROM fact_match_teams
WHERE match_id IN ( SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ( { placeholders } ) )
"""
df_teams = pd . read_sql_query ( query_teams , conn , params = valid_ids )
# 2. Get Player UIDs
query_uids = f " SELECT match_id, steam_id_64, uid FROM fact_match_players WHERE steam_id_64 IN ( { placeholders } ) "
df_uids = pd . read_sql_query ( query_uids , conn , params = valid_ids )
# 3. Get Match Meta (Start Time for MR12/MR15)
query_meta = f " SELECT match_id, start_time FROM fact_matches WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ( { placeholders } )) "
df_meta = pd . read_sql_query ( query_meta , conn , params = valid_ids )
df_meta [ ' halftime_round ' ] = np . where ( df_meta [ ' start_time ' ] > 1695772800 , 12 , 15 ) # CS2 Release Date approx
# 4. Build FH Side DataFrame
fh_rows = [ ]
if not df_teams . empty and not df_uids . empty :
match_teams = { } # match_id -> [(role, [uids])]
for _ , row in df_teams . iterrows ( ) :
mid = row [ ' match_id ' ]
role = row [ ' group_fh_role ' ] # 1=CT, 0=T
try :
uids = str ( row [ ' group_uids ' ] ) . split ( ' , ' )
uids = [ u . strip ( ) for u in uids if u . strip ( ) ]
except :
uids = [ ]
if mid not in match_teams : match_teams [ mid ] = [ ]
match_teams [ mid ] . append ( ( role , uids ) )
for _ , row in df_uids . iterrows ( ) :
mid = row [ ' match_id ' ]
sid = row [ ' steam_id_64 ' ]
uid = str ( row [ ' uid ' ] )
if mid in match_teams :
for role , uids in match_teams [ mid ] :
if uid in uids :
fh_rows . append ( {
' match_id ' : mid ,
' steam_id_64 ' : sid ,
' fh_side ' : ' CT ' if role == 1 else ' T '
} )
break
df_fh_sides = pd . DataFrame ( fh_rows )
2026-01-28 01:20:26 +08:00
if df_fh_sides . empty :
df_fh_sides = pd . DataFrame ( columns = [ ' match_id ' , ' steam_id_64 ' , ' fh_side ' , ' halftime_round ' ] )
else :
2026-01-26 21:10:42 +08:00
df_fh_sides = df_fh_sides . merge ( df_meta [ [ ' match_id ' , ' halftime_round ' ] ] , on = ' match_id ' , how = ' left ' )
2026-01-28 01:20:26 +08:00
if ' halftime_round ' not in df_fh_sides . columns :
df_fh_sides [ ' halftime_round ' ] = 15
df_fh_sides [ ' halftime_round ' ] = df_fh_sides [ ' halftime_round ' ] . fillna ( 15 ) . astype ( int )
2026-01-26 21:10:42 +08:00
# B. Get Kill Events
query_events = f """
2026-01-28 01:20:26 +08:00
SELECT match_id , round_num , attacker_steam_id , victim_steam_id , event_type , is_headshot , event_time ,
weapon , trade_killer_steam_id , flash_assist_steam_id
2026-01-26 21:10:42 +08:00
FROM fact_round_events
WHERE event_type = ' kill '
AND ( attacker_steam_id IN ( { placeholders } ) OR victim_steam_id IN ( { placeholders } ) )
"""
df_events = pd . read_sql_query ( query_events , conn , params = valid_ids + valid_ids )
# C. Get Round Scores
query_rounds = f """
2026-01-28 01:20:26 +08:00
SELECT match_id , round_num , ct_score , t_score , winner_side , duration
2026-01-26 21:10:42 +08:00
FROM fact_rounds
WHERE match_id IN ( SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ( { placeholders } ) )
"""
df_rounds = pd . read_sql_query ( query_rounds , conn , params = valid_ids )
# Fix missing winner_side by calculating from score changes
if not df_rounds . empty :
df_rounds = df_rounds . sort_values ( [ ' match_id ' , ' round_num ' ] ) . reset_index ( drop = True )
df_rounds [ ' prev_ct ' ] = df_rounds . groupby ( ' match_id ' ) [ ' ct_score ' ] . shift ( 1 ) . fillna ( 0 )
df_rounds [ ' prev_t ' ] = df_rounds . groupby ( ' match_id ' ) [ ' t_score ' ] . shift ( 1 ) . fillna ( 0 )
# Determine winner based on score increment
df_rounds [ ' ct_win ' ] = ( df_rounds [ ' ct_score ' ] > df_rounds [ ' prev_ct ' ] )
df_rounds [ ' t_win ' ] = ( df_rounds [ ' t_score ' ] > df_rounds [ ' prev_t ' ] )
df_rounds [ ' calculated_winner ' ] = np . where ( df_rounds [ ' ct_win ' ] , ' CT ' ,
np . where ( df_rounds [ ' t_win ' ] , ' T ' , None ) )
# Force overwrite winner_side with calculated winner since DB data is unreliable (mostly NULL)
df_rounds [ ' winner_side ' ] = df_rounds [ ' calculated_winner ' ]
2026-01-27 00:57:35 +08:00
# Ensure winner_side is string type to match side ('CT', 'T')
df_rounds [ ' winner_side ' ] = df_rounds [ ' winner_side ' ] . astype ( str )
2026-01-26 21:10:42 +08:00
# Fallback for Round 1 if still None (e.g. if prev is 0 and score is 1)
# Logic above handles Round 1 correctly (prev is 0).
# --- Process Logic ---
# Logic above handles Round 1 correctly (prev is 0).
# --- Process Logic ---
has_events = not df_events . empty
has_sides = not df_fh_sides . empty
if has_events and has_sides :
# 1. Attacker Side
df_events = df_events . merge ( df_fh_sides , left_on = [ ' match_id ' , ' attacker_steam_id ' ] , right_on = [ ' match_id ' , ' steam_id_64 ' ] , how = ' left ' )
df_events . rename ( columns = { ' fh_side ' : ' att_fh_side ' } , inplace = True )
df_events . drop ( columns = [ ' steam_id_64 ' ] , inplace = True )
# 2. Victim Side
df_events = df_events . merge ( df_fh_sides , left_on = [ ' match_id ' , ' victim_steam_id ' ] , right_on = [ ' match_id ' , ' steam_id_64 ' ] , how = ' left ' , suffixes = ( ' ' , ' _vic ' ) )
df_events . rename ( columns = { ' fh_side ' : ' vic_fh_side ' } , inplace = True )
df_events . drop ( columns = [ ' steam_id_64 ' ] , inplace = True )
# 3. Determine Actual Side (CT/T)
# Logic: If round <= halftime -> FH Side. Else -> Opposite.
def calc_side(fh_side, round_num, halftime):
    """Return the side ('CT'/'T') a player plays in a given round.

    Up to and including the halftime round the player is on their
    first-half side; afterwards they are on the opposite side. Returns
    None when the first-half side is missing (NaN).

    NOTE(review): appears unused in the visible code path — the
    vectorized np.where below implements the same mapping.
    """
    if pd.isna(fh_side):
        return None
    in_first_half = round_num <= halftime
    if in_first_half:
        return fh_side
    return {'CT': 'T'}.get(fh_side, 'CT')
# Vectorized approach
# Attacker
mask_fh_att = df_events [ ' round_num ' ] < = df_events [ ' halftime_round ' ]
df_events [ ' attacker_side ' ] = np . where ( mask_fh_att , df_events [ ' att_fh_side ' ] ,
np . where ( df_events [ ' att_fh_side ' ] == ' CT ' , ' T ' , ' CT ' ) )
# Victim
mask_fh_vic = df_events [ ' round_num ' ] < = df_events [ ' halftime_round ' ]
df_events [ ' victim_side ' ] = np . where ( mask_fh_vic , df_events [ ' vic_fh_side ' ] ,
np . where ( df_events [ ' vic_fh_side ' ] == ' CT ' , ' T ' , ' CT ' ) )
# Merge Scores
df_events = df_events . merge ( df_rounds , on = [ ' match_id ' , ' round_num ' ] , how = ' left ' )
2026-01-27 00:57:35 +08:00
# --- BAT: Win Rate vs All ---
# Removed as per request (Difficult to calculate / All Zeros)
df [ ' bat_win_rate_vs_all ' ] = 0
2026-01-26 21:10:42 +08:00
# --- HPS: Match Point & Comeback ---
# Match Point Win Rate
mp_rounds = df_rounds [ ( ( df_rounds [ ' ct_score ' ] == 12 ) | ( df_rounds [ ' t_score ' ] == 12 ) |
( df_rounds [ ' ct_score ' ] == 15 ) | ( df_rounds [ ' t_score ' ] == 15 ) ) ]
if not mp_rounds . empty and has_sides :
# Need player side for these rounds
# Expand sides for all rounds
q_all_rounds = f " SELECT match_id, round_num FROM fact_rounds WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ( { placeholders } )) "
df_all_rounds = pd . read_sql_query ( q_all_rounds , conn , params = valid_ids )
df_player_rounds = df_all_rounds . merge ( df_fh_sides , on = ' match_id ' )
mask_fh = df_player_rounds [ ' round_num ' ] < = df_player_rounds [ ' halftime_round ' ]
df_player_rounds [ ' side ' ] = np . where ( mask_fh , df_player_rounds [ ' fh_side ' ] ,
np . where ( df_player_rounds [ ' fh_side ' ] == ' CT ' , ' T ' , ' CT ' ) )
# Filter for MP rounds
# Join mp_rounds with df_player_rounds
mp_player = df_player_rounds . merge ( mp_rounds [ [ ' match_id ' , ' round_num ' , ' winner_side ' ] ] , on = [ ' match_id ' , ' round_num ' ] )
mp_player [ ' is_win ' ] = ( mp_player [ ' side ' ] == mp_player [ ' winner_side ' ] ) . astype ( int )
hps_mp = mp_player . groupby ( ' steam_id_64 ' ) [ ' is_win ' ] . mean ( ) . reset_index ( )
hps_mp . rename ( columns = { ' is_win ' : ' hps_match_point_win_rate ' } , inplace = True )
df = df . merge ( hps_mp , on = ' steam_id_64 ' , how = ' left ' )
else :
df [ ' hps_match_point_win_rate ' ] = 0.5
# Comeback KD Diff
# Attacker Context
df_events [ ' att_team_score ' ] = np . where ( df_events [ ' attacker_side ' ] == ' CT ' , df_events [ ' ct_score ' ] , df_events [ ' t_score ' ] )
df_events [ ' att_opp_score ' ] = np . where ( df_events [ ' attacker_side ' ] == ' CT ' , df_events [ ' t_score ' ] , df_events [ ' ct_score ' ] )
df_events [ ' is_comeback_att ' ] = ( df_events [ ' att_team_score ' ] + 4 < = df_events [ ' att_opp_score ' ] )
# Victim Context
df_events [ ' vic_team_score ' ] = np . where ( df_events [ ' victim_side ' ] == ' CT ' , df_events [ ' ct_score ' ] , df_events [ ' t_score ' ] )
df_events [ ' vic_opp_score ' ] = np . where ( df_events [ ' victim_side ' ] == ' CT ' , df_events [ ' t_score ' ] , df_events [ ' ct_score ' ] )
df_events [ ' is_comeback_vic ' ] = ( df_events [ ' vic_team_score ' ] + 4 < = df_events [ ' vic_opp_score ' ] )
att_k = df_events . groupby ( ' attacker_steam_id ' ) . size ( )
vic_d = df_events . groupby ( ' victim_steam_id ' ) . size ( )
cb_k = df_events [ df_events [ ' is_comeback_att ' ] ] . groupby ( ' attacker_steam_id ' ) . size ( )
cb_d = df_events [ df_events [ ' is_comeback_vic ' ] ] . groupby ( ' victim_steam_id ' ) . size ( )
kd_stats = pd . DataFrame ( { ' k ' : att_k , ' d ' : vic_d , ' cb_k ' : cb_k , ' cb_d ' : cb_d } ) . fillna ( 0 )
kd_stats [ ' kd ' ] = kd_stats [ ' k ' ] / kd_stats [ ' d ' ] . replace ( 0 , 1 )
kd_stats [ ' cb_kd ' ] = kd_stats [ ' cb_k ' ] / kd_stats [ ' cb_d ' ] . replace ( 0 , 1 )
kd_stats [ ' hps_comeback_kd_diff ' ] = kd_stats [ ' cb_kd ' ] - kd_stats [ ' kd ' ]
kd_stats . index . name = ' steam_id_64 '
df = df . merge ( kd_stats [ [ ' hps_comeback_kd_diff ' ] ] , on = ' steam_id_64 ' , how = ' left ' )
2026-01-27 00:57:35 +08:00
# HPS: Losing Streak KD Diff
# Logic: KD in rounds where team has lost >= 3 consecutive rounds vs Global KD
# 1. Identify Streak Rounds
if not df_rounds . empty :
# Ensure sorted
df_rounds = df_rounds . sort_values ( [ ' match_id ' , ' round_num ' ] )
# Shift to check previous results
# We need to handle match boundaries. Groupby match_id is safer.
# CT Loss Streak
g = df_rounds . groupby ( ' match_id ' )
df_rounds [ ' ct_lost_1 ' ] = g [ ' t_win ' ] . shift ( 1 ) . fillna ( False )
df_rounds [ ' ct_lost_2 ' ] = g [ ' t_win ' ] . shift ( 2 ) . fillna ( False )
df_rounds [ ' ct_lost_3 ' ] = g [ ' t_win ' ] . shift ( 3 ) . fillna ( False )
df_rounds [ ' ct_in_loss_streak ' ] = ( df_rounds [ ' ct_lost_1 ' ] & df_rounds [ ' ct_lost_2 ' ] & df_rounds [ ' ct_lost_3 ' ] )
# T Loss Streak
df_rounds [ ' t_lost_1 ' ] = g [ ' ct_win ' ] . shift ( 1 ) . fillna ( False )
df_rounds [ ' t_lost_2 ' ] = g [ ' ct_win ' ] . shift ( 2 ) . fillna ( False )
df_rounds [ ' t_lost_3 ' ] = g [ ' ct_win ' ] . shift ( 3 ) . fillna ( False )
df_rounds [ ' t_in_loss_streak ' ] = ( df_rounds [ ' t_lost_1 ' ] & df_rounds [ ' t_lost_2 ' ] & df_rounds [ ' t_lost_3 ' ] )
# Merge into events
# df_events already has 'match_id', 'round_num', 'attacker_side'
# We need to merge streak info
streak_cols = df_rounds [ [ ' match_id ' , ' round_num ' , ' ct_in_loss_streak ' , ' t_in_loss_streak ' ] ]
df_events = df_events . merge ( streak_cols , on = [ ' match_id ' , ' round_num ' ] , how = ' left ' )
# Determine if attacker is in streak
df_events [ ' att_is_loss_streak ' ] = np . where (
df_events [ ' attacker_side ' ] == ' CT ' , df_events [ ' ct_in_loss_streak ' ] ,
np . where ( df_events [ ' attacker_side ' ] == ' T ' , df_events [ ' t_in_loss_streak ' ] , False )
)
# Determine if victim is in streak (for deaths)
df_events [ ' vic_is_loss_streak ' ] = np . where (
df_events [ ' victim_side ' ] == ' CT ' , df_events [ ' ct_in_loss_streak ' ] ,
np . where ( df_events [ ' victim_side ' ] == ' T ' , df_events [ ' t_in_loss_streak ' ] , False )
)
# Calculate KD in Streak
ls_k = df_events [ df_events [ ' att_is_loss_streak ' ] ] . groupby ( ' attacker_steam_id ' ) . size ( )
ls_d = df_events [ df_events [ ' vic_is_loss_streak ' ] ] . groupby ( ' victim_steam_id ' ) . size ( )
ls_stats = pd . DataFrame ( { ' ls_k ' : ls_k , ' ls_d ' : ls_d } ) . fillna ( 0 )
ls_stats [ ' ls_kd ' ] = ls_stats [ ' ls_k ' ] / ls_stats [ ' ls_d ' ] . replace ( 0 , 1 )
# Compare with Global KD (from df_sides or recomputed)
# Recompute global KD from events to be consistent
g_k = df_events . groupby ( ' attacker_steam_id ' ) . size ( )
g_d = df_events . groupby ( ' victim_steam_id ' ) . size ( )
g_stats = pd . DataFrame ( { ' g_k ' : g_k , ' g_d ' : g_d } ) . fillna ( 0 )
g_stats [ ' g_kd ' ] = g_stats [ ' g_k ' ] / g_stats [ ' g_d ' ] . replace ( 0 , 1 )
ls_stats = ls_stats . join ( g_stats [ [ ' g_kd ' ] ] , how = ' outer ' ) . fillna ( 0 )
ls_stats [ ' hps_losing_streak_kd_diff ' ] = ls_stats [ ' ls_kd ' ] - ls_stats [ ' g_kd ' ]
ls_stats . index . name = ' steam_id_64 '
df = df . merge ( ls_stats [ [ ' hps_losing_streak_kd_diff ' ] ] , on = ' steam_id_64 ' , how = ' left ' )
else :
df [ ' hps_losing_streak_kd_diff ' ] = 0
# HPS: Momentum Multi-kill Rate
# Team won 3+ rounds -> 2+ kills
# Need sequential win info.
# Hard to vectorise fully without accurate round sequence reconstruction including missing rounds.
# Placeholder: 0
df [ ' hps_momentum_multikill_rate ' ] = 0
# HPS: Tilt Rating Drop
df [ ' hps_tilt_rating_drop ' ] = 0
# HPS: Clutch Rating Rise
df [ ' hps_clutch_rating_rise ' ] = 0
# HPS: Undermanned Survival
df [ ' hps_undermanned_survival_time ' ] = 0
2026-01-26 21:10:42 +08:00
# --- PTL: Pistol Stats ---
pistol_rounds = [ 1 , 13 ]
df_pistol = df_events [ df_events [ ' round_num ' ] . isin ( pistol_rounds ) ]
# Per-player pistol-round K/D and headshot share.
# NOTE(review): 'ptl_pistol_util_efficiency' is computed as headshots per
# kill (phs / pk), not utility usage — name kept for schema compatibility.
if not df_pistol.empty:
    pk = df_pistol.groupby('attacker_steam_id').size()
    pd_death = df_pistol.groupby('victim_steam_id').size()
    p_stats = pd.DataFrame({'pk': pk, 'pd': pd_death}).fillna(0)
    # Zero pistol deaths: divide by 1 instead of 0.
    p_stats['ptl_pistol_kd'] = p_stats['pk'] / p_stats['pd'].replace(0, 1)
    phs = df_pistol[df_pistol['is_headshot'] == 1].groupby('attacker_steam_id').size()
    p_stats['phs'] = phs.reindex(p_stats.index, fill_value=0)
    p_stats['ptl_pistol_util_efficiency'] = p_stats['phs'] / p_stats['pk'].replace(0, 1)
    p_stats.index.name = 'steam_id_64'
    df = df.merge(p_stats[['ptl_pistol_kd', 'ptl_pistol_util_efficiency']], on='steam_id_64', how='left')
    # Fix: players with no pistol-round events were left as NaN by the
    # left merge; give them the same neutral defaults as the empty branch.
    df['ptl_pistol_kd'] = df['ptl_pistol_kd'].fillna(1.0)
    df['ptl_pistol_util_efficiency'] = df['ptl_pistol_util_efficiency'].fillna(0.0)
else:
    df['ptl_pistol_kd'] = 1.0
    df['ptl_pistol_util_efficiency'] = 0.0
2026-01-27 00:57:35 +08:00
# --- T/CT Stats (Directly from L2 Side Tables) ---
query_sides_l2 = f """
SELECT
steam_id_64 ,
' CT ' as side ,
COUNT ( * ) as matches ,
SUM ( round_total ) as rounds ,
AVG ( rating2 ) as rating ,
SUM ( kills ) as kills ,
SUM ( deaths ) as deaths ,
SUM ( assists ) as assists ,
AVG ( CAST ( is_win as FLOAT ) ) as win_rate ,
SUM ( first_kill ) as fk ,
SUM ( first_death ) as fd ,
AVG ( kast ) as kast ,
AVG ( rws ) as rws ,
SUM ( kill_2 + kill_3 + kill_4 + kill_5 ) as multi_kill_rounds ,
SUM ( headshot_count ) as hs
FROM fact_match_players_ct
WHERE steam_id_64 IN ( { placeholders } )
GROUP BY steam_id_64
2026-01-26 21:10:42 +08:00
2026-01-27 00:57:35 +08:00
UNION ALL
2026-01-26 21:10:42 +08:00
2026-01-27 00:57:35 +08:00
SELECT
steam_id_64 ,
' T ' as side ,
COUNT ( * ) as matches ,
SUM ( round_total ) as rounds ,
AVG ( rating2 ) as rating ,
SUM ( kills ) as kills ,
SUM ( deaths ) as deaths ,
SUM ( assists ) as assists ,
AVG ( CAST ( is_win as FLOAT ) ) as win_rate ,
SUM ( first_kill ) as fk ,
SUM ( first_death ) as fd ,
AVG ( kast ) as kast ,
AVG ( rws ) as rws ,
SUM ( kill_2 + kill_3 + kill_4 + kill_5 ) as multi_kill_rounds ,
SUM ( headshot_count ) as hs
FROM fact_match_players_t
WHERE steam_id_64 IN ( { placeholders } )
GROUP BY steam_id_64
"""
2026-01-26 21:10:42 +08:00
2026-01-27 00:57:35 +08:00
df_sides = pd . read_sql_query ( query_sides_l2 , conn , params = valid_ids + valid_ids )
2026-01-26 21:10:42 +08:00
2026-01-27 00:57:35 +08:00
if not df_sides . empty :
# Calculate Derived Rates per row before pivoting
df_sides [ ' rounds ' ] = df_sides [ ' rounds ' ] . replace ( 0 , 1 ) # Avoid div by zero
# KD Calculation (Sum of Kills / Sum of Deaths)
df_sides [ ' kd ' ] = df_sides [ ' kills ' ] / df_sides [ ' deaths ' ] . replace ( 0 , 1 )
# KAST Proxy (if KAST is 0)
# KAST ~= (Kills + Assists + Survived) / Rounds
# Survived = Rounds - Deaths
if df_sides [ ' kast ' ] . mean ( ) == 0 :
df_sides [ ' survived ' ] = df_sides [ ' rounds ' ] - df_sides [ ' deaths ' ]
2026-01-27 01:40:56 +08:00
df_sides [ ' kast ' ] = ( df_sides [ ' kills ' ] + df_sides [ ' assists ' ] + df_sides [ ' survived ' ] ) / df_sides [ ' rounds ' ]
2026-01-27 00:57:35 +08:00
df_sides [ ' fk_rate ' ] = df_sides [ ' fk ' ] / df_sides [ ' rounds ' ]
df_sides [ ' fd_rate ' ] = df_sides [ ' fd ' ] / df_sides [ ' rounds ' ]
df_sides [ ' mk_rate ' ] = df_sides [ ' multi_kill_rounds ' ] / df_sides [ ' rounds ' ]
df_sides [ ' hs_rate ' ] = df_sides [ ' hs ' ] / df_sides [ ' kills ' ] . replace ( 0 , 1 )
# Pivot
# We want columns like side_rating_ct, side_rating_t, etc.
pivoted = df_sides . pivot ( index = ' steam_id_64 ' , columns = ' side ' ) . reset_index ( )
# Flatten MultiIndex columns
new_cols = [ ' steam_id_64 ' ]
for col_name , side in pivoted . columns [ 1 : ] :
# Map L2 column names to Feature names
# rating -> side_rating_{side}
# kd -> side_kd_{side}
# win_rate -> side_win_rate_{side}
# fk_rate -> side_first_kill_rate_{side}
# fd_rate -> side_first_death_rate_{side}
# kast -> side_kast_{side}
# rws -> side_rws_{side}
# mk_rate -> side_multikill_rate_{side}
# hs_rate -> side_headshot_rate_{side}
target_map = {
' rating ' : ' side_rating ' ,
' kd ' : ' side_kd ' ,
' win_rate ' : ' side_win_rate ' ,
' fk_rate ' : ' side_first_kill_rate ' ,
' fd_rate ' : ' side_first_death_rate ' ,
' kast ' : ' side_kast ' ,
' rws ' : ' side_rws ' ,
' mk_rate ' : ' side_multikill_rate ' ,
' hs_rate ' : ' side_headshot_rate '
}
if col_name in target_map :
new_cols . append ( f " { target_map [ col_name ] } _ { side . lower ( ) } " )
else :
new_cols . append ( f " { col_name } _ { side . lower ( ) } " ) # Fallback for intermediate cols if needed
pivoted . columns = new_cols
# Select only relevant columns to merge
cols_to_merge = [ c for c in new_cols if c . startswith ( ' side_ ' ) ]
cols_to_merge . append ( ' steam_id_64 ' )
df = df . merge ( pivoted [ cols_to_merge ] , on = ' steam_id_64 ' , how = ' left ' )
# Fill NaN with 0 for side stats
for c in cols_to_merge :
if c != ' steam_id_64 ' :
df [ c ] = df [ c ] . fillna ( 0 )
# Add calculated diffs for scoring/display if needed (or just let template handle it)
# KD Diff for L3 Score calculation
if ' side_rating_ct ' in df . columns and ' side_rating_t ' in df . columns :
df [ ' side_kd_diff_ct_t ' ] = df [ ' side_rating_ct ' ] - df [ ' side_rating_t ' ]
else :
df [ ' side_kd_diff_ct_t ' ] = 0
# --- Obj Override from Main Table (sum_plants, sum_defuses) ---
# side_obj_t = sum_plants / matches_played
# side_obj_ct = sum_defuses / matches_played
df [ ' side_obj_t ' ] = df [ ' sum_plants ' ] / df [ ' matches_played ' ] . replace ( 0 , 1 )
df [ ' side_obj_ct ' ] = df [ ' sum_defuses ' ] / df [ ' matches_played ' ] . replace ( 0 , 1 )
df [ ' side_obj_t ' ] = df [ ' side_obj_t ' ] . fillna ( 0 )
df [ ' side_obj_ct ' ] = df [ ' side_obj_ct ' ] . fillna ( 0 )
2026-01-26 21:10:42 +08:00
else :
# Fallbacks
cols = [ ' hps_match_point_win_rate ' , ' hps_comeback_kd_diff ' , ' ptl_pistol_kd ' , ' ptl_pistol_util_efficiency ' ,
2026-01-27 00:57:35 +08:00
' side_rating_ct ' , ' side_rating_t ' , ' side_first_kill_rate_ct ' , ' side_first_kill_rate_t ' , ' side_kd_diff_ct_t ' ,
' bat_win_rate_vs_all ' , ' hps_losing_streak_kd_diff ' , ' hps_momentum_multikill_rate ' ,
' hps_tilt_rating_drop ' , ' hps_clutch_rating_rise ' , ' hps_undermanned_survival_time ' ,
' side_win_rate_ct ' , ' side_win_rate_t ' , ' side_kd_ct ' , ' side_kd_t ' ,
' side_kast_ct ' , ' side_kast_t ' , ' side_rws_ct ' , ' side_rws_t ' ,
' side_first_death_rate_ct ' , ' side_first_death_rate_t ' ,
' side_multikill_rate_ct ' , ' side_multikill_rate_t ' ,
' side_headshot_rate_ct ' , ' side_headshot_rate_t ' ,
' side_obj_ct ' , ' side_obj_t ' ]
2026-01-26 21:10:42 +08:00
for c in cols :
df [ c ] = 0
df [ ' hps_match_point_win_rate ' ] = df [ ' hps_match_point_win_rate ' ] . fillna ( 0.5 )
2026-01-27 00:57:35 +08:00
df [ ' bat_win_rate_vs_all ' ] = df [ ' bat_win_rate_vs_all ' ] . fillna ( 0.5 )
df [ ' hps_losing_streak_kd_diff ' ] = df [ ' hps_losing_streak_kd_diff ' ] . fillna ( 0 )
2026-01-26 21:10:42 +08:00
2026-01-27 00:57:35 +08:00
# HPS Pressure Entry Rate (Entry Kills per Round in Losing Matches)
q_mp_team = f " SELECT match_id, steam_id_64, is_win, entry_kills, round_total FROM fact_match_players WHERE steam_id_64 IN ( { placeholders } ) "
2026-01-26 21:10:42 +08:00
df_mp_team = pd . read_sql_query ( q_mp_team , conn , params = valid_ids )
if not df_mp_team . empty :
losing_matches = df_mp_team [ df_mp_team [ ' is_win ' ] == 0 ]
if not losing_matches . empty :
2026-01-27 00:57:35 +08:00
# Sum Entry Kills / Sum Rounds
pressure_entry = losing_matches . groupby ( ' steam_id_64 ' ) [ [ ' entry_kills ' , ' round_total ' ] ] . sum ( ) . reset_index ( )
pressure_entry [ ' hps_pressure_entry_rate ' ] = pressure_entry [ ' entry_kills ' ] / pressure_entry [ ' round_total ' ] . replace ( 0 , 1 )
df = df . merge ( pressure_entry [ [ ' steam_id_64 ' , ' hps_pressure_entry_rate ' ] ] , on = ' steam_id_64 ' , how = ' left ' )
2026-01-26 21:10:42 +08:00
if ' hps_pressure_entry_rate ' not in df . columns :
df [ ' hps_pressure_entry_rate ' ] = 0
df [ ' hps_pressure_entry_rate ' ] = df [ ' hps_pressure_entry_rate ' ] . fillna ( 0 )
# 5. PTL (Additional Features: Kills & Multi)
query_ptl = f """
SELECT ev . attacker_steam_id as steam_id_64 , COUNT ( * ) as pistol_kills
FROM fact_round_events ev
WHERE ev . event_type = ' kill ' AND ev . round_num IN ( 1 , 13 )
AND ev . attacker_steam_id IN ( { placeholders } )
GROUP BY ev . attacker_steam_id
"""
df_ptl = pd . read_sql_query ( query_ptl , conn , params = valid_ids )
# PTL: average pistol-round kills per match.
if not df_ptl.empty:
    df = df.merge(df_ptl, on='steam_id_64', how='left')
    # Fix: guard the zero-match division (consistent with the
    # .replace(0, 1) pattern used elsewhere) and fill the NaNs the
    # left merge introduces for players with no pistol kills.
    df['ptl_pistol_kills'] = df['pistol_kills'].fillna(0) / df['matches_played'].replace(0, 1)
else:
    df['ptl_pistol_kills'] = 0
query_ptl_multi = f """
SELECT attacker_steam_id as steam_id_64 , COUNT ( * ) as multi_cnt
FROM (
SELECT match_id , round_num , attacker_steam_id , COUNT ( * ) as k
FROM fact_round_events
WHERE event_type = ' kill ' AND round_num IN ( 1 , 13 )
AND attacker_steam_id IN ( { placeholders } )
GROUP BY match_id , round_num , attacker_steam_id
HAVING k > = 2
)
GROUP BY attacker_steam_id
2026-01-26 02:13:06 +08:00
"""
2026-01-26 21:10:42 +08:00
df_ptl_multi = pd . read_sql_query ( query_ptl_multi , conn , params = valid_ids )
# PTL: pistol-round multi-kills (2+ kills in round 1 or 13) per match.
if not df_ptl_multi.empty:
    df = df.merge(df_ptl_multi, on='steam_id_64', how='left')
    # Fix: guard the zero-match division and fill merge NaNs, matching
    # the other per-match rate columns.
    df['ptl_pistol_multikills'] = df['multi_cnt'].fillna(0) / df['matches_played'].replace(0, 1)
else:
    df['ptl_pistol_multikills'] = 0
# PTL Win Rate (Pandas Logic using fixed winner_side)
if not df_rounds . empty and has_sides :
# Ensure df_player_rounds exists
if ' df_player_rounds ' not in locals ( ) :
q_all_rounds = f " SELECT match_id, round_num FROM fact_rounds WHERE match_id IN (SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ( { placeholders } )) "
df_all_rounds = pd . read_sql_query ( q_all_rounds , conn , params = valid_ids )
df_player_rounds = df_all_rounds . merge ( df_fh_sides , on = ' match_id ' )
mask_fh = df_player_rounds [ ' round_num ' ] < = df_player_rounds [ ' halftime_round ' ]
df_player_rounds [ ' side ' ] = np . where ( mask_fh , df_player_rounds [ ' fh_side ' ] ,
np . where ( df_player_rounds [ ' fh_side ' ] == ' CT ' , ' T ' , ' CT ' ) )
2026-01-27 00:57:35 +08:00
# Filter for Pistol Rounds (1 and after halftime)
# Use halftime_round logic (MR12: 13, MR15: 16)
player_pistol = df_player_rounds [
( df_player_rounds [ ' round_num ' ] == 1 ) |
( df_player_rounds [ ' round_num ' ] == df_player_rounds [ ' halftime_round ' ] + 1 )
] . copy ( )
2026-01-26 21:10:42 +08:00
# Merge with df_rounds to get calculated winner_side
2026-01-27 00:57:35 +08:00
df_rounds [ ' winner_side ' ] = df_rounds [ ' winner_side ' ] . astype ( str ) # Ensure string for merge safety
2026-01-26 21:10:42 +08:00
player_pistol = player_pistol . merge ( df_rounds [ [ ' match_id ' , ' round_num ' , ' winner_side ' ] ] , on = [ ' match_id ' , ' round_num ' ] , how = ' left ' )
# Calculate Win
2026-01-27 00:57:35 +08:00
# Ensure winner_side is in player_pistol columns after merge
if ' winner_side ' in player_pistol . columns :
player_pistol [ ' is_win ' ] = ( player_pistol [ ' side ' ] == player_pistol [ ' winner_side ' ] ) . astype ( int )
else :
player_pistol [ ' is_win ' ] = 0
2026-01-26 21:10:42 +08:00
ptl_wins = player_pistol . groupby ( ' steam_id_64 ' ) [ ' is_win ' ] . agg ( [ ' sum ' , ' count ' ] ) . reset_index ( )
ptl_wins . rename ( columns = { ' sum ' : ' pistol_wins ' , ' count ' : ' pistol_rounds ' } , inplace = True )
ptl_wins [ ' ptl_pistol_win_rate ' ] = ptl_wins [ ' pistol_wins ' ] / ptl_wins [ ' pistol_rounds ' ] . replace ( 0 , 1 )
df = df . merge ( ptl_wins [ [ ' steam_id_64 ' , ' ptl_pistol_win_rate ' ] ] , on = ' steam_id_64 ' , how = ' left ' )
else :
df [ ' ptl_pistol_win_rate ' ] = 0.5
df [ ' ptl_pistol_multikills ' ] = df [ ' ptl_pistol_multikills ' ] . fillna ( 0 )
df [ ' ptl_pistol_win_rate ' ] = df [ ' ptl_pistol_win_rate ' ] . fillna ( 0.5 )
# 7. UTIL (Enhanced with Prop Frequency)
# Usage Rate: Average number of grenades purchased per round
df [ ' util_usage_rate ' ] = (
df [ ' sum_util_flash ' ] + df [ ' sum_util_smoke ' ] +
df [ ' sum_util_molotov ' ] + df [ ' sum_util_he ' ] + df [ ' sum_util_decoy ' ]
) / df [ ' rounds_played ' ] . replace ( 0 , 1 ) * 100 # Multiply by 100 to make it comparable to other metrics (e.g. 1.5 nades/round -> 150)
# Fallback if no new data yet (rely on old logic or keep 0)
# We can try to fetch equipment_value as backup if sum is 0
if df [ ' util_usage_rate ' ] . sum ( ) == 0 :
query_eco = f """
SELECT steam_id_64 , AVG ( equipment_value ) as avg_equip_val
FROM fact_round_player_economy
WHERE steam_id_64 IN ( { placeholders } )
GROUP BY steam_id_64
"""
df_eco = pd . read_sql_query ( query_eco , conn , params = valid_ids )
if not df_eco . empty :
df_eco [ ' util_usage_rate_backup ' ] = df_eco [ ' avg_equip_val ' ] / 50.0 # Scaling factor for equipment value
df = df . merge ( df_eco [ [ ' steam_id_64 ' , ' util_usage_rate_backup ' ] ] , on = ' steam_id_64 ' , how = ' left ' )
df [ ' util_usage_rate ' ] = df [ ' util_usage_rate_backup ' ] . fillna ( 0 )
df . drop ( columns = [ ' util_usage_rate_backup ' ] , inplace = True )
2026-01-27 16:51:53 +08:00
# --- 8. New Feature Dimensions (Party, Rating Dist, ELO) ---
# Fetch Base Data for Calculation
q_new_feats = f """
SELECT mp . steam_id_64 , mp . match_id , mp . match_team_id , mp . team_id ,
2026-01-28 01:20:26 +08:00
mp . rating , mp . adr , mp . is_win , mp . map as map_name
2026-01-27 16:51:53 +08:00
FROM fact_match_players mp
WHERE mp . steam_id_64 IN ( { placeholders } )
"""
df_base = pd . read_sql_query ( q_new_feats , conn , params = valid_ids )
if not df_base . empty :
# 8.1 Party Size Stats
# Get party sizes for these matches
# We need to query party sizes for ALL matches involved
match_ids = df_base [ ' match_id ' ] . unique ( )
if len ( match_ids ) > 0 :
match_id_ph = ' , ' . join ( [ ' ? ' ] * len ( match_ids ) )
q_party_size = f """
SELECT match_id , match_team_id , COUNT ( * ) as party_size
FROM fact_match_players
WHERE match_id IN ( { match_id_ph } ) AND match_team_id > 0
GROUP BY match_id , match_team_id
"""
chunk_size = 900
party_sizes_list = [ ]
for i in range ( 0 , len ( match_ids ) , chunk_size ) :
chunk = match_ids [ i : i + chunk_size ]
chunk_ph = ' , ' . join ( [ ' ? ' ] * len ( chunk ) )
q_chunk = q_party_size . replace ( match_id_ph , chunk_ph )
party_sizes_list . append ( pd . read_sql_query ( q_chunk , conn , params = list ( chunk ) ) )
if party_sizes_list:
    df_party_sizes = pd.concat(party_sizes_list)
    df_base_party = df_base.merge(df_party_sizes, on=['match_id', 'match_team_id'], how='left')
else:
    # Fix: the fallback frame had no 'party_size' column, so the
    # fillna below raised KeyError. Default everyone to solo (size 1).
    df_base_party = df_base.copy()
    df_base_party['party_size'] = 1
# Rows whose team got no party info from the query (match_team_id <= 0)
# are treated as solo queue.
df_base_party['party_size'] = df_base_party['party_size'].fillna(1)
df_base_party = df_base_party[df_base_party['party_size'].isin([1, 2, 3, 4, 5])]
party_stats = df_base_party . groupby ( [ ' steam_id_64 ' , ' party_size ' ] ) . agg ( {
' is_win ' : ' mean ' ,
' rating ' : ' mean ' ,
' adr ' : ' mean '
} ) . reset_index ( )
pivoted_party = party_stats . pivot ( index = ' steam_id_64 ' , columns = ' party_size ' ) . reset_index ( )
new_party_cols = [ ' steam_id_64 ' ]
for col in pivoted_party . columns :
if col [ 0 ] == ' steam_id_64 ' : continue
metric , size = col
if size in [ 1 , 2 , 3 , 4 , 5 ] :
metric_name = ' win_rate ' if metric == ' is_win ' else metric
new_party_cols . append ( f " party_ { int ( size ) } _ { metric_name } " )
flat_data = { ' steam_id_64 ' : pivoted_party [ ' steam_id_64 ' ] }
for size in [ 1 , 2 , 3 , 4 , 5 ] :
if size in pivoted_party [ ' is_win ' ] . columns :
flat_data [ f " party_ { size } _win_rate " ] = pivoted_party [ ' is_win ' ] [ size ]
if size in pivoted_party [ ' rating ' ] . columns :
flat_data [ f " party_ { size } _rating " ] = pivoted_party [ ' rating ' ] [ size ]
if size in pivoted_party [ ' adr ' ] . columns :
flat_data [ f " party_ { size } _adr " ] = pivoted_party [ ' adr ' ] [ size ]
df_party_flat = pd . DataFrame ( flat_data )
df = df . merge ( df_party_flat , on = ' steam_id_64 ' , how = ' left ' )
2026-01-27 16:51:53 +08:00
# 8.2 Rating Distribution
# rating_dist_carry_rate (>1.5), normal (1.0-1.5), sacrifice (0.6-1.0), sleeping (<0.6)
# 8.2 Rating distribution tiers:
#   sleeping < 0.6, sacrifice [0.6, 1.0), normal [1.0, 1.5), carry >= 1.5.
# right=False makes every bin closed on the left and open on the right,
# so a rating of exactly 1.5 lands in 'carry'.
# (Fix: removed a duplicate pd.cut call whose result was immediately
# overwritten by this one.)
df_base['rating_tier'] = pd.cut(df_base['rating'],
                                bins=[-float('inf'), 0.6, 1.0, 1.5, float('inf')],
                                labels=['sleeping', 'sacrifice', 'normal', 'carry'],
                                right=False)
dist_stats = df_base . groupby ( [ ' steam_id_64 ' , ' rating_tier ' ] ) . size ( ) . unstack ( fill_value = 0 )
# Calculate rates
dist_stats = dist_stats . div ( dist_stats . sum ( axis = 1 ) , axis = 0 )
dist_stats . columns = [ f " rating_dist_ { c } _rate " for c in dist_stats . columns ]
dist_stats = dist_stats . reset_index ( )
df = df . merge ( dist_stats , on = ' steam_id_64 ' , how = ' left ' )
# 8.3 ELO Stratification
# Fetch Match Teams ELO
if len ( match_ids ) > 0 :
q_elo = f """
SELECT match_id , group_id , group_origin_elo
FROM fact_match_teams
WHERE match_id IN ( { match_id_ph } )
"""
# Use chunking again
elo_list = [ ]
for i in range ( 0 , len ( match_ids ) , chunk_size ) :
chunk = match_ids [ i : i + chunk_size ]
chunk_ph = ' , ' . join ( [ ' ? ' ] * len ( chunk ) )
q_chunk = q_elo . replace ( match_id_ph , chunk_ph )
elo_list . append ( pd . read_sql_query ( q_chunk , conn , params = list ( chunk ) ) )
if elo_list :
df_elo_teams = pd . concat ( elo_list )
# Merge to get Opponent ELO
# Player has match_id, team_id.
# Join on match_id.
# Filter where group_id != team_id
df_merged_elo = df_base . merge ( df_elo_teams , on = ' match_id ' , how = ' left ' )
df_merged_elo = df_merged_elo [ df_merged_elo [ ' group_id ' ] != df_merged_elo [ ' team_id ' ] ]
# Now df_merged_elo has 'group_origin_elo' which is Opponent ELO
# Binning: <1200, 1200-1400, 1400-1600, 1600-1800, 1800-2000, >2000
# bins: [-inf, 1200, 1400, 1600, 1800, 2000, inf]
elo_bins = [ - float ( ' inf ' ) , 1200 , 1400 , 1600 , 1800 , 2000 , float ( ' inf ' ) ]
elo_labels = [ ' lt1200 ' , ' 1200_1400 ' , ' 1400_1600 ' , ' 1600_1800 ' , ' 1800_2000 ' , ' gt2000 ' ]
df_merged_elo [ ' elo_bin ' ] = pd . cut ( df_merged_elo [ ' group_origin_elo ' ] , bins = elo_bins , labels = elo_labels , right = False )
elo_stats = df_merged_elo . groupby ( [ ' steam_id_64 ' , ' elo_bin ' ] ) . agg ( {
' rating ' : ' mean '
} ) . unstack ( fill_value = 0 ) # We only need rating for now
# Rename columns
# elo_stats columns are MultiIndex (rating, bin).
# We want: elo_{bin}_rating
flat_elo_data = { ' steam_id_64 ' : elo_stats . index }
for bin_label in elo_labels :
if bin_label in elo_stats [ ' rating ' ] . columns :
flat_elo_data [ f " elo_ { bin_label } _rating " ] = elo_stats [ ' rating ' ] [ bin_label ] . values
df_elo_flat = pd . DataFrame ( flat_elo_data )
df = df . merge ( df_elo_flat , on = ' steam_id_64 ' , how = ' left ' )
2026-01-27 21:26:07 +08:00
# 9. New Features: Economy & Pace
df_eco = FeatureService . _calculate_economy_features ( conn , valid_ids )
if df_eco is not None :
df = df . merge ( df_eco , on = ' steam_id_64 ' , how = ' left ' )
df_pace = FeatureService . _calculate_pace_features ( conn , valid_ids )
if df_pace is not None :
df = df . merge ( df_pace , on = ' steam_id_64 ' , how = ' left ' )
2026-01-28 01:20:26 +08:00
if not df_base . empty :
player_mean = df_base . groupby ( ' steam_id_64 ' , as_index = False ) [ ' rating ' ] . mean ( ) . rename ( columns = { ' rating ' : ' player_mean_rating ' } )
map_mean = df_base . groupby ( [ ' steam_id_64 ' , ' map_name ' ] , as_index = False ) [ ' rating ' ] . mean ( ) . rename ( columns = { ' rating ' : ' map_mean_rating ' } )
map_dev = map_mean . merge ( player_mean , on = ' steam_id_64 ' , how = ' left ' )
map_dev [ ' abs_dev ' ] = ( map_dev [ ' map_mean_rating ' ] - map_dev [ ' player_mean_rating ' ] ) . abs ( )
map_coef = map_dev . groupby ( ' steam_id_64 ' , as_index = False ) [ ' abs_dev ' ] . mean ( ) . rename ( columns = { ' abs_dev ' : ' map_stability_coef ' } )
df = df . merge ( map_coef , on = ' steam_id_64 ' , how = ' left ' )
import json
df [ ' rd_phase_kill_early_share ' ] = 0.0
df [ ' rd_phase_kill_mid_share ' ] = 0.0
df [ ' rd_phase_kill_late_share ' ] = 0.0
df [ ' rd_phase_death_early_share ' ] = 0.0
df [ ' rd_phase_death_mid_share ' ] = 0.0
df [ ' rd_phase_death_late_share ' ] = 0.0
# stray VCS timestamp artifact (commented out to keep file valid): 2026-01-28 01:38:45 +08:00
df [ ' rd_phase_kill_early_share_t ' ] = 0.0
df [ ' rd_phase_kill_mid_share_t ' ] = 0.0
df [ ' rd_phase_kill_late_share_t ' ] = 0.0
df [ ' rd_phase_kill_early_share_ct ' ] = 0.0
df [ ' rd_phase_kill_mid_share_ct ' ] = 0.0
df [ ' rd_phase_kill_late_share_ct ' ] = 0.0
df [ ' rd_phase_death_early_share_t ' ] = 0.0
df [ ' rd_phase_death_mid_share_t ' ] = 0.0
df [ ' rd_phase_death_late_share_t ' ] = 0.0
df [ ' rd_phase_death_early_share_ct ' ] = 0.0
df [ ' rd_phase_death_mid_share_ct ' ] = 0.0
df [ ' rd_phase_death_late_share_ct ' ] = 0.0
# stray VCS timestamp artifact (commented out to keep file valid): 2026-01-28 01:20:26 +08:00
df [ ' rd_firstdeath_team_first_death_rounds ' ] = 0
df [ ' rd_firstdeath_team_first_death_win_rate ' ] = 0.0
df [ ' rd_invalid_death_rounds ' ] = 0
df [ ' rd_invalid_death_rate ' ] = 0.0
df [ ' rd_pressure_kpr_ratio ' ] = 0.0
df [ ' rd_pressure_perf_ratio ' ] = 0.0
df [ ' rd_pressure_rounds_down3 ' ] = 0
df [ ' rd_pressure_rounds_normal ' ] = 0
df [ ' rd_matchpoint_kpr_ratio ' ] = 0.0
df [ ' rd_matchpoint_perf_ratio ' ] = 0.0
df [ ' rd_matchpoint_rounds ' ] = 0
df [ ' rd_comeback_kill_share ' ] = 0.0
df [ ' rd_comeback_rounds ' ] = 0
df [ ' rd_trade_response_10s_rate ' ] = 0.0
df [ ' rd_weapon_top_json ' ] = " [] "
df [ ' rd_roundtype_split_json ' ] = " {} "
if not df_events . empty :
df_events [ ' event_time ' ] = pd . to_numeric ( df_events [ ' event_time ' ] , errors = ' coerce ' ) . fillna ( 0 ) . astype ( int )
df_events [ ' phase_bucket ' ] = pd . cut (
df_events [ ' event_time ' ] ,
bins = [ - 1 , 30 , 60 , float ( ' inf ' ) ] ,
labels = [ ' early ' , ' mid ' , ' late ' ]
)
k_cnt = df_events . groupby ( [ ' attacker_steam_id ' , ' phase_bucket ' ] ) . size ( ) . unstack ( fill_value = 0 )
k_tot = k_cnt . sum ( axis = 1 ) . replace ( 0 , 1 )
k_share = k_cnt . div ( k_tot , axis = 0 )
k_share . index . name = ' steam_id_64 '
k_share = k_share . reset_index ( ) . rename ( columns = {
' early ' : ' rd_phase_kill_early_share ' ,
' mid ' : ' rd_phase_kill_mid_share ' ,
' late ' : ' rd_phase_kill_late_share '
} )
df = df . merge (
k_share [ [ ' steam_id_64 ' , ' rd_phase_kill_early_share ' , ' rd_phase_kill_mid_share ' , ' rd_phase_kill_late_share ' ] ] ,
on = ' steam_id_64 ' ,
how = ' left ' ,
suffixes = ( ' ' , ' _calc ' )
)
for c in [ ' rd_phase_kill_early_share ' , ' rd_phase_kill_mid_share ' , ' rd_phase_kill_late_share ' ] :
if f ' { c } _calc ' in df . columns :
df [ c ] = df [ f ' { c } _calc ' ] . fillna ( df [ c ] )
df . drop ( columns = [ f ' { c } _calc ' ] , inplace = True )
d_cnt = df_events . groupby ( [ ' victim_steam_id ' , ' phase_bucket ' ] ) . size ( ) . unstack ( fill_value = 0 )
d_tot = d_cnt . sum ( axis = 1 ) . replace ( 0 , 1 )
d_share = d_cnt . div ( d_tot , axis = 0 )
d_share . index . name = ' steam_id_64 '
d_share = d_share . reset_index ( ) . rename ( columns = {
' early ' : ' rd_phase_death_early_share ' ,
' mid ' : ' rd_phase_death_mid_share ' ,
' late ' : ' rd_phase_death_late_share '
} )
df = df . merge (
d_share [ [ ' steam_id_64 ' , ' rd_phase_death_early_share ' , ' rd_phase_death_mid_share ' , ' rd_phase_death_late_share ' ] ] ,
on = ' steam_id_64 ' ,
how = ' left ' ,
suffixes = ( ' ' , ' _calc ' )
)
for c in [ ' rd_phase_death_early_share ' , ' rd_phase_death_mid_share ' , ' rd_phase_death_late_share ' ] :
if f ' { c } _calc ' in df . columns :
df [ c ] = df [ f ' { c } _calc ' ] . fillna ( df [ c ] )
df . drop ( columns = [ f ' { c } _calc ' ] , inplace = True )
# stray VCS timestamp artifact (commented out to keep file valid): 2026-01-28 01:38:45 +08:00
if ' attacker_side ' in df_events . columns :
k_side = df_events [ df_events [ ' attacker_side ' ] . isin ( [ ' CT ' , ' T ' ] ) ] . copy ( )
if not k_side . empty :
k_cnt_side = k_side . groupby ( [ ' attacker_steam_id ' , ' attacker_side ' , ' phase_bucket ' ] ) . size ( ) . reset_index ( name = ' cnt ' )
k_piv = k_cnt_side . pivot_table ( index = [ ' attacker_steam_id ' , ' attacker_side ' ] , columns = ' phase_bucket ' , values = ' cnt ' , fill_value = 0 )
k_piv [ ' tot ' ] = k_piv . sum ( axis = 1 ) . replace ( 0 , 1 )
k_piv = k_piv . div ( k_piv [ ' tot ' ] , axis = 0 ) . drop ( columns = [ ' tot ' ] )
k_piv = k_piv . reset_index ( ) . rename ( columns = { ' attacker_steam_id ' : ' steam_id_64 ' } )
for side , suffix in [ ( ' T ' , ' _t ' ) , ( ' CT ' , ' _ct ' ) ] :
tmp = k_piv [ k_piv [ ' attacker_side ' ] == side ] . copy ( )
if not tmp . empty :
tmp = tmp . rename ( columns = {
' early ' : f ' rd_phase_kill_early_share { suffix } ' ,
' mid ' : f ' rd_phase_kill_mid_share { suffix } ' ,
' late ' : f ' rd_phase_kill_late_share { suffix } ' ,
} )
df = df . merge (
tmp [ [ ' steam_id_64 ' , f ' rd_phase_kill_early_share { suffix } ' , f ' rd_phase_kill_mid_share { suffix } ' , f ' rd_phase_kill_late_share { suffix } ' ] ] ,
on = ' steam_id_64 ' ,
how = ' left ' ,
suffixes = ( ' ' , ' _calc ' )
)
for c in [ f ' rd_phase_kill_early_share { suffix } ' , f ' rd_phase_kill_mid_share { suffix } ' , f ' rd_phase_kill_late_share { suffix } ' ] :
if f ' { c } _calc ' in df . columns :
df [ c ] = df [ f ' { c } _calc ' ] . fillna ( df [ c ] )
df . drop ( columns = [ f ' { c } _calc ' ] , inplace = True )
if ' victim_side ' in df_events . columns :
d_side = df_events [ df_events [ ' victim_side ' ] . isin ( [ ' CT ' , ' T ' ] ) ] . copy ( )
if not d_side . empty :
d_cnt_side = d_side . groupby ( [ ' victim_steam_id ' , ' victim_side ' , ' phase_bucket ' ] ) . size ( ) . reset_index ( name = ' cnt ' )
d_piv = d_cnt_side . pivot_table ( index = [ ' victim_steam_id ' , ' victim_side ' ] , columns = ' phase_bucket ' , values = ' cnt ' , fill_value = 0 )
d_piv [ ' tot ' ] = d_piv . sum ( axis = 1 ) . replace ( 0 , 1 )
d_piv = d_piv . div ( d_piv [ ' tot ' ] , axis = 0 ) . drop ( columns = [ ' tot ' ] )
d_piv = d_piv . reset_index ( ) . rename ( columns = { ' victim_steam_id ' : ' steam_id_64 ' } )
for side , suffix in [ ( ' T ' , ' _t ' ) , ( ' CT ' , ' _ct ' ) ] :
tmp = d_piv [ d_piv [ ' victim_side ' ] == side ] . copy ( )
if not tmp . empty :
tmp = tmp . rename ( columns = {
' early ' : f ' rd_phase_death_early_share { suffix } ' ,
' mid ' : f ' rd_phase_death_mid_share { suffix } ' ,
' late ' : f ' rd_phase_death_late_share { suffix } ' ,
} )
df = df . merge (
tmp [ [ ' steam_id_64 ' , f ' rd_phase_death_early_share { suffix } ' , f ' rd_phase_death_mid_share { suffix } ' , f ' rd_phase_death_late_share { suffix } ' ] ] ,
on = ' steam_id_64 ' ,
how = ' left ' ,
suffixes = ( ' ' , ' _calc ' )
)
for c in [ f ' rd_phase_death_early_share { suffix } ' , f ' rd_phase_death_mid_share { suffix } ' , f ' rd_phase_death_late_share { suffix } ' ] :
if f ' { c } _calc ' in df . columns :
df [ c ] = df [ f ' { c } _calc ' ] . fillna ( df [ c ] )
df . drop ( columns = [ f ' { c } _calc ' ] , inplace = True )
# stray VCS timestamp artifact (commented out to keep file valid): 2026-01-28 01:20:26 +08:00
if ' victim_side ' in df_events . columns and ' winner_side ' in df_events . columns :
death_rows = df_events [ [ ' match_id ' , ' round_num ' , ' event_time ' , ' victim_steam_id ' , ' victim_side ' , ' winner_side ' ] ] . copy ( )
death_rows = death_rows [ death_rows [ ' victim_side ' ] . isin ( [ ' CT ' , ' T ' ] ) & death_rows [ ' winner_side ' ] . isin ( [ ' CT ' , ' T ' ] ) ]
if not death_rows . empty :
min_death = death_rows . groupby ( [ ' match_id ' , ' round_num ' , ' victim_side ' ] , as_index = False ) [ ' event_time ' ] . min ( ) . rename ( columns = { ' event_time ' : ' min_time ' } )
first_deaths = death_rows . merge ( min_death , on = [ ' match_id ' , ' round_num ' , ' victim_side ' ] , how = ' inner ' )
first_deaths = first_deaths [ first_deaths [ ' event_time ' ] == first_deaths [ ' min_time ' ] ]
first_deaths [ ' is_win ' ] = ( first_deaths [ ' victim_side ' ] == first_deaths [ ' winner_side ' ] ) . astype ( int )
fd_agg = first_deaths . groupby ( ' victim_steam_id ' ) [ ' is_win ' ] . agg ( [ ' count ' , ' mean ' ] ) . reset_index ( )
fd_agg . rename ( columns = {
' victim_steam_id ' : ' steam_id_64 ' ,
' count ' : ' rd_firstdeath_team_first_death_rounds ' ,
' mean ' : ' rd_firstdeath_team_first_death_win_rate '
} , inplace = True )
df = df . merge ( fd_agg , on = ' steam_id_64 ' , how = ' left ' , suffixes = ( ' ' , ' _calc ' ) )
for c in [ ' rd_firstdeath_team_first_death_rounds ' , ' rd_firstdeath_team_first_death_win_rate ' ] :
if f ' { c } _calc ' in df . columns :
df [ c ] = df [ f ' { c } _calc ' ] . fillna ( df [ c ] )
df . drop ( columns = [ f ' { c } _calc ' ] , inplace = True )
kills_per_round = df_events . groupby ( [ ' match_id ' , ' round_num ' , ' attacker_steam_id ' ] ) . size ( ) . reset_index ( name = ' kills ' )
flash_round = df_events [ df_events [ ' flash_assist_steam_id ' ] . notna ( ) & ( df_events [ ' flash_assist_steam_id ' ] != ' ' ) ] \
. groupby ( [ ' match_id ' , ' round_num ' , ' flash_assist_steam_id ' ] ) . size ( ) . reset_index ( name = ' flash_assists ' )
death_round = df_events . groupby ( [ ' match_id ' , ' round_num ' , ' victim_steam_id ' ] ) . size ( ) . reset_index ( name = ' deaths ' )
death_eval = death_round . rename ( columns = { ' victim_steam_id ' : ' steam_id_64 ' } ) . merge (
kills_per_round . rename ( columns = { ' attacker_steam_id ' : ' steam_id_64 ' } ) [ [ ' match_id ' , ' round_num ' , ' steam_id_64 ' , ' kills ' ] ] ,
on = [ ' match_id ' , ' round_num ' , ' steam_id_64 ' ] ,
how = ' left '
) . merge (
flash_round . rename ( columns = { ' flash_assist_steam_id ' : ' steam_id_64 ' } ) [ [ ' match_id ' , ' round_num ' , ' steam_id_64 ' , ' flash_assists ' ] ] ,
on = [ ' match_id ' , ' round_num ' , ' steam_id_64 ' ] ,
how = ' left '
) . fillna ( { ' kills ' : 0 , ' flash_assists ' : 0 } )
death_eval [ ' is_invalid ' ] = ( ( death_eval [ ' kills ' ] < = 0 ) & ( death_eval [ ' flash_assists ' ] < = 0 ) ) . astype ( int )
invalid_agg = death_eval . groupby ( ' steam_id_64 ' ) [ ' is_invalid ' ] . agg ( [ ' sum ' , ' count ' ] ) . reset_index ( )
invalid_agg . rename ( columns = { ' sum ' : ' rd_invalid_death_rounds ' , ' count ' : ' death_rounds ' } , inplace = True )
invalid_agg [ ' rd_invalid_death_rate ' ] = invalid_agg [ ' rd_invalid_death_rounds ' ] / invalid_agg [ ' death_rounds ' ] . replace ( 0 , 1 )
df = df . merge (
invalid_agg [ [ ' steam_id_64 ' , ' rd_invalid_death_rounds ' , ' rd_invalid_death_rate ' ] ] ,
on = ' steam_id_64 ' ,
how = ' left ' ,
suffixes = ( ' ' , ' _calc ' )
)
for c in [ ' rd_invalid_death_rounds ' , ' rd_invalid_death_rate ' ] :
if f ' { c } _calc ' in df . columns :
df [ c ] = df [ f ' { c } _calc ' ] . fillna ( df [ c ] )
df . drop ( columns = [ f ' { c } _calc ' ] , inplace = True )
if ' weapon ' in df_events . columns :
w = df_events . copy ( )
w [ ' weapon ' ] = w [ ' weapon ' ] . fillna ( ' ' ) . astype ( str )
w = w [ w [ ' weapon ' ] != ' ' ]
if not w . empty :
w_agg = w . groupby ( [ ' attacker_steam_id ' , ' weapon ' ] ) . agg (
kills = ( ' weapon ' , ' size ' ) ,
hs = ( ' is_headshot ' , ' sum ' ) ,
) . reset_index ( )
top_json = { }
for pid , g in w_agg . groupby ( ' attacker_steam_id ' ) :
g = g . sort_values ( ' kills ' , ascending = False )
total = float ( g [ ' kills ' ] . sum ( ) ) if g [ ' kills ' ] . sum ( ) else 1.0
top = g . head ( 5 )
items = [ ]
for _ , r in top . iterrows ( ) :
k = float ( r [ ' kills ' ] )
hs = float ( r [ ' hs ' ] )
wi = get_weapon_info ( r [ ' weapon ' ] )
items . append ( {
' weapon ' : r [ ' weapon ' ] ,
' kills ' : int ( k ) ,
' share ' : k / total ,
' hs_rate ' : hs / k if k else 0.0 ,
' price ' : wi . price if wi else None ,
' side ' : wi . side if wi else None ,
' category ' : wi . category if wi else None ,
} )
top_json [ str ( pid ) ] = json . dumps ( items , ensure_ascii = False )
if top_json :
df [ ' rd_weapon_top_json ' ] = df [ ' steam_id_64 ' ] . map ( top_json ) . fillna ( " [] " )
if not df_rounds . empty and not df_fh_sides . empty and not df_events . empty :
df_rounds2 = df_rounds . copy ( )
if not df_meta . empty :
df_rounds2 = df_rounds2 . merge ( df_meta [ [ ' match_id ' , ' halftime_round ' ] ] , on = ' match_id ' , how = ' left ' )
df_rounds2 = df_rounds2 . sort_values ( [ ' match_id ' , ' round_num ' ] )
df_rounds2 [ ' prev_ct ' ] = df_rounds2 . groupby ( ' match_id ' ) [ ' ct_score ' ] . shift ( 1 ) . fillna ( 0 )
df_rounds2 [ ' prev_t ' ] = df_rounds2 . groupby ( ' match_id ' ) [ ' t_score ' ] . shift ( 1 ) . fillna ( 0 )
df_rounds2 [ ' ct_deficit ' ] = df_rounds2 [ ' prev_t ' ] - df_rounds2 [ ' prev_ct ' ]
df_rounds2 [ ' t_deficit ' ] = df_rounds2 [ ' prev_ct ' ] - df_rounds2 [ ' prev_t ' ]
df_rounds2 [ ' mp_score ' ] = df_rounds2 [ ' halftime_round ' ] . fillna ( 15 )
df_rounds2 [ ' is_match_point_round ' ] = ( df_rounds2 [ ' prev_ct ' ] == df_rounds2 [ ' mp_score ' ] ) | ( df_rounds2 [ ' prev_t ' ] == df_rounds2 [ ' mp_score ' ] )
df_rounds2 [ ' reg_rounds ' ] = ( df_rounds2 [ ' halftime_round ' ] . fillna ( 15 ) * 2 ) . astype ( int )
df_rounds2 [ ' is_overtime_round ' ] = df_rounds2 [ ' round_num ' ] > df_rounds2 [ ' reg_rounds ' ]
all_rounds = df_rounds2 [ [ ' match_id ' , ' round_num ' ] ] . drop_duplicates ( )
df_player_rounds = all_rounds . merge ( df_fh_sides , on = ' match_id ' , how = ' inner ' )
if ' halftime_round ' not in df_player_rounds . columns :
df_player_rounds [ ' halftime_round ' ] = 15
df_player_rounds [ ' halftime_round ' ] = pd . to_numeric ( df_player_rounds [ ' halftime_round ' ] , errors = ' coerce ' ) . fillna ( 15 ) . astype ( int )
mask_fh = df_player_rounds [ ' round_num ' ] < = df_player_rounds [ ' halftime_round ' ]
df_player_rounds [ ' side ' ] = np . where ( mask_fh , df_player_rounds [ ' fh_side ' ] , np . where ( df_player_rounds [ ' fh_side ' ] == ' CT ' , ' T ' , ' CT ' ) )
df_player_rounds = df_player_rounds . merge (
df_rounds2 [ [ ' match_id ' , ' round_num ' , ' ct_deficit ' , ' t_deficit ' , ' is_match_point_round ' , ' is_overtime_round ' , ' reg_rounds ' ] ] ,
on = [ ' match_id ' , ' round_num ' ] ,
how = ' left '
)
df_player_rounds [ ' deficit ' ] = np . where (
df_player_rounds [ ' side ' ] == ' CT ' ,
df_player_rounds [ ' ct_deficit ' ] ,
np . where ( df_player_rounds [ ' side ' ] == ' T ' , df_player_rounds [ ' t_deficit ' ] , 0 )
)
df_player_rounds [ ' is_pressure_round ' ] = ( df_player_rounds [ ' deficit ' ] > = 3 ) . astype ( int )
df_player_rounds [ ' is_pistol_round ' ] = (
( df_player_rounds [ ' round_num ' ] == 1 ) |
( df_player_rounds [ ' round_num ' ] == df_player_rounds [ ' halftime_round ' ] + 1 )
) . astype ( int )
kills_per_round = df_events . groupby ( [ ' match_id ' , ' round_num ' , ' attacker_steam_id ' ] ) . size ( ) . reset_index ( name = ' kills ' )
df_player_rounds = df_player_rounds . merge (
kills_per_round . rename ( columns = { ' attacker_steam_id ' : ' steam_id_64 ' } ) ,
on = [ ' match_id ' , ' round_num ' , ' steam_id_64 ' ] ,
how = ' left '
)
df_player_rounds [ ' kills ' ] = df_player_rounds [ ' kills ' ] . fillna ( 0 )
grp = df_player_rounds . groupby ( [ ' steam_id_64 ' , ' is_pressure_round ' ] ) [ ' kills ' ] . agg ( [ ' mean ' , ' count ' ] ) . reset_index ( )
pressure = grp . pivot ( index = ' steam_id_64 ' , columns = ' is_pressure_round ' ) . fillna ( 0 )
if ( ' mean ' , 1 ) in pressure . columns and ( ' mean ' , 0 ) in pressure . columns :
pressure_kpr_ratio = ( pressure [ ( ' mean ' , 1 ) ] / pressure [ ( ' mean ' , 0 ) ] . replace ( 0 , 1 ) ) . reset_index ( )
pressure_kpr_ratio . columns = [ ' steam_id_64 ' , ' rd_pressure_kpr_ratio ' ]
df = df . merge ( pressure_kpr_ratio , on = ' steam_id_64 ' , how = ' left ' , suffixes = ( ' ' , ' _calc ' ) )
if ' rd_pressure_kpr_ratio_calc ' in df . columns :
df [ ' rd_pressure_kpr_ratio ' ] = df [ ' rd_pressure_kpr_ratio_calc ' ] . fillna ( df [ ' rd_pressure_kpr_ratio ' ] )
df . drop ( columns = [ ' rd_pressure_kpr_ratio_calc ' ] , inplace = True )
if ( ' count ' , 1 ) in pressure . columns :
pr_cnt = pressure [ ( ' count ' , 1 ) ] . reset_index ( )
pr_cnt . columns = [ ' steam_id_64 ' , ' rd_pressure_rounds_down3 ' ]
df = df . merge ( pr_cnt , on = ' steam_id_64 ' , how = ' left ' , suffixes = ( ' ' , ' _calc ' ) )
if ' rd_pressure_rounds_down3_calc ' in df . columns :
df [ ' rd_pressure_rounds_down3 ' ] = df [ ' rd_pressure_rounds_down3_calc ' ] . fillna ( df [ ' rd_pressure_rounds_down3 ' ] )
df . drop ( columns = [ ' rd_pressure_rounds_down3_calc ' ] , inplace = True )
if ( ' count ' , 0 ) in pressure . columns :
nr_cnt = pressure [ ( ' count ' , 0 ) ] . reset_index ( )
nr_cnt . columns = [ ' steam_id_64 ' , ' rd_pressure_rounds_normal ' ]
df = df . merge ( nr_cnt , on = ' steam_id_64 ' , how = ' left ' , suffixes = ( ' ' , ' _calc ' ) )
if ' rd_pressure_rounds_normal_calc ' in df . columns :
df [ ' rd_pressure_rounds_normal ' ] = df [ ' rd_pressure_rounds_normal_calc ' ] . fillna ( df [ ' rd_pressure_rounds_normal ' ] )
df . drop ( columns = [ ' rd_pressure_rounds_normal_calc ' ] , inplace = True )
mp_grp = df_player_rounds . groupby ( [ ' steam_id_64 ' , ' is_match_point_round ' ] ) [ ' kills ' ] . agg ( [ ' mean ' , ' count ' ] ) . reset_index ( )
mp = mp_grp . pivot ( index = ' steam_id_64 ' , columns = ' is_match_point_round ' ) . fillna ( 0 )
if ( ' mean ' , 1 ) in mp . columns and ( ' mean ' , 0 ) in mp . columns :
mp_ratio = ( mp [ ( ' mean ' , 1 ) ] / mp [ ( ' mean ' , 0 ) ] . replace ( 0 , 1 ) ) . reset_index ( )
mp_ratio . columns = [ ' steam_id_64 ' , ' rd_matchpoint_kpr_ratio ' ]
df = df . merge ( mp_ratio , on = ' steam_id_64 ' , how = ' left ' , suffixes = ( ' ' , ' _calc ' ) )
if ' rd_matchpoint_kpr_ratio_calc ' in df . columns :
df [ ' rd_matchpoint_kpr_ratio ' ] = df [ ' rd_matchpoint_kpr_ratio_calc ' ] . fillna ( df [ ' rd_matchpoint_kpr_ratio ' ] )
df . drop ( columns = [ ' rd_matchpoint_kpr_ratio_calc ' ] , inplace = True )
if ( ' count ' , 1 ) in mp . columns :
mp_cnt = mp [ ( ' count ' , 1 ) ] . reset_index ( )
mp_cnt . columns = [ ' steam_id_64 ' , ' rd_matchpoint_rounds ' ]
df = df . merge ( mp_cnt , on = ' steam_id_64 ' , how = ' left ' , suffixes = ( ' ' , ' _calc ' ) )
if ' rd_matchpoint_rounds_calc ' in df . columns :
df [ ' rd_matchpoint_rounds ' ] = df [ ' rd_matchpoint_rounds_calc ' ] . fillna ( df [ ' rd_matchpoint_rounds ' ] )
df . drop ( columns = [ ' rd_matchpoint_rounds_calc ' ] , inplace = True )
try :
q_player_team = f " SELECT match_id, steam_id_64, team_id FROM fact_match_players WHERE steam_id_64 IN ( { placeholders } ) "
df_player_team = pd . read_sql_query ( q_player_team , conn , params = valid_ids )
except Exception :
df_player_team = pd . DataFrame ( )
if not df_player_team . empty :
try :
q_team_roles = f """
SELECT match_id , group_id as team_id , group_fh_role
FROM fact_match_teams
WHERE match_id IN ( SELECT match_id FROM fact_match_players WHERE steam_id_64 IN ( { placeholders } ) )
"""
df_team_roles = pd . read_sql_query ( q_team_roles , conn , params = valid_ids )
except Exception :
df_team_roles = pd . DataFrame ( )
if not df_team_roles . empty :
team_round = df_rounds2 [ [ ' match_id ' , ' round_num ' , ' ct_score ' , ' t_score ' , ' prev_ct ' , ' prev_t ' , ' halftime_round ' ] ] . merge ( df_team_roles , on = ' match_id ' , how = ' inner ' )
fh_ct = team_round [ ' group_fh_role ' ] == 1
mask_fh = team_round [ ' round_num ' ] < = team_round [ ' halftime_round ' ]
team_round [ ' team_side ' ] = np . where ( mask_fh , np . where ( fh_ct , ' CT ' , ' T ' ) , np . where ( fh_ct , ' T ' , ' CT ' ) )
team_round [ ' team_prev_score ' ] = np . where ( team_round [ ' team_side ' ] == ' CT ' , team_round [ ' prev_ct ' ] , team_round [ ' prev_t ' ] )
team_round [ ' team_score_after ' ] = np . where ( team_round [ ' team_side ' ] == ' CT ' , team_round [ ' ct_score ' ] , team_round [ ' t_score ' ] )
team_round [ ' opp_prev_score ' ] = np . where ( team_round [ ' team_side ' ] == ' CT ' , team_round [ ' prev_t ' ] , team_round [ ' prev_ct ' ] )
team_round [ ' opp_score_after ' ] = np . where ( team_round [ ' team_side ' ] == ' CT ' , team_round [ ' t_score ' ] , team_round [ ' ct_score ' ] )
team_round [ ' deficit_before ' ] = team_round [ ' opp_prev_score ' ] - team_round [ ' team_prev_score ' ]
team_round [ ' deficit_after ' ] = team_round [ ' opp_score_after ' ] - team_round [ ' team_score_after ' ]
team_round [ ' is_comeback_round ' ] = ( ( team_round [ ' deficit_before ' ] > 0 ) & ( team_round [ ' deficit_after ' ] < team_round [ ' deficit_before ' ] ) ) . astype ( int )
comeback_keys = team_round [ team_round [ ' is_comeback_round ' ] == 1 ] [ [ ' match_id ' , ' round_num ' , ' team_id ' ] ] . drop_duplicates ( )
if not comeback_keys . empty :
ev_att = df_events [ [ ' match_id ' , ' round_num ' , ' attacker_steam_id ' , ' event_time ' ] ] . merge (
df_player_team . rename ( columns = { ' steam_id_64 ' : ' attacker_steam_id ' , ' team_id ' : ' att_team_id ' } ) ,
on = [ ' match_id ' , ' attacker_steam_id ' ] ,
how = ' left '
)
team_kills = ev_att [ ev_att [ ' att_team_id ' ] . notna ( ) ] . groupby ( [ ' match_id ' , ' round_num ' , ' att_team_id ' ] ) . size ( ) . reset_index ( name = ' team_kills ' )
player_kills = ev_att . groupby ( [ ' match_id ' , ' round_num ' , ' attacker_steam_id ' , ' att_team_id ' ] ) . size ( ) . reset_index ( name = ' player_kills ' )
player_kills = player_kills . merge (
comeback_keys . rename ( columns = { ' team_id ' : ' att_team_id ' } ) ,
on = [ ' match_id ' , ' round_num ' , ' att_team_id ' ] ,
how = ' inner '
)
if not player_kills . empty :
player_kills = player_kills . merge ( team_kills , on = [ ' match_id ' , ' round_num ' , ' att_team_id ' ] , how = ' left ' ) . fillna ( { ' team_kills ' : 0 } )
player_kills [ ' share ' ] = player_kills [ ' player_kills ' ] / player_kills [ ' team_kills ' ] . replace ( 0 , 1 )
cb_share = player_kills . groupby ( ' attacker_steam_id ' ) [ ' share ' ] . mean ( ) . reset_index ( )
cb_share . rename ( columns = { ' attacker_steam_id ' : ' steam_id_64 ' , ' share ' : ' rd_comeback_kill_share ' } , inplace = True )
df = df . merge ( cb_share , on = ' steam_id_64 ' , how = ' left ' , suffixes = ( ' ' , ' _calc ' ) )
if ' rd_comeback_kill_share_calc ' in df . columns :
df [ ' rd_comeback_kill_share ' ] = df [ ' rd_comeback_kill_share_calc ' ] . fillna ( df [ ' rd_comeback_kill_share ' ] )
df . drop ( columns = [ ' rd_comeback_kill_share_calc ' ] , inplace = True )
cb_rounds = comeback_keys . merge ( df_player_team , left_on = [ ' match_id ' , ' team_id ' ] , right_on = [ ' match_id ' , ' team_id ' ] , how = ' inner ' )
cb_cnt = cb_rounds . groupby ( ' steam_id_64 ' ) . size ( ) . reset_index ( name = ' rd_comeback_rounds ' )
df = df . merge ( cb_cnt , on = ' steam_id_64 ' , how = ' left ' , suffixes = ( ' ' , ' _calc ' ) )
if ' rd_comeback_rounds_calc ' in df . columns :
df [ ' rd_comeback_rounds ' ] = df [ ' rd_comeback_rounds_calc ' ] . fillna ( df [ ' rd_comeback_rounds ' ] )
df . drop ( columns = [ ' rd_comeback_rounds_calc ' ] , inplace = True )
death_team = df_events [ [ ' match_id ' , ' round_num ' , ' event_time ' , ' victim_steam_id ' ] ] . merge (
df_player_team . rename ( columns = { ' steam_id_64 ' : ' victim_steam_id ' , ' team_id ' : ' team_id ' } ) ,
on = [ ' match_id ' , ' victim_steam_id ' ] ,
how = ' left '
)
death_team = death_team [ death_team [ ' team_id ' ] . notna ( ) ]
if not death_team . empty :
roster = df_player_team . rename ( columns = { ' steam_id_64 ' : ' steam_id_64 ' , ' team_id ' : ' team_id ' } ) [ [ ' match_id ' , ' team_id ' , ' steam_id_64 ' ] ] . drop_duplicates ( )
opp = death_team . merge ( roster , on = [ ' match_id ' , ' team_id ' ] , how = ' inner ' , suffixes = ( ' ' , ' _teammate ' ) )
opp = opp [ opp [ ' steam_id_64 ' ] != opp [ ' victim_steam_id ' ] ]
opp_time = opp . groupby ( [ ' match_id ' , ' round_num ' , ' steam_id_64 ' ] , as_index = False ) [ ' event_time ' ] . min ( ) . rename ( columns = { ' event_time ' : ' teammate_death_time ' } )
kills_time = df_events [ [ ' match_id ' , ' round_num ' , ' event_time ' , ' attacker_steam_id ' ] ] . rename ( columns = { ' attacker_steam_id ' : ' steam_id_64 ' , ' event_time ' : ' kill_time ' } )
m = opp_time . merge ( kills_time , on = [ ' match_id ' , ' round_num ' , ' steam_id_64 ' ] , how = ' left ' )
m [ ' in_window ' ] = ( ( m [ ' kill_time ' ] > = m [ ' teammate_death_time ' ] ) & ( m [ ' kill_time ' ] < = m [ ' teammate_death_time ' ] + 10 ) ) . astype ( int )
success = m . groupby ( [ ' match_id ' , ' round_num ' , ' steam_id_64 ' ] , as_index = False ) [ ' in_window ' ] . max ( )
rate = success . groupby ( ' steam_id_64 ' ) [ ' in_window ' ] . mean ( ) . reset_index ( )
rate . rename ( columns = { ' in_window ' : ' rd_trade_response_10s_rate ' } , inplace = True )
df = df . merge ( rate , on = ' steam_id_64 ' , how = ' left ' , suffixes = ( ' ' , ' _calc ' ) )
if ' rd_trade_response_10s_rate_calc ' in df . columns :
df [ ' rd_trade_response_10s_rate ' ] = df [ ' rd_trade_response_10s_rate_calc ' ] . fillna ( df [ ' rd_trade_response_10s_rate ' ] )
df . drop ( columns = [ ' rd_trade_response_10s_rate_calc ' ] , inplace = True )
eco_rows = [ ]
try :
q_econ = f """
SELECT match_id , round_num , steam_id_64 , equipment_value , round_performance_score
FROM fact_round_player_economy
WHERE steam_id_64 IN ( { placeholders } )
"""
df_econ = pd . read_sql_query ( q_econ , conn , params = valid_ids )
except Exception :
df_econ = pd . DataFrame ( )
if not df_econ . empty :
df_econ [ ' equipment_value ' ] = pd . to_numeric ( df_econ [ ' equipment_value ' ] , errors = ' coerce ' ) . fillna ( 0 ) . astype ( int )
df_econ [ ' round_performance_score ' ] = pd . to_numeric ( df_econ [ ' round_performance_score ' ] , errors = ' coerce ' ) . fillna ( 0.0 )
df_econ = df_econ . merge ( df_rounds2 [ [ ' match_id ' , ' round_num ' , ' is_overtime_round ' , ' is_match_point_round ' , ' ct_deficit ' , ' t_deficit ' , ' prev_ct ' , ' prev_t ' ] ] , on = [ ' match_id ' , ' round_num ' ] , how = ' left ' )
df_econ = df_econ . merge ( df_fh_sides [ [ ' match_id ' , ' steam_id_64 ' , ' fh_side ' , ' halftime_round ' ] ] , on = [ ' match_id ' , ' steam_id_64 ' ] , how = ' left ' )
mask_fh = df_econ [ ' round_num ' ] < = df_econ [ ' halftime_round ' ]
df_econ [ ' side ' ] = np . where ( mask_fh , df_econ [ ' fh_side ' ] , np . where ( df_econ [ ' fh_side ' ] == ' CT ' , ' T ' , ' CT ' ) )
df_econ [ ' deficit ' ] = np . where ( df_econ [ ' side ' ] == ' CT ' , df_econ [ ' ct_deficit ' ] , df_econ [ ' t_deficit ' ] )
df_econ [ ' is_pressure_round ' ] = ( df_econ [ ' deficit ' ] > = 3 ) . astype ( int )
perf_grp = df_econ . groupby ( [ ' steam_id_64 ' , ' is_pressure_round ' ] ) [ ' round_performance_score ' ] . agg ( [ ' mean ' , ' count ' ] ) . reset_index ( )
perf = perf_grp . pivot ( index = ' steam_id_64 ' , columns = ' is_pressure_round ' ) . fillna ( 0 )
if ( ' mean ' , 1 ) in perf . columns and ( ' mean ' , 0 ) in perf . columns :
perf_ratio = ( perf [ ( ' mean ' , 1 ) ] / perf [ ( ' mean ' , 0 ) ] . replace ( 0 , 1 ) ) . reset_index ( )
perf_ratio . columns = [ ' steam_id_64 ' , ' rd_pressure_perf_ratio ' ]
df = df . merge ( perf_ratio , on = ' steam_id_64 ' , how = ' left ' , suffixes = ( ' ' , ' _calc ' ) )
if ' rd_pressure_perf_ratio_calc ' in df . columns :
df [ ' rd_pressure_perf_ratio ' ] = df [ ' rd_pressure_perf_ratio_calc ' ] . fillna ( df [ ' rd_pressure_perf_ratio ' ] )
df . drop ( columns = [ ' rd_pressure_perf_ratio_calc ' ] , inplace = True )
mp_perf_grp = df_econ . groupby ( [ ' steam_id_64 ' , ' is_match_point_round ' ] ) [ ' round_performance_score ' ] . agg ( [ ' mean ' , ' count ' ] ) . reset_index ( )
mp_perf = mp_perf_grp . pivot ( index = ' steam_id_64 ' , columns = ' is_match_point_round ' ) . fillna ( 0 )
if ( ' mean ' , 1 ) in mp_perf . columns and ( ' mean ' , 0 ) in mp_perf . columns :
mp_perf_ratio = ( mp_perf [ ( ' mean ' , 1 ) ] / mp_perf [ ( ' mean ' , 0 ) ] . replace ( 0 , 1 ) ) . reset_index ( )
mp_perf_ratio . columns = [ ' steam_id_64 ' , ' rd_matchpoint_perf_ratio ' ]
df = df . merge ( mp_perf_ratio , on = ' steam_id_64 ' , how = ' left ' , suffixes = ( ' ' , ' _calc ' ) )
if ' rd_matchpoint_perf_ratio_calc ' in df . columns :
df [ ' rd_matchpoint_perf_ratio ' ] = df [ ' rd_matchpoint_perf_ratio_calc ' ] . fillna ( df [ ' rd_matchpoint_perf_ratio ' ] )
df . drop ( columns = [ ' rd_matchpoint_perf_ratio_calc ' ] , inplace = True )
eco = df_econ . copy ( )
eco [ ' round_type ' ] = np . select (
[
eco [ ' is_overtime_round ' ] == 1 ,
eco [ ' equipment_value ' ] < 2000 ,
eco [ ' equipment_value ' ] > = 4000 ,
] ,
[
' overtime ' ,
' eco ' ,
' fullbuy ' ,
] ,
default = ' rifle '
)
eco_rounds = eco . groupby ( [ ' steam_id_64 ' , ' round_type ' ] ) . size ( ) . reset_index ( name = ' rounds ' )
perf_mean = eco . groupby ( [ ' steam_id_64 ' , ' round_type ' ] ) [ ' round_performance_score ' ] . mean ( ) . reset_index ( name = ' perf ' )
eco_rows = eco_rounds . merge ( perf_mean , on = [ ' steam_id_64 ' , ' round_type ' ] , how = ' left ' )
if eco_rows is not None and len ( eco_rows ) > 0 :
kpr_rounds = df_player_rounds [ [ ' match_id ' , ' round_num ' , ' steam_id_64 ' , ' kills ' , ' is_pistol_round ' , ' is_overtime_round ' ] ] . copy ( )
kpr_rounds [ ' round_type ' ] = np . select (
[
kpr_rounds [ ' is_overtime_round ' ] == 1 ,
kpr_rounds [ ' is_pistol_round ' ] == 1 ,
] ,
[
' overtime ' ,
' pistol ' ,
] ,
default = ' reg '
)
kpr = kpr_rounds . groupby ( [ ' steam_id_64 ' , ' round_type ' ] ) . agg ( kpr = ( ' kills ' , ' mean ' ) , rounds = ( ' kills ' , ' size ' ) ) . reset_index ( )
kpr_dict = { }
for pid , g in kpr . groupby ( ' steam_id_64 ' ) :
d = { }
for _ , r in g . iterrows ( ) :
d [ r [ ' round_type ' ] ] = { ' kpr ' : float ( r [ ' kpr ' ] ) , ' rounds ' : int ( r [ ' rounds ' ] ) }
kpr_dict [ str ( pid ) ] = d
econ_dict = { }
if isinstance ( eco_rows , pd . DataFrame ) and not eco_rows . empty :
for pid , g in eco_rows . groupby ( ' steam_id_64 ' ) :
d = { }
for _ , r in g . iterrows ( ) :
d [ r [ ' round_type ' ] ] = { ' perf ' : float ( r [ ' perf ' ] ) if r [ ' perf ' ] is not None else 0.0 , ' rounds ' : int ( r [ ' rounds ' ] ) }
econ_dict [ str ( pid ) ] = d
out = { }
for pid in df [ ' steam_id_64 ' ] . astype ( str ) . tolist ( ) :
merged = { }
if pid in kpr_dict :
merged . update ( kpr_dict [ pid ] )
if pid in econ_dict :
for k , v in econ_dict [ pid ] . items ( ) :
merged . setdefault ( k , { } ) . update ( v )
out [ pid ] = json . dumps ( merged , ensure_ascii = False )
df [ ' rd_roundtype_split_json ' ] = df [ ' steam_id_64 ' ] . astype ( str ) . map ( out ) . fillna ( " {} " )
# stray VCS timestamp artifact (commented out to keep file valid): 2026-01-26 21:10:42 +08:00
# Final Mappings
df [ ' total_matches ' ] = df [ ' matches_played ' ]
# stray VCS timestamp artifact (commented out to keep file valid): 2026-01-28 01:20:26 +08:00
for c in df . columns :
if df [ c ] . dtype . kind in " biufc " :
df [ c ] = df [ c ] . fillna ( 0 )
else :
df [ c ] = df [ c ] . fillna ( " " )
return df
# stray VCS timestamp artifact (commented out to keep file valid): 2026-01-26 21:10:42 +08:00
# stray VCS timestamp artifact (commented out to keep file valid): 2026-01-27 21:26:07 +08:00
@staticmethod
def _calculate_economy_features ( conn , player_ids ) :
if not player_ids : return None
placeholders = ' , ' . join ( [ ' ? ' ] * len ( player_ids ) )
# 1. Investment Efficiency (Damage / Equipment Value)
# We need total damage and total equipment value
# fact_match_players has sum_util_dmg (only nade damage), but we need total damage.
# fact_match_players has 'basic_avg_adr' * rounds.
# Better to query fact_round_player_economy for equipment value sum.
q_eco_val = f """
SELECT steam_id_64 , SUM ( equipment_value ) as total_spend , COUNT ( * ) as rounds_tracked
FROM fact_round_player_economy
WHERE steam_id_64 IN ( { placeholders } )
GROUP BY steam_id_64
"""
df_spend = pd . read_sql_query ( q_eco_val , conn , params = player_ids )
# Get Total Damage from fact_match_players (derived from ADR * Rounds)
# MUST filter by matches that actually have economy data to ensure consistency
q_dmg = f """
SELECT mp . steam_id_64 , SUM ( mp . adr * mp . round_total ) as total_damage
FROM fact_match_players mp
JOIN (
SELECT DISTINCT match_id , steam_id_64
FROM fact_round_player_economy
WHERE steam_id_64 IN ( { placeholders } )
) eco ON mp . match_id = eco . match_id AND mp . steam_id_64 = eco . steam_id_64
WHERE mp . steam_id_64 IN ( { placeholders } )
GROUP BY mp . steam_id_64
"""
df_dmg = pd . read_sql_query ( q_dmg , conn , params = player_ids + player_ids )
df = df_spend . merge ( df_dmg , on = ' steam_id_64 ' , how = ' inner ' )
# Metric 1: Damage per 1000$
# Avoid div by zero
df [ ' eco_avg_damage_per_1k ' ] = df [ ' total_damage ' ] / ( df [ ' total_spend ' ] / 1000.0 ) . replace ( 0 , 1 )
# 2. Eco Round Performance (Equipment < 2000)
# We need kills in these rounds.
# Join economy with events? That's heavy.
# Alternative: Approximate.
# Let's do it properly: Get rounds where equip < 2000, count kills.
# Subquery for Eco Rounds keys: (match_id, round_num, steam_id_64)
# Then join with events.
q_eco_perf = f """
SELECT
e . attacker_steam_id as steam_id_64 ,
COUNT ( * ) as eco_kills ,
SUM ( CASE WHEN e . event_type = ' death ' THEN 1 ELSE 0 END ) as eco_deaths
FROM fact_round_events e
JOIN fact_round_player_economy eco
ON e . match_id = eco . match_id
AND e . round_num = eco . round_num
AND ( e . attacker_steam_id = eco . steam_id_64 OR e . victim_steam_id = eco . steam_id_64 )
WHERE ( e . event_type = ' kill ' AND e . attacker_steam_id = eco . steam_id_64 )
OR ( e . event_type = ' kill ' AND e . victim_steam_id = eco . steam_id_64 ) - - Count deaths properly
AND eco . equipment_value < 2000
AND eco . steam_id_64 IN ( { placeholders } )
GROUP BY eco . steam_id_64
"""
# Wait, the join condition OR is tricky for grouping.
# Let's separate Kills and Deaths or do two queries.
# Simpler:
# Eco Kills
q_eco_kills = f """
SELECT
e . attacker_steam_id as steam_id_64 ,
COUNT ( * ) as eco_kills
FROM fact_round_events e
JOIN fact_round_player_economy eco
ON e . match_id = eco . match_id
AND e . round_num = eco . round_num
AND e . attacker_steam_id = eco . steam_id_64
WHERE e . event_type = ' kill '
AND eco . equipment_value < 2000
AND eco . steam_id_64 IN ( { placeholders } )
GROUP BY e . attacker_steam_id
"""
df_eco_kills = pd . read_sql_query ( q_eco_kills , conn , params = player_ids )
# Eco Deaths
q_eco_deaths = f """
SELECT
e . victim_steam_id as steam_id_64 ,
COUNT ( * ) as eco_deaths
FROM fact_round_events e
JOIN fact_round_player_economy eco
ON e . match_id = eco . match_id
AND e . round_num = eco . round_num
AND e . victim_steam_id = eco . steam_id_64
WHERE e . event_type = ' kill '
AND eco . equipment_value < 2000
AND eco . steam_id_64 IN ( { placeholders } )
GROUP BY e . victim_steam_id
"""
df_eco_deaths = pd . read_sql_query ( q_eco_deaths , conn , params = player_ids )
# Get count of eco rounds
q_eco_rounds = f """
SELECT steam_id_64 , COUNT ( * ) as eco_round_count
FROM fact_round_player_economy
WHERE equipment_value < 2000 AND steam_id_64 IN ( { placeholders } )
GROUP BY steam_id_64
"""
df_eco_cnt = pd . read_sql_query ( q_eco_rounds , conn , params = player_ids )
df_perf = df_eco_cnt . merge ( df_eco_kills , on = ' steam_id_64 ' , how = ' left ' ) . merge ( df_eco_deaths , on = ' steam_id_64 ' , how = ' left ' ) . fillna ( 0 )
# Eco Rating (KPR)
df_perf [ ' eco_rating_eco_rounds ' ] = df_perf [ ' eco_kills ' ] / df_perf [ ' eco_round_count ' ] . replace ( 0 , 1 )
# Eco KD
df_perf [ ' eco_kd_ratio ' ] = df_perf [ ' eco_kills ' ] / df_perf [ ' eco_deaths ' ] . replace ( 0 , 1 )
# Eco Rounds per Match
# We need total matches WHERE economy data exists.
# Otherwise, if we have 100 matches but only 10 with eco data, the avg will be diluted.
q_matches = f """
SELECT steam_id_64 , COUNT ( DISTINCT match_id ) as matches_tracked
FROM fact_round_player_economy
WHERE steam_id_64 IN ( { placeholders } )
GROUP BY steam_id_64
"""
df_matches = pd . read_sql_query ( q_matches , conn , params = player_ids )
df_perf = df_perf . merge ( df_matches , on = ' steam_id_64 ' , how = ' left ' )
df_perf [ ' eco_avg_rounds ' ] = df_perf [ ' eco_round_count ' ] / df_perf [ ' matches_tracked ' ] . replace ( 0 , 1 )
# Merge all
df_final = df . merge ( df_perf [ [ ' steam_id_64 ' , ' eco_rating_eco_rounds ' , ' eco_kd_ratio ' , ' eco_avg_rounds ' ] ] , on = ' steam_id_64 ' , how = ' left ' )
return df_final [ [ ' steam_id_64 ' , ' eco_avg_damage_per_1k ' , ' eco_rating_eco_rounds ' , ' eco_kd_ratio ' , ' eco_avg_rounds ' ] ]
@staticmethod
def _calculate_pace_features ( conn , player_ids ) :
if not player_ids : return None
placeholders = ' , ' . join ( [ ' ? ' ] * len ( player_ids ) )
# 1. Avg Time to First Contact
# Find min(event_time) per round per player (Attacker or Victim)
q_first_contact = f """
SELECT
player_id as steam_id_64 ,
AVG ( first_time ) as pace_avg_time_to_first_contact
FROM (
SELECT
match_id , round_num ,
CASE
WHEN attacker_steam_id IN ( { placeholders } ) THEN attacker_steam_id
ELSE victim_steam_id
END as player_id ,
MIN ( event_time ) as first_time
FROM fact_round_events
WHERE ( attacker_steam_id IN ( { placeholders } ) OR victim_steam_id IN ( { placeholders } ) )
AND event_type IN ( ' kill ' , ' death ' ) - - focus on combat
GROUP BY match_id , round_num , player_id
) sub
GROUP BY player_id
"""
# Note: 'death' isn't an event_type, it's 'kill'.
# We check if player is attacker or victim in 'kill' event.
# Corrected Query:
q_first_contact = f """
SELECT
player_id as steam_id_64 ,
AVG ( first_time ) as pace_avg_time_to_first_contact
FROM (
SELECT
match_id , round_num ,
p_id as player_id ,
MIN ( event_time ) as first_time
FROM (
SELECT match_id , round_num , event_time , attacker_steam_id as p_id FROM fact_round_events WHERE event_type = ' kill '
UNION ALL
SELECT match_id , round_num , event_time , victim_steam_id as p_id FROM fact_round_events WHERE event_type = ' kill '
) raw
WHERE p_id IN ( { placeholders } )
GROUP BY match_id , round_num , p_id
) sub
GROUP BY player_id
"""
df_time = pd . read_sql_query ( q_first_contact , conn , params = player_ids )
# Wait, params=player_ids won't work with f-string placeholders if I use ? inside.
# My placeholders variable is literal string "?,?,?".
# So params should be player_ids.
# But in UNION ALL, I have two WHERE clauses.
# Actually I can optimize:
# WHERE attacker_steam_id IN (...) OR victim_steam_id IN (...)
# Then unpivot in python or SQL.
# Let's use Python for unpivoting to be safe and clear.
q_events = f """
SELECT match_id , round_num , event_time , attacker_steam_id , victim_steam_id
FROM fact_round_events
WHERE event_type = ' kill '
AND ( attacker_steam_id IN ( { placeholders } ) OR victim_steam_id IN ( { placeholders } ) )
"""
# This params needs player_ids * 2
df_ev = pd . read_sql_query ( q_events , conn , params = list ( player_ids ) + list ( player_ids ) )
pace_list = [ ]
if not df_ev . empty :
# Unpivot
att = df_ev [ df_ev [ ' attacker_steam_id ' ] . isin ( player_ids ) ] [ [ ' match_id ' , ' round_num ' , ' event_time ' , ' attacker_steam_id ' ] ] . rename ( columns = { ' attacker_steam_id ' : ' steam_id_64 ' } )
vic = df_ev [ df_ev [ ' victim_steam_id ' ] . isin ( player_ids ) ] [ [ ' match_id ' , ' round_num ' , ' event_time ' , ' victim_steam_id ' ] ] . rename ( columns = { ' victim_steam_id ' : ' steam_id_64 ' } )
combined = pd . concat ( [ att , vic ] )
# Group by round, get min time
first_contacts = combined . groupby ( [ ' match_id ' , ' round_num ' , ' steam_id_64 ' ] ) [ ' event_time ' ] . min ( ) . reset_index ( )
# Average per player
avg_time = first_contacts . groupby ( ' steam_id_64 ' ) [ ' event_time ' ] . mean ( ) . reset_index ( )
avg_time . rename ( columns = { ' event_time ' : ' pace_avg_time_to_first_contact ' } , inplace = True )
pace_list . append ( avg_time )
# 2. Trade Kill Rate
# "Kill a killer within 5s of teammate death"
# We need to reconstruct the flow.
# Iterate matches? Vectorized is hard.
# Let's try a simplified approach:
# For each match, sort events by time.
# If (Kill A->B) at T1, and (Kill C->A) at T2, and T2-T1 <= 5, and C & B are same team.
# We don't have team info in events easily (we have side logic elsewhere).
# Assuming Side logic: If A->B (A=CT, B=T). Then C->A (C=T).
# So B and C are T.
# Let's fetch basic trade info using self-join in SQL?
# A kills B at T1.
# C kills A at T2.
# T2 > T1 and T2 - T1 <= 5.
# C is the Trader. B is the Victim (Teammate).
# We want C's Trade Rate.
q_trades = f """
SELECT
t2 . attacker_steam_id as trader_id ,
COUNT ( * ) as trade_count
FROM fact_round_events t1
JOIN fact_round_events t2
ON t1 . match_id = t2 . match_id
AND t1 . round_num = t2 . round_num
WHERE t1 . event_type = ' kill ' AND t2 . event_type = ' kill '
AND t1 . attacker_steam_id = t2 . victim_steam_id - - Avenger kills the Killer
AND t2 . event_time > t1 . event_time
AND t2 . event_time - t1 . event_time < = 5
AND t2 . attacker_steam_id IN ( { placeholders } )
GROUP BY t2 . attacker_steam_id
"""
df_trades = pd . read_sql_query ( q_trades , conn , params = player_ids )
# Denominator: Opportunities? Or just Total Kills?
# Trade Kill Rate usually means % of Kills that were Trades.
# Let's use that.
# Get Total Kills
q_kills = f """
SELECT attacker_steam_id as steam_id_64 , COUNT ( * ) as total_kills
FROM fact_round_events
WHERE event_type = ' kill ' AND attacker_steam_id IN ( { placeholders } )
GROUP BY attacker_steam_id
"""
df_tot_kills = pd . read_sql_query ( q_kills , conn , params = player_ids )
if not df_trades . empty :
df_trades = df_trades . merge ( df_tot_kills , left_on = ' trader_id ' , right_on = ' steam_id_64 ' , how = ' right ' ) . fillna ( 0 )
df_trades [ ' pace_trade_kill_rate ' ] = df_trades [ ' trade_count ' ] / df_trades [ ' total_kills ' ] . replace ( 0 , 1 )
else :
df_trades = df_tot_kills . copy ( )
df_trades [ ' pace_trade_kill_rate ' ] = 0
df_final = pd . DataFrame ( { ' steam_id_64 ' : list ( player_ids ) } )
if pace_list :
df_final = df_final . merge ( pace_list [ 0 ] , on = ' steam_id_64 ' , how = ' left ' )
# Merge Trade Rate
if not df_trades . empty :
df_final = df_final . merge ( df_trades [ [ ' steam_id_64 ' , ' pace_trade_kill_rate ' ] ] , on = ' steam_id_64 ' , how = ' left ' )
# 3. New Pace Metrics
# pace_opening_kill_time: Avg time of Opening Kills (where attacker_steam_id = player AND is_first_kill = 1?)
# Wait, fact_round_events doesn't store 'is_first_kill' directly? It stores 'first_kill' in fact_match_players but that's aggregate.
# It stores 'event_type'. We need to check if it was the FIRST kill of the round.
# Query: For each round, find the FIRST kill event. Check if attacker is our player. Get time.
q_opening_time = f """
SELECT
attacker_steam_id as steam_id_64 ,
AVG ( event_time ) as pace_opening_kill_time
FROM (
SELECT
match_id , round_num ,
attacker_steam_id ,
MIN ( event_time ) as event_time
FROM fact_round_events
WHERE event_type = ' kill '
GROUP BY match_id , round_num
) first_kills
WHERE attacker_steam_id IN ( { placeholders } )
GROUP BY attacker_steam_id
"""
df_opening_time = pd . read_sql_query ( q_opening_time , conn , params = player_ids )
# pace_avg_life_time: Avg time alive per round
# Logic: Round Duration - Death Time (if died). Else Round Duration.
# We need Round Duration (fact_rounds doesn't have duration? fact_matches has match duration).
# Usually round duration is fixed or we use last event time.
# Let's approximate: If died, time = death_time. If survived, time = max_event_time_of_round.
# Better: survival time.
q_survival = f """
SELECT
p . steam_id_64 ,
AVG (
CASE
WHEN d . death_time IS NOT NULL THEN d . death_time
ELSE r . round_end_time - - Use max event time as proxy for round end
END
) as pace_avg_life_time
FROM fact_match_players p
JOIN (
SELECT match_id , round_num , MAX ( event_time ) as round_end_time
FROM fact_round_events
GROUP BY match_id , round_num
) r ON p . match_id = r . match_id
LEFT JOIN (
SELECT match_id , round_num , victim_steam_id , MIN ( event_time ) as death_time
FROM fact_round_events
WHERE event_type = ' kill '
GROUP BY match_id , round_num , victim_steam_id
) d ON p . match_id = d . match_id AND p . steam_id_64 = d . victim_steam_id
- - We need to join rounds to ensure we track every round the player played ?
- - fact_match_players is per match . We need per round .
- - We can use fact_round_player_economy to get all rounds a player played .
JOIN fact_round_player_economy e ON p . match_id = e . match_id AND p . steam_id_64 = e . steam_id_64 AND r . round_num = e . round_num
WHERE p . steam_id_64 IN ( { placeholders } )
GROUP BY p . steam_id_64
"""
# This join is heavy. Let's simplify.
# Just use death events for "Time of Death".
# And for rounds without death, use 115s (avg round length)? Or max event time?
# Let's stick to what we have.
df_survival = pd . read_sql_query ( q_survival , conn , params = player_ids )
if not df_opening_time . empty :
df_final = df_final . merge ( df_opening_time , on = ' steam_id_64 ' , how = ' left ' )
if not df_survival . empty :
df_final = df_final . merge ( df_survival , on = ' steam_id_64 ' , how = ' left ' )
return df_final . fillna ( 0 )
2026-01-26 21:10:42 +08:00
@staticmethod
def _calculate_ultimate_scores ( df ) :
def n ( col ) :
if col not in df . columns : return 50
s = df [ col ]
if s . max ( ) == s . min ( ) : return 50
return ( s - s . min ( ) ) / ( s . max ( ) - s . min ( ) ) * 100
df = df . copy ( )
# BAT (30%)
df [ ' score_bat ' ] = (
0.25 * n ( ' basic_avg_rating ' ) +
0.20 * n ( ' basic_avg_kd ' ) +
0.15 * n ( ' basic_avg_adr ' ) +
0.10 * n ( ' bat_avg_duel_win_rate ' ) +
0.10 * n ( ' bat_kd_diff_high_elo ' ) +
0.10 * n ( ' basic_avg_kill_3 ' )
)
# STA (15%)
df [ ' score_sta ' ] = (
0.30 * ( 100 - n ( ' sta_rating_volatility ' ) ) +
0.30 * n ( ' sta_loss_rating ' ) +
0.20 * n ( ' sta_win_rating ' ) +
0.10 * ( 100 - abs ( n ( ' sta_time_rating_corr ' ) ) )
)
2026-01-26 02:13:06 +08:00
2026-01-26 21:10:42 +08:00
# HPS (20%)
df [ ' score_hps ' ] = (
2026-01-27 00:57:35 +08:00
0.25 * n ( ' sum_1v3p ' ) +
2026-01-26 21:10:42 +08:00
0.20 * n ( ' hps_match_point_win_rate ' ) +
0.20 * n ( ' hps_comeback_kd_diff ' ) +
0.15 * n ( ' hps_pressure_entry_rate ' ) +
2026-01-27 00:57:35 +08:00
0.20 * n ( ' basic_avg_rating ' )
2026-01-26 21:10:42 +08:00
)
2026-01-26 02:13:06 +08:00
2026-01-26 21:10:42 +08:00
# PTL (10%)
df [ ' score_ptl ' ] = (
2026-01-27 00:57:35 +08:00
0.30 * n ( ' ptl_pistol_kills ' ) +
0.30 * n ( ' ptl_pistol_win_rate ' ) +
0.20 * n ( ' ptl_pistol_kd ' ) +
0.20 * n ( ' ptl_pistol_util_efficiency ' )
2026-01-26 21:10:42 +08:00
)
# T/CT (10%)
df [ ' score_tct ' ] = (
0.35 * n ( ' side_rating_ct ' ) +
0.35 * n ( ' side_rating_t ' ) +
0.15 * n ( ' side_first_kill_rate_ct ' ) +
0.15 * n ( ' side_first_kill_rate_t ' )
)
# UTIL (10%)
# Emphasize prop frequency (usage_rate)
df [ ' score_util ' ] = (
0.35 * n ( ' util_usage_rate ' ) +
0.25 * n ( ' util_avg_nade_dmg ' ) +
0.20 * n ( ' util_avg_flash_time ' ) +
0.20 * n ( ' util_avg_flash_enemy ' )
)
2026-01-27 21:26:07 +08:00
# ECO (New)
df [ ' score_eco ' ] = (
0.50 * n ( ' eco_avg_damage_per_1k ' ) +
0.50 * n ( ' eco_rating_eco_rounds ' )
)
# PACE (New)
# Aggression Score: Faster first contact (lower time) -> higher score
df [ ' score_pace ' ] = (
0.50 * ( 100 - n ( ' pace_avg_time_to_first_contact ' ) ) +
0.50 * n ( ' pace_trade_kill_rate ' )
)
2026-01-26 21:10:42 +08:00
return df
2026-01-26 02:13:06 +08:00
@staticmethod
2026-01-26 21:10:42 +08:00
def get_roster_features_distribution ( target_steam_id ) :
"""
Calculates rank and distribution of the target player ' s L3 features (Scores) within the active roster.
2026-01-26 02:13:06 +08:00
"""
2026-01-26 21:10:42 +08:00
from web . services . web_service import WebService
import json
# 1. Get Active Roster IDs
lineups = WebService . get_lineups ( )
active_roster_ids = [ ]
if lineups :
try :
raw_ids = json . loads ( lineups [ 0 ] [ ' player_ids_json ' ] )
active_roster_ids = [ str ( uid ) for uid in raw_ids ]
except :
pass
if not active_roster_ids :
return None
# 2. Fetch L3 features for all roster members
placeholders = ' , ' . join ( ' ? ' for _ in active_roster_ids )
2026-01-27 21:26:07 +08:00
# Select all columns (simplified) or explicit list including raw metrics
sql = f " SELECT * FROM dm_player_features WHERE steam_id_64 IN ( { placeholders } ) "
2026-01-26 21:10:42 +08:00
rows = query_db ( ' l3 ' , sql , active_roster_ids )
if not rows :
return None
stats_map = { row [ ' steam_id_64 ' ] : dict ( row ) for row in rows }
target_steam_id = str ( target_steam_id )
# If target not in map (maybe no L3 data yet), default to 0
if target_steam_id not in stats_map :
2026-01-27 21:26:07 +08:00
stats_map [ target_steam_id ] = { } # Empty dict, will fallback to 0 in loop
2026-01-26 21:10:42 +08:00
# 3. Calculate Distribution
2026-01-27 21:26:07 +08:00
# Include Scores AND Raw Metrics used in Profile
metrics = [
# Scores
' score_bat ' , ' score_sta ' , ' score_hps ' , ' score_ptl ' , ' score_tct ' , ' score_util ' , ' score_eco ' , ' score_pace ' ,
# Core
' basic_avg_rating ' , ' basic_avg_kd ' , ' basic_avg_adr ' , ' basic_avg_kast ' , ' basic_avg_rws ' ,
# Combat
' basic_avg_headshot_kills ' , ' basic_headshot_rate ' , ' basic_avg_assisted_kill ' , ' basic_avg_awp_kill ' , ' basic_avg_jump_count ' ,
# Obj
' basic_avg_mvps ' , ' basic_avg_plants ' , ' basic_avg_defuses ' , ' basic_avg_flash_assists ' ,
# Opening
' basic_avg_first_kill ' , ' basic_avg_first_death ' , ' basic_first_kill_rate ' , ' basic_first_death_rate ' ,
# Multi
' basic_avg_kill_2 ' , ' basic_avg_kill_3 ' , ' basic_avg_kill_4 ' , ' basic_avg_kill_5 ' ,
' basic_avg_perfect_kill ' , ' basic_avg_revenge_kill ' ,
# STA & BAT Details
' sta_last_30_rating ' , ' sta_win_rating ' , ' sta_loss_rating ' , ' sta_rating_volatility ' , ' sta_time_rating_corr ' ,
' bat_kd_diff_high_elo ' , ' bat_avg_duel_win_rate ' ,
# HPS & PTL Details
' hps_clutch_win_rate_1v1 ' , ' hps_clutch_win_rate_1v3_plus ' , ' hps_match_point_win_rate ' , ' hps_pressure_entry_rate ' ,
' hps_comeback_kd_diff ' , ' hps_losing_streak_kd_diff ' ,
' ptl_pistol_kills ' , ' ptl_pistol_win_rate ' , ' ptl_pistol_kd ' , ' ptl_pistol_util_efficiency ' ,
# UTIL Details
' util_usage_rate ' , ' util_avg_nade_dmg ' , ' util_avg_flash_time ' , ' util_avg_flash_enemy ' ,
# ECO & PACE (New)
' eco_avg_damage_per_1k ' , ' eco_rating_eco_rounds ' , ' eco_kd_ratio ' , ' eco_avg_rounds ' ,
' pace_avg_time_to_first_contact ' , ' pace_trade_kill_rate ' , ' pace_opening_kill_time ' , ' pace_avg_life_time ' ,
# Party
' party_1_win_rate ' , ' party_1_rating ' , ' party_1_adr ' ,
' party_2_win_rate ' , ' party_2_rating ' , ' party_2_adr ' ,
' party_3_win_rate ' , ' party_3_rating ' , ' party_3_adr ' ,
' party_4_win_rate ' , ' party_4_rating ' , ' party_4_adr ' ,
' party_5_win_rate ' , ' party_5_rating ' , ' party_5_adr ' ,
# Rating Dist
' rating_dist_carry_rate ' , ' rating_dist_normal_rate ' , ' rating_dist_sacrifice_rate ' , ' rating_dist_sleeping_rate ' ,
# ELO
' elo_lt1200_rating ' , ' elo_1200_1400_rating ' , ' elo_1400_1600_rating ' , ' elo_1600_1800_rating ' , ' elo_1800_2000_rating ' , ' elo_gt2000_rating '
]
2026-01-26 21:10:42 +08:00
result = { }
for m in metrics :
2026-01-27 21:26:07 +08:00
# Handle missing columns gracefully
values = [ ]
for p in stats_map . values ( ) :
val = p . get ( m )
if val is None : val = 0
values . append ( float ( val ) )
target_val = stats_map [ target_steam_id ] . get ( m )
if target_val is None : target_val = 0
target_val = float ( target_val )
2026-01-26 21:10:42 +08:00
if not values :
result [ m ] = None
continue
2026-01-27 21:26:07 +08:00
# For PACE (Time), lower is better usually, but rank logic assumes Higher is Better (reverse=True).
# If we want Rank #1 to be Lowest Time, we should sort normal.
# But standardized scores handle this. For raw metrics, let's keep consistent (Higher = Rank 1)
# unless we explicitly handle "Low is Good".
# For now, keep simple: Rank 1 = Highest Value.
# For Time: Rank 1 = Slowest. (User can interpret)
2026-01-26 21:10:42 +08:00
values . sort ( reverse = True )
try :
rank = values . index ( target_val ) + 1
except ValueError :
rank = len ( values )
result [ m ] = {
' val ' : target_val ,
' rank ' : rank ,
' total ' : len ( values ) ,
' min ' : min ( values ) ,
' max ' : max ( values ) ,
' avg ' : sum ( values ) / len ( values )
}
return result