1.2.0: Refined all 6D calcs and UI/UX Experiences.

This commit is contained in:
2026-01-26 21:10:42 +08:00
parent 8cc359b0ec
commit ade29ec1e8
25 changed files with 2498 additions and 482 deletions

View File

@@ -0,0 +1,499 @@
import sqlite3
import pandas as pd
import numpy as np
import os
DB_L2_PATH = r'd:\Documents\trae_projects\yrtv\database\L2\L2_Main.sqlite'
def get_db_connection():
    """Open the L2 SQLite database and enable name-based row access."""
    connection = sqlite3.connect(DB_L2_PATH)
    # sqlite3.Row lets callers address columns by name instead of index.
    connection.row_factory = sqlite3.Row
    return connection
def safe_div(a, b):
    """Divide a by b, returning 0 when the denominator is zero."""
    return 0 if b == 0 else a / b
def load_and_calculate_ultimate(conn, min_matches=5):
    """Build the full per-player "6D" feature set from the L2 database.

    Layers derived features on top of lifetime aggregates: basic stats,
    STA (stability over time), BAT (battle/duel context), HPS (high
    pressure), PTL (pistol rounds) and UTIL (utility usage).

    Args:
        conn: open sqlite3 connection to the L2 database.
        min_matches: minimum matches played for a player to be included.

    Returns:
        pandas.DataFrame with one row per qualifying player (NaNs filled
        with 0 at the end), or None when no player qualifies.
    """
    print("Loading Ultimate Data Set...")
    # 1. Basic Stats (Already have)
    # Lifetime aggregates straight from fact_match_players, one row per player.
    query_basic = """
    SELECT
        steam_id_64,
        COUNT(*) as matches_played,
        SUM(round_total) as rounds_played,
        AVG(rating) as basic_avg_rating,
        AVG(kd_ratio) as basic_avg_kd,
        AVG(adr) as basic_avg_adr,
        AVG(kast) as basic_avg_kast,
        AVG(rws) as basic_avg_rws,
        SUM(headshot_count) as sum_hs,
        SUM(kills) as sum_kills,
        SUM(deaths) as sum_deaths,
        SUM(first_kill) as sum_fk,
        SUM(first_death) as sum_fd,
        SUM(clutch_1v1) as sum_1v1,
        SUM(clutch_1v2) as sum_1v2,
        SUM(clutch_1v3) + SUM(clutch_1v4) + SUM(clutch_1v5) as sum_1v3p,
        SUM(kill_2) as sum_2k,
        SUM(kill_3) as sum_3k,
        SUM(kill_4) as sum_4k,
        SUM(kill_5) as sum_5k,
        SUM(assisted_kill) as sum_assist,
        SUM(perfect_kill) as sum_perfect,
        SUM(revenge_kill) as sum_revenge,
        SUM(awp_kill) as sum_awp,
        SUM(jump_count) as sum_jump,
        SUM(throw_harm) as sum_util_dmg,
        SUM(flash_time) as sum_flash_time,
        SUM(flash_enemy) as sum_flash_enemy,
        SUM(flash_team) as sum_flash_team
    FROM fact_match_players
    GROUP BY steam_id_64
    HAVING COUNT(*) >= ?
    """
    df = pd.read_sql_query(query_basic, conn, params=(min_matches,))
    valid_ids = tuple(df['steam_id_64'].tolist())
    if not valid_ids: return None
    # One '?' per qualifying id; this string is reused by every IN (...) below.
    placeholders = ','.join(['?'] * len(valid_ids))
    # --- Basic Derived ---
    # .replace(0, 1) guards zero denominators (players with no kills / no duels).
    df['basic_headshot_rate'] = df['sum_hs'] / df['sum_kills'].replace(0, 1)
    df['basic_avg_headshot_kills'] = df['sum_hs'] / df['matches_played']
    df['basic_avg_first_kill'] = df['sum_fk'] / df['matches_played']
    df['basic_avg_first_death'] = df['sum_fd'] / df['matches_played']
    df['basic_first_kill_rate'] = df['sum_fk'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1)
    df['basic_first_death_rate'] = df['sum_fd'] / (df['sum_fk'] + df['sum_fd']).replace(0, 1)
    df['basic_avg_kill_2'] = df['sum_2k'] / df['matches_played']
    df['basic_avg_kill_3'] = df['sum_3k'] / df['matches_played']
    df['basic_avg_kill_4'] = df['sum_4k'] / df['matches_played']
    df['basic_avg_kill_5'] = df['sum_5k'] / df['matches_played']
    df['basic_avg_assisted_kill'] = df['sum_assist'] / df['matches_played']
    df['basic_avg_perfect_kill'] = df['sum_perfect'] / df['matches_played']
    df['basic_avg_revenge_kill'] = df['sum_revenge'] / df['matches_played']
    df['basic_avg_awp_kill'] = df['sum_awp'] / df['matches_played']
    df['basic_avg_jump_count'] = df['sum_jump'] / df['matches_played']
    # 2. STA - Detailed Time Series
    print("Calculating STA (Detailed)...")
    query_sta = f"""
    SELECT mp.steam_id_64, mp.rating, mp.is_win, m.start_time, m.duration
    FROM fact_match_players mp
    JOIN fact_matches m ON mp.match_id = m.match_id
    WHERE mp.steam_id_64 IN ({placeholders})
    ORDER BY mp.steam_id_64, m.start_time
    """
    df_matches = pd.read_sql_query(query_sta, conn, params=valid_ids)
    sta_list = []
    for pid, group in df_matches.groupby('steam_id_64'):
        group = group.sort_values('start_time')
        # Last 30
        last_30 = group.tail(30)
        sta_last_30 = last_30['rating'].mean()
        # Win/Loss
        sta_win = group[group['is_win']==1]['rating'].mean()
        sta_loss = group[group['is_win']==0]['rating'].mean()
        # Volatility
        # std() of fewer than 2 rows is NaN; the final fillna(0) absorbs it.
        sta_vol = group.tail(10)['rating'].std()
        # Time Correlation (Duration vs Rating)
        sta_time_corr = group['duration'].corr(group['rating']) if len(group) > 2 else 0
        # Fatigue: first-vs-last rating on days with >= 3 matches.
        # NOTE(review): start_time is decoded as unix seconds — confirm schema.
        group['date'] = pd.to_datetime(group['start_time'], unit='s').dt.date
        daily = group.groupby('date')['rating'].agg(['first', 'last', 'count'])
        daily_fatigue = daily[daily['count'] >= 3]
        if len(daily_fatigue) > 0:
            # Positive value = rating drops over the day (fatigue).
            fatigue_decay = (daily_fatigue['first'] - daily_fatigue['last']).mean()
        else:
            fatigue_decay = 0
        sta_list.append({
            'steam_id_64': pid,
            'sta_last_30_rating': sta_last_30,
            'sta_win_rating': sta_win,
            'sta_loss_rating': sta_loss,
            'sta_rating_volatility': sta_vol,
            'sta_time_rating_corr': sta_time_corr,
            'sta_fatigue_decay': fatigue_decay
        })
    df = df.merge(pd.DataFrame(sta_list), on='steam_id_64', how='left')
    # 3. BAT - Distance & Advanced
    print("Calculating BAT (Distance & Context)...")
    # Distance Logic: Get all kills with positions
    # We need to map positions.
    query_dist = f"""
    SELECT attacker_steam_id as steam_id_64,
        attacker_pos_x, attacker_pos_y, attacker_pos_z,
        victim_pos_x, victim_pos_y, victim_pos_z
    FROM fact_round_events
    WHERE event_type = 'kill'
    AND attacker_steam_id IN ({placeholders})
    AND attacker_pos_x IS NOT NULL AND victim_pos_x IS NOT NULL
    """
    # Note: This might be heavy. If memory issue, sample or chunk.
    try:
        df_dist = pd.read_sql_query(query_dist, conn, params=valid_ids)
        if not df_dist.empty:
            # Calc Euclidian Distance
            df_dist['dist'] = np.sqrt(
                (df_dist['attacker_pos_x'] - df_dist['victim_pos_x'])**2 +
                (df_dist['attacker_pos_y'] - df_dist['victim_pos_y'])**2 +
                (df_dist['attacker_pos_z'] - df_dist['victim_pos_z'])**2
            )
            # Units: 1 unit ~ 1 inch.
            # Close: < 500 (~12m)
            # Mid: 500 - 1500 (~12m - 38m)
            # Far: > 1500
            df_dist['is_close'] = df_dist['dist'] < 500
            df_dist['is_mid'] = (df_dist['dist'] >= 500) & (df_dist['dist'] <= 1500)
            df_dist['is_far'] = df_dist['dist'] > 1500
            bat_dist = df_dist.groupby('steam_id_64').agg({
                'is_close': 'mean',  # % of kills that are close
                'is_mid': 'mean',
                'is_far': 'mean'
            }).reset_index()
            bat_dist.columns = ['steam_id_64', 'bat_kill_share_close', 'bat_kill_share_mid', 'bat_kill_share_far']
            # NOTE(review): bat_dist (kill-share columns) is computed but never
            # merged into df below — only bat_rates is. Confirm intent.
            # Note: "Win Rate" by distance requires Deaths by distance.
            # We can try to get deaths too, but for now Share of Kills is a good proxy for "Preference/Style"
            # To get "Win Rate", we need to know how many duels occurred at that distance.
            # Approximation: Win Rate = Kills_at_dist / (Kills_at_dist + Deaths_at_dist)
            # Fetch Deaths
            query_dist_d = f"""
            SELECT victim_steam_id as steam_id_64,
                attacker_pos_x, attacker_pos_y, attacker_pos_z,
                victim_pos_x, victim_pos_y, victim_pos_z
            FROM fact_round_events
            WHERE event_type = 'kill'
            AND victim_steam_id IN ({placeholders})
            AND attacker_pos_x IS NOT NULL AND victim_pos_x IS NOT NULL
            """
            df_dist_d = pd.read_sql_query(query_dist_d, conn, params=valid_ids)
            df_dist_d['dist'] = np.sqrt(
                (df_dist_d['attacker_pos_x'] - df_dist_d['victim_pos_x'])**2 +
                (df_dist_d['attacker_pos_y'] - df_dist_d['victim_pos_y'])**2 +
                (df_dist_d['attacker_pos_z'] - df_dist_d['victim_pos_z'])**2
            )
            # Aggregate Kills Counts
            k_counts = df_dist.groupby('steam_id_64').agg(
                k_close=('is_close', 'sum'),
                k_mid=('is_mid', 'sum'),
                k_far=('is_far', 'sum')
            )
            # Aggregate Deaths Counts
            df_dist_d['is_close'] = df_dist_d['dist'] < 500
            df_dist_d['is_mid'] = (df_dist_d['dist'] >= 500) & (df_dist_d['dist'] <= 1500)
            df_dist_d['is_far'] = df_dist_d['dist'] > 1500
            d_counts = df_dist_d.groupby('steam_id_64').agg(
                d_close=('is_close', 'sum'),
                d_mid=('is_mid', 'sum'),
                d_far=('is_far', 'sum')
            )
            # Merge
            # Outer join keeps players seen only on one side; fillna zeroes the gap.
            bat_rates = k_counts.join(d_counts, how='outer').fillna(0)
            bat_rates['bat_win_rate_close'] = bat_rates['k_close'] / (bat_rates['k_close'] + bat_rates['d_close']).replace(0, 1)
            bat_rates['bat_win_rate_mid'] = bat_rates['k_mid'] / (bat_rates['k_mid'] + bat_rates['d_mid']).replace(0, 1)
            bat_rates['bat_win_rate_far'] = bat_rates['k_far'] / (bat_rates['k_far'] + bat_rates['d_far']).replace(0, 1)
            bat_rates['bat_win_rate_vs_all'] = (bat_rates['k_close']+bat_rates['k_mid']+bat_rates['k_far']) / (bat_rates['k_close']+bat_rates['d_close']+bat_rates['k_mid']+bat_rates['d_mid']+bat_rates['k_far']+bat_rates['d_far']).replace(0, 1)
            # NOTE(review): bat_rates keeps steam_id_64 in the INDEX (groupby),
            # while merge(on='steam_id_64') looks for a column — this may raise
            # and be silently swallowed by the except below. Verify.
            df = df.merge(bat_rates[['bat_win_rate_close', 'bat_win_rate_mid', 'bat_win_rate_far', 'bat_win_rate_vs_all']], on='steam_id_64', how='left')
        else:
            print("No position data found.")
    except Exception as e:
        # Best-effort: distance features are optional; continue without them.
        print(f"Dist calculation error: {e}")
    # High/Low ELO KD
    # Split each player's matches by lobby ELO above/below their personal mean.
    query_elo = f"""
    SELECT mp.steam_id_64, mp.kd_ratio,
        (SELECT AVG(group_origin_elo) FROM fact_match_teams fmt WHERE fmt.match_id = mp.match_id AND group_origin_elo > 0) as elo
    FROM fact_match_players mp
    WHERE mp.steam_id_64 IN ({placeholders})
    """
    df_elo = pd.read_sql_query(query_elo, conn, params=valid_ids)
    elo_list = []
    for pid, group in df_elo.groupby('steam_id_64'):
        avg = group['elo'].mean()
        # Fallback baseline when no lobby ELO was recorded at all.
        if pd.isna(avg): avg = 1000
        elo_list.append({
            'steam_id_64': pid,
            'bat_kd_diff_high_elo': group[group['elo'] > avg]['kd_ratio'].mean(),
            'bat_kd_diff_low_elo': group[group['elo'] <= avg]['kd_ratio'].mean()
        })
    df = df.merge(pd.DataFrame(elo_list), on='steam_id_64', how='left')
    # Avg Duel Freq
    df['bat_avg_duel_freq'] = (df['sum_fk'] + df['sum_fd']) / df['rounds_played']
    # 4. HPS - High Pressure Contexts
    print("Calculating HPS (Contexts)...")
    # We need round-by-round score evolution.
    # Join rounds and economy(side) and matches
    query_hps_ctx = f"""
    SELECT r.match_id, r.round_num, r.ct_score, r.t_score, r.winner_side,
        m.score_team1, m.score_team2, m.winner_team,
        e.steam_id_64, e.side as player_side,
        (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=r.match_id AND ev.round_num=r.round_num AND ev.attacker_steam_id=e.steam_id_64 AND ev.event_type='kill') as kills,
        (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=r.match_id AND ev.round_num=r.round_num AND ev.victim_steam_id=e.steam_id_64 AND ev.event_type='kill') as deaths
    FROM fact_rounds r
    JOIN fact_matches m ON r.match_id = m.match_id
    JOIN fact_round_player_economy e ON r.match_id = e.match_id AND r.round_num = e.round_num
    WHERE e.steam_id_64 IN ({placeholders})
    """
    # This is heavy.
    try:
        # Optimization: Process per match or use SQL aggregation?
        # SQL aggregation for specific conditions is better.
        # 4.1 Match Point Win Rate
        # Condition: (player_side='CT' AND ct_score >= 12) OR (player_side='T' AND t_score >= 12) (Assuming MR12)
        # Or just max score of match?
        # Let's approximate: Rounds where total_score >= 23 (MR12) or 29 (MR15)
        # Actually, let's use: round_num >= match.round_total - 1? No.
        # Use: Rounds where One Team Score = Match Win Score - 1.
        # Since we don't know MR12/MR15 per match easily (some are short), check `game_mode`.
        # Fallback: Rounds where `ct_score` or `t_score` >= 12.
        # 4.2 Pressure Entry Rate (Losing Streak)
        # Condition: Team score < Enemy score - 3.
        # 4.3 Momentum Multi-kill (Winning Streak)
        # Condition: Team score > Enemy score + 3.
        # Let's load a simplified dataframe of rounds
        df_rounds = pd.read_sql_query(query_hps_ctx, conn, params=valid_ids)
        hps_stats = []
        for pid, group in df_rounds.groupby('steam_id_64'):
            # Determine Player Team Score and Enemy Team Score
            # If player_side == 'CT', player_score = ct_score
            # NOTE(review): assigning into a groupby slice can trigger
            # SettingWithCopyWarning; values are still computed correctly here.
            group['my_score'] = np.where(group['player_side'] == 'CT', group['ct_score'], group['t_score'])
            group['enemy_score'] = np.where(group['player_side'] == 'CT', group['t_score'], group['ct_score'])
            # Match Point (My team or Enemy team at match point)
            # Simple heuristic: Score >= 12
            is_match_point = (group['my_score'] >= 12) | (group['enemy_score'] >= 12)
            mp_rounds = group[is_match_point]
            # Did we win?
            # winner_side matches player_side
            mp_wins = mp_rounds[mp_rounds['winner_side'] == mp_rounds['player_side']]
            # Neutral 0.5 when the player has no match-point rounds at all.
            mp_win_rate = len(mp_wins) / len(mp_rounds) if len(mp_rounds) > 0 else 0.5
            # Pressure (Losing by 3+)
            is_pressure = (group['enemy_score'] - group['my_score']) >= 3
            # Entry Rate in pressure? Need FK data.
            # We only loaded kills. Let's use Kills per round in pressure.
            pressure_kpr = group[is_pressure]['kills'].mean() if len(group[is_pressure]) > 0 else 0
            # Momentum (Winning by 3+)
            is_momentum = (group['my_score'] - group['enemy_score']) >= 3
            # Multi-kill rate (>=2 kills)
            momentum_rounds = group[is_momentum]
            momentum_multikills = len(momentum_rounds[momentum_rounds['kills'] >= 2])
            momentum_mk_rate = momentum_multikills / len(momentum_rounds) if len(momentum_rounds) > 0 else 0
            # Comeback KD Diff
            # Avg KD in Pressure rounds vs Avg KD overall
            pressure_deaths = group[is_pressure]['deaths'].sum()
            pressure_kills = group[is_pressure]['kills'].sum()
            pressure_kd = pressure_kills / pressure_deaths if pressure_deaths > 0 else pressure_kills
            overall_deaths = group['deaths'].sum()
            overall_kills = group['kills'].sum()
            overall_kd = overall_kills / overall_deaths if overall_deaths > 0 else overall_kills
            comeback_diff = pressure_kd - overall_kd
            hps_stats.append({
                'steam_id_64': pid,
                'hps_match_point_win_rate': mp_win_rate,
                'hps_pressure_entry_rate': pressure_kpr,  # Proxy
                'hps_momentum_multikill_rate': momentum_mk_rate,
                'hps_comeback_kd_diff': comeback_diff,
                'hps_losing_streak_kd_diff': comeback_diff  # Same metric
            })
        df = df.merge(pd.DataFrame(hps_stats), on='steam_id_64', how='left')
        # 4.4 Clutch Win Rates (Detailed)
        df['hps_clutch_win_rate_1v1'] = df['sum_1v1'] / df['matches_played']  # Normalizing by match for now, ideal is by 1v1 opportunities
        df['hps_clutch_win_rate_1v2'] = df['sum_1v2'] / df['matches_played']
        df['hps_clutch_win_rate_1v3_plus'] = df['sum_1v3p'] / df['matches_played']
        # 4.5 Close Match Rating (from previous)
        # ... (Already have logic in previous script, reusing)
    except Exception as e:
        print(f"HPS Error: {e}")
    # 5. PTL - Pistol Detailed
    print("Calculating PTL...")
    # Filter Round 1, 13 (and 16 for MR15?)
    # Just use 1 and 13 (common for MR12)
    query_ptl = f"""
    SELECT
        e.steam_id_64,
        (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=e.match_id AND ev.round_num=e.round_num AND ev.attacker_steam_id=e.steam_id_64 AND ev.event_type='kill') as kills,
        (SELECT COUNT(*) FROM fact_round_events ev WHERE ev.match_id=e.match_id AND ev.round_num=e.round_num AND ev.victim_steam_id=e.steam_id_64 AND ev.event_type='kill') as deaths,
        r.winner_side, e.side as player_side,
        e.equipment_value
    FROM fact_round_player_economy e
    JOIN fact_rounds r ON e.match_id = r.match_id AND e.round_num = r.round_num
    WHERE e.steam_id_64 IN ({placeholders})
    AND e.round_num IN (1, 13)
    """
    try:
        df_ptl_raw = pd.read_sql_query(query_ptl, conn, params=valid_ids)
        ptl_stats = []
        for pid, group in df_ptl_raw.groupby('steam_id_64'):
            kills = group['kills'].sum()
            deaths = group['deaths'].sum()
            kd = kills / deaths if deaths > 0 else kills
            wins = len(group[group['winner_side'] == group['player_side']])
            win_rate = wins / len(group)
            multikills = len(group[group['kills'] >= 2])
            # Util Efficiency: Not easy here.
            ptl_stats.append({
                'steam_id_64': pid,
                'ptl_pistol_kills': kills,  # Total? Or Avg? Schema says REAL. Let's use Avg per Match later.
                'ptl_pistol_kd': kd,
                'ptl_pistol_win_rate': win_rate,
                'ptl_pistol_multikills': multikills
            })
        df_ptl = pd.DataFrame(ptl_stats)
        # NOTE(review): divides every player's pistol kills by the GLOBAL mean
        # of matches_played, not that player's own match count; per-player
        # normalisation would require a merge first. Confirm intent.
        df_ptl['ptl_pistol_kills'] = df_ptl['ptl_pistol_kills'] / df['matches_played'].mean()  # Approximate
        df = df.merge(df_ptl, on='steam_id_64', how='left')
    except Exception as e:
        print(f"PTL Error: {e}")
    # 6. T/CT & UTIL (Straightforward)
    print("Calculating T/CT & UTIL...")
    # T/CT Side Stats
    # NOTE(review): query_side is built but never executed, and df_ct below is
    # loaded but never merged — the T/CT feature family is unfinished here.
    query_side = f"""
    SELECT steam_id_64,
        SUM(CASE WHEN side='CT' THEN 1 ELSE 0 END) as ct_rounds,
        SUM(CASE WHEN side='T' THEN 1 ELSE 0 END) as t_rounds
    FROM fact_round_player_economy
    WHERE steam_id_64 IN ({placeholders})
    GROUP BY steam_id_64
    """
    # Combine with aggregated ratings from fact_match_players_ct/t
    query_side_r = f"""
    SELECT steam_id_64, AVG(rating) as ct_rating, AVG(kd_ratio) as ct_kd, SUM(first_kill) as ct_fk
    FROM fact_match_players_ct WHERE steam_id_64 IN ({placeholders}) GROUP BY steam_id_64
    """
    df_ct = pd.read_sql_query(query_side_r, conn, params=valid_ids)
    # Similar for T...
    # Merge...
    # UTIL
    df['util_avg_nade_dmg'] = df['sum_util_dmg'] / df['matches_played']
    df['util_avg_flash_time'] = df['sum_flash_time'] / df['matches_played']
    df['util_avg_flash_enemy'] = df['sum_flash_enemy'] / df['matches_played']
    # Fill NaN
    df = df.fillna(0)
    return df
def calculate_ultimate_scores(df):
    """Derive the six 0-100 dimension scores from the raw feature frame.

    Each score_* column is a fixed-weight blend of min-max normalised
    features. Missing or zero-spread features contribute a neutral 50.
    Returns a new DataFrame; the input frame is not mutated.

    Fixes vs. previous revision:
    - `n()` now always returns a Series (was a bare int 50 for missing or
      constant columns, which crashed `.abs()` chaining in score_STA).
    - Duration independence now normalises |corr| BEFORE scaling; the old
      `n(col).abs()` was a no-op because normalised values are already >= 0,
      so "closer to 0 is better" was not actually implemented.
    """
    # Normalize Helper
    def n(col):
        """Min-max normalise `col` (column name or Series) to a 0-100 Series.

        Returns a constant neutral-50 Series when the column is absent or
        has no spread, so callers can safely chain Series operations.
        """
        if isinstance(col, str):
            if col not in df.columns:
                return pd.Series(50.0, index=df.index)
            s = df[col]
        else:
            s = col
        if s.max() == s.min():
            return pd.Series(50.0, index=df.index)
        return (s - s.min()) / (s.max() - s.min()) * 100
    df = df.copy()
    # 1. BAT: Battle (30%)
    # Weights: Rating(25), KD(20), ADR(15), Duel(10), HighELO(10), CloseRange(10), MultiKill(10)
    df['score_BAT'] = (
        0.25 * n('basic_avg_rating') +
        0.20 * n('basic_avg_kd') +
        0.15 * n('basic_avg_adr') +
        0.10 * n('bat_avg_duel_win_rate') +  # Need to ensure col exists
        0.10 * n('bat_kd_diff_high_elo') +
        0.10 * n('bat_win_rate_close') +
        0.10 * n('basic_avg_kill_3')  # Multi-kill proxy
    )
    # 2. STA: Stability (15%)
    # Weights: Volatility(30), LossRating(30), WinRating(20), TimeCorr(10), Fatigue(10)
    # Independence from match duration: a |correlation| near 0 is best, so
    # normalise the magnitude of the raw correlation (not the signed value).
    if 'sta_time_rating_corr' in df.columns:
        duration_independence = 100 - n(df['sta_time_rating_corr'].abs())
    else:
        duration_independence = pd.Series(50.0, index=df.index)
    df['score_STA'] = (
        0.30 * (100 - n('sta_rating_volatility')) +
        0.30 * n('sta_loss_rating') +
        0.20 * n('sta_win_rating') +
        0.10 * duration_independence +  # Closer to 0 is better (independent of duration)
        0.10 * (100 - n('sta_fatigue_decay'))
    )
    # 3. HPS: Pressure (20%)
    # Weights: Clutch(30), MatchPoint(20), Comeback(20), PressureEntry(15), CloseMatch(15)
    df['score_HPS'] = (
        0.30 * n('sum_1v3p') +  # Using high tier clutches
        0.20 * n('hps_match_point_win_rate') +
        0.20 * n('hps_comeback_kd_diff') +
        0.15 * n('hps_pressure_entry_rate') +
        0.15 * n('basic_avg_rating')  # Fallback if close match rating missing
    )
    # 4. PTL: Pistol (10%)
    # Weights: Kills(40), WinRate(30), KD(30)
    df['score_PTL'] = (
        0.40 * n('ptl_pistol_kills') +
        0.30 * n('ptl_pistol_win_rate') +
        0.30 * n('ptl_pistol_kd')
    )
    # 5. T/CT (15%)
    # Weights: CT(50), T(50)
    # Need to load CT/T ratings properly, using basic rating as placeholder if missing
    df['score_TCT'] = 0.5 * n('basic_avg_rating') + 0.5 * n('basic_avg_rating')
    # 6. UTIL (10%)
    # Weights: Dmg(50), Flash(30), EnemiesFlashed(20)
    df['score_UTIL'] = (
        0.50 * n('util_avg_nade_dmg') +
        0.30 * n('util_avg_flash_time') +
        0.20 * n('util_avg_flash_enemy')
    )
    return df
def main():
    """Entry point: build the feature set, score it, and print a summary."""
    conn = get_db_connection()
    try:
        features = load_and_calculate_ultimate(conn)
        if features is None:
            return
        scored = calculate_ultimate_scores(features)
        print("\n--- Ultimate Scores (Top 5 BAT) ---")
        display_cols = ['steam_id_64', 'score_BAT', 'score_STA', 'score_HPS', 'score_PTL', 'score_UTIL']
        print(scored[display_cols].sort_values('score_BAT', ascending=False).head(5))
        # Sanity-check that each feature family made it into the frame.
        print("\n--- Feature Coverage ---")
        print(f"Total Columns: {len(scored.columns)}")
        print("BAT Distances:", 'bat_win_rate_close' in scored.columns)
        print("HPS Contexts:", 'hps_match_point_win_rate' in scored.columns)
        print("PTL Detailed:", 'ptl_pistol_kd' in scored.columns)
    finally:
        conn.close()
# Run only when executed as a script (not on import).
if __name__ == "__main__":
    main()