1.7.0: New features.

This commit is contained in:
2026-01-27 21:26:07 +08:00
parent 5693eb84ee
commit 6b4cc048b3
11 changed files with 737 additions and 40 deletions

View File

@@ -1135,11 +1135,380 @@ class FeatureService:
df_elo_flat = pd.DataFrame(flat_elo_data)
df = df.merge(df_elo_flat, on='steam_id_64', how='left')
# 9. New Features: Economy & Pace
df_eco = FeatureService._calculate_economy_features(conn, valid_ids)
if df_eco is not None:
df = df.merge(df_eco, on='steam_id_64', how='left')
df_pace = FeatureService._calculate_pace_features(conn, valid_ids)
if df_pace is not None:
df = df.merge(df_pace, on='steam_id_64', how='left')
# Final Mappings
df['total_matches'] = df['matches_played']
return df.fillna(0)
@staticmethod
def _calculate_economy_features(conn, player_ids):
    """Compute per-player economy features from round-level economy data.

    Args:
        conn: DB-API connection (handed to pandas.read_sql_query).
        player_ids: iterable of steam_id_64 values to compute features for.

    Returns:
        DataFrame with columns ['steam_id_64', 'eco_avg_damage_per_1k',
        'eco_rating_eco_rounds', 'eco_kd_ratio', 'eco_avg_rounds'],
        or None when player_ids is empty.
    """
    # Normalize to a list so `ids + ids` and len() work for any iterable
    # (the sibling pace method already does list(player_ids) for the same reason).
    ids = list(player_ids)
    if not ids:
        return None
    placeholders = ','.join(['?'] * len(ids))

    # 1. Investment efficiency: damage dealt per 1000$ of equipment bought.
    # Equipment spend comes from fact_round_player_economy (one row per round).
    q_eco_val = f"""
        SELECT steam_id_64, SUM(equipment_value) as total_spend, COUNT(*) as rounds_tracked
        FROM fact_round_player_economy
        WHERE steam_id_64 IN ({placeholders})
        GROUP BY steam_id_64
    """
    df_spend = pd.read_sql_query(q_eco_val, conn, params=ids)

    # Total damage is derived from ADR * rounds in fact_match_players,
    # restricted to matches that actually have economy data so numerator and
    # denominator cover the same set of matches.
    q_dmg = f"""
        SELECT mp.steam_id_64, SUM(mp.adr * mp.round_total) as total_damage
        FROM fact_match_players mp
        JOIN (
            SELECT DISTINCT match_id, steam_id_64
            FROM fact_round_player_economy
            WHERE steam_id_64 IN ({placeholders})
        ) eco ON mp.match_id = eco.match_id AND mp.steam_id_64 = eco.steam_id_64
        WHERE mp.steam_id_64 IN ({placeholders})
        GROUP BY mp.steam_id_64
    """
    df_dmg = pd.read_sql_query(q_dmg, conn, params=ids + ids)
    df = df_spend.merge(df_dmg, on='steam_id_64', how='inner')
    # Damage per 1000$ spent; replace a zero denominator to avoid div-by-zero.
    df['eco_avg_damage_per_1k'] = df['total_damage'] / (df['total_spend'] / 1000.0).replace(0, 1)

    # 2. Eco-round performance (rounds where equipment value < 2000).
    # Kills and deaths are counted in two separate queries: a single query
    # with an OR-join on attacker/victim cannot be grouped per player cleanly.
    q_eco_kills = f"""
        SELECT
            e.attacker_steam_id as steam_id_64,
            COUNT(*) as eco_kills
        FROM fact_round_events e
        JOIN fact_round_player_economy eco
          ON e.match_id = eco.match_id
         AND e.round_num = eco.round_num
         AND e.attacker_steam_id = eco.steam_id_64
        WHERE e.event_type = 'kill'
          AND eco.equipment_value < 2000
          AND eco.steam_id_64 IN ({placeholders})
        GROUP BY e.attacker_steam_id
    """
    df_eco_kills = pd.read_sql_query(q_eco_kills, conn, params=ids)

    q_eco_deaths = f"""
        SELECT
            e.victim_steam_id as steam_id_64,
            COUNT(*) as eco_deaths
        FROM fact_round_events e
        JOIN fact_round_player_economy eco
          ON e.match_id = eco.match_id
         AND e.round_num = eco.round_num
         AND e.victim_steam_id = eco.steam_id_64
        WHERE e.event_type = 'kill'
          AND eco.equipment_value < 2000
          AND eco.steam_id_64 IN ({placeholders})
        GROUP BY e.victim_steam_id
    """
    df_eco_deaths = pd.read_sql_query(q_eco_deaths, conn, params=ids)

    # Number of eco rounds per player (denominator for the per-round rate).
    q_eco_rounds = f"""
        SELECT steam_id_64, COUNT(*) as eco_round_count
        FROM fact_round_player_economy
        WHERE equipment_value < 2000 AND steam_id_64 IN ({placeholders})
        GROUP BY steam_id_64
    """
    df_eco_cnt = pd.read_sql_query(q_eco_rounds, conn, params=ids)

    df_perf = (df_eco_cnt
               .merge(df_eco_kills, on='steam_id_64', how='left')
               .merge(df_eco_deaths, on='steam_id_64', how='left')
               .fillna(0))
    # Kills per eco round and eco K/D, both guarded against zero denominators.
    df_perf['eco_rating_eco_rounds'] = df_perf['eco_kills'] / df_perf['eco_round_count'].replace(0, 1)
    df_perf['eco_kd_ratio'] = df_perf['eco_kills'] / df_perf['eco_deaths'].replace(0, 1)

    # Eco rounds per match, normalized by matches that actually have economy
    # data; otherwise untracked matches would dilute the average.
    q_matches = f"""
        SELECT steam_id_64, COUNT(DISTINCT match_id) as matches_tracked
        FROM fact_round_player_economy
        WHERE steam_id_64 IN ({placeholders})
        GROUP BY steam_id_64
    """
    df_matches = pd.read_sql_query(q_matches, conn, params=ids)
    df_perf = df_perf.merge(df_matches, on='steam_id_64', how='left')
    df_perf['eco_avg_rounds'] = df_perf['eco_round_count'] / df_perf['matches_tracked'].replace(0, 1)

    df_final = df.merge(
        df_perf[['steam_id_64', 'eco_rating_eco_rounds', 'eco_kd_ratio', 'eco_avg_rounds']],
        on='steam_id_64', how='left')
    return df_final[['steam_id_64', 'eco_avg_damage_per_1k', 'eco_rating_eco_rounds', 'eco_kd_ratio', 'eco_avg_rounds']]
@staticmethod
def _calculate_pace_features(conn, player_ids):
    """Compute per-player pace/tempo features from round event data.

    Args:
        conn: DB-API connection (handed to pandas.read_sql_query).
        player_ids: iterable of steam_id_64 values to compute features for.

    Returns:
        DataFrame with columns ['steam_id_64',
        'pace_avg_time_to_first_contact', 'pace_trade_kill_rate',
        'pace_opening_kill_time', 'pace_avg_life_time'], missing values
        filled with 0; or None when player_ids is empty.
    """
    ids = list(player_ids)
    if not ids:
        return None
    placeholders = ','.join(['?'] * len(ids))

    # 1. Average time to first contact: per round, the earliest 'kill' event
    # the player was involved in (as attacker or victim). Fetch raw events
    # once and unpivot in pandas, which keeps the SQL simple and avoids the
    # redundant UNION-ALL query that used to run here for no consumer.
    q_events = f"""
        SELECT match_id, round_num, event_time, attacker_steam_id, victim_steam_id
        FROM fact_round_events
        WHERE event_type='kill'
          AND (attacker_steam_id IN ({placeholders}) OR victim_steam_id IN ({placeholders}))
    """
    df_ev = pd.read_sql_query(q_events, conn, params=ids + ids)
    pace_frames = []
    if not df_ev.empty:
        # Unpivot: one row per (round, player) whether they killed or died.
        att = df_ev[df_ev['attacker_steam_id'].isin(ids)][
            ['match_id', 'round_num', 'event_time', 'attacker_steam_id']
        ].rename(columns={'attacker_steam_id': 'steam_id_64'})
        vic = df_ev[df_ev['victim_steam_id'].isin(ids)][
            ['match_id', 'round_num', 'event_time', 'victim_steam_id']
        ].rename(columns={'victim_steam_id': 'steam_id_64'})
        combined = pd.concat([att, vic])
        first_contacts = combined.groupby(['match_id', 'round_num', 'steam_id_64'])['event_time'].min().reset_index()
        avg_time = first_contacts.groupby('steam_id_64')['event_time'].mean().reset_index()
        avg_time.rename(columns={'event_time': 'pace_avg_time_to_first_contact'}, inplace=True)
        pace_frames.append(avg_time)

    # 2. Trade-kill rate: share of a player's kills that avenged a teammate.
    # Self-join semantics: t1 = A kills B; t2 = C kills A within 5s of t1,
    # so C's kill (t2) is the trade and C is the trader.
    q_trades = f"""
        SELECT
            t2.attacker_steam_id as trader_id,
            COUNT(*) as trade_count
        FROM fact_round_events t1
        JOIN fact_round_events t2
          ON t1.match_id = t2.match_id
         AND t1.round_num = t2.round_num
        WHERE t1.event_type = 'kill' AND t2.event_type = 'kill'
          AND t1.attacker_steam_id = t2.victim_steam_id
          AND t2.event_time > t1.event_time
          AND t2.event_time - t1.event_time <= 5
          AND t2.attacker_steam_id IN ({placeholders})
        GROUP BY t2.attacker_steam_id
    """
    df_trades = pd.read_sql_query(q_trades, conn, params=ids)
    # Denominator: total kills (trade rate = trades / all kills).
    q_kills = f"""
        SELECT attacker_steam_id as steam_id_64, COUNT(*) as total_kills
        FROM fact_round_events
        WHERE event_type='kill' AND attacker_steam_id IN ({placeholders})
        GROUP BY attacker_steam_id
    """
    df_tot_kills = pd.read_sql_query(q_kills, conn, params=ids)
    if not df_trades.empty:
        df_trades = df_trades.merge(df_tot_kills, left_on='trader_id', right_on='steam_id_64', how='right').fillna(0)
        df_trades['pace_trade_kill_rate'] = df_trades['trade_count'] / df_trades['total_kills'].replace(0, 1)
    else:
        df_trades = df_tot_kills.copy()
        df_trades['pace_trade_kill_rate'] = 0

    df_final = pd.DataFrame({'steam_id_64': ids})
    if pace_frames:
        df_final = df_final.merge(pace_frames[0], on='steam_id_64', how='left')
    if not df_trades.empty:
        df_final = df_final.merge(df_trades[['steam_id_64', 'pace_trade_kill_rate']], on='steam_id_64', how='left')

    # 3. Opening-kill time: average time of rounds' first kills, restricted to
    # rounds the player opened. SQLite's bare-column-with-MIN semantics make
    # attacker_steam_id come from the row holding the round's earliest kill.
    q_opening_time = f"""
        SELECT
            attacker_steam_id as steam_id_64,
            AVG(event_time) as pace_opening_kill_time
        FROM (
            SELECT
                match_id, round_num,
                attacker_steam_id,
                MIN(event_time) as event_time
            FROM fact_round_events
            WHERE event_type='kill'
            GROUP BY match_id, round_num
        ) first_kills
        WHERE attacker_steam_id IN ({placeholders})
        GROUP BY attacker_steam_id
    """
    df_opening_time = pd.read_sql_query(q_opening_time, conn, params=ids)

    # 4. Average life time per round: the player's death time when they died,
    # otherwise the round's last event time as a proxy for round end. Rounds a
    # player participated in are taken from fact_round_player_economy.
    # The death subquery join includes round_num alignment: without it a
    # player with deaths in several rounds fanned out across every round row
    # and skewed the average.
    q_survival = f"""
        SELECT
            p.steam_id_64,
            AVG(
                CASE
                    WHEN d.death_time IS NOT NULL THEN d.death_time
                    ELSE r.round_end_time
                END
            ) as pace_avg_life_time
        FROM fact_match_players p
        JOIN (
            SELECT match_id, round_num, MAX(event_time) as round_end_time
            FROM fact_round_events
            GROUP BY match_id, round_num
        ) r ON p.match_id = r.match_id
        LEFT JOIN (
            SELECT match_id, round_num, victim_steam_id, MIN(event_time) as death_time
            FROM fact_round_events
            WHERE event_type='kill'
            GROUP BY match_id, round_num, victim_steam_id
        ) d ON p.match_id = d.match_id
           AND d.round_num = r.round_num
           AND p.steam_id_64 = d.victim_steam_id
        JOIN fact_round_player_economy e
          ON p.match_id = e.match_id
         AND p.steam_id_64 = e.steam_id_64
         AND r.round_num = e.round_num
        WHERE p.steam_id_64 IN ({placeholders})
        GROUP BY p.steam_id_64
    """
    df_survival = pd.read_sql_query(q_survival, conn, params=ids)

    if not df_opening_time.empty:
        df_final = df_final.merge(df_opening_time, on='steam_id_64', how='left')
    if not df_survival.empty:
        df_final = df_final.merge(df_survival, on='steam_id_64', how='left')
    return df_final.fillna(0)
@staticmethod
def _calculate_ultimate_scores(df):
@@ -1203,6 +1572,19 @@ class FeatureService:
0.20 * n('util_avg_flash_enemy')
)
# ECO (New)
df['score_eco'] = (
0.50 * n('eco_avg_damage_per_1k') +
0.50 * n('eco_rating_eco_rounds')
)
# PACE (New)
# Aggression Score: Faster first contact (lower time) -> higher score
df['score_pace'] = (
0.50 * (100 - n('pace_avg_time_to_first_contact')) +
0.50 * n('pace_trade_kill_rate')
)
return df
@staticmethod
@@ -1228,13 +1610,8 @@ class FeatureService:
# 2. Fetch L3 features for all roster members
placeholders = ','.join('?' for _ in active_roster_ids)
sql = f"""
SELECT
steam_id_64,
score_bat, score_sta, score_hps, score_ptl, score_tct, score_util
FROM dm_player_features
WHERE steam_id_64 IN ({placeholders})
"""
# Select all columns (simplified) or explicit list including raw metrics
sql = f"SELECT * FROM dm_player_features WHERE steam_id_64 IN ({placeholders})"
rows = query_db('l3', sql, active_roster_ids)
if not rows:
@@ -1245,23 +1622,73 @@ class FeatureService:
# If target not in map (maybe no L3 data yet), default to 0
if target_steam_id not in stats_map:
stats_map[target_steam_id] = {
'score_bat': 0, 'score_sta': 0, 'score_hps': 0,
'score_ptl': 0, 'score_tct': 0, 'score_util': 0
}
stats_map[target_steam_id] = {} # Empty dict, will fallback to 0 in loop
# 3. Calculate Distribution
metrics = ['score_bat', 'score_sta', 'score_hps', 'score_ptl', 'score_tct', 'score_util']
# Include Scores AND Raw Metrics used in Profile
metrics = [
# Scores
'score_bat', 'score_sta', 'score_hps', 'score_ptl', 'score_tct', 'score_util', 'score_eco', 'score_pace',
# Core
'basic_avg_rating', 'basic_avg_kd', 'basic_avg_adr', 'basic_avg_kast', 'basic_avg_rws',
# Combat
'basic_avg_headshot_kills', 'basic_headshot_rate', 'basic_avg_assisted_kill', 'basic_avg_awp_kill', 'basic_avg_jump_count',
# Obj
'basic_avg_mvps', 'basic_avg_plants', 'basic_avg_defuses', 'basic_avg_flash_assists',
# Opening
'basic_avg_first_kill', 'basic_avg_first_death', 'basic_first_kill_rate', 'basic_first_death_rate',
# Multi
'basic_avg_kill_2', 'basic_avg_kill_3', 'basic_avg_kill_4', 'basic_avg_kill_5',
'basic_avg_perfect_kill', 'basic_avg_revenge_kill',
# STA & BAT Details
'sta_last_30_rating', 'sta_win_rating', 'sta_loss_rating', 'sta_rating_volatility', 'sta_time_rating_corr',
'bat_kd_diff_high_elo', 'bat_avg_duel_win_rate',
# HPS & PTL Details
'hps_clutch_win_rate_1v1', 'hps_clutch_win_rate_1v3_plus', 'hps_match_point_win_rate', 'hps_pressure_entry_rate',
'hps_comeback_kd_diff', 'hps_losing_streak_kd_diff',
'ptl_pistol_kills', 'ptl_pistol_win_rate', 'ptl_pistol_kd', 'ptl_pistol_util_efficiency',
# UTIL Details
'util_usage_rate', 'util_avg_nade_dmg', 'util_avg_flash_time', 'util_avg_flash_enemy',
# ECO & PACE (New)
'eco_avg_damage_per_1k', 'eco_rating_eco_rounds', 'eco_kd_ratio', 'eco_avg_rounds',
'pace_avg_time_to_first_contact', 'pace_trade_kill_rate', 'pace_opening_kill_time', 'pace_avg_life_time',
# Party
'party_1_win_rate', 'party_1_rating', 'party_1_adr',
'party_2_win_rate', 'party_2_rating', 'party_2_adr',
'party_3_win_rate', 'party_3_rating', 'party_3_adr',
'party_4_win_rate', 'party_4_rating', 'party_4_adr',
'party_5_win_rate', 'party_5_rating', 'party_5_adr',
# Rating Dist
'rating_dist_carry_rate', 'rating_dist_normal_rate', 'rating_dist_sacrifice_rate', 'rating_dist_sleeping_rate',
# ELO
'elo_lt1200_rating', 'elo_1200_1400_rating', 'elo_1400_1600_rating', 'elo_1600_1800_rating', 'elo_1800_2000_rating', 'elo_gt2000_rating'
]
result = {}
for m in metrics:
values = [p.get(m, 0) or 0 for p in stats_map.values()]
target_val = stats_map[target_steam_id].get(m, 0) or 0
# Handle missing columns gracefully
values = []
for p in stats_map.values():
val = p.get(m)
if val is None: val = 0
values.append(float(val))
target_val = stats_map[target_steam_id].get(m)
if target_val is None: target_val = 0
target_val = float(target_val)
if not values:
result[m] = None
continue
# For PACE (Time), lower is better usually, but rank logic assumes Higher is Better (reverse=True).
# If we want Rank #1 to be Lowest Time, we should sort normal.
# But standardized scores handle this. For raw metrics, let's keep consistent (Higher = Rank 1)
# unless we explicitly handle "Low is Good".
# For now, keep simple: Rank 1 = Highest Value.
# For Time: Rank 1 = Slowest. (User can interpret)
values.sort(reverse=True)
try:

View File

@@ -5,21 +5,24 @@ import os
class StatsService:
@staticmethod
def resolve_avatar_url(steam_id, avatar_url):
    """
    Resolves avatar URL with priority:
    1. Local File (web/static/avatars/{steam_id}.jpg/png) - User override
    2. DB Value (avatar_url)
    """
    # NOTE(review): this span appears to be diff residue — both the pre- and
    # post-change variants of the local-file lookup seem interleaved below
    # (`base` is computed twice and the file scan runs twice). Confirm against
    # the actual committed version before relying on this reading.
    try:
        # First scan: user-override file in static/avatars, any of three extensions.
        base = os.path.join(current_app.root_path, 'static', 'avatars')
        for ext in ('.jpg', '.png', '.jpeg'):
            fname = f"{steam_id}{ext}"
            fpath = os.path.join(base, fname)
            if os.path.exists(fpath):
                return url_for('static', filename=f'avatars/{fname}')
        # Fall back to the DB-provided URL when it is a non-blank string.
        if avatar_url and str(avatar_url).strip():
            return avatar_url
        # NOTE(review): this second scan only executes when the first scan
        # found nothing, and it checks a subset ('.jpg', '.png') of the same
        # directory — so it can never find a file the first scan missed, and
        # the DB persistence below never fires. Looks like dead code left by
        # the merge of old/new versions; verify and remove one variant.
        base = os.path.join(current_app.root_path, 'static', 'avatars')
        # Check jpg/png in order
        for ext in ('.jpg', '.png'):
            fname = f"{steam_id}{ext}"
            if os.path.exists(os.path.join(base, fname)):
                url = url_for('static', filename=f'avatars/{fname}')
                try:
                    # Persist fallback URL into L2 for future reads
                    execute_db('l2', "UPDATE dim_players SET avatar_url = ? WHERE steam_id_64 = ?", [url, str(steam_id)])
                except Exception:
                    # Best-effort write: a failed persist must not block avatar resolution.
                    pass
                return url
        return None
    except Exception:
        # Any unexpected failure (e.g. outside app context) degrades to the raw DB value.
        return avatar_url
@@ -739,6 +742,9 @@ class StatsService:
'side_headshot_rate_ct', 'side_headshot_rate_t',
'side_defuses_ct', 'side_plants_t',
'util_avg_nade_dmg', 'util_avg_flash_time', 'util_avg_flash_enemy', 'util_usage_rate',
# New: ECO & PACE
'eco_avg_damage_per_1k', 'eco_rating_eco_rounds', 'eco_kd_ratio', 'eco_avg_rounds',
'pace_avg_time_to_first_contact', 'pace_trade_kill_rate', 'pace_opening_kill_time', 'pace_avg_life_time',
# New: Party Size Stats
'party_1_win_rate', 'party_1_rating', 'party_1_adr',
'party_2_win_rate', 'party_2_rating', 'party_2_adr',
@@ -759,6 +765,9 @@ class StatsService:
# Mapping for L2 legacy calls (if any) - mainly map 'rating' to 'basic_avg_rating' etc if needed
# But here we just use L3 columns directly.
# Define metrics where LOWER is BETTER
lower_is_better = ['pace_avg_time_to_first_contact', 'pace_opening_kill_time']
result = {}
for m in metrics:
@@ -768,8 +777,10 @@ class StatsService:
if not values:
result[m] = None
continue
values.sort(reverse=True)
# Sort: Reverse (High to Low) by default, unless in lower_is_better
is_reverse = m not in lower_is_better
values.sort(reverse=is_reverse)
# Rank
try:
@@ -783,7 +794,8 @@ class StatsService:
'total': len(values),
'min': min(values),
'max': max(values),
'avg': sum(values) / len(values)
'avg': sum(values) / len(values),
'inverted': not is_reverse # Flag for frontend to invert bar
}
# Legacy mapping for top cards (rating, kd, adr, kast)