diff --git a/database/L3/L3_Features.sqlite b/database/L3/L3_Features.sqlite index db91e56..72a1e0d 100644 Binary files a/database/L3/L3_Features.sqlite and b/database/L3/L3_Features.sqlite differ diff --git a/database/L3/schema.sql b/database/L3/schema.sql index fd49432..44c8552 100644 --- a/database/L3/schema.sql +++ b/database/L3/schema.sql @@ -98,6 +98,52 @@ CREATE TABLE IF NOT EXISTS dm_player_features ( side_kd_diff_ct_t REAL, -- CT KD - T KD -- New Side Comparisons + side_rating_diff_ct_t REAL, + + -- ========================================== + -- 6. Party Size Performance + -- ========================================== + party_1_win_rate REAL, + party_1_rating REAL, + party_1_adr REAL, + + party_2_win_rate REAL, + party_2_rating REAL, + party_2_adr REAL, + + party_3_win_rate REAL, + party_3_rating REAL, + party_3_adr REAL, + + party_4_win_rate REAL, + party_4_rating REAL, + party_4_adr REAL, + + party_5_win_rate REAL, + party_5_rating REAL, + party_5_adr REAL, + + -- ========================================== + -- 7. Rating Distribution (Performance Tiers) + -- ========================================== + rating_dist_carry_rate REAL, -- > 1.5 + rating_dist_normal_rate REAL, -- 1.0 - 1.5 + rating_dist_sacrifice_rate REAL, -- 0.6 - 1.0 + rating_dist_sleeping_rate REAL, -- < 0.6 + + -- ========================================== + -- 8. ELO Stratification (Performance vs ELO) + -- ========================================== + elo_lt1200_rating REAL, + elo_1200_1400_rating REAL, + elo_1400_1600_rating REAL, + elo_1600_1800_rating REAL, + elo_1800_2000_rating REAL, + elo_gt2000_rating REAL, + + -- ========================================== + -- 9. More Side Stats (Restored) + -- ========================================== side_kast_ct REAL, side_kast_t REAL, side_rws_ct REAL, diff --git a/web/services/feature_service.py b/web/services/feature_service.py index b1cf451..c5505aa 100644 --- a/web/services/feature_service.py +++ b/web/services/feature_service.py @@ -978,6 +978,163 @@ class FeatureService: df['util_usage_rate'] = df['util_usage_rate_backup'].fillna(0) df.drop(columns=['util_usage_rate_backup'], inplace=True) + # --- 8. New Feature Dimensions (Party, Rating Dist, ELO) --- + # Fetch Base Data for Calculation + q_new_feats = f""" + SELECT mp.steam_id_64, mp.match_id, mp.match_team_id, mp.team_id, + mp.rating, mp.adr, mp.is_win + FROM fact_match_players mp + WHERE mp.steam_id_64 IN ({placeholders}) + """ + df_base = pd.read_sql_query(q_new_feats, conn, params=valid_ids) + + if not df_base.empty: + # 8.1 Party Size Stats + # Get party sizes for these matches + # We need to query party sizes for ALL matches involved + match_ids = df_base['match_id'].unique() + if len(match_ids) > 0: + match_id_ph = ','.join(['?'] * len(match_ids)) + q_party_size = f""" + SELECT match_id, match_team_id, COUNT(*) as party_size + FROM fact_match_players + WHERE match_id IN ({match_id_ph}) AND match_team_id > 0 + GROUP BY match_id, match_team_id + """ + # Split match_ids into chunks if too many + chunk_size = 900 + party_sizes_list = [] + for i in range(0, len(match_ids), chunk_size): + chunk = match_ids[i:i+chunk_size] + chunk_ph = ','.join(['?'] * len(chunk)) + q_chunk = q_party_size.replace(match_id_ph, chunk_ph) + party_sizes_list.append(pd.read_sql_query(q_chunk, conn, params=list(chunk))) + + if party_sizes_list: + df_party_sizes = pd.concat(party_sizes_list) + + # Merge party size to base data + df_base_party = df_base.merge(df_party_sizes, on=['match_id', 'match_team_id'], how='left') + + # Calculate Stats per Party Size (1-5) + # We want columns like party_1_win_rate, party_1_rating, party_1_adr + party_stats = df_base_party.groupby(['steam_id_64', 'party_size']).agg({ + 'is_win': 'mean', + 'rating': 'mean', + 'adr': 'mean' + }).reset_index() + + # Pivot + pivoted_party = party_stats.pivot(index='steam_id_64', columns='party_size').reset_index() + + # Flatten and rename + new_party_cols = ['steam_id_64'] + for col in pivoted_party.columns: + if col[0] == 'steam_id_64': continue + metric, size = col + if size in [1, 2, 3, 4, 5]: + # metric is is_win, rating, adr + metric_name = 'win_rate' if metric == 'is_win' else metric + new_party_cols.append(f"party_{int(size)}_{metric_name}") + + # Handle MultiIndex column flattening properly + # The pivot creates MultiIndex. We need to construct a flat DataFrame. + flat_data = {'steam_id_64': pivoted_party['steam_id_64']} + for size in [1, 2, 3, 4, 5]: + if size in pivoted_party['is_win'].columns: + flat_data[f"party_{size}_win_rate"] = pivoted_party['is_win'][size] + if size in pivoted_party['rating'].columns: + flat_data[f"party_{size}_rating"] = pivoted_party['rating'][size] + if size in pivoted_party['adr'].columns: + flat_data[f"party_{size}_adr"] = pivoted_party['adr'][size] + + df_party_flat = pd.DataFrame(flat_data) + df = df.merge(df_party_flat, on='steam_id_64', how='left') + + # 8.2 Rating Distribution + # rating_dist_carry_rate (>1.5), normal (1.0-1.5), sacrifice (0.6-1.0), sleeping (<0.6) + df_base['rating_tier'] = pd.cut(df_base['rating'], + bins=[-1, 0.6, 1.0, 1.5, 100], + labels=['sleeping', 'sacrifice', 'normal', 'carry'], + right=False) # <0.6, 0.6-<1.0, 1.0-<1.5, >=1.5 (wait, cut behavior) + # Standard cut: right=True by default (a, b]. We want: + # < 0.6 + # 0.6 <= x < 1.0 + # 1.0 <= x < 1.5 + # >= 1.5 + # So bins=[-inf, 0.6, 1.0, 1.5, inf], right=False -> [a, b) + df_base['rating_tier'] = pd.cut(df_base['rating'], + bins=[-float('inf'), 0.6, 1.0, 1.5, float('inf')], + labels=['sleeping', 'sacrifice', 'normal', 'carry'], + right=False) + + # Wait, 1.5 should be Normal or Carry? + # User: >1.5 Carry, 1.0~1.5 Normal. So 1.5 is Normal? Or Carry? + # Usually inclusive on lower bound. + # 1.5 -> Carry (>1.5 usually means >= 1.5 or strictly >). + # "1.0~1.5 正常" implies [1.0, 1.5]. ">1.5 Carry" implies (1.5, inf). + # Let's assume >= 1.5 is Carry. + # So bins: (-inf, 0.6), [0.6, 1.0), [1.0, 1.5), [1.5, inf) + # right=False gives [a, b). + # So [1.5, inf) is correct for Carry. + + dist_stats = df_base.groupby(['steam_id_64', 'rating_tier']).size().unstack(fill_value=0) + # Calculate rates + dist_stats = dist_stats.div(dist_stats.sum(axis=1), axis=0) + dist_stats.columns = [f"rating_dist_{c}_rate" for c in dist_stats.columns] + dist_stats = dist_stats.reset_index() + + df = df.merge(dist_stats, on='steam_id_64', how='left') + + # 8.3 ELO Stratification + # Fetch Match Teams ELO + if len(match_ids) > 0: + q_elo = f""" + SELECT match_id, group_id, group_origin_elo + FROM fact_match_teams + WHERE match_id IN ({match_id_ph}) + """ + # Use chunking again + elo_list = [] + for i in range(0, len(match_ids), chunk_size): + chunk = match_ids[i:i+chunk_size] + chunk_ph = ','.join(['?'] * len(chunk)) + q_chunk = q_elo.replace(match_id_ph, chunk_ph) + elo_list.append(pd.read_sql_query(q_chunk, conn, params=list(chunk))) + + if elo_list: + df_elo_teams = pd.concat(elo_list) + + # Merge to get Opponent ELO + # Player has match_id, team_id. + # Join on match_id. + # Filter where group_id != team_id + df_merged_elo = df_base.merge(df_elo_teams, on='match_id', how='left') + df_merged_elo = df_merged_elo[df_merged_elo['group_id'] != df_merged_elo['team_id']] + + # Now df_merged_elo has 'group_origin_elo' which is Opponent ELO + # Binning: <1200, 1200-1400, 1400-1600, 1600-1800, 1800-2000, >2000 + # bins: [-inf, 1200, 1400, 1600, 1800, 2000, inf] + elo_bins = [-float('inf'), 1200, 1400, 1600, 1800, 2000, float('inf')] + elo_labels = ['lt1200', '1200_1400', '1400_1600', '1600_1800', '1800_2000', 'gt2000'] + + df_merged_elo['elo_bin'] = pd.cut(df_merged_elo['group_origin_elo'], bins=elo_bins, labels=elo_labels, right=False) + + elo_stats = df_merged_elo.groupby(['steam_id_64', 'elo_bin']).agg({ + 'rating': 'mean' + }).unstack(fill_value=0) # We only need rating for now + + # Rename columns + # elo_stats columns are MultiIndex (rating, bin). + # We want: elo_{bin}_rating + flat_elo_data = {'steam_id_64': elo_stats.index} + for bin_label in elo_labels: + if bin_label in elo_stats['rating'].columns: + flat_elo_data[f"elo_{bin_label}_rating"] = elo_stats['rating'][bin_label].values + + df_elo_flat = pd.DataFrame(flat_elo_data) + df = df.merge(df_elo_flat, on='steam_id_64', how='left') + # Final Mappings df['total_matches'] = df['matches_played'] diff --git a/web/services/stats_service.py b/web/services/stats_service.py index 7199edc..9393060 100644 --- a/web/services/stats_service.py +++ b/web/services/stats_service.py @@ -648,7 +648,17 @@ class StatsService: 'side_multikill_rate_ct', 'side_multikill_rate_t', 'side_headshot_rate_ct', 'side_headshot_rate_t', 'side_defuses_ct', 'side_plants_t', - 'util_avg_nade_dmg', 'util_avg_flash_time', 'util_avg_flash_enemy', 'util_usage_rate' + 'util_avg_nade_dmg', 'util_avg_flash_time', 'util_avg_flash_enemy', 'util_usage_rate', + # New: Party Size Stats + 'party_1_win_rate', 'party_1_rating', 'party_1_adr', + 'party_2_win_rate', 'party_2_rating', 'party_2_adr', + 'party_3_win_rate', 'party_3_rating', 'party_3_adr', + 'party_4_win_rate', 'party_4_rating', 'party_4_adr', + 'party_5_win_rate', 'party_5_rating', 'party_5_adr', + # New: Rating Distribution + 'rating_dist_carry_rate', 'rating_dist_normal_rate', 'rating_dist_sacrifice_rate', 'rating_dist_sleeping_rate', + # New: ELO Stratification + 'elo_lt1200_rating', 'elo_1200_1400_rating', 'elo_1400_1600_rating', 'elo_1600_1800_rating', 'elo_1800_2000_rating', 'elo_gt2000_rating' ] # Mapping for L2 legacy calls (if any) - mainly map 'rating' to 'basic_avg_rating' etc if needed diff --git a/web/templates/base.html b/web/templates/base.html index d90bfcb..2d3f7b9 100644 --- a/web/templates/base.html +++ b/web/templates/base.html @@ -103,7 +103,7 @@ diff --git a/web/templates/matches/detail.html b/web/templates/matches/detail.html index 33b854d..cb36129 100644 --- a/web/templates/matches/detail.html +++ b/web/templates/matches/detail.html @@ -194,97 +194,86 @@