import sqlite3
import logging
import os
import numpy as np
import pandas as pd
from datetime import datetime

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Constants
L2_DB_PATH = 'database/L2/L2_Main.sqlite'
L3_DB_PATH = 'database/L3/L3_Features.sqlite'
SCHEMA_PATH = 'database/L3/schema.sql'


def init_db():
    """Create the L3 database directory/file and apply the schema script.

    The schema file is read before the database is opened so that a missing
    schema does not leave an empty database file behind, and the connection
    is always closed even when the script fails.
    """
    db_dir = os.path.dirname(L3_DB_PATH)
    if db_dir:
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(db_dir, exist_ok=True)

    with open(SCHEMA_PATH, 'r', encoding='utf-8') as f:
        schema_sql = f.read()

    conn = sqlite3.connect(L3_DB_PATH)
    try:
        conn.executescript(schema_sql)
        conn.commit()
    finally:
        conn.close()
    logger.info("L3 DB Initialized.")


def get_db_connection(db_path):
    """Open and return a sqlite3 connection to ``db_path``."""
    return sqlite3.connect(db_path)


def safe_div(a, b, default=0.0):
    """Return ``a / b``, or ``default`` when the quotient is undefined.

    The quotient is treated as undefined when either operand is ``None`` or
    NaN, or when ``b`` is zero. Operands are expected to be scalars.
    """
    if a is None or b is None or pd.isna(a) or pd.isna(b) or not b:
        return default
    return a / b


def calculate_basic_features(df):
    """Aggregate per-match rows of one player into the ``basic_*`` features.

    Args:
        df: DataFrame with one row per match. It must contain the columns
            read below (rating, kd_ratio, kast, rws, headshot_count, kills,
            first_kill, first_death, kill_2..kill_5, assisted_kill,
            perfect_kill, revenge_kill, awp_kill, jump_count).

    Returns:
        dict mapping feature name to value; empty when ``df`` is empty.
    """
    if df.empty:
        return {}

    count = len(df)
    # Totals reused by several features; compute each once.
    headshots = df['headshot_count'].sum()
    first_kills = df['first_kill'].sum()
    first_deaths = df['first_death'].sum()
    opening_duels = first_kills + first_deaths

    feats = {
        'total_matches': count,
        'basic_avg_rating': df['rating'].mean(),
        'basic_avg_kd': df['kd_ratio'].mean(),
        'basic_avg_kast': df['kast'].mean(),
        'basic_avg_rws': df['rws'].mean(),
        'basic_avg_headshot_kills': headshots / count,
        'basic_headshot_rate': safe_div(headshots, df['kills'].sum()),
        'basic_avg_first_kill': df['first_kill'].mean(),
        'basic_avg_first_death': df['first_death'].mean(),
        # Share of opening duels (first kill + first death) won / lost.
        'basic_first_kill_rate': safe_div(first_kills, opening_duels),
        'basic_first_death_rate': safe_div(first_deaths, opening_duels),
    }

    # Plain per-match averages; the feature name mirrors the source column.
    for column in (
        'kill_2', 'kill_3', 'kill_4', 'kill_5',
        'assisted_kill', 'perfect_kill', 'revenge_kill',
        'awp_kill', 'jump_count',
    ):
        feats[f'basic_avg_{column}'] = df[column].mean()

    return feats
def _nan_to_zero(value):
    """Return ``value`` as a float, mapping NaN/None to 0.0 (the file's 'undefined' value)."""
    return 0.0 if pd.isna(value) else float(value)


def calculate_sta_features(df):
    """Compute the ``sta_*`` (stability / time-series) features for one player.

    Args:
        df: DataFrame with one row per match containing at least
            ``match_time`` (epoch seconds), ``rating`` and ``is_win``.

    Returns:
        dict of feature name -> float; empty when ``df`` is empty. Undefined
        statistics (no data, all-NaN ratings, constant input) are reported
        as 0.0 rather than NaN.
    """
    if df.empty:
        return {}

    # sort_values returns a new frame, so the caller's frame is not touched.
    df = df.sort_values('match_time')
    ratings = df['rating']
    last_10 = ratings.tail(10)

    feats = {
        'sta_last_30_rating': _nan_to_zero(ratings.tail(30).mean()),
        'sta_win_rating': _nan_to_zero(ratings[df['is_win'] == 1].mean()),
        'sta_loss_rating': _nan_to_zero(ratings[df['is_win'] == 0].mean()),
        'sta_rating_volatility': _nan_to_zero(last_10.std()) if len(last_10) > 1 else 0.0,
    }

    timestamps = pd.to_datetime(df['match_time'], unit='s')

    # Fatigue: on days with 4+ matches, compare the first three matches
    # against the remaining ones. One groupby pass replaces re-filtering the
    # whole frame for every busy day.
    early_ratings = []
    late_ratings = []
    for _, day_ratings in ratings.groupby(timestamps.dt.date):
        if len(day_ratings) < 4:
            continue
        early_ratings.extend(day_ratings.iloc[:3].dropna().tolist())
        late_ratings.extend(day_ratings.iloc[3:].dropna().tolist())

    if early_ratings and late_ratings:
        feats['sta_fatigue_decay'] = float(np.mean(early_ratings) - np.mean(late_ratings))
    else:
        feats['sta_fatigue_decay'] = 0.0

    # Correlation between hour of day and rating; needs more than 5 matches.
    if len(df) > 5:
        feats['sta_time_rating_corr'] = _nan_to_zero(timestamps.dt.hour.corr(ratings))
    else:
        feats['sta_time_rating_corr'] = 0.0

    return feats


def calculate_util_features(df):
    """Compute the ``util_*`` (utility usage) features for one player.

    Columns that are absent from ``df`` contribute 0.0, including inside
    ``util_usage_rate`` (previously that expression raised ``KeyError`` when
    ``flash_enemy`` or ``throw_harm`` was missing).

    Args:
        df: DataFrame with one row per match; may contain ``throw_harm``,
            ``flash_duration``, ``flash_enemy`` and ``flash_team``.

    Returns:
        dict of feature name -> float; empty when ``df`` is empty.
    """
    if df.empty:
        return {}

    def _col_mean(column):
        if column not in df.columns:
            return 0.0
        return _nan_to_zero(df[column].mean())

    nade_dmg = _col_mean('throw_harm')
    flash_enemy = _col_mean('flash_enemy')

    feats = {
        'util_avg_nade_dmg': nade_dmg,
        'util_avg_flash_time': _col_mean('flash_duration'),
        'util_avg_flash_enemy': flash_enemy,
        'util_avg_flash_team': _col_mean('flash_team'),
        # Heuristic score: enemies flashed plus grenade damage scaled by 50.
        'util_usage_rate': flash_enemy + nade_dmg / 50.0,
    }
    return feats
def calculate_side_features(steam_id, l2_conn):
    """Compute the ``side_*`` (T / CT split) features for one player.

    Reads the player's rows from ``fact_match_players_ct`` and
    ``fact_match_players_t`` through ``l2_conn``. The player id is passed as
    a bound parameter instead of being interpolated into the SQL text.

    Args:
        steam_id: Player id matched against the ``steam_id_64`` column.
        l2_conn: Open sqlite3 connection to the L2 database.

    Returns:
        dict of feature name -> value. A side with no rows gets zeros.
    """
    def _load(table):
        # The table name is one of two fixed literals; only the id is data.
        # str() keeps the comparison equivalent to the former quoted literal.
        return pd.read_sql_query(
            f"SELECT * FROM {table} WHERE steam_id_64 = ?",
            l2_conn,
            params=(str(steam_id),),
        )

    df_ct = _load('fact_match_players_ct')
    df_t = _load('fact_match_players_t')

    feats = {}
    # (side suffix, frame, placeholder-rate key, bomb-count key, bomb column)
    sides = (
        ('ct', df_ct, 'side_hold_success_rate_ct', 'side_defused_bomb_count', 'defused_bomb'),
        ('t', df_t, 'side_entry_success_rate_t', 'side_planted_bomb_count', 'planted_bomb'),
    )
    for side, frame, rate_key, bomb_key, bomb_col in sides:
        if frame.empty:
            feats[f'side_rating_{side}'] = 0.0
            feats[f'side_first_kill_rate_{side}'] = 0.0
            feats[rate_key] = 0.0
            feats[bomb_key] = 0
            continue

        first_kills = frame['first_kill'].sum()
        first_deaths = frame['first_death'].sum()
        feats[f'side_rating_{side}'] = frame['rating'].mean()
        feats[f'side_first_kill_rate_{side}'] = safe_div(first_kills, first_kills + first_deaths)
        # Hold / entry success is not derived yet; stored as a 0.0 placeholder.
        feats[rate_key] = 0.0
        feats[bomb_key] = frame[bomb_col].sum() if bomb_col in frame.columns else 0

    kd_ct = df_ct['kd_ratio'].mean() if not df_ct.empty else 0
    kd_t = df_t['kd_ratio'].mean() if not df_t.empty else 0
    feats['side_kd_diff_ct_t'] = kd_ct - kd_t
    return feats
def _fetch_rounds_for_matches(l2_conn, match_ids, chunk_size=500):
    """Load ``fact_rounds`` rows for ``match_ids`` using bound parameters.

    Ids are bound in chunks so a long id list stays below SQLite's limit on
    the number of host parameters in one statement.
    """
    frames = []
    for start in range(0, len(match_ids), chunk_size):
        chunk = match_ids[start:start + chunk_size]
        placeholders = ", ".join("?" * len(chunk))
        frame = pd.read_sql_query(
            f"SELECT * FROM fact_rounds WHERE match_id IN ({placeholders})",
            l2_conn,
            params=chunk,
        )
        if not frame.empty:
            frames.append(frame)
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


def calculate_complex_features(steam_id, match_df, l2_conn):
    """
    Calculates BAT, HPS, and PTL features using Round Events and Rounds.

    Args:
        steam_id: Player id matched against attacker/victim ids in
            ``fact_round_events``.
        match_df: The player's per-match rows; must contain ``match_id`` and
            the ``clutch_1vX`` / ``end_1vX`` columns.
        l2_conn: Open sqlite3 connection to the L2 database.

    Returns:
        dict of feature name -> value. The same set of keys is returned on
        every path; features that are not implemented yet are filled with
        fixed placeholder values.
    """
    feats = {}

    # 1. HPS: clutch rates from match stats.
    # NOTE(review): assumes clutch_1vX counts wins and end_1vX counts
    # attempts, as the original comment states - confirm against L2 mapping.
    feats['hps_clutch_win_rate_1v1'] = safe_div(match_df['clutch_1v1'].sum(), match_df['end_1v1'].sum())
    feats['hps_clutch_win_rate_1v2'] = safe_div(match_df['clutch_1v2'].sum(), match_df['end_1v2'].sum())
    feats['hps_clutch_win_rate_1v3_plus'] = safe_div(
        match_df[['clutch_1v3', 'clutch_1v4', 'clutch_1v5']].sum().sum(),
        match_df[['end_1v3', 'end_1v4', 'end_1v5']].sum().sum()
    )

    # 2. Round events: every kill where the player is attacker or victim,
    # joined with both participants' rank_score for that match.
    q_events = """
    SELECT e.*,
           p_vic.rank_score as victim_rank,
           p_att.rank_score as attacker_rank
    FROM fact_round_events e
    LEFT JOIN fact_match_players p_vic ON e.match_id = p_vic.match_id AND e.victim_steam_id = p_vic.steam_id_64
    LEFT JOIN fact_match_players p_att ON e.match_id = p_att.match_id AND e.attacker_steam_id = p_att.steam_id_64
    WHERE (e.attacker_steam_id = ? OR e.victim_steam_id = ?)
      AND e.event_type = 'kill'
    """
    # str() keeps the comparison equivalent to the former quoted literal.
    player_param = str(steam_id)
    try:
        events = pd.read_sql_query(q_events, l2_conn, params=(player_param, player_param))
    except Exception as e:
        logger.error(f"Error fetching events for {steam_id}: {e}")
        events = pd.DataFrame()

    if not events.empty:
        kills = events[events['attacker_steam_id'] == steam_id].copy()
        deaths = events[events['victim_steam_id'] == steam_id].copy()

        # 'diff' is always (enemy rank - own rank): the player is the
        # attacker in `kills` and the victim in `deaths`.
        kills['diff'] = kills['victim_rank'] - kills['attacker_rank']
        deaths['diff'] = deaths['attacker_rank'] - deaths['victim_rank']

        # "High elo" = enemy ranked above the player, "low elo" = below.
        # Rows with a missing rank (NaN diff) fall into neither bucket.
        high_elo_kills = int((kills['diff'] > 0).sum())
        high_elo_deaths = int((deaths['diff'] > 0).sum())
        low_elo_kills = int((kills['diff'] < 0).sum())
        low_elo_deaths = int((deaths['diff'] < 0).sum())

        feats['bat_kd_diff_high_elo'] = high_elo_kills - high_elo_deaths
        feats['bat_kd_diff_low_elo'] = low_elo_kills - low_elo_deaths

        total_duels = len(kills) + len(deaths)
        feats['bat_win_rate_vs_all'] = safe_div(len(kills), total_duels)
        feats['bat_avg_duel_win_rate'] = feats['bat_win_rate_vs_all']  # Simplifying
        feats['bat_avg_duel_freq'] = safe_div(total_duels, len(match_df))

        # Placeholders until distance-based logic exists.
        feats['bat_win_rate_close'] = 0.0
        feats['bat_win_rate_mid'] = 0.0
        feats['bat_win_rate_far'] = 0.0
    else:
        feats.update({
            'bat_kd_diff_high_elo': 0, 'bat_kd_diff_low_elo': 0,
            'bat_win_rate_vs_all': 0.0, 'bat_avg_duel_win_rate': 0.0,
            'bat_avg_duel_freq': 0.0, 'bat_win_rate_close': 0.0,
            'bat_win_rate_mid': 0.0, 'bat_win_rate_far': 0.0
        })

    # 3. PTL & match point (requires rounds of the matches played).
    match_ids = match_df['match_id'].unique().tolist()
    try:
        rounds = _fetch_rounds_for_matches(l2_conn, match_ids)
    except Exception as e:
        # Best effort as before, but the failure is now logged, not hidden.
        logger.error(f"Error fetching rounds for {steam_id}: {e}")
        rounds = pd.DataFrame()

    if not rounds.empty and not events.empty:
        # Pistol rounds: round 1 and round 13 (assumes MR12).
        pistol_rounds = rounds[rounds['round_num'].isin([1, 13])]

        # Player kills that happened in a pistol round (match_id, round_num).
        pistol_events = pd.merge(
            events[events['attacker_steam_id'] == steam_id],
            pistol_rounds[['match_id', 'round_num']],
            on=['match_id', 'round_num']
        )

        feats['ptl_pistol_kills'] = safe_div(len(pistol_events), len(match_df))  # Avg per match
        feats['ptl_pistol_multikills'] = 0.0  # Placeholder: needs per-round grouping
        feats['ptl_pistol_win_rate'] = 0.5  # Placeholder: needs winner_team vs player_team
        feats['ptl_pistol_kd'] = 1.0  # Placeholder
        feats['ptl_pistol_util_efficiency'] = 0.0

        # Match point detection (score of 12 under MR12, overtime aside) is
        # not implemented yet.
        feats['hps_match_point_win_rate'] = 0.5  # Placeholder
    else:
        feats.update({
            'ptl_pistol_kills': 0.0, 'ptl_pistol_multikills': 0.0,
            'ptl_pistol_win_rate': 0.0, 'ptl_pistol_kd': 0.0,
            'ptl_pistol_util_efficiency': 0.0, 'hps_match_point_win_rate': 0.0
        })

    # Remaining HPS features are placeholders.
    feats['hps_undermanned_survival_time'] = 0.0
    feats['hps_pressure_entry_rate'] = 0.0
    feats['hps_momentum_multikill_rate'] = 0.0
    feats['hps_tilt_rating_drop'] = 0.0
    feats['hps_clutch_rating_rise'] = 0.0
    feats['hps_comeback_kd_diff'] = 0.0
    feats['hps_losing_streak_kd_diff'] = 0.0

    return feats
def _to_sql_value(value):
    """Convert any numpy scalar to its built-in Python equivalent for sqlite3 binding."""
    return value.item() if isinstance(value, np.generic) else value


def process_players():
    """Build one ``dm_player_features`` row per player found in L2.

    For every distinct ``steam_id_64`` in ``fact_match_players`` the match
    rows are loaded, all feature groups are computed, and the result is
    written with ``INSERT OR REPLACE``. Work is committed every 10 players
    and once at the end. Both connections are closed even when a player
    fails and the error propagates.
    """
    from contextlib import closing

    with closing(get_db_connection(L2_DB_PATH)) as l2_conn, \
            closing(get_db_connection(L3_DB_PATH)) as l3_conn:
        logger.info("Fetching player list...")
        players = pd.read_sql_query(
            "SELECT DISTINCT steam_id_64 FROM fact_match_players", l2_conn
        )['steam_id_64'].tolist()

        logger.info(f"Found {len(players)} players. Processing...")

        for idx, steam_id in enumerate(players):
            # Bound parameter; str() matches the former quoted literal.
            df = pd.read_sql_query(
                "SELECT * FROM fact_match_players WHERE steam_id_64 = ? ORDER BY match_time ASC",
                l2_conn,
                params=(str(steam_id),),
            )

            if df.empty:
                continue

            feats = calculate_basic_features(df)
            feats.update(calculate_sta_features(df))
            feats.update(calculate_side_features(steam_id, l2_conn))
            feats.update(calculate_util_features(df))
            feats.update(calculate_complex_features(steam_id, df, l2_conn))

            # Insert. Column names come from the feature dict keys; values
            # are bound, with numpy scalars converted to built-in types.
            cols = list(feats.keys())
            vals = [_to_sql_value(v) for v in feats.values()]

            col_str = ", ".join(cols)
            q_marks = ", ".join(["?"] * len(cols))

            sql = f"INSERT OR REPLACE INTO dm_player_features (steam_id_64, {col_str}) VALUES (?, {q_marks})"
            l3_conn.execute(sql, [steam_id] + vals)

            if idx % 10 == 0:
                print(f"Processed {idx}/{len(players)} players...", end='\r')
                l3_conn.commit()

        l3_conn.commit()

    logger.info("\nDone.")
def verify():
    """Print a quick sanity report for the L3 ``dm_player_features`` table.

    Prints the row count, a few selected columns of the first five rows, and
    ``describe()`` statistics for three rating-related columns. The
    connection is closed even when a query or column selection fails.
    """
    conn = sqlite3.connect(L3_DB_PATH)
    try:
        # 1. Row count
        count = conn.execute("SELECT COUNT(*) FROM dm_player_features").fetchone()[0]
        print(f"Total Players in L3: {count}")

        # 2. Sample Data
        df = pd.read_sql_query("SELECT * FROM dm_player_features LIMIT 5", conn)
        sample_cols = [
            'steam_id_64', 'total_matches', 'basic_avg_rating',
            'sta_last_30_rating', 'bat_kd_diff_high_elo', 'hps_clutch_win_rate_1v1',
        ]
        print("\nSample Data (First 5 rows):")
        print(df[sample_cols].to_string())

        # 3. Stats Summary
        print("\nStats Summary:")
        full_df = pd.read_sql_query(
            "SELECT basic_avg_rating, sta_last_30_rating, bat_win_rate_vs_all FROM dm_player_features",
            conn,
        )
        print(full_df.describe())
    finally:
        conn.close()
**质量校验与覆盖分析** - `ETL/verify/verify_L2.py` 与 `ETL/verify/verify_deep.py` 用于字段覆盖、分布、空值和互斥逻辑的检查。 +4. **L3 入库(特征集市)** + `ETL/L3_Builder.py` 读取 L2 数据,计算 Basic 及 6 大挖掘能力维度特征,生成 `database/L3/L3_Features.sqlite`。 +5. **质量校验与覆盖分析** + `ETL/verify/verify_L2.py` 与 `ETL/verify/verify_deep.py` 用于 L2 字段覆盖与逻辑检查。 ## 目录结构 ``` @@ -27,6 +29,7 @@ yrtv/ ├── ETL/ # ETL 脚本 │ ├── L1A.py │ ├── L2_Builder.py +│ ├── L3_Builder.py │ ├── README.md │ └── verify/ │ ├── verify_L2.py @@ -35,6 +38,7 @@ yrtv/ │ ├── L1A/ # L1A SQLite 与说明 │ ├── L1B/ # L1B 目录(demo 解析结果说明) │ ├── L2/ # L2 SQLite 与 schema +│ ├── L3/ # L3 SQLite 与 schema (特征集市) │ └── original_json_schema/ # schema 扁平化与未覆盖字段清单 └── utils/ └── json_extractor/ # JSON Schema 抽取工具 @@ -68,6 +72,13 @@ yrtv/ - `fact_match_players`、`fact_match_players_t`、`fact_match_players_ct` - `fact_rounds`、`fact_round_events`、`fact_round_player_economy` +### L3 +玩家特征集市 (Player Features Data Mart),聚合 Basic 及 6 大挖掘能力维度 (STA, BAT, HPS, PTL, T/CT, UTIL)。 +- **Schema**:`database/L3/schema.sql` +- **输出**:`database/L3/L3_Features.sqlite` +- **脚本**:`ETL/L3_Builder.py` +- **核心表**:`dm_player_features` (玩家聚合画像) + ## JSON Schema 抽取工具 用于分析大量 `iframe_network.json` 的字段结构与覆盖情况,支持动态 Key 归并与多格式输出。 diff --git a/database/L3/L3_Features.sqlite b/database/L3/L3_Features.sqlite new file mode 100644 index 0000000..03604a1 Binary files /dev/null and b/database/L3/L3_Features.sqlite differ diff --git a/database/L3/README.md b/database/L3/README.md new file mode 100644 index 0000000..ad7269c --- /dev/null +++ b/database/L3/README.md @@ -0,0 +1,75 @@ +## basic、个人基础数据特征 +1. 平均Rating(每局) +2. 平均KD值(每局) +3. 平均KAST(每局) +4. 平均RWS(每局) +5. 每局爆头击杀数 +6. 爆头率(爆头击杀/总击杀) +7. 每局首杀次数 +8. 每局首死次数 +9. 首杀率(首杀次数/首遇交火次数) +10. 首死率(首死次数/首遇交火次数) +11. 每局2+杀/3+杀/4+杀/5杀次数(多杀) +12. 连续击杀累计次数(连杀) +15. **(New) 助攻次数 (assisted_kill)** +16. **(New) 无伤击杀 (perfect_kill)** +17. **(New) 复仇击杀 (revenge_kill)** +18. **(New) AWP击杀数 (awp_kill)** +19. **(New) 总跳跃次数 (jump_count)** + +--- + +## 挖掘能力维度: +### 1、时间稳定序列特征 STA +1. 
近30局平均Rating(长期Rating) +2. 胜局平均Rating +3. 败局平均Rating +4. Rating波动系数(近10局Rating计算) +5. 同一天内比赛时长与Rating相关性(每2小时Rating变化率) +6. 连续比赛局数与表现衰减率(如第5局后vs前4局的KD变化) + +### 2、局内对抗能力特征 BAT +1. 对位最高Rating对手的KD差(自身击杀-被该对手击杀) +2. 对位最低Rating对手的KD差(自身击杀-被该对手击杀) +3. 对位所有对手的胜率(自身击杀>被击杀的对手占比) +4. 平均对枪成功率(对所有对手的对枪成功率求平均) +5. 与单个对手的交火次数(相遇频率) +* ~~A. 对枪反应时间(遇敌到开火平均时长,需录像解析)~~ (Phase 5) +* B. 近/中/远距对枪占比及各自胜率 (仅 Classic 可行) + + +### 3、高压场景表现特征 HPS (High Pressure Scenario) +1. 1v1/1v2/1v3+残局胜率 +2. 赛点(12-12、12-11等)残局胜率 +3. 人数劣势时的平均存活时间/击杀数(少打多能力) +4. 队伍连续丢3+局后自身首杀率(压力下突破能力) +5. 队伍连续赢3+局后自身2+杀率(顺境多杀能力) +6. 受挫后状态下滑率(被刀/被虐泉后3回合内Rating下降值) +7. 起势后状态提升率(关键残局/多杀后3回合内Rating上升值) +8. 翻盘阶段KD提升值(同上场景下,自身KD与平均差值) +9. 连续丢分抗压性(连续丢4+局时,自身KD与平均差值) + +### 4、手枪局专项特征 PTL (Pistol Round) +1. 手枪局首杀次数 +2. 手枪局2+杀次数(多杀) +3. 手枪局连杀次数 +4. 参与的手枪局胜率(round1 round13) +5. 手枪类武器KD +6. 手枪局道具使用效率(烟雾/闪光帮助队友击杀数/投掷次数) + +### 5、阵营倾向(T/CT)特征 T/CT +1. CT方平均Rating +2. T方平均Rating +3. CT方首杀率 +4. T方首杀率 +5. CT方守点成功率(负责区域未被突破的回合占比) +6. T方突破成功率(成功突破敌方首道防线的回合占比) +7. CT/T方KD差值(CT KD - T KD) +8. **(New) 下包次数 (planted_bomb)** +9. **(New) 拆包次数 (defused_bomb)** + +### 6、道具特征 UTIL +1. 手雷伤害 (`throw_harm`) +2. 闪光致盲时间 (`flash_time`, `flash_enemy_time`, `flash_team_time`) +3. 闪光致盲人数 (`flash_enemy`, `flash_team`) +4. 每局平均道具数量与使用率(烟雾、闪光、燃烧弹、手雷) diff --git a/database/L3/schema.sql b/database/L3/schema.sql new file mode 100644 index 0000000..458f5ed --- /dev/null +++ b/database/L3/schema.sql @@ -0,0 +1,118 @@ + +-- L3 Schema: Player Features Data Mart +-- Based on FeatureRDD.md +-- Granularity: One row per player (Aggregated Profile) +-- Note: Some features requiring complex Demo parsing (Phase 5) are omitted or reserved. + +CREATE TABLE IF NOT EXISTS dm_player_features ( + steam_id_64 TEXT PRIMARY KEY, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + total_matches INTEGER DEFAULT 0, + + -- ========================================== + -- 0. 
Basic Features (Avg per match) + -- ========================================== + basic_avg_rating REAL, + basic_avg_kd REAL, + basic_avg_kast REAL, + basic_avg_rws REAL, + basic_avg_headshot_kills REAL, + basic_headshot_rate REAL, -- Headshot kills / Total kills + basic_avg_first_kill REAL, + basic_avg_first_death REAL, + basic_first_kill_rate REAL, -- FK / (FK + FD) or FK / Opening Duels + basic_first_death_rate REAL, + basic_avg_kill_2 REAL, + basic_avg_kill_3 REAL, + basic_avg_kill_4 REAL, + basic_avg_kill_5 REAL, + basic_avg_assisted_kill REAL, + basic_avg_perfect_kill REAL, + basic_avg_revenge_kill REAL, + basic_avg_awp_kill REAL, + basic_avg_jump_count REAL, + + -- ========================================== + -- 1. STA: Stability & Time Series + -- ========================================== + sta_last_30_rating REAL, + sta_win_rating REAL, + sta_loss_rating REAL, + sta_rating_volatility REAL, -- StdDev of last 10 ratings + sta_time_rating_corr REAL, -- Correlation between match duration/time and rating + sta_fatigue_decay REAL, -- Perf drop in later matches of same day + + -- ========================================== + -- 2. BAT: Battle / Duel Capabilities + -- ========================================== + bat_kd_diff_high_elo REAL, + bat_kd_diff_low_elo REAL, + bat_win_rate_vs_all REAL, + bat_avg_duel_win_rate REAL, + bat_avg_duel_freq REAL, + -- Distance based stats (Placeholder for Classic data) + bat_win_rate_close REAL, + bat_win_rate_mid REAL, + bat_win_rate_far REAL, + + -- ========================================== + -- 3. 
HPS: High Pressure Scenarios + -- ========================================== + hps_clutch_win_rate_1v1 REAL, + hps_clutch_win_rate_1v2 REAL, + hps_clutch_win_rate_1v3_plus REAL, + hps_match_point_win_rate REAL, + hps_undermanned_survival_time REAL, + hps_pressure_entry_rate REAL, -- FK rate when team losing streak + hps_momentum_multikill_rate REAL, -- Multi-kill rate when team winning streak + hps_tilt_rating_drop REAL, -- Rating drop after getting knifed/BM'd + hps_clutch_rating_rise REAL, -- Rating rise after clutch + hps_comeback_kd_diff REAL, + hps_losing_streak_kd_diff REAL, + + -- ========================================== + -- 4. PTL: Pistol Round Specialist + -- ========================================== + ptl_pistol_kills REAL, -- Avg per pistol round? Or Total? Usually Avg per match or Rate + ptl_pistol_multikills REAL, + ptl_pistol_win_rate REAL, -- Personal win rate in pistol rounds + ptl_pistol_kd REAL, + ptl_pistol_util_efficiency REAL, + + -- ========================================== + -- 5. T/CT: Side Preference + -- ========================================== + side_rating_ct REAL, + side_rating_t REAL, + side_first_kill_rate_ct REAL, + side_first_kill_rate_t REAL, + side_hold_success_rate_ct REAL, + side_entry_success_rate_t REAL, + side_kd_diff_ct_t REAL, -- CT KD - T KD + side_planted_bomb_count INTEGER, + side_defused_bomb_count INTEGER, + + -- ========================================== + -- 6. 
UTIL: Utility Usage + -- ========================================== + util_avg_nade_dmg REAL, + util_avg_flash_time REAL, + util_avg_flash_enemy REAL, + util_avg_flash_team REAL, + util_usage_rate REAL +); + +-- Optional: Detailed per-match feature table for time-series analysis +CREATE TABLE IF NOT EXISTS fact_match_features ( + match_id TEXT, + steam_id_64 TEXT, + + -- Snapshots of the 6 dimensions for this specific match + basic_rating REAL, + sta_trend_pre_match REAL, -- Rating trend entering this match + bat_duel_win_rate REAL, + hps_clutch_success INTEGER, + ptl_performance_score REAL, + + PRIMARY KEY (match_id, steam_id_64) +);