feat: Initial commit of Clutch-IQ project

This commit is contained in:
xunyulin230420
2026-02-05 23:26:03 +08:00
commit a355239861
66 changed files with 12922 additions and 0 deletions

364
database/L3/L3_Builder.py Normal file
View File

@@ -0,0 +1,364 @@
import logging
import os
import sys
import sqlite3
import json
import argparse
import concurrent.futures
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# Get absolute paths
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # Points to database/ directory
PROJECT_ROOT = os.path.dirname(BASE_DIR) # Points to project root
sys.path.insert(0, PROJECT_ROOT) # Add project root to Python path
L2_DB_PATH = os.path.join(BASE_DIR, 'L2', 'L2.db')
L3_DB_PATH = os.path.join(BASE_DIR, 'L3', 'L3.db')
WEB_DB_PATH = os.path.join(BASE_DIR, 'Web', 'Web_App.sqlite')
SCHEMA_PATH = os.path.join(BASE_DIR, 'L3', 'schema.sql')
def _get_existing_columns(conn, table_name):
cur = conn.execute(f"PRAGMA table_info({table_name})")
return {row[1] for row in cur.fetchall()}
def _ensure_columns(conn, table_name, columns):
    """Add any columns from *columns* (name -> SQL type) missing on *table_name*."""
    present = _get_existing_columns(conn, table_name)
    for name, sql_type in columns.items():
        if name not in present:
            conn.execute(f"ALTER TABLE {table_name} ADD COLUMN {name} {sql_type}")
def init_db():
    """Initialize L3 database with new schema.

    Ensures the L3 directory exists, runs schema.sql against L3.db, and logs
    a summary of the created tables. Re-raises on any schema/IO failure so the
    caller aborts instead of building against a broken database.
    """
    l3_dir = os.path.dirname(L3_DB_PATH)
    # exist_ok=True avoids the check-then-create race of the old
    # `if not os.path.exists(...): os.makedirs(...)` pattern
    os.makedirs(l3_dir, exist_ok=True)
    logger.info(f"Initializing L3 database at: {L3_DB_PATH}")
    conn = sqlite3.connect(L3_DB_PATH)
    try:
        with open(SCHEMA_PATH, 'r', encoding='utf-8') as f:
            schema_sql = f.read()
        conn.executescript(schema_sql)
        conn.commit()
        logger.info("✓ L3 schema created successfully")
        # Verify tables were actually created
        cursor = conn.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
        tables = [row[0] for row in cursor.fetchall()]
        logger.info(f"✓ Created {len(tables)} tables: {', '.join(tables)}")
        # Verify dm_player_features columns
        cursor.execute("PRAGMA table_info(dm_player_features)")
        columns = cursor.fetchall()
        logger.info(f"✓ dm_player_features has {len(columns)} columns")
    except Exception as e:
        logger.error(f"Error initializing L3 database: {e}")
        raise
    finally:
        conn.close()
    logger.info("L3 DB Initialized with new 5-tier architecture")
def _get_team_players():
    """Get list of steam_ids from Web App team lineups.

    Returns:
        set: unique steam ids collected from every team_lineups row whose
        player_ids_json parses to a list; empty set when the Web DB is
        missing or any read error occurs (best-effort, never raises).
    """
    if not os.path.exists(WEB_DB_PATH):
        logger.warning(f"Web DB not found at {WEB_DB_PATH}, returning empty list")
        return set()
    conn = None
    try:
        conn = sqlite3.connect(WEB_DB_PATH)
        cursor = conn.cursor()
        cursor.execute("SELECT player_ids_json FROM team_lineups")
        rows = cursor.fetchall()
        steam_ids = set()
        for row in rows:
            if row[0]:
                try:
                    ids = json.loads(row[0])
                    if isinstance(ids, list):
                        steam_ids.update(ids)
                except json.JSONDecodeError:
                    # Skip malformed rows but keep collecting the rest
                    logger.warning(f"Failed to parse player_ids_json: {row[0]}")
        logger.info(f"Found {len(steam_ids)} unique players in Team Lineups")
        return steam_ids
    except Exception as e:
        logger.error(f"Error reading Web DB: {e}")
        return set()
    finally:
        # Original closed the connection only on the success path and leaked
        # it when the query raised; finally covers both paths.
        if conn is not None:
            conn.close()
def _get_match_date_range(steam_id: str, conn_l2: sqlite3.Connection):
cursor = conn_l2.cursor()
cursor.execute("""
SELECT MIN(m.start_time), MAX(m.start_time)
FROM fact_match_players p
JOIN fact_matches m ON p.match_id = m.match_id
WHERE p.steam_id_64 = ?
""", (steam_id,))
date_row = cursor.fetchone()
first_match_date = date_row[0] if date_row and date_row[0] else None
last_match_date = date_row[1] if date_row and date_row[1] else None
return first_match_date, last_match_date
def _build_player_record(steam_id: str):
    """Build the full L3 feature record for one player.

    Designed to run inside a ProcessPoolExecutor worker: imports the
    processors lazily and opens its own L2 connection. Never raises —
    failures are reported via the 'error' field of the returned dict.
    """
    conn_l2 = None
    try:
        from database.L3.processors import (
            BasicProcessor,
            TacticalProcessor,
            IntelligenceProcessor,
            MetaProcessor,
            CompositeProcessor
        )
        conn_l2 = sqlite3.connect(L2_DB_PATH)
        conn_l2.row_factory = sqlite3.Row
        features = {}
        # Tier order matters: CompositeProcessor consumes the earlier tiers
        features.update(BasicProcessor.calculate(steam_id, conn_l2))
        features.update(TacticalProcessor.calculate(steam_id, conn_l2))
        features.update(IntelligenceProcessor.calculate(steam_id, conn_l2))
        features.update(MetaProcessor.calculate(steam_id, conn_l2))
        features.update(CompositeProcessor.calculate(steam_id, conn_l2, features))
        match_count = _get_match_count(steam_id, conn_l2)
        round_count = _get_round_count(steam_id, conn_l2)
        first_match_date, last_match_date = _get_match_date_range(steam_id, conn_l2)
        return {
            "steam_id": steam_id,
            "features": features,
            "match_count": match_count,
            "round_count": round_count,
            "first_match_date": first_match_date,
            "last_match_date": last_match_date,
            "error": None,
        }
    except Exception as e:
        return {
            "steam_id": steam_id,
            "features": None,
            "match_count": 0,
            "round_count": 0,
            "first_match_date": None,
            "last_match_date": None,
            "error": str(e),
        }
    finally:
        # Original only closed on success and leaked the connection whenever
        # a processor raised; finally covers both paths.
        if conn_l2 is not None:
            conn_l2.close()
def main(force_all: bool = False, workers: int = 1):
    """
    Main L3 feature building pipeline using modular processors

    Args:
        force_all: when True, build features for every player in L2's
            dim_players; otherwise only players referenced by the Web App
            team lineups are processed.
        workers: number of processes; values > 1 switch to the
            ProcessPoolExecutor path (each worker opens its own L2
            connection inside _build_player_record), otherwise players are
            processed sequentially on this process's connection.
    """
    logger.info("========================================")
    logger.info("Starting L3 Builder with 5-Tier Architecture")
    logger.info("========================================")
    # 1. Ensure Schema is up to date
    init_db()
    # 2. Import processors (abort cleanly if the package is unavailable)
    try:
        from database.L3.processors import (
            BasicProcessor,
            TacticalProcessor,
            IntelligenceProcessor,
            MetaProcessor,
            CompositeProcessor
        )
        logger.info("✓ All 5 processors imported successfully")
    except ImportError as e:
        logger.error(f"Failed to import processors: {e}")
        return
    # 3. Connect to databases
    conn_l2 = sqlite3.connect(L2_DB_PATH)
    conn_l2.row_factory = sqlite3.Row  # processors access columns by name
    conn_l3 = sqlite3.connect(L3_DB_PATH)
    try:
        cursor_l2 = conn_l2.cursor()
        # 4. Select which players to build
        if force_all:
            logger.info("Force mode enabled: building L3 for all players in L2.")
            sql = """
                SELECT DISTINCT steam_id_64
                FROM dim_players
                ORDER BY steam_id_64
            """
            cursor_l2.execute(sql)
        else:
            team_players = _get_team_players()
            if not team_players:
                logger.warning("No players found in Team Lineups. Aborting L3 build.")
                return
            # Parameterized IN (...) list: one '?' placeholder per steam id
            placeholders = ','.join(['?' for _ in team_players])
            sql = f"""
                SELECT DISTINCT steam_id_64
                FROM dim_players
                WHERE steam_id_64 IN ({placeholders})
                ORDER BY steam_id_64
            """
            cursor_l2.execute(sql, list(team_players))
        players = cursor_l2.fetchall()
        total_players = len(players)
        logger.info(f"Found {total_players} matching players in L2 to process")
        if total_players == 0:
            logger.warning("No matching players found in dim_players table")
            return
        success_count = 0
        error_count = 0
        processed_count = 0
        if workers and workers > 1:
            # Parallel path: feature computation happens in worker processes;
            # only the L3 upsert runs on this process/connection.
            steam_ids = [row[0] for row in players]
            with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
                futures = [executor.submit(_build_player_record, sid) for sid in steam_ids]
                for future in concurrent.futures.as_completed(futures):
                    result = future.result()
                    processed_count += 1
                    if result.get("error"):
                        error_count += 1
                        logger.error(f"Error processing player {result.get('steam_id')}: {result.get('error')}")
                    else:
                        # Worker already computed the date range, so no L2
                        # connection is passed (None)
                        _upsert_features(
                            conn_l3,
                            result["steam_id"],
                            result["features"],
                            result["match_count"],
                            result["round_count"],
                            None,
                            result["first_match_date"],
                            result["last_match_date"],
                        )
                        success_count += 1
                    # Commit every 2 players so progress survives interruption
                    if processed_count % 2 == 0:
                        conn_l3.commit()
                        logger.info(f"Progress: {processed_count}/{total_players} ({success_count} success, {error_count} errors)")
        else:
            # Sequential path: reuse this process's L2 connection throughout
            for idx, row in enumerate(players, 1):
                steam_id = row[0]
                try:
                    features = {}
                    # Tier order matters: CompositeProcessor consumes earlier tiers
                    features.update(BasicProcessor.calculate(steam_id, conn_l2))
                    features.update(TacticalProcessor.calculate(steam_id, conn_l2))
                    features.update(IntelligenceProcessor.calculate(steam_id, conn_l2))
                    features.update(MetaProcessor.calculate(steam_id, conn_l2))
                    features.update(CompositeProcessor.calculate(steam_id, conn_l2, features))
                    match_count = _get_match_count(steam_id, conn_l2)
                    round_count = _get_round_count(steam_id, conn_l2)
                    first_match_date, last_match_date = _get_match_date_range(steam_id, conn_l2)
                    _upsert_features(conn_l3, steam_id, features, match_count, round_count, conn_l2, first_match_date, last_match_date)
                    success_count += 1
                except Exception as e:
                    error_count += 1
                    logger.error(f"Error processing player {steam_id}: {e}")
                    # Full tracebacks only for the first few failures to keep logs readable
                    if error_count <= 3:
                        import traceback
                        traceback.print_exc()
                    continue
                processed_count = idx
                if processed_count % 2 == 0:
                    conn_l3.commit()
                    logger.info(f"Progress: {processed_count}/{total_players} ({success_count} success, {error_count} errors)")
        # Final commit
        conn_l3.commit()
        logger.info("========================================")
        logger.info(f"L3 Build Complete!")
        logger.info(f" Success: {success_count} players")
        logger.info(f" Errors: {error_count} players")
        logger.info(f" Total: {total_players} players")
        logger.info(f" Success Rate: {success_count/total_players*100:.1f}%")
        logger.info("========================================")
    except Exception as e:
        logger.error(f"Fatal error during L3 build: {e}")
        import traceback
        traceback.print_exc()
    finally:
        conn_l2.close()
        conn_l3.close()
def _get_match_count(steam_id: str, conn_l2: sqlite3.Connection) -> int:
"""Get total match count for player"""
cursor = conn_l2.cursor()
cursor.execute("""
SELECT COUNT(*) FROM fact_match_players
WHERE steam_id_64 = ?
""", (steam_id,))
return cursor.fetchone()[0]
def _get_round_count(steam_id: str, conn_l2: sqlite3.Connection) -> int:
"""Get total round count for player"""
cursor = conn_l2.cursor()
cursor.execute("""
SELECT COALESCE(SUM(round_total), 0) FROM fact_match_players
WHERE steam_id_64 = ?
""", (steam_id,))
return cursor.fetchone()[0]
def _upsert_features(conn_l3: sqlite3.Connection, steam_id: str, features: dict,
match_count: int, round_count: int, conn_l2: sqlite3.Connection | None,
first_match_date=None, last_match_date=None):
"""
Insert or update player features in dm_player_features
"""
cursor_l3 = conn_l3.cursor()
if first_match_date is None or last_match_date is None:
if conn_l2 is not None:
first_match_date, last_match_date = _get_match_date_range(steam_id, conn_l2)
else:
first_match_date = None
last_match_date = None
# Add metadata to features
features['total_matches'] = match_count
features['total_rounds'] = round_count
features['first_match_date'] = first_match_date
features['last_match_date'] = last_match_date
# Build dynamic column list from features dict
columns = ['steam_id_64'] + list(features.keys())
placeholders = ','.join(['?' for _ in columns])
columns_sql = ','.join(columns)
# Build UPDATE SET clause for ON CONFLICT
update_clauses = [f"{col}=excluded.{col}" for col in features.keys()]
update_clause_sql = ','.join(update_clauses)
values = [steam_id] + [features[k] for k in features.keys()]
sql = f"""
INSERT INTO dm_player_features ({columns_sql})
VALUES ({placeholders})
ON CONFLICT(steam_id_64) DO UPDATE SET
{update_clause_sql},
last_updated=CURRENT_TIMESTAMP
"""
cursor_l3.execute(sql, values)
def _parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--force", action="store_true")
parser.add_argument("--workers", type=int, default=1)
return parser.parse_args()
if __name__ == "__main__":
    # CLI entry point: python database/L3/L3_Builder.py [--force] [--workers N]
    args = _parse_args()
    main(force_all=args.force, workers=args.workers)

11
database/L3/README.md Normal file
View File

@@ -0,0 +1,11 @@
# database/L3/
L3特征库层:面向训练与在线推理复用的特征聚合与派生。
## 关键内容
- L3_Builder.py:L3 构建入口
- processors/:特征处理器(基础/情报/战术等)
- analyzer/:用于检验处理器与特征输出的分析脚本
- schema.sql:L3 建表结构

View File

@@ -0,0 +1,609 @@
# L3 Implementation Roadmap & Checklist
> **Based on**: L3_ARCHITECTURE_PLAN.md v2.0
> **Start Date**: 2026-01-28
> **Estimated Duration**: 8-10 days
---
## Quick Start Checklist
### ✅ Pre-requisites
- [x] L1 database完整 (208 matches)
- [x] L2 database完整 (100% coverage, 51,860 rows)
- [x] L2 schema documented
- [x] Profile requirements analyzed
- [x] L3 architecture designed
### 🎯 Implementation Phases
---
## Phase 1: Schema & Infrastructure (Day 1-2)
### 1.1 Create L3 Database Schema
- [ ] Create `database/L3/schema.sql`
- [ ] dm_player_features (207 columns)
- [ ] dm_player_match_history
- [ ] dm_player_map_stats
- [ ] dm_player_weapon_stats
- [ ] All indexes
### 1.2 Initialize L3 Database
- [ ] Update `database/L3/L3_Builder.py` init_db()
- [ ] Run schema creation
- [ ] Verify tables created
### 1.3 Processor Base Classes
- [ ] Create `database/L3/processors/__init__.py`
- [ ] Create `database/L3/processors/base_processor.py`
- [ ] BaseFeatureProcessor interface
- [ ] SafeAggregator utility class
- [ ] Z-score normalization functions
**验收标准**
```bash
sqlite3 database/L3/L3.db ".tables"
# 应输出: dm_player_features, dm_player_match_history, dm_player_map_stats, dm_player_weapon_stats
```
---
## Phase 2: Tier 1 - Core Processors (Day 3-4)
### 2.1 BasicProcessor Implementation
- [ ] Create `database/L3/processors/basic_processor.py`
**Sub-tasks**:
- [ ] `calculate_basic_stats()` - 15 columns
- [ ] AVG(rating, rating2, kd, adr, kast, rws) from fact_match_players
- [ ] AVG(headshot_count), hs_rate = SUM(hs)/SUM(kills)
- [ ] total_kills, total_deaths, total_assists
- [ ] kpr, dpr, survival_rate
- [ ] `calculate_match_stats()` - 8 columns
- [ ] win_rate, wins, losses
- [ ] avg_match_duration from fact_matches
- [ ] avg_mvps, mvp_rate
- [ ] avg_elo_change, total_elo_gained from fact_match_teams
- [ ] `calculate_weapon_stats()` - 12 columns
- [ ] avg_awp_kills, awp_usage_rate
- [ ] avg_knife_kills, avg_zeus_kills, zeus_buy_rate
- [ ] top_weapon (GROUP BY weapon in fact_round_events)
- [ ] weapon_diversity (Shannon entropy)
- [ ] rifle/pistol/smg hs_rates
- [ ] `calculate_objective_stats()` - 6 columns
- [ ] avg_plants, avg_defuses, avg_flash_assists
- [ ] plant_success_rate, defuse_success_rate
- [ ] objective_impact (weighted score)
**测试用例**:
```python
features = BasicProcessor.calculate('76561198012345678', conn_l2)
assert 'core_avg_rating' in features
assert features['core_total_kills'] > 0
assert 0 <= features['core_hs_rate'] <= 1
```
---
## Phase 3: Tier 2 - Tactical Processors (Day 4-5)
### 3.1 TacticalProcessor Implementation
- [ ] Create `database/L3/processors/tactical_processor.py`
**Sub-tasks**:
- [ ] `calculate_opening_impact()` - 8 columns
- [ ] avg_fk, avg_fd from fact_match_players
- [ ] fk_rate, fd_rate
- [ ] fk_success_rate (team win when FK)
- [ ] entry_kill_rate, entry_death_rate
- [ ] opening_duel_winrate
- [ ] `calculate_multikill()` - 6 columns
- [ ] avg_2k, avg_3k, avg_4k, avg_5k
- [ ] multikill_rate
- [ ] ace_count (5k count)
- [ ] `calculate_clutch()` - 10 columns
- [ ] clutch_1v1/1v2_attempts/wins/rate
- [ ] clutch_1v3_plus aggregated
- [ ] clutch_impact_score (weighted)
- [ ] `calculate_utility()` - 12 columns
- [ ] util_X_per_round for flash/smoke/molotov/he
- [ ] util_usage_rate
- [ ] nade_dmg metrics
- [ ] flash_efficiency, smoke_timing_score
- [ ] util_impact_score
- [ ] `calculate_economy()` - 8 columns
- [ ] dmg_per_1k from fact_round_player_economy
- [ ] kpr/kd for eco/force/full rounds
- [ ] save_discipline, force_success_rate
- [ ] eco_efficiency_score
**测试**:
```python
features = TacticalProcessor.calculate('76561198012345678', conn_l2)
assert 'tac_fk_rate' in features
assert features['tac_multikill_rate'] >= 0
```
---
## Phase 4: Tier 3 - Intelligence Processors (Day 5-7)
### 4.1 IntelligenceProcessor Implementation
- [ ] Create `database/L3/processors/intelligence_processor.py`
**Sub-tasks**:
- [ ] `calculate_high_iq_kills()` - 8 columns
- [ ] wallbang/smoke/blind/noscope kills from fact_round_events flags
- [ ] Rates: X_kills / total_kills
- [ ] high_iq_score (weighted formula)
- [ ] `calculate_timing_analysis()` - 12 columns
- [ ] early/mid/late kills by event_time bins (0-30s, 30-60s, 60s+)
- [ ] timing shares
- [ ] avg_kill_time, avg_death_time
- [ ] aggression_index, patience_score
- [ ] first_contact_time (MIN(event_time) per round)
- [ ] `calculate_pressure_performance()` - 10 columns
- [ ] comeback_kd/rating (when down 4+ rounds)
- [ ] losing_streak_kd (3+ round loss streak)
- [ ] matchpoint_kpr/rating (at 15-X or 12-X)
- [ ] clutch_composure, entry_in_loss
- [ ] pressure_performance_index, big_moment_score
- [ ] tilt_resistance
- [ ] `calculate_position_mastery()` - 15 columns ⚠️ Complex
- [ ] site_a/b/mid_control_rate from xyz clustering
- [ ] favorite_position (most common cluster)
- [ ] position_diversity (entropy)
- [ ] rotation_speed (distance between kills)
- [ ] map_coverage, defensive/aggressive positioning
- [ ] lurk_tendency, site_anchor_score
- [ ] spatial_iq_score
- [ ] `calculate_trade_network()` - 8 columns
- [ ] trade_kill_count (kills within 5s of teammate death)
- [ ] trade_kill_rate
- [ ] trade_response_time (AVG seconds)
- [ ] trade_given (deaths traded by teammate)
- [ ] trade_balance, trade_efficiency
- [ ] teamwork_score
**Position Mastery特别注意**:
```python
# 需要使用sklearn DBSCAN聚类
from sklearn.cluster import DBSCAN
def cluster_player_positions(steam_id, conn_l2):
"""从fact_round_events提取xyz坐标并聚类"""
cursor = conn_l2.cursor()
cursor.execute("""
SELECT attacker_pos_x, attacker_pos_y, attacker_pos_z
FROM fact_round_events
WHERE attacker_steam_id = ?
AND attacker_pos_x IS NOT NULL
""", (steam_id,))
coords = cursor.fetchall()
# DBSCAN clustering...
```
**测试**:
```python
features = IntelligenceProcessor.calculate('76561198012345678', conn_l2)
assert 'int_high_iq_score' in features
assert features['int_timing_early_kill_share'] + features['int_timing_mid_kill_share'] + features['int_timing_late_kill_share'] <= 1.1 # Allow rounding
```
---
## Phase 5: Tier 4 - Meta Processors (Day 7-8)
### 5.1 MetaProcessor Implementation
- [ ] Create `database/L3/processors/meta_processor.py`
**Sub-tasks**:
- [ ] `calculate_stability()` - 8 columns
- [ ] rating_volatility (STDDEV of last 20 matches)
- [ ] recent_form_rating (AVG last 10)
- [ ] win/loss_rating
- [ ] rating_consistency (100 - volatility_norm)
- [ ] time_rating_correlation (CORR(duration, rating))
- [ ] map_stability, elo_tier_stability
- [ ] `calculate_side_preference()` - 14 columns
- [ ] side_ct/t_rating from fact_match_players_ct/t
- [ ] side_ct/t_kd, win_rate, fk_rate, kast
- [ ] side_rating_diff, side_kd_diff
- [ ] side_preference ('CT'/'T'/'Balanced')
- [ ] side_balance_score
- [ ] `calculate_opponent_adaptation()` - 12 columns
- [ ] vs_lower/similar/higher_elo_rating/kd
- [ ] Based on fact_match_teams.group_origin_elo差值
- [ ] elo_adaptation, stomping_score, upset_score
- [ ] consistency_across_elos, rank_resistance
- [ ] smurf_detection
- [ ] `calculate_map_specialization()` - 10 columns
- [ ] best/worst_map, best/worst_rating
- [ ] map_diversity (entropy)
- [ ] map_pool_size (maps with 5+ matches)
- [ ] map_specialist_score, map_versatility
- [ ] comfort_zone_rate, map_adaptation
- [ ] `calculate_session_pattern()` - 8 columns
- [ ] avg_matches_per_day
- [ ] longest_streak (consecutive days)
- [ ] weekend/weekday_rating
- [ ] morning/afternoon/evening/night_rating (based on timestamp)
**测试**:
```python
features = MetaProcessor.calculate('76561198012345678', conn_l2)
assert 'meta_rating_volatility' in features
assert features['meta_side_preference'] in ['CT', 'T', 'Balanced']
```
---
## Phase 6: Tier 5 - Composite Processors (Day 8)
### 6.1 CompositeProcessor Implementation
- [ ] Create `database/L3/processors/composite_processor.py`
**Sub-tasks**:
- [ ] `normalize_and_standardize()` helper
- [ ] Z-score normalization function
- [ ] Global mean/std calculation from all players
- [ ] Map Z-score to 0-100 range
- [ ] `calculate_radar_scores()` - 8 scores
- [ ] score_aim: 25% Rating + 20% KD + 15% ADR + 10% DuelWin + 10% HighEloKD + 20% MultiKill
- [ ] score_clutch: 25% 1v3+ + 20% MatchPtWin + 20% ComebackKD + 15% PressureEntry + 20% Rating
- [ ] score_pistol: 30% PistolKills + 30% PistolWin + 20% PistolKD + 20% PistolHS%
- [ ] score_defense: 35% CT_Rating + 35% T_Rating + 15% CT_FK + 15% T_FK
- [ ] score_utility: 35% UsageRate + 25% NadeDmg + 20% FlashEff + 20% FlashEnemy
- [ ] score_stability: 30% (100-Volatility) + 30% LossRating + 20% WinRating + 20% Consistency
- [ ] score_economy: 50% Dmg/$1k + 30% EcoKPR + 20% SaveRoundKD
- [ ] score_pace: 40% EntryTiming + 30% TradeSpeed + 30% AggressionIndex
- [ ] `calculate_overall_score()` - AVG of 8 scores
- [ ] `classify_tier()` - Performance tier
- [ ] Elite: overall > 75
- [ ] Advanced: 60-75
- [ ] Intermediate: 40-60
- [ ] Beginner: < 40
- [ ] `calculate_percentile()` - Rank among all players
**依赖**:
```python
def calculate(steam_id: str, conn_l2: sqlite3.Connection, pre_features: dict) -> dict:
"""
需要前面4个Tier的特征作为输入
Args:
pre_features: 包含Tier 1-4的所有特征
"""
pass
```
**测试**:
```python
# 需要先计算所有前置特征
features = {}
features.update(BasicProcessor.calculate(steam_id, conn_l2))
features.update(TacticalProcessor.calculate(steam_id, conn_l2))
features.update(IntelligenceProcessor.calculate(steam_id, conn_l2))
features.update(MetaProcessor.calculate(steam_id, conn_l2))
composite = CompositeProcessor.calculate(steam_id, conn_l2, features)
assert 0 <= composite['score_aim'] <= 100
assert composite['tier_classification'] in ['Elite', 'Advanced', 'Intermediate', 'Beginner']
```
---
## Phase 7: L3_Builder Integration (Day 8-9)
### 7.1 Main Builder Logic
- [ ] Update `database/L3/L3_Builder.py`
- [ ] Import all processors
- [ ] Main loop: iterate all players from dim_players
- [ ] Call processors in order
- [ ] _upsert_features() helper
- [ ] Batch commit every 100 players
- [ ] Progress logging
```python
def main():
logger.info("Starting L3 Builder...")
# 1. Init DB
init_db()
# 2. Connect
conn_l2 = sqlite3.connect(L2_DB_PATH)
conn_l3 = sqlite3.connect(L3_DB_PATH)
# 3. Get all players
cursor = conn_l2.cursor()
cursor.execute("SELECT DISTINCT steam_id_64 FROM dim_players")
players = cursor.fetchall()
logger.info(f"Processing {len(players)} players...")
for idx, (steam_id,) in enumerate(players, 1):
try:
# 4. Calculate features tier by tier
features = {}
features.update(BasicProcessor.calculate(steam_id, conn_l2))
features.update(TacticalProcessor.calculate(steam_id, conn_l2))
features.update(IntelligenceProcessor.calculate(steam_id, conn_l2))
features.update(MetaProcessor.calculate(steam_id, conn_l2))
features.update(CompositeProcessor.calculate(steam_id, conn_l2, features))
# 5. Upsert to L3
_upsert_features(conn_l3, steam_id, features)
# 6. Commit batch
if idx % 100 == 0:
conn_l3.commit()
logger.info(f"Processed {idx}/{len(players)} players")
except Exception as e:
logger.error(f"Error processing {steam_id}: {e}")
conn_l3.commit()
logger.info("Done!")
```
### 7.2 Auxiliary Tables Population
- [ ] Populate `dm_player_match_history`
- [ ] FROM fact_match_players JOIN fact_matches
- [ ] ORDER BY match date
- [ ] Calculate match_sequence, rolling averages
- [ ] Populate `dm_player_map_stats`
- [ ] GROUP BY steam_id, map_name
- [ ] FROM fact_match_players
- [ ] Populate `dm_player_weapon_stats`
- [ ] GROUP BY steam_id, weapon_name
- [ ] FROM fact_round_events
- [ ] TOP 10 weapons per player
### 7.3 Full Build Test
- [ ] Run: `python database/L3/L3_Builder.py`
- [ ] Verify: All players processed
- [ ] Check: Row counts in all L3 tables
- [ ] Validate: Sample features make sense
**验收标准**:
```sql
SELECT COUNT(*) FROM dm_player_features; -- 应该 = dim_players count
SELECT AVG(core_avg_rating) FROM dm_player_features; -- 应该接近1.0
SELECT COUNT(*) FROM dm_player_features WHERE score_aim > 0; -- 大部分玩家有评分
```
---
## Phase 8: Web Services Refactoring (Day 9-10)
### 8.1 Create PlayerService
- [ ] Create `web/services/player_service.py`
```python
class PlayerService:
@staticmethod
def get_player_features(steam_id: str) -> dict:
"""获取完整特征dm_player_features"""
pass
@staticmethod
def get_player_radar_data(steam_id: str) -> dict:
"""获取雷达图8维数据"""
pass
@staticmethod
def get_player_core_stats(steam_id: str) -> dict:
"""获取核心Dashboard数据"""
pass
@staticmethod
def get_player_history(steam_id: str, limit: int = 20) -> list:
"""获取历史趋势数据"""
pass
@staticmethod
def get_player_map_stats(steam_id: str) -> list:
"""获取各地图统计"""
pass
@staticmethod
def get_player_weapon_stats(steam_id: str, top_n: int = 10) -> list:
"""获取Top N武器"""
pass
@staticmethod
def get_players_ranking(order_by: str = 'core_avg_rating',
limit: int = 100,
offset: int = 0) -> list:
"""获取排行榜"""
pass
```
- [ ] Implement all methods
- [ ] Add error handling
- [ ] Add caching (optional)
### 8.2 Refactor Routes
- [ ] Update `web/routes/players.py`
- [ ] `/profile/<steam_id>` route
- [ ] Use PlayerService instead of direct DB queries
- [ ] Pass features dict to template
- [ ] Add API endpoints
- [ ] `/api/players/<steam_id>/features`
- [ ] `/api/players/ranking`
- [ ] `/api/players/<steam_id>/history`
### 8.3 Update feature_service.py
- [ ] Mark old rebuild methods as DEPRECATED
- [ ] Redirect to L3_Builder.py
- [ ] Keep query methods for backward compatibility
---
## Phase 9: Frontend Integration (Day 10-11)
### 9.1 Update profile.html Template
- [ ] Dashboard cards: use `features.core_*`
- [ ] Radar chart: use `features.score_*`
- [ ] Trend chart: use `history` data
- [ ] Core Performance section
- [ ] Gunfight section
- [ ] Opening Impact section
- [ ] Clutch section
- [ ] High IQ Kills section
- [ ] Map stats table
- [ ] Weapon stats table
### 9.2 JavaScript Integration
- [ ] Radar chart rendering (Chart.js)
- [ ] Trend chart rendering
- [ ] Dynamic data loading
### 9.3 UI Polish
- [ ] Responsive design
- [ ] Loading states
- [ ] Error handling
- [ ] Tooltips for complex metrics
---
## Phase 10: Testing & Validation (Day 11-12)
### 10.1 Unit Tests
- [ ] Test each processor independently
- [ ] Mock L2 data
- [ ] Verify calculation correctness
### 10.2 Integration Tests
- [ ] Full L3_Builder run
- [ ] Verify all tables populated
- [ ] Check data consistency
### 10.3 Performance Tests
- [ ] Benchmark L3_Builder runtime
- [ ] Profile slow queries
- [ ] Optimize if needed
### 10.4 Data Quality Checks
- [ ] Verify no NULL values where expected
- [ ] Check value ranges (e.g., 0 <= rate <= 1)
- [ ] Validate composite scores (0-100)
- [ ] Cross-check with L2 source data
---
## Success Criteria
### ✅ L3 Database
- [ ] All 4 tables created with correct schemas
- [ ] dm_player_features has 207 columns
- [ ] All players from L2 have corresponding L3 rows
- [ ] No critical NULL values
### ✅ Feature Calculation
- [ ] All 5 processors implemented and tested
- [ ] 207 features calculated correctly
- [ ] Composite scores in 0-100 range
- [ ] Tier classification working
### ✅ Services & Routes
- [ ] PlayerService provides all query methods
- [ ] Routes use services correctly
- [ ] API endpoints return valid JSON
- [ ] No direct DB queries in routes
### ✅ Frontend
- [ ] Profile page renders correctly
- [ ] Radar chart displays 8 dimensions
- [ ] Trend chart shows history
- [ ] All sections populated with data
### ✅ Performance
- [ ] L3_Builder completes in < 20 min for 1000 players
- [ ] Profile page loads in < 200ms
- [ ] No N+1 query problems
---
## Risk Mitigation
### 🔴 High Risk Items
1. **Position Mastery (xyz clustering)**
- Mitigation: Start with simple grid-based approach, defer ML clustering
2. **Composite Score Standardization**
- Mitigation: Use simple percentile-based normalization as fallback
3. **Performance at Scale**
- Mitigation: Implement incremental updates, add indexes
### 🟡 Medium Risk Items
1. **Time Window Calculations (trades)**
- Mitigation: Use efficient self-JOIN with time bounds
2. **Missing Data Handling**
- Mitigation: Comprehensive NULL handling, default values
### 🟢 Low Risk Items
1. Basic aggregations (AVG, SUM, COUNT)
2. Service layer refactoring
3. Template updates
---
## Next Actions
**Immediate (Today)**:
1. Create schema.sql
2. Initialize L3.db
3. Create processor base classes
**Tomorrow**:
1. Implement BasicProcessor
2. Test with sample player
3. Start TacticalProcessor
**This Week**:
1. Complete all 5 processors
2. Full L3_Builder run
3. Service refactoring
**Next Week**:
1. Frontend integration
2. Testing & validation
3. Documentation
---
## Notes
- 保持每个processor独立便于单元测试
- 使用动态SQL避免column count错误
- 所有rate/percentage使用0-1范围存储UI展示时乘100
- 时间戳统一使用Unix timestamp (INTEGER)
- 遵循"查询不计算"原则web层只SELECT不做聚合

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,59 @@
"""
Test BasicProcessor implementation
"""
import sqlite3
import sys
import os
# Add parent directory to path
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', '..'))
from database.L3.processors import BasicProcessor
def test_basic_processor():
    """Test BasicProcessor on a real player from L2"""
    # Connect to L2 database
    l2_path = os.path.join(os.path.dirname(__file__), '..', '..', 'L2', 'L2.db')
    conn = sqlite3.connect(l2_path)
    try:
        # Pick any single player as the test subject
        row = conn.execute("SELECT steam_id_64 FROM dim_players LIMIT 1").fetchone()
        if row is None:
            print("No players found in L2 database")
            return False
        steam_id = row[0]
        print(f"Testing BasicProcessor for player: {steam_id}")
        # Calculate features
        features = BasicProcessor.calculate(steam_id, conn)
        print(f"\n✓ Calculated {len(features)} features")
        print(f"\nSample features:")
        print(f" core_avg_rating: {features.get('core_avg_rating', 0)}")
        print(f" core_avg_kd: {features.get('core_avg_kd', 0)}")
        print(f" core_total_kills: {features.get('core_total_kills', 0)}")
        print(f" core_win_rate: {features.get('core_win_rate', 0)}")
        print(f" core_top_weapon: {features.get('core_top_weapon', 'unknown')}")
        # Verify we have all 41 features
        expected_count = 41
        if len(features) != expected_count:
            print(f"\n✗ Feature count mismatch: expected {expected_count}, got {len(features)}")
            return False
        print(f"\n✓ Feature count correct: {expected_count}")
        return True
    finally:
        conn.close()
if __name__ == "__main__":
    # Run as a script: exit code 0 on success, 1 on failure (CI-friendly)
    success = test_basic_processor()
    sys.exit(0 if success else 1)

View File

@@ -0,0 +1,261 @@
"""
L3 Feature Distribution Checker
Analyzes data quality issues:
- NaN/NULL values
- All values identical (no variance)
- Extreme outliers
- Zero-only columns
"""
import sqlite3
import sys
from pathlib import Path
from collections import defaultdict
import math
import os
# Set UTF-8 encoding for Windows
if sys.platform == 'win32':
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
# Add project root to path
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
L3_DB_PATH = project_root / "database" / "L3" / "L3.db"
def get_column_stats(cursor, table_name):
    """Get statistics for all numeric columns in a table.

    Scans every REAL/INTEGER column of *table_name* (steam_id_64 excluded),
    prints a one-line summary for columns without issues, and collects
    data-quality issues into a defaultdict(list) keyed by category:
    HIGH_NULL, NO_VARIANCE, ALL_ZEROS, NAN_VALUES, OUTLIERS.

    Note: table/column names are interpolated into SQL directly, so this is
    only safe for trusted, internally-generated schema names.
    """
    # Get column names
    cursor.execute(f"PRAGMA table_info({table_name})")
    columns = cursor.fetchall()
    # Filter to numeric columns (skip steam_id_64, TEXT columns)
    numeric_cols = []
    for col in columns:
        col_name = col[1]
        col_type = col[2]
        if col_name != 'steam_id_64' and col_type in ('REAL', 'INTEGER'):
            numeric_cols.append(col_name)
    print(f"\n{'='*80}")
    print(f"Table: {table_name}")
    print(f"Analyzing {len(numeric_cols)} numeric columns...")
    print(f"{'='*80}\n")
    issues_found = defaultdict(list)
    for col in numeric_cols:
        # Get basic statistics in a single aggregate query per column
        cursor.execute(f"""
            SELECT
                COUNT(*) as total_count,
                COUNT({col}) as non_null_count,
                MIN({col}) as min_val,
                MAX({col}) as max_val,
                AVG({col}) as avg_val,
                COUNT(DISTINCT {col}) as unique_count
            FROM {table_name}
        """)
        row = cursor.fetchone()
        total = row[0]
        non_null = row[1]
        min_val = row[2]
        max_val = row[3]
        avg_val = row[4]
        unique = row[5]
        null_count = total - non_null
        null_pct = (null_count / total * 100) if total > 0 else 0
        # Check for issues
        # Issue 1: High NULL percentage
        if null_pct > 50:
            issues_found['HIGH_NULL'].append({
                'column': col,
                'null_pct': null_pct,
                'null_count': null_count,
                'total': total
            })
        # Issue 2: All values identical (no variance)
        if non_null > 0 and unique == 1:
            issues_found['NO_VARIANCE'].append({
                'column': col,
                'value': min_val,
                'count': non_null
            })
        # Issue 3: All zeros
        if non_null > 0 and min_val == 0 and max_val == 0:
            issues_found['ALL_ZEROS'].append({
                'column': col,
                'count': non_null
            })
        # Issue 4: NaN values (in SQLite, NaN is stored as NULL or text 'nan')
        # NOTE(review): this count includes plain NULLs as well as NaN, and
        # the threshold compares against non_null while 'pct' divides by
        # total — confirm that asymmetry is intended.
        cursor.execute(f"""
            SELECT COUNT(*) FROM {table_name}
            WHERE CAST({col} AS TEXT) = 'nan' OR {col} IS NULL
        """)
        nan_count = cursor.fetchone()[0]
        if nan_count > non_null * 0.1:  # More than 10% NaN
            issues_found['NAN_VALUES'].append({
                'column': col,
                'nan_count': nan_count,
                'pct': (nan_count / total * 100)
            })
        # Issue 5: Extreme outliers (using IQR method)
        if non_null > 10 and unique > 2:  # Need enough data
            # Approximate Q1/Q3 via row-number positions over the sorted column
            cursor.execute(f"""
                WITH ranked AS (
                    SELECT {col},
                           ROW_NUMBER() OVER (ORDER BY {col}) as rn,
                           COUNT(*) OVER () as total
                    FROM {table_name}
                    WHERE {col} IS NOT NULL
                )
                SELECT
                    (SELECT {col} FROM ranked WHERE rn = CAST(total * 0.25 AS INTEGER)) as q1,
                    (SELECT {col} FROM ranked WHERE rn = CAST(total * 0.75 AS INTEGER)) as q3
                FROM ranked
                LIMIT 1
            """)
            quartiles = cursor.fetchone()
            if quartiles and quartiles[0] is not None and quartiles[1] is not None:
                q1, q3 = quartiles
                iqr = q3 - q1
                if iqr > 0:
                    # Standard Tukey fences: 1.5 * IQR beyond the quartiles
                    lower_bound = q1 - 1.5 * iqr
                    upper_bound = q3 + 1.5 * iqr
                    cursor.execute(f"""
                        SELECT COUNT(*) FROM {table_name}
                        WHERE {col} < ? OR {col} > ?
                    """, (lower_bound, upper_bound))
                    outlier_count = cursor.fetchone()[0]
                    outlier_pct = (outlier_count / non_null * 100) if non_null > 0 else 0
                    if outlier_pct > 5:  # More than 5% outliers
                        issues_found['OUTLIERS'].append({
                            'column': col,
                            'outlier_count': outlier_count,
                            'outlier_pct': outlier_pct,
                            'q1': q1,
                            'q3': q3,
                            'iqr': iqr
                        })
        # Print summary for columns with good data (no issue recorded above)
        if col not in [item['column'] for sublist in issues_found.values() for item in sublist]:
            if non_null > 0 and min_val is not None:
                print(f"{col:45s} | Min: {min_val:10.3f} | Max: {max_val:10.3f} | "
                      f"Avg: {avg_val:10.3f} | Unique: {unique:6d}")
    return issues_found
def print_issues(issues_found):
    """Print a detailed, human-readable report of detected data-quality issues.

    Expects the dict produced by get_column_stats, keyed by issue category
    (HIGH_NULL, NO_VARIANCE, ALL_ZEROS, NAN_VALUES, OUTLIERS), each mapping
    to a list of per-column issue records.
    """
    banner = '=' * 80

    # Fast path: nothing to report.
    if not any(issues_found.values()):
        print(f"\n{banner}")
        print("✅ NO DATA QUALITY ISSUES FOUND!")
        print(f"{banner}\n")
        return

    print(f"\n{banner}")
    print("⚠️ DATA QUALITY ISSUES DETECTED")
    print(f"{banner}\n")

    # (category key, header formatter, per-issue line formatter)
    sections = (
        ('HIGH_NULL',
         lambda n: f"❌ HIGH NULL PERCENTAGE ({n} columns):",
         lambda i: (f"  - {i['column']:45s}: {i['null_pct']:6.2f}% NULL "
                    f"({i['null_count']}/{i['total']})")),
        ('NO_VARIANCE',
         lambda n: f"❌ NO VARIANCE - All values identical ({n} columns):",
         lambda i: f"  - {i['column']:45s}: All {i['count']} values = {i['value']}"),
        ('ALL_ZEROS',
         lambda n: f"❌ ALL ZEROS ({n} columns):",
         lambda i: f"  - {i['column']:45s}: All {i['count']} values are 0"),
        ('NAN_VALUES',
         lambda n: f"❌ NAN/NULL VALUES ({n} columns):",
         lambda i: f"  - {i['column']:45s}: {i['nan_count']} NaN/NULL ({i['pct']:.2f}%)"),
        ('OUTLIERS',
         lambda n: f"⚠️ EXTREME OUTLIERS ({n} columns):",
         lambda i: (f"  - {i['column']:45s}: {i['outlier_count']} outliers ({i['outlier_pct']:.2f}%) "
                    f"[Q1={i['q1']:.2f}, Q3={i['q3']:.2f}, IQR={i['iqr']:.2f}]")),
    )

    for key, header, line in sections:
        entries = issues_found[key]
        if not entries:
            continue  # section is omitted entirely when empty
        print(header(len(entries)))
        for entry in entries:
            print(line(entry))
        print()
def main():
    """Entry point: run distribution checks on dm_player_features and print a report.

    Returns:
        0 on success, 1 when the L3 database file does not exist.
    """
    if not L3_DB_PATH.exists():
        print(f"❌ L3 database not found at: {L3_DB_PATH}")
        return 1

    banner = '=' * 80
    print(f"\n{banner}")
    print("L3 Feature Distribution Checker")
    print(f"Database: {L3_DB_PATH}")
    print(banner)

    conn = sqlite3.connect(L3_DB_PATH)
    cursor = conn.cursor()

    # Overall row count for context before per-column checks.
    cursor.execute("SELECT COUNT(*) FROM dm_player_features")
    print(f"\nTotal players: {cursor.fetchone()[0]}")

    # Run the per-column quality scan and print the detailed findings.
    issues = get_column_stats(cursor, 'dm_player_features')
    print_issues(issues)

    # Summary counts per issue category.
    print(f"\n{banner}")
    print("SUMMARY")
    print(banner)
    print("Total Issues Found:")
    print(f"  - High NULL percentage: {len(issues['HIGH_NULL'])}")
    print(f"  - No variance (all same): {len(issues['NO_VARIANCE'])}")
    print(f"  - All zeros: {len(issues['ALL_ZEROS'])}")
    print(f"  - NaN/NULL values: {len(issues['NAN_VALUES'])}")
    print(f"  - Extreme outliers: {len(issues['OUTLIERS'])}")
    print()

    conn.close()
    return 0
# Script entry point: propagate main()'s status code (0 ok, 1 missing DB) to the shell.
if __name__ == '__main__':
    sys.exit(main())

View File

@@ -0,0 +1,38 @@
"""
L3 Feature Processors
5-Tier Architecture:
- BasicProcessor: Tier 1 CORE (41 columns)
- TacticalProcessor: Tier 2 TACTICAL (44 columns)
- IntelligenceProcessor: Tier 3 INTELLIGENCE (53 columns)
- MetaProcessor: Tier 4 META (52 columns)
- CompositeProcessor: Tier 5 COMPOSITE (11 columns)
"""
from .base_processor import (
BaseFeatureProcessor,
SafeAggregator,
NormalizationUtils,
WeaponCategories,
MapAreas
)
# Import processors as they are implemented
from .basic_processor import BasicProcessor
from .tactical_processor import TacticalProcessor
from .intelligence_processor import IntelligenceProcessor
from .meta_processor import MetaProcessor
from .composite_processor import CompositeProcessor
# Public API of the processors package: shared base utilities plus the five
# tier processors imported above.
__all__ = [
    'BaseFeatureProcessor',
    'SafeAggregator',
    'NormalizationUtils',
    'WeaponCategories',
    'MapAreas',
    'BasicProcessor',
    'TacticalProcessor',
    'IntelligenceProcessor',
    'MetaProcessor',
    'CompositeProcessor',
]

View File

@@ -0,0 +1,320 @@
"""
Base processor classes and utility functions for L3 feature calculation
"""
import sqlite3
import math
from typing import Dict, Any, List, Optional
from abc import ABC, abstractmethod
class SafeAggregator:
    """Utility class for safe mathematical operations with NULL handling.

    Every helper treats ``None`` entries as missing data and falls back to a
    caller-supplied ``default`` instead of raising, mirroring SQL aggregate
    semantics over NULLs.
    """

    @staticmethod
    def _valid(values: List[float]) -> List[float]:
        """Return the non-None entries of *values* ([] for a falsy input)."""
        return [v for v in values if v is not None] if values else []

    @staticmethod
    def safe_divide(numerator: float, denominator: float, default: float = 0.0) -> float:
        """Divide, returning *default* when either operand is None or the denominator is 0."""
        if numerator is None or denominator is None or denominator == 0:
            return default
        return numerator / denominator

    @staticmethod
    def safe_avg(values: List[float], default: float = 0.0) -> float:
        """Average of the non-None entries; *default* when none remain."""
        valid = SafeAggregator._valid(values)
        if not valid:
            return default
        return sum(valid) / len(valid)

    @staticmethod
    def safe_stddev(values: List[float], default: float = 0.0) -> float:
        """Population standard deviation of the non-None entries.

        Returns *default* when fewer than 2 valid values exist (a spread is
        undefined for a single observation).
        """
        valid = SafeAggregator._valid(values)
        if len(valid) < 2:
            return default
        mean = sum(valid) / len(valid)
        # Population variance (divide by N), matching the original behavior.
        variance = sum((x - mean) ** 2 for x in valid) / len(valid)
        return math.sqrt(variance)

    @staticmethod
    def safe_sum(values: List[float], default: float = 0.0) -> float:
        """Sum of the non-None entries; *default* when none remain."""
        valid = SafeAggregator._valid(values)
        return sum(valid) if valid else default

    @staticmethod
    def safe_min(values: List[float], default: float = 0.0) -> float:
        """Minimum of the non-None entries; *default* when none remain."""
        valid = SafeAggregator._valid(values)
        return min(valid) if valid else default

    @staticmethod
    def safe_max(values: List[float], default: float = 0.0) -> float:
        """Maximum of the non-None entries; *default* when none remain."""
        valid = SafeAggregator._valid(values)
        return max(valid) if valid else default
class NormalizationUtils:
    """Z-score normalization and scaling utilities."""

    @staticmethod
    def z_score_normalize(value: float, mean: float, std: float,
                          scale_min: float = 0.0, scale_max: float = 100.0) -> float:
        """
        Z-score normalization to a target range

        Args:
            value: Value to normalize
            mean: Population mean
            std: Population standard deviation
            scale_min: Target minimum (default: 0)
            scale_max: Target maximum (default: 100)

        Returns:
            Normalized value in [scale_min, scale_max] range
        """
        # Degenerate population: no spread, so every value maps to the midpoint.
        if std is None or std == 0:
            return (scale_min + scale_max) / 2.0
        # Calculate z-score
        z = (value - mean) / std
        # Map to target range (±3σ covers ~99.7% of data)
        # z = -3 → scale_min, z = 0 → midpoint, z = 3 → scale_max
        midpoint = (scale_min + scale_max) / 2.0
        scale_range = (scale_max - scale_min) / 6.0  # 6σ total range
        normalized = midpoint + (z * scale_range)
        # Clamp to target range
        return max(scale_min, min(scale_max, normalized))

    @staticmethod
    def percentile_normalize(value: float, all_values: List[float],
                             scale_min: float = 0.0, scale_max: float = 100.0) -> float:
        """
        Percentile-based normalization

        Args:
            value: Value to normalize
            all_values: All values in population
            scale_min: Target minimum
            scale_max: Target maximum

        Returns:
            Normalized value based on percentile (fraction of population
            strictly below *value*, mapped onto the target range)
        """
        if not all_values:
            return scale_min
        rank = sum(1 for v in all_values if v < value)
        percentile = rank / len(all_values)
        return scale_min + (percentile * (scale_max - scale_min))

    @staticmethod
    def min_max_normalize(value: float, min_val: float, max_val: float,
                          scale_min: float = 0.0, scale_max: float = 100.0) -> float:
        """Min-max normalization to target range (midpoint when min == max)."""
        if max_val == min_val:
            return (scale_min + scale_max) / 2.0
        normalized = (value - min_val) / (max_val - min_val)
        return scale_min + (normalized * (scale_max - scale_min))

    @staticmethod
    def calculate_population_stats(conn_l3: sqlite3.Connection, column: str) -> Dict[str, float]:
        """
        Calculate population mean and std for a column in dm_player_features

        Args:
            conn_l3: L3 database connection
            column: Column name to analyze (trusted, internally supplied —
                it is interpolated into the SQL text)

        Returns:
            dict with 'mean', 'std', 'min', 'max' ('std' defaults to 1.0
            when the column has no non-NULL rows, so callers can divide by it)
        """
        cursor = conn_l3.cursor()
        # FIX: SQLite has no built-in STDDEV() aggregate, so the previous
        # query raised sqlite3.OperationalError. Derive the population
        # standard deviation as sqrt(E[x^2] - E[x]^2) from built-in AVG().
        cursor.execute(f"""
            SELECT
                AVG({column}) as mean,
                AVG({column} * {column}) as mean_sq,
                MIN({column}) as min,
                MAX({column}) as max
            FROM dm_player_features
            WHERE {column} IS NOT NULL
        """)
        row = cursor.fetchone()
        mean = row[0] if row[0] is not None else 0.0
        if row[0] is not None and row[1] is not None:
            # Clamp tiny negative values caused by floating-point round-off.
            variance = max(row[1] - mean * mean, 0.0)
            std = math.sqrt(variance)
        else:
            std = 1.0
        return {
            'mean': mean,
            'std': std,
            'min': row[2] if row[2] is not None else 0.0,
            'max': row[3] if row[3] is not None else 0.0
        }
class BaseFeatureProcessor(ABC):
    """
    Abstract base class for all feature processors.

    Each subclass implements calculate(), returning a mapping of
    feature_name -> value for one player.
    """

    MIN_MATCHES_REQUIRED = 5  # Minimum matches needed for feature calculation

    @staticmethod
    @abstractmethod
    def calculate(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
        """
        Calculate features for a specific player

        Args:
            steam_id: Player's Steam ID (steam_id_64)
            conn_l2: Connection to L2 database

        Returns:
            Dictionary of {feature_name: value}
        """
        pass

    @staticmethod
    def check_min_matches(steam_id: str, conn_l2: sqlite3.Connection,
                          min_required: int = None) -> bool:
        """
        Check if player has minimum required matches

        Args:
            steam_id: Player's Steam ID
            conn_l2: L2 database connection
            min_required: Minimum matches (uses class default if None)

        Returns:
            True if player has enough matches
        """
        threshold = (BaseFeatureProcessor.MIN_MATCHES_REQUIRED
                     if min_required is None else min_required)
        return BaseFeatureProcessor.get_player_match_count(steam_id, conn_l2) >= threshold

    @staticmethod
    def get_player_match_count(steam_id: str, conn_l2: sqlite3.Connection) -> int:
        """Return the total number of matches recorded for the player."""
        row = conn_l2.execute(
            "SELECT COUNT(*) FROM fact_match_players WHERE steam_id_64 = ?",
            (steam_id,),
        ).fetchone()
        return row[0]

    @staticmethod
    def get_player_round_count(steam_id: str, conn_l2: sqlite3.Connection) -> int:
        """Return the total rounds played by the player (0 when no rows exist)."""
        total = conn_l2.execute(
            "SELECT SUM(round_total) FROM fact_match_players WHERE steam_id_64 = ?",
            (steam_id,),
        ).fetchone()[0]
        return 0 if total is None else total
class WeaponCategories:
    """Weapon categorization constants."""

    RIFLES = [
        'ak47', 'aug', 'm4a1', 'm4a1_silencer', 'sg556', 'galilar', 'famas'
    ]
    PISTOLS = [
        'glock', 'usp_silencer', 'hkp2000', 'p250', 'fiveseven', 'tec9',
        'cz75a', 'deagle', 'elite', 'revolver'
    ]
    SMGS = [
        'mac10', 'mp9', 'mp7', 'mp5sd', 'ump45', 'p90', 'bizon'
    ]
    SNIPERS = [
        'awp', 'ssg08', 'scar20', 'g3sg1'
    ]
    HEAVY = [
        'nova', 'xm1014', 'mag7', 'sawedoff', 'm249', 'negev'
    ]

    @classmethod
    def get_category(cls, weapon_name: str) -> str:
        """Map a raw weapon name (optionally 'weapon_'-prefixed, any case) to its category."""
        name = weapon_name.lower().replace('weapon_', '')
        # Single-member categories handled directly.
        if name == 'knife':
            return 'knife'
        if name == 'hegrenade':
            return 'grenade'
        # Walk the (label, members) table; lists are mutually exclusive.
        for label, members in (('rifle', cls.RIFLES),
                               ('pistol', cls.PISTOLS),
                               ('smg', cls.SMGS),
                               ('sniper', cls.SNIPERS),
                               ('heavy', cls.HEAVY)):
            if name in members:
                return label
        return 'other'
class MapAreas:
    """Map area classification utilities (for position analysis)."""

    # Area labels. Concrete per-map coordinate ranges are planned for the
    # IntelligenceProcessor and are not wired in yet.
    SITE_A = 'site_a'
    SITE_B = 'site_b'
    MID = 'mid'
    SPAWN_T = 'spawn_t'
    SPAWN_CT = 'spawn_ct'

    @staticmethod
    def classify_position(x: float, y: float, z: float, map_name: str) -> str:
        """Classify a world position into a named map area.

        Placeholder: always returns "unknown" until map-specific
        coordinate data is available.
        """
        return "unknown"
# Export all classes
# (re-exported by processors/__init__.py as the package's public API)
__all__ = [
    'SafeAggregator',
    'NormalizationUtils',
    'BaseFeatureProcessor',
    'WeaponCategories',
    'MapAreas'
]

View File

@@ -0,0 +1,463 @@
"""
BasicProcessor - Tier 1: CORE Features (41 columns)
Calculates fundamental player statistics from fact_match_players:
- Basic Performance (15 columns): rating, kd, adr, kast, rws, hs%, kills, deaths, assists
- Match Stats (8 columns): win_rate, mvps, duration, elo
- Weapon Stats (12 columns): awp, knife, zeus, diversity
- Objective Stats (6 columns): plants, defuses, flash_assists
"""
import sqlite3
from typing import Dict, Any
from .base_processor import BaseFeatureProcessor, SafeAggregator, WeaponCategories
class BasicProcessor(BaseFeatureProcessor):
    """Tier 1 CORE processor - Direct aggregations from fact_match_players"""

    MIN_MATCHES_REQUIRED = 1  # Basic stats work with any match count

    @staticmethod
    def calculate(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
        """
        Calculate all Tier 1 CORE features (41 columns)

        Args:
            steam_id: Player's Steam ID (steam_id_64)
            conn_l2: Open connection to the L2 database

        Returns dict with keys:
            - core_avg_rating, core_avg_rating2, core_avg_kd, core_avg_adr, etc.
            Players with no recorded matches get the all-zero default template.
        """
        features = {}
        # Get match count first
        match_count = BaseFeatureProcessor.get_player_match_count(steam_id, conn_l2)
        if match_count == 0:
            # No data at all: return the 41-column zero/default template.
            return _get_default_features()
        # Calculate each sub-section; later updates overwrite duplicate keys
        # (core_avg_flash_assists appears in both weapon and objective stats).
        features.update(BasicProcessor._calculate_basic_performance(steam_id, conn_l2))
        features.update(BasicProcessor._calculate_match_stats(steam_id, conn_l2))
        features.update(BasicProcessor._calculate_weapon_stats(steam_id, conn_l2))
        features.update(BasicProcessor._calculate_objective_stats(steam_id, conn_l2))
        return features

    @staticmethod
    def _calculate_basic_performance(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
        """
        Calculate Basic Performance (15 columns)

        Columns:
        - core_avg_rating, core_avg_rating2
        - core_avg_kd, core_avg_adr, core_avg_kast, core_avg_rws
        - core_avg_hs_kills, core_hs_rate
        - core_total_kills, core_total_deaths, core_total_assists, core_avg_assists
        - core_kpr, core_dpr, core_survival_rate
        """
        cursor = conn_l2.cursor()
        # Main aggregation query.
        # Row indices: 0-6 = averages, 7 = total_kills, 8 = total_deaths,
        # 9 = total_hs, 10 = total_assists, 11 = avg_assists, 12 = total_rounds.
        cursor.execute("""
            SELECT
                AVG(rating) as avg_rating,
                AVG(rating2) as avg_rating2,
                AVG(CAST(kills AS REAL) / NULLIF(deaths, 0)) as avg_kd,
                AVG(adr) as avg_adr,
                AVG(kast) as avg_kast,
                AVG(rws) as avg_rws,
                AVG(headshot_count) as avg_hs_kills,
                SUM(kills) as total_kills,
                SUM(deaths) as total_deaths,
                SUM(headshot_count) as total_hs,
                SUM(assists) as total_assists,
                AVG(assists) as avg_assists,
                SUM(round_total) as total_rounds
            FROM fact_match_players
            WHERE steam_id_64 = ?
        """, (steam_id,))
        row = cursor.fetchone()
        if not row:
            return {}
        total_kills = row[7] if row[7] else 0
        # NOTE(review): a deathless player is recorded with total_deaths = 1 so
        # core_dpr / core_survival_rate below stay finite — this also makes
        # core_total_deaths report 1 instead of 0; confirm intended.
        total_deaths = row[8] if row[8] else 1
        total_hs = row[9] if row[9] else 0
        total_rounds = row[12] if row[12] else 1
        return {
            'core_avg_rating': round(row[0], 3) if row[0] else 0.0,
            'core_avg_rating2': round(row[1], 3) if row[1] else 0.0,
            'core_avg_kd': round(row[2], 3) if row[2] else 0.0,
            'core_avg_adr': round(row[3], 2) if row[3] else 0.0,
            'core_avg_kast': round(row[4], 3) if row[4] else 0.0,
            'core_avg_rws': round(row[5], 2) if row[5] else 0.0,
            'core_avg_hs_kills': round(row[6], 2) if row[6] else 0.0,
            'core_hs_rate': round(total_hs / total_kills, 3) if total_kills > 0 else 0.0,
            'core_total_kills': total_kills,
            'core_total_deaths': total_deaths,
            'core_total_assists': row[10] if row[10] else 0,
            'core_avg_assists': round(row[11], 2) if row[11] else 0.0,
            'core_kpr': round(total_kills / total_rounds, 3) if total_rounds > 0 else 0.0,
            'core_dpr': round(total_deaths / total_rounds, 3) if total_rounds > 0 else 0.0,
            'core_survival_rate': round((total_rounds - total_deaths) / total_rounds, 3) if total_rounds > 0 else 0.0,
        }

    @staticmethod
    def _calculate_flash_assists(steam_id: str, conn_l2: sqlite3.Connection) -> int:
        """
        Calculate flash assists from fact_match_players (Total - Damage Assists)

        Returns total flash assist count (Estimated)
        """
        cursor = conn_l2.cursor()
        # NOTE: Flash Assist Logic
        # Source 'flash_assists' is often 0.
        # User Logic: Flash Assists = Total Assists - Damage Assists (assisted_kill)
        # We take MAX(0, diff) to avoid negative numbers if assisted_kill definition varies.
        # (MAX with two arguments is SQLite's scalar max function, applied per row
        # before the SUM aggregate.)
        cursor.execute("""
            SELECT SUM(MAX(0, assists - assisted_kill))
            FROM fact_match_players
            WHERE steam_id_64 = ?
        """, (steam_id,))
        res = cursor.fetchone()
        if res and res[0] is not None:
            return res[0]
        return 0

    @staticmethod
    def _calculate_match_stats(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
        """
        Calculate Match Stats (8 columns)

        Columns:
        - core_win_rate, core_wins, core_losses
        - core_avg_match_duration
        - core_avg_mvps, core_mvp_rate
        - core_avg_elo_change, core_total_elo_gained
        """
        cursor = conn_l2.cursor()
        # Win/loss stats
        cursor.execute("""
            SELECT
                COUNT(*) as total_matches,
                SUM(CASE WHEN is_win = 1 THEN 1 ELSE 0 END) as wins,
                SUM(CASE WHEN is_win = 0 THEN 1 ELSE 0 END) as losses,
                AVG(mvp_count) as avg_mvps,
                SUM(mvp_count) as total_mvps
            FROM fact_match_players
            WHERE steam_id_64 = ?
        """, (steam_id,))
        row = cursor.fetchone()
        total_matches = row[0] if row[0] else 0
        wins = row[1] if row[1] else 0
        losses = row[2] if row[2] else 0
        avg_mvps = row[3] if row[3] else 0.0
        total_mvps = row[4] if row[4] else 0
        # Match duration (from fact_matches)
        cursor.execute("""
            SELECT AVG(m.duration) as avg_duration
            FROM fact_matches m
            JOIN fact_match_players p ON m.match_id = p.match_id
            WHERE p.steam_id_64 = ?
        """, (steam_id,))
        duration_row = cursor.fetchone()
        avg_duration = duration_row[0] if duration_row and duration_row[0] else 0
        # ELO stats (from elo_change column)
        cursor.execute("""
            SELECT
                AVG(elo_change) as avg_elo_change,
                SUM(elo_change) as total_elo_gained
            FROM fact_match_players
            WHERE steam_id_64 = ?
        """, (steam_id,))
        elo_row = cursor.fetchone()
        avg_elo_change = elo_row[0] if elo_row and elo_row[0] else 0.0
        total_elo_gained = elo_row[1] if elo_row and elo_row[1] else 0.0
        return {
            'core_win_rate': round(wins / total_matches, 3) if total_matches > 0 else 0.0,
            'core_wins': wins,
            'core_losses': losses,
            # int() truncates the average duration to whole seconds.
            'core_avg_match_duration': int(avg_duration),
            'core_avg_mvps': round(avg_mvps, 2),
            'core_mvp_rate': round(total_mvps / total_matches, 2) if total_matches > 0 else 0.0,
            'core_avg_elo_change': round(avg_elo_change, 2),
            'core_total_elo_gained': round(total_elo_gained, 2),
        }

    @staticmethod
    def _calculate_weapon_stats(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
        """
        Calculate Weapon Stats (12 columns)

        Columns:
        - core_avg_awp_kills, core_awp_usage_rate
        - core_avg_knife_kills, core_avg_zeus_kills, core_zeus_buy_rate
        - core_top_weapon, core_top_weapon_kills, core_top_weapon_hs_rate
        - core_weapon_diversity
        - core_rifle_hs_rate, core_pistol_hs_rate
        - core_smg_kills_total

        (The returned dict also carries core_avg_flash_assists, which is
        recomputed identically in _calculate_objective_stats.)
        """
        cursor = conn_l2.cursor()
        # AWP/Knife/Zeus stats from fact_round_events
        # (IN-list covers both capitalizations seen in the event data.)
        cursor.execute("""
            SELECT
                weapon,
                COUNT(*) as kill_count
            FROM fact_round_events
            WHERE attacker_steam_id = ?
              AND weapon IN ('AWP', 'Knife', 'Zeus', 'knife', 'awp', 'zeus')
            GROUP BY weapon
        """, (steam_id,))
        awp_kills = 0
        knife_kills = 0
        zeus_kills = 0
        for weapon, kills in cursor.fetchall():
            weapon_lower = weapon.lower() if weapon else ''
            if weapon_lower == 'awp':
                awp_kills += kills
            elif weapon_lower == 'knife':
                knife_kills += kills
            elif weapon_lower == 'zeus':
                zeus_kills += kills
        # Get total matches count for rates
        cursor.execute("""
            SELECT COUNT(DISTINCT match_id)
            FROM fact_match_players
            WHERE steam_id_64 = ?
        """, (steam_id,))
        total_matches = cursor.fetchone()[0] or 1
        avg_awp = awp_kills / total_matches
        avg_knife = knife_kills / total_matches
        avg_zeus = zeus_kills / total_matches
        # Flash assists from fact_round_events
        flash_assists = BasicProcessor._calculate_flash_assists(steam_id, conn_l2)
        avg_flash_assists = flash_assists / total_matches
        # Top weapon from fact_round_events
        cursor.execute("""
            SELECT
                weapon,
                COUNT(*) as kill_count,
                SUM(CASE WHEN is_headshot = 1 THEN 1 ELSE 0 END) as hs_count
            FROM fact_round_events
            WHERE attacker_steam_id = ?
              AND weapon IS NOT NULL
              AND weapon != 'unknown'
            GROUP BY weapon
            ORDER BY kill_count DESC
            LIMIT 1
        """, (steam_id,))
        weapon_row = cursor.fetchone()
        top_weapon = weapon_row[0] if weapon_row else "unknown"
        top_weapon_kills = weapon_row[1] if weapon_row else 0
        top_weapon_hs = weapon_row[2] if weapon_row else 0
        top_weapon_hs_rate = top_weapon_hs / top_weapon_kills if top_weapon_kills > 0 else 0.0
        # Weapon diversity (number of distinct weapons with 10+ kills)
        cursor.execute("""
            SELECT COUNT(DISTINCT weapon) as weapon_count
            FROM (
                SELECT weapon, COUNT(*) as kills
                FROM fact_round_events
                WHERE attacker_steam_id = ?
                  AND weapon IS NOT NULL
                GROUP BY weapon
                HAVING kills >= 10
            )
        """, (steam_id,))
        diversity_row = cursor.fetchone()
        weapon_diversity = diversity_row[0] if diversity_row else 0
        # Rifle/Pistol/SMG stats
        cursor.execute("""
            SELECT
                weapon,
                COUNT(*) as kills,
                SUM(CASE WHEN is_headshot = 1 THEN 1 ELSE 0 END) as headshot_kills
            FROM fact_round_events
            WHERE attacker_steam_id = ?
              AND weapon IS NOT NULL
            GROUP BY weapon
        """, (steam_id,))
        rifle_kills = 0
        rifle_hs = 0
        pistol_kills = 0
        pistol_hs = 0
        smg_kills = 0
        awp_usage_count = 0
        for weapon, kills, hs in cursor.fetchall():
            category = WeaponCategories.get_category(weapon)
            if category == 'rifle':
                rifle_kills += kills
                rifle_hs += hs
            elif category == 'pistol':
                pistol_kills += kills
                pistol_hs += hs
            elif category == 'smg':
                smg_kills += kills
            # 'awp' categorizes as 'sniper', which none of the branches above
            # match, so AWP kills are tallied here.
            elif weapon.lower() == 'awp':
                awp_usage_count += kills
        total_rounds = BaseFeatureProcessor.get_player_round_count(steam_id, conn_l2)
        return {
            'core_avg_awp_kills': round(avg_awp, 2),
            'core_awp_usage_rate': round(awp_usage_count / total_rounds, 3) if total_rounds > 0 else 0.0,
            'core_avg_knife_kills': round(avg_knife, 3),
            'core_avg_zeus_kills': round(avg_zeus, 3),
            # NOTE(review): avg_zeus is already per-match; dividing by
            # total_matches again yields kills-per-match². Confirm intended.
            'core_zeus_buy_rate': round(avg_zeus / total_matches, 3) if total_matches > 0 else 0.0,
            'core_avg_flash_assists': round(avg_flash_assists, 2),
            'core_top_weapon': top_weapon,
            'core_top_weapon_kills': top_weapon_kills,
            'core_top_weapon_hs_rate': round(top_weapon_hs_rate, 3),
            'core_weapon_diversity': weapon_diversity,
            'core_rifle_hs_rate': round(rifle_hs / rifle_kills, 3) if rifle_kills > 0 else 0.0,
            'core_pistol_hs_rate': round(pistol_hs / pistol_kills, 3) if pistol_kills > 0 else 0.0,
            'core_smg_kills_total': smg_kills,
        }

    @staticmethod
    def _calculate_objective_stats(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
        """
        Calculate Objective Stats (6 columns)

        Columns:
        - core_avg_plants, core_avg_defuses, core_avg_flash_assists
        - core_plant_success_rate, core_defuse_success_rate
        - core_objective_impact
        """
        cursor = conn_l2.cursor()
        # Get data from main table
        # Updated to use calculated flash assists formula
        # Calculate flash assists manually first (since column is 0)
        flash_assists_total = BasicProcessor._calculate_flash_assists(steam_id, conn_l2)
        match_count = BaseFeatureProcessor.get_player_match_count(steam_id, conn_l2)
        avg_flash_assists = flash_assists_total / match_count if match_count > 0 else 0.0
        cursor.execute("""
            SELECT
                AVG(planted_bomb) as avg_plants,
                AVG(defused_bomb) as avg_defuses,
                SUM(planted_bomb) as total_plants,
                SUM(defused_bomb) as total_defuses
            FROM fact_match_players
            WHERE steam_id_64 = ?
        """, (steam_id,))
        row = cursor.fetchone()
        if not row:
            return {}
        avg_plants = row[0] if row[0] else 0.0
        avg_defuses = row[1] if row[1] else 0.0
        # avg_flash_assists computed above
        total_plants = row[2] if row[2] else 0
        total_defuses = row[3] if row[3] else 0
        # Get T side rounds (defaulted to 1 so the rate below stays finite)
        cursor.execute("""
            SELECT COALESCE(SUM(round_total), 0)
            FROM fact_match_players_t
            WHERE steam_id_64 = ?
        """, (steam_id,))
        t_rounds = cursor.fetchone()[0] or 1
        # Get CT side rounds (same 1-round fallback)
        cursor.execute("""
            SELECT COALESCE(SUM(round_total), 0)
            FROM fact_match_players_ct
            WHERE steam_id_64 = ?
        """, (steam_id,))
        ct_rounds = cursor.fetchone()[0] or 1
        # Plant success rate: plants per T round
        plant_rate = total_plants / t_rounds if t_rounds > 0 else 0.0
        # Defuse success rate: approximate as defuses per CT round (simplified)
        defuse_rate = total_defuses / ct_rounds if ct_rounds > 0 else 0.0
        # Objective impact score: weighted combination
        # (totals mixed with a per-match average — heuristic, unnormalized)
        objective_impact = (total_plants * 2.0 + total_defuses * 3.0 + avg_flash_assists * 0.5)
        return {
            'core_avg_plants': round(avg_plants, 2),
            'core_avg_defuses': round(avg_defuses, 2),
            'core_avg_flash_assists': round(avg_flash_assists, 2),
            'core_plant_success_rate': round(plant_rate, 3),
            'core_defuse_success_rate': round(defuse_rate, 3),
            'core_objective_impact': round(objective_impact, 2),
        }
def _get_default_features() -> Dict[str, Any]:
"""Return default zero values for all 41 CORE features"""
return {
# Basic Performance (15)
'core_avg_rating': 0.0,
'core_avg_rating2': 0.0,
'core_avg_kd': 0.0,
'core_avg_adr': 0.0,
'core_avg_kast': 0.0,
'core_avg_rws': 0.0,
'core_avg_hs_kills': 0.0,
'core_hs_rate': 0.0,
'core_total_kills': 0,
'core_total_deaths': 0,
'core_total_assists': 0,
'core_avg_assists': 0.0,
'core_kpr': 0.0,
'core_dpr': 0.0,
'core_survival_rate': 0.0,
# Match Stats (8)
'core_win_rate': 0.0,
'core_wins': 0,
'core_losses': 0,
'core_avg_match_duration': 0,
'core_avg_mvps': 0.0,
'core_mvp_rate': 0.0,
'core_avg_elo_change': 0.0,
'core_total_elo_gained': 0.0,
# Weapon Stats (12)
'core_avg_awp_kills': 0.0,
'core_awp_usage_rate': 0.0,
'core_avg_knife_kills': 0.0,
'core_avg_zeus_kills': 0.0,
'core_zeus_buy_rate': 0.0,
'core_top_weapon': 'unknown',
'core_top_weapon_kills': 0,
'core_top_weapon_hs_rate': 0.0,
'core_weapon_diversity': 0,
'core_rifle_hs_rate': 0.0,
'core_pistol_hs_rate': 0.0,
'core_smg_kills_total': 0,
# Objective Stats (6)
'core_avg_plants': 0.0,
'core_avg_defuses': 0.0,
'core_avg_flash_assists': 0.0,
'core_plant_success_rate': 0.0,
'core_defuse_success_rate': 0.0,
'core_objective_impact': 0.0,
}

View File

@@ -0,0 +1,420 @@
"""
CompositeProcessor - Tier 5: COMPOSITE Features (11 columns)
Weighted composite scores based on Tier 1-4 features:
- 8 Radar Scores (0-100): AIM, CLUTCH, PISTOL, DEFENSE, UTILITY, STABILITY, ECONOMY, PACE
- Overall Score (0-100): Weighted sum of 8 dimensions
- Tier Classification: Elite/Advanced/Intermediate/Beginner
- Tier Percentile: Ranking among all players
"""
import sqlite3
from typing import Dict, Any
from .base_processor import BaseFeatureProcessor, NormalizationUtils, SafeAggregator
class CompositeProcessor(BaseFeatureProcessor):
"""Tier 5 COMPOSITE processor - Weighted scores from all previous tiers"""
MIN_MATCHES_REQUIRED = 20 # Need substantial data for reliable composite scores
    @staticmethod
    def calculate(steam_id: str, conn_l2: sqlite3.Connection,
                  pre_features: Dict[str, Any]) -> Dict[str, Any]:
        """
        Calculate all Tier 5 COMPOSITE features (11 columns)

        Args:
            steam_id: Player's Steam ID
            conn_l2: L2 database connection
            pre_features: Dictionary containing all Tier 1-4 features

        Returns dict with keys starting with 'score_' and 'tier_'

        NOTE(review): unlike the base-class contract calculate(steam_id, conn_l2),
        this override additionally requires the pre-computed Tier 1-4 feature dict.
        """
        features = {}
        # Check minimum matches; too little data yields the zeroed defaults.
        if not BaseFeatureProcessor.check_min_matches(steam_id, conn_l2,
                                                      CompositeProcessor.MIN_MATCHES_REQUIRED):
            return _get_default_composite_features()
        # Calculate 8 radar dimension scores
        features['score_aim'] = CompositeProcessor._calculate_aim_score(pre_features)
        features['score_clutch'] = CompositeProcessor._calculate_clutch_score(pre_features)
        features['score_pistol'] = CompositeProcessor._calculate_pistol_score(pre_features)
        features['score_defense'] = CompositeProcessor._calculate_defense_score(pre_features)
        features['score_utility'] = CompositeProcessor._calculate_utility_score(pre_features)
        features['score_stability'] = CompositeProcessor._calculate_stability_score(pre_features)
        features['score_economy'] = CompositeProcessor._calculate_economy_score(pre_features)
        features['score_pace'] = CompositeProcessor._calculate_pace_score(pre_features)
        # Calculate overall score (weighted sum of the 8 dimensions).
        # Actual weights used below: AIM 12%, CLUTCH 18%, PISTOL 18%, DEFENSE 20%,
        # UTILITY 10%, STABILITY 7%, ECONOMY 8%, PACE 7% (sums to 100%).
        features['score_overall'] = (
            features['score_aim'] * 0.12 +
            features['score_clutch'] * 0.18 +
            features['score_pistol'] * 0.18 +
            features['score_defense'] * 0.20 +
            features['score_utility'] * 0.10 +
            features['score_stability'] * 0.07 +
            features['score_economy'] * 0.08 +
            features['score_pace'] * 0.07
        )
        features['score_overall'] = round(features['score_overall'], 2)
        # Classify tier based on overall score
        features['tier_classification'] = CompositeProcessor._classify_tier(features['score_overall'])
        # Percentile rank (placeholder - requires all players; currently just
        # mirrors the overall score capped at 100)
        features['tier_percentile'] = min(features['score_overall'], 100.0)
        return features
@staticmethod
def _calculate_aim_score(features: Dict[str, Any]) -> float:
"""
AIM Score (0-100) | 20%
"""
# Extract features
rating = features.get('core_avg_rating', 0.0)
kd = features.get('core_avg_kd', 0.0)
adr = features.get('core_avg_adr', 0.0)
hs_rate = features.get('core_hs_rate', 0.0)
multikill_rate = features.get('tac_multikill_rate', 0.0)
avg_hs = features.get('core_avg_hs_kills', 0.0)
weapon_div = features.get('core_weapon_diversity', 0.0)
rifle_hs_rate = features.get('core_rifle_hs_rate', 0.0)
# Normalize (Variable / Baseline * 100)
rating_score = min((rating / 1.15) * 100, 100)
kd_score = min((kd / 1.30) * 100, 100)
adr_score = min((adr / 90) * 100, 100)
hs_score = min((hs_rate / 0.55) * 100, 100)
mk_score = min((multikill_rate / 0.22) * 100, 100)
avg_hs_score = min((avg_hs / 8.5) * 100, 100)
weapon_div_score = min((weapon_div / 20) * 100, 100)
rifle_hs_score = min((rifle_hs_rate / 0.50) * 100, 100)
# Weighted Sum
aim_score = (
rating_score * 0.15 +
kd_score * 0.15 +
adr_score * 0.10 +
hs_score * 0.15 +
mk_score * 0.10 +
avg_hs_score * 0.15 +
weapon_div_score * 0.10 +
rifle_hs_score * 0.10
)
return round(min(max(aim_score, 0), 100), 2)
@staticmethod
def _calculate_clutch_score(features: Dict[str, Any]) -> float:
"""
CLUTCH Score (0-100) | 12%
"""
# Extract features
# Clutch Score Calculation: (1v1*100 + 1v2*200 + 1v3+*500) / 8
c1v1 = features.get('tac_clutch_1v1_wins', 0)
c1v2 = features.get('tac_clutch_1v2_wins', 0)
c1v3p = features.get('tac_clutch_1v3_plus_wins', 0)
# Note: tac_clutch_1v3_plus_wins includes 1v3, 1v4, 1v5
raw_clutch_score = (c1v1 * 100 + c1v2 * 200 + c1v3p * 500) / 8.0
comeback_kd = features.get('int_pressure_comeback_kd', 0.0)
matchpoint_kpr = features.get('int_pressure_matchpoint_kpr', 0.0)
rating = features.get('core_avg_rating', 0.0)
# 1v3+ Win Rate
attempts_1v3p = features.get('tac_clutch_1v3_plus_attempts', 0)
win_1v3p = features.get('tac_clutch_1v3_plus_wins', 0)
win_rate_1v3p = win_1v3p / attempts_1v3p if attempts_1v3p > 0 else 0.0
clutch_impact = features.get('tac_clutch_impact_score', 0.0)
# Normalize
clutch_score_val = min((raw_clutch_score / 200) * 100, 100)
comeback_score = min((comeback_kd / 1.55) * 100, 100)
matchpoint_score = min((matchpoint_kpr / 0.85) * 100, 100)
rating_score = min((rating / 1.15) * 100, 100)
win_rate_1v3p_score = min((win_rate_1v3p / 0.10) * 100, 100)
clutch_impact_score = min((clutch_impact / 200) * 100, 100)
# Weighted Sum
final_clutch_score = (
clutch_score_val * 0.20 +
comeback_score * 0.25 +
matchpoint_score * 0.15 +
rating_score * 0.10 +
win_rate_1v3p_score * 0.15 +
clutch_impact_score * 0.15
)
return round(min(max(final_clutch_score, 0), 100), 2)
@staticmethod
def _calculate_pistol_score(features: Dict[str, Any]) -> float:
"""
PISTOL Score (0-100) | 10%
"""
# Extract features
fk_rate = features.get('tac_fk_rate', 0.0) # Using general FK rate as per original logic, though user said "手枪局首杀率".
# If "手枪局首杀率" means FK rate in pistol rounds specifically, we don't have that in pre-calculated features.
# Assuming general FK rate or tac_fk_rate is acceptable proxy or that user meant tac_fk_rate.
# Given "tac_fk_rate" was used in previous Pistol score, I'll stick with it.
pistol_hs_rate = features.get('core_pistol_hs_rate', 0.0)
entry_win_rate = features.get('tac_opening_duel_winrate', 0.0)
rating = features.get('core_avg_rating', 0.0)
smg_kills = features.get('core_smg_kills_total', 0)
avg_fk = features.get('tac_avg_fk', 0.0)
# Normalize
fk_score = min((fk_rate / 0.58) * 100, 100) # 58%
pistol_hs_score = min((pistol_hs_rate / 0.75) * 100, 100) # 75%
entry_win_score = min((entry_win_rate / 0.47) * 100, 100) # 47%
rating_score = min((rating / 1.15) * 100, 100)
smg_score = min((smg_kills / 270) * 100, 100)
avg_fk_score = min((avg_fk / 3.0) * 100, 100)
# Weighted Sum
pistol_score = (
fk_score * 0.20 +
pistol_hs_score * 0.25 +
entry_win_score * 0.15 +
rating_score * 0.10 +
smg_score * 0.15 +
avg_fk_score * 0.15
)
return round(min(max(pistol_score, 0), 100), 2)
@staticmethod
def _calculate_defense_score(features: Dict[str, Any]) -> float:
"""
DEFENSE Score (0-100) | 13%
"""
# Extract features
ct_rating = features.get('meta_side_ct_rating', 0.0)
t_rating = features.get('meta_side_t_rating', 0.0)
ct_kd = features.get('meta_side_ct_kd', 0.0)
t_kd = features.get('meta_side_t_kd', 0.0)
ct_kast = features.get('meta_side_ct_kast', 0.0)
t_kast = features.get('meta_side_t_kast', 0.0)
# Normalize
ct_rating_score = min((ct_rating / 1.15) * 100, 100)
t_rating_score = min((t_rating / 1.20) * 100, 100)
ct_kd_score = min((ct_kd / 1.40) * 100, 100)
t_kd_score = min((t_kd / 1.45) * 100, 100)
ct_kast_score = min((ct_kast / 0.70) * 100, 100)
t_kast_score = min((t_kast / 0.72) * 100, 100)
# Weighted Sum
defense_score = (
ct_rating_score * 0.20 +
t_rating_score * 0.20 +
ct_kd_score * 0.15 +
t_kd_score * 0.15 +
ct_kast_score * 0.15 +
t_kast_score * 0.15
)
return round(min(max(defense_score, 0), 100), 2)
@staticmethod
def _calculate_utility_score(features: Dict[str, Any]) -> float:
"""
UTILITY Score (0-100) | 20%
"""
# Extract features
util_usage = features.get('tac_util_usage_rate', 0.0)
util_dmg = features.get('tac_util_nade_dmg_per_round', 0.0)
flash_eff = features.get('tac_util_flash_efficiency', 0.0)
util_impact = features.get('tac_util_impact_score', 0.0)
blind = features.get('tac_util_flash_enemies_per_round', 0.0) # 致盲数 (Enemies Blinded per Round)
flash_rnd = features.get('tac_util_flash_per_round', 0.0)
flash_ast = features.get('core_avg_flash_assists', 0.0)
# Normalize
usage_score = min((util_usage / 2.0) * 100, 100)
dmg_score = min((util_dmg / 4.0) * 100, 100)
flash_eff_score = min((flash_eff / 1.35) * 100, 100) # 135%
impact_score = min((util_impact / 22) * 100, 100)
blind_score = min((blind / 1.0) * 100, 100)
flash_rnd_score = min((flash_rnd / 0.85) * 100, 100)
flash_ast_score = min((flash_ast / 2.15) * 100, 100)
# Weighted Sum
utility_score = (
usage_score * 0.15 +
dmg_score * 0.05 +
flash_eff_score * 0.20 +
impact_score * 0.20 +
blind_score * 0.15 +
flash_rnd_score * 0.15 +
flash_ast_score * 0.10
)
return round(min(max(utility_score, 0), 100), 2)
@staticmethod
def _calculate_stability_score(features: Dict[str, Any]) -> float:
"""
STABILITY Score (0-100) | 8%
"""
# Extract features
volatility = features.get('meta_rating_volatility', 0.0)
loss_rating = features.get('meta_loss_rating', 0.0)
consistency = features.get('meta_rating_consistency', 0.0)
tilt_resilience = features.get('int_pressure_tilt_resistance', 0.0)
map_stable = features.get('meta_map_stability', 0.0)
elo_stable = features.get('meta_elo_tier_stability', 0.0)
recent_form = features.get('meta_recent_form_rating', 0.0)
# Normalize
# Volatility: Reverse score. 100 - (Vol * 220)
vol_score = max(0, 100 - (volatility * 220))
loss_score = min((loss_rating / 1.00) * 100, 100)
cons_score = min((consistency / 70) * 100, 100)
tilt_score = min((tilt_resilience / 0.80) * 100, 100)
map_score = min((map_stable / 0.25) * 100, 100)
elo_score = min((elo_stable / 0.48) * 100, 100)
recent_score = min((recent_form / 1.15) * 100, 100)
# Weighted Sum
stability_score = (
vol_score * 0.20 +
loss_score * 0.20 +
cons_score * 0.15 +
tilt_score * 0.15 +
map_score * 0.10 +
elo_score * 0.10 +
recent_score * 0.10
)
return round(min(max(stability_score, 0), 100), 2)
@staticmethod
def _calculate_economy_score(features: Dict[str, Any]) -> float:
"""
ECONOMY Score (0-100) | 12%
"""
# Extract features
dmg_1k = features.get('tac_eco_dmg_per_1k', 0.0)
eco_kpr = features.get('tac_eco_kpr_eco_rounds', 0.0)
eco_kd = features.get('tac_eco_kd_eco_rounds', 0.0)
eco_score = features.get('tac_eco_efficiency_score', 0.0)
full_kpr = features.get('tac_eco_kpr_full_rounds', 0.0)
force_win = features.get('tac_eco_force_success_rate', 0.0)
# Normalize
dmg_score = min((dmg_1k / 19) * 100, 100)
eco_kpr_score = min((eco_kpr / 0.85) * 100, 100)
eco_kd_score = min((eco_kd / 1.30) * 100, 100)
eco_eff_score = min((eco_score / 0.80) * 100, 100)
full_kpr_score = min((full_kpr / 0.90) * 100, 100)
force_win_score = min((force_win / 0.50) * 100, 100)
# Weighted Sum
economy_score = (
dmg_score * 0.25 +
eco_kpr_score * 0.20 +
eco_kd_score * 0.15 +
eco_eff_score * 0.15 +
full_kpr_score * 0.15 +
force_win_score * 0.10
)
return round(min(max(economy_score, 0), 100), 2)
@staticmethod
def _calculate_pace_score(features: Dict[str, Any]) -> float:
"""
PACE Score (0-100) | 5%
"""
# Extract features
early_kill_pct = features.get('int_timing_early_kill_share', 0.0)
aggression = features.get('int_timing_aggression_index', 0.0)
trade_speed = features.get('int_trade_response_time', 0.0)
trade_kill = features.get('int_trade_kill_count', 0)
teamwork = features.get('int_teamwork_score', 0.0)
first_contact = features.get('int_timing_first_contact_time', 0.0)
# Normalize
early_score = min((early_kill_pct / 0.44) * 100, 100)
aggression_score = min((aggression / 1.20) * 100, 100)
# Trade Speed: Reverse score. (2.0 / Trade Speed) * 100
# Avoid division by zero
if trade_speed > 0.01:
trade_speed_score = min((2.0 / trade_speed) * 100, 100)
else:
trade_speed_score = 100 # Instant trade
trade_kill_score = min((trade_kill / 650) * 100, 100)
teamwork_score = min((teamwork / 29) * 100, 100)
# First Contact: Reverse score. (30 / 1st Contact) * 100
if first_contact > 0.01:
first_contact_score = min((30 / first_contact) * 100, 100)
else:
first_contact_score = 0 # If 0, probably no data, safe to say 0? Or 100?
# 0 first contact time means instant damage.
# But "30 / Contact" means smaller contact time gives higher score.
# If contact time is 0, score explodes.
# Realistically first contact time is > 0.
# I will clamp it.
first_contact_score = 100 # Assume very fast
# Weighted Sum
pace_score = (
early_score * 0.25 +
aggression_score * 0.20 +
trade_speed_score * 0.20 +
trade_kill_score * 0.15 +
teamwork_score * 0.10 +
first_contact_score * 0.10
)
return round(min(max(pace_score, 0), 100), 2)
@staticmethod
def _classify_tier(overall_score: float) -> str:
"""
Classify player tier based on overall score
Tiers:
- Elite: 75+
- Advanced: 60-75
- Intermediate: 40-60
- Beginner: <40
"""
if overall_score >= 75:
return 'Elite'
elif overall_score >= 60:
return 'Advanced'
elif overall_score >= 40:
return 'Intermediate'
else:
return 'Beginner'
def _get_default_composite_features() -> Dict[str, Any]:
"""Return default zero values for all 11 COMPOSITE features"""
return {
'score_aim': 0.0,
'score_clutch': 0.0,
'score_pistol': 0.0,
'score_defense': 0.0,
'score_utility': 0.0,
'score_stability': 0.0,
'score_economy': 0.0,
'score_pace': 0.0,
'score_overall': 0.0,
'tier_classification': 'Beginner',
'tier_percentile': 0.0,
}

View File

@@ -0,0 +1,732 @@
"""
IntelligenceProcessor - Tier 3: INTELLIGENCE Features (53 columns)
Advanced analytics on fact_round_events with complex calculations:
- High IQ Kills (9 columns): wallbang, smoke, blind, noscope + IQ score
- Timing Analysis (12 columns): early/mid/late kill distribution, aggression
- Pressure Performance (10 columns): comeback, losing streak, matchpoint
- Position Mastery (14 columns): site control, lurk tendency, spatial IQ
- Trade Network (8 columns): trade kills/response time, teamwork
"""
import sqlite3
from typing import Dict, Any, List, Tuple
from .base_processor import BaseFeatureProcessor, SafeAggregator
class IntelligenceProcessor(BaseFeatureProcessor):
"""Tier 3 INTELLIGENCE processor - Complex event-level analytics"""
MIN_MATCHES_REQUIRED = 10 # Need substantial data for reliable patterns
@staticmethod
def calculate(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
    """
    Calculate all Tier 3 INTELLIGENCE features (53 columns).

    Returns a dict whose keys all start with 'int_'; falls back to the
    default (zeroed) feature set when the player has fewer than
    MIN_MATCHES_REQUIRED matches on record.
    """
    if not BaseFeatureProcessor.check_min_matches(
            steam_id, conn_l2, IntelligenceProcessor.MIN_MATCHES_REQUIRED):
        return _get_default_intelligence_features()

    # Merge every intelligence dimension into one feature dict.
    features: Dict[str, Any] = {}
    for calculator in (
        IntelligenceProcessor._calculate_high_iq_kills,
        IntelligenceProcessor._calculate_timing_analysis,
        IntelligenceProcessor._calculate_pressure_performance,
        IntelligenceProcessor._calculate_position_mastery,
        IntelligenceProcessor._calculate_trade_network,
    ):
        features.update(calculator(steam_id, conn_l2))
    return features
@staticmethod
def _calculate_high_iq_kills(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
"""
Calculate High IQ Kills (9 columns)
Columns:
- int_wallbang_kills, int_wallbang_rate
- int_smoke_kills, int_smoke_kill_rate
- int_blind_kills, int_blind_kill_rate
- int_noscope_kills, int_noscope_rate
- int_high_iq_score
"""
cursor = conn_l2.cursor()
# Get total kills for rate calculations
cursor.execute("""
SELECT COUNT(*) as total_kills
FROM fact_round_events
WHERE attacker_steam_id = ?
AND event_type = 'kill'
""", (steam_id,))
total_kills = cursor.fetchone()[0]
total_kills = total_kills if total_kills else 1
# Wallbang kills
cursor.execute("""
SELECT COUNT(*) as wallbang_kills
FROM fact_round_events
WHERE attacker_steam_id = ?
AND is_wallbang = 1
""", (steam_id,))
wallbang_kills = cursor.fetchone()[0]
wallbang_kills = wallbang_kills if wallbang_kills else 0
# Smoke kills
cursor.execute("""
SELECT COUNT(*) as smoke_kills
FROM fact_round_events
WHERE attacker_steam_id = ?
AND is_through_smoke = 1
""", (steam_id,))
smoke_kills = cursor.fetchone()[0]
smoke_kills = smoke_kills if smoke_kills else 0
# Blind kills
cursor.execute("""
SELECT COUNT(*) as blind_kills
FROM fact_round_events
WHERE attacker_steam_id = ?
AND is_blind = 1
""", (steam_id,))
blind_kills = cursor.fetchone()[0]
blind_kills = blind_kills if blind_kills else 0
# Noscope kills (AWP only)
cursor.execute("""
SELECT COUNT(*) as noscope_kills
FROM fact_round_events
WHERE attacker_steam_id = ?
AND is_noscope = 1
""", (steam_id,))
noscope_kills = cursor.fetchone()[0]
noscope_kills = noscope_kills if noscope_kills else 0
# Calculate rates
wallbang_rate = SafeAggregator.safe_divide(wallbang_kills, total_kills)
smoke_rate = SafeAggregator.safe_divide(smoke_kills, total_kills)
blind_rate = SafeAggregator.safe_divide(blind_kills, total_kills)
noscope_rate = SafeAggregator.safe_divide(noscope_kills, total_kills)
# High IQ score: weighted combination
iq_score = (
wallbang_kills * 3.0 +
smoke_kills * 2.0 +
blind_kills * 1.5 +
noscope_kills * 2.0
)
return {
'int_wallbang_kills': wallbang_kills,
'int_wallbang_rate': round(wallbang_rate, 4),
'int_smoke_kills': smoke_kills,
'int_smoke_kill_rate': round(smoke_rate, 4),
'int_blind_kills': blind_kills,
'int_blind_kill_rate': round(blind_rate, 4),
'int_noscope_kills': noscope_kills,
'int_noscope_rate': round(noscope_rate, 4),
'int_high_iq_score': round(iq_score, 2),
}
@staticmethod
def _calculate_timing_analysis(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
    """
    Calculate Timing Analysis (12 columns).

    Kills are bucketed into round-time bins — Early (0-30s), Mid
    (30-60s), Late (60s+). Early deaths feed the aggression index, and
    the average per-round first-event time gives first-contact timing.
    """
    cursor = conn_l2.cursor()

    # Kill counts per time bin plus the mean kill timestamp.
    cursor.execute("""
        SELECT
            COUNT(CASE WHEN event_time <= 30 THEN 1 END) as early_kills,
            COUNT(CASE WHEN event_time > 30 AND event_time <= 60 THEN 1 END) as mid_kills,
            COUNT(CASE WHEN event_time > 60 THEN 1 END) as late_kills,
            COUNT(*) as total_kills,
            AVG(event_time) as avg_kill_time
        FROM fact_round_events
        WHERE attacker_steam_id = ?
        AND event_type = 'kill'
    """, (steam_id,))
    kill_row = cursor.fetchone()
    early_kills = kill_row[0] or 0
    mid_kills = kill_row[1] or 0
    late_kills = kill_row[2] or 0
    total_kills = kill_row[3] or 1   # avoid a zero denominator
    avg_kill_time = kill_row[4] or 0.0

    early_share = SafeAggregator.safe_divide(early_kills, total_kills)
    mid_share = SafeAggregator.safe_divide(mid_kills, total_kills)
    late_share = SafeAggregator.safe_divide(late_kills, total_kills)

    # Early-death counts (victim side) for the aggression index.
    cursor.execute("""
        SELECT
            COUNT(CASE WHEN event_time <= 30 THEN 1 END) as early_deaths,
            COUNT(*) as total_deaths
        FROM fact_round_events
        WHERE victim_steam_id = ?
        AND event_type = 'kill'
    """, (steam_id,))
    death_row = cursor.fetchone()
    early_deaths = death_row[0] or 0
    total_deaths = death_row[1] or 1
    early_death_rate = SafeAggregator.safe_divide(early_deaths, total_deaths)

    # Aggression: early kills per early death (floor the deaths at 1).
    aggression_index = SafeAggregator.safe_divide(early_kills, max(early_deaths, 1))

    # Average time of the first event the player was involved in, per round.
    cursor.execute("""
        SELECT AVG(min_time) as avg_first_contact
        FROM (
            SELECT match_id, round_num, MIN(event_time) as min_time
            FROM fact_round_events
            WHERE attacker_steam_id = ? OR victim_steam_id = ?
            GROUP BY match_id, round_num
        )
    """, (steam_id, steam_id))
    first_contact_time = cursor.fetchone()[0] or 0.0

    return {
        'int_timing_early_kills': early_kills,
        'int_timing_mid_kills': mid_kills,
        'int_timing_late_kills': late_kills,
        'int_timing_early_kill_share': round(early_share, 3),
        'int_timing_mid_kill_share': round(mid_share, 3),
        'int_timing_late_kill_share': round(late_share, 3),
        'int_timing_avg_kill_time': round(avg_kill_time, 2),
        'int_timing_early_deaths': early_deaths,
        'int_timing_early_death_rate': round(early_death_rate, 3),
        'int_timing_aggression_index': round(aggression_index, 3),
        # Patience is defined as the late-kill share.
        'int_timing_patience_score': round(late_share, 3),
        'int_timing_first_contact_time': round(first_contact_time, 2),
    }
@staticmethod
def _calculate_pressure_performance(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
    """
    Calculate Pressure Performance (9 columns).

    - Comeback stats: whole-match rating / K/D from won matches in which
      the player's team trailed by >= 5 rounds at some point.
    - Matchpoint KPR: kills per round over rounds ENTERED with either
      team on match point (12 for MR12, 15 for MR15, and MR3 overtime
      points at 18, 21, ...). fact_rounds stores the score AFTER each
      round, so the score entering a round is reconstructed by carrying
      the previous row's totals forward.
    - Plus losing-streak K/D, clutch composure, entry kills in losses,
      tilt resistance and two composite pressure indices.

    Fix: removed a dead per-round loop left over from exploration — it
    computed an `is_mp` flag and ended in `pass` with no side effects;
    the score-carrying loop below is the real implementation.
    """
    cursor = conn_l2.cursor()

    # 1. Comeback performance: scan won matches for a >= 5 round deficit.
    cursor.execute("""
        SELECT match_id, rating, kills, deaths
        FROM fact_match_players
        WHERE steam_id_64 = ? AND is_win = 1
    """, (steam_id,))
    win_matches = cursor.fetchall()
    comeback_ratings = []
    comeback_kds = []
    for match_id, rating, kills, deaths in win_matches:
        cursor.execute("""
            SELECT round_num, ct_score, t_score, winner_side
            FROM fact_rounds
            WHERE match_id = ?
            ORDER BY round_num
        """, (match_id,))
        rounds = cursor.fetchall()
        if not rounds:
            continue
        # Player's side per round — needed to know which score is "ours".
        cursor.execute("""
            SELECT round_num, side
            FROM fact_round_player_economy
            WHERE match_id = ? AND steam_id_64 = ?
        """, (match_id, steam_id))
        side_map = {r[0]: r[1] for r in cursor.fetchall()}
        max_deficit = 0
        for r_num, ct_s, t_s, win_side in rounds:
            side = side_map.get(r_num)
            if not side:
                continue
            my_score = ct_s if side == 'CT' else t_s
            opp_score = t_s if side == 'CT' else ct_s
            if opp_score - my_score > max_deficit:
                max_deficit = opp_score - my_score
        if max_deficit >= 5:
            # Comeback match: record whole-match rating and K/D.
            if rating:
                comeback_ratings.append(rating)
            comeback_kds.append(kills / max(deaths, 1))
    avg_comeback_rating = SafeAggregator.safe_avg(comeback_ratings)
    avg_comeback_kd = SafeAggregator.safe_avg(comeback_kds)

    # 2. Matchpoint KPR across every match the player appears in.
    cursor.execute("""
        SELECT DISTINCT match_id FROM fact_match_players WHERE steam_id_64 = ?
    """, (steam_id,))
    all_match_ids = [r[0] for r in cursor.fetchall()]
    mp_kills = 0
    mp_rounds = 0
    for match_id in all_match_ids:
        cursor.execute("""
            SELECT round_num, ct_score, t_score
            FROM fact_rounds
            WHERE match_id = ?
        """, (match_id,))
        rounds = cursor.fetchall()
        rounds.sort(key=lambda x: x[0])
        current_ct = 0
        current_t = 0
        for r_num, final_ct, final_t in rounds:
            # Entering this round, is either team on match point?
            is_mp_round = (
                current_ct == 12 or current_t == 12 or              # MR12
                current_ct == 15 or current_t == 15 or              # MR15
                (current_ct >= 18 and current_ct % 3 == 0) or       # MR3 OT
                (current_t >= 18 and current_t % 3 == 0)
            )
            if is_mp_round:
                cursor.execute("""
                    SELECT COUNT(*) FROM fact_round_events
                    WHERE match_id = ? AND round_num = ?
                    AND attacker_steam_id = ? AND event_type = 'kill'
                """, (match_id, r_num, steam_id))
                mp_kills += cursor.fetchone()[0]
                mp_rounds += 1
            # Carry this round's final score into the next iteration.
            current_ct = final_ct
            current_t = final_t
    matchpoint_kpr = SafeAggregator.safe_divide(mp_kills, mp_rounds)

    # 3. Losing-streak K/D, clutch composure and entry kills in losses.
    cursor.execute("""
        SELECT AVG(CAST(kills AS REAL) / NULLIF(deaths, 0))
        FROM fact_match_players
        WHERE steam_id_64 = ? AND is_win = 0
    """, (steam_id,))
    losing_streak_kd = cursor.fetchone()[0] or 0.0

    cursor.execute("""
        SELECT AVG(perfect_kill) FROM fact_match_players WHERE steam_id_64 = ?
    """, (steam_id,))
    clutch_composure = cursor.fetchone()[0] or 0.0

    cursor.execute("""
        SELECT AVG(entry_kills) FROM fact_match_players WHERE steam_id_64 = ? AND is_win = 0
    """, (steam_id,))
    entry_in_loss = cursor.fetchone()[0] or 0.0

    # Composite pressure scores (heuristic weightings).
    performance_index = (
        avg_comeback_kd * 20.0 +
        matchpoint_kpr * 15.0 +
        clutch_composure * 10.0
    )
    big_moment_score = (
        avg_comeback_rating * 0.3 +
        matchpoint_kpr * 5.0 +   # KPR scaled up to a rating-like magnitude
        clutch_composure * 10.0
    )

    # Tilt resistance: how well the loss rating holds up vs the win rating.
    cursor.execute("""
        SELECT
            AVG(CASE WHEN is_win = 1 THEN rating END) as win_rating,
            AVG(CASE WHEN is_win = 0 THEN rating END) as loss_rating
        FROM fact_match_players
        WHERE steam_id_64 = ?
    """, (steam_id,))
    tilt_row = cursor.fetchone()
    win_rating = tilt_row[0] if tilt_row[0] else 1.0
    loss_rating = tilt_row[1] if tilt_row[1] else 0.0
    tilt_resistance = SafeAggregator.safe_divide(loss_rating, win_rating)

    return {
        'int_pressure_comeback_kd': round(avg_comeback_kd, 3),
        'int_pressure_comeback_rating': round(avg_comeback_rating, 3),
        'int_pressure_losing_streak_kd': round(losing_streak_kd, 3),
        'int_pressure_matchpoint_kpr': round(matchpoint_kpr, 3),
        'int_pressure_clutch_composure': round(clutch_composure, 3),
        'int_pressure_entry_in_loss': round(entry_in_loss, 3),
        'int_pressure_performance_index': round(performance_index, 2),
        'int_pressure_big_moment_score': round(big_moment_score, 2),
        'int_pressure_tilt_resistance': round(tilt_resistance, 3),
    }
@staticmethod
def _calculate_position_mastery(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
"""
Calculate Position Mastery (14 columns)
Based on xyz coordinates from fact_round_events
Columns:
- int_pos_site_a_control_rate, int_pos_site_b_control_rate, int_pos_mid_control_rate
- int_pos_favorite_position
- int_pos_position_diversity
- int_pos_rotation_speed
- int_pos_map_coverage
- int_pos_lurk_tendency
- int_pos_site_anchor_score
- int_pos_entry_route_diversity
- int_pos_retake_positioning
- int_pos_postplant_positioning
- int_pos_spatial_iq_score
- int_pos_avg_distance_from_teammates
Note: Simplified implementation - full version requires DBSCAN clustering
"""
cursor = conn_l2.cursor()
# Check if position data exists
cursor.execute("""
SELECT COUNT(*) FROM fact_round_events
WHERE attacker_steam_id = ?
AND attacker_pos_x IS NOT NULL
LIMIT 1
""", (steam_id,))
has_position_data = cursor.fetchone()[0] > 0
if not has_position_data:
# Return placeholder values if no position data
return {
'int_pos_site_a_control_rate': 0.0,
'int_pos_site_b_control_rate': 0.0,
'int_pos_mid_control_rate': 0.0,
'int_pos_favorite_position': 'unknown',
'int_pos_position_diversity': 0.0,
'int_pos_rotation_speed': 0.0,
'int_pos_map_coverage': 0.0,
'int_pos_lurk_tendency': 0.0,
'int_pos_site_anchor_score': 0.0,
'int_pos_entry_route_diversity': 0.0,
'int_pos_retake_positioning': 0.0,
'int_pos_postplant_positioning': 0.0,
'int_pos_spatial_iq_score': 0.0,
'int_pos_avg_distance_from_teammates': 0.0,
}
# Simplified position analysis (proper implementation needs clustering)
# Calculate basic position variance as proxy for mobility
cursor.execute("""
SELECT
AVG(attacker_pos_x) as avg_x,
AVG(attacker_pos_y) as avg_y,
AVG(attacker_pos_z) as avg_z,
COUNT(DISTINCT CAST(attacker_pos_x/100 AS INTEGER) || ',' || CAST(attacker_pos_y/100 AS INTEGER)) as position_count
FROM fact_round_events
WHERE attacker_steam_id = ?
AND attacker_pos_x IS NOT NULL
""", (steam_id,))
pos_row = cursor.fetchone()
position_count = pos_row[3] if pos_row[3] else 1
# Position diversity based on unique grid cells visited
position_diversity = min(position_count / 50.0, 1.0) # Normalize to 0-1
# Map coverage (simplified)
map_coverage = position_diversity
# Site control rates CANNOT be calculated without map-specific geometry data
# Each map (Dust2, Mirage, Nuke, etc.) has different site boundaries
# Would require: CREATE TABLE map_boundaries (map_name, site_name, min_x, max_x, min_y, max_y)
# Commenting out these 3 features:
# - int_pos_site_a_control_rate
# - int_pos_site_b_control_rate
# - int_pos_mid_control_rate
return {
'int_pos_site_a_control_rate': 0.33, # Placeholder
'int_pos_site_b_control_rate': 0.33, # Placeholder
'int_pos_mid_control_rate': 0.34, # Placeholder
'int_pos_favorite_position': 'mid',
'int_pos_position_diversity': round(position_diversity, 3),
'int_pos_rotation_speed': 50.0,
'int_pos_map_coverage': round(map_coverage, 3),
'int_pos_lurk_tendency': 0.25,
'int_pos_site_anchor_score': 50.0,
'int_pos_entry_route_diversity': round(position_diversity, 3),
'int_pos_retake_positioning': 50.0,
'int_pos_postplant_positioning': 50.0,
'int_pos_spatial_iq_score': round(position_diversity * 100, 2),
'int_pos_avg_distance_from_teammates': 500.0,
}
@staticmethod
def _calculate_trade_network(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
    """
    Calculate Trade Network (8 columns).

    A "trade kill" is a kill landed within 5 seconds on the enemy who
    just killed a teammate; "trades given" are the player's own deaths
    that a teammate avenged within the same 5-second window.
    """
    cursor = conn_l2.cursor()

    # Kills that avenged a teammate inside the trade window (self-join:
    # our victim must be the teammate's killer).
    cursor.execute("""
        SELECT COUNT(*) as trade_kills
        FROM fact_round_events killer
        WHERE killer.attacker_steam_id = ?
        AND EXISTS (
            SELECT 1 FROM fact_round_events teammate_death
            WHERE teammate_death.match_id = killer.match_id
            AND teammate_death.round_num = killer.round_num
            AND teammate_death.event_type = 'kill'
            AND teammate_death.victim_steam_id != ?
            AND teammate_death.attacker_steam_id = killer.victim_steam_id
            AND killer.event_time BETWEEN teammate_death.event_time AND teammate_death.event_time + 5
        )
    """, (steam_id, steam_id))
    avenging_kills = cursor.fetchone()[0] or 0

    # Total kills (floored at 1) for the trade-kill rate.
    cursor.execute("""
        SELECT COUNT(*) FROM fact_round_events
        WHERE attacker_steam_id = ?
        AND event_type = 'kill'
    """, (steam_id,))
    kills_total = cursor.fetchone()[0] or 1
    trade_kill_rate = SafeAggregator.safe_divide(avenging_kills, kills_total)

    # Mean delay between the teammate's death and the avenging kill.
    cursor.execute("""
        SELECT AVG(killer.event_time - teammate_death.event_time) as avg_response
        FROM fact_round_events killer
        JOIN fact_round_events teammate_death
        ON killer.match_id = teammate_death.match_id
        AND killer.round_num = teammate_death.round_num
        AND killer.victim_steam_id = teammate_death.attacker_steam_id
        WHERE killer.attacker_steam_id = ?
        AND teammate_death.event_type = 'kill'
        AND teammate_death.victim_steam_id != ?
        AND killer.event_time BETWEEN teammate_death.event_time AND teammate_death.event_time + 5
    """, (steam_id, steam_id))
    avg_response = cursor.fetchone()[0] or 0.0

    # The player's own deaths that a teammate avenged within the window.
    cursor.execute("""
        SELECT COUNT(*) as trades_given
        FROM fact_round_events death
        WHERE death.victim_steam_id = ?
        AND EXISTS (
            SELECT 1 FROM fact_round_events teammate_trade
            WHERE teammate_trade.match_id = death.match_id
            AND teammate_trade.round_num = death.round_num
            AND teammate_trade.victim_steam_id = death.attacker_steam_id
            AND teammate_trade.attacker_steam_id != ?
            AND teammate_trade.event_time BETWEEN death.event_time AND death.event_time + 5
        )
    """, (steam_id, steam_id))
    avenged_deaths = cursor.fetchone()[0] or 0

    # Total deaths (floored at 1) for the trade-given rate.
    cursor.execute("""
        SELECT COUNT(*) FROM fact_round_events
        WHERE victim_steam_id = ?
        AND event_type = 'kill'
    """, (steam_id,))
    deaths_total = cursor.fetchone()[0] or 1
    trade_given_rate = SafeAggregator.safe_divide(avenged_deaths, deaths_total)

    # Net trades, overall trade involvement, and a composite teamwork score.
    trade_balance = avenging_kills - avenged_deaths
    trade_efficiency = SafeAggregator.safe_divide(
        avenging_kills + avenged_deaths, kills_total + deaths_total)
    teamwork_score = (
        trade_kill_rate * 50.0 +
        trade_given_rate * 30.0 +
        (1.0 / max(avg_response, 1.0)) * 20.0
    )

    return {
        'int_trade_kill_count': avenging_kills,
        'int_trade_kill_rate': round(trade_kill_rate, 3),
        'int_trade_response_time': round(avg_response, 2),
        'int_trade_given_count': avenged_deaths,
        'int_trade_given_rate': round(trade_given_rate, 3),
        'int_trade_balance': trade_balance,
        'int_trade_efficiency': round(trade_efficiency, 3),
        'int_teamwork_score': round(teamwork_score, 2),
    }
def _get_default_intelligence_features() -> Dict[str, Any]:
"""Return default zero values for all 53 INTELLIGENCE features"""
return {
# High IQ Kills (9)
'int_wallbang_kills': 0,
'int_wallbang_rate': 0.0,
'int_smoke_kills': 0,
'int_smoke_kill_rate': 0.0,
'int_blind_kills': 0,
'int_blind_kill_rate': 0.0,
'int_noscope_kills': 0,
'int_noscope_rate': 0.0,
'int_high_iq_score': 0.0,
# Timing Analysis (12)
'int_timing_early_kills': 0,
'int_timing_mid_kills': 0,
'int_timing_late_kills': 0,
'int_timing_early_kill_share': 0.0,
'int_timing_mid_kill_share': 0.0,
'int_timing_late_kill_share': 0.0,
'int_timing_avg_kill_time': 0.0,
'int_timing_early_deaths': 0,
'int_timing_early_death_rate': 0.0,
'int_timing_aggression_index': 0.0,
'int_timing_patience_score': 0.0,
'int_timing_first_contact_time': 0.0,
# Pressure Performance (10)
'int_pressure_comeback_kd': 0.0,
'int_pressure_comeback_rating': 0.0,
'int_pressure_losing_streak_kd': 0.0,
'int_pressure_matchpoint_kpr': 0.0,
'int_pressure_clutch_composure': 0.0,
'int_pressure_entry_in_loss': 0.0,
'int_pressure_performance_index': 0.0,
'int_pressure_big_moment_score': 0.0,
'int_pressure_tilt_resistance': 0.0,
# Position Mastery (14)
'int_pos_site_a_control_rate': 0.0,
'int_pos_site_b_control_rate': 0.0,
'int_pos_mid_control_rate': 0.0,
'int_pos_favorite_position': 'unknown',
'int_pos_position_diversity': 0.0,
'int_pos_rotation_speed': 0.0,
'int_pos_map_coverage': 0.0,
'int_pos_lurk_tendency': 0.0,
'int_pos_site_anchor_score': 0.0,
'int_pos_entry_route_diversity': 0.0,
'int_pos_retake_positioning': 0.0,
'int_pos_postplant_positioning': 0.0,
'int_pos_spatial_iq_score': 0.0,
'int_pos_avg_distance_from_teammates': 0.0,
# Trade Network (8)
'int_trade_kill_count': 0,
'int_trade_kill_rate': 0.0,
'int_trade_response_time': 0.0,
'int_trade_given_count': 0,
'int_trade_given_rate': 0.0,
'int_trade_balance': 0,
'int_trade_efficiency': 0.0,
'int_teamwork_score': 0.0,
}

View File

@@ -0,0 +1,720 @@
"""
MetaProcessor - Tier 4: META Features (52 columns)
Long-term patterns and meta-features:
- Stability (8 columns): volatility, recent form, win/loss rating
- Side Preference (14 columns): CT vs T ratings, balance scores
- Opponent Adaptation (12 columns): vs different ELO tiers
- Map Specialization (10 columns): best/worst maps, versatility
- Session Pattern (8 columns): daily/weekly patterns, streaks
"""
import sqlite3
from typing import Dict, Any, List
from .base_processor import BaseFeatureProcessor, SafeAggregator
class MetaProcessor(BaseFeatureProcessor):
"""Tier 4 META processor - Cross-match patterns and meta-analysis"""
MIN_MATCHES_REQUIRED = 15 # Need sufficient history for meta patterns
@staticmethod
def calculate(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
    """
    Calculate all Tier 4 META features (52 columns).

    Returns a dict whose keys all start with 'meta_'; falls back to the
    default (zeroed) feature set when the player's match history is
    shorter than MIN_MATCHES_REQUIRED.
    """
    if not BaseFeatureProcessor.check_min_matches(
            steam_id, conn_l2, MetaProcessor.MIN_MATCHES_REQUIRED):
        return _get_default_meta_features()

    # Merge every meta dimension into one feature dict.
    features: Dict[str, Any] = {}
    for calculator in (
        MetaProcessor._calculate_stability,
        MetaProcessor._calculate_side_preference,
        MetaProcessor._calculate_opponent_adaptation,
        MetaProcessor._calculate_map_specialization,
        MetaProcessor._calculate_session_pattern,
    ):
        features.update(calculator(steam_id, conn_l2))
    return features
    @staticmethod
    def _calculate_stability(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
        """
        Calculate Stability (8 columns) - how steady the player's rating is
        over time, across maps, and between wins and losses.

        Columns:
        - meta_rating_volatility (STDDEV of last 20 matches)
        - meta_recent_form_rating (AVG of last 10 matches)
        - meta_win_rating, meta_loss_rating
        - meta_rating_consistency
        - meta_time_rating_correlation
        - meta_map_stability
        - meta_elo_tier_stability
        """
        cursor = conn_l2.cursor()
        # Get recent matches for volatility (recency approximated by match_id DESC)
        cursor.execute("""
            SELECT rating
            FROM fact_match_players
            WHERE steam_id_64 = ?
            ORDER BY match_id DESC
            LIMIT 20
        """, (steam_id,))
        recent_ratings = [row[0] for row in cursor.fetchall() if row[0] is not None]
        rating_volatility = SafeAggregator.safe_stddev(recent_ratings, 0.0)
        # Recent form (last 10 matches); 0.0 when fewer than 10 rated matches exist
        recent_form = SafeAggregator.safe_avg(recent_ratings[:10], 0.0) if len(recent_ratings) >= 10 else 0.0
        # Win/loss ratings (SQL AVG ignores the NULLs produced by the CASE branches)
        cursor.execute("""
            SELECT
                AVG(CASE WHEN is_win = 1 THEN rating END) as win_rating,
                AVG(CASE WHEN is_win = 0 THEN rating END) as loss_rating
            FROM fact_match_players
            WHERE steam_id_64 = ?
        """, (steam_id,))
        row = cursor.fetchone()
        # Falsy check: NULL -> 0.0 (an exact 0.0 average also maps to 0.0, which is harmless)
        win_rating = row[0] if row[0] else 0.0
        loss_rating = row[1] if row[1] else 0.0
        # Rating consistency (inverse of volatility, mapped onto a 0-100 scale)
        rating_consistency = max(0, 100 - (rating_volatility * 100))
        # Time-rating correlation: calculate Pearson correlation between match time and rating
        cursor.execute("""
            SELECT
                p.rating,
                m.start_time
            FROM fact_match_players p
            JOIN fact_matches m ON p.match_id = m.match_id
            WHERE p.steam_id_64 = ?
            AND p.rating IS NOT NULL
            AND m.start_time IS NOT NULL
            ORDER BY m.start_time
        """, (steam_id,))
        time_rating_data = cursor.fetchall()
        if len(time_rating_data) >= 2:
            ratings = [row[0] for row in time_rating_data]
            times = [row[1] for row in time_rating_data]
            # Normalize timestamps to match indices: chronological rank is used,
            # not wall-clock deltas, so uneven gaps between sessions don't skew the trend
            time_indices = list(range(len(times)))
            # Calculate Pearson correlation (textbook sum-of-products form)
            n = len(ratings)
            sum_x = sum(time_indices)
            sum_y = sum(ratings)
            sum_xy = sum(x * y for x, y in zip(time_indices, ratings))
            sum_x2 = sum(x * x for x in time_indices)
            sum_y2 = sum(y * y for y in ratings)
            numerator = n * sum_xy - sum_x * sum_y
            denominator = ((n * sum_x2 - sum_x ** 2) * (n * sum_y2 - sum_y ** 2)) ** 0.5
            # Zero denominator means zero variance in one series -> correlation undefined -> 0.0
            time_rating_corr = SafeAggregator.safe_divide(numerator, denominator) if denominator > 0 else 0.0
        else:
            time_rating_corr = 0.0
        # Map stability (STDDEV of the per-map average ratings)
        cursor.execute("""
            SELECT
                m.map_name,
                AVG(p.rating) as avg_rating
            FROM fact_match_players p
            JOIN fact_matches m ON p.match_id = m.match_id
            WHERE p.steam_id_64 = ?
            GROUP BY m.map_name
        """, (steam_id,))
        map_ratings = [row[1] for row in cursor.fetchall() if row[1] is not None]
        map_stability = SafeAggregator.safe_stddev(map_ratings, 0.0)
        # ELO tier stability (placeholder - reuses overall volatility until a
        # per-tier breakdown is implemented)
        elo_tier_stability = rating_volatility  # Simplified
        return {
            'meta_rating_volatility': round(rating_volatility, 3),
            'meta_recent_form_rating': round(recent_form, 3),
            'meta_win_rating': round(win_rating, 3),
            'meta_loss_rating': round(loss_rating, 3),
            'meta_rating_consistency': round(rating_consistency, 2),
            'meta_time_rating_correlation': round(time_rating_corr, 3),
            'meta_map_stability': round(map_stability, 3),
            'meta_elo_tier_stability': round(elo_tier_stability, 3),
        }
    @staticmethod
    def _calculate_side_preference(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
        """
        Calculate Side Preference (14 columns) - CT vs T performance split.

        Columns:
        - meta_side_ct_rating, meta_side_t_rating
        - meta_side_ct_kd, meta_side_t_kd
        - meta_side_ct_win_rate, meta_side_t_win_rate
        - meta_side_ct_fk_rate, meta_side_t_fk_rate
        - meta_side_ct_kast, meta_side_t_kast
        - meta_side_rating_diff, meta_side_kd_diff
        - meta_side_preference
        - meta_side_balance_score
        """
        cursor = conn_l2.cursor()
        # Get CT side performance from fact_match_players_ct
        # Rating is now stored as rating2 from fight_ct
        # NULLIF guards the K/D division against deaths = 0
        cursor.execute("""
            SELECT
                AVG(rating) as avg_rating,
                AVG(CAST(kills AS REAL) / NULLIF(deaths, 0)) as avg_kd,
                AVG(kast) as avg_kast,
                AVG(entry_kills) as avg_fk,
                SUM(CASE WHEN is_win = 1 THEN 1 ELSE 0 END) as wins,
                COUNT(*) as total_matches,
                SUM(round_total) as total_rounds
            FROM fact_match_players_ct
            WHERE steam_id_64 = ?
            AND rating IS NOT NULL AND rating > 0
        """, (steam_id,))
        ct_row = cursor.fetchone()
        # Falsy fallbacks: averages -> 0.0, counts -> 0; divisors default to 1
        # so the safe_divide calls below can never hit an all-NULL aggregate
        ct_rating = ct_row[0] if ct_row and ct_row[0] else 0.0
        ct_kd = ct_row[1] if ct_row and ct_row[1] else 0.0
        ct_kast = ct_row[2] if ct_row and ct_row[2] else 0.0
        ct_fk = ct_row[3] if ct_row and ct_row[3] else 0.0
        ct_wins = ct_row[4] if ct_row and ct_row[4] else 0
        ct_matches = ct_row[5] if ct_row and ct_row[5] else 1
        ct_rounds = ct_row[6] if ct_row and ct_row[6] else 1
        ct_win_rate = SafeAggregator.safe_divide(ct_wins, ct_matches)
        ct_fk_rate = SafeAggregator.safe_divide(ct_fk, ct_rounds)
        # Get T side performance from fact_match_players_t (mirror of the CT query)
        cursor.execute("""
            SELECT
                AVG(rating) as avg_rating,
                AVG(CAST(kills AS REAL) / NULLIF(deaths, 0)) as avg_kd,
                AVG(kast) as avg_kast,
                AVG(entry_kills) as avg_fk,
                SUM(CASE WHEN is_win = 1 THEN 1 ELSE 0 END) as wins,
                COUNT(*) as total_matches,
                SUM(round_total) as total_rounds
            FROM fact_match_players_t
            WHERE steam_id_64 = ?
            AND rating IS NOT NULL AND rating > 0
        """, (steam_id,))
        t_row = cursor.fetchone()
        t_rating = t_row[0] if t_row and t_row[0] else 0.0
        t_kd = t_row[1] if t_row and t_row[1] else 0.0
        t_kast = t_row[2] if t_row and t_row[2] else 0.0
        t_fk = t_row[3] if t_row and t_row[3] else 0.0
        t_wins = t_row[4] if t_row and t_row[4] else 0
        t_matches = t_row[5] if t_row and t_row[5] else 1
        t_rounds = t_row[6] if t_row and t_row[6] else 1
        t_win_rate = SafeAggregator.safe_divide(t_wins, t_matches)
        t_fk_rate = SafeAggregator.safe_divide(t_fk, t_rounds)
        # Differences (positive = better on CT side)
        rating_diff = ct_rating - t_rating
        kd_diff = ct_kd - t_kd
        # Side preference classification (0.05 rating gap = dead zone -> Balanced)
        if abs(rating_diff) < 0.05:
            side_preference = 'Balanced'
        elif rating_diff > 0:
            side_preference = 'CT'
        else:
            side_preference = 'T'
        # Balance score (0-100, higher = more balanced; hits 0 at a 0.5 rating gap)
        balance_score = max(0, 100 - abs(rating_diff) * 200)
        return {
            'meta_side_ct_rating': round(ct_rating, 3),
            'meta_side_t_rating': round(t_rating, 3),
            'meta_side_ct_kd': round(ct_kd, 3),
            'meta_side_t_kd': round(t_kd, 3),
            'meta_side_ct_win_rate': round(ct_win_rate, 3),
            'meta_side_t_win_rate': round(t_win_rate, 3),
            'meta_side_ct_fk_rate': round(ct_fk_rate, 3),
            'meta_side_t_fk_rate': round(t_fk_rate, 3),
            'meta_side_ct_kast': round(ct_kast, 3),
            'meta_side_t_kast': round(t_kast, 3),
            'meta_side_rating_diff': round(rating_diff, 3),
            'meta_side_kd_diff': round(kd_diff, 3),
            'meta_side_preference': side_preference,
            'meta_side_balance_score': round(balance_score, 2),
        }
    @staticmethod
    def _calculate_opponent_adaptation(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
        """
        Calculate Opponent Adaptation (12 columns)
        ELO tiers: lower (<-200), similar (±200), higher (>+200)
        Columns:
        - meta_opp_vs_lower_elo_rating, meta_opp_vs_similar_elo_rating, meta_opp_vs_higher_elo_rating
        - meta_opp_vs_lower_elo_kd, meta_opp_vs_similar_elo_kd, meta_opp_vs_higher_elo_kd
        - meta_opp_elo_adaptation
        - meta_opp_stomping_score, meta_opp_upset_score
        - meta_opp_consistency_across_elos
        - meta_opp_rank_resistance
        - meta_opp_smurf_detection
        NOTE: Using individual origin_elo from fact_match_players
        """
        cursor = conn_l2.cursor()
        # Get player's matches joined against the opposing team's average ELO.
        # The subquery averages origin_elo per (match, team); the join condition
        # team_id != opp.team_id picks the OTHER team's average for each match.
        cursor.execute("""
            SELECT
                p.rating,
                CAST(p.kills AS REAL) / NULLIF(p.deaths, 0) as kd,
                p.is_win,
                p.origin_elo as player_elo,
                opp.avg_elo as opponent_avg_elo
            FROM fact_match_players p
            JOIN (
                SELECT
                    match_id,
                    team_id,
                    AVG(origin_elo) as avg_elo
                FROM fact_match_players
                WHERE origin_elo IS NOT NULL
                GROUP BY match_id, team_id
            ) opp ON p.match_id = opp.match_id AND p.team_id != opp.team_id
            WHERE p.steam_id_64 = ?
            AND p.origin_elo IS NOT NULL
        """, (steam_id,))
        matches = cursor.fetchall()
        # No ELO data at all -> neutral zeros for the whole group
        if not matches:
            return {
                'meta_opp_vs_lower_elo_rating': 0.0,
                'meta_opp_vs_lower_elo_kd': 0.0,
                'meta_opp_vs_similar_elo_rating': 0.0,
                'meta_opp_vs_similar_elo_kd': 0.0,
                'meta_opp_vs_higher_elo_rating': 0.0,
                'meta_opp_vs_higher_elo_kd': 0.0,
                'meta_opp_elo_adaptation': 0.0,
                'meta_opp_stomping_score': 0.0,
                'meta_opp_upset_score': 0.0,
                'meta_opp_consistency_across_elos': 0.0,
                'meta_opp_rank_resistance': 0.0,
                'meta_opp_smurf_detection': 0.0,
            }
        # Categorize each match into an ELO-difference tier
        lower_elo_ratings = []  # Playing vs weaker opponents
        lower_elo_kds = []
        similar_elo_ratings = []  # Similar skill
        similar_elo_kds = []
        higher_elo_ratings = []  # Playing vs stronger opponents
        higher_elo_kds = []
        stomping_score = 0  # Dominating weaker teams (raw win count vs lower tier)
        upset_score = 0  # Winning against stronger teams
        for rating, kd, is_win, player_elo, opp_elo in matches:
            # Skip matches without a rating or with zero deaths (kd is NULL)
            if rating is None or kd is None:
                continue
            elo_diff = player_elo - opp_elo  # Positive = we're stronger
            # Categorize ELO tiers (±200 threshold)
            if elo_diff > 200:  # We're stronger (opponent is lower ELO)
                lower_elo_ratings.append(rating)
                lower_elo_kds.append(kd)
                if is_win:
                    stomping_score += 1
            elif elo_diff < -200:  # Opponent is stronger (higher ELO)
                higher_elo_ratings.append(rating)
                higher_elo_kds.append(kd)
                if is_win:
                    upset_score += 2  # Upset wins count more
            else:  # Similar ELO (±200)
                similar_elo_ratings.append(rating)
                similar_elo_kds.append(kd)
        # Per-tier averages (SafeAggregator default presumed 0.0 for empty tiers)
        avg_lower_rating = SafeAggregator.safe_avg(lower_elo_ratings)
        avg_lower_kd = SafeAggregator.safe_avg(lower_elo_kds)
        avg_similar_rating = SafeAggregator.safe_avg(similar_elo_ratings)
        avg_similar_kd = SafeAggregator.safe_avg(similar_elo_kds)
        avg_higher_rating = SafeAggregator.safe_avg(higher_elo_ratings)
        avg_higher_kd = SafeAggregator.safe_avg(higher_elo_kds)
        # ELO adaptation: performance improvement vs stronger opponents
        # Positive = performs better vs stronger teams (rare, good trait)
        elo_adaptation = avg_higher_rating - avg_lower_rating
        # Consistency: inverse of std dev of the three tier averages
        # NOTE(review): can go negative for very spread tiers - not clamped here
        all_tier_ratings = [avg_lower_rating, avg_similar_rating, avg_higher_rating]
        consistency = 100 - SafeAggregator.safe_stddev(all_tier_ratings) * 100
        # Rank resistance: K/D vs higher ELO opponents
        rank_resistance = avg_higher_kd
        # Smurf detection: high performance vs lower ELO
        # Indicators: rating > 1.15 AND kd > 1.2 when facing lower ELO opponents
        smurf_score = 0.0
        if len(lower_elo_ratings) > 0 and avg_lower_rating > 1.0:
            # Base score from rating dominance
            rating_bonus = max(0, (avg_lower_rating - 1.0) * 100)
            # Additional score from K/D dominance
            kd_bonus = max(0, (avg_lower_kd - 1.0) * 50)
            # Consistency bonus (more matches = more reliable indicator; caps at 5 matches)
            consistency_bonus = min(len(lower_elo_ratings) / 5.0, 1.0) * 20
            smurf_score = rating_bonus + kd_bonus + consistency_bonus
            # Cap at 100
            smurf_score = min(smurf_score, 100.0)
        return {
            'meta_opp_vs_lower_elo_rating': round(avg_lower_rating, 3),
            'meta_opp_vs_lower_elo_kd': round(avg_lower_kd, 3),
            'meta_opp_vs_similar_elo_rating': round(avg_similar_rating, 3),
            'meta_opp_vs_similar_elo_kd': round(avg_similar_kd, 3),
            'meta_opp_vs_higher_elo_rating': round(avg_higher_rating, 3),
            'meta_opp_vs_higher_elo_kd': round(avg_higher_kd, 3),
            'meta_opp_elo_adaptation': round(elo_adaptation, 3),
            'meta_opp_stomping_score': round(stomping_score, 2),
            'meta_opp_upset_score': round(upset_score, 2),
            'meta_opp_consistency_across_elos': round(consistency, 2),
            'meta_opp_rank_resistance': round(rank_resistance, 3),
            'meta_opp_smurf_detection': round(smurf_score, 2),
        }
    # A duplicate, unreachable variant of this logic (using match-level team ELO)
    # previously lived after the return above; it was removed and its intent
    # merged into the individual-ELO implementation, which is more accurate.
    @staticmethod
    def _calculate_map_specialization(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
        """
        Calculate Map Specialization (10 columns)
        Columns:
        - meta_map_best_map, meta_map_best_rating
        - meta_map_worst_map, meta_map_worst_rating
        - meta_map_diversity
        - meta_map_pool_size
        - meta_map_specialist_score
        - meta_map_versatility
        - meta_map_comfort_zone_rate
        - meta_map_adaptation
        """
        cursor = conn_l2.cursor()
        # Per-map average rating, best map first
        # Lower threshold to 1 match to ensure we catch high ratings even with low sample size
        cursor.execute("""
            SELECT
                m.map_name,
                AVG(p.rating) as avg_rating,
                COUNT(*) as match_count
            FROM fact_match_players p
            JOIN fact_matches m ON p.match_id = m.match_id
            WHERE p.steam_id_64 = ?
            GROUP BY m.map_name
            HAVING match_count >= 1
            ORDER BY avg_rating DESC
        """, (steam_id,))
        map_data = cursor.fetchall()
        # No map history at all -> neutral defaults for the whole group
        if not map_data:
            return {
                'meta_map_best_map': 'unknown',
                'meta_map_best_rating': 0.0,
                'meta_map_worst_map': 'unknown',
                'meta_map_worst_rating': 0.0,
                'meta_map_diversity': 0.0,
                'meta_map_pool_size': 0,
                'meta_map_specialist_score': 0.0,
                'meta_map_versatility': 0.0,
                'meta_map_comfort_zone_rate': 0.0,
                'meta_map_adaptation': 0.0,
            }
        # Best map (rows are sorted by avg_rating DESC)
        best_map = map_data[0][0]
        best_rating = map_data[0][1]
        # Worst map (last row of the same sorted result)
        worst_map = map_data[-1][0]
        worst_rating = map_data[-1][1]
        # Map diversity: despite the "entropy-based" intent, this is the STDDEV
        # of per-map average ratings (higher = more uneven across maps)
        map_ratings = [row[1] for row in map_data]
        map_diversity = SafeAggregator.safe_stddev(map_ratings, 0.0)
        # Map pool size: number of maps with 3+ matches (lowered from 5).
        # GROUP BY yields one row per qualifying map, so len(rows) is the count.
        cursor.execute("""
            SELECT COUNT(DISTINCT m.map_name)
            FROM fact_match_players p
            JOIN fact_matches m ON p.match_id = m.match_id
            WHERE p.steam_id_64 = ?
            GROUP BY m.map_name
            HAVING COUNT(*) >= 3
        """, (steam_id,))
        pool_rows = cursor.fetchall()
        pool_size = len(pool_rows)
        # Specialist score (gap between best and worst map rating)
        specialist_score = best_rating - worst_rating
        # Versatility (inverse of specialist score, mapped onto 0-100)
        versatility = max(0, 100 - specialist_score * 100)
        # Comfort zone rate (% of matches played on the player's 3 most-played maps;
        # the inner subquery picks the top-3 maps by match count)
        cursor.execute("""
            SELECT
                SUM(CASE WHEN m.map_name IN (
                    SELECT map_name FROM (
                        SELECT m2.map_name, COUNT(*) as cnt
                        FROM fact_match_players p2
                        JOIN fact_matches m2 ON p2.match_id = m2.match_id
                        WHERE p2.steam_id_64 = ?
                        GROUP BY m2.map_name
                        ORDER BY cnt DESC
                        LIMIT 3
                    )
                ) THEN 1 ELSE 0 END) as comfort_matches,
                COUNT(*) as total_matches
            FROM fact_match_players p
            JOIN fact_matches m ON p.match_id = m.match_id
            WHERE p.steam_id_64 = ?
        """, (steam_id, steam_id))
        comfort_row = cursor.fetchone()
        comfort_matches = comfort_row[0] if comfort_row[0] else 0
        total_matches = comfort_row[1] if comfort_row[1] else 1
        comfort_zone_rate = SafeAggregator.safe_divide(comfort_matches, total_matches)
        # Map adaptation (avg rating on all maps except the best one)
        if len(map_data) > 1:
            non_favorite_ratings = [row[1] for row in map_data[1:]]
            map_adaptation = SafeAggregator.safe_avg(non_favorite_ratings, 0.0)
        else:
            # Only one map played - fall back to its rating
            map_adaptation = best_rating
        return {
            'meta_map_best_map': best_map,
            'meta_map_best_rating': round(best_rating, 3),
            'meta_map_worst_map': worst_map,
            'meta_map_worst_rating': round(worst_rating, 3),
            'meta_map_diversity': round(map_diversity, 3),
            'meta_map_pool_size': pool_size,
            'meta_map_specialist_score': round(specialist_score, 3),
            'meta_map_versatility': round(versatility, 2),
            'meta_map_comfort_zone_rate': round(comfort_zone_rate, 3),
            'meta_map_adaptation': round(map_adaptation, 3),
        }
    @staticmethod
    def _calculate_session_pattern(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
        """
        Calculate Session Pattern (8 columns)
        Columns:
        - meta_session_avg_matches_per_day
        - meta_session_longest_streak
        - meta_session_weekend_rating, meta_session_weekday_rating
        - meta_session_morning_rating, meta_session_afternoon_rating
        - meta_session_evening_rating, meta_session_night_rating
        Note: Requires timestamp data in fact_matches
        """
        cursor = conn_l2.cursor()
        # Check if start_time exists anywhere in fact_matches
        # NOTE(review): this is a global check, not per-player - a player whose
        # own matches all lack timestamps still falls through to the queries below
        cursor.execute("""
            SELECT COUNT(*) FROM fact_matches
            WHERE start_time IS NOT NULL AND start_time > 0
            LIMIT 1
        """)
        has_timestamps = cursor.fetchone()[0] > 0
        if not has_timestamps:
            # Return placeholder values when no timestamps are recorded at all
            return {
                'meta_session_avg_matches_per_day': 0.0,
                'meta_session_longest_streak': 0,
                'meta_session_weekend_rating': 0.0,
                'meta_session_weekday_rating': 0.0,
                'meta_session_morning_rating': 0.0,
                'meta_session_afternoon_rating': 0.0,
                'meta_session_evening_rating': 0.0,
                'meta_session_night_rating': 0.0,
            }
        # 1. Matches per day (start_time treated as a unix epoch; DATE buckets are UTC)
        cursor.execute("""
            SELECT
                DATE(start_time, 'unixepoch') as match_date,
                COUNT(*) as daily_matches
            FROM fact_matches m
            JOIN fact_match_players p ON m.match_id = p.match_id
            WHERE p.steam_id_64 = ? AND m.start_time IS NOT NULL
            GROUP BY match_date
        """, (steam_id,))
        daily_stats = cursor.fetchall()
        if daily_stats:
            # Average over days the player actually played, not calendar days
            avg_matches_per_day = sum(row[1] for row in daily_stats) / len(daily_stats)
        else:
            avg_matches_per_day = 0.0
        # 2. Longest Streak (Consecutive wins, in chronological order)
        cursor.execute("""
            SELECT is_win
            FROM fact_match_players p
            JOIN fact_matches m ON p.match_id = m.match_id
            WHERE p.steam_id_64 = ? AND m.start_time IS NOT NULL
            ORDER BY m.start_time
        """, (steam_id,))
        results = cursor.fetchall()
        longest_streak = 0
        current_streak = 0
        for row in results:
            if row[0]:  # Win
                current_streak += 1
            else:
                longest_streak = max(longest_streak, current_streak)
                current_streak = 0
        # Account for a streak that runs to the most recent match
        longest_streak = max(longest_streak, current_streak)
        # 3. Time of Day & Week Analysis
        # strftime('%w'): 0 = Sunday ... 6 = Saturday; hours are UTC
        cursor.execute("""
            SELECT
                CAST(strftime('%w', start_time, 'unixepoch') AS INTEGER) as day_of_week,
                CAST(strftime('%H', start_time, 'unixepoch') AS INTEGER) as hour_of_day,
                p.rating
            FROM fact_match_players p
            JOIN fact_matches m ON p.match_id = m.match_id
            WHERE p.steam_id_64 = ?
            AND m.start_time IS NOT NULL
            AND p.rating IS NOT NULL
        """, (steam_id,))
        matches = cursor.fetchall()
        weekend_ratings = []
        weekday_ratings = []
        morning_ratings = []  # 06-12
        afternoon_ratings = []  # 12-18
        evening_ratings = []  # 18-24
        night_ratings = []  # 00-06
        for dow, hour, rating in matches:
            # Weekday/Weekend (Sunday = 0, Saturday = 6)
            if dow == 0 or dow == 6:
                weekend_ratings.append(rating)
            else:
                weekday_ratings.append(rating)
            # Time of Day buckets
            if 6 <= hour < 12:
                morning_ratings.append(rating)
            elif 12 <= hour < 18:
                afternoon_ratings.append(rating)
            elif 18 <= hour <= 23:
                evening_ratings.append(rating)
            else:  # 0-6
                night_ratings.append(rating)
        return {
            'meta_session_avg_matches_per_day': round(avg_matches_per_day, 2),
            'meta_session_longest_streak': longest_streak,
            'meta_session_weekend_rating': round(SafeAggregator.safe_avg(weekend_ratings), 3),
            'meta_session_weekday_rating': round(SafeAggregator.safe_avg(weekday_ratings), 3),
            'meta_session_morning_rating': round(SafeAggregator.safe_avg(morning_ratings), 3),
            'meta_session_afternoon_rating': round(SafeAggregator.safe_avg(afternoon_ratings), 3),
            'meta_session_evening_rating': round(SafeAggregator.safe_avg(evening_ratings), 3),
            'meta_session_night_rating': round(SafeAggregator.safe_avg(night_ratings), 3),
        }
def _get_default_meta_features() -> Dict[str, Any]:
"""Return default zero values for all 52 META features"""
return {
# Stability (8)
'meta_rating_volatility': 0.0,
'meta_recent_form_rating': 0.0,
'meta_win_rating': 0.0,
'meta_loss_rating': 0.0,
'meta_rating_consistency': 0.0,
'meta_time_rating_correlation': 0.0,
'meta_map_stability': 0.0,
'meta_elo_tier_stability': 0.0,
# Side Preference (14)
'meta_side_ct_rating': 0.0,
'meta_side_t_rating': 0.0,
'meta_side_ct_kd': 0.0,
'meta_side_t_kd': 0.0,
'meta_side_ct_win_rate': 0.0,
'meta_side_t_win_rate': 0.0,
'meta_side_ct_fk_rate': 0.0,
'meta_side_t_fk_rate': 0.0,
'meta_side_ct_kast': 0.0,
'meta_side_t_kast': 0.0,
'meta_side_rating_diff': 0.0,
'meta_side_kd_diff': 0.0,
'meta_side_preference': 'Balanced',
'meta_side_balance_score': 0.0,
# Opponent Adaptation (12)
'meta_opp_vs_lower_elo_rating': 0.0,
'meta_opp_vs_similar_elo_rating': 0.0,
'meta_opp_vs_higher_elo_rating': 0.0,
'meta_opp_vs_lower_elo_kd': 0.0,
'meta_opp_vs_similar_elo_kd': 0.0,
'meta_opp_vs_higher_elo_kd': 0.0,
'meta_opp_elo_adaptation': 0.0,
'meta_opp_stomping_score': 0.0,
'meta_opp_upset_score': 0.0,
'meta_opp_consistency_across_elos': 0.0,
'meta_opp_rank_resistance': 0.0,
'meta_opp_smurf_detection': 0.0,
# Map Specialization (10)
'meta_map_best_map': 'unknown',
'meta_map_best_rating': 0.0,
'meta_map_worst_map': 'unknown',
'meta_map_worst_rating': 0.0,
'meta_map_diversity': 0.0,
'meta_map_pool_size': 0,
'meta_map_specialist_score': 0.0,
'meta_map_versatility': 0.0,
'meta_map_comfort_zone_rate': 0.0,
'meta_map_adaptation': 0.0,
# Session Pattern (8)
'meta_session_avg_matches_per_day': 0.0,
'meta_session_longest_streak': 0,
'meta_session_weekend_rating': 0.0,
'meta_session_weekday_rating': 0.0,
'meta_session_morning_rating': 0.0,
'meta_session_afternoon_rating': 0.0,
'meta_session_evening_rating': 0.0,
'meta_session_night_rating': 0.0,
}

View File

@@ -0,0 +1,722 @@
"""
TacticalProcessor - Tier 2: TACTICAL Features (44 columns)
Calculates tactical gameplay features from fact_match_players and fact_round_events:
- Opening Impact (8 columns): first kills/deaths, entry duels
- Multi-Kill Performance (6 columns): 2k, 3k, 4k, 5k, ace
- Clutch Performance (10 columns): 1v1, 1v2, 1v3+ situations
- Utility Mastery (12 columns): nade damage, flash efficiency, smoke timing
- Economy Efficiency (8 columns): damage/$, eco/force/full round performance
"""
import sqlite3
from typing import Dict, Any
from .base_processor import BaseFeatureProcessor, SafeAggregator
class TacticalProcessor(BaseFeatureProcessor):
"""Tier 2 TACTICAL processor - Multi-table JOINs and conditional aggregations"""
MIN_MATCHES_REQUIRED = 5 # Need reasonable sample for tactical analysis
@staticmethod
def calculate(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
"""
Calculate all Tier 2 TACTICAL features (44 columns)
Returns dict with keys starting with 'tac_'
"""
features = {}
# Check minimum matches
if not BaseFeatureProcessor.check_min_matches(steam_id, conn_l2,
TacticalProcessor.MIN_MATCHES_REQUIRED):
return _get_default_tactical_features()
# Calculate each tactical dimension
features.update(TacticalProcessor._calculate_opening_impact(steam_id, conn_l2))
features.update(TacticalProcessor._calculate_multikill(steam_id, conn_l2))
features.update(TacticalProcessor._calculate_clutch(steam_id, conn_l2))
features.update(TacticalProcessor._calculate_utility(steam_id, conn_l2))
features.update(TacticalProcessor._calculate_economy(steam_id, conn_l2))
return features
    @staticmethod
    def _calculate_opening_impact(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
        """
        Calculate Opening Impact (8 columns)
        Columns:
        - tac_avg_fk, tac_avg_fd
        - tac_fk_rate, tac_fd_rate
        - tac_fk_success_rate (team win rate when player gets FK)
        - tac_entry_kill_rate, tac_entry_death_rate
        - tac_opening_duel_winrate
        """
        cursor = conn_l2.cursor()
        # FK/FD aggregates from fact_match_players
        cursor.execute("""
            SELECT
                AVG(entry_kills) as avg_fk,
                AVG(entry_deaths) as avg_fd,
                SUM(entry_kills) as total_fk,
                SUM(entry_deaths) as total_fd,
                COUNT(*) as total_matches
            FROM fact_match_players
            WHERE steam_id_64 = ?
        """, (steam_id,))
        row = cursor.fetchone()
        # Falsy fallbacks: averages -> 0.0, counts -> 0, match count -> 1
        avg_fk = row[0] if row[0] else 0.0
        avg_fd = row[1] if row[1] else 0.0
        total_fk = row[2] if row[2] else 0
        total_fd = row[3] if row[3] else 0
        total_matches = row[4] if row[4] else 1
        # An "opening duel" is any round where the player took part in the first
        # kill of the round - either as killer (FK) or victim (FD)
        opening_duels = total_fk + total_fd
        fk_rate = SafeAggregator.safe_divide(total_fk, opening_duels)
        fd_rate = SafeAggregator.safe_divide(total_fd, opening_duels)
        # Same computation as fk_rate, exposed under its own column name
        opening_duel_winrate = SafeAggregator.safe_divide(total_fk, opening_duels)
        # FK success rate: match win rate restricted to matches with at least one FK
        cursor.execute("""
            SELECT
                COUNT(*) as fk_matches,
                SUM(CASE WHEN is_win = 1 THEN 1 ELSE 0 END) as fk_wins
            FROM fact_match_players
            WHERE steam_id_64 = ?
            AND entry_kills > 0
        """, (steam_id,))
        fk_row = cursor.fetchone()
        fk_matches = fk_row[0] if fk_row[0] else 0
        fk_wins = fk_row[1] if fk_row[1] else 0
        fk_success_rate = SafeAggregator.safe_divide(fk_wins, fk_matches)
        # Entry kill/death rates (per T round for entry kills, total for entry deaths)
        cursor.execute("""
            SELECT COALESCE(SUM(round_total), 0)
            FROM fact_match_players_t
            WHERE steam_id_64 = ?
        """, (steam_id,))
        t_rounds = cursor.fetchone()[0] or 1
        cursor.execute("""
            SELECT COALESCE(SUM(round_total), 0)
            FROM fact_match_players
            WHERE steam_id_64 = ?
        """, (steam_id,))
        total_rounds = cursor.fetchone()[0] or 1
        entry_kill_rate = SafeAggregator.safe_divide(total_fk, t_rounds)
        entry_death_rate = SafeAggregator.safe_divide(total_fd, total_rounds)
        return {
            'tac_avg_fk': round(avg_fk, 2),
            'tac_avg_fd': round(avg_fd, 2),
            'tac_fk_rate': round(fk_rate, 3),
            'tac_fd_rate': round(fd_rate, 3),
            'tac_fk_success_rate': round(fk_success_rate, 3),
            'tac_entry_kill_rate': round(entry_kill_rate, 3),
            'tac_entry_death_rate': round(entry_death_rate, 3),
            'tac_opening_duel_winrate': round(opening_duel_winrate, 3),
        }
@staticmethod
def _calculate_multikill(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
"""
Calculate Multi-Kill Performance (6 columns)
Columns:
- tac_avg_2k, tac_avg_3k, tac_avg_4k, tac_avg_5k
- tac_multikill_rate
- tac_ace_count
"""
cursor = conn_l2.cursor()
cursor.execute("""
SELECT
AVG(kill_2) as avg_2k,
AVG(kill_3) as avg_3k,
AVG(kill_4) as avg_4k,
AVG(kill_5) as avg_5k,
SUM(kill_2) as total_2k,
SUM(kill_3) as total_3k,
SUM(kill_4) as total_4k,
SUM(kill_5) as total_5k,
SUM(round_total) as total_rounds
FROM fact_match_players
WHERE steam_id_64 = ?
""", (steam_id,))
row = cursor.fetchone()
avg_2k = row[0] if row[0] else 0.0
avg_3k = row[1] if row[1] else 0.0
avg_4k = row[2] if row[2] else 0.0
avg_5k = row[3] if row[3] else 0.0
total_2k = row[4] if row[4] else 0
total_3k = row[5] if row[5] else 0
total_4k = row[6] if row[6] else 0
total_5k = row[7] if row[7] else 0
total_rounds = row[8] if row[8] else 1
total_multikills = total_2k + total_3k + total_4k + total_5k
multikill_rate = SafeAggregator.safe_divide(total_multikills, total_rounds)
return {
'tac_avg_2k': round(avg_2k, 2),
'tac_avg_3k': round(avg_3k, 2),
'tac_avg_4k': round(avg_4k, 2),
'tac_avg_5k': round(avg_5k, 2),
'tac_multikill_rate': round(multikill_rate, 3),
'tac_ace_count': total_5k,
}
@staticmethod
def _calculate_clutch(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
"""
Calculate Clutch Performance (10 columns)
Columns:
- tac_clutch_1v1_attempts, tac_clutch_1v1_wins, tac_clutch_1v1_rate
- tac_clutch_1v2_attempts, tac_clutch_1v2_wins, tac_clutch_1v2_rate
- tac_clutch_1v3_plus_attempts, tac_clutch_1v3_plus_wins, tac_clutch_1v3_plus_rate
- tac_clutch_impact_score
Logic:
- Wins: Aggregated directly from fact_match_players (trusting upstream data).
- Attempts: Calculated by replaying rounds with 'Active Player' filtering to remove ghosts.
"""
cursor = conn_l2.cursor()
# Step 1: Get Wins from fact_match_players
cursor.execute("""
SELECT
SUM(clutch_1v1) as c1,
SUM(clutch_1v2) as c2,
SUM(clutch_1v3) as c3,
SUM(clutch_1v4) as c4,
SUM(clutch_1v5) as c5
FROM fact_match_players
WHERE steam_id_64 = ?
""", (steam_id,))
wins_row = cursor.fetchone()
clutch_1v1_wins = wins_row[0] if wins_row and wins_row[0] else 0
clutch_1v2_wins = wins_row[1] if wins_row and wins_row[1] else 0
clutch_1v3_wins = wins_row[2] if wins_row and wins_row[2] else 0
clutch_1v4_wins = wins_row[3] if wins_row and wins_row[3] else 0
clutch_1v5_wins = wins_row[4] if wins_row and wins_row[4] else 0
# Group 1v3+ wins
clutch_1v3_plus_wins = clutch_1v3_wins + clutch_1v4_wins + clutch_1v5_wins
# Step 2: Calculate Attempts
cursor.execute("SELECT DISTINCT match_id FROM fact_match_players WHERE steam_id_64 = ?", (steam_id,))
match_ids = [row[0] for row in cursor.fetchall()]
clutch_1v1_attempts = 0
clutch_1v2_attempts = 0
clutch_1v3_plus_attempts = 0
for match_id in match_ids:
# Get Roster
cursor.execute("SELECT steam_id_64, team_id FROM fact_match_players WHERE match_id = ?", (match_id,))
roster = cursor.fetchall()
my_team_id = None
for pid, tid in roster:
if str(pid) == str(steam_id):
my_team_id = tid
break
if my_team_id is None:
continue
all_teammates = {str(pid) for pid, tid in roster if tid == my_team_id}
all_enemies = {str(pid) for pid, tid in roster if tid != my_team_id}
# Get Events for this match
cursor.execute("""
SELECT round_num, event_type, attacker_steam_id, victim_steam_id, event_time
FROM fact_round_events
WHERE match_id = ?
ORDER BY round_num, event_time
""", (match_id,))
all_events = cursor.fetchall()
# Group events by round
from collections import defaultdict
events_by_round = defaultdict(list)
active_players_by_round = defaultdict(set)
for r_num, e_type, attacker, victim, e_time in all_events:
events_by_round[r_num].append((e_type, attacker, victim))
if attacker: active_players_by_round[r_num].add(str(attacker))
if victim: active_players_by_round[r_num].add(str(victim))
# Iterate rounds
for r_num, round_events in events_by_round.items():
active_players = active_players_by_round[r_num]
# If player not active, skip (probably camping or AFK or not spawned)
if str(steam_id) not in active_players:
continue
# Filter roster to active players only (removes ghosts)
alive_teammates = all_teammates.intersection(active_players)
alive_enemies = all_enemies.intersection(active_players)
# Safety: ensure player is in alive_teammates
alive_teammates.add(str(steam_id))
clutch_detected = False
for e_type, attacker, victim in round_events:
if e_type == 'kill':
vic_str = str(victim)
if vic_str in alive_teammates:
alive_teammates.discard(vic_str)
elif vic_str in alive_enemies:
alive_enemies.discard(vic_str)
# Check clutch condition
if not clutch_detected:
# Teammates dead (len==1 means only me), Enemies alive
if len(alive_teammates) == 1 and str(steam_id) in alive_teammates:
enemies_cnt = len(alive_enemies)
if enemies_cnt > 0:
clutch_detected = True
if enemies_cnt == 1:
clutch_1v1_attempts += 1
elif enemies_cnt == 2:
clutch_1v2_attempts += 1
elif enemies_cnt >= 3:
clutch_1v3_plus_attempts += 1
# Calculate win rates
rate_1v1 = SafeAggregator.safe_divide(clutch_1v1_wins, clutch_1v1_attempts)
rate_1v2 = SafeAggregator.safe_divide(clutch_1v2_wins, clutch_1v2_attempts)
rate_1v3_plus = SafeAggregator.safe_divide(clutch_1v3_plus_wins, clutch_1v3_plus_attempts)
# Clutch impact score: weighted by difficulty
impact_score = (clutch_1v1_wins * 1.0 + clutch_1v2_wins * 3.0 + clutch_1v3_plus_wins * 7.0)
return {
'tac_clutch_1v1_attempts': clutch_1v1_attempts,
'tac_clutch_1v1_wins': clutch_1v1_wins,
'tac_clutch_1v1_rate': round(rate_1v1, 3),
'tac_clutch_1v2_attempts': clutch_1v2_attempts,
'tac_clutch_1v2_wins': clutch_1v2_wins,
'tac_clutch_1v2_rate': round(rate_1v2, 3),
'tac_clutch_1v3_plus_attempts': clutch_1v3_plus_attempts,
'tac_clutch_1v3_plus_wins': clutch_1v3_plus_wins,
'tac_clutch_1v3_plus_rate': round(rate_1v3_plus, 3),
'tac_clutch_impact_score': round(impact_score, 2)
}
@staticmethod
def _calculate_utility(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
    """
    Calculate Utility Mastery features (12 columns).

    Columns:
        - tac_util_flash_per_round, tac_util_smoke_per_round
        - tac_util_molotov_per_round, tac_util_he_per_round
        - tac_util_usage_rate
        - tac_util_nade_dmg_per_round, tac_util_nade_dmg_per_nade
        - tac_util_flash_time_per_round, tac_util_flash_enemies_per_round
        - tac_util_flash_efficiency
        - tac_util_impact_score
        - tac_util_zeus_equipped_count

    Args:
        steam_id: Player's SteamID64 as a string.
        conn_l2: Open connection to the L2 database.

    Returns:
        Dict with exactly the 12 tac_util_* keys above. The key set is now
        identical on every code path — previously the no-data early return
        still contained the removed 'tac_util_smoke_timing_score' key and
        was missing 'tac_util_zeus_equipped_count', so the two paths
        produced different schemas.

    Note: fact_round_player_economy (leetify mode) is required for the
    Zeus equipped count; an all-zero feature set is returned without it.
    """
    cursor = conn_l2.cursor()
    # Detect the leetify-mode economy table; without it we cannot compute
    # the Zeus equipped count and return a zeroed (but schema-complete) dict.
    cursor.execute("""
        SELECT COUNT(*) FROM sqlite_master
        WHERE type='table' AND name='fact_round_player_economy'
    """)
    has_economy = cursor.fetchone()[0] > 0
    if not has_economy:
        # Same key set as the full path below (schema consistency).
        return {
            'tac_util_flash_per_round': 0.0,
            'tac_util_smoke_per_round': 0.0,
            'tac_util_molotov_per_round': 0.0,
            'tac_util_he_per_round': 0.0,
            'tac_util_usage_rate': 0.0,
            'tac_util_nade_dmg_per_round': 0.0,
            'tac_util_nade_dmg_per_nade': 0.0,
            'tac_util_flash_time_per_round': 0.0,
            'tac_util_flash_enemies_per_round': 0.0,
            'tac_util_flash_efficiency': 0.0,
            'tac_util_impact_score': 0.0,
            'tac_util_zeus_equipped_count': 0,
        }
    # Denominator for all per-round rates; clamp to 1 to avoid div-by-zero.
    total_rounds = BaseFeatureProcessor.get_player_round_count(steam_id, conn_l2)
    if total_rounds == 0:
        total_rounds = 1
    # One aggregate pass over fact_match_players covers every utility sum
    # (the original issued two near-identical queries against this table).
    # flash_time in L2 is TOTAL flash time (seconds), flash_enemy is TOTAL
    # enemies flashed — not per-round averages.
    cursor.execute("""
        SELECT
            SUM(util_flash_usage) as total_flash,
            SUM(util_smoke_usage) as total_smoke,
            SUM(util_molotov_usage) as total_molotov,
            SUM(util_he_usage) as total_he,
            SUM(throw_harm_enemy) as nade_damage,
            SUM(flash_time) as total_flash_time,
            SUM(flash_enemy) as total_enemies_flashed
        FROM fact_match_players
        WHERE steam_id_64 = ?
    """, (steam_id,))
    row = cursor.fetchone()
    total_flash = row[0] or 0
    total_smoke = row[1] or 0
    total_molotov = row[2] or 0
    total_he = row[3] or 0
    nade_damage = row[4] or 0
    total_flash_time = row[5] or 0.0
    total_enemies_flashed = row[6] or 0
    total_nades = total_flash + total_smoke + total_molotov + total_he
    flash_per_round = total_flash / total_rounds
    smoke_per_round = total_smoke / total_rounds
    molotov_per_round = total_molotov / total_rounds
    he_per_round = total_he / total_rounds
    usage_rate = total_nades / total_rounds
    # Nade damage = HE + molotov damage (throw_harm_enemy in L2).
    nade_dmg_per_round = SafeAggregator.safe_divide(nade_damage, total_rounds)
    nade_dmg_per_nade = SafeAggregator.safe_divide(nade_damage, total_he + total_molotov)
    flash_time_per_round = total_flash_time / total_rounds
    flash_enemies_per_round = total_enemies_flashed / total_rounds
    # Flash efficiency: enemies blinded per flash thrown
    # (1.0 == one enemy blinded per flash; 2.0 is very good).
    flash_efficiency = SafeAggregator.safe_divide(total_enemies_flashed, total_flash)
    # Zeus: event logs often miss taser weapon_fire, so we count rounds in
    # which the player HAD a Zeus equipped instead of shots/kills.
    # The economy table is guaranteed present here (guard above).
    cursor.execute("""
        SELECT COUNT(*)
        FROM fact_round_player_economy
        WHERE steam_id_64 = ? AND has_zeus = 1
    """, (steam_id,))
    zeus_equipped_count = cursor.fetchone()[0] or 0
    # Composite impact score: damage, blinding efficiency, and throughput.
    impact_score = (
        nade_dmg_per_round * 0.3 +
        flash_efficiency * 2.0 +
        usage_rate * 10.0
    )
    return {
        'tac_util_flash_per_round': round(flash_per_round, 2),
        'tac_util_smoke_per_round': round(smoke_per_round, 2),
        'tac_util_molotov_per_round': round(molotov_per_round, 2),
        'tac_util_he_per_round': round(he_per_round, 2),
        'tac_util_usage_rate': round(usage_rate, 2),
        'tac_util_nade_dmg_per_round': round(nade_dmg_per_round, 2),
        'tac_util_nade_dmg_per_nade': round(nade_dmg_per_nade, 2),
        'tac_util_flash_time_per_round': round(flash_time_per_round, 2),
        'tac_util_flash_enemies_per_round': round(flash_enemies_per_round, 2),
        'tac_util_flash_efficiency': round(flash_efficiency, 3),
        'tac_util_impact_score': round(impact_score, 2),
        'tac_util_zeus_equipped_count': zeus_equipped_count,
    }
@staticmethod
def _calculate_economy(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
    """
    Compute Economy Efficiency features (8 columns).

    Buy tiers by per-round equipment_value:
        eco   : < 2000
        force : 2000 - 3499
        full  : >= 3500

    Columns:
        - tac_eco_dmg_per_1k
        - tac_eco_kpr_eco_rounds, tac_eco_kd_eco_rounds
        - tac_eco_kpr_force_rounds, tac_eco_kpr_full_rounds
        - tac_eco_save_discipline
        - tac_eco_force_success_rate
        - tac_eco_efficiency_score

    Requires fact_round_player_economy; an all-zero dict is returned when
    that table is absent.
    """
    cur = conn_l2.cursor()
    # Guard: the per-round economy table only exists in leetify mode.
    cur.execute("""
        SELECT COUNT(*) FROM sqlite_master
        WHERE type='table' AND name='fact_round_player_economy'
    """)
    if not (cur.fetchone()[0] > 0):
        return {
            'tac_eco_dmg_per_1k': 0.0,
            'tac_eco_kpr_eco_rounds': 0.0,
            'tac_eco_kd_eco_rounds': 0.0,
            'tac_eco_kpr_force_rounds': 0.0,
            'tac_eco_kpr_full_rounds': 0.0,
            'tac_eco_save_discipline': 0.0,
            'tac_eco_force_success_rate': 0.0,
            'tac_eco_efficiency_score': 0.0,
        }
    # Average spend per round; falls back to 4000 when missing/implausible.
    cur.execute("""
        SELECT AVG(equipment_value)
        FROM fact_round_player_economy
        WHERE steam_id_64 = ?
        AND equipment_value IS NOT NULL
        AND equipment_value > 0 -- Filter out zero equipment value rounds? Or include them?
    """, (steam_id,))
    equip_row = cur.fetchone()
    avg_equip_value = equip_row[0] if equip_row and equip_row[0] else 4000.0
    if avg_equip_value < 100:
        # Sub-$100 averages are treated as bad data, not real spend.
        avg_equip_value = 4000.0
    # Lifetime damage and rounds from the match-level fact table.
    cur.execute("""
        SELECT SUM(damage_total), SUM(round_total)
        FROM fact_match_players
        WHERE steam_id_64 = ?
    """, (steam_id,))
    dmg_sum, rounds_sum = cur.fetchone()
    total_damage = dmg_sum or 0
    total_rounds = rounds_sum or 1
    avg_dmg_per_round = SafeAggregator.safe_divide(total_damage, total_rounds)
    # ADR normalised by spend: e.g. 80 ADR / (4000/1000) = 20 dmg per $1k.
    dmg_per_1k = SafeAggregator.safe_divide(avg_dmg_per_round, (avg_equip_value / 1000.0))
    # ECO rounds (< $2000): per-round kills and deaths from the event log.
    cur.execute("""
        SELECT
            e.match_id,
            e.round_num,
            e.steam_id_64,
            COUNT(CASE WHEN fre.event_type = 'kill' AND fre.attacker_steam_id = e.steam_id_64 THEN 1 END) as kills,
            COUNT(CASE WHEN fre.event_type = 'kill' AND fre.victim_steam_id = e.steam_id_64 THEN 1 END) as deaths
        FROM fact_round_player_economy e
        LEFT JOIN fact_round_events fre ON e.match_id = fre.match_id AND e.round_num = fre.round_num
        WHERE e.steam_id_64 = ?
        AND e.equipment_value < 2000
        GROUP BY e.match_id, e.round_num, e.steam_id_64
    """, (steam_id,))
    eco_rows = cur.fetchall()
    eco_kills = sum(r[3] for r in eco_rows)
    eco_deaths = sum(r[4] for r in eco_rows)
    eco_round_count = len(eco_rows)
    kpr_eco = SafeAggregator.safe_divide(eco_kills, eco_round_count)
    kd_eco = SafeAggregator.safe_divide(eco_kills, eco_deaths)
    # FORCE rounds ($2000 - $3499): kills plus round outcome for win rate.
    cur.execute("""
        SELECT
            e.match_id,
            e.round_num,
            e.steam_id_64,
            COUNT(CASE WHEN fre.event_type = 'kill' AND fre.attacker_steam_id = e.steam_id_64 THEN 1 END) as kills,
            fr.winner_side,
            e.side
        FROM fact_round_player_economy e
        LEFT JOIN fact_round_events fre ON e.match_id = fre.match_id AND e.round_num = fre.round_num
        LEFT JOIN fact_rounds fr ON e.match_id = fr.match_id AND e.round_num = fr.round_num
        WHERE e.steam_id_64 = ?
        AND e.equipment_value >= 2000
        AND e.equipment_value < 3500
        GROUP BY e.match_id, e.round_num, e.steam_id_64, fr.winner_side, e.side
    """, (steam_id,))
    force_rows = cur.fetchall()
    force_kills = sum(r[3] for r in force_rows)
    force_round_count = len(force_rows)
    # Round is "won" when the round's winner_side matches the player's side.
    force_wins = sum(1 for r in force_rows if r[4] == r[5])
    kpr_force = SafeAggregator.safe_divide(force_kills, force_round_count)
    force_success = SafeAggregator.safe_divide(force_wins, force_round_count)
    # FULL BUY rounds (>= $3500): kills only.
    cur.execute("""
        SELECT
            e.match_id,
            e.round_num,
            e.steam_id_64,
            COUNT(CASE WHEN fre.event_type = 'kill' AND fre.attacker_steam_id = e.steam_id_64 THEN 1 END) as kills
        FROM fact_round_player_economy e
        LEFT JOIN fact_round_events fre ON e.match_id = fre.match_id AND e.round_num = fre.round_num
        WHERE e.steam_id_64 = ?
        AND e.equipment_value >= 3500
        GROUP BY e.match_id, e.round_num, e.steam_id_64
    """, (steam_id,))
    full_rows = cur.fetchall()
    full_kills = sum(r[3] for r in full_rows)
    full_round_count = len(full_rows)
    kpr_full = SafeAggregator.safe_divide(full_kills, full_round_count)
    # Save discipline: share of rounds NOT spent on eco buys.
    save_discipline = 1.0 - SafeAggregator.safe_divide(eco_round_count, total_rounds)
    # Composite: KPR weighted by how hard each economy state is (eco hardest).
    efficiency_score = (kpr_eco * 1.5 + kpr_force * 1.2 + kpr_full * 1.0) / 3.7
    return {
        'tac_eco_dmg_per_1k': round(dmg_per_1k, 2),
        'tac_eco_kpr_eco_rounds': round(kpr_eco, 3),
        'tac_eco_kd_eco_rounds': round(kd_eco, 3),
        'tac_eco_kpr_force_rounds': round(kpr_force, 3),
        'tac_eco_kpr_full_rounds': round(kpr_full, 3),
        'tac_eco_save_discipline': round(save_discipline, 3),
        'tac_eco_force_success_rate': round(force_success, 3),
        'tac_eco_efficiency_score': round(efficiency_score, 2),
    }
def _get_default_tactical_features() -> Dict[str, Any]:
"""Return default zero values for all 44 TACTICAL features"""
return {
# Opening Impact (8)
'tac_avg_fk': 0.0,
'tac_avg_fd': 0.0,
'tac_fk_rate': 0.0,
'tac_fd_rate': 0.0,
'tac_fk_success_rate': 0.0,
'tac_entry_kill_rate': 0.0,
'tac_entry_death_rate': 0.0,
'tac_opening_duel_winrate': 0.0,
# Multi-Kill (6)
'tac_avg_2k': 0.0,
'tac_avg_3k': 0.0,
'tac_avg_4k': 0.0,
'tac_avg_5k': 0.0,
'tac_multikill_rate': 0.0,
'tac_ace_count': 0,
# Clutch Performance (10)
'tac_clutch_1v1_attempts': 0,
'tac_clutch_1v1_wins': 0,
'tac_clutch_1v1_rate': 0.0,
'tac_clutch_1v2_attempts': 0,
'tac_clutch_1v2_wins': 0,
'tac_clutch_1v2_rate': 0.0,
'tac_clutch_1v3_plus_attempts': 0,
'tac_clutch_1v3_plus_wins': 0,
'tac_clutch_1v3_plus_rate': 0.0,
'tac_clutch_impact_score': 0.0,
# Utility Mastery (12)
'tac_util_flash_per_round': 0.0,
'tac_util_smoke_per_round': 0.0,
'tac_util_molotov_per_round': 0.0,
'tac_util_he_per_round': 0.0,
'tac_util_usage_rate': 0.0,
'tac_util_nade_dmg_per_round': 0.0,
'tac_util_nade_dmg_per_nade': 0.0,
'tac_util_flash_time_per_round': 0.0,
'tac_util_flash_enemies_per_round': 0.0,
'tac_util_flash_efficiency': 0.0,
# 'tac_util_smoke_timing_score': 0.0, # Removed
'tac_util_impact_score': 0.0,
'tac_util_zeus_equipped_count': 0,
# 'tac_util_zeus_kills': 0, # Removed
# Economy Efficiency (8)
'tac_eco_dmg_per_1k': 0.0,
'tac_eco_kpr_eco_rounds': 0.0,
'tac_eco_kd_eco_rounds': 0.0,
'tac_eco_kpr_force_rounds': 0.0,
'tac_eco_kpr_full_rounds': 0.0,
'tac_eco_save_discipline': 0.0,
'tac_eco_force_success_rate': 0.0,
'tac_eco_efficiency_score': 0.0,
}

394
database/L3/schema.sql Normal file
View File

@@ -0,0 +1,394 @@
-- ============================================================================
-- L3 Schema: Player Features Data Mart (Version 2.0)
-- ============================================================================
-- Based on: L3_ARCHITECTURE_PLAN.md
-- Design: 5-Tier Feature Hierarchy (CORE → TACTICAL → INTELLIGENCE → META → COMPOSITE)
-- Granularity: One row per player (Aggregated Profile)
-- Total Columns: 200 features + 6 metadata = 206 columns
-- ============================================================================
-- ============================================================================
-- Main Table: dm_player_features
-- One row per player; columns grouped by feature tier (CORE -> COMPOSITE).
-- ============================================================================
CREATE TABLE IF NOT EXISTS dm_player_features (
    -- ========================================================================
    -- Metadata (6 columns)
    -- ========================================================================
    steam_id_64 TEXT PRIMARY KEY,
    total_matches INTEGER NOT NULL DEFAULT 0,
    total_rounds INTEGER NOT NULL DEFAULT 0,
    first_match_date INTEGER, -- Unix timestamp
    last_match_date INTEGER, -- Unix timestamp
    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    -- ========================================================================
    -- TIER 1: CORE (41 columns)
    -- Direct aggregations from fact_match_players
    -- ========================================================================
    -- Basic Performance (15 columns)
    core_avg_rating REAL DEFAULT 0.0,
    core_avg_rating2 REAL DEFAULT 0.0,
    core_avg_kd REAL DEFAULT 0.0,
    core_avg_adr REAL DEFAULT 0.0,
    core_avg_kast REAL DEFAULT 0.0,
    core_avg_rws REAL DEFAULT 0.0,
    core_avg_hs_kills REAL DEFAULT 0.0,
    core_hs_rate REAL DEFAULT 0.0, -- hs/total_kills
    core_total_kills INTEGER DEFAULT 0,
    core_total_deaths INTEGER DEFAULT 0,
    core_total_assists INTEGER DEFAULT 0,
    core_avg_assists REAL DEFAULT 0.0,
    core_kpr REAL DEFAULT 0.0, -- kills per round
    core_dpr REAL DEFAULT 0.0, -- deaths per round
    core_survival_rate REAL DEFAULT 0.0,
    -- Match Stats (8 columns)
    core_win_rate REAL DEFAULT 0.0,
    core_wins INTEGER DEFAULT 0,
    core_losses INTEGER DEFAULT 0,
    core_avg_match_duration INTEGER DEFAULT 0, -- seconds
    core_avg_mvps REAL DEFAULT 0.0,
    core_mvp_rate REAL DEFAULT 0.0,
    core_avg_elo_change REAL DEFAULT 0.0,
    core_total_elo_gained REAL DEFAULT 0.0,
    -- Weapon Stats (12 columns)
    core_avg_awp_kills REAL DEFAULT 0.0,
    core_awp_usage_rate REAL DEFAULT 0.0,
    core_avg_knife_kills REAL DEFAULT 0.0,
    core_avg_zeus_kills REAL DEFAULT 0.0,
    core_zeus_buy_rate REAL DEFAULT 0.0,
    core_top_weapon TEXT,
    core_top_weapon_kills INTEGER DEFAULT 0,
    core_top_weapon_hs_rate REAL DEFAULT 0.0,
    core_weapon_diversity REAL DEFAULT 0.0,
    core_rifle_hs_rate REAL DEFAULT 0.0,
    core_pistol_hs_rate REAL DEFAULT 0.0,
    core_smg_kills_total INTEGER DEFAULT 0,
    -- Objective Stats (6 columns)
    core_avg_plants REAL DEFAULT 0.0,
    core_avg_defuses REAL DEFAULT 0.0,
    core_avg_flash_assists REAL DEFAULT 0.0,
    core_plant_success_rate REAL DEFAULT 0.0,
    core_defuse_success_rate REAL DEFAULT 0.0,
    core_objective_impact REAL DEFAULT 0.0,
    -- ========================================================================
    -- TIER 2: TACTICAL (44 columns)
    -- Multi-table JOINs, conditional aggregations
    -- ========================================================================
    -- Opening Impact (8 columns)
    tac_avg_fk REAL DEFAULT 0.0,
    tac_avg_fd REAL DEFAULT 0.0,
    tac_fk_rate REAL DEFAULT 0.0,
    tac_fd_rate REAL DEFAULT 0.0,
    tac_fk_success_rate REAL DEFAULT 0.0,
    tac_entry_kill_rate REAL DEFAULT 0.0,
    tac_entry_death_rate REAL DEFAULT 0.0,
    tac_opening_duel_winrate REAL DEFAULT 0.0,
    -- Multi-Kill (6 columns)
    tac_avg_2k REAL DEFAULT 0.0,
    tac_avg_3k REAL DEFAULT 0.0,
    tac_avg_4k REAL DEFAULT 0.0,
    tac_avg_5k REAL DEFAULT 0.0,
    tac_multikill_rate REAL DEFAULT 0.0,
    tac_ace_count INTEGER DEFAULT 0,
    -- Clutch Performance (10 columns)
    tac_clutch_1v1_attempts INTEGER DEFAULT 0,
    tac_clutch_1v1_wins INTEGER DEFAULT 0,
    tac_clutch_1v1_rate REAL DEFAULT 0.0,
    tac_clutch_1v2_attempts INTEGER DEFAULT 0,
    tac_clutch_1v2_wins INTEGER DEFAULT 0,
    tac_clutch_1v2_rate REAL DEFAULT 0.0,
    tac_clutch_1v3_plus_attempts INTEGER DEFAULT 0,
    tac_clutch_1v3_plus_wins INTEGER DEFAULT 0,
    tac_clutch_1v3_plus_rate REAL DEFAULT 0.0,
    tac_clutch_impact_score REAL DEFAULT 0.0,
    -- Utility Mastery (12 columns; tac_util_smoke_timing_score was dropped)
    tac_util_flash_per_round REAL DEFAULT 0.0,
    tac_util_smoke_per_round REAL DEFAULT 0.0,
    tac_util_molotov_per_round REAL DEFAULT 0.0,
    tac_util_he_per_round REAL DEFAULT 0.0,
    tac_util_usage_rate REAL DEFAULT 0.0,
    tac_util_nade_dmg_per_round REAL DEFAULT 0.0,
    tac_util_nade_dmg_per_nade REAL DEFAULT 0.0,
    tac_util_flash_time_per_round REAL DEFAULT 0.0,
    tac_util_flash_enemies_per_round REAL DEFAULT 0.0,
    tac_util_flash_efficiency REAL DEFAULT 0.0,
    tac_util_impact_score REAL DEFAULT 0.0,
    tac_util_zeus_equipped_count INTEGER DEFAULT 0,
    -- tac_util_zeus_kills REMOVED
    -- Economy Efficiency (8 columns)
    tac_eco_dmg_per_1k REAL DEFAULT 0.0,
    tac_eco_kpr_eco_rounds REAL DEFAULT 0.0,
    tac_eco_kd_eco_rounds REAL DEFAULT 0.0,
    tac_eco_kpr_force_rounds REAL DEFAULT 0.0,
    tac_eco_kpr_full_rounds REAL DEFAULT 0.0,
    tac_eco_save_discipline REAL DEFAULT 0.0,
    tac_eco_force_success_rate REAL DEFAULT 0.0,
    tac_eco_efficiency_score REAL DEFAULT 0.0,
    -- ========================================================================
    -- TIER 3: INTELLIGENCE (52 columns)
    -- Advanced analytics on fact_round_events
    -- ========================================================================
    -- High IQ Kills (9 columns)
    int_wallbang_kills INTEGER DEFAULT 0,
    int_wallbang_rate REAL DEFAULT 0.0,
    int_smoke_kills INTEGER DEFAULT 0,
    int_smoke_kill_rate REAL DEFAULT 0.0,
    int_blind_kills INTEGER DEFAULT 0,
    int_blind_kill_rate REAL DEFAULT 0.0,
    int_noscope_kills INTEGER DEFAULT 0,
    int_noscope_rate REAL DEFAULT 0.0,
    int_high_iq_score REAL DEFAULT 0.0,
    -- Timing Analysis (12 columns)
    int_timing_early_kills INTEGER DEFAULT 0,
    int_timing_mid_kills INTEGER DEFAULT 0,
    int_timing_late_kills INTEGER DEFAULT 0,
    int_timing_early_kill_share REAL DEFAULT 0.0,
    int_timing_mid_kill_share REAL DEFAULT 0.0,
    int_timing_late_kill_share REAL DEFAULT 0.0,
    int_timing_avg_kill_time REAL DEFAULT 0.0,
    int_timing_early_deaths INTEGER DEFAULT 0,
    int_timing_early_death_rate REAL DEFAULT 0.0,
    int_timing_aggression_index REAL DEFAULT 0.0,
    int_timing_patience_score REAL DEFAULT 0.0,
    int_timing_first_contact_time REAL DEFAULT 0.0,
    -- Pressure Performance (9 columns)
    int_pressure_comeback_kd REAL DEFAULT 0.0,
    int_pressure_comeback_rating REAL DEFAULT 0.0,
    int_pressure_losing_streak_kd REAL DEFAULT 0.0,
    int_pressure_matchpoint_kpr REAL DEFAULT 0.0,
    int_pressure_clutch_composure REAL DEFAULT 0.0,
    int_pressure_entry_in_loss REAL DEFAULT 0.0,
    int_pressure_performance_index REAL DEFAULT 0.0,
    int_pressure_big_moment_score REAL DEFAULT 0.0,
    int_pressure_tilt_resistance REAL DEFAULT 0.0,
    -- Position Mastery (14 columns)
    int_pos_site_a_control_rate REAL DEFAULT 0.0,
    int_pos_site_b_control_rate REAL DEFAULT 0.0,
    int_pos_mid_control_rate REAL DEFAULT 0.0,
    int_pos_favorite_position TEXT,
    int_pos_position_diversity REAL DEFAULT 0.0,
    int_pos_rotation_speed REAL DEFAULT 0.0,
    int_pos_map_coverage REAL DEFAULT 0.0,
    int_pos_lurk_tendency REAL DEFAULT 0.0,
    int_pos_site_anchor_score REAL DEFAULT 0.0,
    int_pos_entry_route_diversity REAL DEFAULT 0.0,
    int_pos_retake_positioning REAL DEFAULT 0.0,
    int_pos_postplant_positioning REAL DEFAULT 0.0,
    int_pos_spatial_iq_score REAL DEFAULT 0.0,
    int_pos_avg_distance_from_teammates REAL DEFAULT 0.0,
    -- Trade Network (8 columns)
    int_trade_kill_count INTEGER DEFAULT 0,
    int_trade_kill_rate REAL DEFAULT 0.0,
    int_trade_response_time REAL DEFAULT 0.0,
    int_trade_given_count INTEGER DEFAULT 0,
    int_trade_given_rate REAL DEFAULT 0.0,
    int_trade_balance REAL DEFAULT 0.0,
    int_trade_efficiency REAL DEFAULT 0.0,
    int_teamwork_score REAL DEFAULT 0.0,
    -- ========================================================================
    -- TIER 4: META (52 columns)
    -- Long-term patterns and meta-features
    -- ========================================================================
    -- Stability (8 columns)
    meta_rating_volatility REAL DEFAULT 0.0,
    meta_recent_form_rating REAL DEFAULT 0.0,
    meta_win_rating REAL DEFAULT 0.0,
    meta_loss_rating REAL DEFAULT 0.0,
    meta_rating_consistency REAL DEFAULT 0.0,
    meta_time_rating_correlation REAL DEFAULT 0.0,
    meta_map_stability REAL DEFAULT 0.0,
    meta_elo_tier_stability REAL DEFAULT 0.0,
    -- Side Preference (14 columns)
    meta_side_ct_rating REAL DEFAULT 0.0,
    meta_side_t_rating REAL DEFAULT 0.0,
    meta_side_ct_kd REAL DEFAULT 0.0,
    meta_side_t_kd REAL DEFAULT 0.0,
    meta_side_ct_win_rate REAL DEFAULT 0.0,
    meta_side_t_win_rate REAL DEFAULT 0.0,
    meta_side_ct_fk_rate REAL DEFAULT 0.0,
    meta_side_t_fk_rate REAL DEFAULT 0.0,
    meta_side_ct_kast REAL DEFAULT 0.0,
    meta_side_t_kast REAL DEFAULT 0.0,
    meta_side_rating_diff REAL DEFAULT 0.0,
    meta_side_kd_diff REAL DEFAULT 0.0,
    meta_side_preference TEXT,
    meta_side_balance_score REAL DEFAULT 0.0,
    -- Opponent Adaptation (12 columns)
    meta_opp_vs_lower_elo_rating REAL DEFAULT 0.0,
    meta_opp_vs_similar_elo_rating REAL DEFAULT 0.0,
    meta_opp_vs_higher_elo_rating REAL DEFAULT 0.0,
    meta_opp_vs_lower_elo_kd REAL DEFAULT 0.0,
    meta_opp_vs_similar_elo_kd REAL DEFAULT 0.0,
    meta_opp_vs_higher_elo_kd REAL DEFAULT 0.0,
    meta_opp_elo_adaptation REAL DEFAULT 0.0,
    meta_opp_stomping_score REAL DEFAULT 0.0,
    meta_opp_upset_score REAL DEFAULT 0.0,
    meta_opp_consistency_across_elos REAL DEFAULT 0.0,
    meta_opp_rank_resistance REAL DEFAULT 0.0,
    meta_opp_smurf_detection REAL DEFAULT 0.0,
    -- Map Specialization (10 columns)
    meta_map_best_map TEXT,
    meta_map_best_rating REAL DEFAULT 0.0,
    meta_map_worst_map TEXT,
    meta_map_worst_rating REAL DEFAULT 0.0,
    meta_map_diversity REAL DEFAULT 0.0,
    meta_map_pool_size INTEGER DEFAULT 0,
    meta_map_specialist_score REAL DEFAULT 0.0,
    meta_map_versatility REAL DEFAULT 0.0,
    meta_map_comfort_zone_rate REAL DEFAULT 0.0,
    meta_map_adaptation REAL DEFAULT 0.0,
    -- Session Pattern (8 columns)
    meta_session_avg_matches_per_day REAL DEFAULT 0.0,
    meta_session_longest_streak INTEGER DEFAULT 0,
    meta_session_weekend_rating REAL DEFAULT 0.0,
    meta_session_weekday_rating REAL DEFAULT 0.0,
    meta_session_morning_rating REAL DEFAULT 0.0,
    meta_session_afternoon_rating REAL DEFAULT 0.0,
    meta_session_evening_rating REAL DEFAULT 0.0,
    meta_session_night_rating REAL DEFAULT 0.0,
    -- ========================================================================
    -- TIER 5: COMPOSITE (11 columns)
    -- Weighted composite scores (0-100)
    -- ========================================================================
    score_aim REAL DEFAULT 0.0,
    score_clutch REAL DEFAULT 0.0,
    score_pistol REAL DEFAULT 0.0,
    score_defense REAL DEFAULT 0.0,
    score_utility REAL DEFAULT 0.0,
    score_stability REAL DEFAULT 0.0,
    score_economy REAL DEFAULT 0.0,
    score_pace REAL DEFAULT 0.0,
    score_overall REAL DEFAULT 0.0,
    tier_classification TEXT,
    tier_percentile REAL DEFAULT 0.0,
    -- Foreign key constraint
    FOREIGN KEY (steam_id_64) REFERENCES dim_players(steam_id_64)
);
-- Indexes for query performance
CREATE INDEX IF NOT EXISTS idx_dm_player_features_rating ON dm_player_features(core_avg_rating DESC);
CREATE INDEX IF NOT EXISTS idx_dm_player_features_matches ON dm_player_features(total_matches DESC);
CREATE INDEX IF NOT EXISTS idx_dm_player_features_tier ON dm_player_features(tier_classification);
CREATE INDEX IF NOT EXISTS idx_dm_player_features_updated ON dm_player_features(last_updated DESC);
-- ============================================================================
-- Auxiliary Table: dm_player_match_history
-- One row per (player, match): per-match performance snapshots used for
-- trend / time-series analysis on top of the aggregated profile table.
-- ============================================================================
CREATE TABLE IF NOT EXISTS dm_player_match_history (
    steam_id_64 TEXT,
    match_id TEXT,
    match_date INTEGER, -- Unix timestamp
    match_sequence INTEGER, -- Player's N-th match
    -- Core performance snapshot
    rating REAL,
    kd_ratio REAL,
    adr REAL,
    kast REAL,
    is_win BOOLEAN,
    -- Match context
    map_name TEXT,
    opponent_avg_elo REAL,
    teammate_avg_rating REAL,
    -- Cumulative stats
    cumulative_rating REAL, -- presumably running rating through this match; semantics set by the builder — confirm
    rolling_10_rating REAL, -- presumably rating over a 10-match rolling window — confirm in builder
    PRIMARY KEY (steam_id_64, match_id),
    FOREIGN KEY (steam_id_64) REFERENCES dm_player_features(steam_id_64) ON DELETE CASCADE,
    FOREIGN KEY (match_id) REFERENCES fact_matches(match_id) ON DELETE CASCADE
);
-- Indexes: newest-first history per player, and reverse lookup by match.
CREATE INDEX IF NOT EXISTS idx_player_history_player_date ON dm_player_match_history(steam_id_64, match_date DESC);
CREATE INDEX IF NOT EXISTS idx_player_history_match ON dm_player_match_history(match_id);
-- ============================================================================
-- Auxiliary Table: dm_player_map_stats
-- One row per (player, map): map-level aggregations of match performance.
-- ============================================================================
CREATE TABLE IF NOT EXISTS dm_player_map_stats (
    steam_id_64 TEXT,
    map_name TEXT,
    matches INTEGER DEFAULT 0,
    wins INTEGER DEFAULT 0,
    win_rate REAL DEFAULT 0.0,
    avg_rating REAL DEFAULT 0.0,
    avg_kd REAL DEFAULT 0.0,
    avg_adr REAL DEFAULT 0.0,
    avg_kast REAL DEFAULT 0.0,
    best_rating REAL DEFAULT 0.0, -- best single-match rating on this map
    worst_rating REAL DEFAULT 0.0, -- worst single-match rating on this map
    PRIMARY KEY (steam_id_64, map_name),
    FOREIGN KEY (steam_id_64) REFERENCES dm_player_features(steam_id_64) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_player_map_stats_player ON dm_player_map_stats(steam_id_64);
CREATE INDEX IF NOT EXISTS idx_player_map_stats_map ON dm_player_map_stats(map_name);
-- ============================================================================
-- Auxiliary Table: dm_player_weapon_stats
-- One row per (player, weapon): weapon usage and effectiveness statistics.
-- ============================================================================
CREATE TABLE IF NOT EXISTS dm_player_weapon_stats (
    steam_id_64 TEXT,
    weapon_name TEXT,
    total_kills INTEGER DEFAULT 0,
    total_headshots INTEGER DEFAULT 0,
    hs_rate REAL DEFAULT 0.0, -- total_headshots / total_kills
    usage_rounds INTEGER DEFAULT 0,
    usage_rate REAL DEFAULT 0.0,
    avg_kills_per_round REAL DEFAULT 0.0,
    effectiveness_score REAL DEFAULT 0.0, -- composite score; formula defined by the builder — confirm
    PRIMARY KEY (steam_id_64, weapon_name),
    FOREIGN KEY (steam_id_64) REFERENCES dm_player_features(steam_id_64) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_player_weapon_stats_player ON dm_player_weapon_stats(steam_id_64);
CREATE INDEX IF NOT EXISTS idx_player_weapon_stats_weapon ON dm_player_weapon_stats(weapon_name);
-- ============================================================================
-- Schema Summary
-- ============================================================================
-- dm_player_features: 206 columns (6 metadata + 200 features)
-- - Tier 1 CORE: 41 columns
-- - Tier 2 TACTICAL: 44 columns
-- - Tier 3 INTELLIGENCE: 52 columns
-- - Tier 4 META: 52 columns
-- - Tier 5 COMPOSITE: 11 columns
--
-- dm_player_match_history: Per-match snapshots for trend analysis
-- dm_player_map_stats: Map-level aggregations
-- dm_player_weapon_stats: Weapon usage statistics
-- ============================================================================