""" Base processor classes and utility functions for L3 feature calculation """ import sqlite3 import math from typing import Dict, Any, List, Optional from abc import ABC, abstractmethod class SafeAggregator: """Utility class for safe mathematical operations with NULL handling""" @staticmethod def safe_divide(numerator: float, denominator: float, default: float = 0.0) -> float: """Safe division with NULL/zero handling""" if denominator is None or denominator == 0: return default if numerator is None: return default return numerator / denominator @staticmethod def safe_avg(values: List[float], default: float = 0.0) -> float: """Safe average calculation""" if not values or len(values) == 0: return default valid_values = [v for v in values if v is not None] if not valid_values: return default return sum(valid_values) / len(valid_values) @staticmethod def safe_stddev(values: List[float], default: float = 0.0) -> float: """Safe standard deviation calculation""" if not values or len(values) < 2: return default valid_values = [v for v in values if v is not None] if len(valid_values) < 2: return default mean = sum(valid_values) / len(valid_values) variance = sum((x - mean) ** 2 for x in valid_values) / len(valid_values) return math.sqrt(variance) @staticmethod def safe_sum(values: List[float], default: float = 0.0) -> float: """Safe sum calculation""" if not values: return default valid_values = [v for v in values if v is not None] return sum(valid_values) if valid_values else default @staticmethod def safe_min(values: List[float], default: float = 0.0) -> float: """Safe minimum calculation""" if not values: return default valid_values = [v for v in values if v is not None] return min(valid_values) if valid_values else default @staticmethod def safe_max(values: List[float], default: float = 0.0) -> float: """Safe maximum calculation""" if not values: return default valid_values = [v for v in values if v is not None] return max(valid_values) if valid_values else default class NormalizationUtils: """Z-score normalization and scaling utilities""" @staticmethod def z_score_normalize(value: float, mean: float, std: float, scale_min: float = 0.0, scale_max: float = 100.0) -> float: """ Z-score normalization to a target range Args: value: Value to normalize mean: Population mean std: Population standard deviation scale_min: Target minimum (default: 0) scale_max: Target maximum (default: 100) Returns: Normalized value in [scale_min, scale_max] range """ if std == 0 or std is None: return (scale_min + scale_max) / 2.0 # Calculate z-score z = (value - mean) / std # Map to target range (±3σ covers ~99.7% of data) # z = -3 → scale_min, z = 0 → midpoint, z = 3 → scale_max midpoint = (scale_min + scale_max) / 2.0 scale_range = (scale_max - scale_min) / 6.0 # 6σ total range normalized = midpoint + (z * scale_range) # Clamp to target range return max(scale_min, min(scale_max, normalized)) @staticmethod def percentile_normalize(value: float, all_values: List[float], scale_min: float = 0.0, scale_max: float = 100.0) -> float: """ Percentile-based normalization Args: value: Value to normalize all_values: All values in population scale_min: Target minimum scale_max: Target maximum Returns: Normalized value based on percentile """ if not all_values: return scale_min sorted_values = sorted(all_values) rank = sum(1 for v in sorted_values if v < value) percentile = rank / len(sorted_values) return scale_min + (percentile * (scale_max - scale_min)) @staticmethod def min_max_normalize(value: float, min_val: float, max_val: float, scale_min: float = 0.0, scale_max: float = 100.0) -> float: """Min-max normalization to target range""" if max_val == min_val: return (scale_min + scale_max) / 2.0 normalized = (value - min_val) / (max_val - min_val) return scale_min + (normalized * (scale_max - scale_min)) @staticmethod def calculate_population_stats(conn_l3: sqlite3.Connection, column: str) -> Dict[str, float]: """ Calculate population mean and std for a column in dm_player_features Args: conn_l3: L3 database connection column: Column name to analyze Returns: dict with 'mean', 'std', 'min', 'max' """ cursor = conn_l3.cursor() cursor.execute(f""" SELECT AVG({column}) as mean, STDDEV({column}) as std, MIN({column}) as min, MAX({column}) as max FROM dm_player_features WHERE {column} IS NOT NULL """) row = cursor.fetchone() return { 'mean': row[0] if row[0] is not None else 0.0, 'std': row[1] if row[1] is not None else 1.0, 'min': row[2] if row[2] is not None else 0.0, 'max': row[3] if row[3] is not None else 0.0 } class BaseFeatureProcessor(ABC): """ Abstract base class for all feature processors Each processor implements the calculate() method which returns a dict of feature_name: value pairs. """ MIN_MATCHES_REQUIRED = 5 # Minimum matches needed for feature calculation @staticmethod @abstractmethod def calculate(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]: """ Calculate features for a specific player Args: steam_id: Player's Steam ID (steam_id_64) conn_l2: Connection to L2 database Returns: Dictionary of {feature_name: value} """ pass @staticmethod def check_min_matches(steam_id: str, conn_l2: sqlite3.Connection, min_required: int = None) -> bool: """ Check if player has minimum required matches Args: steam_id: Player's Steam ID conn_l2: L2 database connection min_required: Minimum matches (uses class default if None) Returns: True if player has enough matches """ if min_required is None: min_required = BaseFeatureProcessor.MIN_MATCHES_REQUIRED cursor = conn_l2.cursor() cursor.execute(""" SELECT COUNT(*) FROM fact_match_players WHERE steam_id_64 = ? """, (steam_id,)) count = cursor.fetchone()[0] return count >= min_required @staticmethod def get_player_match_count(steam_id: str, conn_l2: sqlite3.Connection) -> int: """Get total match count for player""" cursor = conn_l2.cursor() cursor.execute(""" SELECT COUNT(*) FROM fact_match_players WHERE steam_id_64 = ? """, (steam_id,)) return cursor.fetchone()[0] @staticmethod def get_player_round_count(steam_id: str, conn_l2: sqlite3.Connection) -> int: """Get total round count for player""" cursor = conn_l2.cursor() cursor.execute(""" SELECT SUM(round_total) FROM fact_match_players WHERE steam_id_64 = ? """, (steam_id,)) result = cursor.fetchone()[0] return result if result is not None else 0 class WeaponCategories: """Weapon categorization constants""" RIFLES = [ 'ak47', 'aug', 'm4a1', 'm4a1_silencer', 'sg556', 'galilar', 'famas' ] PISTOLS = [ 'glock', 'usp_silencer', 'hkp2000', 'p250', 'fiveseven', 'tec9', 'cz75a', 'deagle', 'elite', 'revolver' ] SMGS = [ 'mac10', 'mp9', 'mp7', 'mp5sd', 'ump45', 'p90', 'bizon' ] SNIPERS = [ 'awp', 'ssg08', 'scar20', 'g3sg1' ] HEAVY = [ 'nova', 'xm1014', 'mag7', 'sawedoff', 'm249', 'negev' ] @classmethod def get_category(cls, weapon_name: str) -> str: """Get category for a weapon""" weapon_clean = weapon_name.lower().replace('weapon_', '') if weapon_clean in cls.RIFLES: return 'rifle' elif weapon_clean in cls.PISTOLS: return 'pistol' elif weapon_clean in cls.SMGS: return 'smg' elif weapon_clean in cls.SNIPERS: return 'sniper' elif weapon_clean in cls.HEAVY: return 'heavy' elif weapon_clean == 'knife': return 'knife' elif weapon_clean == 'hegrenade': return 'grenade' else: return 'other' class MapAreas: """Map area classification utilities (for position analysis)""" # This will be expanded with actual map coordinates in IntelligenceProcessor SITE_A = 'site_a' SITE_B = 'site_b' MID = 'mid' SPAWN_T = 'spawn_t' SPAWN_CT = 'spawn_ct' @staticmethod def classify_position(x: float, y: float, z: float, map_name: str) -> str: """ Classify position into map area (simplified) Full implementation requires map-specific coordinate ranges """ # Placeholder - will be implemented with map data return "unknown" # Export all classes __all__ = [ 'SafeAggregator', 'NormalizationUtils', 'BaseFeatureProcessor', 'WeaponCategories', 'MapAreas' ]