3.0.0 : Reconstructed Database System.

2026-01-29 02:21:44 +08:00
parent 1642adb00e
commit 04ee957af6
69 changed files with 10258 additions and 6546 deletions
--- a/database/L3/processors/base_processor.py
+++ b/database/L3/processors/base_processor.py
@@ -0,0 +1,320 @@
+"""
+Base processor classes and utility functions for L3 feature calculation
+"""
+
+import sqlite3
+import math
+from typing import Dict, Any, List, Optional
+from abc import ABC, abstractmethod
+
+
+class SafeAggregator:
+    """NULL-tolerant arithmetic helpers.
+
+    Each helper skips ``None`` entries and hands back ``default`` instead of
+    raising when there is nothing usable to aggregate.
+    """
+
+    @staticmethod
+    def _drop_nulls(values: List[float]) -> List[float]:
+        """Return the entries of ``values`` that are not ``None``, in order."""
+        return [item for item in values if item is not None]
+
+    @staticmethod
+    def safe_divide(numerator: float, denominator: float, default: float = 0.0) -> float:
+        """Divide, returning ``default`` for a None operand or a zero denominator."""
+        unusable = denominator is None or denominator == 0 or numerator is None
+        return default if unusable else numerator / denominator
+
+    @staticmethod
+    def safe_avg(values: List[float], default: float = 0.0) -> float:
+        """Arithmetic mean of the non-None entries, or ``default`` if there are none."""
+        if not values:
+            return default
+        present = SafeAggregator._drop_nulls(values)
+        if present:
+            return sum(present) / len(present)
+        return default
+
+    @staticmethod
+    def safe_stddev(values: List[float], default: float = 0.0) -> float:
+        """Population standard deviation (divides by N) of the non-None entries.
+
+        Returns ``default`` when fewer than two usable values are available.
+        """
+        if not values or len(values) < 2:
+            return default
+        present = SafeAggregator._drop_nulls(values)
+        count = len(present)
+        if count < 2:
+            return default
+
+        center = sum(present) / count
+        squared_deviations = [(item - center) ** 2 for item in present]
+        return math.sqrt(sum(squared_deviations) / count)
+
+    @staticmethod
+    def safe_sum(values: List[float], default: float = 0.0) -> float:
+        """Sum of the non-None entries, or ``default`` if there are none."""
+        present = SafeAggregator._drop_nulls(values) if values else []
+        return sum(present) if present else default
+
+    @staticmethod
+    def safe_min(values: List[float], default: float = 0.0) -> float:
+        """Smallest non-None entry, or ``default`` if there are none."""
+        present = SafeAggregator._drop_nulls(values) if values else []
+        return min(present) if present else default
+
+    @staticmethod
+    def safe_max(values: List[float], default: float = 0.0) -> float:
+        """Largest non-None entry, or ``default`` if there are none."""
+        present = SafeAggregator._drop_nulls(values) if values else []
+        return max(present) if present else default
+
+
+class NormalizationUtils:
+    """Z-score, percentile and min-max scaling utilities"""
+
+    @staticmethod
+    def z_score_normalize(value: float, mean: float, std: float,
+                         scale_min: float = 0.0, scale_max: float = 100.0) -> float:
+        """
+        Z-score normalization to a target range
+
+        The z-score is mapped linearly so that z = -3 lands on scale_min,
+        z = 0 on the midpoint and z = +3 on scale_max; anything beyond is
+        clamped to the range.
+
+        Args:
+            value: Value to normalize
+            mean: Population mean
+            std: Population standard deviation
+            scale_min: Target minimum (default: 0)
+            scale_max: Target maximum (default: 100)
+
+        Returns:
+            Normalized value in [scale_min, scale_max] range; the midpoint
+            of the range when std is None or zero
+        """
+        midpoint = (scale_min + scale_max) / 2.0
+        # Check None first so the comparison never runs on a missing value.
+        if std is None or std == 0:
+            return midpoint
+
+        z = (value - mean) / std
+
+        # The target range spans 6 sigma in total (-3 sigma .. +3 sigma).
+        units_per_sigma = (scale_max - scale_min) / 6.0
+        normalized = midpoint + (z * units_per_sigma)
+
+        # Clamp to target range
+        return max(scale_min, min(scale_max, normalized))
+
+    @staticmethod
+    def percentile_normalize(value: float, all_values: List[float],
+                            scale_min: float = 0.0, scale_max: float = 100.0) -> float:
+        """
+        Percentile-based normalization
+
+        The percentile is the fraction of population values strictly below
+        ``value``, so the result never reaches scale_max. None entries in
+        the population are ignored.
+
+        Args:
+            value: Value to normalize
+            all_values: All values in population (None entries are skipped)
+            scale_min: Target minimum
+            scale_max: Target maximum
+
+        Returns:
+            Normalized value based on percentile; scale_min when the
+            population has no usable values
+        """
+        if not all_values:
+            return scale_min
+
+        population = [v for v in all_values if v is not None]
+        if not population:
+            return scale_min
+
+        # Counting does not depend on order, so no sort is needed.
+        below = sum(1 for v in population if v < value)
+        percentile = below / len(population)
+
+        return scale_min + (percentile * (scale_max - scale_min))
+
+    @staticmethod
+    def min_max_normalize(value: float, min_val: float, max_val: float,
+                         scale_min: float = 0.0, scale_max: float = 100.0) -> float:
+        """
+        Min-max normalization to target range
+
+        The result is not clamped: a value outside [min_val, max_val] maps
+        outside [scale_min, scale_max]. A degenerate range
+        (max_val == min_val) yields the midpoint of the target range.
+        """
+        if max_val == min_val:
+            return (scale_min + scale_max) / 2.0
+
+        normalized = (value - min_val) / (max_val - min_val)
+        return scale_min + (normalized * (scale_max - scale_min))
+
+    @staticmethod
+    def calculate_population_stats(conn_l3: sqlite3.Connection, column: str) -> Dict[str, float]:
+        """
+        Calculate population mean and std for a column in dm_player_features
+
+        SQLite has no built-in STDDEV aggregate, so the standard deviation
+        is computed as the population standard deviation (divide by N) from
+        a second query over the squared deviations from the mean.
+
+        Args:
+            conn_l3: L3 database connection
+            column: Column name to analyze (must be a plain identifier)
+
+        Returns:
+            dict with 'mean', 'std', 'min', 'max'. When the column has no
+            non-NULL rows the result is mean 0.0, std 1.0, min 0.0, max 0.0.
+
+        Raises:
+            ValueError: If column is not a plain identifier
+        """
+        # Identifiers cannot be bound as SQL parameters, so the name is
+        # validated before being interpolated into the statement.
+        if not isinstance(column, str) or not column.isidentifier():
+            raise ValueError(f"Invalid column name: {column!r}")
+
+        cursor = conn_l3.cursor()
+        cursor.execute(f"""
+            SELECT
+                AVG({column}) as mean,
+                MIN({column}) as min,
+                MAX({column}) as max,
+                COUNT({column}) as n
+            FROM dm_player_features
+            WHERE {column} IS NOT NULL
+        """)
+        mean, min_val, max_val, count = cursor.fetchone()
+
+        if not count or mean is None:
+            return {'mean': 0.0, 'std': 1.0, 'min': 0.0, 'max': 0.0}
+
+        # Second pass around the known mean is numerically steadier than
+        # AVG(x*x) - AVG(x)^2.
+        cursor.execute(f"""
+            SELECT AVG(({column} - ?) * ({column} - ?))
+            FROM dm_player_features
+            WHERE {column} IS NOT NULL
+        """, (mean, mean))
+        variance = cursor.fetchone()[0]
+
+        if variance is None:
+            std = 1.0
+        else:
+            std = math.sqrt(max(variance, 0.0))
+
+        return {
+            'mean': mean,
+            'std': std,
+            'min': min_val if min_val is not None else 0.0,
+            'max': max_val if max_val is not None else 0.0
+        }
+
+
+class BaseFeatureProcessor(ABC):
+    """
+    Abstract base class for all feature processors
+
+    Each processor implements the calculate() method which returns a dict
+    of feature_name: value pairs. The remaining static helpers query
+    fact_match_players for per-player match and round totals.
+    """
+
+    MIN_MATCHES_REQUIRED = 5  # Minimum matches needed for feature calculation
+
+    @staticmethod
+    @abstractmethod
+    def calculate(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
+        """
+        Calculate features for a specific player
+
+        Args:
+            steam_id: Player's Steam ID (steam_id_64)
+            conn_l2: Connection to L2 database
+
+        Returns:
+            Dictionary of {feature_name: value}
+        """
+
+    @staticmethod
+    def check_min_matches(steam_id: str, conn_l2: sqlite3.Connection,
+                         min_required: Optional[int] = None) -> bool:
+        """
+        Check if player has minimum required matches
+
+        Args:
+            steam_id: Player's Steam ID
+            conn_l2: L2 database connection
+            min_required: Minimum matches. When None, the value of
+                BaseFeatureProcessor.MIN_MATCHES_REQUIRED is used; because
+                this is a static method, a subclass override of that
+                constant is not picked up and must be passed explicitly.
+
+        Returns:
+            True if player has enough matches
+        """
+        if min_required is None:
+            min_required = BaseFeatureProcessor.MIN_MATCHES_REQUIRED
+
+        # Reuse the single COUNT query instead of duplicating the SQL here.
+        match_count = BaseFeatureProcessor.get_player_match_count(steam_id, conn_l2)
+        return match_count >= min_required
+
+    @staticmethod
+    def get_player_match_count(steam_id: str, conn_l2: sqlite3.Connection) -> int:
+        """
+        Get total match count for player
+
+        Counts the rows of fact_match_players whose steam_id_64 equals
+        steam_id; returns 0 when there are none.
+        """
+        cursor = conn_l2.cursor()
+        cursor.execute("""
+            SELECT COUNT(*) FROM fact_match_players
+            WHERE steam_id_64 = ?
+        """, (steam_id,))
+        return cursor.fetchone()[0]
+
+    @staticmethod
+    def get_player_round_count(steam_id: str, conn_l2: sqlite3.Connection) -> int:
+        """
+        Get total round count for player
+
+        Sums round_total over the player's rows in fact_match_players.
+        SUM yields NULL when no row contributes a value, which is
+        reported as 0.
+        """
+        cursor = conn_l2.cursor()
+        cursor.execute("""
+            SELECT SUM(round_total) FROM fact_match_players
+            WHERE steam_id_64 = ?
+        """, (steam_id,))
+        total_rounds = cursor.fetchone()[0]
+        return total_rounds if total_rounds is not None else 0
+
+
+class WeaponCategories:
+    """Weapon categorization constants and a name-to-category lookup"""
+
+    RIFLES = [
+        'ak47', 'aug', 'm4a1', 'm4a1_silencer', 'sg556', 'galilar', 'famas'
+    ]
+
+    PISTOLS = [
+        'glock', 'usp_silencer', 'hkp2000', 'p250', 'fiveseven', 'tec9',
+        'cz75a', 'deagle', 'elite', 'revolver'
+    ]
+
+    SMGS = [
+        'mac10', 'mp9', 'mp7', 'mp5sd', 'ump45', 'p90', 'bizon'
+    ]
+
+    SNIPERS = [
+        'awp', 'ssg08', 'scar20', 'g3sg1'
+    ]
+
+    HEAVY = [
+        'nova', 'xm1014', 'mag7', 'sawedoff', 'm249', 'negev'
+    ]
+
+    @classmethod
+    def get_category(cls, weapon_name: str) -> str:
+        """
+        Get category for a weapon
+
+        The name is lower-cased and any 'weapon_' substring is removed
+        before it is matched against the category lists.
+
+        Args:
+            weapon_name: Weapon name, with or without a 'weapon_' prefix.
+                None or an empty string is treated as an unknown weapon.
+
+        Returns:
+            One of 'rifle', 'pistol', 'smg', 'sniper', 'heavy', 'knife',
+            'grenade' or 'other'
+        """
+        # A missing weapon name cannot be categorized; report it as
+        # 'other' instead of failing on None.lower().
+        if not weapon_name:
+            return 'other'
+
+        weapon_clean = weapon_name.lower().replace('weapon_', '')
+
+        # Lists are read at call time so subclass overrides are honored.
+        grouped = (
+            ('rifle', cls.RIFLES),
+            ('pistol', cls.PISTOLS),
+            ('smg', cls.SMGS),
+            ('sniper', cls.SNIPERS),
+            ('heavy', cls.HEAVY),
+        )
+        for category, members in grouped:
+            if weapon_clean in members:
+                return category
+
+        # Only these exact names are recognized outside the lists above.
+        if weapon_clean == 'knife':
+            return 'knife'
+        if weapon_clean == 'hegrenade':
+            return 'grenade'
+        return 'other'
+
+
+class MapAreas:
+    """Area labels and a (not yet implemented) position classifier"""
+
+    # Area label constants; per the original note, actual map coordinates
+    # are to be added in IntelligenceProcessor.
+    SITE_A = 'site_a'
+    SITE_B = 'site_b'
+    MID = 'mid'
+    SPAWN_T = 'spawn_t'
+    SPAWN_CT = 'spawn_ct'
+
+    @staticmethod
+    def classify_position(x: float, y: float, z: float, map_name: str) -> str:
+        """
+        Classify a position into a map area (placeholder)
+
+        No map-specific coordinate ranges exist yet, so the arguments are
+        ignored and every position is reported as "unknown".
+        """
+        area = "unknown"  # placeholder until map data is available
+        return area
+
+
+# Public API: the names exported by `from ... import *`
+__all__ = [
+    'SafeAggregator',
+    'NormalizationUtils',
+    'BaseFeatureProcessor',
+    'WeaponCategories',
+    'MapAreas',
+]