3.0.0 : Reconstructed Database System.

This commit is contained in:
2026-01-29 02:21:44 +08:00
parent 1642adb00e
commit 04ee957af6
69 changed files with 10258 additions and 6546 deletions

View File

@@ -0,0 +1,320 @@
"""
Base processor classes and utility functions for L3 feature calculation
"""
import sqlite3
import math
from typing import Dict, Any, List, Optional
from abc import ABC, abstractmethod
class SafeAggregator:
"""Utility class for safe mathematical operations with NULL handling"""
@staticmethod
def safe_divide(numerator: float, denominator: float, default: float = 0.0) -> float:
"""Safe division with NULL/zero handling"""
if denominator is None or denominator == 0:
return default
if numerator is None:
return default
return numerator / denominator
@staticmethod
def safe_avg(values: List[float], default: float = 0.0) -> float:
"""Safe average calculation"""
if not values or len(values) == 0:
return default
valid_values = [v for v in values if v is not None]
if not valid_values:
return default
return sum(valid_values) / len(valid_values)
@staticmethod
def safe_stddev(values: List[float], default: float = 0.0) -> float:
"""Safe standard deviation calculation"""
if not values or len(values) < 2:
return default
valid_values = [v for v in values if v is not None]
if len(valid_values) < 2:
return default
mean = sum(valid_values) / len(valid_values)
variance = sum((x - mean) ** 2 for x in valid_values) / len(valid_values)
return math.sqrt(variance)
@staticmethod
def safe_sum(values: List[float], default: float = 0.0) -> float:
"""Safe sum calculation"""
if not values:
return default
valid_values = [v for v in values if v is not None]
return sum(valid_values) if valid_values else default
@staticmethod
def safe_min(values: List[float], default: float = 0.0) -> float:
"""Safe minimum calculation"""
if not values:
return default
valid_values = [v for v in values if v is not None]
return min(valid_values) if valid_values else default
@staticmethod
def safe_max(values: List[float], default: float = 0.0) -> float:
"""Safe maximum calculation"""
if not values:
return default
valid_values = [v for v in values if v is not None]
return max(valid_values) if valid_values else default
class NormalizationUtils:
"""Z-score normalization and scaling utilities"""
@staticmethod
def z_score_normalize(value: float, mean: float, std: float,
scale_min: float = 0.0, scale_max: float = 100.0) -> float:
"""
Z-score normalization to a target range
Args:
value: Value to normalize
mean: Population mean
std: Population standard deviation
scale_min: Target minimum (default: 0)
scale_max: Target maximum (default: 100)
Returns:
Normalized value in [scale_min, scale_max] range
"""
if std == 0 or std is None:
return (scale_min + scale_max) / 2.0
# Calculate z-score
z = (value - mean) / std
# Map to target range (±3σ covers ~99.7% of data)
# z = -3 → scale_min, z = 0 → midpoint, z = 3 → scale_max
midpoint = (scale_min + scale_max) / 2.0
scale_range = (scale_max - scale_min) / 6.0 # 6σ total range
normalized = midpoint + (z * scale_range)
# Clamp to target range
return max(scale_min, min(scale_max, normalized))
@staticmethod
def percentile_normalize(value: float, all_values: List[float],
scale_min: float = 0.0, scale_max: float = 100.0) -> float:
"""
Percentile-based normalization
Args:
value: Value to normalize
all_values: All values in population
scale_min: Target minimum
scale_max: Target maximum
Returns:
Normalized value based on percentile
"""
if not all_values:
return scale_min
sorted_values = sorted(all_values)
rank = sum(1 for v in sorted_values if v < value)
percentile = rank / len(sorted_values)
return scale_min + (percentile * (scale_max - scale_min))
@staticmethod
def min_max_normalize(value: float, min_val: float, max_val: float,
scale_min: float = 0.0, scale_max: float = 100.0) -> float:
"""Min-max normalization to target range"""
if max_val == min_val:
return (scale_min + scale_max) / 2.0
normalized = (value - min_val) / (max_val - min_val)
return scale_min + (normalized * (scale_max - scale_min))
@staticmethod
def calculate_population_stats(conn_l3: sqlite3.Connection, column: str) -> Dict[str, float]:
"""
Calculate population mean and std for a column in dm_player_features
Args:
conn_l3: L3 database connection
column: Column name to analyze
Returns:
dict with 'mean', 'std', 'min', 'max'
"""
cursor = conn_l3.cursor()
cursor.execute(f"""
SELECT
AVG({column}) as mean,
STDDEV({column}) as std,
MIN({column}) as min,
MAX({column}) as max
FROM dm_player_features
WHERE {column} IS NOT NULL
""")
row = cursor.fetchone()
return {
'mean': row[0] if row[0] is not None else 0.0,
'std': row[1] if row[1] is not None else 1.0,
'min': row[2] if row[2] is not None else 0.0,
'max': row[3] if row[3] is not None else 0.0
}
class BaseFeatureProcessor(ABC):
"""
Abstract base class for all feature processors
Each processor implements the calculate() method which returns a dict
of feature_name: value pairs.
"""
MIN_MATCHES_REQUIRED = 5 # Minimum matches needed for feature calculation
@staticmethod
@abstractmethod
def calculate(steam_id: str, conn_l2: sqlite3.Connection) -> Dict[str, Any]:
"""
Calculate features for a specific player
Args:
steam_id: Player's Steam ID (steam_id_64)
conn_l2: Connection to L2 database
Returns:
Dictionary of {feature_name: value}
"""
pass
@staticmethod
def check_min_matches(steam_id: str, conn_l2: sqlite3.Connection,
min_required: int = None) -> bool:
"""
Check if player has minimum required matches
Args:
steam_id: Player's Steam ID
conn_l2: L2 database connection
min_required: Minimum matches (uses class default if None)
Returns:
True if player has enough matches
"""
if min_required is None:
min_required = BaseFeatureProcessor.MIN_MATCHES_REQUIRED
cursor = conn_l2.cursor()
cursor.execute("""
SELECT COUNT(*) FROM fact_match_players
WHERE steam_id_64 = ?
""", (steam_id,))
count = cursor.fetchone()[0]
return count >= min_required
@staticmethod
def get_player_match_count(steam_id: str, conn_l2: sqlite3.Connection) -> int:
"""Get total match count for player"""
cursor = conn_l2.cursor()
cursor.execute("""
SELECT COUNT(*) FROM fact_match_players
WHERE steam_id_64 = ?
""", (steam_id,))
return cursor.fetchone()[0]
@staticmethod
def get_player_round_count(steam_id: str, conn_l2: sqlite3.Connection) -> int:
"""Get total round count for player"""
cursor = conn_l2.cursor()
cursor.execute("""
SELECT SUM(round_total) FROM fact_match_players
WHERE steam_id_64 = ?
""", (steam_id,))
result = cursor.fetchone()[0]
return result if result is not None else 0
class WeaponCategories:
"""Weapon categorization constants"""
RIFLES = [
'ak47', 'aug', 'm4a1', 'm4a1_silencer', 'sg556', 'galilar', 'famas'
]
PISTOLS = [
'glock', 'usp_silencer', 'hkp2000', 'p250', 'fiveseven', 'tec9',
'cz75a', 'deagle', 'elite', 'revolver'
]
SMGS = [
'mac10', 'mp9', 'mp7', 'mp5sd', 'ump45', 'p90', 'bizon'
]
SNIPERS = [
'awp', 'ssg08', 'scar20', 'g3sg1'
]
HEAVY = [
'nova', 'xm1014', 'mag7', 'sawedoff', 'm249', 'negev'
]
@classmethod
def get_category(cls, weapon_name: str) -> str:
"""Get category for a weapon"""
weapon_clean = weapon_name.lower().replace('weapon_', '')
if weapon_clean in cls.RIFLES:
return 'rifle'
elif weapon_clean in cls.PISTOLS:
return 'pistol'
elif weapon_clean in cls.SMGS:
return 'smg'
elif weapon_clean in cls.SNIPERS:
return 'sniper'
elif weapon_clean in cls.HEAVY:
return 'heavy'
elif weapon_clean == 'knife':
return 'knife'
elif weapon_clean == 'hegrenade':
return 'grenade'
else:
return 'other'
class MapAreas:
"""Map area classification utilities (for position analysis)"""
# This will be expanded with actual map coordinates in IntelligenceProcessor
SITE_A = 'site_a'
SITE_B = 'site_b'
MID = 'mid'
SPAWN_T = 'spawn_t'
SPAWN_CT = 'spawn_ct'
@staticmethod
def classify_position(x: float, y: float, z: float, map_name: str) -> str:
"""
Classify position into map area (simplified)
Full implementation requires map-specific coordinate ranges
"""
# Placeholder - will be implemented with map data
return "unknown"
# Export all classes
__all__ = [
'SafeAggregator',
'NormalizationUtils',
'BaseFeatureProcessor',
'WeaponCategories',
'MapAreas'
]