365 lines
14 KiB
Python
365 lines
14 KiB
Python
|
|
|
||
|
|
import logging
|
||
|
|
import os
|
||
|
|
import sys
|
||
|
|
import sqlite3
|
||
|
|
import json
|
||
|
|
import argparse
|
||
|
|
import concurrent.futures
|
||
|
|
|
||
|
|
# Setup logging: one module-level logger, timestamped records at INFO.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Get absolute paths (anchored on this file so the script works from any CWD)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  # Points to database/ directory
PROJECT_ROOT = os.path.dirname(BASE_DIR)  # Points to project root
sys.path.insert(0, PROJECT_ROOT)  # Add project root to Python path so `database.L3.processors` imports resolve
L2_DB_PATH = os.path.join(BASE_DIR, 'L2', 'L2.db')             # read: per-match player facts
L3_DB_PATH = os.path.join(BASE_DIR, 'L3', 'L3.db')             # written: per-player feature mart
WEB_DB_PATH = os.path.join(BASE_DIR, 'Web', 'Web_App.sqlite')  # read: team lineups (player filter)
SCHEMA_PATH = os.path.join(BASE_DIR, 'L3', 'schema.sql')       # DDL applied by init_db()
|
||
|
|
|
||
|
|
def _get_existing_columns(conn, table_name):
|
||
|
|
cur = conn.execute(f"PRAGMA table_info({table_name})")
|
||
|
|
return {row[1] for row in cur.fetchall()}
|
||
|
|
|
||
|
|
def _ensure_columns(conn, table_name, columns):
    """Add any column from *columns* (name -> SQL type) missing on *table_name*.

    Existing columns are left untouched; each missing one is added with a
    single ALTER TABLE ... ADD COLUMN statement, in *columns* order.
    """
    # Index 1 of a PRAGMA table_info row is the column name.
    present = {info[1] for info in conn.execute(f"PRAGMA table_info({table_name})")}
    for name, sql_type in columns.items():
        if name not in present:
            conn.execute(f"ALTER TABLE {table_name} ADD COLUMN {name} {sql_type}")
|
||
|
|
|
||
|
|
def init_db():
    """Initialize the L3 database by executing the schema script.

    Creates the L3 directory if needed, runs ``schema.sql`` against the L3
    database, then logs the created tables and the ``dm_player_features``
    column count as a sanity check.

    Raises:
        Exception: re-raised after logging when the schema cannot be applied.
    """
    # exist_ok avoids the check-then-create race of the previous
    # os.path.exists() + os.makedirs() pair.
    os.makedirs(os.path.dirname(L3_DB_PATH), exist_ok=True)

    logger.info(f"Initializing L3 database at: {L3_DB_PATH}")
    conn = sqlite3.connect(L3_DB_PATH)

    try:
        with open(SCHEMA_PATH, 'r', encoding='utf-8') as f:
            schema_sql = f.read()
        conn.executescript(schema_sql)
        conn.commit()
        logger.info("✓ L3 schema created successfully")

        # Verify tables
        cursor = conn.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
        tables = [row[0] for row in cursor.fetchall()]
        logger.info(f"✓ Created {len(tables)} tables: {', '.join(tables)}")

        # Verify dm_player_features columns
        cursor.execute("PRAGMA table_info(dm_player_features)")
        columns = cursor.fetchall()
        logger.info(f"✓ dm_player_features has {len(columns)} columns")

    except Exception as e:
        logger.error(f"Error initializing L3 database: {e}")
        raise
    finally:
        conn.close()

    logger.info("L3 DB Initialized with new 5-tier architecture")
|
||
|
|
|
||
|
|
def _get_team_players():
    """Return the set of steam_ids referenced by Web App team lineups.

    Reads ``player_ids_json`` from the ``team_lineups`` table; each non-empty
    value is expected to be a JSON array of steam ids. Rows that fail to
    parse are logged and skipped.

    Returns:
        set: unique steam ids; empty when the Web DB is missing or unreadable.
    """
    if not os.path.exists(WEB_DB_PATH):
        logger.warning(f"Web DB not found at {WEB_DB_PATH}, returning empty list")
        return set()

    conn = None
    try:
        conn = sqlite3.connect(WEB_DB_PATH)
        cursor = conn.cursor()
        cursor.execute("SELECT player_ids_json FROM team_lineups")
        rows = cursor.fetchall()

        steam_ids = set()
        for row in rows:
            if not row[0]:
                continue
            try:
                ids = json.loads(row[0])
            except json.JSONDecodeError:
                logger.warning(f"Failed to parse player_ids_json: {row[0]}")
                continue
            # Only list payloads contribute; other JSON shapes are ignored.
            if isinstance(ids, list):
                steam_ids.update(ids)

        logger.info(f"Found {len(steam_ids)} unique players in Team Lineups")
        return steam_ids
    except Exception as e:
        logger.error(f"Error reading Web DB: {e}")
        return set()
    finally:
        # Bug fix: the connection was previously closed only on the happy
        # path and leaked when the query raised.
        if conn is not None:
            conn.close()
|
||
|
|
|
||
|
|
def _get_match_date_range(steam_id: str, conn_l2: sqlite3.Connection):
|
||
|
|
cursor = conn_l2.cursor()
|
||
|
|
cursor.execute("""
|
||
|
|
SELECT MIN(m.start_time), MAX(m.start_time)
|
||
|
|
FROM fact_match_players p
|
||
|
|
JOIN fact_matches m ON p.match_id = m.match_id
|
||
|
|
WHERE p.steam_id_64 = ?
|
||
|
|
""", (steam_id,))
|
||
|
|
date_row = cursor.fetchone()
|
||
|
|
first_match_date = date_row[0] if date_row and date_row[0] else None
|
||
|
|
last_match_date = date_row[1] if date_row and date_row[1] else None
|
||
|
|
return first_match_date, last_match_date
|
||
|
|
|
||
|
|
def _build_player_record(steam_id: str):
    """Compute all L3 features for one player (process-pool worker entry).

    Opens its own L2 connection so it is safe to run inside a
    ProcessPoolExecutor worker. Never raises: failures are reported through
    the returned record so the main loop can aggregate them.

    Returns:
        dict: on success, the feature payload with ``"error": None``; on
        failure, a stub with ``"features": None`` and ``"error"`` set.
    """
    try:
        # Imported here (not at module level) so each pool worker resolves
        # the processors in its own interpreter; an ImportError becomes an
        # error record like any other failure.
        from database.L3.processors import (
            BasicProcessor,
            TacticalProcessor,
            IntelligenceProcessor,
            MetaProcessor,
            CompositeProcessor
        )
        conn_l2 = sqlite3.connect(L2_DB_PATH)
        conn_l2.row_factory = sqlite3.Row
        try:
            features = {}
            features.update(BasicProcessor.calculate(steam_id, conn_l2))
            features.update(TacticalProcessor.calculate(steam_id, conn_l2))
            features.update(IntelligenceProcessor.calculate(steam_id, conn_l2))
            features.update(MetaProcessor.calculate(steam_id, conn_l2))
            # CompositeProcessor additionally consumes the features computed above.
            features.update(CompositeProcessor.calculate(steam_id, conn_l2, features))
            match_count = _get_match_count(steam_id, conn_l2)
            round_count = _get_round_count(steam_id, conn_l2)
            first_match_date, last_match_date = _get_match_date_range(steam_id, conn_l2)
        finally:
            # Bug fix: the connection was previously leaked when a processor raised.
            conn_l2.close()
        return {
            "steam_id": steam_id,
            "features": features,
            "match_count": match_count,
            "round_count": round_count,
            "first_match_date": first_match_date,
            "last_match_date": last_match_date,
            "error": None,
        }
    except Exception as e:
        return {
            "steam_id": steam_id,
            "features": None,
            "match_count": 0,
            "round_count": 0,
            "first_match_date": None,
            "last_match_date": None,
            "error": str(e),
        }
|
||
|
|
|
||
|
|
def main(force_all: bool = False, workers: int = 1):
    """Run the L3 feature-building pipeline using the modular processors.

    Args:
        force_all: when True, build features for every player in L2's
            dim_players; otherwise only for players appearing in the Web App
            team lineups.
        workers: number of processes; values > 1 fan out per-player work via
            ProcessPoolExecutor (each worker opens its own L2 connection),
            otherwise players are processed sequentially on this process.
    """
    logger.info("========================================")
    logger.info("Starting L3 Builder with 5-Tier Architecture")
    logger.info("========================================")

    # 1. Ensure Schema is up to date
    init_db()

    # 2. Import processors — kept local so a broken processors package aborts
    #    the build with a clear log line instead of an import-time crash.
    try:
        from database.L3.processors import (
            BasicProcessor,
            TacticalProcessor,
            IntelligenceProcessor,
            MetaProcessor,
            CompositeProcessor
        )
        logger.info("✓ All 5 processors imported successfully")
    except ImportError as e:
        logger.error(f"Failed to import processors: {e}")
        return

    # 3. Connect to databases (L2 is read-only here, L3 receives the upserts)
    conn_l2 = sqlite3.connect(L2_DB_PATH)
    conn_l2.row_factory = sqlite3.Row  # presumably so processors read rows by column name — TODO confirm
    conn_l3 = sqlite3.connect(L3_DB_PATH)

    try:
        cursor_l2 = conn_l2.cursor()
        if force_all:
            logger.info("Force mode enabled: building L3 for all players in L2.")
            sql = """
                SELECT DISTINCT steam_id_64
                FROM dim_players
                ORDER BY steam_id_64
            """
            cursor_l2.execute(sql)
        else:
            # Default mode: restrict to players referenced by team lineups.
            team_players = _get_team_players()
            if not team_players:
                logger.warning("No players found in Team Lineups. Aborting L3 build.")
                return

            # One '?' placeholder per id (parameterized, not string-built).
            placeholders = ','.join(['?' for _ in team_players])
            sql = f"""
                SELECT DISTINCT steam_id_64
                FROM dim_players
                WHERE steam_id_64 IN ({placeholders})
                ORDER BY steam_id_64
            """
            cursor_l2.execute(sql, list(team_players))

        players = cursor_l2.fetchall()
        total_players = len(players)
        logger.info(f"Found {total_players} matching players in L2 to process")

        if total_players == 0:
            logger.warning("No matching players found in dim_players table")
            return

        success_count = 0
        error_count = 0
        processed_count = 0

        if workers and workers > 1:
            # Parallel path: workers compute features (each with its own L2
            # connection inside _build_player_record); upserts happen here on
            # the single L3 connection, so SQLite sees one writer.
            steam_ids = [row[0] for row in players]
            with concurrent.futures.ProcessPoolExecutor(max_workers=workers) as executor:
                futures = [executor.submit(_build_player_record, sid) for sid in steam_ids]
                for future in concurrent.futures.as_completed(futures):
                    result = future.result()
                    processed_count += 1
                    if result.get("error"):
                        error_count += 1
                        logger.error(f"Error processing player {result.get('steam_id')}: {result.get('error')}")
                    else:
                        # conn_l2 arg is None: dates were already computed in
                        # the worker, so no backfill lookup is needed.
                        _upsert_features(
                            conn_l3,
                            result["steam_id"],
                            result["features"],
                            result["match_count"],
                            result["round_count"],
                            None,
                            result["first_match_date"],
                            result["last_match_date"],
                        )
                        success_count += 1
                    # Commit and log every 2nd player so progress is durable.
                    if processed_count % 2 == 0:
                        conn_l3.commit()
                        logger.info(f"Progress: {processed_count}/{total_players} ({success_count} success, {error_count} errors)")
        else:
            # Sequential path: same per-player pipeline as
            # _build_player_record, but reusing this process's connections.
            for idx, row in enumerate(players, 1):
                steam_id = row[0]

                try:
                    features = {}
                    features.update(BasicProcessor.calculate(steam_id, conn_l2))
                    features.update(TacticalProcessor.calculate(steam_id, conn_l2))
                    features.update(IntelligenceProcessor.calculate(steam_id, conn_l2))
                    features.update(MetaProcessor.calculate(steam_id, conn_l2))
                    features.update(CompositeProcessor.calculate(steam_id, conn_l2, features))
                    match_count = _get_match_count(steam_id, conn_l2)
                    round_count = _get_round_count(steam_id, conn_l2)
                    first_match_date, last_match_date = _get_match_date_range(steam_id, conn_l2)
                    _upsert_features(conn_l3, steam_id, features, match_count, round_count, conn_l2, first_match_date, last_match_date)
                    success_count += 1
                except Exception as e:
                    error_count += 1
                    logger.error(f"Error processing player {steam_id}: {e}")
                    # Full tracebacks only for the first few errors to keep logs readable.
                    if error_count <= 3:
                        import traceback
                        traceback.print_exc()
                    # NOTE(review): this `continue` skips the processed_count /
                    # commit block below, so progress logging lags behind
                    # errored players — confirm that is intended.
                    continue

                processed_count = idx
                # Commit and log every 2nd player so progress is durable.
                if processed_count % 2 == 0:
                    conn_l3.commit()
                    logger.info(f"Progress: {processed_count}/{total_players} ({success_count} success, {error_count} errors)")

        # Final commit
        conn_l3.commit()

        logger.info("========================================")
        logger.info(f"L3 Build Complete!")
        logger.info(f" Success: {success_count} players")
        logger.info(f" Errors: {error_count} players")
        logger.info(f" Total: {total_players} players")
        # total_players > 0 is guaranteed by the early return above.
        logger.info(f" Success Rate: {success_count/total_players*100:.1f}%")
        logger.info("========================================")

    except Exception as e:
        logger.error(f"Fatal error during L3 build: {e}")
        import traceback
        traceback.print_exc()

    finally:
        conn_l2.close()
        conn_l3.close()
|
||
|
|
|
||
|
|
|
||
|
|
def _get_match_count(steam_id: str, conn_l2: sqlite3.Connection) -> int:
|
||
|
|
"""Get total match count for player"""
|
||
|
|
cursor = conn_l2.cursor()
|
||
|
|
cursor.execute("""
|
||
|
|
SELECT COUNT(*) FROM fact_match_players
|
||
|
|
WHERE steam_id_64 = ?
|
||
|
|
""", (steam_id,))
|
||
|
|
return cursor.fetchone()[0]
|
||
|
|
|
||
|
|
|
||
|
|
def _get_round_count(steam_id: str, conn_l2: sqlite3.Connection) -> int:
|
||
|
|
"""Get total round count for player"""
|
||
|
|
cursor = conn_l2.cursor()
|
||
|
|
cursor.execute("""
|
||
|
|
SELECT COALESCE(SUM(round_total), 0) FROM fact_match_players
|
||
|
|
WHERE steam_id_64 = ?
|
||
|
|
""", (steam_id,))
|
||
|
|
return cursor.fetchone()[0]
|
||
|
|
|
||
|
|
|
||
|
|
def _upsert_features(conn_l3: sqlite3.Connection, steam_id: str, features: dict,
|
||
|
|
match_count: int, round_count: int, conn_l2: sqlite3.Connection | None,
|
||
|
|
first_match_date=None, last_match_date=None):
|
||
|
|
"""
|
||
|
|
Insert or update player features in dm_player_features
|
||
|
|
"""
|
||
|
|
cursor_l3 = conn_l3.cursor()
|
||
|
|
if first_match_date is None or last_match_date is None:
|
||
|
|
if conn_l2 is not None:
|
||
|
|
first_match_date, last_match_date = _get_match_date_range(steam_id, conn_l2)
|
||
|
|
else:
|
||
|
|
first_match_date = None
|
||
|
|
last_match_date = None
|
||
|
|
|
||
|
|
# Add metadata to features
|
||
|
|
features['total_matches'] = match_count
|
||
|
|
features['total_rounds'] = round_count
|
||
|
|
features['first_match_date'] = first_match_date
|
||
|
|
features['last_match_date'] = last_match_date
|
||
|
|
|
||
|
|
# Build dynamic column list from features dict
|
||
|
|
columns = ['steam_id_64'] + list(features.keys())
|
||
|
|
placeholders = ','.join(['?' for _ in columns])
|
||
|
|
columns_sql = ','.join(columns)
|
||
|
|
|
||
|
|
# Build UPDATE SET clause for ON CONFLICT
|
||
|
|
update_clauses = [f"{col}=excluded.{col}" for col in features.keys()]
|
||
|
|
update_clause_sql = ','.join(update_clauses)
|
||
|
|
|
||
|
|
values = [steam_id] + [features[k] for k in features.keys()]
|
||
|
|
|
||
|
|
sql = f"""
|
||
|
|
INSERT INTO dm_player_features ({columns_sql})
|
||
|
|
VALUES ({placeholders})
|
||
|
|
ON CONFLICT(steam_id_64) DO UPDATE SET
|
||
|
|
{update_clause_sql},
|
||
|
|
last_updated=CURRENT_TIMESTAMP
|
||
|
|
"""
|
||
|
|
|
||
|
|
cursor_l3.execute(sql, values)
|
||
|
|
|
||
|
|
def _parse_args():
|
||
|
|
parser = argparse.ArgumentParser()
|
||
|
|
parser.add_argument("--force", action="store_true")
|
||
|
|
parser.add_argument("--workers", type=int, default=1)
|
||
|
|
return parser.parse_args()
|
||
|
|
|
||
|
|
# Script entry point: parse CLI flags and kick off the L3 build.
if __name__ == "__main__":
    cli_args = _parse_args()
    main(force_all=cli_args.force, workers=cli_args.workers)
|