Files
yrtv/scripts/check_l3_variance.py

56 lines
1.6 KiB
Python
Raw Normal View History

import sqlite3
import pandas as pd
import numpy as np
import os
# Config to match your project structure
class Config:
    """Static settings for this diagnostic script."""

    # Absolute location of the L3 feature SQLite database that
    # check_variance() opens. Raw string so the Windows backslashes are
    # taken literally.
    DB_L3_PATH = r'd:\Documents\trae_projects\yrtv\database\L3\L3_Features.sqlite'
def _describe_column(name, series):
    """Return the warning line for one numeric column, or None if it looks fine.

    Checks are applied in order and the first match wins:
      * every entry equals 0            -> "[ALL ZERO] <name>"
      * at most one distinct value      -> "[SINGLE VAL] <name> = <value>"
      * more than 90% of entries are 0  -> "[MOSTLY ZERO] <name> (<pct> zeros)"
    """
    # Computed once and reused by both the all-zero and mostly-zero checks.
    is_zero = series == 0
    if is_zero.all():
        return f"[ALL ZERO] {name}"

    if series.nunique() <= 1:
        # nunique() does not count NaN, so the first row may be NaN even
        # though the column holds one real value. Report that real value;
        # fall back to the first row only when every entry is null.
        non_null = series.dropna()
        val = non_null.iloc[0] if not non_null.empty else series.iloc[0]
        return f"[SINGLE VAL] {name} = {val}"

    zero_pct = is_zero.mean()
    if zero_pct > 0.9:
        return f"[MOSTLY ZERO] {name} ({zero_pct:.1%} zeros)"
    return None


def check_variance(db_path=None):
    """Print a low-variance report for the numeric columns of dm_player_features.

    Loads the whole ``dm_player_features`` table from the SQLite database and
    prints one line for every numeric column (except ``steam_id_64``) that is
    all zeros, holds a single distinct value, or is more than 90% zeros.

    Args:
        db_path: Path to the SQLite file. Defaults to ``Config.DB_L3_PATH``
            when omitted or None.

    Returns:
        None. All results are written to stdout.
    """
    if db_path is None:
        db_path = Config.DB_L3_PATH

    # sqlite3.connect() would silently create a missing file, so bail out first.
    if not os.path.exists(db_path):
        print(f"L3 DB not found at {db_path}")
        return

    conn = sqlite3.connect(db_path)
    try:
        # Read all features; the frame is fully materialized, so the
        # connection can be released before the analysis starts.
        df = pd.read_sql_query("SELECT * FROM dm_player_features", conn)
    finally:
        conn.close()

    print(f"Total rows: {len(df)}")
    if df.empty:
        print("Table is empty.")
        return

    print("\n--- Variance Analysis ---")
    for col in df.select_dtypes(include=['number']).columns:
        if col == 'steam_id_64':
            # Identifier column, not a feature.
            continue
        message = _describe_column(col, df[col])
        if message is not None:
            print(message)
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    check_variance()