56 lines
1.6 KiB
Python
56 lines
1.6 KiB
Python
|
|
import sqlite3
|
||
|
|
import pandas as pd
|
||
|
|
import numpy as np
|
||
|
|
import os
|
||
|
|
|
||
|
|
# Config to match your project structure
|
||
|
|
class Config:
    """Static settings used by this diagnostic script."""

    # Absolute location of the L3 feature database (SQLite file).
    DB_L3_PATH = r'd:\Documents\trae_projects\yrtv\database\L3\L3_Features.sqlite'
|
||
|
|
|
||
|
|
# Columns that are identifiers rather than features; excluded from the report.
_ID_COLUMNS = ('steam_id_64',)

# A column is flagged as "mostly zero" when more than this fraction of rows is 0.
_MOSTLY_ZERO_THRESHOLD = 0.9


def _variance_flag(col, series):
    """Return a one-line warning for a degenerate numeric column, else None.

    Checks are applied in order and the first match wins:
    all values equal to zero, at most one distinct non-null value,
    then a zero share above ``_MOSTLY_ZERO_THRESHOLD``.
    """
    is_zero = series == 0  # NaN compares unequal to 0, so nulls count as non-zero
    if is_zero.all():
        return f"[ALL ZERO] {col}"

    # nunique() ignores NaN, so the representative value must also be taken
    # from the non-null entries; otherwise a leading NaN would be reported
    # instead of the column's actual single value.
    non_null = series.dropna()
    if non_null.nunique() <= 1:
        val = non_null.iloc[0] if len(non_null) else series.iloc[0]
        return f"[SINGLE VAL] {col} = {val}"

    zero_pct = is_zero.mean()
    if zero_pct > _MOSTLY_ZERO_THRESHOLD:
        return f"[MOSTLY ZERO] {col} ({zero_pct:.1%} zeros)"

    return None


def check_variance(db_path=None):
    """Print a report of numeric feature columns that carry little or no variance.

    Loads every row of the ``dm_player_features`` table, then for each numeric
    column (except the ID columns) prints a line when the column is all zeros,
    holds a single distinct value, or is more than 90% zeros. Columns that pass
    all checks produce no output.

    Args:
        db_path: Path to the SQLite database to inspect. When omitted or
            ``None``, ``Config.DB_L3_PATH`` is used.

    Returns:
        None. All results are written to standard output.
    """
    if db_path is None:
        db_path = Config.DB_L3_PATH

    # Check up front: sqlite3.connect() would silently create an empty
    # database file at a path that does not exist yet.
    if not os.path.exists(db_path):
        print(f"L3 DB not found at {db_path}")
        return

    conn = sqlite3.connect(db_path)
    try:
        # Read all features
        df = pd.read_sql_query("SELECT * FROM dm_player_features", conn)
    finally:
        # The frame is fully materialized, so the connection is no longer needed.
        conn.close()

    print(f"Total rows: {len(df)}")
    if df.empty:
        print("Table is empty.")
        return

    numeric_cols = df.select_dtypes(include=['number']).columns

    print("\n--- Variance Analysis ---")
    for col in numeric_cols:
        if col in _ID_COLUMNS:
            continue  # Skip ID

        message = _variance_flag(col, df[col])
        if message is not None:
            print(message)
|
||
|
|
|
||
|
|
# Script entry point: run the report only when executed directly, not on import.
if __name__ == "__main__":
    check_variance()
|