简化项目并切换到v2模型与场景报告

This commit is contained in:
xunyulin230420
2026-02-12 16:32:45 +08:00
parent 706940d8d3
commit a19da4728b
23 changed files with 454 additions and 1235 deletions

View File

@@ -29,12 +29,12 @@ import sqlite3
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from features.spatial import calculate_spatial_features
from features.economy import calculate_economy_features
from features.definitions import FEATURE_COLUMNS
from features.definitions import FEATURE_COLUMNS, XGB_FEATURE_COLUMNS_V2
# Configuration
DATA_DIR = "data/processed"
MODEL_DIR = "models"
MODEL_PATH = os.path.join(MODEL_DIR, "clutch_model_v1.json")
MODEL_PATH = os.path.join(MODEL_DIR, "clutch_model_v2.json")
L3_DB_PATH = os.path.join("database", "L3", "L3.db")
L2_DB_PATH = os.path.join("database", "L2", "L2.db")
TEST_SIZE = 0.2
@@ -102,6 +102,36 @@ def preprocess_features(df):
df['t_health'] = df['is_t'] * df['health']
df['ct_health'] = df['is_ct'] * df['health']
if 'flash_duration' not in df.columns:
df['flash_duration'] = 0.0
df['flash_duration'] = pd.to_numeric(df['flash_duration'], errors='coerce').fillna(0.0).astype('float32')
if 'has_defuser' not in df.columns:
df['has_defuser'] = 0
df['has_defuser'] = df['has_defuser'].fillna(0).astype(int)
if 'has_helmet' not in df.columns:
df['has_helmet'] = 0
df['has_helmet'] = df['has_helmet'].fillna(0).astype(int)
if 'armor_value' not in df.columns:
df['armor_value'] = 0
df['armor_value'] = pd.to_numeric(df['armor_value'], errors='coerce').fillna(0.0).astype('float32')
is_alive_int = df['is_alive'].astype(int)
is_blinded = ((df['flash_duration'] > 0).astype(int) * is_alive_int).astype(int)
df['t_blinded_count_p'] = df['is_t'] * is_blinded
df['ct_blinded_count_p'] = df['is_ct'] * is_blinded
df['t_blind_time_sum_p'] = df['is_t'] * is_alive_int * df['flash_duration']
df['ct_blind_time_sum_p'] = df['is_ct'] * is_alive_int * df['flash_duration']
df['ct_defuser_count_p'] = df['is_ct'] * is_alive_int * df['has_defuser']
df['t_helmet_count_p'] = df['is_t'] * is_alive_int * df['has_helmet']
df['ct_helmet_count_p'] = df['is_ct'] * is_alive_int * df['has_helmet']
df['t_armor_sum_p'] = df['is_t'] * is_alive_int * df['armor_value']
df['ct_armor_sum_p'] = df['is_ct'] * is_alive_int * df['armor_value']
# Aggregate per frame
group_cols = ['match_id', 'map_name', 'round', 'tick', 'round_winner', 'is_bomb_planted', 'site']
@@ -124,6 +154,36 @@ def preprocess_features(df):
# Note: 'round_winner' is in group_cols because it's constant per group
features_df = df.groupby(group_cols).agg(agg_funcs).reset_index()
utility_agg = (
df.groupby(['match_id', 'round', 'tick'])
.agg({
't_blinded_count_p': 'sum',
'ct_blinded_count_p': 'sum',
't_blind_time_sum_p': 'sum',
'ct_blind_time_sum_p': 'sum',
'ct_defuser_count_p': 'sum',
't_helmet_count_p': 'sum',
'ct_helmet_count_p': 'sum',
't_armor_sum_p': 'sum',
'ct_armor_sum_p': 'sum'
})
.reset_index()
.rename(columns={
't_blinded_count_p': 't_blinded_count',
'ct_blinded_count_p': 'ct_blinded_count',
't_blind_time_sum_p': 't_blind_time_sum',
'ct_blind_time_sum_p': 'ct_blind_time_sum',
'ct_defuser_count_p': 'ct_defuser_count',
't_helmet_count_p': 't_helmet_count',
'ct_helmet_count_p': 'ct_helmet_count',
't_armor_sum_p': 't_armor_sum',
'ct_armor_sum_p': 'ct_armor_sum'
})
)
utility_agg['ct_has_defuser'] = (utility_agg['ct_defuser_count'] > 0).astype(int)
utility_agg['blinded_diff'] = utility_agg['ct_blinded_count'] - utility_agg['t_blinded_count']
# 3. Add derived features
features_df['health_diff'] = features_df['ct_health'] - features_df['t_health']
features_df['alive_diff'] = features_df['ct_alive'] - features_df['t_alive']
@@ -140,6 +200,7 @@ def preprocess_features(df):
# Keys: match_id, round, tick
features_df = pd.merge(features_df, spatial_features, on=['match_id', 'round', 'tick'], how='left')
features_df = pd.merge(features_df, economy_features, on=['match_id', 'round', 'tick'], how='left')
features_df = pd.merge(features_df, utility_agg, on=['match_id', 'round', 'tick'], how='left')
rating_map = {}
try:
@@ -237,7 +298,7 @@ def train_model(df):
"""Train XGBoost Classifier."""
# Features (X) and Target (y)
feature_cols = FEATURE_COLUMNS
feature_cols = XGB_FEATURE_COLUMNS_V2
target_col = 'round_winner'
logging.info(f"Training features: {feature_cols}")
@@ -288,7 +349,7 @@ def train_model(df):
model.fit(X_train, y_train)
# Save Test Set for Evaluation Script
test_set_path = os.path.join("data", "processed", "test_set.parquet")
test_set_path = os.path.join("data", "processed", "test_set_v2.parquet")
logging.info(f"Saving validation set to {test_set_path}...")
test_df.to_parquet(test_set_path)
@@ -309,6 +370,8 @@ def main():
os.makedirs(MODEL_DIR)
try:
model_path = os.getenv("CLUTCH_XGB_MODEL_PATH", MODEL_PATH)
# 1. Load
raw_df = load_data(DATA_DIR)
@@ -323,8 +386,8 @@ def main():
model = train_model(features_df)
# 4. Save
model.save_model(MODEL_PATH)
logging.info(f"Model saved to {MODEL_PATH}")
model.save_model(model_path)
logging.info(f"Model saved to {model_path}")
# 5. Save player experience map for inference (optional)
if 'steamid' in raw_df.columns: