简化项目并切换到v2模型与场景报告
This commit is contained in:
@@ -29,12 +29,12 @@ import sqlite3
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
|
||||
from features.spatial import calculate_spatial_features
|
||||
from features.economy import calculate_economy_features
|
||||
from features.definitions import FEATURE_COLUMNS
|
||||
from features.definitions import FEATURE_COLUMNS, XGB_FEATURE_COLUMNS_V2
|
||||
|
||||
# Configuration
|
||||
DATA_DIR = "data/processed"
|
||||
MODEL_DIR = "models"
|
||||
MODEL_PATH = os.path.join(MODEL_DIR, "clutch_model_v1.json")
|
||||
MODEL_PATH = os.path.join(MODEL_DIR, "clutch_model_v2.json")
|
||||
L3_DB_PATH = os.path.join("database", "L3", "L3.db")
|
||||
L2_DB_PATH = os.path.join("database", "L2", "L2.db")
|
||||
TEST_SIZE = 0.2
|
||||
@@ -102,6 +102,36 @@ def preprocess_features(df):
|
||||
|
||||
df['t_health'] = df['is_t'] * df['health']
|
||||
df['ct_health'] = df['is_ct'] * df['health']
|
||||
|
||||
if 'flash_duration' not in df.columns:
|
||||
df['flash_duration'] = 0.0
|
||||
df['flash_duration'] = pd.to_numeric(df['flash_duration'], errors='coerce').fillna(0.0).astype('float32')
|
||||
|
||||
if 'has_defuser' not in df.columns:
|
||||
df['has_defuser'] = 0
|
||||
df['has_defuser'] = df['has_defuser'].fillna(0).astype(int)
|
||||
|
||||
if 'has_helmet' not in df.columns:
|
||||
df['has_helmet'] = 0
|
||||
df['has_helmet'] = df['has_helmet'].fillna(0).astype(int)
|
||||
|
||||
if 'armor_value' not in df.columns:
|
||||
df['armor_value'] = 0
|
||||
df['armor_value'] = pd.to_numeric(df['armor_value'], errors='coerce').fillna(0.0).astype('float32')
|
||||
|
||||
is_alive_int = df['is_alive'].astype(int)
|
||||
is_blinded = ((df['flash_duration'] > 0).astype(int) * is_alive_int).astype(int)
|
||||
|
||||
df['t_blinded_count_p'] = df['is_t'] * is_blinded
|
||||
df['ct_blinded_count_p'] = df['is_ct'] * is_blinded
|
||||
df['t_blind_time_sum_p'] = df['is_t'] * is_alive_int * df['flash_duration']
|
||||
df['ct_blind_time_sum_p'] = df['is_ct'] * is_alive_int * df['flash_duration']
|
||||
|
||||
df['ct_defuser_count_p'] = df['is_ct'] * is_alive_int * df['has_defuser']
|
||||
df['t_helmet_count_p'] = df['is_t'] * is_alive_int * df['has_helmet']
|
||||
df['ct_helmet_count_p'] = df['is_ct'] * is_alive_int * df['has_helmet']
|
||||
df['t_armor_sum_p'] = df['is_t'] * is_alive_int * df['armor_value']
|
||||
df['ct_armor_sum_p'] = df['is_ct'] * is_alive_int * df['armor_value']
|
||||
|
||||
# Aggregate per frame
|
||||
group_cols = ['match_id', 'map_name', 'round', 'tick', 'round_winner', 'is_bomb_planted', 'site']
|
||||
@@ -124,6 +154,36 @@ def preprocess_features(df):
|
||||
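# Note: 'round_winner' is listed in group_cols so that it is carried through the aggregation as a column; it is constant within each group, so including it does not create extra groups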
|
||||
features_df = df.groupby(group_cols).agg(agg_funcs).reset_index()
|
||||
|
||||
utility_agg = (
|
||||
df.groupby(['match_id', 'round', 'tick'])
|
||||
.agg({
|
||||
't_blinded_count_p': 'sum',
|
||||
'ct_blinded_count_p': 'sum',
|
||||
't_blind_time_sum_p': 'sum',
|
||||
'ct_blind_time_sum_p': 'sum',
|
||||
'ct_defuser_count_p': 'sum',
|
||||
't_helmet_count_p': 'sum',
|
||||
'ct_helmet_count_p': 'sum',
|
||||
't_armor_sum_p': 'sum',
|
||||
'ct_armor_sum_p': 'sum'
|
||||
})
|
||||
.reset_index()
|
||||
.rename(columns={
|
||||
't_blinded_count_p': 't_blinded_count',
|
||||
'ct_blinded_count_p': 'ct_blinded_count',
|
||||
't_blind_time_sum_p': 't_blind_time_sum',
|
||||
'ct_blind_time_sum_p': 'ct_blind_time_sum',
|
||||
'ct_defuser_count_p': 'ct_defuser_count',
|
||||
't_helmet_count_p': 't_helmet_count',
|
||||
'ct_helmet_count_p': 'ct_helmet_count',
|
||||
't_armor_sum_p': 't_armor_sum',
|
||||
'ct_armor_sum_p': 'ct_armor_sum'
|
||||
})
|
||||
)
|
||||
|
||||
utility_agg['ct_has_defuser'] = (utility_agg['ct_defuser_count'] > 0).astype(int)
|
||||
utility_agg['blinded_diff'] = utility_agg['ct_blinded_count'] - utility_agg['t_blinded_count']
|
||||
|
||||
# 3. Add derived features
|
||||
features_df['health_diff'] = features_df['ct_health'] - features_df['t_health']
|
||||
features_df['alive_diff'] = features_df['ct_alive'] - features_df['t_alive']
|
||||
@@ -140,6 +200,7 @@ def preprocess_features(df):
|
||||
# Keys: match_id, round, tick
|
||||
features_df = pd.merge(features_df, spatial_features, on=['match_id', 'round', 'tick'], how='left')
|
||||
features_df = pd.merge(features_df, economy_features, on=['match_id', 'round', 'tick'], how='left')
|
||||
features_df = pd.merge(features_df, utility_agg, on=['match_id', 'round', 'tick'], how='left')
|
||||
|
||||
rating_map = {}
|
||||
try:
|
||||
@@ -237,7 +298,7 @@ def train_model(df):
|
||||
"""Train XGBoost Classifier."""
|
||||
|
||||
# Features (X) and Target (y)
|
||||
feature_cols = FEATURE_COLUMNS
|
||||
feature_cols = XGB_FEATURE_COLUMNS_V2
|
||||
target_col = 'round_winner'
|
||||
|
||||
logging.info(f"Training features: {feature_cols}")
|
||||
@@ -288,7 +349,7 @@ def train_model(df):
|
||||
model.fit(X_train, y_train)
|
||||
|
||||
# Save Test Set for Evaluation Script
|
||||
test_set_path = os.path.join("data", "processed", "test_set.parquet")
|
||||
test_set_path = os.path.join("data", "processed", "test_set_v2.parquet")
|
||||
logging.info(f"Saving validation set to {test_set_path}...")
|
||||
test_df.to_parquet(test_set_path)
|
||||
|
||||
@@ -309,6 +370,8 @@ def main():
|
||||
os.makedirs(MODEL_DIR)
|
||||
|
||||
try:
|
||||
model_path = os.getenv("CLUTCH_XGB_MODEL_PATH", MODEL_PATH)
|
||||
|
||||
# 1. Load
|
||||
raw_df = load_data(DATA_DIR)
|
||||
|
||||
@@ -323,8 +386,8 @@ def main():
|
||||
model = train_model(features_df)
|
||||
|
||||
# 4. Save
|
||||
model.save_model(MODEL_PATH)
|
||||
logging.info(f"Model saved to {MODEL_PATH}")
|
||||
model.save_model(model_path)
|
||||
logging.info(f"Model saved to {model_path}")
|
||||
|
||||
# 5. Save player experience map for inference (optional)
|
||||
if 'steamid' in raw_df.columns:
|
||||
|
||||
Reference in New Issue
Block a user