"""Load raw ``iframe_network.json`` captures into the L1A SQLite database.

Every ``<arena>/<match_id>/iframe_network.json`` file is read as text and
upserted into the ``raw_iframe_network`` table, keyed by the name of the
directory that contains it.
"""

import glob
import json  # not used by the code below; kept so the file's import set is unchanged
import os
import sqlite3

# Paths
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
OUTPUT_ARENA_DIR = os.path.join(BASE_DIR, 'output_arena')
DB_DIR = os.path.join(BASE_DIR, 'database', 'L1A')
DB_PATH = os.path.join(DB_DIR, 'L1A.sqlite')

# Number of successfully stored files between progress reports / commits.
_COMMIT_BATCH = 100


def init_db(db_path=DB_PATH):
    """Open the SQLite database, creating its directory and table if needed.

    Args:
        db_path: Location of the SQLite file. Defaults to ``DB_PATH``.

    Returns:
        An open ``sqlite3.Connection``; the caller is responsible for
        closing it.

    Raises:
        OSError: If the database directory cannot be created.
        sqlite3.Error: If the database cannot be opened or initialised.
    """
    db_dir = os.path.dirname(db_path)
    if db_dir:
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(db_dir, exist_ok=True)

    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS raw_iframe_network (
                match_id TEXT PRIMARY KEY,
                content TEXT,
                processed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        conn.commit()
    except sqlite3.Error:
        # Do not leak the connection when initialisation fails.
        conn.close()
        raise
    return conn


def process_files(arena_dir=OUTPUT_ARENA_DIR, db_path=DB_PATH):
    """Upsert every ``iframe_network.json`` under *arena_dir* into the database.

    The match id of each row is the name of the directory that directly
    contains the JSON file (e.g. ``.../output_arena/g161-xxx/`` gives
    ``g161-xxx``). The file content is stored verbatim as text; it is not
    parsed or validated. Files that cannot be read or stored are reported
    on stdout and skipped.

    Args:
        arena_dir: Directory whose immediate sub-directories hold the
            ``iframe_network.json`` files. Defaults to ``OUTPUT_ARENA_DIR``.
        db_path: Location of the SQLite file. Defaults to ``DB_PATH``.
    """
    conn = init_db(db_path)
    try:
        cursor = conn.cursor()

        # Pattern to match all iframe_network.json files:
        # <arena_dir>/*/iframe_network.json
        pattern = os.path.join(arena_dir, '*', 'iframe_network.json')
        # Sorted so that runs process files in a deterministic order.
        files = sorted(glob.glob(pattern))
        print(f"Found {len(files)} files to process.")

        count = 0
        for file_path in files:
            try:
                # The parent directory name is the match id.
                match_id = os.path.basename(os.path.dirname(file_path))

                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()

                # Upsert: re-running replaces the row for an existing match id.
                cursor.execute('''
                    INSERT OR REPLACE INTO raw_iframe_network (match_id, content)
                    VALUES (?, ?)
                ''', (match_id, content))

                count += 1
                if count % _COMMIT_BATCH == 0:
                    print(f"Processed {count} files...")
                    # Commit in batches so an interrupted run keeps its progress.
                    conn.commit()
            except (OSError, UnicodeError, sqlite3.Error) as e:
                # Best effort: one bad file must not abort the whole run.
                print(f"Error processing {file_path}: {e}")

        print(f"Finished processing {count} files.")
    finally:
        # Persist whatever was stored and always release the connection,
        # even when an unexpected error escapes the loop.
        try:
            conn.commit()
        finally:
            conn.close()


if __name__ == '__main__':
    process_files()