feat: Implement L1A incremental refresh
This commit is contained in:
39
ETL/verify/clean_dirty_data.py
Normal file
39
ETL/verify/clean_dirty_data.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
# Path to the production database: climb three directory levels up from this
# file to reach the project root, then descend into database/L1A.
BASE_DIR = os.path.abspath(__file__)
for _ in range(3):
    BASE_DIR = os.path.dirname(BASE_DIR)

DB_PATH = os.path.join(BASE_DIR, 'database', 'L1A', 'L1A.sqlite')
|
||||
|
||||
def clean_db():
    """Delete known simulated ("dirty") rows from the production L1A database.

    Every row of ``raw_iframe_network`` whose ``match_id`` is one of the
    hard-coded simulated IDs is deleted, the change is committed, and a
    summary is printed. If the database file does not exist, a notice is
    printed and the function returns without creating one.

    Returns:
        None. Progress and results are reported on stdout.

    Raises:
        sqlite3.Error: if a statement or the commit fails. The connection is
            still closed, and because nothing was committed the deletions
            are discarded.
    """
    if not os.path.exists(DB_PATH):
        print(f"Database not found at {DB_PATH}")
        return

    print(f"Connecting to production DB: {DB_PATH}")

    # Dirty data: the simulated records are assumed to use exactly these
    # match_ids. A LIKE 'match_%' filter would clean more thoroughly but
    # risks deleting real rows, so only exact IDs are removed here.
    dirty_ids = ['match_001', 'match_002', 'match_003']

    deleted_count = 0
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()
        for mid in dirty_ids:
            cursor.execute("DELETE FROM raw_iframe_network WHERE match_id = ?", (mid,))
            if cursor.rowcount > 0:
                print(f"Deleted dirty record: {mid}")
                # rowcount is the number of rows removed for this ID, so the
                # summary reflects records deleted rather than IDs matched.
                deleted_count += cursor.rowcount
        conn.commit()
    finally:
        # Always release the handle; closing without a commit drops any
        # uncommitted deletions.
        conn.close()

    if deleted_count > 0:
        print(f"Cleanup complete. Removed {deleted_count} dirty records.")
    else:
        print("Cleanup complete. No dirty records found.")
|
||||
|
||||
# Script entry point: run the cleanup only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    clean_db()
|
||||
Reference in New Issue
Block a user