Files
yrtv/ETL/verify/L1A_incre_test/clean_dirty_data.py

39 lines
1.2 KiB
Python

import sqlite3
import os
# Location of the production database, resolved relative to this script.
# BASE_DIR is the directory two levels above the one that contains this file.
_SCRIPT_PATH = os.path.abspath(__file__)
_SCRIPT_DIR = os.path.dirname(_SCRIPT_PATH)
BASE_DIR = os.path.dirname(os.path.dirname(_SCRIPT_DIR))
DB_PATH = os.path.join(BASE_DIR, 'database', 'L1A', 'L1A.sqlite')
def clean_db(db_path=None, dirty_ids=None):
    """Delete simulated ("dirty") rows from the ``raw_iframe_network`` table.

    Each id in ``dirty_ids`` is removed with an exact ``match_id`` comparison.
    All deletions run in a single transaction: they are committed together on
    success and rolled back if any statement fails. The connection is always
    closed.

    Args:
        db_path: Path of the SQLite file to clean. When omitted, the
            module-level ``DB_PATH`` is used.
        dirty_ids: Iterable of ``match_id`` values to delete. When omitted,
            the three simulated ids ``match_001``, ``match_002`` and
            ``match_003`` are used.

    Returns:
        None. Progress and a final summary are printed to stdout.
    """
    if db_path is None:
        db_path = DB_PATH
    if dirty_ids is None:
        # Ids assumed to belong to the simulated data. A broader
        # LIKE 'match_%' filter would be more thorough but risks deleting
        # real rows, so only exact matches are removed.
        dirty_ids = ('match_001', 'match_002', 'match_003')

    # sqlite3.connect() would create an empty database file for a missing
    # path, so check for the file before connecting.
    if not os.path.exists(db_path):
        print(f"Database not found at {db_path}")
        return

    print(f"Connecting to production DB: {db_path}")
    conn = sqlite3.connect(db_path)
    deleted_count = 0
    try:
        # The connection context manager commits on success and rolls back
        # if a DELETE raises; it does not close the connection.
        with conn:
            for mid in dirty_ids:
                cursor = conn.execute(
                    "DELETE FROM raw_iframe_network WHERE match_id = ?",
                    (mid,),
                )
                if cursor.rowcount > 0:
                    print(f"Deleted dirty record: {mid}")
                    # Count rows, not ids: one match_id may own several rows.
                    deleted_count += cursor.rowcount
    finally:
        conn.close()

    if deleted_count > 0:
        print(f"Cleanup complete. Removed {deleted_count} dirty records.")
    else:
        print("Cleanup complete. No dirty records found.")
# Script entry point: run the cleanup only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    clean_db()