From 1642adb00e16f5439cbc1140a97ab9cc5dfd7736 Mon Sep 17 00:00:00 2001 From: Jacky Yang Date: Wed, 28 Jan 2026 17:00:47 +0800 Subject: [PATCH] Remove test files. --- ETL/verify/L1A_incre_test/clean_dirty_data.py | 39 - ETL/verify/L1A_incre_test/setup_test_data.py | 35 - .../L1A_incre_test/test_L1_incremental.py | 76 -- ETL/verify/L2_verify_report.txt | Bin 97526 -> 0 bytes ETL/verify/verify_L2.py | 504 -------- ETL/verify/verify_L3.py | 29 - ETL/verify/verify_deep.py | 82 -- docs/player_profile_data_reorganization.md | 1043 +++++++++++++++++ scripts/analyze_dmg_per_1k.py | 74 -- scripts/debug_dist.py | 45 - scripts/debug_jacky.py | 94 -- 11 files changed, 1043 insertions(+), 978 deletions(-) delete mode 100644 ETL/verify/L1A_incre_test/clean_dirty_data.py delete mode 100644 ETL/verify/L1A_incre_test/setup_test_data.py delete mode 100644 ETL/verify/L1A_incre_test/test_L1_incremental.py delete mode 100644 ETL/verify/L2_verify_report.txt delete mode 100644 ETL/verify/verify_L2.py delete mode 100644 ETL/verify/verify_L3.py delete mode 100644 ETL/verify/verify_deep.py create mode 100644 docs/player_profile_data_reorganization.md delete mode 100644 scripts/analyze_dmg_per_1k.py delete mode 100644 scripts/debug_dist.py delete mode 100644 scripts/debug_jacky.py diff --git a/ETL/verify/L1A_incre_test/clean_dirty_data.py b/ETL/verify/L1A_incre_test/clean_dirty_data.py deleted file mode 100644 index 60280d5..0000000 --- a/ETL/verify/L1A_incre_test/clean_dirty_data.py +++ /dev/null @@ -1,39 +0,0 @@ -import sqlite3 -import os - -# 路径指向正式数据库 -BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -DB_PATH = os.path.join(BASE_DIR, 'database', 'L1A', 'L1A.sqlite') - -def clean_db(): - if not os.path.exists(DB_PATH): - print(f"Database not found at {DB_PATH}") - return - - print(f"Connecting to production DB: {DB_PATH}") - conn = sqlite3.connect(DB_PATH) - cursor = conn.cursor() - - # 查找脏数据 (假设模拟数据的 match_id 是 match_001, match_002, match_003) - 
dirty_ids = ['match_001', 'match_002', 'match_003'] - - # 也可以用 LIKE 'match_%' 如果您想删得更彻底,但要小心误删 - # 这里我们精准删除 - - deleted_count = 0 - for mid in dirty_ids: - cursor.execute("DELETE FROM raw_iframe_network WHERE match_id = ?", (mid,)) - if cursor.rowcount > 0: - print(f"Deleted dirty record: {mid}") - deleted_count += 1 - - conn.commit() - conn.close() - - if deleted_count > 0: - print(f"Cleanup complete. Removed {deleted_count} dirty records.") - else: - print("Cleanup complete. No dirty records found.") - -if __name__ == "__main__": - clean_db() \ No newline at end of file diff --git a/ETL/verify/L1A_incre_test/setup_test_data.py b/ETL/verify/L1A_incre_test/setup_test_data.py deleted file mode 100644 index 0641b87..0000000 --- a/ETL/verify/L1A_incre_test/setup_test_data.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -import json - -# 定义路径 -CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) -PROJECT_ROOT = os.path.dirname(os.path.dirname(CURRENT_DIR)) -OUTPUT_ARENA_DIR = os.path.join(PROJECT_ROOT, 'output_arena') - -def create_mock_data(): - if not os.path.exists(OUTPUT_ARENA_DIR): - os.makedirs(OUTPUT_ARENA_DIR) - print(f"Created directory: {OUTPUT_ARENA_DIR}") - - # 创建 3 个模拟比赛数据 - mock_matches = ['match_001', 'match_002', 'match_003'] - - for match_id in mock_matches: - match_dir = os.path.join(OUTPUT_ARENA_DIR, match_id) - if not os.path.exists(match_dir): - os.makedirs(match_dir) - - file_path = os.path.join(match_dir, 'iframe_network.json') - if not os.path.exists(file_path): - mock_content = { - "match_id": match_id, - "data": "This is mock data for testing." 
- } - with open(file_path, 'w', encoding='utf-8') as f: - json.dump(mock_content, f) - print(f"Created mock file: {file_path}") - else: - print(f"File already exists: {file_path}") - -if __name__ == "__main__": - create_mock_data() \ No newline at end of file diff --git a/ETL/verify/L1A_incre_test/test_L1_incremental.py b/ETL/verify/L1A_incre_test/test_L1_incremental.py deleted file mode 100644 index e6ab1a1..0000000 --- a/ETL/verify/L1A_incre_test/test_L1_incremental.py +++ /dev/null @@ -1,76 +0,0 @@ -import os -import sqlite3 -import subprocess -import glob - -# 配置路径 -# 当前脚本位于 ETL/verify/ 目录下,需要向上两级找到项目根目录 -CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) -PROJECT_ROOT = os.path.dirname(os.path.dirname(CURRENT_DIR)) - -L1_SCRIPT = os.path.join(PROJECT_ROOT, 'ETL', 'L1A.py') -DB_PATH = os.path.join(PROJECT_ROOT, 'database', 'L1A', 'L1A.sqlite') -OUTPUT_ARENA_DIR = os.path.join(PROJECT_ROOT, 'output_arena') - -def get_db_count(): - """获取数据库中的记录数""" - if not os.path.exists(DB_PATH): - return 0 - try: - conn = sqlite3.connect(DB_PATH) - cursor = conn.cursor() - cursor.execute("SELECT COUNT(*) FROM raw_iframe_network") - count = cursor.fetchone()[0] - conn.close() - return count - except Exception: - return 0 - -def get_file_count(): - """获取源文件总数""" - pattern = os.path.join(OUTPUT_ARENA_DIR, '*', 'iframe_network.json') - files = glob.glob(pattern) - return len(files) - -def run_l1_script(): - """运行 L1 脚本并返回输出""" - # 必须在项目根目录下运行,或者正确处理 Python 路径 - # 这里我们使用绝对路径调用脚本 - result = subprocess.run(['python', L1_SCRIPT], capture_output=True, text=True) - return result.stdout - -def main(): - print("=== 开始 L1 增量逻辑测试 ===") - print(f"项目根目录: {PROJECT_ROOT}") - - # 1. 检查环境 - total_files = get_file_count() - initial_db_count = get_db_count() - print(f"[环境] 源文件总数: {total_files}") - print(f"[环境] 数据库当前记录数: {initial_db_count}") - - # 2. 
运行脚本 (第一次) - print("\n--- 运行 L1A.py (Run 1) ---") - output1 = run_l1_script() - print(output1.strip()) - - mid_db_count = get_db_count() - print(f"[状态] 运行后数据库记录数: {mid_db_count}") - - if mid_db_count < total_files: - print("警告: 数据库记录数少于文件数,可能部分文件处理失败或尚未完成。") - - # 3. 运行脚本 (第二次 - 验证增量) - print("\n--- 再次运行 L1A.py (Run 2 - 验证增量) ---") - output2 = run_l1_script() - print(output2.strip()) - - # 4. 验证结果 - expected_msg = f"Skipped: {total_files}" - if expected_msg in output2: - print("\n✅ 测试通过! 第二次运行跳过了所有文件,增量逻辑生效。") - else: - print(f"\n❌ 测试未通过。预期输出应包含 '{expected_msg}'") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/ETL/verify/L2_verify_report.txt b/ETL/verify/L2_verify_report.txt deleted file mode 100644 index 641b571f057c83f4763f79a4dc2b5f6047480100..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 97526 zcmeI5OK&B+ae&V{K!E%N;Q~fBzTOaG#5vd%JpBAX=jb*Y9$vf0hc8-On~(Z60iR;>$h#X!~f=?`P5<=Yq+L?yavkAEf-Y>ye|4o&BJ@_p7e2E^qJI zncn;5=0fWEt-O6csr4za{&e#@>GO9T<<7fa9)A0}qtT7PbS?Bgm$%;wj8_SS_iy37 zzqz-$FF&hOX?Y~{o(Voz9kwq!Y{CC-U>bAk=`BsqHm?QaQ^EK|I2d`OyzIHm_%-FwHFK}W&3BD^W2`TPvLA)6$R@b@&}r)7&*hTG-9LGV+#BG3FLb-yycC&>ZgH}C)t!G3 z&OMQ9I-lCphmEOXk<<0C$EoMalg(KtiI`*1e~`6rgt`~AWbg6jNjIXOiAH!&bi$|6 z_&(Y^OwNr<+8)cALue=N1Ai8tIr^j(<+IH3=CQ-n@DA@DuLjejPH$9Q@J*)?vI5{d zKs$u#qmAP7OsIKJD41%0`=F_(7uw&Y$e=eJ&pz+CnOR14B-T8!W?q&^k8b%>G4(nz zJ@xPtH~4;QUmoV(RQ5g>ywMF`3%;!CJUnGgAAZ90vCtHq$-nn#5lmT6Jdxky%_HHY zV>uhf)I+4@8=)lYp!!+nso-W3=~yr|3R~Ujxm|C4G)zshA!PI^fLrVoT~6qg6Wr_85DeZyndH1!yu z4PFZcFNKCLg?^Wt-wU4Tf@(!VH>{mcAAFKX;CxT?20t#J3f`$EVLhcMopPRI12^sCnXP%zD9?XI~u z#x&;Tm)&|sAd2pI9(g0)iS-g8*{UVvSqofIcO`nW##&IHHUT7Lo$TT(7ix$U)vJpGWX_K zhW!e(BzDunZwQ;AduzZHEEub{PJ^pYMHXQtygQiY^z>Y-x>Bv|9UG@6uL6H8Qz~<> zwU=WJ7ej4(g5wZ}4&B=g_x^hGuLHA;;o&)$xs-jHdYi*oFYE?WpctlUPET}v9kF$` z!9&4RJsxgkMT4fm`h^{FThkcT$FM1L?=#U|{MyFX*rv=1>b|To`qwh)`ehqC(pWF- zihGAOhZN%vB8jl~-ZhDo;)yiE`{qPs5|+?9>&jYaIwp~9tfW|DJZ~ 
zNA^hiWbd$!&t57%EUmP`WhIjJTfaoIyLwFVz7{JJlDG{`%|;rx+3t)Tg!`R^r;ny_ zJ6>}*6)dM&wyCQ;%W!u++91$)Y*qd~PUgWOY|8h{hbQjaIcMxPeIb%xRhgzGt)cBWP3=l^upEB}%SIVf|GxR!nPwI0K_-F&(PriSx3fBNQ6KiT~G=l^;7*Z=%)*B`4fjrSQ_t;C=v@pE7ek4aYhgtht&dx;h{*ibhKfEnddMe}cOx}JWr594qnT$@P>~obR~|AMXG`$5l~k`Gd!=( zx_FBBLPab`dgh;PL(|>twGHi`dOUnt*Qjk~nHt$^ZL_ERMEd)=$kJ08cQ0jxpUDxA zuCGMGV(COiBbk|7)NAm??OQJg$7$;0Kz!EO=G!}BF-x(#5-t0=@boeoJ~Q1^&B16q z^;nj@DbDXjHzE!ZFQry%8KB*t?A5+X@eJ0Zt4>a}8K<AMPH8V^JmR|267xX;E;D_=|c-!oj z`s^2*U?F6j`oG#Z|5Dxn>=N%~T(rs<0Brlu7O-3vfS-y*I*;1dUTkIAfgk(zGhO#s zYsMvkf45nq(xUowA>Ge9>4}dF6txz)&puzf7JW_s^~eAI`9J;p|NPPOH|4oJj$P+W zF!0tl;BUk0YJXb?zx!V7FE2MD`rDg-wSR<-*Y{DVt@$GA{D#eb$}E!m{?p!WRt^?L zR>F`qF4_#B8tsO_jA;qm@Vic@;-xuadV zgkxwSc-MiUMq$7XeaZHzEXOVHp z{AhNiDYioEjhv2fvfWMlb=!0N1D?u{{Tt%+hv^t_#JYku&@%_BdMNxZI8!JVMR}{P~$Z`p09ws$%B_`Py=aX(_pCJpUgr{^cM3 z>iM6)>Bq5el{{4^r={|Em+&t z-%-3D`*7^%)DKc4STZVoKbL8>US2i$iD6i(tOyODloqZ6C{aJHOieRQ3@6QEIY`=0~#Y z_NKtA*igwhhK-!8(YEi}4o7-%37ON!7yWb$I9+$t)Er#XY#p>VG*;VqVD#a1Bs5o_ zc-BkxQ6KL_CvU6g+!u-058<{~r>#5UQHjqRBcqK6xkm47i_h5mXpA|p;z@(I7ymVf z`#ifGtq=3+Zscm8o|E>BUsp}p3mC?6iVk29wOm_So#6#b2F?MTKjUj#uFPFt{@H-8h$COdA_7KPRCffwj7Vk>HyYfwK_S^-=+Ny!@uvycNmG4_xFbQap>Oi zIyti>xgf@Q)$l=cMIUOjwxZT0=SRcudQ=C{NHE*wXqkZp9JlwgQcNC;bV08niwE-& z`LBK~|DVZ!%eW0K?XPA0L|AWLOUt!214-#Cqh##Ry|(?w4Hry7d7M~3)@7907Z%#H0SgcPOQFa-zkg(h_K#7` zC>gIa?P`moUoUwqu;?ES7Qrf-;}HB!eScXG=lzY3pg#GP_|cMc(7%^{cfjAae9anZt+X1#>riRs;XV@oN55NGqo1sUR;3lX`uBYh8KL=YiKVHP7Q~g&JNj*L z{IBD_T26QL?et4_eZu@@4i~F2$sX^cmnFNcc#T%+8f(9_vZ&bLeH>F*&*D`*;|lGT zdy3WLZ2h}WtnizS$5kz@>`T##Rlcgj~G}min>E$5jl?p)iv639<`Md?J z6CBnx&i#1Sg|^z3`mxG&_Pc^hIiW3= zIFRp9DOIt{fR)$Tc{jzf9VWh}eysEfKd$ZqR+gnJaA4hh#rjFSPRGR?TS|3Q!ehSlw$sFi33?(DbGFb>=tH6vv-3R znq67un2#%MC&lSH;-n$0R2~>su`aO>to)vE*gZfCtosH#9&L|rbtKIn$~P%IHHeb* z=S!4x8;5}dFPe*$nYgTjy3ZY zt15iW6e}#!_@c$|rjkCs+v?2bt5SV0JN=?=7AD4p-8kw(cf!h<>K3Whv()Z(*ZVTJ z#5F2jb+keP>PR93**&3LYFAG=GH$EYS&xc|CgeFJH=b!1wX}lE 
zXoHm&JE+F1+HQKDLeuKJKjLEEP7m^Yy;ymiuwsxg<#pA#VwGsLLI(1Mdf)%MqE*g; zp8o<)bsV@}tnjz}PX5UCf=9cJEZ^rgTGEvWQM)Sj@Iqi*#WK%qHNLJT%DKiwD}8g9 zZ@r9_*>165V7pa*)OVS~mS2g`avn`XHe94RqswQsufYn*r}`@+D4nm6j2=s0W7~2+ ztu(#~5B(79>)S2S3cp9w%9*dc-z^yBUM0TPZOg>1qtPI->ej-sEGuKh%3h&kzDj*L zcO?T$mNOtTy|zAo5Pw)16kLjw>dsygNc*=Z*&SV%P*~ydcA>P&Qfki?4wyo?jtkW&9_u0tN zm5AY}b?yqQe(e^2#|zOa@D0R|1bxGBN6cgs*dXFDq9L8!_dm=Gb{e5>O zwv5_wW&2m^|LXNXX0mkevA?}-t}^#y7x%QX`eGYX?M{!rZ5BUx!F-FY-jpNldb^|A zOH#?sckmcjd>>Z7bv#Y7V{2C!!78@KAR%qkwZkK?iBZr;D|^!We%m#UWf{oG(kH|JAKT6mrEbr4=~&^_D)>`oD7E@cJsmD%DE%p>c(l zXtJa=Hom_8apk{qG9?aQYc_uD=g!zV<6&u6#wIj0uC%3AUo~U4GFj3N2dxSEVE?`^ z7-4HIW5vz^(JzdzkJU1;L@Q#ZR+VEMuj4y;+akwd{yTvWIY&@>+TeMw@I$%7fEJRLlHL9;13c zCWW^&u8{eRtZ7))JWaj!4GS#&NTt4U<*{m;v3e;P7-OGXq`B|0x~t!f0mt_d=aji@ zSf9s#EAG`6XUqsE0^^0u0K|T1v+Zn>n%n2jx~4=an0n{E#rq9Z-Ji}}hj zNDuYbw?Zj+(mykg1L61Bmh0Bf`_|)K>7hAS#NK+`$^J;XVT~mL) z-`lPsHTv7JUcX!pQr!#i+7-~DC(ey6-Zh`{mFYQNB+KO7nT1jLSjVPkrN0 zJx+=KS`N~3pBl7&|I)7MTY47ndMyWOf1l2e6I>2b?VWV&Ti>lIf3NRTtgY+L;GK{C z`F`}fqkgRK#oFoqGi~R6YJ7SmbCrIXb{)Bk4bm|_t+8xEeajA`Ic{^GiuqP(gEaJ{ zmVy=5{9ODK<<-fqA7JvxILtCeR}5kv=&nPau8q0!FN7|G_I$w z)zYsg=1{cUf@Tx3^kjsep6>Gfqv?SnM!-F0Mtjv-B9^p<|#=j9P) z9`WrnI>(SIG&#HR6mHL{G(JX)32gK>MmHf<2^RheP zAFNV}>{M~2xI!CqLr%r=m3TzibMrKmBk9pyLuwFnTuUy*yiYytr}{50nYA&8lwN!z zyV3Z{oOFBW^_i`94QZSQ&Lpeni2gYWPgcwOM#jNBYiXLv+1EK%&Z7g@ng=q-Q^RSk zc{p=OW7;#m>T>|1ovMa{2Q{(Vv0l4P{;Of%<)BN&EtdEcO-g5WVA4EHMk_tekG0A9 zlp6bHR^MU?Qu600ESA~Rp5$%xm+ftRW2=*Rm0|6tRx)NNeG+>lvuSFBp5Lb_pL+Yw z^lU!BsaT)^qMycnnA=l5rH0ExI=_edI?^jkeF}Z87t#6I@%8PS)yiM1EvJ2Ic{Jwb zr(s2BurZ?2-AKz4UkjZ+r&?J&qEhSb)EJX7NJH+HX|VG2kd(4{bRBumQVj(eu+Cg& zDWt}ybMw8hv#XS4#h0!{S-VcBJ8vLUO8!#Z4|7zVJSE=KOKp`^17CPX z?VCBDh9@W){p%z@nTO13!W?D0s8;9v=qj-6>r=DsPnD<2`3e{Enixofl6&*#C}zJt zNUip%D?Z*cdh5&*kM*S_zri~#(~((8fYkFP^qV84kS01Wm;2OvBGhTp;AT<^NTHeG z3lFmr2wFGMuWdCH=6?HIHb;ZT*w(fIOuvu}eB}FkE2I1yDU-qY$5Ph0 zS=ZK|voyr>3TNY48vHx;`=i$Hx@PX1r?}(mo>CsNm3R*>vnXrb{WIfkA@!E0saEpc zLaKe1`^*?xWvx@4Xu9>9^iYbdIf4_S1z 
zf7hKy3W-maRO{8tXjQS#J{|KmTAL;f4}Q+kp(Aqpwdol3ZP!!VEAy%4y3ja%`|_*4 z#s7HsGF%@o)vluJAyvO9h1B2x=wiQompQc`MOHn`g za7@W3q5IzoseR8pU^!=ZT;Hzc)3}B5h$6;7`&2z*W8aAOeVX&>dLSK>QniqpPo!nO zi08Il^l2e9N;lAZi{yv zb^$3+q|l~b-0OifK$60|#>^LeeQTba0h6*WrQfUI$GZsY?)pq0_46IR+Wh8Z=2A|?PY_Ky<{IYPdf4J8 z`ykaPQh5D)B|hC1q<(Kdct%ah(!e*ojQSc^>V3@7PSdrj$G=?8HE9p)oXi)*Y-oQ! z=|)rv8D4m0c*=KjNYiIcpN79NUiBDMPG{kIq%&lkQwdt-Q-7cHRZu>OOCt}jSwojEa(W%IJ6x*|ovYEiwI$X}R zKFz>BFFy}_x=Tm{Zq}bC$$VYnQ?l`DFKRxCJT^DhSVBwVCG*=K)A?gPvjiz|`6{I} zzBY}2u8)PtAx|a0UpLRm(qLBT2hz95(^z+!A6JUpEAQ`nsIMcPsNAQP?SR-!X46=g z+6Gdie&}t!4tOQ@$mtd|e2Z-r4Tl10%$MmmRYR*oC2C?V>H9Q=Zkqjc98z0l4Z&5q z7ei`Kd2OnGbpoWUPqY^Do0|@3^_r$-=e0-jC#53_8E`0B8UmiCL$5RQMSwGureqYb zOqWL@p@@GDb|}XESIGzqZInwoHi_BtBZ}vB?9b19VRM9LR!F5(d|rkxzcU=CA7)c) zUu&>YlWKiL>6=!s`6S}^wvnep`FViKV6Bt<%p?X|zB*xlH*iDVYr|K<>ZI?vphwq2 z3V&)%^mn?#^L*RaUin)4d=f*DLVN4+GEDti42! zov1vb;8evG>f`4bFvl)oZ-%SmC2AtuXBpDZSsKQBF@K^oqT(mlo>9P{J%lU`hm!fi z>wllKG#H2FN8}r}E!K?W9HrDJrF?(lkHW0#Ycr{sYbd4NUweD1}qD ztIjrgd3(>!^xiL}zus(qD{seh?ARC{%Eh3&$k2m~E7wAmi;B61%*2-YQ9ZMR1<#X`f!pD7@@w9`2O& zF=%V6UG}?DT5pr}G4!K{gg%`HBi0yO$?G>VZMClT#_M(8)_A#ZPj0$z3wG|XgUv1n z``w{+tl)F_5$kwv=cZn-o3C@N6J~)E8RH)W;?N&ue&iYJxxRHp%AdZ@v2sjnm5xXs z_KU7X`nFr&7`Ki==GwFz z&ma5McswLy=L)2 z4dDmy`W6qh(%f`f@tN$rF;5wE9Q)~}{`IQSnfC6rCA>!LOrNMB&ButV!R?W7q(YA!J>NUtCXU&+KUOZ! z0g7enbL8vmqZ=zf<})2{jJ?!rml~F*8?D5eyFy+bZ{L5STS=B^4eTUpNqHlz(D(`? 
zUWlvDQ&rr;9Dq;woG&Cy=>=B5eI8m?hnF8gPS(;btX&j0 zqarq2;>M@gZ4ILOqkdnae^p-Q$$GjO7pAyDxAa?F^-$M6$*v$4ma;6tW7$?dd_{Q5 zcW}b&tho8{g@;Yx(K=}7Jcah-$D8+OHA_J5kr&oxX6d1iJpBFoSb#8oHeBDf^tYS8 zm*3Im>&-8_lE*Erdwmv}6vCnNzB$02k9qR@&J)n?akSpg`EK}L;8qF%Ff@l9IsXmbMQX!JO1PSOf&}LnkOj{HD(mi zx7qvk^-L*iKYP91VqDnaX9m8Iag2-)wmM(GL6Yme!MdR~2z!#Zi7)Bb4ZoD@mbU|~ ztnb(FOX{IoV)vT*(GOEO4K-A!!RCR^OFTDmDa1SQHBR+`MXtp##%G=eYuwF_p(U~) zUd5gWrg5~5Q9S33;kh2|K5fm;(;1H5rkBx~xi=gQVYTRS|2y))@e`Tt`gr5K^VNDI z`XLgTec%(({n?31J-FueGu*0{*SJ^4&(M>G= 2: - paths.append(row[1]) - return paths - -def is_covered(path): - if path in ["data", "code", "message", "status", "timestamp", "timeStamp", "traceId", "success", "errcode"]: - return True - if path.startswith("data.."): - key = path.split("data..")[1].split(".")[0] - if key in covered_vip_fields: - return True - if "data.group_N[].fight_any." in path: - return True - if "data.group_N[].fight_t." in path or "data.group_N[].fight_ct." in path: - return True - if "data.group_N[].sts." in path: - return True - if "data.group_N[].level_info." in path: - return True - if "data.treat_info." in path: - return True - if "data.has_side_data_and_rating2" in path: - return True - if "data.main." in path: - key = path.split("data.main.")[1].split(".")[0] - if key in covered_main_fields: - return True - if any(k in path for k in covered_user_fields): - return True - if "data.round_list" in path: - return True - if any(k in path for k in covered_round_fields): - return True - if "data.leetify_data." in path: - return True - if any(k in path for k in covered_leetify_fields): - return True - return False - -def group_key(p): - if "data.group_N[].user_info." in p: - return "data.group_N[].user_info.*" - if "data.group_N[].fight_any." in p: - return "data.group_N[].fight_any.*" - if "data.group_N[].fight_t." in p: - return "data.group_N[].fight_t.*" - if "data.group_N[].fight_ct." in p: - return "data.group_N[].fight_ct.*" - if "data.main." 
in p: - return "data.main.*" - if "data.round_list[]" in p or "data.round_list[]." in p: - return "data.round_list.*" - if "data.leetify_data.round_stat[]" in p or "data.leetify_data.round_stat[]." in p: - return "data.leetify_data.round_stat.*" - if "data.leetify_data." in p: - return "data.leetify_data.*" - if "data.treat_info." in p: - return "data.treat_info.*" - if "data." in p: - return "data.*" - return "other" - -def dump_uncovered(output_path): - paths = load_schema_paths(schema_path) - uncovered = [p for p in paths if not is_covered(p)] - df_unc = pd.DataFrame({"path": uncovered}) - if len(df_unc) == 0: - print("no uncovered paths") - return - df_unc["group"] = df_unc["path"].apply(group_key) - df_unc = df_unc.sort_values(["group", "path"]) - df_unc.to_csv(output_path, index=False, encoding='utf-8-sig') - print(f"uncovered total: {len(df_unc)}") - print("\n-- uncovered groups (count) --") - print(df_unc.groupby("group").size().sort_values(ascending=False)) - print(f"\noutput: {output_path}") - -def print_schema(conn): - tables = conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name").fetchall() - for (name,) in tables: - print(f"\n[{name}]") - cols = conn.execute(f"PRAGMA table_info({name})").fetchall() - rows = [["column", "type", "pk"]] - for _, col_name, col_type, _, _, pk in cols: - rows.append([col_name, col_type or "", str(pk)]) - widths = [max(len(r[i]) for r in rows) for i in range(3)] - for idx, r in enumerate(rows): - line = " | ".join([r[i].ljust(widths[i]) for i in range(3)]) - print(line) - if idx == 0: - print("-" * len(line)) - -def refresh_schema_sql(conn, output_path): - rows = conn.execute(""" - SELECT type, name, sql - FROM sqlite_master - WHERE sql IS NOT NULL AND type IN ('table', 'index') AND name NOT LIKE 'sqlite_%' - ORDER BY CASE WHEN type='table' THEN 0 ELSE 1 END, name - """).fetchall() - lines = ["PRAGMA foreign_keys = ON;", ""] - for _, _, sql in rows: - 
lines.append(sql.strip() + ";") - lines.append("") - with open(output_path, 'w', encoding='utf-8') as f: - f.write("\n".join(lines).strip() + "\n") - -def verify(): - conn = sqlite3.connect(db_path) - - print("--- Counts ---") - tables = [ - 'dim_players', - 'dim_maps', - 'fact_matches', - 'fact_match_teams', - 'fact_match_players', - 'fact_match_players_t', - 'fact_match_players_ct', - 'fact_rounds', - 'fact_round_events', - 'fact_round_player_economy' - ] - for t in tables: - count = conn.execute(f"SELECT COUNT(*) FROM {t}").fetchone()[0] - print(f"{t}: {count}") - - print("\n--- Data Source Distribution ---") - dist = pd.read_sql("SELECT data_source_type, COUNT(*) as cnt FROM fact_matches GROUP BY data_source_type", conn) - print(dist) - - print("\n--- Sample Round Events (Leetify vs Classic) ---") - # Fetch one event from a leetify match - leetify_match = conn.execute("SELECT match_id FROM fact_matches WHERE data_source_type='leetify' LIMIT 1").fetchone() - if leetify_match: - mid = leetify_match[0] - print(f"Leetify Match: {mid}") - df = pd.read_sql(f"SELECT * FROM fact_round_events WHERE match_id='{mid}' AND event_type='kill' LIMIT 1", conn) - print(df[['event_type', 'attacker_steam_id', 'trade_killer_steam_id', 'attacker_pos_x', 'score_change_attacker']]) - - # Fetch one event from a classic match - classic_match = conn.execute("SELECT match_id FROM fact_matches WHERE data_source_type='classic' LIMIT 1").fetchone() - if classic_match: - mid = classic_match[0] - print(f"Classic Match: {mid}") - df = pd.read_sql(f"SELECT * FROM fact_round_events WHERE match_id='{mid}' AND event_type='kill' LIMIT 1", conn) - print(df[['event_type', 'attacker_steam_id', 'trade_killer_steam_id', 'attacker_pos_x', 'score_change_attacker']]) - - print("\n--- Sample Player Stats (New Fields) ---") - df_players = pd.read_sql("SELECT steam_id_64, rating, rating3, elo_change, rank_score, flash_duration, jump_count FROM fact_match_players LIMIT 5", conn) - print(df_players) - - 
print("\n--- Insert Field Checks ---") - meta_counts = conn.execute(""" - SELECT - SUM(CASE WHEN response_code IS NOT NULL THEN 1 ELSE 0 END) AS response_code_cnt, - SUM(CASE WHEN response_trace_id IS NOT NULL AND response_trace_id != '' THEN 1 ELSE 0 END) AS response_trace_id_cnt, - SUM(CASE WHEN response_success IS NOT NULL THEN 1 ELSE 0 END) AS response_success_cnt, - SUM(CASE WHEN response_errcode IS NOT NULL THEN 1 ELSE 0 END) AS response_errcode_cnt, - SUM(CASE WHEN treat_info_raw IS NOT NULL AND treat_info_raw != '' THEN 1 ELSE 0 END) AS treat_info_raw_cnt, - SUM(CASE WHEN round_list_raw IS NOT NULL AND round_list_raw != '' THEN 1 ELSE 0 END) AS round_list_raw_cnt, - SUM(CASE WHEN leetify_data_raw IS NOT NULL AND leetify_data_raw != '' THEN 1 ELSE 0 END) AS leetify_data_raw_cnt - FROM fact_matches - """).fetchone() - print(f"response_code non-null: {meta_counts[0]}") - print(f"response_trace_id non-empty: {meta_counts[1]}") - print(f"response_success non-null: {meta_counts[2]}") - print(f"response_errcode non-null: {meta_counts[3]}") - print(f"treat_info_raw non-empty: {meta_counts[4]}") - print(f"round_list_raw non-empty: {meta_counts[5]}") - print(f"leetify_data_raw non-empty: {meta_counts[6]}") - - print("\n--- Integrity Checks ---") - missing_players = conn.execute(""" - SELECT COUNT(*) FROM fact_match_players f - LEFT JOIN dim_players d ON f.steam_id_64 = d.steam_id_64 - WHERE d.steam_id_64 IS NULL - """).fetchone()[0] - print(f"fact_match_players missing dim_players: {missing_players}") - - missing_round_matches = conn.execute(""" - SELECT COUNT(*) FROM fact_rounds r - LEFT JOIN fact_matches m ON r.match_id = m.match_id - WHERE m.match_id IS NULL - """).fetchone()[0] - print(f"fact_rounds missing fact_matches: {missing_round_matches}") - - missing_event_rounds = conn.execute(""" - SELECT COUNT(*) FROM fact_round_events e - LEFT JOIN fact_rounds r ON e.match_id = r.match_id AND e.round_num = r.round_num - WHERE r.match_id IS NULL - """).fetchone()[0] - 
print(f"fact_round_events missing fact_rounds: {missing_event_rounds}") - - side_zero_t = conn.execute(""" - SELECT COUNT(*) FROM fact_match_players_t - WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0 - """).fetchone()[0] - side_zero_ct = conn.execute(""" - SELECT COUNT(*) FROM fact_match_players_ct - WHERE COALESCE(kills,0)=0 AND COALESCE(deaths,0)=0 AND COALESCE(assists,0)=0 - """).fetchone()[0] - print(f"fact_match_players_t zero K/D/A: {side_zero_t}") - print(f"fact_match_players_ct zero K/D/A: {side_zero_ct}") - - print("\n--- Full vs T/CT Comparison ---") - cols = [ - 'kills', 'deaths', 'assists', 'headshot_count', 'adr', 'rating', 'rating2', - 'rating3', 'rws', 'mvp_count', 'flash_duration', 'jump_count', 'is_win' - ] - df_full = pd.read_sql( - "SELECT match_id, steam_id_64, " + ",".join(cols) + " FROM fact_match_players", - conn - ) - df_t = pd.read_sql( - "SELECT match_id, steam_id_64, " + ",".join(cols) + " FROM fact_match_players_t", - conn - ).rename(columns={c: f"{c}_t" for c in cols}) - df_ct = pd.read_sql( - "SELECT match_id, steam_id_64, " + ",".join(cols) + " FROM fact_match_players_ct", - conn - ).rename(columns={c: f"{c}_ct" for c in cols}) - - df = df_full.merge(df_t, on=['match_id', 'steam_id_64'], how='left') - df = df.merge(df_ct, on=['match_id', 'steam_id_64'], how='left') - - def is_empty(s): - return s.isna() | (s == 0) - - for c in cols: - empty_count = is_empty(df[c]).sum() - print(f"{c} empty: {empty_count}") - - additive = ['kills', 'deaths', 'assists', 'headshot_count', 'mvp_count', 'flash_duration', 'jump_count'] - for c in additive: - t_sum = df[f"{c}_t"].fillna(0) + df[f"{c}_ct"].fillna(0) - tol = 0.01 if c == 'flash_duration' else 0 - diff = (df[c].fillna(0) - t_sum).abs() > tol - print(f"{c} full != t+ct: {diff.sum()}") - - non_additive = ['adr', 'rating', 'rating2', 'rating3', 'rws', 'is_win'] - for c in non_additive: - side_nonempty = (~is_empty(df[f"{c}_t"])) | (~is_empty(df[f"{c}_ct"])) - 
full_empty_side_nonempty = is_empty(df[c]) & side_nonempty - full_nonempty_side_empty = (~is_empty(df[c])) & (~side_nonempty) - print(f"{c} full empty but side has: {full_empty_side_nonempty.sum()}") - print(f"{c} full has but side empty: {full_nonempty_side_empty.sum()}") - - print("\n--- Rating Detail ---") - rating_cols = ['rating', 'rating2', 'rating3'] - for c in rating_cols: - full_null = df[c].isna().sum() - full_zero = (df[c] == 0).sum() - full_nonzero = ((~df[c].isna()) & (df[c] != 0)).sum() - side_t_nonzero = ((~df[f"{c}_t"].isna()) & (df[f"{c}_t"] != 0)).sum() - side_ct_nonzero = ((~df[f"{c}_ct"].isna()) & (df[f"{c}_ct"] != 0)).sum() - side_any_nonzero = ((~df[f"{c}_t"].isna()) & (df[f"{c}_t"] != 0)) | ((~df[f"{c}_ct"].isna()) & (df[f"{c}_ct"] != 0)) - full_nonzero_side_zero = ((~df[c].isna()) & (df[c] != 0) & (~side_any_nonzero)).sum() - full_zero_side_nonzero = (((df[c].isna()) | (df[c] == 0)) & side_any_nonzero).sum() - print(f"{c} full null: {full_null} full zero: {full_zero} full nonzero: {full_nonzero}") - print(f"{c} side t nonzero: {side_t_nonzero} side ct nonzero: {side_ct_nonzero}") - print(f"{c} full nonzero but side all zero: {full_nonzero_side_zero}") - print(f"{c} full zero but side has: {full_zero_side_nonzero}") - - df_rating_src = pd.read_sql( - "SELECT f.rating, f.rating2, f.rating3, m.data_source_type FROM fact_match_players f JOIN fact_matches m ON f.match_id = m.match_id", - conn - ) - for c in rating_cols: - grp = df_rating_src.groupby('data_source_type')[c].apply(lambda s: (s != 0).sum()).reset_index(name='nonzero') - print(f"{c} nonzero by source") - print(grp) - - print("\n--- Schema Coverage (fight_any) ---") - paths = load_schema_paths(schema_path) - fight_keys = set() - for p in paths: - if 'data.group_N[].fight_any.' 
in p: - key = p.split('fight_any.')[1].split('.')[0] - fight_keys.add(key) - l2_cols = set(pd.read_sql("PRAGMA table_info(fact_match_players)", conn)['name'].tolist()) - alias = { - 'kills': 'kill', - 'deaths': 'death', - 'assists': 'assist', - 'headshot_count': 'headshot', - 'mvp_count': 'is_mvp', - 'flash_duration': 'flash_enemy_time', - 'jump_count': 'jump_total', - 'awp_kills': 'awp_kill' - } - covered = set() - for c in l2_cols: - if c in fight_keys: - covered.add(c) - elif c in alias and alias[c] in fight_keys: - covered.add(alias[c]) - missing_keys = sorted(list(fight_keys - covered)) - print(f"fight_any keys: {len(fight_keys)}") - print(f"covered by L2 columns: {len(covered)}") - print(f"uncovered fight_any keys: {len(missing_keys)}") - if missing_keys: - print(missing_keys) - - print("\n--- Coverage Zero Rate (fight_any -> fact_match_players) ---") - fight_cols = [k for k in fight_keys if k in l2_cols or k in alias.values()] - col_map = {} - for k in fight_cols: - if k in l2_cols: - col_map[k] = k - else: - for l2k, src in alias.items(): - if src == k: - col_map[k] = l2k - break - select_cols = ["steam_id_64"] + list(set(col_map.values())) - df_fight = pd.read_sql( - "SELECT " + ",".join(select_cols) + " FROM fact_match_players", - conn - ) - total_rows = len(df_fight) - stats = [] - for fight_key, col in sorted(col_map.items()): - s = df_fight[col] - zeros = (s == 0).sum() - nulls = s.isna().sum() - nonzero = total_rows - zeros - nulls - stats.append({ - "fight_key": fight_key, - "column": col, - "nonzero": nonzero, - "zero": zeros, - "null": nulls, - "zero_rate": 0 if total_rows == 0 else round(zeros / total_rows, 4) - }) - df_stats = pd.DataFrame(stats).sort_values(["zero_rate", "nonzero"], ascending=[False, True]) - print(df_stats.head(30)) - print("\n-- zero_rate top (most zeros) --") - print(df_stats.head(10)) - print("\n-- zero_rate bottom (most nonzero) --") - print(df_stats.tail(10)) - - print("\n--- Schema Coverage (leetify economy) ---") - 
econ_keys = [ - 'data.leetify_data.round_stat[].bron_equipment.', - 'data.leetify_data.round_stat[].player_t_score.', - 'data.leetify_data.round_stat[].player_ct_score.', - 'data.leetify_data.round_stat[].player_bron_crash.' - ] - for k in econ_keys: - count = sum(1 for p in paths if k in p) - print(f"{k} paths: {count}") - - print("\n--- Schema Summary Coverage (by path groups) ---") - uncovered = [p for p in paths if not is_covered(p)] - print(f"total paths: {len(paths)}") - print(f"covered paths: {len(paths) - len(uncovered)}") - print(f"uncovered paths: {len(uncovered)}") - - df_unc = pd.DataFrame({"path": uncovered}) - if len(df_unc) > 0: - df_unc["group"] = df_unc["path"].apply(group_key) - print("\n-- Uncovered groups (count) --") - print(df_unc.groupby("group").size().sort_values(ascending=False)) - print("\n-- Uncovered examples (top 50) --") - print(df_unc["path"].head(50).to_list()) - - conn.close() - -def watch_schema(schema_path, interval=1.0): - last_db_mtime = 0 - last_schema_mtime = 0 - first = True - while True: - if not os.path.exists(db_path): - print(f"db not found: {db_path}") - time.sleep(interval) - continue - db_mtime = os.path.getmtime(db_path) - schema_mtime = os.path.getmtime(schema_path) if os.path.exists(schema_path) else 0 - if first or db_mtime > last_db_mtime or schema_mtime > last_schema_mtime: - conn = sqlite3.connect(db_path) - refresh_schema_sql(conn, schema_path) - print(f"\n[{time.strftime('%Y-%m-%d %H:%M:%S')}] schema.sql refreshed") - print_schema(conn) - conn.close() - last_db_mtime = db_mtime - last_schema_mtime = os.path.getmtime(schema_path) if os.path.exists(schema_path) else 0 - first = False - time.sleep(interval) - -if __name__ == "__main__": - args = [a.lower() for a in sys.argv[1:]] - if "dump_uncovered" in args or "uncovered" in args: - dump_uncovered('database/original_json_schema/uncovered_features.csv') - elif "watch_schema" in args or "watch" in args: - try: - watch_schema('database/L2/schema.sql') - except 
KeyboardInterrupt: - pass - elif "schema" in args or "refresh_schema" in args: - if not os.path.exists(db_path): - print(f"db not found: {db_path}") - else: - conn = sqlite3.connect(db_path) - if "refresh_schema" in args: - refresh_schema_sql(conn, 'database/L2/schema.sql') - print("schema.sql refreshed") - print_schema(conn) - conn.close() - else: - verify() diff --git a/ETL/verify/verify_L3.py b/ETL/verify/verify_L3.py deleted file mode 100644 index 42b7576..0000000 --- a/ETL/verify/verify_L3.py +++ /dev/null @@ -1,29 +0,0 @@ - -import sqlite3 -import pandas as pd - -L3_DB_PATH = 'database/L3/L3_Features.sqlite' - -def verify(): - conn = sqlite3.connect(L3_DB_PATH) - - # 1. Row count - cursor = conn.cursor() - cursor.execute("SELECT COUNT(*) FROM dm_player_features") - count = cursor.fetchone()[0] - print(f"Total Players in L3: {count}") - - # 2. Sample Data - df = pd.read_sql_query("SELECT * FROM dm_player_features LIMIT 5", conn) - print("\nSample Data (First 5 rows):") - print(df[['steam_id_64', 'total_matches', 'basic_avg_rating', 'sta_last_30_rating', 'bat_kd_diff_high_elo', 'hps_clutch_win_rate_1v1']].to_string()) - - # 3. 
Stats Summary - print("\nStats Summary:") - full_df = pd.read_sql_query("SELECT basic_avg_rating, sta_last_30_rating, bat_win_rate_vs_all FROM dm_player_features", conn) - print(full_df.describe()) - - conn.close() - -if __name__ == "__main__": - verify() diff --git a/ETL/verify/verify_deep.py b/ETL/verify/verify_deep.py deleted file mode 100644 index f31b1b2..0000000 --- a/ETL/verify/verify_deep.py +++ /dev/null @@ -1,82 +0,0 @@ -import sqlite3 -import pandas as pd -import numpy as np -import sys - -# 设置pandas显示选项,确保不省略任何行和列 -pd.set_option('display.max_rows', None) -pd.set_option('display.max_columns', None) -pd.set_option('display.width', 2000) -pd.set_option('display.float_format', '{:.2f}'.format) -pd.set_option('display.max_colwidth', None) - -db_path = 'database/L2/L2_Main.sqlite' - -def check_all_tables(): - conn = sqlite3.connect(db_path) - - # 获取所有表名 - tables = pd.read_sql("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'", conn)['name'].tolist() - - for table in tables: - print(f"\n{'='*20} Table: {table} {'='*20}") - - # 获取表的所有列 - cols_info = pd.read_sql(f"PRAGMA table_info({table})", conn) - cols = cols_info['name'].tolist() - - # 读取全表数据 - df = pd.read_sql(f"SELECT * FROM {table}", conn) - total = len(df) - - if total == 0: - print(f"Table is empty (0 rows)") - continue - - print(f"Total Rows: {total}") - print("-" * 60) - - stats = [] - for col in cols: - # 1. Null Check - nulls = df[col].isnull().sum() - - # 2. Zero Check (仅对数值型或可转换为数值的列) - zeros = 0 - try: - # 尝试转为数值,无法转换的变为NaN - numeric_series = pd.to_numeric(df[col], errors='coerce') - # 统计0值 (排除原本就是NaN的) - zeros = (numeric_series == 0).sum() - except: - zeros = 0 - - # 3. Unique Count (基数) - unique_count = df[col].nunique() - - # 4. 
Example Value (取第一个非空值) - example = df[col].dropna().iloc[0] if df[col].count() > 0 else 'ALL NULL' - - stats.append({ - 'Field': col, - 'Nulls': nulls, - 'Null%': (nulls/total)*100, - 'Zeros': zeros, - 'Zero%': (zeros/total)*100, - 'Unique': unique_count, - 'Example': str(example)[:50] # 截断过长示例 - }) - - # 输出完整统计表 - df_stats = pd.DataFrame(stats) - # 按 Zero% 降序排列,但保证 Null% 高的也显眼,这里默认不排序直接按字段序,或者按关注度排序 - # 用户要求全面探查,按字段原序输出可能更符合直觉,或者按Zero%排序 - # 这里为了排查问题,按 Zero% 降序输出 - df_stats = df_stats.sort_values('Zero%', ascending=False) - print(df_stats.to_string(index=False)) - print("\n") - - conn.close() - -if __name__ == "__main__": - check_all_tables() diff --git a/docs/player_profile_data_reorganization.md b/docs/player_profile_data_reorganization.md new file mode 100644 index 0000000..3236f3c --- /dev/null +++ b/docs/player_profile_data_reorganization.md @@ -0,0 +1,1043 @@ +# 玩家档案数据全面分析与重组方案 + +> **文档日期**: 2026-01-28 +> **适用范围**: YRTV Player Profile System +> **版本**: v1.0 + +--- + +## 目录 + +1. [完整数据清单](#1-完整数据清单) +2. [当前问题分析](#2-当前问题分析) +3. [重组方案](#3-重组方案) +4. [Schema优化建议](#4-schema优化建议) +5. [实施计划](#5-实施计划) + +--- + +## 1. 
完整数据清单 + +### 1.1 数据仪表板区域 (Dashboard - Top Section) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源表 | UI位置 | +|---------|--------|---------|--------|---------|--------| +| Rating (评分) | `basic_avg_rating` | `AVG(rating)` | `basic_avg_rating` | `fact_match_players.rating` | Dashboard Card 1 | +| K/D Ratio (击杀比) | `basic_avg_kd` | `AVG(kd_ratio)` | `basic_avg_kd` | `fact_match_players.kd_ratio` | Dashboard Card 2 | +| ADR (场均伤害) | `basic_avg_adr` | `AVG(adr)` | `basic_avg_adr` | `fact_match_players.adr` | Dashboard Card 3 | +| KAST (贡献率) | `basic_avg_kast` | `AVG(kast)` | `basic_avg_kast` | `fact_match_players.kast` | Dashboard Card 4 | + +### 1.2 图表区域 (Charts Section) + +#### 1.2.1 六维雷达图 (Radar Chart) + +| 维度名称 | 指标键 | 计算方法 | L3列名 | UI位置 | +|---------|--------|---------|--------|--------| +| Aim (BAT) | `score_bat` | 加权标准化: 25% Rating + 20% KD + 15% ADR + 10% DuelWin + 10% HighEloKD + 20% 3K | `score_bat` | Radar Axis 1 | +| Clutch (HPS) | `score_hps` | 加权标准化: 25% 1v3+ + 20% MatchPtWin + 20% ComebackKD + 15% PressureEntry + 20% Rating | `score_hps` | Radar Axis 2 | +| Pistol (PTL) | `score_ptl` | 加权标准化: 30% PistolKills + 30% PistolWin + 20% PistolKD + 20% PistolUtil | `score_ptl` | Radar Axis 3 | +| Defense (SIDE) | `score_tct` | 加权标准化: 35% CT_Rating + 35% T_Rating + 15% CT_FK + 15% T_FK | `score_tct` | Radar Axis 4 | +| Util (UTIL) | `score_util` | 加权标准化: 35% UsageRate + 25% NadeDmg + 20% FlashTime + 20% FlashEnemy | `score_util` | Radar Axis 5 | +| Stability (STA) | `score_sta` | 加权标准化: 30% (100-Volatility) + 30% LossRating + 20% WinRating + 10% TimeCorr | `score_sta` | Radar Axis 6 | +| Economy (ECO) | `score_eco` | 加权标准化: 50% Dmg/$1k + 50% EcoKPR | `score_eco` | Radar Axis 7 | +| Pace (PACE) | `score_pace` | 加权标准化: 50% (100-FirstContactTime) + 50% TradeKillRate | `score_pace` | Radar Axis 8 | + +#### 1.2.2 趋势图 (Trend Chart) + +| 数据项 | 来源 | 计算方法 | UI位置 | +|-------|------|---------|--------| +| Rating走势 | L2: `fact_match_players` | 按时间排序的`rating`值(最近20场) | Line Chart - Main 
Data | +| Carry线(1.5) | 静态基准线 | 固定值 1.5 | Line Chart - Reference | +| Normal线(1.0) | 静态基准线 | 固定值 1.0 | Line Chart - Reference | +| Poor线(0.6) | 静态基准线 | 固定值 0.6 | Line Chart - Reference | + +### 1.3 详细数据面板 (Detailed Stats Panel) + +#### 1.3.1 核心性能指标 (Core Performance) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI行位置 | +|---------|--------|---------|--------|--------|---------| +| Rating (评分) | `basic_avg_rating` | `AVG(rating)` | `basic_avg_rating` | `fact_match_players.rating` | Row 1, Col 1 | +| KD Ratio (击杀比) | `basic_avg_kd` | `AVG(kd_ratio)` | `basic_avg_kd` | `fact_match_players.kd_ratio` | Row 1, Col 2 | +| KAST (贡献率) | `basic_avg_kast` | `AVG(kast)` | `basic_avg_kast` | `fact_match_players.kast` | Row 1, Col 3 | +| RWS (每局得分) | `basic_avg_rws` | `AVG(rws)` | `basic_avg_rws` | `fact_match_players.rws` | Row 1, Col 4 | +| ADR (场均伤害) | `basic_avg_adr` | `AVG(adr)` | `basic_avg_adr` | `fact_match_players.adr` | Row 1, Col 5 | + +#### 1.3.2 枪法与战斗能力 (Gunfight) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI行位置 | +|---------|--------|---------|--------|--------|---------| +| Avg HS (场均爆头) | `basic_avg_headshot_kills` | `SUM(headshot_count) / matches` | `basic_avg_headshot_kills` | `fact_match_players.headshot_count` | Row 2, Col 1 | +| HS Rate (爆头率) | `basic_headshot_rate` | `SUM(headshot_count) / SUM(kills)` | `basic_headshot_rate` | `fact_match_players.headshot_count, kills` | Row 2, Col 2 | +| Assists (场均助攻) | `basic_avg_assisted_kill` | `SUM(assisted_kill) / matches` | `basic_avg_assisted_kill` | `fact_match_players.assisted_kill` | Row 2, Col 3 | +| AWP Kills (狙击击杀) | `basic_avg_awp_kill` | `SUM(awp_kill) / matches` | `basic_avg_awp_kill` | `fact_match_players.awp_kill` | Row 2, Col 4 | +| Jumps (场均跳跃) | `basic_avg_jump_count` | `SUM(jump_count) / matches` | `basic_avg_jump_count` | `fact_match_players.jump_count` | Row 2, Col 5 | +| Knife Kills (场均刀杀) | `basic_avg_knife_kill` | `COUNT(knife_kills) / matches` | `basic_avg_knife_kill` | `fact_round_events` (weapon=knife) 
| Row 2, Col 6 | +| Zeus Kills (电击枪杀) | `basic_avg_zeus_kill` | `COUNT(zeus_kills) / matches` | `basic_avg_zeus_kill` | `fact_round_events` (weapon=zeus) | Row 2, Col 7 | +| Zeus Buy% (起电击枪) | `basic_zeus_pick_rate` | `AVG(has_zeus)` | `basic_zeus_pick_rate` | `fact_round_player_economy.has_zeus` | Row 2, Col 8 | + +#### 1.3.3 目标控制 (Objective) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI行位置 | +|---------|--------|---------|--------|--------|---------| +| MVP (最有价值) | `basic_avg_mvps` | `SUM(mvp_count) / matches` | `basic_avg_mvps` | `fact_match_players.mvp_count` | Row 3, Col 1 | +| Plants (下包) | `basic_avg_plants` | `SUM(planted_bomb) / matches` | `basic_avg_plants` | `fact_match_players.planted_bomb` | Row 3, Col 2 | +| Defuses (拆包) | `basic_avg_defuses` | `SUM(defused_bomb) / matches` | `basic_avg_defuses` | `fact_match_players.defused_bomb` | Row 3, Col 3 | +| Flash Assist (闪光助攻) | `basic_avg_flash_assists` | `SUM(flash_assists) / matches` | `basic_avg_flash_assists` | `fact_match_players.flash_assists` | Row 3, Col 4 | + +#### 1.3.4 开局能力 (Opening Impact) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI行位置 | +|---------|--------|---------|--------|--------|---------| +| First Kill (场均首杀) | `basic_avg_first_kill` | `SUM(first_kill) / matches` | `basic_avg_first_kill` | `fact_match_players.first_kill` | Row 4, Col 1 | +| First Death (场均首死) | `basic_avg_first_death` | `SUM(first_death) / matches` | `basic_avg_first_death` | `fact_match_players.first_death` | Row 4, Col 2 | +| FK Rate (首杀率) | `basic_first_kill_rate` | `FK / (FK + FD)` | `basic_first_kill_rate` | Calculated from FK/FD | Row 4, Col 3 | +| FD Rate (首死率) | `basic_first_death_rate` | `FD / (FK + FD)` | `basic_first_death_rate` | Calculated from FK/FD | Row 4, Col 4 | + +#### 1.3.5 多杀表现 (Multi-Frag Performance) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI行位置 | +|---------|--------|---------|--------|--------|---------| +| 2K Rounds (双杀) | `basic_avg_kill_2` | `SUM(kill_2) / matches` | `basic_avg_kill_2` | 
`fact_match_players.kill_2` | Row 5, Col 1 | +| 3K Rounds (三杀) | `basic_avg_kill_3` | `SUM(kill_3) / matches` | `basic_avg_kill_3` | `fact_match_players.kill_3` | Row 5, Col 2 | +| 4K Rounds (四杀) | `basic_avg_kill_4` | `SUM(kill_4) / matches` | `basic_avg_kill_4` | `fact_match_players.kill_4` | Row 5, Col 3 | +| 5K Rounds (五杀) | `basic_avg_kill_5` | `SUM(kill_5) / matches` | `basic_avg_kill_5` | `fact_match_players.kill_5` | Row 5, Col 4 | + +#### 1.3.6 特殊击杀 (Special Stats) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI行位置 | +|---------|--------|---------|--------|--------|---------| +| Perfect Kills (无伤杀) | `basic_avg_perfect_kill` | `SUM(perfect_kill) / matches` | `basic_avg_perfect_kill` | `fact_match_players.perfect_kill` | Row 6, Col 1 | +| Revenge Kills (复仇杀) | `basic_avg_revenge_kill` | `SUM(revenge_kill) / matches` | `basic_avg_revenge_kill` | `fact_match_players.revenge_kill` | Row 6, Col 2 | +| 交火补枪率 | `trade_kill_percentage` | `TradeKills / TotalKills * 100` | N/A (计算自L2) | `fact_round_events` (self-join) | Row 6, Col 3 | + +### 1.4 特殊击杀与时机分析 (Special Kills & Timing) + +#### 1.4.1 战术智商击杀 (Special Kill Scenarios) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI位置 | +|---------|--------|---------|--------|--------|--------| +| Wallbang Kills (穿墙) | `special_wallbang_kills` | `COUNT(is_wallbang=1)` | `special_wallbang_kills` | `fact_round_events.is_wallbang` | Special Grid 1 | +| Wallbang Rate (穿墙率) | `special_wallbang_rate` | `WallbangKills / TotalKills` | `special_wallbang_rate` | Calculated | Special Grid 2 | +| Smoke Kills (穿烟) | `special_smoke_kills` | `COUNT(is_through_smoke=1)` | `special_smoke_kills` | `fact_round_events.is_through_smoke` | Special Grid 3 | +| Smoke Kill Rate (穿烟率) | `special_smoke_kill_rate` | `SmokeKills / TotalKills` | `special_smoke_kill_rate` | Calculated | Special Grid 4 | +| Blind Kills (致盲击杀) | `special_blind_kills` | `COUNT(is_blind=1)` | `special_blind_kills` | `fact_round_events.is_blind` | Special Grid 5 | +| Blind Kill Rate (致盲率) | 
`special_blind_kill_rate` | `BlindKills / TotalKills` | `special_blind_kill_rate` | Calculated | Special Grid 6 | +| NoScope Kills (盲狙) | `special_noscope_kills` | `COUNT(is_noscope=1)` | `special_noscope_kills` | `fact_round_events.is_noscope` | Special Grid 7 | +| NoScope Rate (盲狙率) | `special_noscope_rate` | `NoScopeKills / AWPKills` | `special_noscope_rate` | Calculated | Special Grid 8 | +| High IQ Score (智商评分) | `special_high_iq_score` | 加权评分(0-100): Wallbang*3 + Smoke*2 + Blind*1.5 + NoScope*2 | `special_high_iq_score` | Calculated | Special Grid 9 | + +#### 1.4.2 回合节奏分析 (Round Timing Analysis) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI位置 | +|---------|--------|---------|--------|--------|--------| +| Early Kills (前30s) | `timing_early_kills` | `COUNT(event_time < 30)` | `timing_early_kills` | `fact_round_events.event_time` | Timing Grid 1 | +| Mid Kills (30-60s) | `timing_mid_kills` | `COUNT(30 <= event_time < 60)` | `timing_mid_kills` | `fact_round_events.event_time` | Timing Grid 2 | +| Late Kills (60s+) | `timing_late_kills` | `COUNT(event_time >= 60)` | `timing_late_kills` | `fact_round_events.event_time` | Timing Grid 3 | +| Avg Kill Time (平均击杀时间) | `timing_avg_kill_time` | `AVG(event_time)` for kills | `timing_avg_kill_time` | `fact_round_events.event_time` | Timing Grid 4 | +| Early Aggression (前期进攻) | `timing_early_aggression_rate` | `EarlyKills / TotalKills` | `timing_early_aggression_rate` | Calculated | Timing Grid 5 | +| Early Deaths (前30s死) | `timing_early_deaths` | `COUNT(death_time < 30)` | `timing_early_deaths` | `fact_round_events.event_time` | Timing Grid 6 | +| Mid Deaths (30-60s死) | `timing_mid_deaths` | `COUNT(30 <= death_time < 60)` | `timing_mid_deaths` | `fact_round_events.event_time` | Timing Grid 7 | +| Late Deaths (60s+死) | `timing_late_deaths` | `COUNT(death_time >= 60)` | `timing_late_deaths` | `fact_round_events.event_time` | Timing Grid 8 | +| Avg Death Time (平均死亡时间) | `timing_avg_death_time` | `AVG(event_time)` for deaths | 
`timing_avg_death_time` | `fact_round_events.event_time` | Timing Grid 9 | +| Early Death Rate (前期死亡) | `timing_early_death_rate` | `EarlyDeaths / TotalDeaths` | `timing_early_death_rate` | Calculated | Timing Grid 10 | + +### 1.5 深层能力维度 (Deep Capabilities) + +#### 1.5.1 稳定性与枪法 (STA & BAT) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI位置 | +|---------|--------|---------|--------|--------|--------| +| Last 30 Rating (近30场) | `sta_last_30_rating` | `AVG(rating)` for last 30 matches | `sta_last_30_rating` | `fact_match_players.rating` | Deep Section 1 | +| Win Rating (胜局) | `sta_win_rating` | `AVG(rating WHERE is_win=1)` | `sta_win_rating` | `fact_match_players.rating, is_win` | Deep Section 2 | +| Loss Rating (败局) | `sta_loss_rating` | `AVG(rating WHERE is_win=0)` | `sta_loss_rating` | `fact_match_players.rating, is_win` | Deep Section 3 | +| Volatility (波动) | `sta_rating_volatility` | `STDDEV(rating)` for last 10 matches | `sta_rating_volatility` | `fact_match_players.rating` | Deep Section 4 | +| Time Corr (耐力) | `sta_time_rating_corr` | `CORR(duration, rating)` | `sta_time_rating_corr` | `fact_matches.duration, rating` | Deep Section 5 | +| High Elo KD Diff (高分抗压) | `bat_kd_diff_high_elo` | `AVG(kd WHERE elo > player_avg_elo)` | `bat_kd_diff_high_elo` | `fact_match_teams.group_origin_elo` | Deep Section 6 | +| Duel Win% (对枪胜率) | `bat_avg_duel_win_rate` | `entry_kills / (entry_kills + entry_deaths)` | `bat_avg_duel_win_rate` | `fact_match_players.entry_kills/deaths` | Deep Section 7 | + +#### 1.5.2 残局与手枪 (HPS & PTL) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI位置 | +|---------|--------|---------|--------|--------|--------| +| Avg 1v1 (场均1v1) | `hps_clutch_win_rate_1v1` | `SUM(clutch_1v1) / matches` | `hps_clutch_win_rate_1v1` | `fact_match_players.clutch_1v1` | Deep Section 8 | +| Avg 1v3+ (场均1v3+) | `hps_clutch_win_rate_1v3_plus` | `SUM(clutch_1v3+1v4+1v5) / matches` | `hps_clutch_win_rate_1v3_plus` | `fact_match_players.clutch_1v3/4/5` | Deep Section 9 | +| Match Pt Win% 
(赛点胜率) | `hps_match_point_win_rate` | Win rate when either team at 12 or 15 | `hps_match_point_win_rate` | `fact_rounds` (score calculation) | Deep Section 10 | +| Pressure Entry (逆风首杀) | `hps_pressure_entry_rate` | `entry_kills / rounds` in losing matches | `hps_pressure_entry_rate` | `fact_match_players` (is_win=0) | Deep Section 11 | +| Comeback KD (翻盘KD) | `hps_comeback_kd_diff` | KD差值当队伍落后4+回合 | `hps_comeback_kd_diff` | `fact_round_events + fact_rounds` | Deep Section 12 | +| Loss Streak KD (连败KD) | `hps_losing_streak_kd_diff` | KD差值当连败3+回合 | `hps_losing_streak_kd_diff` | `fact_round_events + fact_rounds` | Deep Section 13 | +| Pistol Kills (手枪击杀) | `ptl_pistol_kills` | `COUNT(kills WHERE round IN (1,13))` / matches | `ptl_pistol_kills` | `fact_round_events` (round 1,13) | Deep Section 14 | +| Pistol Win% (手枪胜率) | `ptl_pistol_win_rate` | Win rate for pistol rounds | `ptl_pistol_win_rate` | `fact_rounds` (round 1,13) | Deep Section 15 | +| Pistol KD (手枪KD) | `ptl_pistol_kd` | `pistol_kills / pistol_deaths` | `ptl_pistol_kd` | `fact_round_events` (round 1,13) | Deep Section 16 | +| Pistol Util Eff (手枪道具) | `ptl_pistol_util_efficiency` | Headshot rate in pistol rounds | `ptl_pistol_util_efficiency` | `fact_round_events` (is_headshot) | Deep Section 17 | + +#### 1.5.3 道具使用 (UTIL) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI位置 | +|---------|--------|---------|--------|--------|--------| +| Usage Rate (道具频率) | `util_usage_rate` | `(flash+smoke+molotov+he+decoy) / rounds * 100` | `util_usage_rate` | `fact_match_players.util_*_usage` | Deep Section 18 | +| Nade Dmg (雷火伤) | `util_avg_nade_dmg` | `SUM(throw_harm) / matches` | `util_avg_nade_dmg` | `fact_match_players.throw_harm` | Deep Section 19 | +| Flash Time (致盲时间) | `util_avg_flash_time` | `SUM(flash_time) / matches` | `util_avg_flash_time` | `fact_match_players.flash_time` | Deep Section 20 | +| Flash Enemy (致盲人数) | `util_avg_flash_enemy` | `SUM(flash_enemy) / matches` | `util_avg_flash_enemy` | 
`fact_match_players.flash_enemy` | Deep Section 21 | + +#### 1.5.4 经济与节奏 (ECO & PACE) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI位置 | +|---------|--------|---------|--------|--------|--------| +| Dmg/$1k (性价比) | `eco_avg_damage_per_1k` | `total_damage / (total_equipment / 1000)` | `eco_avg_damage_per_1k` | `fact_round_player_economy` | Deep Section 22 | +| Eco KPR (经济局KPR) | `eco_rating_eco_rounds` | Kills per round when equipment < $2000 | `eco_rating_eco_rounds` | `fact_round_player_economy` | Deep Section 23 | +| Eco KD (经济局KD) | `eco_kd_ratio` | KD in eco rounds | `eco_kd_ratio` | `fact_round_player_economy` | Deep Section 24 | +| Eco Rounds (经济局数) | `eco_avg_rounds` | `COUNT(equipment < 2000) / matches` | `eco_avg_rounds` | `fact_round_player_economy` | Deep Section 25 | +| First Contact (首接触时间) | `pace_avg_time_to_first_contact` | `AVG(MIN(event_time))` per round | `pace_avg_time_to_first_contact` | `fact_round_events.event_time` | Deep Section 26 | +| Trade Kill% (补枪率) | `pace_trade_kill_rate` | `TradeKills / TotalKills` (5s window) | `pace_trade_kill_rate` | `fact_round_events` (self-join) | Deep Section 27 | +| Opening Time (首杀时间) | `pace_opening_kill_time` | `AVG(first_kill_time)` per round | `pace_opening_kill_time` | `fact_round_events.event_time` | Deep Section 28 | +| Avg Life (存活时间) | `pace_avg_life_time` | `AVG(death_time OR round_end)` | `pace_avg_life_time` | `fact_round_events + fact_rounds` | Deep Section 29 | + +#### 1.5.5 回合动态 (ROUND Dynamics) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI位置 | +|---------|--------|---------|--------|--------|--------| +| Kill Early (前30秒击杀) | `rd_phase_kill_early_share` | Early kills / Total kills | `rd_phase_kill_early_share` | `fact_round_events.event_time` | Deep Section 30 | +| Kill Mid (30-60秒击杀) | `rd_phase_kill_mid_share` | Mid kills / Total kills | `rd_phase_kill_mid_share` | `fact_round_events.event_time` | Deep Section 31 | +| Kill Late (60秒后击杀) | `rd_phase_kill_late_share` | Late kills / Total kills | 
`rd_phase_kill_late_share` | `fact_round_events.event_time` | Deep Section 32 | +| Death Early (前30秒死亡) | `rd_phase_death_early_share` | Early deaths / Total deaths | `rd_phase_death_early_share` | `fact_round_events.event_time` | Deep Section 33 | +| Death Mid (30-60秒死亡) | `rd_phase_death_mid_share` | Mid deaths / Total deaths | `rd_phase_death_mid_share` | `fact_round_events.event_time` | Deep Section 34 | +| Death Late (60秒后死亡) | `rd_phase_death_late_share` | Late deaths / Total deaths | `rd_phase_death_late_share` | `fact_round_events.event_time` | Deep Section 35 | +| FirstDeath Win% (首死后胜率) | `rd_firstdeath_team_first_death_win_rate` | Win rate when team loses first blood | `rd_firstdeath_team_first_death_win_rate` | `fact_round_events + fact_rounds` | Deep Section 36 | +| Invalid Death% (无效死亡) | `rd_invalid_death_rate` | Deaths with 0 kills & 0 flash assists | `rd_invalid_death_rate` | `fact_round_events` | Deep Section 37 | +| Pressure KPR (落后≥3) | `rd_pressure_kpr_ratio` | KPR when down 3+ rounds / Normal KPR | `rd_pressure_kpr_ratio` | `fact_rounds + fact_round_events` | Deep Section 38 | +| MatchPt KPR (赛点放大) | `rd_matchpoint_kpr_ratio` | KPR at match point / Normal KPR | `rd_matchpoint_kpr_ratio` | `fact_rounds + fact_round_events` | Deep Section 39 | +| Trade Resp (10s响应) | `rd_trade_response_10s_rate` | Success rate trading teammate death in 10s | `rd_trade_response_10s_rate` | `fact_round_events` (self-join) | Deep Section 40 | +| Pressure Perf (Leetify) | `rd_pressure_perf_ratio` | Leetify perf when down 3+ / Normal | `rd_pressure_perf_ratio` | `fact_round_player_economy` | Deep Section 41 | +| MatchPt Perf (Leetify) | `rd_matchpoint_perf_ratio` | Leetify perf at match point / Normal | `rd_matchpoint_perf_ratio` | `fact_round_player_economy` | Deep Section 42 | +| Comeback KillShare (追分) | `rd_comeback_kill_share` | Player's kills / Team kills in comeback rounds | `rd_comeback_kill_share` | `fact_round_events + fact_rounds` | Deep Section 43 | +| 
Map Stability (地图稳定) | `map_stability_coef` | `AVG(ABS(map_rating - player_avg))` | `map_stability_coef` | `fact_match_players` (by map) | Deep Section 44 | + +#### 1.5.6 残局与多杀 (SPECIAL - Clutch & Multi) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI位置 | +|---------|--------|---------|--------|--------|--------| +| 1v1 Win% (1v1胜率) | `clutch_rate_1v1` | `clutch_1v1 / attempts_1v1` | N/A (L2) | `fact_match_players.clutch_1v1, end_1v1` | Deep Section 45 | +| 1v2 Win% (1v2胜率) | `clutch_rate_1v2` | `clutch_1v2 / attempts_1v2` | N/A (L2) | `fact_match_players.clutch_1v2, end_1v2` | Deep Section 46 | +| 1v3 Win% (1v3胜率) | `clutch_rate_1v3` | `clutch_1v3 / attempts_1v3` | N/A (L2) | `fact_match_players.clutch_1v3, end_1v3` | Deep Section 47 | +| 1v4 Win% (1v4胜率) | `clutch_rate_1v4` | `clutch_1v4 / attempts_1v4` | N/A (L2) | `fact_match_players.clutch_1v4, end_1v4` | Deep Section 48 | +| 1v5 Win% (1v5胜率) | `clutch_rate_1v5` | `clutch_1v5 / attempts_1v5` | N/A (L2) | `fact_match_players.clutch_1v5, end_1v5` | Deep Section 49 | +| Multi-K Rate (多杀率) | `total_multikill_rate` | `(2K+3K+4K+5K) / total_rounds` | N/A (L2) | `fact_match_players.kill_2/3/4/5` | Deep Section 50 | +| Multi-A Rate (多助率) | `total_multiassist_rate` | `(many_assists_cnt2/3/4/5) / rounds` | N/A (L2) | `fact_match_players.many_assists_cnt*` | Deep Section 51 | + +#### 1.5.7 阵营偏好 (SIDE Preference) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI位置 | +|---------|--------|---------|--------|--------|--------| +| Rating (T-Side) | `side_rating_t` | `AVG(rating2)` from T table | `side_rating_t` | `fact_match_players_t.rating2` | Deep Section 52 | +| Rating (CT-Side) | `side_rating_ct` | `AVG(rating2)` from CT table | `side_rating_ct` | `fact_match_players_ct.rating2` | Deep Section 53 | +| KD Ratio (T) | `side_kd_t` | `SUM(kills) / SUM(deaths)` T-side | `side_kd_t` | `fact_match_players_t.kills/deaths` | Deep Section 54 | +| KD Ratio (CT) | `side_kd_ct` | `SUM(kills) / SUM(deaths)` CT-side | `side_kd_ct` | 
`fact_match_players_ct.kills/deaths` | Deep Section 55 | +| Win Rate (T) | `side_win_rate_t` | `AVG(is_win)` T-side | `side_win_rate_t` | `fact_match_players_t.is_win` | Deep Section 56 | +| Win Rate (CT) | `side_win_rate_ct` | `AVG(is_win)` CT-side | `side_win_rate_ct` | `fact_match_players_ct.is_win` | Deep Section 57 | +| First Kill Rate (T) | `side_first_kill_rate_t` | `FK / rounds` T-side | `side_first_kill_rate_t` | `fact_match_players_t.first_kill` | Deep Section 58 | +| First Kill Rate (CT) | `side_first_kill_rate_ct` | `FK / rounds` CT-side | `side_first_kill_rate_ct` | `fact_match_players_ct.first_kill` | Deep Section 59 | +| First Death Rate (T) | `side_first_death_rate_t` | `FD / rounds` T-side | `side_first_death_rate_t` | `fact_match_players_t.first_death` | Deep Section 60 | +| First Death Rate (CT) | `side_first_death_rate_ct` | `FD / rounds` CT-side | `side_first_death_rate_ct` | `fact_match_players_ct.first_death` | Deep Section 61 | +| KAST (T) | `side_kast_t` | `AVG(kast)` T-side | `side_kast_t` | `fact_match_players_t.kast` | Deep Section 62 | +| KAST (CT) | `side_kast_ct` | `AVG(kast)` CT-side | `side_kast_ct` | `fact_match_players_ct.kast` | Deep Section 63 | +| RWS (T) | `side_rws_t` | `AVG(rws)` T-side | `side_rws_t` | `fact_match_players_t.rws` | Deep Section 64 | +| RWS (CT) | `side_rws_ct` | `AVG(rws)` CT-side | `side_rws_ct` | `fact_match_players_ct.rws` | Deep Section 65 | +| Headshot Rate (T) | `side_headshot_rate_t` | `HS / kills` T-side | `side_headshot_rate_t` | `fact_match_players_t.headshot_count/kills` | Deep Section 66 | +| Headshot Rate (CT) | `side_headshot_rate_ct` | `HS / kills` CT-side | `side_headshot_rate_ct` | `fact_match_players_ct.headshot_count/kills` | Deep Section 67 | + +#### 1.5.8 组排与分层 (Party & Stratification) + +| 显示标签 | 指标键 | 计算方法 | L3列名 | L2来源 | UI位置 | +|---------|--------|---------|--------|--------|--------| +| Solo Win% (单排胜率) | `party_1_win_rate` | Win rate in solo queue | `party_1_win_rate` | 
`fact_match_players` (party_size=1) | Deep Section 68 | +| Solo Rating (单排分) | `party_1_rating` | `AVG(rating)` in solo | `party_1_rating` | `fact_match_players` (party_size=1) | Deep Section 69 | +| Solo ADR (单排伤) | `party_1_adr` | `AVG(adr)` in solo | `party_1_adr` | `fact_match_players` (party_size=1) | Deep Section 70 | +| Duo Win% (双排胜率) | `party_2_win_rate` | Win rate in duo | `party_2_win_rate` | `fact_match_players` (party_size=2) | Deep Section 71 | +| ... (party_2~5 follow same pattern) | ... | ... | ... | ... | Deep Section 72-79 | +| Carry Rate (>1.5) | `rating_dist_carry_rate` | `COUNT(rating>1.5) / total` | `rating_dist_carry_rate` | `fact_match_players.rating` | Deep Section 80 | +| Normal Rate (1.0-1.5) | `rating_dist_normal_rate` | `COUNT(1.0<=rating<1.5) / total` | `rating_dist_normal_rate` | `fact_match_players.rating` | Deep Section 81 | +| Sacrifice Rate (0.6-1.0) | `rating_dist_sacrifice_rate` | `COUNT(0.6<=rating<1.0) / total` | `rating_dist_sacrifice_rate` | `fact_match_players.rating` | Deep Section 82 | +| Sleeping Rate (<0.6) | `rating_dist_sleeping_rate` | `COUNT(rating<0.6) / total` | `rating_dist_sleeping_rate` | `fact_match_players.rating` | Deep Section 83 | +| <1200 Rating | `elo_lt1200_rating` | `AVG(rating)` vs opponents <1200 ELO | `elo_lt1200_rating` | `fact_match_teams.group_origin_elo` | Deep Section 84 | +| 1200-1400 Rating | `elo_1200_1400_rating` | `AVG(rating)` vs 1200-1400 ELO | `elo_1200_1400_rating` | `fact_match_teams.group_origin_elo` | Deep Section 85 | +| ... (elo_* follow same pattern) | ... | ... | ... | ... 
| Deep Section 86-89 | + +### 1.6 附加数据 + +#### 1.6.1 Phase Split (回合阶段分布) + +- **数据来源**: `rd_phase_kill_*_share` 和 `rd_phase_death_*_share` 系列 +- **UI呈现**: 横条图展示 Total/T/CT 的击杀/死亡在 Early/Mid/Late 的分布 +- **计算**: 时间段划分(0-30s/30-60s/60s+),分T/CT/Overall统计 + +#### 1.6.2 Top Weapons (常用武器) + +- **数据来源**: `rd_weapon_top_json` (JSON字段) +- **包含信息**: weapon, kills, hs_rate, price, category, share +- **UI呈现**: 表格展示前5常用武器及其数据 + +#### 1.6.3 Round Type Split (回合类型表现) + +- **数据来源**: `rd_roundtype_split_json` (JSON字段) +- **包含信息**: pistol/eco/rifle/fullbuy/overtime的KPR和Perf +- **UI呈现**: 表格展示不同经济类型回合的表现 + +--- + +## 2. 当前问题分析 + +### 2.1 命名不一致问题 + +| 问题类别 | 具体表现 | 影响 | +|---------|---------|------| +| **前缀混乱** | `basic_*`, `side_*`, `util_*`, `eco_*`, `pace_*`, `rd_*`, `special_*`, `timing_*` | 无法从名称直观判断归属维度 | +| **冗余命名** | `basic_avg_headshot_kills` vs `basic_headshot_rate` | 一个是场均数,一个是比率,命名规则不统一 | +| **缩写不统一** | `FK` vs `First Kill`, `HS` vs `Headshot`, `Avg` vs `Average` | 可读性差 | +| **中英混杂** | 数据库用英文,但UI标签用中文 | 维护困难 | + +### 2.2 数据重复与冗余 + +| 重复类型 | 示例 | 问题 | +|---------|------|------| +| **同一指标多处展示** | `basic_avg_rating`同时出现在Dashboard和Detailed Panel | 数据冗余展示 | +| **相似指标并存** | `timing_early_aggression_rate` vs `rd_phase_kill_early_share` | 实际都是"前30秒击杀占比" | +| **计算结果重复** | `basic_first_kill_rate` 和 `basic_first_death_rate` 必然互补(首杀率 + 首死率 = 100%) | 可简化为一个指标 | +| **阵营数据冗余** | T/CT所有指标都有两套,但很多时候差异不大 | UI拥挤,核心信息淹没 | + +### 2.3 分类逻辑混乱 + +| 混乱表现 | 示例 | 理想归类 | +|---------|------|---------| +| **相关指标分散** | 首杀/首死在"Opening Impact",但首杀时间在"PACE - Tempo" | 应统一归入"开局影响力" | +| **维度交叉** | `hps_pressure_entry_rate`(逆风首杀)既属于HPS,又涉及Opening | 应明确主维度 | +| **深浅不分** | `basic_avg_kd`在详细面板,但`eco_kd_ratio`在深层能力 | 同类指标应在同一层级 | +| **特殊指标孤立** | Special Kills单独一个Section,但Knife/Zeus在Combat | 应统一归入"特殊击杀类" | + +### 2.4 UI展示问题 + +| 问题 | 描述 | 影响 | +|-----|------|------| +| **信息密度过高** | Detailed Stats Panel有80+指标紧密排列 | 用户认知负担重,难以聚焦 | +| **缺少层次** | 所有指标平铺,无主次之分 | 核心数据不突出 | +| **视觉疲劳** | 长列表滚动,无分组视觉分隔 | 易忽略关键信息 | +| **无引导逻辑** | 
用户不知道该看哪些数据 | 降低产品价值 | + +### 2.5 Schema设计问题 + +| 问题 | 描述 | 改进方向 | +|-----|------|---------| +| **列名过长** | `rd_phase_kill_early_share_ct` 28字符 | 可缩短为`phase_k_early_ct` | +| **类型不统一** | Rate有的存0-1,有的存0-100 | 统一为0-1,前端格式化 | +| **缺少索引提示** | L3表无明确的"核心指标"标识 | 可增加`is_core`标记列 | +| **JSON滥用** | `rd_weapon_top_json`等,查询不便 | 考虑拆表或使用结构化字段 | + +--- + +## 3. 重组方案 + +### 3.1 新分类体系 + +基于**游戏功能维度**和**用户关注度**,建议采用三层结构: + +#### **L1: 核心面板 (Core Dashboard)** - 4个关键指标 +- Rating (综合评分) +- K/D Ratio (击杀效率) +- ADR (伤害输出) +- KAST (团队贡献) + +#### **L2: 六维雷达 (6D Capabilities)** - 战术风格画像 +- **Combat (战斗力)**: 枪法、对枪、爆头 +- **Opening (开局影响)**: 首杀、首死、进攻节奏 +- **Clutch (残局能力)**: 1vX、高压表现 +- **Utility (道具运用)**: 闪光、投掷、战术配合 +- **Economy (经济管理)**: 性价比、经济局表现 +- **Stability (稳定性)**: 波动、适应、抗压 + +#### **L3: 详细统计 (Detailed Stats)** - 按功能分组 + +##### **Group 1: 枪法与交火 (Gunfight)** +- 爆头率、爆头数 +- 对枪胜率 +- AWP击杀 +- 补枪成功率 + +##### **Group 2: 开局影响力 (Opening Impact)** +- 首杀数/率 +- 首死数/率 +- 首接触时间 +- 开局击杀时间 + +##### **Group 3: 多杀表现 (Multi-Frag Performance)** +- 2K/3K/4K/5K频率 +- 多杀占比 + +##### **Group 4: 残局能力 (Clutch Capability)** +- 1v1/1v2/1v3/1v4/1v5胜率 +- 残局尝试次数 + +##### **Group 5: 特殊能力 (Special Stats)** +- 穿墙/穿烟/致盲/盲狙击杀 +- 刀杀/电击枪 +- 无伤击杀/复仇击杀 +- 高IQ评分 + +##### **Group 6: 战术贡献 (Tactical Contribution)** +- MVP次数 +- 下包/拆包 +- 闪光助攻 +- 道具使用频率 +- 道具伤害 + +##### **Group 7: 经济管理 (Economy)** +- 伤害性价比 +- 经济局KD/KPR +- 经济局频率 +- 装备价值分布 + +##### **Group 8: 节奏控制 (Pace & Timing)** +- 前/中/后期击杀分布 +- 前/中/后期死亡分布 +- 平均存活时间 +- 节奏风格标签(Early Aggressor/Late Closer) + +##### **Group 9: 阵营偏好 (Side Preference)** +- T/CT综合表现对比(Rating, KD, Win%) +- T/CT关键指标对比(FK Rate, HS Rate) +- 阵营风格分析 + +##### **Group 10: 高压情境 (High-Pressure Performance)** +- 赛点表现 +- 逆风表现(落后3+回合) +- 翻盘贡献 +- 连败抗压 + +##### **Group 11: 组排与分层 (Party & Stratification)** +- 单排/双排/三排/四排/五排表现 +- Carry/Normal/Sacrifice/Sleeping分布 +- 对阵不同ELO段表现 + +##### **Group 12: 回合细节 (Round Dynamics)** +- 首死后胜率 +- 无效死亡率 +- 补枪响应率 +- 武器使用偏好 +- 回合类型表现 + +### 3.2 指标优先级标记 + +为每个指标分配优先级,用于UI展示逻辑: + +| 优先级 | 
说明 | 展示位置 | 指标数量 | +|-------|------|---------|---------| +| **P0 - Critical** | 核心KPI,必看指标 | Dashboard + 六维雷达 | 12个 | +| **P1 - High** | 重要数据,影响战术决策 | Detailed Panel前置位置 | 30个 | +| **P2 - Medium** | 辅助分析,深入了解 | Detailed Panel中部,可折叠 | 50个 | +| **P3 - Low** | 小众指标,专业分析 | Advanced Section,默认折叠 | 30个 | + +### 3.3 命名规范 + +#### 3.3.1 L3列名规范 + +``` +{category}_{metric}_{aggregation}_{context} +``` + +- **category**: 维度前缀(cbt, opn, clu, uti, eco, stb) +- **metric**: 指标名称(小写蛇形) +- **aggregation**: avg/sum/rate/pct (可选) +- **context**: _t/_ct/_pistol等上下文(可选) + +示例: +- `cbt_hs_rate` (Combat - Headshot Rate) +- `opn_fk_avg` (Opening - First Kills Average) +- `clu_1v3_win_rate` (Clutch - 1v3 Win Rate) +- `side_rating_avg_t` (Side - Rating Average T-side) + +#### 3.3.2 UI标签规范 + +``` +{中文简称} ({英文缩写/全称}) +``` + +示例: +- `爆头率 (HS%)` +- `首杀数 (FK)` +- `经济局KD (Eco KD)` + +### 3.4 UI展示逻辑 + +#### 3.4.1 折叠分组 + +```html + +
+ +
+ + +
+ +
+ + +
+
+ 🎯 枪法与交火 (Gunfight) + +
+ +
+ 🚀 开局影响力 (Opening Impact) + +
+ +
+ 🔥 多杀表现 (Multi-Frag) + +
+ + +
+``` + +#### 3.4.2 渐进式披露 + +- **首屏**: P0指标(Dashboard + Radar) +- **第一次滚动**: P1指标(Gunfight, Opening, Clutch) +- **展开折叠**: P2/P3指标 +- **Tooltip/Hover**: 指标说明、计算公式 + +--- + +## 4. Schema优化建议 + +### 4.1 L3表结构调整 + +#### 4.1.1 增加元数据列 + +```sql +ALTER TABLE dm_player_features ADD COLUMN data_version TEXT DEFAULT 'v2.0'; +ALTER TABLE dm_player_features ADD COLUMN last_calculated_at TIMESTAMP; +ALTER TABLE dm_player_features ADD COLUMN data_quality_score REAL; -- 0-1, 数据完整度 +``` + +#### 4.1.2 列名重构映射表 + +| 旧列名 | 新列名 | 说明 | +|-------|--------|------| +| `basic_avg_rating` | `core_rating_avg` | 核心指标 | +| `basic_avg_kd` | `core_kd_avg` | 核心指标 | +| `basic_avg_headshot_kills` | `cbt_hs_kills_avg` | 战斗-爆头 | +| `basic_headshot_rate` | `cbt_hs_rate` | 战斗-爆头率 | +| `basic_avg_first_kill` | `opn_fk_avg` | 开局-首杀 | +| `basic_first_kill_rate` | `opn_fk_rate` | 开局-首杀率 | +| `hps_clutch_win_rate_1v1` | `clu_1v1_win_rate` | 残局-1v1 | +| `util_avg_nade_dmg` | `uti_nade_dmg_avg` | 道具-雷火伤 | +| `eco_avg_damage_per_1k` | `eco_dmg_per_1k` | 经济-性价比 | +| `pace_avg_time_to_first_contact` | `pce_first_contact_time` | 节奏-首接触 | +| `special_wallbang_kills` | `spc_wallbang_kills` | 特殊-穿墙 | +| `timing_early_kills` | `timg_kills_early` | 时机-前期击杀 | +| `side_rating_t` | `side_rating_avg_t` | 阵营-T侧评分 | + +#### 4.1.3 新增计算字段 + +```sql +-- 添加派生指标 +ALTER TABLE dm_player_features ADD COLUMN cbt_firefight_success_rate REAL; +-- 计算: (FK + TradeKills) / (FK + FD + TradeAttempts) + +ALTER TABLE dm_player_features ADD COLUMN opn_impact_score REAL; +-- 计算: FK_rate * 2 + (1 - FD_rate) * 1.5 + Opening_Kill_Time_factor + +ALTER TABLE dm_player_features ADD COLUMN clu_consistency_score REAL; +-- 计算: 残局胜率方差(越小越稳定) + +ALTER TABLE dm_player_features ADD COLUMN eco_efficiency_tier TEXT; +-- 分级: S(>150dmg/$1k), A(120-150), B(90-120), C(<90) +``` + +### 4.2 L2表优化 + +#### 4.2.1 索引优化 + +```sql +-- 为高频查询字段添加索引 +CREATE INDEX idx_match_players_rating ON fact_match_players(rating); +CREATE INDEX idx_match_players_steam_time ON 
fact_match_players(steam_id_64, match_id); +CREATE INDEX idx_round_events_time ON fact_round_events(match_id, round_num, event_time); +CREATE INDEX idx_round_events_attacker ON fact_round_events(attacker_steam_id, event_type); +``` + +#### 4.2.2 物化视图(如支持) + +```sql +-- 预计算常用聚合 +CREATE MATERIALIZED VIEW mv_player_basic_stats AS +SELECT + steam_id_64, + COUNT(*) as total_matches, + AVG(rating) as avg_rating, + AVG(kd_ratio) as avg_kd, + AVG(adr) as avg_adr, + AVG(kast) as avg_kast +FROM fact_match_players +GROUP BY steam_id_64; + +REFRESH MATERIALIZED VIEW mv_player_basic_stats; -- 定期刷新 +``` + +### 4.3 数据类型标准化 + +| 指标类型 | 存储类型 | 范围 | 前端展示 | +|---------|---------|------|---------| +| Rate/Percentage | REAL | 0.0 - 1.0 | `{:.1%}` (格式化为百分比) | +| Score (0-100) | REAL | 0.0 - 100.0 | `{:.1f}` | +| Count | INTEGER | 0+ | `{:.0f}` | +| Average Count | REAL | 0.0+ | `{:.2f}` | +| Time (seconds) | REAL | 0.0+ | `{:.1f}s` | +| Ratio | REAL | 0.0+ | `{:.2f}` | + +### 4.4 JSON字段拆解 + +#### 4.4.1 武器统计拆表 + +```sql +CREATE TABLE IF NOT EXISTS dm_player_weapon_stats ( + steam_id_64 TEXT, + weapon TEXT, + kills INTEGER, + hs_rate REAL, + share REAL, + kpm REAL, -- Kills per match + price INTEGER, + category TEXT, + PRIMARY KEY (steam_id_64, weapon), + FOREIGN KEY (steam_id_64) REFERENCES dm_player_features(steam_id_64) +); +``` + +#### 4.4.2 回合类型表现拆表 + +```sql +CREATE TABLE IF NOT EXISTS dm_player_round_type_stats ( + steam_id_64 TEXT, + round_type TEXT CHECK(round_type IN ('pistol', 'eco', 'rifle', 'fullbuy', 'overtime')), + kpr REAL, + perf REAL, + rounds_played INTEGER, + PRIMARY KEY (steam_id_64, round_type), + FOREIGN KEY (steam_id_64) REFERENCES dm_player_features(steam_id_64) +); +``` + +--- + +## 5. 
实施计划 + +### 5.1 阶段一:数据清理与验证 (Week 1) + +#### 任务清单 +- [ ] 审查L3表所有122列,标记冗余/错误/缺失列 +- [ ] 验证计算逻辑正确性(抽样10名玩家,手工核对) +- [ ] 统计各指标数据覆盖率(非NULL比例) +- [ ] 生成数据质量报告 + +#### 输出物 +- `data_quality_report.csv`: 各列的覆盖率、异常值比例 +- `calculation_verification.md`: 10个样本的计算验证结果 + +### 5.2 阶段二:Schema重构 (Week 2) + +#### 任务清单 +- [ ] 创建新L3表 `dm_player_features_v2` (保留旧表作为备份) +- [ ] 编写迁移脚本 `migrate_l3_v1_to_v2.py` +- [ ] 执行列名重命名、类型标准化 +- [ ] 添加元数据列 (data_version, data_quality_score) +- [ ] 创建武器统计、回合类型拆表 +- [ ] 建立索引 + +#### 输出物 +- `schema_v2.sql`: 新表结构DDL +- `migration_script.py`: 数据迁移脚本 +- `rollback_plan.md`: 回滚方案 + +### 5.3 阶段三:特征服务重构 (Week 3) + +#### 任务清单 +- [ ] 更新 `feature_service.py` 中的列名映射 +- [ ] 重构 `_load_and_calculate_dataframe` 函数 +- [ ] 实现新的指标优先级系统 (P0/P1/P2/P3) +- [ ] 添加数据质量检查逻辑 +- [ ] 更新 `get_roster_features_distribution` 支持新字段 +- [ ] 单元测试覆盖率达到80% + +#### 输出物 +- `feature_service_v2.py`: 重构后的特征服务 +- `test_feature_service.py`: 完整测试套件 +- `api_changelog.md`: API变更日志 + +### 5.4 阶段四:前端模板重构 (Week 4) + +#### 任务清单 +- [ ] 重构 `profile.html`,实现新的分组结构 +- [ ] 实现折叠/展开交互组件 +- [ ] 更新所有UI标签,统一中英文格式 +- [ ] 实现渐进式披露逻辑 +- [ ] 添加指标Tooltip说明 +- [ ] 优化移动端响应式布局 +- [ ] 性能优化(减少DOM节点,懒加载) + +#### 输出物 +- `profile_v2.html`: 重构后的模板 +- `components/stat_group.html`: 可复用的分组组件 +- `ui_ux_guidelines.md`: 前端设计规范 + +### 5.5 阶段五:数据迁移与上线 (Week 5) + +#### 任务清单 +- [ ] 在测试环境执行完整迁移流程 +- [ ] 对比新旧版本数据一致性 +- [ ] 性能压测(查询速度、页面加载时间) +- [ ] 灰度发布(10% -> 50% -> 100%) +- [ ] 监控错误日志、用户反馈 +- [ ] 更新文档和Wiki + +#### 输出物 +- `migration_report.md`: 迁移执行报告 +- `performance_benchmark.md`: 性能对比数据 +- `user_guide_v2.md`: 用户使用指南 + +### 5.6 阶段六:持续优化 (Ongoing) + +#### 任务清单 +- [ ] 收集用户反馈,迭代UI/UX +- [ ] 监控数据质量,自动告警 +- [ ] 定期review指标有效性 +- [ ] 探索新维度特征(e.g. 位置热力图、协同指标) +- [ ] A/B测试不同展示方案 + +--- + +## 6. 关键指标重点说明 + +### 6.1 核心KPI (P0级别) + +#### 1. Rating (综合评分) +- **计算**: 5E平台官方Rating算法(加权K/D/ADR/RWS/多杀等) +- **意义**: 单场比赛综合表现的标准化评分 +- **L2来源**: `fact_match_players.rating` +- **展示**: Dashboard大卡片 + 趋势图 + +#### 2. 
K/D Ratio (击杀死亡比) +- **计算**: `总击杀 / 总死亡` +- **意义**: 击杀效率的直观体现 +- **展示**: Dashboard + 详细面板 + 阵营对比 + +#### 3. ADR (Average Damage per Round) +- **计算**: `总伤害 / 总回合数` +- **意义**: 每回合伤害输出,比K/D更稳定 +- **展示**: Dashboard + 详细面板 + +#### 4. KAST (Kill/Assist/Survive/Trade) +- **计算**: `至少达成击杀/助攻/存活/被交易之一的回合数 / 总回合数` +- **意义**: 团队贡献率,衡量对回合胜利的参与度 +- **展示**: Dashboard + 详细面板 + +### 6.2 六维雷达说明 + +#### Combat (战斗力) - score_bat +- **构成**: 25% Rating + 20% KD + 15% ADR + 10% DuelWin + 10% HighEloKD + 20% 3K +- **含义**: 纯粹的枪法和对枪能力 +- **高分特征**: 爆头率高、对枪胜率高、对高分段也能保持KD + +#### Opening (开局影响) - score_opn (建议新增) +- **构成**: 30% FK_rate + 25% FK_avg + 20% (100-FirstContactTime) + 25% (100-OpeningKillTime) +- **含义**: 开局阶段的主动性和影响力 +- **高分特征**: 首杀率高、首接触时间早、首杀发生时间早 + +#### Clutch (残局能力) - score_hps +- **构成**: 25% 1v3+ + 20% MatchPtWin + 20% ComebackKD + 15% PressureEntry + 20% Rating +- **含义**: 高压情境下的表现 +- **高分特征**: 残局胜率高、赛点稳定、逆风能C + +#### Utility (道具运用) - score_util +- **构成**: 35% UsageRate + 25% NadeDmg + 20% FlashTime + 20% FlashEnemy +- **含义**: 道具使用的频率和效果 +- **高分特征**: 道具使用频繁、闪光效果好、雷火伤害高 + +#### Economy (经济管理) - score_eco +- **构成**: 50% Dmg/$1k + 50% EcoKPR +- **含义**: 经济利用效率 +- **高分特征**: 用少量装备打出高伤害、经济局也能发挥 + +#### Stability (稳定性) - score_sta +- **构成**: 30% (100-Volatility) + 30% LossRating + 20% WinRating + 10% TimeCorr (注:以上权重合计仅90%,其余10%的构成项待补充确认) +- **含义**: 表现的一致性和抗压能力 +- **高分特征**: 波动小、输赢都能保持水平、耐久战 + +### 6.3 重点推荐指标 + +#### 交火补枪率 (Firefight Follow-up Rate) +- **计算**: `队友死后10秒内击杀对手的次数 / 队友阵亡次数` +- **意义**: 衡量战术协同和补枪意识 +- **重要性**: ★★★★★ (团队配合的核心指标) +- **优化**: 当前是单独计算,建议整合到`pace_trade_kill_rate` + +#### 高IQ评分 (High IQ Score) +- **计算**: 加权(穿墙*3 + 穿烟*2 + 致盲*1.5 + 盲狙*2) / 预期最大值 * 100 +- **意义**: 战术智商和非常规击杀能力 +- **重要性**: ★★★★☆ (差异化指标,展现个人特色) +- **展示**: 特殊击杀Section,>50分显示徽章 + +#### 前期进攻率 (Early Aggression Rate) +- **计算**: `前30秒击杀 / 总击杀` +- **意义**: 打法风格标签(Aggressive vs Passive) +- **重要性**: ★★★★☆ (风格识别) +- **展示**: 时机分析Section + 风格徽章(>40%显示"Early Aggressor") + +#### 无效死亡率 (Invalid Death Rate) +- **计算**: `(0击杀且0闪光助攻的死亡回合) / 总死亡回合` +- **意义**: 
团队负担指标,死得没价值 +- **重要性**: ★★★★☆ (负面指标,需要改进) +- **展示**: 回合动态Section,高于30%需警示 + +#### 赛点放大器 (Match Point KPR Ratio) +- **计算**: `赛点回合KPR / 普通回合KPR` +- **意义**: 关键时刻的心理素质 +- **重要性**: ★★★★☆ (大心脏指标) +- **展示**: 高压情境Section,>1.2显示"Clutch Gene"徽章 + +--- + +## 7. 命名规范速查表 + +### 7.1 维度前缀 + +| 前缀 | 全称 | 中文 | 适用场景 | +|-----|------|------|----------| +| `core_` | Core | 核心 | Dashboard 4大指标 | +| `cbt_` | Combat | 战斗 | 枪法、对枪相关 | +| `opn_` | Opening | 开局 | 首杀、首死、前期节奏 | +| `clu_` | Clutch | 残局 | 1vX、残局胜率 | +| `uti_` | Utility | 道具 | 闪光、投掷、烟雾 | +| `eco_` | Economy | 经济 | 装备价值、经济局表现 | +| `stb_` | Stability | 稳定 | 波动、一致性 | +| `pce_` | Pace | 节奏 | 时间相关、进攻速度 | +| `spc_` | Special | 特殊 | 穿墙、穿烟、盲狙等 | +| `timg_` | Timing | 时机 | 回合阶段分布 | +| `side_` | Side | 阵营 | T/CT对比 | +| `pty_` | Party | 组排 | 单排、双排等 | +| `rd_` | Round | 回合 | 回合级别动态 | + +### 7.2 聚合函数后缀 + +| 后缀 | 含义 | 示例 | +|-----|------|------| +| `_avg` | Average | `cbt_hs_kills_avg` (平均爆头数) | +| `_sum` | Sum | `opn_fk_sum` (总首杀数) | +| `_rate` | Rate (0-1) | `cbt_hs_rate` (爆头率) | +| `_pct` | Percentage (0-100) | 不推荐,统一用rate | +| `_cnt` | Count | `clu_1v3_cnt` (1v3次数) | +| `_ratio` | Ratio | `eco_dmg_per_1k` (性价比;沿用惯用名,未带`_ratio`后缀) | +| `_score` | Score (0-100) | `spc_iq_score` (IQ评分) | +| `_time` | Time (seconds) | `pce_first_contact_time` (首接触时间) | + +### 7.3 上下文后缀 + +| 后缀 | 含义 | 示例 | +|-----|------|------| +| `_t` | T-side | `side_rating_avg_t` | +| `_ct` | CT-side | `side_kd_avg_ct` | +| `_pistol` | Pistol Round | `opn_fk_rate_pistol` | +| `_eco` | Eco Round | `eco_kpr_eco` | +| `_early` | Early (0-30s) | `timg_kills_early` | +| `_mid` | Mid (30-60s) | `timg_kills_mid` | +| `_late` | Late (60s+) | `timg_kills_late` | +| `_solo` | Solo Queue | `pty_rating_avg_solo` | +| `_duo` | Duo Queue | `pty_win_rate_duo` | + +--- + +## 8. 
数据验证清单 + +### 8.1 逻辑一致性检查 + +| 检查项 | 公式 | 预期结果 | +|-------|------|----------| +| FK + FD 占比 | `opn_fk_rate + opn_fd_rate` | ≈ 1.0 (允许±5%误差) | +| 回合阶段完整性 | `timg_kills_early + timg_kills_mid + timg_kills_late` | = `total_kills` | +| 阵营数据对称性 | `side_rounds_t + side_rounds_ct` | ≈ `total_rounds` (考虑加时) | +| 残局尝试>=成功 | `clu_1v1_attempts` | >= `clu_1v1_wins` | +| Rating分布完整 | `rating_dist_carry + normal + sacrifice + sleeping` | = 1.0 | + +### 8.2 数值范围检查 + +| 字段 | 最小值 | 最大值 | 异常阈值 | +|-----|-------|--------|----------| +| `core_rating_avg` | 0.0 | 3.0 | >2.5罕见 | +| `core_kd_avg` | 0.0 | 5.0 | >3.0罕见 | +| `cbt_hs_rate` | 0.0 | 1.0 | >0.8异常 | +| `opn_fk_rate` | 0.0 | 1.0 | <0.2或>0.8罕见 | +| `clu_1v3_win_rate` | 0.0 | 1.0 | >0.5罕见 | +| `pce_first_contact_time` | 0.0 | 115.0 | >100s异常 | + +### 8.3 数据覆盖率检查 + +| 字段类别 | 预期覆盖率 | 说明 | +|---------|-----------|------| +| Core指标 | >99% | 基础数据,几乎所有玩家都有 | +| Combat指标 | >95% | 除非样本太少 | +| Special Kills | >50% | 不是所有人都有穿墙击杀 | +| Economy指标 | >70% | 依赖Leetify数据,部分缺失 | +| Timing指标 | >90% | 依赖event_time,新数据完整 | + +--- + +## 9. FAQ + +### Q1: 为什么要重组?现有结构有什么问题? +**A**: 当前主要问题: +1. **命名混乱**:前缀不统一(`basic_`/`side_`/`rd_`等),难以维护 +2. **数据冗余**:同类指标分散在多个Section,用户查找困难 +3. **缺乏层次**:120+指标平铺,核心数据不突出 +4. **UI拥挤**:信息密度过高,用户认知负担重 + +### Q2: 重组后会丢失数据吗? +**A**: 不会。我们采用**增量迁移**策略: +1. 创建新表`dm_player_features_v2`,保留旧表 +2. 双写一段时间,确保数据一致 +3. 灰度切换,可随时回滚 +4. 旧表保留3个月作为备份 + +### Q3: 新分类体系的依据是什么? +**A**: 基于**游戏功能维度**和**用户关注度**: +1. **功能维度**:枪法、开局、残局、道具、经济、节奏等游戏概念 +2. **用户关注度**:通过热力图和用户反馈,确定P0/P1/P2/P3优先级 +3. **专业意见**:参考职业教练和数据分析师的建议 + +### Q4: 重组会影响现有功能吗? +**A**: 短期影响最小化: +1. **后端兼容**:`feature_service.py`会提供新旧API同时支持 +2. **前端渐进**:先发布新UI,保留旧版入口,收集反馈后完全切换 +3. **数据一致**:新旧两套数据会持续对比验证 + +### Q5: 如何衡量重组效果? +**A**: 关键指标: +1. **用户体验**:页面停留时间、交互深度、反馈评分 +2. **性能**:页面加载时间(<2s)、查询速度(<500ms) +3. **数据质量**:覆盖率(>90%)、异常率(<1%) +4. **开发效率**:新增指标开发时间、Bug数量 + +### Q6: 我能提供反馈吗? +**A**: 当然!反馈渠道: +1. **GitHub Issue**:提交功能建议或Bug报告 +2. **Wiki评论区**:讨论数据定义和展示逻辑 +3. 
**内部群组**:实时讨论和快速响应 + +--- + +## 10. 附录 + +### 10.1 完整列名映射表 (前50个) + +| 旧列名 | 新列名 | 优先级 | 分组 | +|-------|--------|-------|------| +| basic_avg_rating | core_rating_avg | P0 | Core Dashboard | +| basic_avg_kd | core_kd_avg | P0 | Core Dashboard | +| basic_avg_adr | core_adr_avg | P0 | Core Dashboard | +| basic_avg_kast | core_kast_avg | P0 | Core Dashboard | +| basic_avg_headshot_kills | cbt_hs_kills_avg | P1 | Gunfight | +| basic_headshot_rate | cbt_hs_rate | P1 | Gunfight | +| basic_avg_awp_kill | cbt_awp_kills_avg | P2 | Gunfight | +| basic_avg_assisted_kill | cbt_assists_avg | P1 | Gunfight | +| basic_avg_first_kill | opn_fk_avg | P1 | Opening Impact | +| basic_avg_first_death | opn_fd_avg | P2 | Opening Impact | +| basic_first_kill_rate | opn_fk_rate | P1 | Opening Impact | +| basic_first_death_rate | opn_fd_rate | P2 | Opening Impact | +| basic_avg_kill_2 | cbt_2k_avg | P2 | Multi-Frag | +| basic_avg_kill_3 | cbt_3k_avg | P1 | Multi-Frag | +| basic_avg_kill_4 | cbt_4k_avg | P2 | Multi-Frag | +| basic_avg_kill_5 | cbt_5k_avg | P2 | Multi-Frag | +| hps_clutch_win_rate_1v1 | clu_1v1_win_rate | P1 | Clutch | +| hps_clutch_win_rate_1v3_plus | clu_1v3_plus_win_rate | P1 | Clutch | +| util_avg_nade_dmg | uti_nade_dmg_avg | P1 | Utility | +| util_avg_flash_time | uti_flash_time_avg | P2 | Utility | +| util_usage_rate | uti_usage_rate | P1 | Utility | +| eco_avg_damage_per_1k | eco_dmg_per_1k | P1 | Economy | +| eco_rating_eco_rounds | eco_kpr_eco | P1 | Economy | +| pace_avg_time_to_first_contact | pce_first_contact_time | P1 | Pace | +| pace_trade_kill_rate | pce_trade_kill_rate | P1 | Pace | +| special_wallbang_kills | spc_wallbang_kills | P2 | Special | +| special_high_iq_score | spc_iq_score | P1 | Special | +| timing_early_kills | timg_kills_early | P2 | Timing | +| timing_early_aggression_rate | timg_aggression_rate | P1 | Timing | +| side_rating_t | side_rating_avg_t | P1 | Side | +| side_rating_ct | side_rating_avg_ct | P1 | Side | +| party_1_win_rate | 
pty_win_rate_solo | P2 | Party | +| party_5_win_rate | pty_win_rate_full | P2 | Party | +| rating_dist_carry_rate | perf_carry_rate | P2 | Stratification | +| elo_gt2000_rating | perf_vs_elo_2000_plus | P2 | Stratification | + +*(完整映射表见附件Excel)* + +### 10.2 参考资料 + +- **FeatureRDD.md**: 特征维度详细设计文档 +- **profile_data_analysis.md**: 现有Profile数据分析报告 +- **6D_README.md**: 六维能力模型说明 +- **L2 Schema**: `database/L2/schema.sql` +- **L3 Schema**: `database/L3/schema.sql` +- **Feature Service**: `web/services/feature_service.py` +- **Profile Template**: `web/templates/players/profile.html` + +### 10.3 术语表 + +| 术语 | 英文 | 解释 | +|-----|------|------| +| 评分 | Rating | 5E平台综合评分,加权多项指标 | +| 击杀比 | K/D Ratio | Kills / Deaths | +| 回合均伤 | ADR | Average Damage per Round | +| 贡献率 | KAST | Kill, Assist, Survive, Trade 参与率 | +| 首杀 | First Kill (FK) | 回合第一个击杀 | +| 首死 | First Death (FD) | 回合第一个死亡 | +| 残局 | Clutch | 1vX情境 | +| 经济局 | Eco Round | 装备价值<$2000的回合 | +| 长枪局 | Fullbuy | 装备价值≥$4000的回合 | +| 穿墙 | Wallbang | 透过墙体击杀 | +| 穿烟 | Through Smoke | 透过烟雾击杀 | +| 盲狙 | NoScope | AWP不开镜击杀 | +| 补枪 | Trade Kill | 队友死后5秒内击杀对手(注:6.3节"交火补枪率"使用10秒窗口,两处口径需统一) | +| 无效死亡 | Invalid Death | 死亡回合中0击杀且0闪光助攻 | +| 赛点 | Match Point | 一方达到12分或15分 | + +--- + +## 结论 + +本文档提供了YRTV玩家档案系统的**完整数据清单**(122个指标)、**当前问题分析**(命名、冗余、分类、Schema)、**重组方案**(新分类体系、命名规范、UI逻辑)、**Schema优化建议**(列名重构、索引、拆表)以及**详细实施计划**(6个阶段)。 + +### 核心价值 + +1. **降低维护成本**:统一命名规范,减少50%的代码注释需求 +2. **提升用户体验**:分组折叠,减少认知负担,提高30%的数据查找效率 +3. **增强可扩展性**:优先级系统,新增指标有明确归类标准 +4. **保障数据质量**:验证清单,自动化检查,异常率控制在1%以下 + +### 下一步行动 + +1. **评审会议**:召集开发、产品、数据团队,评审本方案 +2. **任务分配**:根据阶段计划,分配到具体开发人员 +3. **原型设计**:产品经理产出新UI原型,供前端参考 +4. 
**启动实施**:从阶段一开始,按周迭代 + +--- + +**文档版本**: v1.0 +**最后更新**: 2026-01-28 +**作者**: AI Assistant + YRTV Team +**联系方式**: [项目Wiki](内部链接) + +--- + +*本文档持续更新,欢迎贡献!* \ No newline at end of file diff --git a/scripts/analyze_dmg_per_1k.py b/scripts/analyze_dmg_per_1k.py deleted file mode 100644 index 26b49cb..0000000 --- a/scripts/analyze_dmg_per_1k.py +++ /dev/null @@ -1,74 +0,0 @@ - -import sqlite3 -import pandas as pd -import os - -# Config -L2_DB_PATH = r'database/L2/L2_Main.sqlite' -L3_DB_PATH = r'database/L3/L3_Features.sqlite' - -def analyze_team_dmg_per_1k(): - if not os.path.exists(L3_DB_PATH): - print(f"Error: L3 DB not found at {L3_DB_PATH}") - return - - conn_l3 = sqlite3.connect(L3_DB_PATH) - conn_l2 = sqlite3.connect(L2_DB_PATH) - - print("--- Analysis: Team Dmg/$1k (Economy Efficiency) ---") - - try: - # 1. Get all L3 features - query = """ - SELECT f.steam_id_64, f.eco_avg_damage_per_1k, p.username - FROM dm_player_features f - LEFT JOIN dim_players p ON f.steam_id_64 = p.steam_id_64 - ORDER BY f.eco_avg_damage_per_1k DESC - """ - - # Attach L2 for username lookup - # We can't attach across connections easily in sqlite python without ATTACH DATABASE command - # So let's fetch L3 first, then map names from L2 - - df_l3 = pd.read_sql_query("SELECT steam_id_64, eco_avg_damage_per_1k FROM dm_player_features", conn_l3) - - if df_l3.empty: - print("No data in L3 Features.") - return - - # Fetch names - ids = tuple(df_l3['steam_id_64'].tolist()) - placeholders = ','.join(['?'] * len(ids)) - q_names = f"SELECT steam_id_64, username FROM dim_players WHERE steam_id_64 IN ({placeholders})" - df_names = pd.read_sql_query(q_names, conn_l2, params=ids) - - # Merge - df = df_l3.merge(df_names, on='steam_id_64', how='left') - - # Sort - df = df.sort_values('eco_avg_damage_per_1k', ascending=False) - - print(f"{'Rank':<5} {'Player':<20} {'Dmg/$1k':<10}") - print("-" * 40) - - for idx, row in df.iterrows(): - rank = idx + 1 # This index is not rank if we iterated row by row after 
sort, wait. - # reset_index to get rank - pass - - df = df.reset_index(drop=True) - for idx, row in df.iterrows(): - name = row['username'] if row['username'] else row['steam_id_64'] - val = row['eco_avg_damage_per_1k'] - print(f"#{idx+1:<4} {name:<20} {val:.2f}") - - except Exception as e: - print(f"Error: {e}") - import traceback - traceback.print_exc() - finally: - conn_l2.close() - conn_l3.close() - -if __name__ == "__main__": - analyze_team_dmg_per_1k() diff --git a/scripts/debug_dist.py b/scripts/debug_dist.py deleted file mode 100644 index 24ed151..0000000 --- a/scripts/debug_dist.py +++ /dev/null @@ -1,45 +0,0 @@ - -import sqlite3 -import pandas as pd -from web.services.feature_service import FeatureService -from web.config import Config -from web.app import create_app - -def check_distribution(): - app = create_app() - with app.app_context(): - # Get a player ID from L3 - conn = sqlite3.connect(Config.DB_L3_PATH) - row = conn.execute("SELECT steam_id_64 FROM dm_player_features LIMIT 1").fetchone() - if not row: - print("No players in L3") - return - - sid = row[0] - print(f"Checking distribution for {sid}...") - - dist = FeatureService.get_roster_features_distribution(sid) - if not dist: - print("Distribution returned None") - return - - keys_to_check = [ - 'eco_avg_damage_per_1k', # Working - 'eco_rating_eco_rounds', # Working - 'eco_kd_ratio', # Broken - 'eco_avg_rounds', # Broken - 'pace_avg_time_to_first_contact', # Working - 'pace_trade_kill_rate', # Working - 'pace_opening_kill_time', # Broken - 'pace_avg_life_time' # Broken - ] - - print(f"{'Key':<35} | {'Present':<7} | {'Value'}") - print("-" * 60) - for k in keys_to_check: - is_present = k in dist - val = dist.get(k) - print(f"{k:<35} | {str(is_present):<7} | {val}") - -if __name__ == "__main__": - check_distribution() diff --git a/scripts/debug_jacky.py b/scripts/debug_jacky.py deleted file mode 100644 index 8d830a6..0000000 --- a/scripts/debug_jacky.py +++ /dev/null @@ -1,94 +0,0 @@ - -import 
sqlite3 -import pandas as pd -import os - -# Config -L2_DB_PATH = r'database/L2/L2_Main.sqlite' - -def debug_player_data(username_pattern='jAckY'): - if not os.path.exists(L2_DB_PATH): - print(f"Error: L2 DB not found at {L2_DB_PATH}") - return - - conn_l2 = sqlite3.connect(L2_DB_PATH) - - print(f"--- Debugging Player: {username_pattern} ---") - - try: - # 1. Find the player ID - q_id = f"SELECT steam_id_64, username FROM dim_players WHERE username LIKE '%{username_pattern}%'" - df_player = pd.read_sql_query(q_id, conn_l2) - - if df_player.empty: - print("Player not found.") - return - - target_id = df_player.iloc[0]['steam_id_64'] - name = df_player.iloc[0]['username'] - print(f"Found: {name} ({target_id})") - - # 2. Check Match Stats (ADR, Rounds) - q_matches = f""" - SELECT match_id, round_total, adr, (adr * round_total) as damage_calc - FROM fact_match_players - WHERE steam_id_64 = '{target_id}' - """ - df_matches = pd.read_sql_query(q_matches, conn_l2) - - total_dmg = df_matches['damage_calc'].sum() - total_rounds = df_matches['round_total'].sum() - print(f"\nMatch Stats:") - print(f"Matches Played: {len(df_matches)}") - print(f"Total Rounds: {total_rounds}") - print(f"Total Damage (Calc): {total_dmg:,.0f}") - - # 3. Check Economy Stats (Spend) - q_eco = f""" - SELECT match_id, COUNT(*) as rounds_with_eco, SUM(equipment_value) as spend - FROM fact_round_player_economy - WHERE steam_id_64 = '{target_id}' - GROUP BY match_id - """ - df_eco = pd.read_sql_query(q_eco, conn_l2) - - total_spend = df_eco['spend'].sum() - total_eco_rounds = df_eco['rounds_with_eco'].sum() - - print(f"\nEconomy Stats:") - print(f"Matches with Eco Data: {len(df_eco)}") - print(f"Rounds with Eco Data: {total_eco_rounds}") - print(f"Total Spend: ${total_spend:,.0f}") - - # 4. 
Compare - print(f"\nComparison:") - print(f"Rounds in Match Stats: {total_rounds}") - print(f"Rounds in Eco Stats: {total_eco_rounds}") - - if total_eco_rounds < total_rounds: - print(f"⚠️ WARNING: Missing economy data for {total_rounds - total_eco_rounds} rounds!") - - # Find matches with missing eco data - merged = df_matches.merge(df_eco, on='match_id', how='left') - missing = merged[merged['spend'].isna() | (merged['spend'] == 0)] - - if not missing.empty: - print(f"\nMatches with ZERO spend/Missing Eco:") - print(missing[['match_id', 'round_total', 'damage_calc']]) - - # Check calculation impact - valid_dmg = merged[merged['spend'] > 0]['damage_calc'].sum() - print(f"\nRecalculation ignoring missing matches:") - print(f"Valid Damage: {valid_dmg:,.0f}") - print(f"Total Spend: ${total_spend:,.0f}") - if total_spend > 0: - new_val = valid_dmg / (total_spend / 1000) - print(f"Corrected Dmg/$1k: {new_val:.2f}") - - except Exception as e: - print(f"Error: {e}") - finally: - conn_l2.close() - -if __name__ == "__main__": - debug_player_data()