0.2: JSON schema extractor finished.

This commit is contained in:
2026-01-23 18:17:45 +08:00
parent 81df352607
commit 0a78c78fc7
10 changed files with 9038 additions and 0 deletions

View File

@@ -0,0 +1,81 @@
import re
# Regex patterns for masking sensitive/dynamic data.
# Every pattern is anchored with ^...$ so it has to cover the whole key/value.
# Digit-only strings that begin with 7656 (treated as Steam IDs).
STEAMID_REGEX = re.compile(r"^7656\d+$")
FIVE_E_ID_REGEX = re.compile(r"^1\d{7}$") # 1 followed by 7 digits (8 digits total); treated as a 5E ID
# Group merging: keys of the form group_<digits>.
GROUP_KEY_REGEX = re.compile(r"^group_\d+$")
# URL exclusion patterns.
# URLs matching any of these are skipped: they are analytics/auth related
# and do not carry a data payload.
# Each entry is a regular-expression source string (note the escaped dots).
IGNORE_URL_PATTERNS = [
r"sentry_key=",
r"gate\.5eplay\.com/blacklistfront",
r"favicon\.ico",
]
# URL inclusion/interest patterns (optional, for when strict filtering is wanted).
# Currently disabled; kept only as a commented-out example.
# INTEREST_URL_PATTERNS = [
# r"api/data/match",
# r"leetify",
# ]
def is_ignored_url(url):
    """
    Tell whether a URL should be skipped.

    Returns True as soon as one of the regexes in IGNORE_URL_PATTERNS is
    found anywhere inside ``url`` (``re.search`` semantics); returns False
    when none of them is found.
    """
    return any(re.search(rule, url) for rule in IGNORE_URL_PATTERNS)
def get_key_mask(key):
    """
    Map a dictionary key to its generalized (masked) name.

    Keys that match one of the known patterns are collapsed into a shared
    placeholder (e.g. group_1 -> group_N); any other key is returned as-is.
    """
    # Regex-based masks, tried in this order; the first one that matches wins.
    # The ID patterns come before the generic digit check at the end, so an
    # all-digit key that looks like an ID is masked as an ID.
    regex_masks = (
        (GROUP_KEY_REGEX, "group_N"),
        (STEAMID_REGEX, "<steamid>"),
        (FIVE_E_ID_REGEX, "<5eid>"),
    )
    for regex, placeholder in regex_masks:
        if regex.match(key):
            return placeholder

    # The fight variants are merged under one name.
    if key in ("fight", "fight_t", "fight_ct"):
        return "fight_any"

    # Remaining purely numeric keys are merged (likely round numbers).
    return "<round_n>" if key.isdigit() else key
def get_value_type(value):
    """
    Describe a value by a generalized type label, masking ID-like values.

    Possible results: "null", "bool", "int", "float", "string", "list",
    "dict", "unknown", plus "<5eid>" / "<steamid>" for ints and strings
    whose text matches the corresponding ID pattern.
    """
    if value is None:
        return "null"

    # bool has to be handled before int because bool is a subclass of int.
    if isinstance(value, bool):
        return "bool"

    is_text = isinstance(value, str)
    if is_text or isinstance(value, int):
        # Ints are checked against the ID patterns via their decimal text.
        candidate = value if is_text else str(value)
        if FIVE_E_ID_REGEX.match(candidate):
            return "<5eid>"
        if STEAMID_REGEX.match(candidate):
            return "<steamid>"
        # Heuristics for other IDs or timestamps could go here.
        return "string" if is_text else "int"

    # Remaining plain types, each mapped straight to its label.
    plain_labels = (
        (float, "float"),
        (list, "list"),
        (dict, "dict"),
    )
    for kind, label in plain_labels:
        if isinstance(value, kind):
            return label

    return "unknown"