0.2: Json schema extractor finished.
This commit is contained in:
81
utils/json_extractor/rules.py
Normal file
81
utils/json_extractor/rules.py
Normal file
@@ -0,0 +1,81 @@
|
||||
import re
|
||||
|
||||
# Regex patterns for masking sensitive/dynamic data.
# Both are anchored, so they only hit when the whole text is the ID.
STEAMID_REGEX = re.compile(r"^7656\d+$")  # "7656" followed by one or more digits
FIVE_E_ID_REGEX = re.compile(r"^1\d{7}$")  # 1 followed by 7 digits (8 digits total)

# Group merging: keys of the form group_<digits>
GROUP_KEY_REGEX = re.compile(r"^group_\d+$")

# URL exclusion patterns.
# We skip these URLs as they are analytics/auth related and not data payload.
# Each entry is a regex source string (not compiled) that is searched for
# anywhere inside a URL.
IGNORE_URL_PATTERNS = [
    r"sentry_key=",
    r"gate\.5eplay\.com/blacklistfront",
    r"favicon\.ico",
]
|
||||
|
||||
# URL Inclusion/Interest patterns (Optional, if we want to be strict)
|
||||
# INTEREST_URL_PATTERNS = [
|
||||
# r"api/data/match",
|
||||
# r"leetify",
|
||||
# ]
|
||||
|
||||
def is_ignored_url(url):
    """
    Tell whether a URL should be skipped.

    Returns True if any regex listed in IGNORE_URL_PATTERNS is found anywhere
    inside `url` (re.search, so the patterns are not anchored to the start),
    and False otherwise.
    """
    return any(re.search(pattern, url) for pattern in IGNORE_URL_PATTERNS)
|
||||
|
||||
def get_key_mask(key):
    """
    Returns a masked key name if it matches a pattern (e.g. group_1 -> group_N).
    Otherwise returns the key itself.

    Checks run in this order and the first hit wins:
        group_<digits>               -> "group_N"
        matches STEAMID_REGEX        -> "<steamid>"
        matches FIVE_E_ID_REGEX      -> "<5eid>"
        fight / fight_t / fight_ct   -> "fight_any"
        any other all-digit key      -> "<round_n>"
    """
    # fullmatch rather than match: with match(), the trailing "$" in these
    # patterns also accepts a key that ends in a newline (e.g. "group_1\n").
    if GROUP_KEY_REGEX.fullmatch(key):
        return "group_N"

    # The ID checks must come before the all-digit fallback below, because
    # every ID key is itself all digits.
    if STEAMID_REGEX.fullmatch(key):
        return "<steamid>"
    if FIVE_E_ID_REGEX.fullmatch(key):
        return "<5eid>"

    # Merge fight variants
    if key in ("fight", "fight_t", "fight_ct"):
        return "fight_any"

    # Merge numeric keys (likely round numbers)
    if key.isdigit():
        return "<round_n>"

    return key
|
||||
|
||||
def _id_placeholder(text):
    """Return "<5eid>" or "<steamid>" if `text` is entirely such an ID, else None."""
    # fullmatch rather than match: with match(), the trailing "$" in these
    # patterns also accepts text that ends in a newline.
    if FIVE_E_ID_REGEX.fullmatch(text):
        return "<5eid>"
    if STEAMID_REGEX.fullmatch(text):
        return "<steamid>"
    return None


def get_value_type(value):
    """
    Returns a generalized type string for a value, masking IDs.

    Possible results: "null", "bool", "int", "float", "string", "list",
    "dict", "unknown", or the placeholders "<5eid>" / "<steamid>" when an int
    (via its decimal string form) or a str matches one of the ID patterns.
    """
    if value is None:
        return "null"

    # bool is tested before int because bool is a subclass of int in Python.
    if isinstance(value, bool):
        return "bool"

    if isinstance(value, int):
        # Check for IDs stored as numbers
        return _id_placeholder(str(value)) or "int"

    if isinstance(value, float):
        return "float"

    if isinstance(value, str):
        # Heuristic for other IDs or timestamps could go here
        return _id_placeholder(value) or "string"

    if isinstance(value, list):
        return "list"

    if isinstance(value, dict):
        return "dict"

    return "unknown"
|
||||
Reference in New Issue
Block a user