"""Helpers for masking dynamic keys/values and for skipping ignored URLs."""

import re

# Regex patterns for masking sensitive/dynamic data
STEAMID_REGEX = re.compile(r"^7656\d+$")
FIVE_E_ID_REGEX = re.compile(r"^1\d{7}$")  # 1 followed by 7 digits (8 digits total)

# Group merging
GROUP_KEY_REGEX = re.compile(r"^group_\d+$")

# URL exclusion patterns.
# We skip these URLs as they are analytics/auth related and not data payload.
IGNORE_URL_PATTERNS = [
    r"sentry_key=",
    r"gate\.5eplay\.com/blacklistfront",
    r"favicon\.ico",
]

# URL inclusion/interest patterns (optional, if we want to be strict)
# INTEREST_URL_PATTERNS = [
#     r"api/data/match",
#     r"leetify",
# ]

# Mask strings emitted by the helpers below.
# NOTE(review): the SteamID and numeric-key masks are empty strings, which
# makes them indistinguishable from each other; this looks like a placeholder
# may have been lost at some point -- confirm the intended values.
_GROUP_KEY_MASK = "group_N"
_STEAMID_MASK = ""
_FIVE_E_ID_MASK = "<5eid>"
_FIGHT_MASK = "fight_any"
_NUMERIC_KEY_MASK = ""

# Key variants that are merged into a single "fight" bucket.
_FIGHT_KEYS = ("fight", "fight_t", "fight_ct")


def is_ignored_url(url: str) -> bool:
    """Return True if *url* matches any pattern in ``IGNORE_URL_PATTERNS``.

    Patterns are applied with ``re.search``, so a match anywhere in the URL
    is enough. The list is read on every call, so runtime changes to
    ``IGNORE_URL_PATTERNS`` are honoured.
    """
    return any(re.search(pattern, url) for pattern in IGNORE_URL_PATTERNS)


def get_key_mask(key: str) -> str:
    """Return a masked key name if *key* matches a known pattern.

    For example ``group_1`` becomes ``group_N``. Keys that match no pattern
    are returned unchanged.

    ``fullmatch`` is used instead of ``match`` because ``$`` also matches
    just before a trailing newline, which would let ``"group_1\\n"`` pass as
    an exact match.
    """
    if GROUP_KEY_REGEX.fullmatch(key):
        return _GROUP_KEY_MASK
    if STEAMID_REGEX.fullmatch(key):
        return _STEAMID_MASK
    if FIVE_E_ID_REGEX.fullmatch(key):
        return _FIVE_E_ID_MASK

    # Merge fight variants
    if key in _FIGHT_KEYS:
        return _FIGHT_MASK

    # Merge numeric keys (likely round numbers)
    if key.isdigit():
        return _NUMERIC_KEY_MASK

    return key


def get_value_type(value: object) -> str:
    """Return a generalized type string for *value*, masking IDs.

    Ints and strings that look like a 5E id or a SteamID are reported with
    the corresponding mask instead of ``"int"`` / ``"string"``. Values of
    any type not handled below are reported as ``"unknown"``.
    """
    if value is None:
        return "null"

    # bool must be tested before int: bool is a subclass of int.
    if isinstance(value, bool):
        return "bool"

    if isinstance(value, int):
        # Check for IDs
        text = str(value)
        if FIVE_E_ID_REGEX.fullmatch(text):
            return _FIVE_E_ID_MASK
        if STEAMID_REGEX.fullmatch(text):
            return _STEAMID_MASK
        return "int"

    if isinstance(value, float):
        return "float"

    if isinstance(value, str):
        # fullmatch: a trailing newline must not be accepted as an exact ID.
        if FIVE_E_ID_REGEX.fullmatch(value):
            return _FIVE_E_ID_MASK
        if STEAMID_REGEX.fullmatch(value):
            return _STEAMID_MASK
        # Heuristic for other IDs or timestamps could go here
        return "string"

    if isinstance(value, list):
        return "list"
    if isinstance(value, dict):
        return "dict"
    return "unknown"