add postal address to config

This commit is contained in:
Frank Xu
2026-01-27 13:40:09 -05:00
parent 7e72ede099
commit 5d94088004
30 changed files with 2325 additions and 51 deletions

View File

@@ -1,39 +1,39 @@
# File names of the databases selected by this configuration.
# Entries that are commented out are not part of the list; uncomment a line to
# include that file. The extensions (.db, .sqlite, .sqlitedb) suggest SQLite
# databases, and the "A"/"I" prefixes presumably group the files by source
# device or app -- TODO confirm against the code that consumes db_files.
db_files = [
# "test2.db",
# "users.db",
# "A1_commerce.db",
"A1_commerce.db",
"A1_msgstore.db",
# "A1_wa.db",
# "A2_core.db",
# "A2_journal.db",
# "A2_main.db",
# "A3_account1cache4.db",
# "A3_account2cache4.db",
# "A3_account3cache4.db",
# "A4_gmm_myplaces.db",
# "A4_gmm_storage.db",
# "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db",
# "A5_SBrowser.db",
# "A5_SBrowser2.db",
# "A5_searchengine.db",
# "I1_CallHistory.sqlite",
# "I1_ChatStorage.sqlite",
# "I1_ContactsV2.sqlite",
# "I2_AddressBook.sqlitedb",
# "I2_AddressBookImages.sqlitedb",
# "I3_sms.db",
# "I4_CloudTabs.db",
# "I4_History.db",
# "I5_Calendar.sqlitedb",
# "I5_Extras.db",
"A1_wa.db",
"A2_core.db",
"A2_journal.db",
"A2_main.db",
"A3_account1cache4.db",
"A3_account2cache4.db",
"A3_account3cache4.db",
"A4_gmm_myplaces.db",
"A4_gmm_storage.db",
"A4_peopleCache_sharononeil368@gmail.com_com.google_14.db",
"A5_SBrowser.db",
"A5_SBrowser2.db",
"A5_searchengine.db",
"I1_CallHistory.sqlite",
"I1_ChatStorage.sqlite",
"I1_ContactsV2.sqlite",
"I2_AddressBook.sqlitedb",
"I2_AddressBookImages.sqlitedb",
"I3_sms.db",
"I4_CloudTabs.db",
"I4_History.db",
"I5_Calendar.sqlitedb",
"I5_Extras.db",
]
PII_CONFIG = {
"EMAIL": {
"type":"email address",
"regex": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
"desc": "a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet or a private network"
},
"desc": "a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org"
},
"PHONE": {
"type":"US phone number",
"regex": r"\+?[0-9]{1,4}[- .]?\(?[0-9]{1,3}?\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}",
@@ -42,7 +42,7 @@ PII_CONFIG = {
"USERNAME": {
"type":"username",
"regex": r"\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\b",
"desc": "a username is a unique string of characters without any space —often a combination of letters, numbers, and symbols—used by a system to identify a user during the Authentication, Authorization, and Accounting (AAA) process"
"desc": " unique, whitespace-free alphanumeric string used as a system-internal identifier or public handle that lacks a domain suffix, distinguishing it from an email address while still serving as a primary anchor for account attribution."
},
"PERSON_NAME": {
"type":"person name",
@@ -56,16 +56,33 @@ PII_CONFIG = {
# (1) PO Box patterns, OR
# (2) optional street number + some tokens + a street suffix, OR
# (3) street suffix with nearby tokens (even without a number).
"regex": r"(?is)\b("
r"(?:P\.?\s*O\.?\s*BOX|POST\s+OFFICE\s+BOX)\s*\d{1,6}"
r"|"
r"(?:\d{1,7}\s*)?" # OPTIONAL street number
r"(?:[A-Z0-9][A-Z0-9'.,/#\-]*\s*){1,25}?" # optional-ish tokens before suffix
r"(?:AVE|AVENUE|ST|STREET|RD|ROAD|BLVD|BOULEVARD|DR|DRIVE|LN|LANE|CT|COURT|PL|PLACE|WAY|"
r"PKWY|PARKWAY|CIR|CIRCLE|TER|TERRACE|HWY|HIGHWAY|TRL|TRAIL|SQ|SQUARE|PIKE|LOOP|RUN|WALK|PATH|BYP|BYPASS)\b"
r"(?:\s*(?:,|\s)\s*(?:N|S|E|W|NE|NW|SE|SW))?" # optional directional
r"(?:.{0,60}?\b\d{5}(?:-\d{4})?\b)?" # optional ZIP nearby
r")\b",
"regex": (
r"(?i)\b(?:"
r"p\.?\s*o\.?\s*box|post\s+office\s+box|"
r"ave\.?|avenue|"
r"st\.?|street|"
r"rd\.?|road|"
r"blvd\.?|boulevard|"
r"dr\.?|drive|"
r"ln\.?|lane|"
r"ct\.?|court|"
r"pl\.?|place|"
r"way|"
r"pkwy\.?|parkway|"
r"cir\.?|circle|"
r"ter\.?|terrace|"
r"hwy\.?|highway|"
r"trl\.?|trail|"
r"sq\.?|square|"
r"pike|"
r"loop|"
r"run|"
r"walk|"
r"path|"
r"byp\.?|bypass|"
r"(?:n|s|e|w|ne|nw|se|sw)\b"
r")\b"
),
"desc": "a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123')"
}
}