add postal address to config

This commit is contained in:
Frank Xu
2026-01-26 22:29:05 -05:00
parent fe649183a0
commit 1b3d3e2e95
51 changed files with 115 additions and 28 deletions

View File

@@ -0,0 +1 @@
{"db_path": "commerce.db", "PII Type": null, "PII": [], "Num_of_PII": null, "source_columns": []}

View File

@@ -0,0 +1 @@
{"db_path": "msgstore.db", "PII Type": "Phone", "PII": ["+1 (318) 833-6425"], "Num_of_PII": 1, "source_columns": ["business_name"]}

View File

@@ -0,0 +1,8 @@
{"db_path": "wa.db", "PII Type": "Phone", "PII": ["+19199037779", "8085096467", "5713349815", "+16263678865", "+16106046786", "7034241981", "5715917168", "+12065937224", "5713298742", "8624338328", "+18056377243", "2028177932", "2025692832", "+19735203731", "+81367430271", "+17423794330"], "Num_of_PII": 16, "source_columns": ["number"]}
{"db_path": "wa.db", "PII Type": "Name", "PII": ["Russ Philby", "Don Wayne", "Whalen", "Mary Garcia", "Joey", "Hank", "Goldie Kahn", "Abe Rudder", "Svetlana Chernoff", "Karen Tate", "Voice Mail", "Brian Reynolds", "Vladamir Stravinsky", "Otto", "Toks", "Bo", "Russ Philby", "Don Wayne", "Whalen", "Mary Garcia", "Joey", "Hank", "Goldie Kahn", "Abe Rudder", "Svetlana Chernoff", "Karen Tate", "Voice Mail", "Brian Reynolds", "Vladamir Stravinsky", "Otto", "Toks", "Bo"], "Num_of_PII": 32, "source_columns": ["given_name+family_name", "sort_name"]}
{"db_path": "wa.db", "PII Type": "Username", "PII": ["Russ Philby", "Capt Don Wayne", "LTC Whalen", "Mary Garcia", "Joey", "Hank", "Goldie Kahn", "Abe Rudder", "Svetlana Chernoff", "Karen Tate", "Voice Mail", "Brian Reynolds", "Vladamir Stravinsky", "Otto", "Toks", "Bo"], "Num_of_PII": 16, "source_columns": ["display_name"]}
{"db_path": "wa.db", "PII Type": "Phone", "PII": ["5713298742", "5713349815", "8085096467", "8624338328", "7034241981", "2028177932", "5715917168", "2025692832", "+16106046786", "+16263678865", "+12065937224", "+19199037779", "+18056377243", "+19735203731", "+81367430271", "+17423794330"], "Num_of_PII": 16, "source_columns": ["number"]}
{"db_path": "wa.db", "PII Type": "Username", "PII": ["Svetlana Chernoff", "LTC Whalen", "Capt Don Wayne", "Karen Tate", "Hank", "Brian Reynolds", "Goldie Kahn", "Vladamir Stravinsky", "6\ufe0f\u20e3 Wealth Builders Club", "Joey", "Mary Garcia", "Abe Rudder", "Russ Philby", "Voice Mail", "Otto", "Toks", "Bo", "CLARKE", "Philips", "a_kalachikova", "Elizabeth Jones", "george", "Morton", "latefire_eu", "Mario.drapela", "Dolgetta", "Jacob Emily", "Thomas", "Alethea", "Peter", "Imogen Holman", "Chloe Rodriguez", "Robert jane", "Mason", "James Matthew Brown", "Jalen", "Official representative", "Gilbert", "Andie", "Connor Haggarty", "kop", "Winona", "laurenncbrown", "Gabriel", "Sarah Cox", "Jesus Delgado", "Milicardis", "jackson", "Dave", "Benjamin", "Elliot Dowell", "Mary Garcia", "Gaia", "renwbw", "Jim Anderson", "Marit Bonthuis", "rola nd", "marcel_juhas", "Kevin Destiny", "Minardo Gaspari", "Stephanna smith", "YTliken", "Thomas Anderson", "pnod", "Paco Almeida", "Oscar Steven", "Lockerbie Do-Vip", "Admin", "Roberto Cadorin", "Brandon Addison", "Matt Galligan", "Christellecamiller", "Carlos", "Marcinmis", "Isabel Abarca", "Adam Taylor", "Antonio", "scrichpower", "Hcibc", "Gabi", "Alethea", "Thomas Arthur Heber Fearn", "Augustin Richard", "Kamila", "Jim Anderson", "Aiden Savannah", "Edwardsaliendra", "Jayden", "Morgan", "harry", "kandicesledge", "Jaroslaw Machek", "Juan Pablo Pesqueira", "Mandy Lauren\ud83e\uddd9\u200d\u2640\ufe0f", "gemmadorney", "zara", "Marcos Amorim", "Amanda White", "Mauro Silvabarbosa", "Felix Davey", "\ud83d\udc99\ud83d\udc99\ud83d\udc99", "AbeRudder", "Isabella", "Olivia", "Felipe Hernandez", "Sophia", "Elizabeth Jones", "Miss you", "Russell Philby", "Stephcoleman", "Brian Goudy", "Liam Thomas", "Shawn Hoxie", "Carolasol_espinoza", "13135550002", "Otto Matik", "Ruby"], "Num_of_PII": 117, "source_columns": ["display_name", "wa_name"]}
{"db_path": "wa.db", "PII Type": "Name", "PII": ["Svetlana Chernoff", "Whalen", "Don Wayne", "Karen Tate", "Hank", "Brian Reynolds", "Goldie Kahn", "Vladamir Stravinsky", "Joey", "Mary Garcia", "Abe Rudder", "Russ Philby", "Voice Mail", "Otto", "Toks", "Bo", "Svetlana Chernoff", "Whalen", "Don Wayne", "Karen Tate", "Hank", "Brian Reynolds", "Goldie Kahn", "Vladamir Stravinsky", "Joey", "Mary Garcia", "Abe Rudder", "Russ Philby", "Voice Mail", "Otto", "Toks", "Bo"], "Num_of_PII": 32, "source_columns": ["given_name+family_name", "sort_name"]}
{"db_path": "wa.db", "PII Type": "Username", "PII": ["Million Marketing", "Million Marketing", "Jason Steven", "\u200eshirley", "Shehriyar Ansari", "nole", "Dungeon Master", "Alvin the Alien", "Perry", "Angie", "Bob the robot", "Leo", "Sally", "Brian", "Liv", "Coco", "Victor", "Tamika", "Becca", "Lorena", "Zach", "Luiz", "Bru", "Thalia", "Lily", "Izzy", "Max", "Scarlett", "Amber", "Dylan", "Jade", "Billie", "Jane Austen"], "Num_of_PII": 33, "source_columns": ["verified_name"]}
{"db_path": "wa.db", "PII Type": "Username", "PII": ["Meta", "leaura", "faxinezidohne", "Meta", "Meta", "patrickh34", "yauyauyauhen", "nathanmorris", "faxinezidohne", "0jamesf", "italianmatters", "Meta", "robm435", "loususi", "Meta", "lanaire2023", "Meta", "pet_the_bunny", "madmax_mgm", "reidback", "rennymorales", "yjr_fit.inba", "Meta", "Meta", "the_real_flockfam", "Meta", "Meta", "Meta", "Meta", "Meta", "Meta", "Meta", "Meta", "yauyauyauhen", "gibbogram", "almondeyezbitch", "visionz2turnt", "stars_hinemoon", "thisvillage_ijn", "Meta", "patrick_c_doyle", "Meta", "patrickvaxter", "Meta", "Meta", "Meta", "Meta", "patrickh34", "humans_of_data", "airwicksol", "homan.jason", "Meta", "Meta", "Meta", "nadhir_chiu_oficial", "Meta", "Meta", "Meta", "Meta", "Meta", "brandonmcclainl", "geezdagawd", "superflysugar2024", "curia__", "psychicadvisor345", "lemieuxbrands", "Meta", "james_macray_", "Meta", "Meta", "Meta", "Meta", "Meta", "Meta", "Meta"], "Num_of_PII": 75, "source_columns": ["creator_name"]}

View File

@@ -0,0 +1,4 @@
{"db_path": "core.db", "PII Type": "Username", "PII": ["oneil3607", "oneil3607"], "Num_of_PII": 2, "source_columns": ["textVal"]}
{"db_path": "core.db", "PII Type": "Name", "PII": ["Sharon Oneil"], "Num_of_PII": 1, "source_columns": ["textVal"]}
{"db_path": "core.db", "PII Type": "Email", "PII": ["sharononeil368@gmail.com"], "Num_of_PII": 1, "source_columns": ["textVal"]}
{"db_path": "core.db", "PII Type": "Phone", "PII": ["18624338329"], "Num_of_PII": 1, "source_columns": ["textVal"]}

View File

@@ -0,0 +1 @@
{"db_path": "journal.db", "PII Type": null, "PII": [], "Num_of_PII": null, "source_columns": []}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{"db_path": "account1cache4.db", "PII Type": null, "PII": [], "Num_of_PII": null, "source_columns": []}

View File

@@ -0,0 +1 @@
{"db_path": "account2cache4.db", "PII Type": null, "PII": [], "Num_of_PII": null, "source_columns": []}

View File

@@ -0,0 +1 @@
{"db_path": "account3cache4.db", "PII Type": null, "PII": [], "Num_of_PII": null, "source_columns": []}

View File

@@ -0,0 +1 @@
{"db_path": "gmm_myplaces.db", "PII Type": null, "PII": [], "Num_of_PII": null, "source_columns": []}

View File

@@ -0,0 +1 @@
{"db_path": "gmm_storage.db", "PII Type": null, "PII": [], "Num_of_PII": null, "source_columns": []}

View File

@@ -0,0 +1,6 @@
{"db_path": "peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII Type": "Email", "PII": ["heather@cellebrite.com", "hmahalik@gmail.com"], "Num_of_PII": 2, "source_columns": ["value"]}
{"db_path": "peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII Type": "Username", "PII": ["heather", "hmahalik", "Bo"], "Num_of_PII": 3, "source_columns": ["value"]}
{"db_path": "peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII Type": "Phone", "PII": ["17423794330"], "Num_of_PII": 1, "source_columns": ["value"]}
{"db_path": "peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII Type": "Email", "PII": ["heather@cellebrite.com", "hmahalik@gmail.com"], "Num_of_PII": 2, "source_columns": ["c1value"]}
{"db_path": "peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII Type": "Username", "PII": ["heather", "hmahalik", "Bo"], "Num_of_PII": 3, "source_columns": ["c1value"]}
{"db_path": "peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII Type": "Phone", "PII": ["17423794330"], "Num_of_PII": 1, "source_columns": ["c1value"]}

View File

@@ -0,0 +1,3 @@
{"db_path": "SBrowser.db", "PII Type": "Email", "PII": ["sharononeil368@gmail.com"], "Num_of_PII": 1, "source_columns": ["ACCOUNT_NAME"]}
{"db_path": "SBrowser.db", "PII Type": "Email", "PII": ["sharononeil368@gmail.com"], "Num_of_PII": 1, "source_columns": ["account_name"]}
{"db_path": "SBrowser.db", "PII Type": "Email", "PII": ["sharononeil368@gmail.com", "sharononeil368@gmail.com", "sharononeil368@gmail.com", "sharononeil368@gmail.com"], "Num_of_PII": 4, "source_columns": ["ACCOUNT_NAME"]}

View File

@@ -0,0 +1 @@
{"db_path": "SBrowser2.db", "PII Type": null, "PII": [], "Num_of_PII": null, "source_columns": []}

View File

@@ -0,0 +1 @@
{"db_path": "searchengine.db", "PII Type": null, "PII": [], "Num_of_PII": null, "source_columns": []}

View File

@@ -0,0 +1 @@
{"db_path": "CallHistory.sqlitedb", "PII Type": null, "PII": [], "Num_of_PII": null, "source_columns": []}

View File

@@ -0,0 +1,4 @@
{"db_path": "ChatStorage.sqlite", "PII Type": "Phone", "PII": ["+1 (971) 678-6701"], "Num_of_PII": 1, "source_columns": ["ZPARTNERNAME"]}
{"db_path": "ChatStorage.sqlite", "PII Type": "Name", "PII": ["Chad Hunt", "Toni Yu", "Charles Finley", "Ronen Engler", "John Raynolds", "Jonathan Reyes", "Ronen Engler", "Johnny Good", "Russell Philby", "Sharon \ud83d\ude0d", "Abe Rudder"], "Num_of_PII": 11, "source_columns": ["ZPARTNERNAME"]}
{"db_path": "ChatStorage.sqlite", "PII Type": "Phone", "PII": ["19716786701"], "Num_of_PII": 1, "source_columns": ["ZPUSHNAME"]}
{"db_path": "ChatStorage.sqlite", "PII Type": "Name", "PII": ["Finn", "Ronen Engler", "John Reynolds", "Colin DaCopps", "Russell Philby", "Lisena Gocaj", "Lisena Gocaj", "Lisena Gocaj", "Andy Sieg", "Howell", "Yaliweisi", "Andy Sieg", "Abner", "Andy Schweichert", "Lisena Gocaj", "Andy Sieg", "Brian Arseneau", "Virginia", "Christian Justiniano", "Lisena Gocaj", "Jim Wilson", "Lisena Gocaj", "Virginia Benton", "Howell", "Lisena", "Yaliweisi", "Abner", "Abe Rudder", "Virginia", "Jason", "Sharon Oneil", "Job Vizcarra", "Robert Elliott", "Sultan", "Emerick", "Nia Yuniar", "\u200bskol", "David Wilson", "Robechucks Raul", "Ella Bella", "Dick Oscar", "Charlie", "Steven", "Ameya Joshi", "Robechucks Raul", "Robechucks Raul", "Amiel Williamson", "Ashwin Menon", "Ajax Edmiston", "Eleazar Lewden", "Polly Lucas", "Eleazar Lewden", "Robechucks Raul", "Lemuel Glasgow", "Bazel McConnel", "William Stevenson", "Robechucks Raul", "Denice R Allen", "Leif Fox", "William Hopkins", "Robechucks Raul", "Jonas Bradley", "Robechucks Raul", "Amit Sharma"], "Num_of_PII": 64, "source_columns": ["ZPUSHNAME"]}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{"db_path": "AddressBookImages.sqlitedb", "PII Type": null, "PII": [], "Num_of_PII": null, "source_columns": []}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{"db_path": "CloudTabs.db", "PII Type": null, "PII": [], "Num_of_PII": null, "source_columns": []}

View File

@@ -0,0 +1 @@
{"db_path": "History.db", "PII Type": null, "PII": [], "Num_of_PII": null, "source_columns": []}

View File

@@ -0,0 +1,5 @@
{"db_path": "Calendar.sqlitedb", "PII Type": "Email", "PII": ["ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com", "ottomatik1234@gmail.com"], "Num_of_PII": 27, "source_columns": ["last_sync_title", "notes", "owner_identity_email", "self_identity_email", "shared_owner_address", "title"]}
{"db_path": "Calendar.sqlitedb", "PII Type": "Name", "PII": ["Otto Matik", "Otto Matik", "Otto Matik", "Otto Matik", "Otto Matik", "Otto Matik"], "Num_of_PII": 6, "source_columns": ["shared_owner_name"]}
{"db_path": "Calendar.sqlitedb", "PII Type": "Email", "PII": ["ottomatik1234@gmail.com"], "Num_of_PII": 1, "source_columns": ["address"]}
{"db_path": "Calendar.sqlitedb", "PII Type": "Name", "PII": ["Otto Matik"], "Num_of_PII": 1, "source_columns": ["display_name"]}
{"db_path": "Calendar.sqlitedb", "PII Type": "Name", "PII": ["Otto Matik", "Otto Matik"], "Num_of_PII": 2, "source_columns": ["owner_name"]}

View File

@@ -0,0 +1 @@
{"db_path": "Extras.db", "PII Type": null, "PII": [], "Num_of_PII": null, "source_columns": []}

View File

@@ -1,31 +1,31 @@
db_files = [ db_files = [
# "test2.db", # "test2.db",
# "users.db", # "users.db",
# "A1_commerce.db", "A1_commerce.db",
# "A1_msgstore.db", "A1_msgstore.db",
# "A1_wa.db", "A1_wa.db",
# "A2_core.db", "A2_core.db",
# "A2_journal.db", "A2_journal.db",
# "A2_main.db", "A2_main.db",
# "A3_account1cache4.db", "A3_account1cache4.db",
# "A3_account2cache4.db", "A3_account2cache4.db",
# "A3_account3cache4.db", "A3_account3cache4.db",
# "A4_gmm_myplaces.db", "A4_gmm_myplaces.db",
# "A4_gmm_storage.db", "A4_gmm_storage.db",
# "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db", "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db",
# "A5_SBrowser.db", "A5_SBrowser.db",
# "A5_SBrowser2.db", "A5_SBrowser2.db",
# "A5_searchengine.db", "A5_searchengine.db",
# "I1_CallHistory.sqlite", "I1_CallHistory.sqlite",
# "I1_ChatStorage.sqlite", "I1_ChatStorage.sqlite",
# "I1_ContactsV2.sqlite", "I1_ContactsV2.sqlite",
# "I2_AddressBook.sqlitedb", "I2_AddressBook.sqlitedb",
# "I2_AddressBookImages.sqlitedb", "I2_AddressBookImages.sqlitedb",
# "I3_sms.db", "I3_sms.db",
# "I4_CloudTabs.db", "I4_CloudTabs.db",
# "I4_History.db", "I4_History.db",
# "I5_Calendar.sqlitedb", "I5_Calendar.sqlitedb",
# "I5_Extras.db", "I5_Extras.db",
] ]
PII_CONFIG = { PII_CONFIG = {
@@ -42,12 +42,31 @@ PII_CONFIG = {
"USERNAME": { "USERNAME": {
"type":"username", "type":"username",
"regex": r"\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\b", "regex": r"\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\b",
"desc": "a username (also called a login name, user ID, or account name) is a unique string of characters that identifies a user on a computer system, website, application, or online platform" "desc": "a username (also called a login name, user ID, or account name) is a unique string of characters that identifies a user on a computer system, website, application, or online platform. It is often created by the user during the registration process and is used in combination with a password to authenticate the user's identity"
}, },
"PERSON_NAME": { "PERSON_NAME": {
"type":"person's name", "type":"person name",
"regex": r"[A-Za-z][A-Za-z\s\.\-]{1,50}", "regex": r"[A-Za-z][A-Za-z\s\.\-]{1,50}",
"desc": "a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.)" "desc": "a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.)"
} },
"POSTAL_ADDRESS": {
"type": "US postal address",
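# MAX RECALL prefilter (street number optional).
# The pattern has two alternation branches, which together cover three cases:
# (1) PO Box patterns (first branch), OR
# (2) optional street number + one or more tokens + a street suffix (second branch), OR
# (3) street suffix with nearby tokens (even without a number) -- the same second branch with the optional number omitted.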
"regex": r"(?is)\b("
r"(?:P\.?\s*O\.?\s*BOX|POST\s+OFFICE\s+BOX)\s*\d{1,6}"
r"|"
r"(?:\d{1,7}\s*)?" # OPTIONAL street number
r"(?:[A-Z0-9][A-Z0-9'.,/#\-]*\s*){1,25}?" # optional-ish tokens before suffix
r"(?:AVE|AVENUE|ST|STREET|RD|ROAD|BLVD|BOULEVARD|DR|DRIVE|LN|LANE|CT|COURT|PL|PLACE|WAY|"
r"PKWY|PARKWAY|CIR|CIRCLE|TER|TERRACE|HWY|HIGHWAY|TRL|TRAIL|SQ|SQUARE|PIKE|LOOP|RUN|WALK|PATH|BYP|BYPASS)\b"
r"(?:\s*(?:,|\s)\s*(?:N|S|E|W|NE|NW|SE|SW))?" # optional directional
r"(?:.{0,60}?\b\d{5}(?:-\d{4})?\b)?" # optional ZIP nearby
r")\b",
"desc": "a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123')"
}
} }