Files
mobile-pii-discovery-agent/model_PII_results/GPT-4o-mini/PII_A1_wa_20260208T171020Z.jsonl
2026-02-11 22:13:22 -05:00

6 lines
11 KiB
JSON
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{"db_path": "selectedDBs\\A1_wa.db", "PII_type": "EMAIL", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["12014946184@s.whatsapp.net"], ["12025692832@s.whatsapp.net"], ["12028177932@s.whatsapp.net"], ["12028275725@s.whatsapp.net"], ["120363098389106519@g.us"], ["12037188989@s.whatsapp.net"], ["12065937224@s.whatsapp.net"], ["12088549831@s.whatsapp.net"], ["12089234440@s.whatsapp.net"], ["12092759604@s.whatsapp.net"]], "Total_raw_rows": 10, "Exploration_sql": "SELECT email FROM wa_biz_profiles WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT jid FROM wa_contacts WHERE jid REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT account_id FROM wa_biz_profiles_linked_accounts_table WHERE account_id REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT websites FROM wa_biz_profiles_websites WHERE websites REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}';", "Extraction_sql": null, "PII_Prompt": "a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org"}
{"db_path": "selectedDBs\\A1_wa.db", "PII_type": "PHONE", "PII": ["19199037779", "8085096467", "5713349815", "16263678865", "16106046786", "7034241981", "5715917168", "2065937224", "5713298742", "8624338328", "18056377243", "2028177932", "2025692832", "19735203731", "81367430271", "17423794330", "5713298742", "5713349815", "8085096467", "8624338328", "7034241981", "2028177932", "5715917168", "2025692832", "16106046786", "16263678865", "2065937224", "19199037779", "18056377243", "19735203731", "81367430271", "17423794330"], "Num_of_PII": 32, "source_columns": ["wa_address_book.number", "wa_contacts.number"], "Raw_rows_first_100": [["+19199037779"], ["8085096467"], ["5713349815"], ["+16263678865"], ["+16106046786"], ["7034241981"], ["5715917168"], ["+12065937224"], ["5713298742"], ["8624338328"], ["+18056377243"], ["2028177932"], ["2025692832"], ["+19735203731"], ["+81367430271"], ["+17423794330"], ["5713298742"], ["5713349815"], ["8085096467"], ["8624338328"], ["7034241981"], ["2028177932"], ["5715917168"], ["2025692832"], ["+16106046786"], ["+16263678865"], ["+12065937224"], ["+19199037779"], ["+18056377243"], ["+19735203731"], ["+81367430271"], ["+17423794330"]], "Total_raw_rows": 32, "Exploration_sql": "SELECT number FROM wa_address_book WHERE number REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT number FROM wa_biz_profiles WHERE number REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT number FROM wa_contacts WHERE number REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}';", "Extraction_sql": "SELECT number FROM wa_address_book WHERE number REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT number FROM wa_biz_profiles WHERE number REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT number FROM wa_contacts WHERE number REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}';", "PII_Prompt": "a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725"}
{"db_path": "selectedDBs\\A1_wa.db", "PII_type": "USERNAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["Russ Philby"], ["Capt Don Wayne"], ["LTC Whalen"], ["Mary Garcia"], ["Joey"], ["Hank"], ["Goldie Kahn"], ["Abe Rudder"], ["Svetlana Chernoff"], ["Karen Tate"], ["Svetlana Chernoff"], ["LTC Whalen"], ["Capt Don Wayne"], ["Karen Tate"], ["Hank"], ["Brian Reynolds"], ["Goldie Kahn"], ["Vladamir Stravinsky"], ["6⃣ Wealth Builders Club"], ["Joey"]], "Total_raw_rows": 20, "Exploration_sql": "SELECT display_name FROM wa_address_book WHERE display_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT nickname FROM wa_address_book WHERE nickname REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT display_name FROM wa_contacts WHERE display_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT nickname FROM wa_contacts WHERE nickname REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT custom_url FROM wa_biz_profiles WHERE custom_url REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT verified_name FROM wa_biz_profiles WHERE verified_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';", "Extraction_sql": null, "PII_Prompt": "a username is a short textual identifier chosen by a user to represent their account or public handle within an application or service it is stored as plain text contains no whitespace does not include a domain component and is intended for human recognition rather than internal system uniqueness"}
{"db_path": "selectedDBs\\A1_wa.db", "PII_type": "PERSON_NAME", "PII": ["Svetlana Chernoff", "LTC Whalen", "Capt Don Wayne", "Karen Tate", "Hank", "Brian Reynolds", "Goldie Kahn", "Vladamir Stravinsky", "Joey", "Mary Garcia", "Abe Rudder", "Russ Philby", "Otto", "Toks", "Bo"], "Num_of_PII": 15, "source_columns": ["wa_contacts.display_name", "wa_contacts.given_name", "wa_contacts.family_name", "wa_contacts.nickname", "wa_address_book.display_name", "wa_address_book.given_name", "wa_address_book.family_name", "wa_address_book.nickname"], "Raw_rows_first_100": [["Svetlana Chernoff"], ["LTC Whalen"], ["Capt Don Wayne"], ["Karen Tate"], ["Hank"], ["Brian Reynolds"], ["Goldie Kahn"], ["Vladamir Stravinsky"], ["6⃣ Wealth Builders Club"], ["Joey"], ["Mary Garcia"], ["Abe Rudder"], ["Russ Philby"], ["Voice Mail"], ["Otto"], ["Toks"], ["Bo"], ["Svetlana"], ["Don"], ["Karen"], ["Hank"], ["Brian"], ["Goldie"], ["Vladamir"], ["Joey"], ["Mary"], ["Abe"], ["Russ"], ["Voice Mail"], ["Otto"], ["Toks"], ["Bo"], ["Chernoff"], ["Whalen"], ["Wayne"], ["Tate"], ["Reynolds"], ["Kahn"], ["Stravinsky"], ["Garcia"], ["Rudder"], ["Philby"], ["Russ Philby"], ["Capt Don Wayne"], ["LTC Whalen"], ["Mary Garcia"], ["Joey"], ["Hank"], ["Goldie Kahn"], ["Abe Rudder"], ["Svetlana Chernoff"], ["Karen Tate"], ["Voice Mail"], ["Brian Reynolds"], ["Vladamir Stravinsky"], ["Otto"], ["Toks"], ["Bo"], ["Russ"], ["Don"], ["Mary"], ["Joey"], ["Hank"], ["Goldie"], ["Abe"], ["Svetlana"], ["Karen"], ["Voice Mail"], ["Brian"], ["Vladamir"], ["Otto"], ["Toks"], ["Bo"], ["Philby"], ["Wayne"], ["Whalen"], ["Garcia"], ["Kahn"], ["Rudder"], ["Chernoff"], ["Tate"], ["Reynolds"], ["Stravinsky"]], "Total_raw_rows": 83, "Exploration_sql": "SELECT display_name FROM wa_contacts WHERE display_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT given_name FROM wa_contacts WHERE given_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT family_name FROM wa_contacts WHERE family_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT nickname FROM wa_contacts WHERE nickname REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT display_name FROM wa_address_book WHERE display_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT given_name FROM wa_address_book WHERE given_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT family_name FROM wa_address_book WHERE family_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT nickname FROM wa_address_book WHERE nickname REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';", "Extraction_sql": "SELECT display_name FROM wa_contacts WHERE display_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT given_name FROM wa_contacts WHERE given_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT family_name FROM wa_contacts WHERE family_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT nickname FROM wa_contacts WHERE nickname REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT display_name FROM wa_address_book WHERE display_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT given_name FROM wa_address_book WHERE given_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT family_name FROM wa_address_book WHERE family_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT nickname FROM wa_address_book WHERE nickname REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';", "PII_Prompt": "a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.)"}
{"db_path": "selectedDBs\\A1_wa.db", "PII_type": "POSTAL_ADDRESS", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT display_name FROM wa_contacts WHERE display_name REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \nUNION ALL \nSELECT company FROM wa_contacts WHERE company REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \nUNION ALL \nSELECT display_name FROM wa_address_book WHERE display_name REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \nUNION ALL \nSELECT company FROM wa_address_book WHERE company REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';", "Extraction_sql": null, "PII_Prompt": "a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123')"}