mirror of
https://github.com/frankwxu/mobile-pii-discovery-agent.git
synced 2026-02-20 13:40:41 +00:00
6 lines
14 KiB
JSON
6 lines
14 KiB
JSON
{"db_path": "selectedDBs\\A1_wa.db", "PII_type": "EMAIL", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "The following tables and columns have been identified as potential sources for email addresses:\n1. Table: quick_promotion_payload\n - Columns: trigger_id, trigger_context\n\n2. Table: wa_address_book\n - Columns: jid, email\n\n3. Table: wa_biz_profiles\n - Columns: jid, email\n\n4. Table: wa_bot_profiles\n - Columns: jid\n\n5. Table: wa_contacts\n - Columns: jid, number, display_name, given_name, family_name, nickname, company, title\n\n6. Table: wa_trusted_contacts\n - Columns: jid\n\n7. Table: wa_trusted_contacts_send\n - Columns: jid\n\nYou can use the following SQL query to search for email addresses in these tables:\n```sql\nSELECT trigger_id AS email FROM quick_promotion_payload WHERE trigger_id REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT trigger_context AS email FROM quick_promotion_payload WHERE trigger_context REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT email FROM wa_address_book WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT email FROM wa_biz_profiles WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT jid AS email FROM wa_biz_profiles WHERE jid REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT jid AS email FROM wa_bot_profiles WHERE jid REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT jid AS email FROM wa_contacts WHERE jid REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT number AS email FROM wa_contacts WHERE number REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT display_name AS email FROM wa_contacts WHERE display_name REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT given_name AS email FROM wa_contacts WHERE given_name REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT family_name AS email FROM wa_contacts WHERE family_name REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT nickname AS email FROM wa_contacts WHERE nickname REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT company AS email FROM wa_contacts WHERE company REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT title AS email FROM wa_contacts WHERE title REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT jid AS email FROM wa_trusted_contacts WHERE jid REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT jid AS email FROM wa_trusted_contacts_send WHERE jid REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}'\n```\nThis query will search for email addresses in the specified columns of the identified tables.", "Extraction_sql": null, "PII_Prompt": "a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org"}
|
|
{"db_path": "selectedDBs\\A1_wa.db", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["5713298742"], ["5713349815"], ["8085096467"], ["8624338328"], ["7034241981"], ["2028177932"], ["5715917168"], ["2025692832"], ["+16106046786"], ["+16263678865"]], "Total_raw_rows": 10, "Exploration_sql": "The columns in the relevant tables are as follows:\n- Table 'wa_address_book':\n - number TEXT\n\n- Table 'wa_contacts':\n - number TEXT\n\n- Table 'wa_newsletter_props':\n - prop_name TEXT\n - prop_value TEXT\n\n- Table 'wa_props':\n - prop_name TEXT\n - prop_value TEXT\n\n- Table 'wa_trusted_contacts':\n - jid TEXT\n - incoming_tc_token BLOB\n - incoming_tc_token_timestamp LONG\n\n- Table 'wa_trusted_contacts_send':\n - jid TEXT\n - sent_tc_token_timestamp LONG\n - real_issue_timestamp LONG\n\nNow, I will search these columns for US phone numbers.\nSELECT number FROM wa_address_book WHERE number REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT number FROM wa_contacts WHERE number REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'", "Extraction_sql": null, "PII_Prompt": "a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725"}
|
|
{"db_path": "selectedDBs\\A1_wa.db", "PII_type": "USERNAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "The following tables and columns may contain usernames:\n1. Table: quick_promotion_payload\n - Columns: trigger_id, trigger_context\n\n2. Table: wa_address_book\n - Columns: jid, display_name, family_name, given_name, nickname\n\n3. Table: wa_biz_profiles\n - Columns: jid, email, tag, name, description, creator_name\n\n4. Table: wa_bot_profiles\n - Columns: jid, tag, name, description, category, creator_name\n\n5. Table: wa_contacts\n - Columns: jid, display_name, family_name, given_name, nickname, wa_name\n\n6. Table: wa_trusted_contacts\n - Columns: jid, incoming_tc_token\n\n7. Table: wa_trusted_contacts_send\n - Columns: jid\n\nYou can use the following SQL query to search for possible usernames in these columns:\n```sql\nSELECT trigger_id AS username FROM quick_promotion_payload WHERE trigger_id REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT trigger_context AS username FROM quick_promotion_payload WHERE trigger_context REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT jid AS username FROM wa_address_book WHERE jid REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT display_name AS username FROM wa_address_book WHERE display_name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT family_name AS username FROM wa_address_book WHERE family_name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT given_name AS username FROM wa_address_book WHERE given_name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT nickname AS username FROM wa_address_book WHERE nickname REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT jid AS username FROM wa_biz_profiles WHERE jid REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT email AS username FROM wa_biz_profiles WHERE email REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT tag AS username FROM wa_biz_profiles WHERE tag REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT name AS username FROM wa_biz_profiles WHERE name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT description AS username FROM wa_biz_profiles WHERE description REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT creator_name AS username FROM wa_biz_profiles WHERE creator_name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT jid AS username FROM wa_bot_profiles WHERE jid REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT tag AS username FROM wa_bot_profiles WHERE tag REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT name AS username FROM wa_bot_profiles WHERE name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT description AS username FROM wa_bot_profiles WHERE description REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT category AS username FROM wa_bot_profiles WHERE category REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT creator_name AS username FROM wa_bot_profiles WHERE creator_name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT jid AS username FROM wa_contacts WHERE jid REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT display_name AS username FROM wa_contacts WHERE display_name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT family_name AS username FROM wa_contacts WHERE family_name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT given_name AS username FROM wa_contacts WHERE given_name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT nickname AS username FROM wa_contacts WHERE nickname REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT wa_name AS username FROM wa_contacts WHERE wa_name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT jid AS username FROM wa_trusted_contacts WHERE jid REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT incoming_tc_token AS username FROM wa_trusted_contacts WHERE incoming_tc_token REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT jid AS username FROM wa_trusted_contacts_send WHERE jid REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'", "Extraction_sql": null, "PII_Prompt": "a username is a short textual identifier chosen by a user to represent their account or public handle within an application or service it is stored as plain text contains no whitespace does not include a domain component and is intended for human recognition rather than internal system uniqueness"}
|
|
{"db_path": "selectedDBs\\A1_wa.db", "PII_type": "PERSON_NAME", "PII": ["Svetlana", "Don", "Karen", "Hank", "Brian", "Goldie", "Vladamir", "Joey", "Mary", "Abe", "Russ", "Otto", "Toks", "Bo", "Chernoff", "Whalen", "Wayne", "Tate", "Reynolds", "Kahn", "Stravinsky", "Garcia", "Rudder", "Philby"], "Num_of_PII": 24, "source_columns": ["wa_contacts.given_name", "wa_contacts.family_name"], "Raw_rows_first_100": [["Svetlana"], ["Don"], ["Karen"], ["Hank"], ["Brian"], ["Goldie"], ["Vladamir"], ["Joey"], ["Mary"], ["Abe"], ["Russ"], ["Voice Mail"], ["Otto"], ["Toks"], ["Bo"], ["Chernoff"], ["Whalen"], ["Wayne"], ["Tate"], ["Reynolds"], ["Kahn"], ["Stravinsky"], ["Garcia"], ["Rudder"], ["Philby"]], "Total_raw_rows": 25, "Exploration_sql": "Based on the schema of the relevant tables, here is the SQL query to find possible person names in the database:\n\n```sql\nSELECT display_name FROM wa_address_book WHERE display_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT given_name FROM wa_contacts WHERE given_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT family_name FROM wa_contacts WHERE family_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT description FROM wa_group_descriptions WHERE description REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n```\n\nThis query will search for loosely structured human name-like strings in the 'display_name' column of 'wa_address_book', 'given_name' and 'family_name' columns of 'wa_contacts', and 'description' column of 'wa_group_descriptions' tables.", "Extraction_sql": "Based on the schema of the relevant tables, here is the SQL query to find possible person names in the database:\n```sql\nSELECT display_name FROM wa_address_book WHERE display_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT given_name FROM wa_contacts WHERE given_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT family_name FROM wa_contacts WHERE family_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT description FROM wa_group_descriptions WHERE description REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n```\nThis query will search for loosely structured human name-like strings in the 'display_name' column of 'wa_address_book', 'given_name' and 'family_name' columns of 'wa_contacts', and 'description' column of 'wa_group_descriptions' tables.", "PII_Prompt": "a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.)"}
|
|
{"db_path": "selectedDBs\\A1_wa.db", "PII_type": "POSTAL_ADDRESS", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "Based on the schema of the available tables, the following tables and columns have the potential to contain US postal addresses:\n\n1. Table: `wa_address_book`\n - Column: `address`\n\n2. Table: `wa_biz_profiles`\n - Columns: `address`, `address_postal_code`, `address_city_id`, `address_city_name`\n\n3. Table: `wa_biz_profiles_websites`\n - Column: `websites`\n\nTo search for US postal addresses in these tables, you can use the following SQL queries:\n\n```sql\nSELECT address FROM wa_address_book WHERE address REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|ter\\\\.?|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b';\n\nUNION ALL\n\nSELECT address, address_postal_code, address_city_id, address_city_name FROM wa_biz_profiles WHERE address REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b';\n\nUNION ALL\n\nSELECT websites FROM wa_biz_profiles_websites WHERE websites REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b';\n``` \n\nThese queries will search for US postal addresses in the specified columns of the respective tables.", "Extraction_sql": null, "PII_Prompt": "a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123')"}
|