mirror of
https://github.com/frankwxu/mobile-pii-discovery-agent.git
synced 2026-02-20 13:40:41 +00:00
6 lines
10 KiB
JSON
6 lines
10 KiB
JSON
{"db_path": "selectedDBs\\A5_SBrowser.db", "PII_type": "EMAIL", "PII": ["sharononeil368@gmail.com"], "Num_of_PII": 1, "source_columns": ["BOOKMARKS.ACCOUNT_TYPE", "INTERNET_SYNC.SYNC_KEY", "SYNC_STATE.account_name", "TABS.ACCOUNT_NAME"], "Raw_rows_first_100": [["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"]], "Total_raw_rows": 5, "Exploration_sql": "The following columns may contain email addresses:\n- BOOKMARKS: ACCOUNT_NAME, ACCOUNT_TYPE\n- INTERNET_SYNC: SYNC_KEY\n- SYNC_STATE: account_name\n- TABS: ACCOUNT_NAME\n\nYou can use the following SQL query to search for email addresses in these columns:\n```sql\nSELECT ACCOUNT_NAME FROM BOOKMARKS WHERE ACCOUNT_NAME REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT ACCOUNT_TYPE FROM BOOKMARKS WHERE ACCOUNT_TYPE REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT SYNC_KEY FROM INTERNET_SYNC WHERE SYNC_KEY REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT account_name FROM SYNC_STATE WHERE account_name REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT ACCOUNT_NAME FROM TABS WHERE ACCOUNT_NAME REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'", "Extraction_sql": "The following columns may contain email addresses:\n- BOOKMARKS: ACCOUNT_NAME, ACCOUNT_TYPE\n- INTERNET_SYNC: SYNC_KEY\n- SYNC_STATE: account_name\n- TABS: ACCOUNT_NAME\nYou can use the following SQL query to search for email addresses in these columns:\n```sql\nSELECT ACCOUNT_NAME FROM BOOKMARKS WHERE ACCOUNT_NAME REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT ACCOUNT_TYPE FROM BOOKMARKS WHERE ACCOUNT_TYPE REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT SYNC_KEY FROM INTERNET_SYNC WHERE SYNC_KEY REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT account_name FROM SYNC_STATE WHERE account_name REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT ACCOUNT_NAME FROM TABS WHERE ACCOUNT_NAME REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'", "PII_Prompt": "a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org"}
|
|
{"db_path": "selectedDBs\\A5_SBrowser.db", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["https://www.pinterest.com/pin/410812797236816112/"]], "Total_raw_rows": 1, "Exploration_sql": "Based on the schema of the available tables, the following columns may contain US phone numbers:\n- In the BOOKMARKS table: URL, TITLE, TAGS\n- In the INTERNET_SYNC table: SYNC_KEY\n- In the SYNC_STATE table: SYNC_VALUE\n- In the TABS table: TAB_URL, TAB_TITLE\n- In the android_metadata table: No relevant columns\n\nNow, I will construct SQL queries to search for US phone numbers in these columns.\n```sql\nSELECT URL FROM BOOKMARKS WHERE URL REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT TITLE FROM BOOKMARKS WHERE TITLE REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT TAGS FROM BOOKMARKS WHERE TAGS REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT SYNC_KEY FROM INTERNET_SYNC WHERE SYNC_KEY REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT SYNC_VALUE FROM SYNC_STATE WHERE SYNC_VALUE REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT TAB_URL FROM TABS WHERE TAB_URL REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT TAB_TITLE FROM TABS WHERE TAB_TITLE REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'", "Extraction_sql": null, "PII_Prompt": "a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725"}
|
|
{"db_path": "selectedDBs\\A5_SBrowser.db", "PII_type": "USERNAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "Based on the schema of the tables, the following columns may contain usernames:\n- BOOKMARKS: ACCOUNT_NAME\n- INTERNET_SYNC: SYNC_KEY\n- SYNC_STATE: account_name\n- TABS: ACCOUNT_NAME\n- android_metadata: No relevant columns for usernames\n\nTo search for possible usernames, you can use the following SQL query:\n```sql\nSELECT ACCOUNT_NAME FROM BOOKMARKS WHERE ACCOUNT_NAME REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT SYNC_KEY FROM INTERNET_SYNC WHERE SYNC_KEY REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT account_name FROM SYNC_STATE WHERE account_name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT ACCOUNT_NAME FROM TABS WHERE ACCOUNT_NAME REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'", "Extraction_sql": null, "PII_Prompt": "a username is a short textual identifier chosen by a user to represent their account or public handle within an application or service it is stored as plain text contains no whitespace does not include a domain component and is intended for human recognition rather than internal system uniqueness"}
|
|
{"db_path": "selectedDBs\\A5_SBrowser.db", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["sync_internet_data"], ["sync_bookmarks"], ["sync_open_pages"], ["sync_saved_pages"], ["SBROWSER_TAB1724467631361__BROWSER1724467632107__SBROWSER_SAVEDPAGES1724467635256__QUICKACCESS_SYNC_V21724467633471__SBROWSER_HISTORY1724467634746__"], ["Pin on Simon, God of Hairdos"], ["puck from.glee - Google Search"], ["Midjourney AI - Free Image Generator"], ["billie eilish birds of a feather lyrics - Google Search"]], "Total_raw_rows": 9, "Exploration_sql": "Based on the schema of the tables, the following columns may contain person names:\n- BOOKMARKS: TITLE, TAGS\n- INTERNET_SYNC: SYNC_KEY\n- SYNC_STATE: data\n- TABS: TAB_TITLE\n\nTo search for person names in these columns, you can use the following SQL query:\n\n```sql\nSELECT TITLE AS person_name FROM BOOKMARKS WHERE TITLE REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT TAGS AS person_name FROM BOOKMARKS WHERE TAGS REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT SYNC_KEY AS person_name FROM INTERNET_SYNC WHERE SYNC_KEY REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT data AS person_name FROM SYNC_STATE WHERE data REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT TAB_TITLE AS person_name FROM TABS WHERE TAB_TITLE REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'", "Extraction_sql": null, "PII_Prompt": "a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.)"}
|
|
{"db_path": "selectedDBs\\A5_SBrowser.db", "PII_type": "POSTAL_ADDRESS", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "Based on the schema of the available tables, there is no specific column that directly indicates the presence of a US postal address. However, we can search through text fields in these tables to find possible US postal addresses. Let's search through the text fields in these tables for US postal addresses.\nSELECT URL FROM BOOKMARKS WHERE URL REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|ter\\\\.?|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b' UNION ALL \nSELECT SYNC_VALUE FROM INTERNET_SYNC WHERE SYNC_VALUE REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b' UNION ALL \nSELECT data FROM SYNC_STATE WHERE data REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b' UNION ALL \nSELECT TAB_URL FROM TABS WHERE TAB_URL REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b' UNION ALL \nSELECT locale FROM android_metadata WHERE locale REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b'", "Extraction_sql": null, "PII_Prompt": "a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123')"}
|