mirror of
https://github.com/frankwxu/mobile-pii-discovery-agent.git
synced 2026-02-20 13:40:41 +00:00
6 lines
9.8 KiB
JSON
6 lines
9.8 KiB
JSON
{"db_path": "selectedDBs\\A5_searchengine.db", "PII_type": "EMAIL", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT title FROM searchengine WHERE title REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL SELECT url FROM searchengine WHERE url REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL SELECT extra1 FROM searchengine WHERE extra1 REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL SELECT extra2 FROM searchengine WHERE extra2 REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL SELECT extra3 FROM searchengine WHERE extra3 REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}';", "Extraction_sql": null, "PII_Prompt": "a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org"}
|
|
{"db_path": "selectedDBs\\A5_searchengine.db", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["https://home.imgsmail.ru/resplash/123689/i/meta/favicon.ico"], ["https://search.seznam.cz/re/media/favicon.192a42730e.ico"]], "Total_raw_rows": 2, "Exploration_sql": "SELECT locale FROM android_metadata WHERE locale REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT title FROM searchengine WHERE title REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT url FROM searchengine WHERE url REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT extra1 FROM searchengine WHERE extra1 REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT extra2 FROM searchengine WHERE extra2 REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT extra3 FROM searchengine WHERE extra3 REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'", "Extraction_sql": null, "PII_Prompt": "a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725"}
|
|
{"db_path": "selectedDBs\\A5_searchengine.db", "PII_type": "USERNAME", "PII": ["google", "duckduckgo", "yahoo", "youtube", "bing", "so360", "qwant", "toutiao", "startpage", "shenma", "naver", "yandex", "mailru", "seznam", "yandexru", "daum", "baidu"], "Num_of_PII": 17, "source_columns": ["android_metadata.locale", "searchengine.title", "searchengine.url", "searchengine.image_url", "searchengine.extra1", "searchengine.extra2", "searchengine.extra3"], "Raw_rows_first_100": [["en_US"], ["google"], ["DuckDuckGo"], ["yahoo"], ["youtube"], ["bing"], ["so360"], ["qwant"], ["toutiao"], ["StartPage"], ["shenma"], ["naver"], ["yandex_com"], ["mail_ru"], ["seznam"], ["yandex_ru"], ["daum"], ["baidu"], ["youtube"], ["google"], ["https://duckduckgo.com/favicon.ico"], ["https://home.imgsmail.ru/resplash/123689/i/meta/favicon.ico"], ["https://m.toutiao.com/favicon.ico"], ["https://p0.ssl.qhimg.com/d/inn/128c749e/icon.png"], ["https://search.daum.net/favicon.ico"], ["https://search.naver.com/favicon.ico"], ["https://search.seznam.cz/re/media/favicon.192a42730e.ico"], ["https://search.yahoo.com/favicon.ico"], ["https://sm01.alicdn.com/L1/272/1990/favicon/favicon.ico"], ["https://www.baidu.com/favicon.ico"], ["https://www.bing.com/sa/simg/bing_p_rr_teal_min.ico"], ["https://www.google.com/favicon.ico"], ["https://www.google.com/images/branding/product/ico/googleg_alldp.ico"], ["https://www.qwant.com/favicon-64.png"], ["https://www.startpage.com/favicon.ico"], ["https://www.youtube.com/img/favicon_32.png"], ["https://www.youtube.com/s/desktop/fadc8afc/img/favicon_48x48.png"], ["https://yandex.com/favicon.ico"], ["https://yandex.ru/favicon.ico"]], "Total_raw_rows": 39, "Exploration_sql": "SELECT locale FROM android_metadata WHERE locale REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT title FROM searchengine WHERE title REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT url FROM searchengine WHERE url REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT image_url FROM searchengine WHERE image_url REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT extra1 FROM searchengine WHERE extra1 REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT extra2 FROM searchengine WHERE extra2 REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT extra3 FROM searchengine WHERE extra3 REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';", "Extraction_sql": "SELECT locale FROM android_metadata WHERE locale REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT title FROM searchengine WHERE title REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT url FROM searchengine WHERE url REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT image_url FROM searchengine WHERE image_url REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT extra1 FROM searchengine WHERE extra1 REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT extra2 FROM searchengine WHERE extra2 REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT extra3 FROM searchengine WHERE extra3 REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';", "PII_Prompt": "a username is a short textual identifier chosen by a user to represent their account or public handle within an application or service it is stored as plain text contains no whitespace does not include a domain component and is intended for human recognition rather than internal system uniqueness"}
|
|
{"db_path": "selectedDBs\\A5_searchengine.db", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["en_US"], ["google"], ["DuckDuckGo"], ["yahoo"], ["youtube"], ["bing"], ["so360"], ["qwant"], ["toutiao"], ["StartPage"], ["shenma"], ["https://duckduckgo.com/favicon.ico"], ["https://home.imgsmail.ru/resplash/123689/i/meta/favicon.ico"], ["https://m.toutiao.com/favicon.ico"], ["https://p0.ssl.qhimg.com/d/inn/128c749e/icon.png"], ["https://search.daum.net/favicon.ico"], ["https://search.naver.com/favicon.ico"], ["https://search.seznam.cz/re/media/favicon.192a42730e.ico"], ["https://search.yahoo.com/favicon.ico"], ["https://sm01.alicdn.com/L1/272/1990/favicon/favicon.ico"], ["https://www.baidu.com/favicon.ico"]], "Total_raw_rows": 21, "Exploration_sql": "SELECT locale FROM android_metadata WHERE locale REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT title FROM searchengine WHERE title REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT url FROM searchengine WHERE url REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT extra1 FROM searchengine WHERE extra1 REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT extra2 FROM searchengine WHERE extra2 REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT extra3 FROM searchengine WHERE extra3 REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';", "Extraction_sql": null, "PII_Prompt": "a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.)"}
|
|
{"db_path": "selectedDBs\\A5_searchengine.db", "PII_type": "POSTAL_ADDRESS", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["https://www.youtube.com/s/desktop/fadc8afc/img/favicon_48x48.png"]], "Total_raw_rows": 1, "Exploration_sql": "SELECT title FROM searchengine WHERE title REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \nUNION ALL \nSELECT url FROM searchengine WHERE url REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \nUNION ALL \nSELECT extra1 FROM searchengine WHERE extra1 REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \nUNION ALL \nSELECT extra2 FROM searchengine WHERE extra2 REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \nUNION ALL \nSELECT extra3 FROM searchengine WHERE extra3 REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';", "Extraction_sql": null, "PII_Prompt": "a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123')"}
|