mirror of
https://github.com/frankwxu/mobile-pii-discovery-agent.git
synced 2026-02-20 13:40:41 +00:00
6 lines
10 KiB
JSON
6 lines
10 KiB
JSON
{"db_path": "selectedDBs\\I5_Calendar.sqlitedb", "PII_type": "EMAIL", "PII": ["ottomatik1234@gmail.com"], "Num_of_PII": 1, "source_columns": ["Alarm.email_address", "Calendar.shared_owner_address", "Calendar.self_identity_email", "Calendar.owner_identity_email"], "Raw_rows_first_100": [["mailto:ottomatik1234@gmail.com"], ["mailto:ottomatik1234@gmail.com"], ["mailto:ottomatik1234@gmail.com"], ["mailto:ottomatik1234@gmail.com"], ["mailto:ottomatik1234@gmail.com"], ["mailto:ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"]], "Total_raw_rows": 18, "Exploration_sql": "SELECT email_address FROM Alarm WHERE email_address REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT shared_owner_address FROM Calendar WHERE shared_owner_address REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT self_identity_email FROM Calendar WHERE self_identity_email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT owner_identity_email FROM Calendar WHERE owner_identity_email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}';", "Extraction_sql": "SELECT email_address FROM Alarm WHERE email_address REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT shared_owner_address FROM Calendar WHERE shared_owner_address REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT self_identity_email FROM Calendar WHERE self_identity_email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT owner_identity_email FROM Calendar WHERE owner_identity_email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}';", "PII_Prompt": "a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org"}
|
|
{"db_path": "selectedDBs\\I5_Calendar.sqlitedb", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT self_identity_phone_number FROM Calendar WHERE self_identity_phone_number REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT owner_identity_phone_number FROM Calendar WHERE owner_identity_phone_number REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT description FROM CalendarItem WHERE description REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT notes FROM Calendar WHERE notes REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}';", "Extraction_sql": null, "PII_Prompt": "a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725"}
|
|
{"db_path": "selectedDBs\\I5_Calendar.sqlitedb", "PII_type": "USERNAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["Default"], ["Facebook Birthdays"], ["Found in Mail"], ["Found in Natural Language"], ["Home"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["Work"], ["US Holidays"], ["ottomatik1234@gmail.com"], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["This date is approximate because it is based on a lunar calendar; the beginning of Ramadan is the day after the new moon. "], ["This date is approximate because it is based on a lunar calendar; the beginning of Ramadan is the day after the new moon. "], ["Otto Matik"], ["Holidays"], ["com.apple.dataaccessd-3D8B9641-4D39-4E7D-A007-B69A2F855F6B"], ["com.apple.dataaccessd-FB18EA77-54AF-4788-865E-B5C4DF5E2818"], ["com.apple.suggestd"], ["Otto Matik"]], "Total_raw_rows": 26, "Exploration_sql": "SELECT title FROM Calendar WHERE title REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \nSELECT summary FROM CalendarChanges WHERE summary REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \nSELECT description FROM CalendarItem WHERE description REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \nSELECT contact_name FROM CalendarItem WHERE contact_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \nSELECT name FROM Category WHERE name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \nSELECT client_identifier FROM ClientCursor WHERE client_identifier REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \nSELECT display_name FROM Identity WHERE display_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';", "Extraction_sql": null, "PII_Prompt": "a username is a short textual identifier chosen by a user to represent their account or public handle within an application or service it is stored as plain text contains no whitespace does not include a domain component and is intended for human recognition rather than internal system uniqueness"}
|
|
{"db_path": "selectedDBs\\I5_Calendar.sqlitedb", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["Default"], ["Facebook Birthdays"], ["Found in Mail"], ["Found in Natural Language"], ["Home"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["Work"], ["US Holidays"], ["ottomatik1234@gmail.com"], ["Hanukkah (1st day)"], ["Eid al-Adha"], ["Ashura"], ["Easter"], ["Eid al-Fitr"], ["Ashura"], ["Eid al-Adha"], ["Daylight Saving Time End"], ["Inauguration Day"], ["Eid al-Fitr"], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["The exact date of this holiday is difficult to predict precisely; this is just an approximation."], ["This date is approximate because it is based on a lunar calendar; the beginning of Ramadan is the day after the new moon. "], ["This date is approximate because it is based on a lunar calendar; the beginning of Ramadan is the day after the new moon. "], ["Otto Matik"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"]], "Total_raw_rows": 43, "Exploration_sql": "SELECT title FROM Calendar WHERE title REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT summary FROM CalendarItem WHERE summary REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT description FROM CalendarItem WHERE description REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT contact_name FROM Identity WHERE contact_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT display_name FROM Identity WHERE display_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT owner_identity_email FROM Calendar WHERE owner_identity_email REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT owner_identity_phone_number FROM Calendar WHERE owner_identity_phone_number REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT self_identity_email FROM Calendar WHERE self_identity_email REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT self_identity_phone_number FROM Calendar WHERE self_identity_phone_number REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';", "Extraction_sql": null, "PII_Prompt": "a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.)"}
|
|
{"db_path": "selectedDBs\\I5_Calendar.sqlitedb", "PII_type": "POSTAL_ADDRESS", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT notes FROM Calendar WHERE notes REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \nUNION ALL \nSELECT description FROM CalendarItem WHERE description REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'", "Extraction_sql": null, "PII_Prompt": "a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123')"}
|