Files
mobile-pii-discovery-agent/model_PII_results/GPT-3.5-turbo/PII_I5_Calendar_20260208T005006Z.jsonl
2026-02-11 22:13:22 -05:00

6 lines
15 KiB
JSON

{"db_path": "selectedDBs\\I5_Calendar.sqlitedb", "PII_type": "EMAIL", "PII": ["ottomatik1234@gmail.com"], "Num_of_PII": 1, "source_columns": ["Calendar.self_identity_email", "Calendar.owner_identity_email", "CalendarItem.contact_identifier", "Identity.address"], "Raw_rows_first_100": [["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["ottomatik1234@gmail.com"], ["mailto:ottomatik1234@gmail.com"]], "Total_raw_rows": 13, "Exploration_sql": "Based on the schema of the tables in the database, the following columns have the potential to contain email addresses:\n- Alarm: email_address\n- Calendar: self_identity_email, owner_identity_email\n- CalendarItem: contact_identifier\n- Identity: address\n- Store: display_name, address, first_name, last_name\n\nTo search for email addresses in these columns, you can use the following SQL query:\n\n```sql\nSELECT email_address FROM Alarm WHERE email_address REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT self_identity_email FROM Calendar WHERE self_identity_email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT owner_identity_email FROM Calendar WHERE owner_identity_email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT contact_identifier FROM CalendarItem WHERE contact_identifier REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT address FROM Identity WHERE address REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT display_name FROM Store WHERE display_name REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT address FROM Store WHERE address REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT first_name FROM Store WHERE first_name REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT last_name FROM Store WHERE last_name REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n```\n\nThis query will search for email addresses in the specified columns of the respective tables.", "Extraction_sql": "Based on the schema of the tables in the database, the following columns have the potential to contain email addresses:\n- Alarm: email_address\n- Calendar: self_identity_email, owner_identity_email\n- CalendarItem: contact_identifier\n- Identity: address\n- Store: display_name, address, first_name, last_name\nTo search for email addresses in these columns, you can use the following SQL query:\n```sql\nSELECT email_address FROM Alarm WHERE email_address REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT self_identity_email FROM Calendar WHERE self_identity_email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT owner_identity_email FROM Calendar WHERE owner_identity_email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT contact_identifier FROM CalendarItem WHERE contact_identifier REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT address FROM Identity WHERE address REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT display_name FROM Store WHERE display_name REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT address FROM Store WHERE address REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT first_name FROM Store WHERE first_name REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT last_name FROM Store WHERE last_name REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n```\nThis query will search for email addresses in the specified columns of the respective tables.", "PII_Prompt": "a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org"}
{"db_path": "selectedDBs\\I5_Calendar.sqlitedb", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "Based on the schema of the available tables, the following tables and columns may contain US phone numbers:\n1. Table: CalendarItem\n - Columns: organizer_id, self_attendee_id, self_identity_phone_number, owner_identity_phone_number\n\n2. Table: Identity\n - Columns: display_name, address, first_name, last_name\n\nTo search for US phone numbers in these tables, you can use the following SQL queries:\n\n```sql\nSELECT organizer_id FROM CalendarItem WHERE organizer_id REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT self_attendee_id FROM CalendarItem WHERE self_attendee_id REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT self_identity_phone_number FROM CalendarItem WHERE self_identity_phone_number REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT owner_identity_phone_number FROM CalendarItem WHERE owner_identity_phone_number REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT display_name FROM Identity WHERE display_name REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT address FROM Identity WHERE address REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT first_name FROM Identity WHERE first_name REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT last_name FROM Identity WHERE last_name REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n```\n\nThese queries will search for US phone numbers in the specified columns of the CalendarItem and Identity tables.", "Extraction_sql": null, "PII_Prompt": "a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725"}
{"db_path": "selectedDBs\\I5_Calendar.sqlitedb", "PII_type": "USERNAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "Based on the schema of the tables in the database, the following tables and columns have the potential to contain usernames:\n- Table: Alarm, Column: email_address\n- Table: Calendar, Columns: display_name, address, first_name, last_name\n- Table: Identity, Columns: name\n- Table: Recurrence, Columns: name\n- Table: Store, Columns: owner_name\n\nTo search for possible usernames in these columns, you can use the following SQL query:\n```sql\nSELECT email_address AS username FROM Alarm WHERE email_address REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT display_name AS username FROM Calendar WHERE display_name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT address AS username FROM Calendar WHERE address REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT first_name AS username FROM Calendar WHERE first_name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT last_name AS username FROM Calendar WHERE last_name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT name AS username FROM Identity WHERE name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT name AS username FROM Recurrence WHERE name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'\nUNION ALL\nSELECT owner_name AS username FROM Store WHERE owner_name REGEXP '\\\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\\\b'", "Extraction_sql": null, "PII_Prompt": "a username is a short textual identifier chosen by a user to represent their account or public handle within an application or service it is stored as plain text contains no whitespace does not include a domain component and is intended for human recognition rather than internal system uniqueness"}
{"db_path": "selectedDBs\\I5_Calendar.sqlitedb", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "Based on the schema of the tables in the database, the following tables and columns have text fields where person names might be found:\n- CalendarItem: contact_name\n- Identity: name\n- Category: display_name, address, first_name, last_name\n\nTo search for possible person names in these columns, you can use the following SQL query:\n\n```sql\nSELECT contact_name FROM CalendarItem WHERE contact_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT name FROM Identity WHERE name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT display_name FROM Category WHERE display_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT address FROM Category WHERE address REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT first_name FROM Category WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT last_name FROM Category WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'", "Extraction_sql": null, "PII_Prompt": "a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.)"}
{"db_path": "selectedDBs\\I5_Calendar.sqlitedb", "PII_type": "POSTAL_ADDRESS", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "Based on the schema of the available tables, the following tables and columns have the potential to contain US postal addresses:\n\n1. Table: CalendarItem\n - Columns: summary, location_id, description, notes, location_prediction_state, structured_data, local_structured_data\n\n2. Table: Store\n - Columns: display_name, address, first_name, last_name\n\nTo search for US postal addresses in these tables, we can use the following SQL queries:\n\n```sql\nSELECT summary FROM CalendarItem WHERE summary REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|ter\\\\.?|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b'\n\nUNION ALL\n\nSELECT location_id FROM CalendarItem WHERE location_id REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b'\n\nUNION ALL\n\nSELECT description FROM CalendarItem WHERE description REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b'\n\nUNION ALL\n\nSELECT notes FROM CalendarItem WHERE notes REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b'\n\nUNION ALL\n\nSELECT structured_data FROM CalendarItem WHERE structured_data REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b'\n\nUNION ALL\n\nSELECT local_structured_data FROM CalendarItem WHERE local_structured_data REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b'\n\nUNION ALL\n\nSELECT display_name FROM Store WHERE display_name REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b'\n\nUNION ALL\n\nSELECT address FROM Store WHERE address REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b'\n\nUNION ALL\n\nSELECT first_name FROM Store WHERE first_name REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b'\n\nUNION ALL\n\nSELECT last_name FROM Store WHERE last_name REGEXP '(?i)\\\\b(?:p\\\\.?\\\\s*o\\\\.?\\\\s*box|post\\\\s+office\\\\s+box|ave\\\\.?|avenue|st\\\\.?|street|rd\\\\.?|road|blvd\\\\.?|boulevard|dr\\\\.?|drive|ln\\\\.?|lane|ct\\\\.?|court|pl\\\\.?|place|way|pkwy\\\\.?|parkway|cir\\\\.?|circle|terrace|hwy\\\\.?|highway|trl\\\\.?|trail|sq\\\\.?|square|pike|loop|run|walk|path|byp\\\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\\\b)\\\\b'", "Extraction_sql": null, "PII_Prompt": "a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123')"}