mirror of
https://github.com/frankwxu/mobile-pii-discovery-agent.git
synced 2026-02-20 13:40:41 +00:00
update results
This commit is contained in:
4
batch_results/PII_A1_commerce_20260120T224053Z.jsonl
Normal file
4
batch_results/PII_A1_commerce_20260120T224053Z.jsonl
Normal file
@@ -0,0 +1,4 @@
|
||||
{"db_path": "selectedDBs\\A1_commerce.db", "PII_type": "EMAIL", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "No user tables are available in the database to query for possible email addresses.", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A1_commerce.db", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT locale FROM android_metadata WHERE locale REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A1_commerce.db", "PII_type": "USERNAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["en_US"]], "Total_raw_rows": 1, "Exploration_sql": "SELECT locale FROM android_metadata WHERE locale REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A1_commerce.db", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "No user tables are available in the database to query for possible human name-like strings.", "Extraction_sql": null}
|
||||
4
batch_results/PII_A1_msgstore_20260120T224142Z.jsonl
Normal file
4
batch_results/PII_A1_msgstore_20260120T224142Z.jsonl
Normal file
File diff suppressed because one or more lines are too long
4
batch_results/PII_A1_wa_20260120T224400Z.jsonl
Normal file
4
batch_results/PII_A1_wa_20260120T224400Z.jsonl
Normal file
File diff suppressed because one or more lines are too long
4
batch_results/PII_A2_core_20260120T224440Z.jsonl
Normal file
4
batch_results/PII_A2_core_20260120T224440Z.jsonl
Normal file
File diff suppressed because one or more lines are too long
4
batch_results/PII_A2_journal_20260120T224502Z.jsonl
Normal file
4
batch_results/PII_A2_journal_20260120T224502Z.jsonl
Normal file
@@ -0,0 +1,4 @@
|
||||
{"db_path": "selectedDBs\\A2_journal.db", "PII_type": "EMAIL", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT path FROM journal WHERE path REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL SELECT key FROM journal_entry WHERE key REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL SELECT value_count FROM journal_entry WHERE value_count REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A2_journal.db", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "The existing tables and their columns are as follows:\n\n1. **journal**\n - _id INTEGER\n - path TEXT\n - size INTEGER\n - locked_size INTEGER\n\n2. **journal_entry**\n - _id INTEGER\n - journal_id INTEGER\n - key TEXT\n - sequence_number INTEGER\n - value_count INTEGER\n - status INTEGER\n - last_update_time INTEGER\n - last_read_time INTEGER\n - lock_count INTEGER\n - total_size INTEGER\n - value_sizes BLOB\n - expiration INTEGER\n - last_consumed_time INTEGER\n - metadata BLOB\n\nBased on the available columns, the only potential candidate for containing a US phone number would be the `key` column in the `journal_entry` table, as it is a text field. \n\nHere is the SQL query to find possible US phone numbers:\n\n```sql\nSELECT key FROM journal_entry WHERE key REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A2_journal.db", "PII_type": "USERNAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["/data/data/com.snapchat.android/files/file_manager/BLOOPS_STICKER"], ["/data/data/com.snapchat.android/files/file_manager/Bitmoji_Preview"], ["/data/data/com.snapchat.android/files/file_manager/LENS_ASSET_CONTENT_TYPE_NAME"], ["/data/data/com.snapchat.android/files/file_manager/Live_Mirror_Model"], ["/data/data/com.snapchat.android/files/file_manager/Login_Kit_Privacy"], ["/data/data/com.snapchat.android/files/file_manager/MUSIC_GENERIC_ASSET_TYPE"], ["/data/data/com.snapchat.android/files/file_manager/Maps_Kashmir"], ["/data/data/com.snapchat.android/files/file_manager/Maps_WorldEffects"], ["/data/data/com.snapchat.android/files/file_manager/Perception"], ["/data/data/com.snapchat.android/files/file_manager/PerceptionMl"], ["12DB3FD3B46FC8F9DD60F79CB359FBFE.khand_medium"], ["9F3F465DC00D96696DDDFE0A946AAB99.khand_medium"], ["2FC6ABAAFF969A947FAB4E52FE0971FC.thumbnail"], ["C730963C61386A34712C819CA25436C9.media"], ["70177660B739FDDF75DE848B97DC6A6E.edits"], ["03FD66A15523689AD035E1E2B1AD6DAE.chat_wallpaper_media"], ["D41F76126B39D1F7E7EC3D8FA4079D0F.discover_story_streaming_snap"], ["5BBE52CE6D0010CB50CA3221C4741E7D.discover_story_streaming_snap_ff"], ["F05AD4876AFE7190FBF88E879238978A.discover_story_streaming_snap_ff"], ["67B685FF2948DC22416716E822D4F5A1.discover_story_streaming_snap_ff"]], "Total_raw_rows": 20, "Exploration_sql": "SELECT path FROM journal WHERE path REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT key FROM journal_entry WHERE key REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A2_journal.db", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["/data/data/com.snapchat.android/files/file_manager/BLOOPS_STICKER"], ["/data/data/com.snapchat.android/files/file_manager/Bitmoji_Preview"], ["/data/data/com.snapchat.android/files/file_manager/LENS_ASSET_CONTENT_TYPE_NAME"], ["/data/data/com.snapchat.android/files/file_manager/Live_Mirror_Model"], ["/data/data/com.snapchat.android/files/file_manager/Login_Kit_Privacy"], ["/data/data/com.snapchat.android/files/file_manager/MUSIC_GENERIC_ASSET_TYPE"], ["/data/data/com.snapchat.android/files/file_manager/Maps_Kashmir"], ["/data/data/com.snapchat.android/files/file_manager/Maps_WorldEffects"], ["/data/data/com.snapchat.android/files/file_manager/Perception"], ["/data/data/com.snapchat.android/files/file_manager/PerceptionMl"], ["12DB3FD3B46FC8F9DD60F79CB359FBFE.khand_medium"], ["9F3F465DC00D96696DDDFE0A946AAB99.khand_medium"], ["2FC6ABAAFF969A947FAB4E52FE0971FC.thumbnail"], ["C730963C61386A34712C819CA25436C9.media"], ["70177660B739FDDF75DE848B97DC6A6E.edits"], ["03FD66A15523689AD035E1E2B1AD6DAE.chat_wallpaper_media"], ["D41F76126B39D1F7E7EC3D8FA4079D0F.discover_story_streaming_snap"], ["5BBE52CE6D0010CB50CA3221C4741E7D.discover_story_streaming_snap_ff"], ["F05AD4876AFE7190FBF88E879238978A.discover_story_streaming_snap_ff"], ["67B685FF2948DC22416716E822D4F5A1.discover_story_streaming_snap_ff"]], "Total_raw_rows": 20, "Exploration_sql": "SELECT path FROM journal WHERE path REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT key FROM journal_entry WHERE key REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT value FROM journal_entry WHERE value REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'", "Extraction_sql": null}
|
||||
4
batch_results/PII_A2_main_20260120T225136Z.jsonl
Normal file
4
batch_results/PII_A2_main_20260120T225136Z.jsonl
Normal file
File diff suppressed because one or more lines are too long
@@ -0,0 +1,4 @@
|
||||
{"db_path": "selectedDBs\\A3_account1cache4.db", "PII_type": "EMAIL", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT title FROM dialog_filter WHERE title REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL SELECT id FROM params WHERE id REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A3_account1cache4.db", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT title FROM dialog_filter WHERE title REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL SELECT pbytes FROM params WHERE pbytes REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A3_account1cache4.db", "PII_type": "USERNAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["ALL_CHATS"]], "Total_raw_rows": 1, "Exploration_sql": "SELECT title FROM dialog_filter WHERE title REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT id FROM params WHERE id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A3_account1cache4.db", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["ALL_CHATS"]], "Total_raw_rows": 1, "Exploration_sql": "SELECT title FROM dialog_filter WHERE title REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT id FROM params WHERE id REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'", "Extraction_sql": null}
|
||||
@@ -0,0 +1,4 @@
|
||||
{"db_path": "selectedDBs\\A3_account2cache4.db", "PII_type": "EMAIL", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT title FROM dialog_filter WHERE title REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL SELECT CAST(pbytes AS TEXT) FROM params WHERE CAST(pbytes AS TEXT) REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}';", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A3_account2cache4.db", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT title FROM dialog_filter WHERE title REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL SELECT pbytes FROM params WHERE pbytes REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A3_account2cache4.db", "PII_type": "USERNAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["ALL_CHATS"]], "Total_raw_rows": 1, "Exploration_sql": "SELECT title FROM dialog_filter WHERE title REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT id FROM params WHERE id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A3_account2cache4.db", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["ALL_CHATS"]], "Total_raw_rows": 1, "Exploration_sql": "SELECT title FROM dialog_filter WHERE title REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT id FROM params WHERE id REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'", "Extraction_sql": null}
|
||||
@@ -0,0 +1,4 @@
|
||||
{"db_path": "selectedDBs\\A3_account3cache4.db", "PII_type": "EMAIL", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT title FROM dialog_filter WHERE title REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL SELECT pbytes FROM params WHERE pbytes REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A3_account3cache4.db", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT title FROM dialog_filter WHERE title REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL SELECT pbytes FROM params WHERE pbytes REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A3_account3cache4.db", "PII_type": "USERNAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["ALL_CHATS"]], "Total_raw_rows": 1, "Exploration_sql": "SELECT title FROM dialog_filter WHERE title REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT id FROM params WHERE id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A3_account3cache4.db", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["ALL_CHATS"]], "Total_raw_rows": 1, "Exploration_sql": "SELECT title FROM dialog_filter WHERE title REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT id FROM params WHERE id REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'", "Extraction_sql": null}
|
||||
4
batch_results/PII_A4_gmm_myplaces_20260120T225259Z.jsonl
Normal file
4
batch_results/PII_A4_gmm_myplaces_20260120T225259Z.jsonl
Normal file
@@ -0,0 +1,4 @@
|
||||
{"db_path": "selectedDBs\\A4_gmm_myplaces.db", "PII_type": "EMAIL", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT key_string FROM sync_item WHERE key_string REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A4_gmm_myplaces.db", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT key_string FROM sync_item WHERE key_string REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL SELECT corpus FROM sync_corpus WHERE corpus REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A4_gmm_myplaces.db", "PII_type": "USERNAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["en_US"]], "Total_raw_rows": 1, "Exploration_sql": "SELECT locale FROM android_metadata WHERE locale REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT key_string FROM sync_item WHERE key_string REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A4_gmm_myplaces.db", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT key_string FROM sync_item WHERE key_string REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT last_sync_time FROM sync_corpus WHERE last_sync_time REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'", "Extraction_sql": null}
|
||||
4
batch_results/PII_A4_gmm_storage_20260120T225320Z.jsonl
Normal file
4
batch_results/PII_A4_gmm_storage_20260120T225320Z.jsonl
Normal file
File diff suppressed because one or more lines are too long
@@ -0,0 +1,4 @@
|
||||
{"db_path": "selectedDBs\\A4_peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII_type": "EMAIL", "PII": ["heather@cellebrite.com", "hmahalik@gmail.com"], "Num_of_PII": 2, "source_columns": ["Tokens.value", "Tokens_content.c1value", "Tokens_stat.value", "CacheInfo.affinity_response_context"], "Raw_rows_first_100": [["heather@cellebrite.com"], ["hmahalik@gmail.com"], ["heather@cellebrite.com"], ["hmahalik@gmail.com"]], "Total_raw_rows": 4, "Exploration_sql": "SELECT value FROM Tokens WHERE value REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL \nSELECT c1value FROM Tokens_content WHERE c1value REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL \nSELECT value FROM Tokens_stat WHERE value REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL \nSELECT affinity_response_context FROM CacheInfo WHERE affinity_response_context REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'", "Extraction_sql": "SELECT value FROM Tokens WHERE value REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL\nSELECT c1value FROM Tokens_content WHERE c1value REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL\nSELECT value FROM Tokens_stat WHERE value REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL\nSELECT affinity_response_context FROM CacheInfo WHERE affinity_response_context REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'"}
|
||||
{"db_path": "selectedDBs\\A4_peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": ["Tokens.value", "Tokens_content.c1value", "CacheInfo.num_contacts"], "Raw_rows_first_100": [["17423794330"], ["7423794330"], ["3794330"], ["17423794330"], ["7423794330"], ["3794330"]], "Total_raw_rows": 6, "Exploration_sql": "SELECT value FROM Tokens WHERE value REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT c1value FROM Tokens_content WHERE c1value REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT num_contacts FROM CacheInfo WHERE num_contacts REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}';", "Extraction_sql": "SELECT value FROM Tokens WHERE value REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT c1value FROM Tokens_content WHERE c1value REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT num_contacts FROM CacheInfo WHERE num_contacts REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}';"}
|
||||
{"db_path": "selectedDBs\\A4_peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII_type": "USERNAME", "PII": ["heather@cellebrite.com", "heather", "hmahalik@gmail.com", "hmahalik"], "Num_of_PII": 4, "source_columns": ["CacheInfo.affinity_response_context", "Tokens.value", "Tokens_content.c1value", "Tokens_stat.value"], "Raw_rows_first_100": [["heather@cellebrite.com"], ["heather"], ["hmahalik@gmail.com"], ["hmahalik"], ["heather@cellebrite.com"], ["heather"], ["hmahalik@gmail.com"], ["hmahalik"]], "Total_raw_rows": 8, "Exploration_sql": "SELECT affinity_response_context FROM CacheInfo WHERE affinity_response_context REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT value FROM Tokens WHERE value REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT c1value FROM Tokens_content WHERE c1value REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT value FROM Tokens_stat WHERE value REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT identity_hash FROM android_metadata WHERE identity_hash REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';", "Extraction_sql": "SELECT affinity_response_context FROM CacheInfo WHERE affinity_response_context REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\nUNION ALL\nSELECT value FROM Tokens WHERE value REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\nUNION ALL\nSELECT c1value FROM Tokens_content WHERE c1value REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\nUNION ALL\nSELECT value FROM Tokens_stat WHERE value REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\nUNION ALL\nSELECT identity_hash FROM android_metadata WHERE identity_hash REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';"}
|
||||
{"db_path": "selectedDBs\\A4_peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": ["Tokens.value", "Tokens_content.c1value", "Contacts.id", "CacheInfo.affinity_response_context"], "Raw_rows_first_100": [["heather@cellebrite.com"], ["heather"], ["hmahalik@gmail.com"], ["hmahalik"], ["Bo"], ["heather@cellebrite.com"], ["heather"], ["hmahalik@gmail.com"], ["hmahalik"], ["Bo"]], "Total_raw_rows": 10, "Exploration_sql": "SELECT value FROM Tokens WHERE value REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT c1value FROM Tokens_content WHERE c1value REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT id FROM Contacts WHERE id REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT affinity_response_context FROM CacheInfo WHERE affinity_response_context REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';", "Extraction_sql": "SELECT value FROM Tokens WHERE value REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT c1value FROM Tokens_content WHERE c1value REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT id FROM Contacts WHERE id REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\nUNION ALL\nSELECT affinity_response_context FROM CacheInfo WHERE affinity_response_context REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';"}
|
||||
4
batch_results/PII_A5_SBrowser2_20260120T225516Z.jsonl
Normal file
4
batch_results/PII_A5_SBrowser2_20260120T225516Z.jsonl
Normal file
@@ -0,0 +1,4 @@
|
||||
{"db_path": "selectedDBs\\A5_SBrowser2.db", "PII_type": "EMAIL", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT URL FROM BOOKMARKS WHERE URL REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT TITLE FROM BOOKMARKS WHERE TITLE REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT TAGS FROM BOOKMARKS WHERE TAGS REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT ACCOUNT_NAME FROM BOOKMARKS WHERE ACCOUNT_NAME REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT ACCOUNT_TYPE FROM BOOKMARKS WHERE ACCOUNT_TYPE REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT description FROM BOOKMARKS WHERE description REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}';", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A5_SBrowser2.db", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT URL FROM BOOKMARKS WHERE URL REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL SELECT TITLE FROM BOOKMARKS WHERE TITLE REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL SELECT TAGS FROM BOOKMARKS WHERE TAGS REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL SELECT description FROM BOOKMARKS WHERE description REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A5_SBrowser2.db", "PII_type": "USERNAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [[{"__bytes_b64__": "BAGBHAIGAQABLGwCAOx/Yz8pWMaZgKzK9gidY1JbWlHtbmZoPVRvHa8L1Qa5v77osj/b9cxf6AihGfAj9/oMzNgzwG4TEa1ocxhMgTw="}], [{"__bytes_b64__": "BAGBHAIGAQABLGwCADg08Y0rLftwtKLewxQylCXdY9YcWUe01qQU3juIWvYiZTepFwA/AGQEV8xammaotcT+LNCNEQBvYK49zZmdniQ="}], [{"__bytes_b64__": "BAGBXgIHAQABLIEsAgAx0od46RvJj3ZMpmE/r++aNuxFfDaTafncAW0XddYSB0WkI7vryB4lplJKYOFVJvriCJ54MlN56/+r6ZnwIIKx3TF4rvHTUNRIc9LKLCfh5hqUwF5ebYY+wMqebfiqMSY="}], [{"__bytes_b64__": "BAGBXgIHAQABLIEsAgAN/bPhb+tihkAfwO8uIxTzh9YWVp1U/kZ3oy/IzxRDnWH35ch+01jfwXwTtCmEj+LyGgSjHzzowXmue9uLyUvfpmVNbt3JCOqCl0EyBAo5+xpVCiij7EFhenIbvW/5cN0="}], [{"__bytes_b64__": "BAGBfgIHAQABLIFMAgAxMZf3KoVtVYHMTrajimnorfRebkfgH/qYhDfOJ2RRzWYOGMxn9xTFfrGxr/Cg60A6HnjLCVgpgftWYAW/bKRQc57iUOY3/35T/FmViNlLg/0T1xmtyNMJL85eTN6Ty8FnisaqUE3+iEK+drFSfnJt"}], [{"__bytes_b64__": "BAGBXgIHAQABLIEsAgDferB3B+clQ8gWXYeFleQ01pIC6VzQ+FxtlyoVYuGZ/sO3K+0Bwv1BqzJe5oM8CyJQtis/qRu/Li9LMYVDQxRCkfClKVoXFf6qVbzv+0XrXKADvEyQD083AV/geGnfwx8="}]], "Total_raw_rows": 6, "Exploration_sql": "SELECT ACCOUNT_NAME FROM BOOKMARKS WHERE ACCOUNT_NAME REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT TITLE FROM BOOKMARKS WHERE TITLE REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A5_SBrowser2.db", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [[{"__bytes_b64__": "BAGBHAIGAQABLGwCAOx/Yz8pWMaZgKzK9gidY1JbWlHtbmZoPVRvHa8L1Qa5v77osj/b9cxf6AihGfAj9/oMzNgzwG4TEa1ocxhMgTw="}], [{"__bytes_b64__": "BAGBHAIGAQABLGwCADg08Y0rLftwtKLewxQylCXdY9YcWUe01qQU3juIWvYiZTepFwA/AGQEV8xammaotcT+LNCNEQBvYK49zZmdniQ="}], [{"__bytes_b64__": "BAGBXgIHAQABLIEsAgAx0od46RvJj3ZMpmE/r++aNuxFfDaTafncAW0XddYSB0WkI7vryB4lplJKYOFVJvriCJ54MlN56/+r6ZnwIIKx3TF4rvHTUNRIc9LKLCfh5hqUwF5ebYY+wMqebfiqMSY="}], [{"__bytes_b64__": "BAGBXgIHAQABLIEsAgAN/bPhb+tihkAfwO8uIxTzh9YWVp1U/kZ3oy/IzxRDnWH35ch+01jfwXwTtCmEj+LyGgSjHzzowXmue9uLyUvfpmVNbt3JCOqCl0EyBAo5+xpVCiij7EFhenIbvW/5cN0="}], [{"__bytes_b64__": "BAGBfgIHAQABLIFMAgAxMZf3KoVtVYHMTrajimnorfRebkfgH/qYhDfOJ2RRzWYOGMxn9xTFfrGxr/Cg60A6HnjLCVgpgftWYAW/bKRQc57iUOY3/35T/FmViNlLg/0T1xmtyNMJL85eTN6Ty8FnisaqUE3+iEK+drFSfnJt"}], [{"__bytes_b64__": "BAGBXgIHAQABLIEsAgDferB3B+clQ8gWXYeFleQ01pIC6VzQ+FxtlyoVYuGZ/sO3K+0Bwv1BqzJe5oM8CyJQtis/qRu/Li9LMYVDQxRCkfClKVoXFf6qVbzv+0XrXKADvEyQD083AV/geGnfwx8="}]], "Total_raw_rows": 6, "Exploration_sql": "SELECT TITLE FROM BOOKMARKS WHERE TITLE REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT ACCOUNT_NAME FROM BOOKMARKS WHERE ACCOUNT_NAME REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT TAGS FROM BOOKMARKS WHERE TAGS REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT description FROM BOOKMARKS WHERE description REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'", "Extraction_sql": null}
|
||||
4
batch_results/PII_A5_SBrowser_20260120T225448Z.jsonl
Normal file
4
batch_results/PII_A5_SBrowser_20260120T225448Z.jsonl
Normal file
@@ -0,0 +1,4 @@
|
||||
{"db_path": "selectedDBs\\A5_SBrowser.db", "PII_type": "EMAIL", "PII": ["sharononeil368@gmail.com"], "Num_of_PII": 1, "source_columns": ["BOOKMARKS.ACCOUNT_NAME", "INTERNET_SYNC.SYNC_KEY", "SYNC_STATE.account_name", "TABS.ACCOUNT_NAME"], "Raw_rows_first_100": [["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"]], "Total_raw_rows": 6, "Exploration_sql": "SELECT ACCOUNT_NAME FROM BOOKMARKS WHERE ACCOUNT_NAME REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL \nSELECT SYNC_KEY FROM INTERNET_SYNC WHERE SYNC_KEY REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL \nSELECT account_name FROM SYNC_STATE WHERE account_name REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL \nSELECT ACCOUNT_NAME FROM TABS WHERE ACCOUNT_NAME REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'", "Extraction_sql": "SELECT ACCOUNT_NAME FROM BOOKMARKS WHERE ACCOUNT_NAME REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL\nSELECT SYNC_KEY FROM INTERNET_SYNC WHERE SYNC_KEY REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL\nSELECT account_name FROM SYNC_STATE WHERE account_name REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL\nSELECT ACCOUNT_NAME FROM TABS WHERE ACCOUNT_NAME REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'"}
|
||||
{"db_path": "selectedDBs\\A5_SBrowser.db", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["https://www.samsung.com/mobile/?cid=global_ow_app_s-internet_none_none_bookmark_bookmark_202008_none"], ["https://www.samsung.com/mobile/?cid=global_ow_app_s-internet_none_none_bookmark_bookmark_202008_none"], ["SBROWSER_TAB1724467631361__BROWSER1724467632107__SBROWSER_SAVEDPAGES1724467635256__QUICKACCESS_SYNC_V21724467633471__SBROWSER_HISTORY1724467634746__"], ["https://www.pinterest.com/pin/410812797236816112/"]], "Total_raw_rows": 4, "Exploration_sql": "SELECT URL FROM BOOKMARKS WHERE URL REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL \nSELECT SURL FROM BOOKMARKS WHERE SURL REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL \nSELECT TITLE FROM BOOKMARKS WHERE TITLE REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL \nSELECT TAGS FROM BOOKMARKS WHERE TAGS REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL \nSELECT data FROM SYNC_STATE WHERE data REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL \nSELECT TAB_URL FROM TABS WHERE TAB_URL REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL \nSELECT TAB_TITLE FROM TABS WHERE TAB_TITLE REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL \nSELECT history FROM TABS WHERE history REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A5_SBrowser.db", "PII_type": "USERNAME", "PII": ["sharononeil368@gmail.com"], "Num_of_PII": 1, "source_columns": ["BOOKMARKS.ACCOUNT_NAME", "TABS.ACCOUNT_NAME", "SYNC_STATE.account_name"], "Raw_rows_first_100": [["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"]], "Total_raw_rows": 6, "Exploration_sql": "SELECT ACCOUNT_NAME FROM BOOKMARKS WHERE ACCOUNT_NAME REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT ACCOUNT_NAME FROM TABS WHERE ACCOUNT_NAME REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT account_name FROM SYNC_STATE WHERE account_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';", "Extraction_sql": "SELECT ACCOUNT_NAME FROM BOOKMARKS WHERE ACCOUNT_NAME REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\nUNION ALL\nSELECT ACCOUNT_NAME FROM TABS WHERE ACCOUNT_NAME REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\nUNION ALL\nSELECT account_name FROM SYNC_STATE WHERE account_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';"}
|
||||
{"db_path": "selectedDBs\\A5_SBrowser.db", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": ["BOOKMARKS.TITLE", "BOOKMARKS.ACCOUNT_NAME", "BOOKMARKS.ACCOUNT_TYPE", "SYNC_STATE.data", "TABS.TAB_TITLE", "TABS.ACCOUNT_NAME", "TABS.ACCOUNT_TYPE"], "Raw_rows_first_100": [["Bookmarks"], ["Samsung account"], ["how to meditate - Google Search"], ["Google"], ["Galaxy Shop"], ["User guide"], ["sharononeil368@gmail.com"], ["com.osp.app.signin"], ["SBROWSER_TAB1724467631361__BROWSER1724467632107__SBROWSER_SAVEDPAGES1724467635256__QUICKACCESS_SYNC_V21724467633471__SBROWSER_HISTORY1724467634746__"], ["Pin on Simon, God of Hairdos"], ["puck from.glee - Google Search"], ["Midjourney AI - Free Image Generator"], ["billie eilish birds of a feather lyrics - Google Search"], ["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"], ["sharononeil368@gmail.com"], ["com.osp.app.signin"], ["com.osp.app.signin"], ["com.osp.app.signin"], ["com.osp.app.signin"]], "Total_raw_rows": 21, "Exploration_sql": "SELECT TITLE FROM BOOKMARKS WHERE TITLE REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT ACCOUNT_NAME FROM BOOKMARKS WHERE ACCOUNT_NAME REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT ACCOUNT_TYPE FROM BOOKMARKS WHERE ACCOUNT_TYPE REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT data FROM SYNC_STATE WHERE data REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT TAB_TITLE FROM TABS WHERE TAB_TITLE REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT ACCOUNT_NAME FROM TABS WHERE ACCOUNT_NAME REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \nSELECT ACCOUNT_TYPE FROM TABS WHERE ACCOUNT_TYPE REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'", "Extraction_sql": "SELECT TITLE FROM BOOKMARKS WHERE TITLE REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL\nSELECT ACCOUNT_NAME FROM BOOKMARKS WHERE ACCOUNT_NAME REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL\nSELECT ACCOUNT_TYPE FROM BOOKMARKS WHERE 
ACCOUNT_TYPE REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL\nSELECT data FROM SYNC_STATE WHERE data REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL\nSELECT TAB_TITLE FROM TABS WHERE TAB_TITLE REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL\nSELECT ACCOUNT_NAME FROM TABS WHERE ACCOUNT_NAME REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL\nSELECT ACCOUNT_TYPE FROM TABS WHERE ACCOUNT_TYPE REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'"}
|
||||
4
batch_results/PII_A5_searchengine_20260120T225558Z.jsonl
Normal file
4
batch_results/PII_A5_searchengine_20260120T225558Z.jsonl
Normal file
@@ -0,0 +1,4 @@
|
||||
{"db_path": "selectedDBs\\A5_searchengine.db", "PII_type": "EMAIL", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT locale FROM android_metadata WHERE locale REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT title FROM searchengine WHERE title REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT url FROM searchengine WHERE url REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT image_url FROM searchengine WHERE image_url REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT extra1 FROM searchengine WHERE extra1 REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT extra2 FROM searchengine WHERE extra2 REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT extra3 FROM searchengine WHERE extra3 REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}';", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A5_searchengine.db", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["https://home.imgsmail.ru/resplash/123689/i/meta/favicon.ico"], ["https://search.seznam.cz/re/media/favicon.192a42730e.ico"]], "Total_raw_rows": 2, "Exploration_sql": "SELECT locale FROM android_metadata WHERE locale REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT title FROM searchengine WHERE title REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT url FROM searchengine WHERE url REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT extra1 FROM searchengine WHERE extra1 REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT extra2 FROM searchengine WHERE extra2 REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT extra3 FROM searchengine WHERE extra3 REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A5_searchengine.db", "PII_type": "USERNAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["en_US"], ["google"], ["DuckDuckGo"], ["yahoo"], ["youtube"], ["bing"], ["so360"], ["qwant"], ["toutiao"], ["StartPage"], ["shenma"], ["https://duckduckgo.com/favicon.ico"], ["https://home.imgsmail.ru/resplash/123689/i/meta/favicon.ico"], ["https://m.toutiao.com/favicon.ico"], ["https://p0.ssl.qhimg.com/d/inn/128c749e/icon.png"], ["https://search.daum.net/favicon.ico"], ["https://search.naver.com/favicon.ico"], ["https://search.seznam.cz/re/media/favicon.192a42730e.ico"], ["https://search.yahoo.com/favicon.ico"], ["https://sm01.alicdn.com/L1/272/1990/favicon/favicon.ico"], ["https://www.baidu.com/favicon.ico"]], "Total_raw_rows": 21, "Exploration_sql": "SELECT locale FROM android_metadata WHERE locale REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT title FROM searchengine WHERE title REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT url FROM searchengine WHERE url REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT image_url FROM searchengine WHERE image_url REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT extra1 FROM searchengine WHERE extra1 REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT extra2 FROM searchengine WHERE extra2 REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL SELECT extra3 FROM searchengine WHERE extra3 REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\A5_searchengine.db", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["en_US"], ["google"], ["DuckDuckGo"], ["yahoo"], ["youtube"], ["bing"], ["so360"], ["qwant"], ["toutiao"], ["StartPage"], ["shenma"], ["https://duckduckgo.com/favicon.ico"], ["https://home.imgsmail.ru/resplash/123689/i/meta/favicon.ico"], ["https://m.toutiao.com/favicon.ico"], ["https://p0.ssl.qhimg.com/d/inn/128c749e/icon.png"], ["https://search.daum.net/favicon.ico"], ["https://search.naver.com/favicon.ico"], ["https://search.seznam.cz/re/media/favicon.192a42730e.ico"], ["https://search.yahoo.com/favicon.ico"], ["https://sm01.alicdn.com/L1/272/1990/favicon/favicon.ico"], ["https://www.baidu.com/favicon.ico"]], "Total_raw_rows": 21, "Exploration_sql": "SELECT locale FROM android_metadata WHERE locale REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT title FROM searchengine WHERE title REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT url FROM searchengine WHERE url REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT extra1 FROM searchengine WHERE extra1 REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT extra2 FROM searchengine WHERE extra2 REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL SELECT extra3 FROM searchengine WHERE extra3 REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'", "Extraction_sql": null}
|
||||
4
batch_results/PII_I1_CallHistory_20260120T225650Z.jsonl
Normal file
4
batch_results/PII_I1_CallHistory_20260120T225650Z.jsonl
Normal file
@@ -0,0 +1,4 @@
|
||||
{"db_path": "selectedDBs\\I1_CallHistory.sqlite", "PII_type": "EMAIL", "PII": ["19735203731@s.whatsapp.net", "923402582955@s.whatsapp.net", "14847353029@s.whatsapp.net", "19199037779@s.whatsapp.net"], "Num_of_PII": 4, "source_columns": ["ZWAAGGREGATECALLEVENT.ZLINKTOKEN", "ZWACDCALLEVENT.ZCALLIDSTRING", "ZWACDCALLEVENT.ZGROUPCALLCREATORUSERJIDSTRING", "ZWACDCALLEVENT.ZGROUPJIDSTRING", "ZWACDCALLEVENT.ZSCHEDULEDID", "ZWACDCALLEVENTPARTICIPANT.ZJIDSTRING"], "Raw_rows_first_100": [["19735203731@s.whatsapp.net"], ["19735203731@s.whatsapp.net"], ["19735203731@s.whatsapp.net"], ["923402582955@s.whatsapp.net"], ["923402582955@s.whatsapp.net"], ["14847353029@s.whatsapp.net"], ["14847353029@s.whatsapp.net"], ["19199037779@s.whatsapp.net"], ["923402582955@s.whatsapp.net"]], "Total_raw_rows": 9, "Exploration_sql": "SELECT ZLINKTOKEN FROM ZWAAGGREGATECALLEVENT WHERE ZLINKTOKEN REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT ZCALLIDSTRING FROM ZWACDCALLEVENT WHERE ZCALLIDSTRING REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT ZGROUPCALLCREATORUSERJIDSTRING FROM ZWACDCALLEVENT WHERE ZGROUPCALLCREATORUSERJIDSTRING REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT ZGROUPJIDSTRING FROM ZWACDCALLEVENT WHERE ZGROUPJIDSTRING REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT ZSCHEDULEDID FROM ZWACDCALLEVENT WHERE ZSCHEDULEDID REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' \nUNION ALL \nSELECT ZJIDSTRING FROM ZWACDCALLEVENTPARTICIPANT WHERE ZJIDSTRING REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}';", "Extraction_sql": "SELECT ZLINKTOKEN FROM ZWAAGGREGATECALLEVENT WHERE ZLINKTOKEN REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT ZCALLIDSTRING FROM ZWACDCALLEVENT WHERE ZCALLIDSTRING REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT ZGROUPCALLCREATORUSERJIDSTRING FROM ZWACDCALLEVENT WHERE 
ZGROUPCALLCREATORUSERJIDSTRING REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT ZGROUPJIDSTRING FROM ZWACDCALLEVENT WHERE ZGROUPJIDSTRING REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT ZSCHEDULEDID FROM ZWACDCALLEVENT WHERE ZSCHEDULEDID REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\nUNION ALL\nSELECT ZJIDSTRING FROM ZWACDCALLEVENTPARTICIPANT WHERE ZJIDSTRING REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}';"}
|
||||
{"db_path": "selectedDBs\\I1_CallHistory.sqlite", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": ["ZWACDCALLEVENT.ZCALLIDSTRING", "ZWACDCALLEVENTPARTICIPANT.ZJIDSTRING", "ZWAAGGREGATECALLEVENT.ZLINKTOKEN"], "Raw_rows_first_100": [["372FA57E129467051E04B3E4DD5A26D5"], ["3A9CEF8B4996D645358B"], ["14847353029@s.whatsapp.net"], ["14847353029@s.whatsapp.net"], ["19199037779@s.whatsapp.net"], ["923402582955@s.whatsapp.net"]], "Total_raw_rows": 6, "Exploration_sql": "SELECT ZCALLIDSTRING FROM ZWACDCALLEVENT WHERE ZCALLIDSTRING REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT ZJIDSTRING FROM ZWACDCALLEVENTPARTICIPANT WHERE ZJIDSTRING REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT ZLINKTOKEN FROM ZWAAGGREGATECALLEVENT WHERE ZLINKTOKEN REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'", "Extraction_sql": "SELECT ZCALLIDSTRING FROM ZWACDCALLEVENT WHERE ZCALLIDSTRING REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT ZJIDSTRING FROM ZWACDCALLEVENTPARTICIPANT WHERE ZJIDSTRING REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\nUNION ALL\nSELECT ZLINKTOKEN FROM ZWAAGGREGATECALLEVENT WHERE ZLINKTOKEN REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'"}
|
||||
{"db_path": "selectedDBs\\I1_CallHistory.sqlite", "PII_type": "USERNAME", "PII": ["19735203731", "923402582955", "14847353029", "19199037779"], "Num_of_PII": 4, "source_columns": ["ZWAAGGREGATECALLEVENT.ZLINKTOKEN", "ZWACDCALLEVENT.ZCALLIDSTRING", "ZWACDCALLEVENT.ZGROUPCALLCREATORUSERJIDSTRING", "ZWACDCALLEVENT.ZGROUPJIDSTRING", "ZWACDCALLEVENTPARTICIPANT.ZJIDSTRING"], "Raw_rows_first_100": [["19735203731@s.whatsapp.net"], ["19735203731@s.whatsapp.net"], ["19735203731@s.whatsapp.net"], ["923402582955@s.whatsapp.net"], ["923402582955@s.whatsapp.net"], ["14847353029@s.whatsapp.net"], ["14847353029@s.whatsapp.net"], ["19199037779@s.whatsapp.net"], ["923402582955@s.whatsapp.net"]], "Total_raw_rows": 9, "Exploration_sql": "SELECT ZLINKTOKEN FROM ZWAAGGREGATECALLEVENT WHERE ZLINKTOKEN REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT ZCALLIDSTRING FROM ZWACDCALLEVENT WHERE ZCALLIDSTRING REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT ZGROUPCALLCREATORUSERJIDSTRING FROM ZWACDCALLEVENT WHERE ZGROUPCALLCREATORUSERJIDSTRING REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT ZGROUPJIDSTRING FROM ZWACDCALLEVENT WHERE ZGROUPJIDSTRING REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT ZJIDSTRING FROM ZWACDCALLEVENTPARTICIPANT WHERE ZJIDSTRING REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';", "Extraction_sql": "SELECT ZLINKTOKEN FROM ZWAAGGREGATECALLEVENT WHERE ZLINKTOKEN REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\nUNION ALL\nSELECT ZCALLIDSTRING FROM ZWACDCALLEVENT WHERE ZCALLIDSTRING REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\nUNION ALL\nSELECT ZGROUPCALLCREATORUSERJIDSTRING FROM ZWACDCALLEVENT WHERE ZGROUPCALLCREATORUSERJIDSTRING REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\nUNION ALL\nSELECT ZGROUPJIDSTRING FROM ZWACDCALLEVENT WHERE ZGROUPJIDSTRING REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\nUNION ALL\nSELECT ZJIDSTRING FROM ZWACDCALLEVENTPARTICIPANT WHERE ZJIDSTRING REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';"}
|
||||
{"db_path": "selectedDBs\\I1_CallHistory.sqlite", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["3C399CDDAF11A41F7AFF2892E0A4B10C"], ["3C37CBFE11C261E6CD80C2DE7834D770"], ["372FA57E129467051E04B3E4DD5A26D5"], ["3A6DF670F7121CD6D08B"], ["3A9CEF8B4996D645358B"], ["14847353029@s.whatsapp.net"], ["14847353029@s.whatsapp.net"], ["19199037779@s.whatsapp.net"], ["923402582955@s.whatsapp.net"], ["WAAggregateCallEvent"], ["WACDCallEvent"], ["WACDCallEventParticipant"], ["WAJoinableCallEvent"], ["WAJoinableCallEventParticipant"], ["WAUpcomingCallEvent"]], "Total_raw_rows": 15, "Exploration_sql": "SELECT ZCALLIDSTRING FROM ZWACDCALLEVENT WHERE ZCALLIDSTRING REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT ZJIDSTRING FROM ZWACDCALLEVENTPARTICIPANT WHERE ZJIDSTRING REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT Z_NAME FROM Z_PRIMARYKEY WHERE Z_NAME REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'", "Extraction_sql": null}
|
||||
4
batch_results/PII_I1_ChatStorage_20260120T230640Z.jsonl
Normal file
4
batch_results/PII_I1_ChatStorage_20260120T230640Z.jsonl
Normal file
File diff suppressed because one or more lines are too long
4
batch_results/PII_I1_ContactsV2_20260120T231626Z.jsonl
Normal file
4
batch_results/PII_I1_ContactsV2_20260120T231626Z.jsonl
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
4
batch_results/PII_I2_AddressBook_20260120T232427Z.jsonl
Normal file
4
batch_results/PII_I2_AddressBook_20260120T232427Z.jsonl
Normal file
File diff suppressed because one or more lines are too long
4
batch_results/PII_I3_sms_20260120T232531Z.jsonl
Normal file
4
batch_results/PII_I3_sms_20260120T232531Z.jsonl
Normal file
File diff suppressed because one or more lines are too long
4
batch_results/PII_I4_CloudTabs_20260120T232557Z.jsonl
Normal file
4
batch_results/PII_I4_CloudTabs_20260120T232557Z.jsonl
Normal file
@@ -0,0 +1,4 @@
|
||||
{"db_path": "selectedDBs\\I4_CloudTabs.db", "PII_type": "EMAIL", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT device_uuid FROM cloud_tab_devices WHERE device_uuid REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL \nSELECT title FROM cloud_tabs WHERE title REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL \nSELECT url FROM cloud_tabs WHERE url REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL \nSELECT key FROM metadata WHERE key REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL \nSELECT value FROM metadata WHERE value REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\I4_CloudTabs.db", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["3075F5C2-E558-4E12-B421-6118960D1D2E"], ["ambarrestaurant.com/wp-content/uploads/2024/07/ambar-ch-dinner-07012024.pdf"]], "Total_raw_rows": 2, "Exploration_sql": "SELECT device_uuid FROM cloud_tab_devices WHERE device_uuid REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL \nSELECT title FROM cloud_tabs WHERE title REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' UNION ALL \nSELECT value FROM metadata WHERE value REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\I4_CloudTabs.db", "PII_type": "USERNAME", "PII": ["ravensmanorexperience.com", "ambarrestaurant.com", "jackalope", "CatFish Cops", "server_change_token", "use_manatee_container"], "Num_of_PII": 6, "source_columns": ["cloud_tab_devices.device_name", "cloud_tabs.title", "metadata.key"], "Raw_rows_first_100": [["Cellebrite"], ["falafel hardboiled egg israel - Google Search"], ["Dickens Cider is now Dickins Cider Company | Hard Cider Drinks | Dickens Cider"], ["Blue Bite | Connecting Possibility"], ["Home page - Mapat"], ["snack with dairy kids - Google Search"], ["figs growing green not ripe - Google Search"], ["Hours: Amusement Park & Hershey Attractions | Hersheypark"], ["How to Change a Light Bulb"], ["Tolls"], ["ravensmanorexperience.com"], ["cool dry place storage - Google Search"], ["Haifa Airport - Wikipedia"], ["whats the sales tax in nyc on clothing - Google Search"], ["can you copy amiibo with flipper zero - Google Search"], ["jackalope - Google Search"], ["ambarrestaurant.com/wp-content/uploads/2024/07/ambar-ch-dinner-07012024.pdf"], ["Menu | Raven's Manor"], ["Use Crash Detection on iPhone or Apple Watch to call for help in an accident - Apple Support"], ["figs tre wont ripen green - Search"], ["Hours: Amusement Park & Hershey Attractions | Hersheypark"], ["AirTag"], ["CatFish Cops"], ["my files download - Google Search"], ["server_change_token"], ["use_manatee_container"]], "Total_raw_rows": 26, "Exploration_sql": "SELECT device_name FROM cloud_tab_devices WHERE device_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT title FROM cloud_tabs WHERE title REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \nUNION ALL \nSELECT key FROM metadata WHERE key REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';", "Extraction_sql": "SELECT device_name FROM cloud_tab_devices WHERE device_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\nUNION ALL\nSELECT title FROM cloud_tabs WHERE title REGEXP 
'\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\nUNION ALL\nSELECT key FROM metadata WHERE key REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';"}
|
||||
{"db_path": "selectedDBs\\I4_CloudTabs.db", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["Cellebrite"], ["falafel hardboiled egg israel - Google Search"], ["Dickens Cider is now Dickins Cider Company | Hard Cider Drinks | Dickens Cider"], ["Blue Bite | Connecting Possibility"], ["Home page - Mapat"], ["snack with dairy kids - Google Search"], ["figs growing green not ripe - Google Search"], ["Hours: Amusement Park & Hershey Attractions | Hersheypark"], ["How to Change a Light Bulb"], ["Tolls"], ["ravensmanorexperience.com"], [{"__bytes_b64__": "YnBsaXN0MDDUAQIDBAUGBwpYJHZlcnNpb25ZJGFyY2hpdmVyVCR0b3BYJG9iamVjdHMSAAGGoF8QD05TS2V5ZWRBcmNoaXZlctEICVRyb290gAGkCwwRElUkbnVsbNINDg8QViRjbGFzc18QD0NoYW5nZVRva2VuRGF0YYADgAJPEC8fChASDAAATBJW9UHWAAAAABgBGAAiFgj5m+me89ruvc4BEM2Hk5/Ph+jnpgEoANITFBUWWiRjbGFzc25hbWVYJGNsYXNzZXNfEBNDS1NlcnZlckNoYW5nZVRva2VuohUXWE5TT2JqZWN0CBEaJCkyN0lMUVNYXmNqfH6AsrfCy+HkAAAAAAAAAQEAAAAAAAAAGAAAAAAAAAAAAAAAAAAAAO0="}]], "Total_raw_rows": 12, "Exploration_sql": "SELECT device_name FROM cloud_tab_devices WHERE device_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT title FROM cloud_tabs WHERE title REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT value FROM metadata WHERE value REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'", "Extraction_sql": null}
|
||||
4
batch_results/PII_I4_History_20260120T232632Z.jsonl
Normal file
4
batch_results/PII_I4_History_20260120T232632Z.jsonl
Normal file
File diff suppressed because one or more lines are too long
4
batch_results/PII_I5_Calendar_20260120T232720Z.jsonl
Normal file
4
batch_results/PII_I5_Calendar_20260120T232720Z.jsonl
Normal file
File diff suppressed because one or more lines are too long
4
batch_results/PII_I5_Extras_20260120T232749Z.jsonl
Normal file
4
batch_results/PII_I5_Extras_20260120T232749Z.jsonl
Normal file
@@ -0,0 +1,4 @@
|
||||
{"db_path": "selectedDBs\\I5_Extras.db", "PII_type": "EMAIL", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [], "Total_raw_rows": 0, "Exploration_sql": "SELECT ZKEY FROM ZSETTING WHERE ZKEY REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL \nSELECT ZVALUE FROM ZSETTING WHERE ZVALUE REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL \nSELECT Z_UUID FROM Z_METADATA WHERE Z_UUID REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}' UNION ALL \nSELECT Z_NAME FROM Z_PRIMARYKEY WHERE Z_NAME REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\I5_Extras.db", "PII_type": "PHONE", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["748022768.494298"]], "Total_raw_rows": 1, "Exploration_sql": "SELECT ZVALUE FROM ZSETTING WHERE ZVALUE REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT Z_UUID FROM Z_METADATA WHERE Z_UUID REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT Z_CONTENT FROM Z_MODELCACHE WHERE Z_CONTENT REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}' \nUNION ALL \nSELECT Z_NAME FROM Z_PRIMARYKEY WHERE Z_NAME REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'", "Extraction_sql": null}
|
||||
{"db_path": "selectedDBs\\I5_Extras.db", "PII_type": "USERNAME", "PII": [], "Num_of_PII": 0, "source_columns": ["ZSETTING.ZKEY", "ZSETTING.ZVALUE", "Z_METADATA.Z_UUID", "Z_PRIMARYKEY.Z_NAME"], "Raw_rows_first_100": [["CacheEndDate"], ["LastDBSequence"], ["AE9BAC24-6E29-4FA2-9FA4-699FD2BE7479"], ["Alarm"], ["Setting"]], "Total_raw_rows": 5, "Exploration_sql": "SELECT ZKEY FROM ZSETTING WHERE ZKEY REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \nSELECT ZVALUE FROM ZSETTING WHERE ZVALUE REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \nSELECT Z_UUID FROM Z_METADATA WHERE Z_UUID REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \nSELECT Z_NAME FROM Z_PRIMARYKEY WHERE Z_NAME REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'", "Extraction_sql": "SELECT ZKEY FROM ZSETTING WHERE ZKEY REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\nSELECT ZVALUE FROM ZSETTING WHERE ZVALUE REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\nSELECT Z_UUID FROM Z_METADATA WHERE Z_UUID REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\nSELECT Z_NAME FROM Z_PRIMARYKEY WHERE Z_NAME REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'"}
|
||||
{"db_path": "selectedDBs\\I5_Extras.db", "PII_type": "PERSON_NAME", "PII": [], "Num_of_PII": 0, "source_columns": [], "Raw_rows_first_100": [["CacheEndDate"], ["LastDBSequence"], ["AE9BAC24-6E29-4FA2-9FA4-699FD2BE7479"], ["Alarm"], ["Setting"]], "Total_raw_rows": 5, "Exploration_sql": "SELECT ZKEY FROM ZSETTING WHERE ZKEY REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT ZVALUE FROM ZSETTING WHERE ZVALUE REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT Z_UUID FROM Z_METADATA WHERE Z_UUID REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \nUNION ALL \nSELECT Z_NAME FROM Z_PRIMARYKEY WHERE Z_NAME REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'", "Extraction_sql": null}
|
||||
8
stats/aggregated_by_app_and_type.jsonl
Normal file
8
stats/aggregated_by_app_and_type.jsonl
Normal file
File diff suppressed because one or more lines are too long
209
stats/stats_app_level.ipynb
Normal file
209
stats/stats_app_level.ipynb
Normal file
@@ -0,0 +1,209 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "234eed3f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Wrote: I:\\project2026\\llmagent\\stats\\aggregated_by_app_and_type.jsonl\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"from pathlib import Path\n",
|
||||
"from typing import Any, Dict, Tuple\n",
|
||||
"\n",
|
||||
"IGNORE_FIELDS = {\"Raw_rows_first_100\", \"Exploration_sql\", \"Extraction_sql\"}\n",
|
||||
"\n",
|
||||
"\n",
def get_app_code(db_path: str) -> str:
    """
    Return the app code that prefixes a database filename.

    The app code is everything before the first underscore of the file stem.
    The path is parsed with Windows rules, which accept both ``\\`` and ``/``
    as separators, so the result is the same on every platform. A plain
    ``Path(db_path)`` on POSIX keeps the backslash inside the filename and
    would return ``selectedDBs\\A2`` for the first example below.

    Examples:
        selectedDBs\\A2_journal.db  -> A2
        selectedDBs/A1_msgstore.db -> A1
    """
    # Local import: the surrounding file only imports ``Path`` from pathlib.
    from pathlib import PureWindowsPath

    stem = PureWindowsPath(db_path).stem  # A2_journal
    return stem.split("_", 1)[0]
|
||||
"\n",
|
||||
"\n",
|
||||
"def _dedupe_preserve_order(items):\n",
|
||||
" seen = set()\n",
|
||||
" out = []\n",
|
||||
" for x in items:\n",
|
||||
" key = json.dumps(x, sort_keys=True, ensure_ascii=False) if isinstance(x, (dict, list)) else x\n",
|
||||
" if key in seen:\n",
|
||||
" continue\n",
|
||||
" seen.add(key)\n",
|
||||
" out.append(x)\n",
|
||||
" return out\n",
|
||||
"\n",
|
||||
"\n",
def prefix_source_columns(db_path: str, cols: list) -> list:
    """
    Prefix each source column with the database filename to avoid ambiguity
    after aggregating multiple DB files under the same app.

    The filename is taken with Windows path rules, which accept both ``\\``
    and ``/`` separators, so backslash paths are also split correctly on
    POSIX. Entries of ``cols`` that are not non-empty strings are dropped.

    Example:
        db_path = selectedDBs\\A1_msgstore.db
        col     = message.text_data
        -> A1_msgstore.db:message.text_data
    """
    # Local import: the surrounding file only imports ``Path`` from pathlib.
    from pathlib import PureWindowsPath

    db_file = PureWindowsPath(db_path).name  # includes extension
    return [f"{db_file}:{c}" for c in cols if isinstance(c, str) and c]
|
||||
"\n",
|
||||
"\n",
def _merge_record(agg: Dict[str, Any], rec: Dict[str, Any], dbp: str) -> None:
    """Fold one parsed JSONL record into its (app_code, PII_type) accumulator."""
    # --- Special handling: PII + counts ---
    pii_list = rec.get("PII", [])
    if not isinstance(pii_list, list):
        pii_list = []
    agg["PII_all"].extend(pii_list)

    n = rec.get("Num_of_PII")
    if isinstance(n, (int, float)) and not isinstance(n, bool):
        agg["Num_of_PII_all"] += int(n)
    else:
        # No usable count in the record: fall back to the list length.
        agg["Num_of_PII_all"] += len(pii_list)

    # --- Aggregate other fields (arrays/numbers only) ---
    for k, v in rec.items():
        if k in IGNORE_FIELDS:
            continue
        if k in ("db_path", "PII_type", "PII", "Num_of_PII"):
            continue

        # Prefix source_columns with db filename
        if k == "source_columns":
            v = prefix_source_columns(dbp, v if isinstance(v, list) else [])

        if isinstance(v, list):
            bucket = agg.setdefault(k, [])
            if isinstance(bucket, list):
                bucket.extend(v)
        elif isinstance(v, (int, float)) and not isinstance(v, bool):
            current = agg.setdefault(k, 0)
            if isinstance(current, (int, float)) and not isinstance(current, bool):
                agg[k] = current + v
        # non-list, non-numeric values are ignored


def _finalize_group(agg: Dict[str, Any]) -> None:
    """Dedupe the list fields of one accumulator and fill in its unique counts."""
    agg["PII_unique"] = _dedupe_preserve_order(agg["PII_all"])
    agg["Num_of_PII_unique"] = len(agg["PII_unique"])

    for k, v in list(agg.items()):
        if isinstance(v, list) and k not in ("PII_all", "PII_unique"):
            agg[k] = _dedupe_preserve_order(v)

    # Only the deduplicated count is available here; the with-duplicates
    # count would have to be tracked separately while merging.
    src = agg.get("source_columns", [])
    agg["Num_of_source_columns_unique"] = len(src) if isinstance(src, list) else 0


def aggregate_jsonl_folder(in_dir: str | Path, out_path: str | Path) -> Path:
    """
    Read all *.jsonl files under in_dir and aggregate records by:
      (app_code derived from db_path, PII_type)

    Output per group:
      - db_path: "selectedDBs\\<APP_CODE>"
      - PII_type
      - PII_all: with duplicates
      - PII_unique: deduped (exact match)
      - Num_of_PII_all: with duplicates (sum of per-record Num_of_PII or len(PII))
      - Num_of_PII_unique: len(PII_unique)
      - source_columns: deduped, prefixed with db filename
      - Num_of_source_columns_unique: len(source_columns)
      - other list fields: deduped
      - other numeric fields: summed
      - ignores Raw_rows_first_100, Exploration_sql, Extraction_sql

    Blank lines and records without a non-empty string PII_type are skipped.
    If out_path itself lies inside in_dir it is not read as input, so
    re-running the aggregation does not fold a previous result into the new
    one.

    Returns:
        out_path as a Path; one JSON object per group is written to it,
        ordered by (app_code, PII_type).

    Raises:
        ValueError: if a non-blank line is not valid JSON or is not a JSON
            object.
    """
    in_dir = Path(in_dir)
    out_path = Path(out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_resolved = out_path.resolve()

    grouped: Dict[Tuple[str, str], Dict[str, Any]] = {}

    for jsonl_file in sorted(in_dir.glob("*.jsonl")):
        # Never treat a previous aggregation result as input.
        if jsonl_file.resolve() == out_resolved:
            continue

        with jsonl_file.open("r", encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue

                try:
                    rec = json.loads(line)
                except json.JSONDecodeError as e:
                    raise ValueError(f"Bad JSON in {jsonl_file} line {line_no}: {e}") from e
                if not isinstance(rec, dict):
                    raise ValueError(
                        f"Bad JSON in {jsonl_file} line {line_no}: "
                        f"expected an object, got {type(rec).__name__}"
                    )

                pii_type = rec.get("PII_type")
                if not isinstance(pii_type, str) or not pii_type:
                    continue

                # A missing or null db_path is treated as an empty path.
                dbp = rec.get("db_path")
                if not isinstance(dbp, str):
                    dbp = ""

                app_code = get_app_code(dbp)
                key = (app_code, pii_type)

                if key not in grouped:
                    grouped[key] = {
                        "db_path": f"selectedDBs\\{app_code}",
                        "PII_type": pii_type,
                        "PII_all": [],
                        "PII_unique": [],
                        "Num_of_PII_all": 0,
                        "Num_of_PII_unique": 0,
                    }

                _merge_record(grouped[key], rec, dbp)

    # --- Finalize: dedupe lists + compute unique PII fields ---
    for agg in grouped.values():
        _finalize_group(agg)

    # --- Write aggregated JSONL ---
    with out_path.open("w", encoding="utf-8") as f:
        for key in sorted(grouped):
            f.write(json.dumps(grouped[key], ensure_ascii=False) + "\n")

    return out_path
|
||||
"\n",
|
||||
"\n",
if __name__ == "__main__":
    # Build the input directory from components instead of a backslash
    # literal: where "\" is not a path separator, r"..\batch_results_2" is a
    # single file name, the glob matches nothing and an empty result is
    # written without any error.
    out = aggregate_jsonl_folder(Path("..") / "batch_results_2", "aggregated_by_app_and_type.jsonl")
    print(f"Wrote: {out.resolve()}")
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.18"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user