diff --git a/RQs/RQ0/RQ0_batch_results_normalization.ipynb b/RQs/RQ0/RQ0_batch_results_normalization.ipynb index 951040d..d61b665 100644 --- a/RQs/RQ0/RQ0_batch_results_normalization.ipynb +++ b/RQs/RQ0/RQ0_batch_results_normalization.ipynb @@ -42,21 +42,17 @@ ], "source": [ "import json\n", - "from pathlib import Path\n", - "import sys\n", "import shutil\n", - "\n", - "# IMPORTANT: sys.path needs a DIRECTORY, not the .py file itself\n", - "STATS_DIR = Path(r\"I:\\project2026\\llmagent\\RQs\").resolve() # folder containing stats_utils.py\n", - "sys.path.insert(0, str(STATS_DIR))\n", - "\n", - "from stats_utils import normalize_and_slim_record\n", - "\n", - "IN_DIR = Path(r\"..\\..\\batch_results\")\n", - "OUT_DIR = Path(r\"..\\batch_results_normalized\")\n", + "import sys\n", + "from pathlib import Path\n", + "from typing import Callable, Tuple\n", "\n", "\n", - "def process_file(in_path: Path, out_path: Path) -> int:\n", + "def process_file_jsonl(\n", + " in_path: Path,\n", + " out_path: Path,\n", + " normalize_record_fn: Callable[[dict], dict],\n", + ") -> int:\n", " n = 0\n", " with in_path.open(\"r\", encoding=\"utf-8\") as fin, out_path.open(\"w\", encoding=\"utf-8\") as fout:\n", " for line in fin:\n", @@ -66,43 +62,66 @@ " obj = json.loads(line)\n", " if not isinstance(obj, dict):\n", " continue\n", - " slim = normalize_and_slim_record(obj)\n", + " slim = normalize_record_fn(obj)\n", " fout.write(json.dumps(slim, ensure_ascii=False) + \"\\n\")\n", " n += 1\n", " return n\n", "\n", "\n", - "def main() -> None:\n", - " # Delete OUT_DIR if it exists, then recreate it cleanly\n", - " if OUT_DIR.exists():\n", - " if OUT_DIR.is_dir():\n", - " shutil.rmtree(OUT_DIR)\n", + "def normalize_jsonl_folder(\n", + " in_dir: Path,\n", + " out_dir: Path,\n", + " normalize_record_fn: Callable[[dict], dict],\n", + " *,\n", + " delete_out_dir_first: bool = True,\n", + ") -> Tuple[int, int]:\n", + " \"\"\"\n", + " Normalize every *.jsonl file in `in_dir` and write outputs (same filenames) to `out_dir`.\n", + "\n", + " Returns: (num_files_processed, num_records_written)\n", + " \"\"\"\n", + " if delete_out_dir_first and out_dir.exists():\n", + " if out_dir.is_dir():\n", + " shutil.rmtree(out_dir)\n", " else:\n", - " OUT_DIR.unlink()\n", + " out_dir.unlink()\n", "\n", - " OUT_DIR.mkdir(parents=True, exist_ok=True)\n", + " out_dir.mkdir(parents=True, exist_ok=True)\n", "\n", - " files = sorted(IN_DIR.glob(\"*.jsonl\"))\n", + " files = sorted(in_dir.glob(\"*.jsonl\"))\n", " if not files:\n", - " print(f\"No .jsonl files found in: {IN_DIR.resolve()}\")\n", - " return\n", + " print(f\"No .jsonl files found in: {in_dir.resolve()}\")\n", + " return (0, 0)\n", "\n", - " total_files = 0\n", " total_records = 0\n", - "\n", " for fp in files:\n", - " out_fp = OUT_DIR / fp.name\n", - " n = process_file(fp, out_fp)\n", + " out_fp = out_dir / fp.name\n", + " n = process_file_jsonl(fp, out_fp, normalize_record_fn)\n", " print(f\"{fp.name}: {n} records -> {out_fp}\")\n", - " total_files += 1\n", " total_records += n\n", "\n", - " print(f\"Done. Files: {total_files}, Records: {total_records}\")\n", - " print(f\"Output folder: {OUT_DIR.resolve()}\")\n", + " print(f\"Done. Files: {len(files)}, Records: {total_records}\")\n", + " print(f\"Output folder: {out_dir.resolve()}\")\n", + " return (len(files), total_records)\n", "\n", "\n", + "# ---- Example usage (your exact paths) ----\n", "if __name__ == \"__main__\":\n", - " main()" + " STATS_DIR = Path(r\"I:\\project2026\\llmagent\\RQs\").resolve() # folder containing stats_utils.py\n", + " sys.path.insert(0, str(STATS_DIR))\n", + "\n", + " from stats_utils import normalize_and_slim_record\n", + "\n", + " IN_DIR = Path(r\"..\\..\\batch_results\")\n", + " OUT_DIR = Path(r\"..\\batch_results_normalized\")\n", + "\n", + " normalize_jsonl_folder(IN_DIR, OUT_DIR, normalize_and_slim_record, delete_out_dir_first=True)\n", + " \n", + " IN_DIR = Path(r\"..\\..\\ground_truth\")\n", + " OUT_DIR = Path(r\"..\\ground_truth_normalized\")\n", + "\n", + " normalize_jsonl_folder(IN_DIR, OUT_DIR, normalize_and_slim_record, delete_out_dir_first=True)\n", + " " ] }, { diff --git a/RQs/ground_truth_normalized/PII_A1_commerce.jsonl b/RQs/ground_truth_normalized/PII_A1_commerce.jsonl new file mode 100644 index 0000000..ef4928c --- /dev/null +++ b/RQs/ground_truth_normalized/PII_A1_commerce.jsonl @@ -0,0 +1 @@ +{"db_path": "commerce.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": [], "Num_of_source_columns": 0} diff --git a/RQs/ground_truth_normalized/PII_A1_msgstore.jsonl b/RQs/ground_truth_normalized/PII_A1_msgstore.jsonl new file mode 100644 index 0000000..d6c1c8a --- /dev/null +++ b/RQs/ground_truth_normalized/PII_A1_msgstore.jsonl @@ -0,0 +1 @@ +{"db_path": "msgstore.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["business_name"], "Num_of_source_columns": 1} diff --git a/RQs/ground_truth_normalized/PII_A1_wa.jsonl b/RQs/ground_truth_normalized/PII_A1_wa.jsonl new file mode 100644 index 0000000..e4850f1 --- /dev/null +++ b/RQs/ground_truth_normalized/PII_A1_wa.jsonl @@ -0,0 +1,8 @@ +{"db_path": "wa.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["number"], "Num_of_source_columns": 1} +{"db_path": "wa.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["given_name+family_name", "sort_name"], "Num_of_source_columns": 2} +{"db_path": "wa.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["display_name"], "Num_of_source_columns": 1} +{"db_path": "wa.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["number"], "Num_of_source_columns": 1} +{"db_path": "wa.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["display_name", "wa_name"], "Num_of_source_columns": 2} +{"db_path": "wa.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["given_name+family_name", "sort_name"], "Num_of_source_columns": 2} +{"db_path": "wa.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["verified_name"], "Num_of_source_columns": 1} +{"db_path": "wa.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["creator_name"], "Num_of_source_columns": 1} diff --git a/RQs/ground_truth_normalized/PII_A2_core.jsonl b/RQs/ground_truth_normalized/PII_A2_core.jsonl new file mode 100644 index 0000000..c2585ea --- /dev/null +++ b/RQs/ground_truth_normalized/PII_A2_core.jsonl @@ -0,0 +1,4 @@ +{"db_path": "core.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["textval"], "Num_of_source_columns": 1} +{"db_path": "core.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["textval"], "Num_of_source_columns": 1} +{"db_path": "core.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["textval"], "Num_of_source_columns": 1} +{"db_path": "core.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["textval"], "Num_of_source_columns": 1} diff --git a/RQs/ground_truth_normalized/PII_A2_journal.jsonl b/RQs/ground_truth_normalized/PII_A2_journal.jsonl new file mode 100644 index 0000000..f675743 --- /dev/null +++ b/RQs/ground_truth_normalized/PII_A2_journal.jsonl @@ -0,0 +1 @@ +{"db_path": "journal.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": [], "Num_of_source_columns": 0} diff --git a/RQs/ground_truth_normalized/PII_A2_main.jsonl b/RQs/ground_truth_normalized/PII_A2_main.jsonl new file mode 100644 index 0000000..198e79f --- /dev/null +++ b/RQs/ground_truth_normalized/PII_A2_main.jsonl @@ -0,0 +1,10 @@ +{"db_path": "main.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["encodedusername", "mutableusername", "originalusername"], "Num_of_source_columns": 3} +{"db_path": "main.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["phone", "rawphone"], "Num_of_source_columns": 2} +{"db_path": "main.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["displayname"], "Num_of_source_columns": 1} +{"db_path": "main.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["displayname", "serverdisplayname", "username"], "Num_of_source_columns": 3} +{"db_path": "main.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["displayname"], "Num_of_source_columns": 1} +{"db_path": "main.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["captiontextdisplay"], "Num_of_source_columns": 1} +{"db_path": "main.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["displayname", "serverdisplayname", "username", "usernameforsorting"], "Num_of_source_columns": 4} +{"db_path": "main.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["displayinteractionuserusername", "frienddisplayusername"], "Num_of_source_columns": 2} +{"db_path": "main.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["displayinteractionuserdisplayname"], "Num_of_source_columns": 1} +{"db_path": "main.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["frienddisplayname", "friendusername"], "Num_of_source_columns": 2} diff --git a/RQs/ground_truth_normalized/PII_A3_account1cache4.jsonl b/RQs/ground_truth_normalized/PII_A3_account1cache4.jsonl new file mode 100644 index 0000000..14b20f2 --- /dev/null +++ b/RQs/ground_truth_normalized/PII_A3_account1cache4.jsonl @@ -0,0 +1 @@ +{"db_path": "account1cache4.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": [], "Num_of_source_columns": 0} diff --git a/RQs/ground_truth_normalized/PII_A3_account2cache4.jsonl b/RQs/ground_truth_normalized/PII_A3_account2cache4.jsonl new file mode 100644 index 0000000..c1dc63c --- /dev/null +++ b/RQs/ground_truth_normalized/PII_A3_account2cache4.jsonl @@ -0,0 +1 @@ +{"db_path": "account2cache4.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": [], "Num_of_source_columns": 0} diff --git a/RQs/ground_truth_normalized/PII_A3_account3cache4.jsonl b/RQs/ground_truth_normalized/PII_A3_account3cache4.jsonl new file mode 100644 index 0000000..b802427 --- /dev/null +++ b/RQs/ground_truth_normalized/PII_A3_account3cache4.jsonl @@ -0,0 +1 @@ +{"db_path": "account3cache4.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": [], "Num_of_source_columns": 0} diff --git a/RQs/ground_truth_normalized/PII_A4_gmm_myplaces.jsonl b/RQs/ground_truth_normalized/PII_A4_gmm_myplaces.jsonl new file mode 100644 index 0000000..82b1e89 --- /dev/null +++ b/RQs/ground_truth_normalized/PII_A4_gmm_myplaces.jsonl @@ -0,0 +1 @@ +{"db_path": "gmm_myplaces.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": [], "Num_of_source_columns": 0} diff --git a/RQs/ground_truth_normalized/PII_A4_gmm_storage.jsonl b/RQs/ground_truth_normalized/PII_A4_gmm_storage.jsonl new file mode 100644 index 0000000..ce57d30 --- /dev/null +++ b/RQs/ground_truth_normalized/PII_A4_gmm_storage.jsonl @@ -0,0 +1 @@ +{"db_path": "gmm_storage.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": [], "Num_of_source_columns": 0} diff --git a/RQs/ground_truth_normalized/PII_A4_peopleCache_sharononeil368@gmail.com_com.google_14.jsonl b/RQs/ground_truth_normalized/PII_A4_peopleCache_sharononeil368@gmail.com_com.google_14.jsonl new file mode 100644 index 0000000..d78831c --- /dev/null +++ b/RQs/ground_truth_normalized/PII_A4_peopleCache_sharononeil368@gmail.com_com.google_14.jsonl @@ -0,0 +1,6 @@ +{"db_path": "peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["value"], "Num_of_source_columns": 1} +{"db_path": "peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["value"], "Num_of_source_columns": 1} +{"db_path": "peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["value"], "Num_of_source_columns": 1} +{"db_path": "peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["c1value"], "Num_of_source_columns": 1} +{"db_path": "peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["c1value"], "Num_of_source_columns": 1} +{"db_path": "peopleCache_sharononeil368@gmail.com_com.google_14.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["c1value"], "Num_of_source_columns": 1} diff --git a/RQs/ground_truth_normalized/PII_A5_SBrowser.jsonl b/RQs/ground_truth_normalized/PII_A5_SBrowser.jsonl new file mode 100644 index 0000000..6bcf6f4 --- /dev/null +++ b/RQs/ground_truth_normalized/PII_A5_SBrowser.jsonl @@ -0,0 +1,3 @@ +{"db_path": "SBrowser.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["account_name"], "Num_of_source_columns": 1} +{"db_path": "SBrowser.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["account_name"], "Num_of_source_columns": 1} +{"db_path": "SBrowser.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["account_name"], "Num_of_source_columns": 1} diff --git a/RQs/ground_truth_normalized/PII_A5_SBrowser2.jsonl b/RQs/ground_truth_normalized/PII_A5_SBrowser2.jsonl new file mode 100644 index 0000000..b84ba31 --- /dev/null +++ b/RQs/ground_truth_normalized/PII_A5_SBrowser2.jsonl @@ -0,0 +1 @@ +{"db_path": "SBrowser2.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": [], "Num_of_source_columns": 0} diff --git a/RQs/ground_truth_normalized/PII_A5_searchengine.jsonl b/RQs/ground_truth_normalized/PII_A5_searchengine.jsonl new file mode 100644 index 0000000..3d5b8a4 --- /dev/null +++ b/RQs/ground_truth_normalized/PII_A5_searchengine.jsonl @@ -0,0 +1 @@ +{"db_path": "searchengine.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": [], "Num_of_source_columns": 0} diff --git a/RQs/ground_truth_normalized/PII_I1_CallHistory.jsonl b/RQs/ground_truth_normalized/PII_I1_CallHistory.jsonl new file mode 100644 index 0000000..188cf7b --- /dev/null +++ b/RQs/ground_truth_normalized/PII_I1_CallHistory.jsonl @@ -0,0 +1 @@ +{"db_path": "CallHistory.sqlitedb", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": [], "Num_of_source_columns": 0} diff --git a/RQs/ground_truth_normalized/PII_I1_ChatStorage.jsonl b/RQs/ground_truth_normalized/PII_I1_ChatStorage.jsonl new file mode 100644 index 0000000..01f31ca --- /dev/null +++ b/RQs/ground_truth_normalized/PII_I1_ChatStorage.jsonl @@ -0,0 +1,4 @@ +{"db_path": "ChatStorage.sqlite", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["zpartnername"], "Num_of_source_columns": 1} +{"db_path": "ChatStorage.sqlite", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["zpartnername"], "Num_of_source_columns": 1} +{"db_path": "ChatStorage.sqlite", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["zpushname"], "Num_of_source_columns": 1} +{"db_path": "ChatStorage.sqlite", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["zpushname"], "Num_of_source_columns": 1} diff --git a/RQs/ground_truth_normalized/PII_I1_ContactsV2.jsonl b/RQs/ground_truth_normalized/PII_I1_ContactsV2.jsonl new file mode 100644 index 0000000..27ba427 --- /dev/null +++ b/RQs/ground_truth_normalized/PII_I1_ContactsV2.jsonl @@ -0,0 +1,2 @@ +{"db_path": "ContactsV2.sqlite", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["zfullname"], "Num_of_source_columns": 1} +{"db_path": "ContactsV2.sqlite", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["zphonenumber"], "Num_of_source_columns": 1} diff --git a/RQs/ground_truth_normalized/PII_I2_AddressBook.jsonl b/RQs/ground_truth_normalized/PII_I2_AddressBook.jsonl new file mode 100644 index 0000000..c93a193 --- /dev/null +++ b/RQs/ground_truth_normalized/PII_I2_AddressBook.jsonl @@ -0,0 +1,5 @@ +{"db_path": "AddressBook.sqlitedb", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["first+last"], "Num_of_source_columns": 1} +{"db_path": "AddressBook.sqlitedb", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["value"], "Num_of_source_columns": 1} +{"db_path": "AddressBook.sqlitedb", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["value"], "Num_of_source_columns": 1} +{"db_path": "AddressBook.sqlitedb", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["c17email"], "Num_of_source_columns": 1} +{"db_path": "AddressBook.sqlitedb", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["c0first+c1last+c2middle"], "Num_of_source_columns": 1} diff --git a/RQs/ground_truth_normalized/PII_I2_AddressBookImages.jsonl b/RQs/ground_truth_normalized/PII_I2_AddressBookImages.jsonl new file mode 100644 index 0000000..ea15947 --- /dev/null +++ b/RQs/ground_truth_normalized/PII_I2_AddressBookImages.jsonl @@ -0,0 +1 @@ +{"db_path": "AddressBookImages.sqlitedb", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": [], "Num_of_source_columns": 0} diff --git a/RQs/ground_truth_normalized/PII_I3_sms.jsonl b/RQs/ground_truth_normalized/PII_I3_sms.jsonl new file mode 100644 index 0000000..6e3efec --- /dev/null +++ b/RQs/ground_truth_normalized/PII_I3_sms.jsonl @@ -0,0 +1,6 @@ +{"db_path": "sms.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["account_login", "chat_identifier", "guid", "last_addressed_handle"], "Num_of_source_columns": 4} +{"db_path": "sms.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["account_login", "chat_identifier", "guid"], "Num_of_source_columns": 3} +{"db_path": "sms.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["account", "destination_caller_id"], "Num_of_source_columns": 2} +{"db_path": "sms.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["account"], "Num_of_source_columns": 1} +{"db_path": "sms.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["id", "uncanonicalized_id"], "Num_of_source_columns": 2} +{"db_path": "sms.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["id"], "Num_of_source_columns": 1} diff --git a/RQs/ground_truth_normalized/PII_I4_CloudTabs.jsonl b/RQs/ground_truth_normalized/PII_I4_CloudTabs.jsonl new file mode 100644 index 0000000..d9f7088 --- /dev/null +++ b/RQs/ground_truth_normalized/PII_I4_CloudTabs.jsonl @@ -0,0 +1 @@ +{"db_path": "CloudTabs.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": [], "Num_of_source_columns": 0} diff --git a/RQs/ground_truth_normalized/PII_I4_History.jsonl b/RQs/ground_truth_normalized/PII_I4_History.jsonl new file mode 100644 index 0000000..c8167a3 --- /dev/null +++ b/RQs/ground_truth_normalized/PII_I4_History.jsonl @@ -0,0 +1 @@ +{"db_path": "History.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": [], "Num_of_source_columns": 0} diff --git a/RQs/ground_truth_normalized/PII_I5_Calendar.jsonl b/RQs/ground_truth_normalized/PII_I5_Calendar.jsonl new file mode 100644 index 0000000..78c9198 --- /dev/null +++ b/RQs/ground_truth_normalized/PII_I5_Calendar.jsonl @@ -0,0 +1,5 @@ +{"db_path": "Calendar.sqlitedb", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["last_sync_title", "notes", "owner_identity_email", "self_identity_email", "shared_owner_address", "title"], "Num_of_source_columns": 6} +{"db_path": "Calendar.sqlitedb", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["shared_owner_name"], "Num_of_source_columns": 1} +{"db_path": "Calendar.sqlitedb", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["address"], "Num_of_source_columns": 1} +{"db_path": "Calendar.sqlitedb", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["display_name"], "Num_of_source_columns": 1} +{"db_path": "Calendar.sqlitedb", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": ["owner_name"], "Num_of_source_columns": 1} diff --git a/RQs/ground_truth_normalized/PII_I5_Extras.jsonl b/RQs/ground_truth_normalized/PII_I5_Extras.jsonl new file mode 100644 index 0000000..d64dd2f --- /dev/null +++ b/RQs/ground_truth_normalized/PII_I5_Extras.jsonl @@ -0,0 +1 @@ +{"db_path": "Extras.db", "PII_type": "", "PII": [], "Num_of_PII": 0, "source_columns": [], "Num_of_source_columns": 0}