Refactor code: mainly organize files into folders.

This commit is contained in:
Frank Xu
2026-02-01 21:39:56 -05:00
parent 583c27ba0b
commit 59432cff2b
55 changed files with 113 additions and 2102 deletions

File diff suppressed because it is too large. (Load Diff)

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"id": "c2d824a6",
"metadata": {},
"outputs": [
@@ -139,13 +139,13 @@
"\n",
" from stats_utils import normalize_and_slim_record\n",
"\n",
" IN_DIR = Path(r\"..\\..\\batch_results_gpt4o\")\n",
" IN_DIR = Path(r\"..\\..\\model_PII_results\\gpt4o\")\n",
" RESULTS_DIR = Path(r\"normalized_PII_results\\gpt4o\\db_level\")\n",
" OUT_DIR = STATS_DIR/ RESULTS_DIR \n",
"\n",
" normalize_jsonl_folder(IN_DIR, OUT_DIR, normalize_and_slim_record, delete_out_dir_first=True)\n",
" \n",
" IN_DIR = Path(r\"..\\..\\ground_truth\")\n",
" IN_DIR = Path(r\"..\\..\\model_PII_results\\ground_truth\")\n",
" RESULTS_DIR = Path(r\"normalized_PII_results\\ground_truth\\db_level\")\n",
" OUT_DIR = STATS_DIR/ RESULTS_DIR \n",
"\n",

File diff suppressed because one or more lines are too long

View File

@@ -1,6 +1,7 @@
db_dir: selectedDBs
out_dir: batch_results
config_py: my_run_config.py
enable_observe: false
pii_targets:
- EMAIL
- PHONE

View File

@@ -1,6 +1,6 @@
db_files = [
# "test2.db",
# "users.db",
"test2.db",
"users.db",
# "A1_commerce.db",
# "A1_msgstore.db",
# "A1_wa.db",
@@ -8,24 +8,24 @@ db_files = [
# "A2_journal.db",
# "A2_main.db",
# "A3_account1cache4.db",
"A3_account2cache4.db",
"A3_account3cache4.db",
"A4_gmm_myplaces.db",
"A4_gmm_storage.db",
"A4_peopleCache_sharononeil368@gmail.com_com.google_14.db",
"A5_SBrowser.db",
"A5_SBrowser2.db",
"A5_searchengine.db",
"I1_CallHistory.sqlite",
"I1_ChatStorage.sqlite",
"I1_ContactsV2.sqlite",
"I2_AddressBook.sqlitedb",
"I2_AddressBookImages.sqlitedb",
"I3_sms.db",
"I4_CloudTabs.db",
"I4_History.db",
"I5_Calendar.sqlitedb",
"I5_Extras.db",
# "A3_account2cache4.db",
# "A3_account3cache4.db",
# "A4_gmm_myplaces.db",
# "A4_gmm_storage.db",
# "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db",
# "A5_SBrowser.db",
# "A5_SBrowser2.db",
# "A5_searchengine.db",
# "I1_CallHistory.sqlite",
# "I1_ChatStorage.sqlite",
# "I1_ContactsV2.sqlite",
# "I2_AddressBook.sqlitedb",
# "I2_AddressBookImages.sqlitedb",
# "I3_sms.db",
# "I4_CloudTabs.db",
# "I4_History.db",
# "I5_Calendar.sqlitedb",
# "I5_Extras.db",
]
PII_CONFIG = {