Refactor code: mainly organize files into folders.

This commit is contained in:
Frank Xu
2026-02-01 21:39:56 -05:00
parent 583c27ba0b
commit 59432cff2b
55 changed files with 113 additions and 2102 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": null,
"id": "c2d824a6", "id": "c2d824a6",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -139,13 +139,13 @@
"\n", "\n",
" from stats_utils import normalize_and_slim_record\n", " from stats_utils import normalize_and_slim_record\n",
"\n", "\n",
" IN_DIR = Path(r\"..\\..\\batch_results_gpt4o\")\n", " IN_DIR = Path(r\"..\\..\\model_PII_results\\gpt4o\")\n",
" RESULTS_DIR = Path(r\"normalized_PII_results\\gpt4o\\db_level\")\n", " RESULTS_DIR = Path(r\"normalized_PII_results\\gpt4o\\db_level\")\n",
" OUT_DIR = STATS_DIR/ RESULTS_DIR \n", " OUT_DIR = STATS_DIR/ RESULTS_DIR \n",
"\n", "\n",
" normalize_jsonl_folder(IN_DIR, OUT_DIR, normalize_and_slim_record, delete_out_dir_first=True)\n", " normalize_jsonl_folder(IN_DIR, OUT_DIR, normalize_and_slim_record, delete_out_dir_first=True)\n",
" \n", " \n",
" IN_DIR = Path(r\"..\\..\\ground_truth\")\n", " IN_DIR = Path(r\"..\\..\\model_PII_results\\ground_truth\")\n",
" RESULTS_DIR = Path(r\"normalized_PII_results\\ground_truth\\db_level\")\n", " RESULTS_DIR = Path(r\"normalized_PII_results\\ground_truth\\db_level\")\n",
" OUT_DIR = STATS_DIR/ RESULTS_DIR \n", " OUT_DIR = STATS_DIR/ RESULTS_DIR \n",
"\n", "\n",

File diff suppressed because one or more lines are too long

View File

@@ -1,6 +1,7 @@
db_dir: selectedDBs db_dir: selectedDBs
out_dir: batch_results out_dir: batch_results
config_py: my_run_config.py config_py: my_run_config.py
enable_observe: false
pii_targets: pii_targets:
- EMAIL - EMAIL
- PHONE - PHONE

View File

@@ -1,6 +1,6 @@
db_files = [ db_files = [
# "test2.db", "test2.db",
# "users.db", "users.db",
# "A1_commerce.db", # "A1_commerce.db",
# "A1_msgstore.db", # "A1_msgstore.db",
# "A1_wa.db", # "A1_wa.db",
@@ -8,24 +8,24 @@ db_files = [
# "A2_journal.db", # "A2_journal.db",
# "A2_main.db", # "A2_main.db",
# "A3_account1cache4.db", # "A3_account1cache4.db",
"A3_account2cache4.db", # "A3_account2cache4.db",
"A3_account3cache4.db", # "A3_account3cache4.db",
"A4_gmm_myplaces.db", # "A4_gmm_myplaces.db",
"A4_gmm_storage.db", # "A4_gmm_storage.db",
"A4_peopleCache_sharononeil368@gmail.com_com.google_14.db", # "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db",
"A5_SBrowser.db", # "A5_SBrowser.db",
"A5_SBrowser2.db", # "A5_SBrowser2.db",
"A5_searchengine.db", # "A5_searchengine.db",
"I1_CallHistory.sqlite", # "I1_CallHistory.sqlite",
"I1_ChatStorage.sqlite", # "I1_ChatStorage.sqlite",
"I1_ContactsV2.sqlite", # "I1_ContactsV2.sqlite",
"I2_AddressBook.sqlitedb", # "I2_AddressBook.sqlitedb",
"I2_AddressBookImages.sqlitedb", # "I2_AddressBookImages.sqlitedb",
"I3_sms.db", # "I3_sms.db",
"I4_CloudTabs.db", # "I4_CloudTabs.db",
"I4_History.db", # "I4_History.db",
"I5_Calendar.sqlitedb", # "I5_Calendar.sqlitedb",
"I5_Extras.db", # "I5_Extras.db",
] ]
PII_CONFIG = { PII_CONFIG = {