mirror of
https://github.com/frankwxu/mobile-pii-discovery-agent.git
synced 2026-02-20 13:40:41 +00:00
refactor code. mainly organize files into folders
This commit is contained in:
2118
PII_Discovery.ipynb
2118
PII_Discovery.ipynb
File diff suppressed because it is too large
Load Diff
@@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": null,
|
||||||
"id": "c2d824a6",
|
"id": "c2d824a6",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -139,13 +139,13 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" from stats_utils import normalize_and_slim_record\n",
|
" from stats_utils import normalize_and_slim_record\n",
|
||||||
"\n",
|
"\n",
|
||||||
" IN_DIR = Path(r\"..\\..\\batch_results_gpt4o\")\n",
|
" IN_DIR = Path(r\"..\\..\\model_PII_results\\gpt4o\")\n",
|
||||||
" RESULTS_DIR = Path(r\"normalized_PII_results\\gpt4o\\db_level\")\n",
|
" RESULTS_DIR = Path(r\"normalized_PII_results\\gpt4o\\db_level\")\n",
|
||||||
" OUT_DIR = STATS_DIR/ RESULTS_DIR \n",
|
" OUT_DIR = STATS_DIR/ RESULTS_DIR \n",
|
||||||
"\n",
|
"\n",
|
||||||
" normalize_jsonl_folder(IN_DIR, OUT_DIR, normalize_and_slim_record, delete_out_dir_first=True)\n",
|
" normalize_jsonl_folder(IN_DIR, OUT_DIR, normalize_and_slim_record, delete_out_dir_first=True)\n",
|
||||||
" \n",
|
" \n",
|
||||||
" IN_DIR = Path(r\"..\\..\\ground_truth\")\n",
|
" IN_DIR = Path(r\"..\\..\\model_PII_results\\ground_truth\")\n",
|
||||||
" RESULTS_DIR = Path(r\"normalized_PII_results\\ground_truth\\db_level\")\n",
|
" RESULTS_DIR = Path(r\"normalized_PII_results\\ground_truth\\db_level\")\n",
|
||||||
" OUT_DIR = STATS_DIR/ RESULTS_DIR \n",
|
" OUT_DIR = STATS_DIR/ RESULTS_DIR \n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -1,6 +1,7 @@
|
|||||||
db_dir: selectedDBs
|
db_dir: selectedDBs
|
||||||
out_dir: batch_results
|
out_dir: batch_results
|
||||||
config_py: my_run_config.py
|
config_py: my_run_config.py
|
||||||
|
enable_observe: false
|
||||||
pii_targets:
|
pii_targets:
|
||||||
- EMAIL
|
- EMAIL
|
||||||
- PHONE
|
- PHONE
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
db_files = [
|
db_files = [
|
||||||
# "test2.db",
|
"test2.db",
|
||||||
# "users.db",
|
"users.db",
|
||||||
# "A1_commerce.db",
|
# "A1_commerce.db",
|
||||||
# "A1_msgstore.db",
|
# "A1_msgstore.db",
|
||||||
# "A1_wa.db",
|
# "A1_wa.db",
|
||||||
@@ -8,24 +8,24 @@ db_files = [
|
|||||||
# "A2_journal.db",
|
# "A2_journal.db",
|
||||||
# "A2_main.db",
|
# "A2_main.db",
|
||||||
# "A3_account1cache4.db",
|
# "A3_account1cache4.db",
|
||||||
"A3_account2cache4.db",
|
# "A3_account2cache4.db",
|
||||||
"A3_account3cache4.db",
|
# "A3_account3cache4.db",
|
||||||
"A4_gmm_myplaces.db",
|
# "A4_gmm_myplaces.db",
|
||||||
"A4_gmm_storage.db",
|
# "A4_gmm_storage.db",
|
||||||
"A4_peopleCache_sharononeil368@gmail.com_com.google_14.db",
|
# "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db",
|
||||||
"A5_SBrowser.db",
|
# "A5_SBrowser.db",
|
||||||
"A5_SBrowser2.db",
|
# "A5_SBrowser2.db",
|
||||||
"A5_searchengine.db",
|
# "A5_searchengine.db",
|
||||||
"I1_CallHistory.sqlite",
|
# "I1_CallHistory.sqlite",
|
||||||
"I1_ChatStorage.sqlite",
|
# "I1_ChatStorage.sqlite",
|
||||||
"I1_ContactsV2.sqlite",
|
# "I1_ContactsV2.sqlite",
|
||||||
"I2_AddressBook.sqlitedb",
|
# "I2_AddressBook.sqlitedb",
|
||||||
"I2_AddressBookImages.sqlitedb",
|
# "I2_AddressBookImages.sqlitedb",
|
||||||
"I3_sms.db",
|
# "I3_sms.db",
|
||||||
"I4_CloudTabs.db",
|
# "I4_CloudTabs.db",
|
||||||
"I4_History.db",
|
# "I4_History.db",
|
||||||
"I5_Calendar.sqlitedb",
|
# "I5_Calendar.sqlitedb",
|
||||||
"I5_Extras.db",
|
# "I5_Extras.db",
|
||||||
]
|
]
|
||||||
|
|
||||||
PII_CONFIG = {
|
PII_CONFIG = {
|
||||||
|
|||||||
Reference in New Issue
Block a user