from functools import partial
from typing import Any, Dict


def observe(
    state: "EvidenceState",
    enable_observe: bool = False,
    label: str = "OBSERVE",
    sample_rows: int = 20,
    sample_evidence: int = 10,
) -> Dict[str, Any]:
    """Print a read-only snapshot of the graph state for debugging.

    This is a pass-through inspection node: it never modifies ``state`` and
    always returns an empty update.

    Args:
        state: Mapping-like graph state. Every key is read with ``.get`` so a
            partially populated state does not raise.
        enable_observe: When False (the default) nothing is printed.
        label: Tag shown in the snapshot header so the checkpoints wired into
            the graph can be told apart in the output.
        sample_rows: Maximum number of entries of ``state["rows"]`` to print.
            Negative values are treated as 0.
        sample_evidence: Maximum number of entries of ``state["evidence"]``
            to print. Negative values are treated as 0.

    Returns:
        An empty dict, i.e. a no-op state update.
    """
    if not enable_observe:
        return {}

    # Clamp the limits: a negative slice bound would mean "everything except
    # the last N items", which defeats the purpose of printing a small sample.
    row_limit = max(0, sample_rows)
    evidence_limit = max(0, sample_evidence)

    print(f"\n=== STATE SNAPSHOT [{label}] ===")

    # Messages
    print("\n--- MESSAGES ---")
    # `or []` (rather than a .get default) also covers a key that is present
    # but set to None, matching how rows/evidence are read below.
    for i, m in enumerate(state.get("messages") or []):
        # Fall back gracefully for entries that are not message objects.
        mtype = getattr(m, "type", "unknown")
        mcontent = getattr(m, "content", str(m))
        print(f"{i}: {str(mtype).upper()} -> {mcontent}")

    # Metadata
    print("\n--- BEGIN METADATA ---")
    print(f"attempt : {state.get('attempt')}")
    print(f"max_attempts : {state.get('max_attempts')}")
    print(f"phase : {state.get('phase')}")
    print(f"PII type : {(state.get('entity_config') or {}).get('type')}")

    # SQL separation
    print(f"exploration_sql : {state.get('exploration_sql')}")
    print(f"extraction_sql : {state.get('extraction_sql')}")

    # Outputs
    rows = state.get("rows") or []
    print(f"rows_count : {len(rows)}")
    print(f"rows_sample : {rows[:row_limit] if rows else []}")

    evidence = state.get("evidence") or []
    print(f"classification : {state.get('classification')}")
    print(f"evidence_count : {len(evidence)}")
    print(f"evidence_sample : {evidence[:evidence_limit]}")

    print(f"source_columns : {state.get('source_columns')}")
    print("\n--- END METADATA ---")

    return {}  # no-op update


# ---- Build graph with an enable flag ----
def build_graph(enable_observe: bool = False):
    """Assemble and compile the PII discovery graph.

    The flow is planner -> execute -> classify, with an ``observe``
    checkpoint after each of those steps and one after extraction.
    ``next_step`` decides where to go after the post-classify checkpoint:
    switch to the extraction phase (which re-enters the planner), extract,
    re-plan, or stop.

    Args:
        enable_observe: Forwarded to every ``observe`` checkpoint. When False
            the checkpoints stay in the graph but print nothing.

    Returns:
        The compiled graph, as returned by ``StateGraph.compile()``.
    """
    # Imported here so that `observe` stays usable without langgraph installed.
    from langgraph.graph import END, StateGraph

    def checkpoint(label: str):
        # Bind the flag and label so the node matches (state) -> update.
        return partial(observe, enable_observe=enable_observe, label=label)

    graph = StateGraph(EvidenceState)

    # Nodes
    graph.add_node("planner", planner)
    graph.add_node("observe_plan", checkpoint("PLAN"))
    graph.add_node("execute", sql_execute)
    graph.add_node("observe_execution", checkpoint("EXECUTION"))
    graph.add_node("classify", classify)
    graph.add_node("observe_classify", checkpoint("CLASSIFY"))
    graph.add_node("switch_phase", switch_to_extraction)
    graph.add_node("extract", extract)
    graph.add_node("observe_final", checkpoint("FINAL"))

    graph.set_entry_point("planner")

    # --- FLOW ---
    graph.add_edge("planner", "observe_plan")
    graph.add_edge("observe_plan", "execute")

    graph.add_edge("execute", "observe_execution")
    graph.add_edge("observe_execution", "classify")

    graph.add_edge("classify", "observe_classify")

    # Routing happens after the post-classify checkpoint.
    graph.add_conditional_edges(
        "observe_classify",
        next_step,
        {
            "to_extraction": "switch_phase",
            "do_extract": "extract",
            "replan": "planner",
            "stop_none": END,
            "stop_limit": END,
        },
    )

    graph.add_edge("switch_phase", "planner")
    graph.add_edge("extract", "observe_final")
    graph.add_edge("observe_final", END)

    return graph.compile()
email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "2: AI -> Retrieved 10 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : email address\n", - "exploration_sql : SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "extraction_sql : None\n", - "rows_count : 10\n", - "rows_sample : [('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org in the database\n", - "1: AI -> SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "2: AI -> Retrieved 10 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : email address\n", - "exploration_sql : SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "extraction_sql : None\n", - "rows_count : 10\n", - "rows_sample : [('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), 
('jack.moore@example.com',)]\n", - "classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple valid email addresses formatted correctly.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[PHASE] discovery → extraction\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org in the database\n", - "1: AI -> SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "2: AI -> Retrieved 10 rows\n", - "3: AI -> SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : email address\n", - "exploration_sql : SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "extraction_sql : SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "rows_count : 10\n", - "rows_sample : [('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n", - "classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple valid email addresses formatted correctly.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[SQL EXEC] Retrieved 10 rows\n", "[TRACKING] Saved source columns: ['users.email']\n", - 
"\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org in the database\n", - "1: AI -> SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "2: AI -> Retrieved 10 rows\n", - "3: AI -> SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "4: AI -> Retrieved 10 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : email address\n", - "exploration_sql : SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "extraction_sql : SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "rows_count : 10\n", - "rows_sample : [('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n", - "classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple valid email addresses formatted correctly.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.email']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org in the database\n", - "1: AI -> SELECT email FROM users 
WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "2: AI -> Retrieved 10 rows\n", - "3: AI -> SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "4: AI -> Retrieved 10 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : email address\n", - "exploration_sql : SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "extraction_sql : SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "rows_count : 10\n", - "rows_sample : [('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n", - "classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple valid email addresses formatted correctly.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.email']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org in the database\n", - "1: AI -> SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "2: AI -> Retrieved 10 rows\n", - "3: AI -> SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "4: AI -> Retrieved 10 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : 
email address\n", - "exploration_sql : SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "extraction_sql : SELECT email FROM users WHERE email REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "rows_count : 10\n", - "rows_sample : [('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n", - "classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple valid email addresses formatted correctly.'}\n", - "evidence_count : 10\n", - "evidence_sample : ['alice.johnson@example.com', 'brian.smith@example.com', 'carol.davis@example.com', 'david.miller@example.com', 'emma.wilson@example.com', 'frank.brown@example.com', 'grace.taylor@example.com', 'henry.anderson@example.com', 'irene.thomas@example.com', 'jack.moore@example.com']\n", - "source_columns : ['users.email']\n", - "\n", - "--- END METADATA ---\n", " Processing: PHONE\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725 in the database\n", - "1: AI -> SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : US phone number\n", - "exploration_sql : SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "extraction_sql : None\n", - "rows_count : 0\n", - "rows_sample : []\n", 
- "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[SQL EXEC] Retrieved 10 rows\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725 in the database\n", - "1: AI -> SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "2: AI -> Retrieved 10 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : US phone number\n", - "exploration_sql : SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "extraction_sql : None\n", - "rows_count : 10\n", - "rows_sample : [('410-555-1001',), ('301-555-1002',), ('202-555-1003',), ('703-555-1004',), ('240-555-1005',), ('571-555-1006',), ('410-555-1007',), ('301-555-1008',), ('202-555-1009',), ('703-555-1010',)]\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725 in the database\n", - "1: AI -> SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "2: AI -> Retrieved 10 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : US phone number\n", - 
"exploration_sql : SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "extraction_sql : None\n", - "rows_count : 10\n", - "rows_sample : [('410-555-1001',), ('301-555-1002',), ('202-555-1003',), ('703-555-1004',), ('240-555-1005',), ('571-555-1006',), ('410-555-1007',), ('301-555-1008',), ('202-555-1009',), ('703-555-1010',)]\n", - "classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple valid US phone numbers formatted correctly.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[PHASE] discovery → extraction\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725 in the database\n", - "1: AI -> SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "2: AI -> Retrieved 10 rows\n", - "3: AI -> SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : US phone number\n", - "exploration_sql : SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "extraction_sql : SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "rows_count : 10\n", - "rows_sample : [('410-555-1001',), ('301-555-1002',), ('202-555-1003',), ('703-555-1004',), ('240-555-1005',), ('571-555-1006',), ('410-555-1007',), ('301-555-1008',), 
('202-555-1009',), ('703-555-1010',)]\n", - "classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple valid US phone numbers formatted correctly.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[SQL EXEC] Retrieved 10 rows\n", "[TRACKING] Saved source columns: ['users.phone']\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725 in the database\n", - "1: AI -> SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "2: AI -> Retrieved 10 rows\n", - "3: AI -> SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "4: AI -> Retrieved 10 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : US phone number\n", - "exploration_sql : SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "extraction_sql : SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "rows_count : 10\n", - "rows_sample : [('410-555-1001',), ('301-555-1002',), ('202-555-1003',), ('703-555-1004',), ('240-555-1005',), ('571-555-1006',), ('410-555-1007',), ('301-555-1008',), ('202-555-1009',), ('703-555-1010',)]\n", - "classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple valid US phone numbers formatted correctly.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.phone']\n", - "\n", - "--- 
END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725 in the database\n", - "1: AI -> SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "2: AI -> Retrieved 10 rows\n", - "3: AI -> SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "4: AI -> Retrieved 10 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : US phone number\n", - "exploration_sql : SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "extraction_sql : SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "rows_count : 10\n", - "rows_sample : [('410-555-1001',), ('301-555-1002',), ('202-555-1003',), ('703-555-1004',), ('240-555-1005',), ('571-555-1006',), ('410-555-1007',), ('301-555-1008',), ('202-555-1009',), ('703-555-1010',)]\n", - "classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple valid US phone numbers formatted correctly.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.phone']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725 in the database\n", - "1: AI -> SELECT phone FROM users WHERE phone 
REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "2: AI -> Retrieved 10 rows\n", - "3: AI -> SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "4: AI -> Retrieved 10 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : US phone number\n", - "exploration_sql : SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "extraction_sql : SELECT phone FROM users WHERE phone REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "rows_count : 10\n", - "rows_sample : [('410-555-1001',), ('301-555-1002',), ('202-555-1003',), ('703-555-1004',), ('240-555-1005',), ('571-555-1006',), ('410-555-1007',), ('301-555-1008',), ('202-555-1009',), ('703-555-1010',)]\n", - "classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple valid US phone numbers formatted correctly.'}\n", - "evidence_count : 10\n", - "evidence_sample : ['4105551001', '3015551002', '2025551003', '7035551004', '2405551005', '5715551006', '4105551007', '3015551008', '2025551009', '7035551010']\n", - "source_columns : ['users.phone']\n", - "\n", - "--- END METADATA ---\n", " Processing: USERNAME\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique or quasi-unique identifier used to authenticate or reference a user account on an application or online service. Even if pseudonymous, it is treated as PII when it can be linked to a specific individual through account records, cross-app correlation, or supporting metadata. 
Examples include John.smith, ericxu99, marsha_mellos, heisenbergercarro, x7Qp_13, user_482019 in the database\n", - "1: AI -> SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : username\n", - "exploration_sql : SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "extraction_sql : None\n", - "rows_count : 0\n", - "rows_sample : []\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - "[SQL EXEC] Retrieved 20 rows\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique or quasi-unique identifier used to authenticate or reference a user account on an application or online service. Even if pseudonymous, it is treated as PII when it can be linked to a specific individual through account records, cross-app correlation, or supporting metadata. 
Examples include John.smith, ericxu99, marsha_mellos, heisenbergercarro, x7Qp_13, user_482019 in the database\n", - "1: AI -> SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "2: AI -> Retrieved 20 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : username\n", - "exploration_sql : SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "extraction_sql : None\n", - "rows_count : 20\n", - "rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique or quasi-unique identifier used to authenticate or reference a user account on an application or online service. Even if pseudonymous, it is treated as PII when it can be linked to a specific individual through account records, cross-app correlation, or supporting metadata. 
Examples include John.smith, ericxu99, marsha_mellos, heisenbergercarro, x7Qp_13, user_482019 in the database\n", - "1: AI -> SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "2: AI -> Retrieved 20 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : username\n", - "exploration_sql : SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "extraction_sql : None\n", - "rows_count : 20\n", - "rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': 'The text contains multiple entries that resemble usernames, including both simple formats (e.g., ajohnson) and email addresses that can be linked to specific individuals.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", + "[SQL EXEC] Retrieved 10 rows\n", "[PHASE] discovery → extraction\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique or quasi-unique identifier used to authenticate or 
reference a user account on an application or online service. Even if pseudonymous, it is treated as PII when it can be linked to a specific individual through account records, cross-app correlation, or supporting metadata. Examples include John.smith, ericxu99, marsha_mellos, heisenbergercarro, x7Qp_13, user_482019 in the database\n", - "1: AI -> SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "2: AI -> Retrieved 20 rows\n", - "3: AI -> SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : username\n", - "exploration_sql : SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "extraction_sql : SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "rows_count : 20\n", - "rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), 
('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': 'The text contains multiple entries that resemble usernames, including both simple formats (e.g., ajohnson) and email addresses that can be linked to specific individuals.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - "[SQL EXEC] Retrieved 20 rows\n", - "[TRACKING] Saved source columns: ['users.username', 'users.email', 'users.phone']\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique or quasi-unique identifier used to authenticate or reference a user account on an application or online service. Even if pseudonymous, it is treated as PII when it can be linked to a specific individual through account records, cross-app correlation, or supporting metadata. 
Examples include John.smith, ericxu99, marsha_mellos, heisenbergercarro, x7Qp_13, user_482019 in the database\n", - "1: AI -> SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "2: AI -> Retrieved 20 rows\n", - "3: AI -> SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "4: AI -> Retrieved 20 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : username\n", - "exploration_sql : SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "extraction_sql : SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "rows_count : 20\n", - "rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), 
('jack.moore@example.com',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': 'The text contains multiple entries that resemble usernames, including both simple formats (e.g., ajohnson) and email addresses that can be linked to specific individuals.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.username', 'users.email', 'users.phone']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique or quasi-unique identifier used to authenticate or reference a user account on an application or online service. Even if pseudonymous, it is treated as PII when it can be linked to a specific individual through account records, cross-app correlation, or supporting metadata. Examples include John.smith, ericxu99, marsha_mellos, heisenbergercarro, x7Qp_13, user_482019 in the database\n", - "1: AI -> SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "2: AI -> Retrieved 20 rows\n", - "3: AI -> SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "4: AI -> Retrieved 20 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : username\n", - "exploration_sql : SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT phone FROM users WHERE phone REGEXP 
'\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "extraction_sql : SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "rows_count : 20\n", - "rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': 'The text contains multiple entries that resemble usernames, including both simple formats (e.g., ajohnson) and email addresses that can be linked to specific individuals.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.username', 'users.email', 'users.phone']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique or quasi-unique identifier used to authenticate or reference a user account on an application or online service. Even if pseudonymous, it is treated as PII when it can be linked to a specific individual through account records, cross-app correlation, or supporting metadata. 
Examples include John.smith, ericxu99, marsha_mellos, heisenbergercarro, x7Qp_13, user_482019 in the database\n", - "1: AI -> SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "2: AI -> Retrieved 20 rows\n", - "3: AI -> SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "4: AI -> Retrieved 20 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : username\n", - "exploration_sql : SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL \n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "extraction_sql : SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' UNION ALL\n", - "SELECT phone FROM users WHERE phone REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "rows_count : 20\n", - "rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), 
('jack.moore@example.com',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': 'The text contains multiple entries that resemble usernames, including both simple formats (e.g., ajohnson) and email addresses that can be linked to specific individuals.'}\n", - "evidence_count : 20\n", - "evidence_sample : ['ajohnson', 'bsmith', 'cdavis', 'dmiller', 'ewilson', 'fbrown', 'gtaylor', 'handerson', 'ithomas', 'jmoore']\n", - "source_columns : ['users.username', 'users.email', 'users.phone']\n", - "\n", - "--- END METADATA ---\n", + "[SQL EXEC] Retrieved 10 rows\n", + "[TRACKING] Saved source columns: ['users.username']\n", " Processing: PERSON_NAME\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.) in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : person name\n", - "exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "extraction_sql : None\n", - "rows_count : 0\n", - "rows_sample : []\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END 
METADATA ---\n", "[SQL EXEC] Retrieved 30 rows\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.) in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "2: AI -> Retrieved 30 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : person name\n", - "exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "extraction_sql : None\n", - "rows_count : 30\n", - "rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a loosely structured human name-like strings that typically consist of a first name, a first 
name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.) in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "2: AI -> Retrieved 30 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : person name\n", - "exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "extraction_sql : None\n", - "rows_count : 30\n", - "rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n", - "classification : {'found': True, 'confidence': 0.95, 'reason': 'The text contains multiple strings that resemble human names, including first names and last names.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[PHASE] discovery → extraction\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, 
initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.) in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "2: AI -> Retrieved 30 rows\n", - "3: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : person name\n", - "exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "extraction_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "rows_count : 30\n", - "rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n", - 
"classification : {'found': True, 'confidence': 0.95, 'reason': 'The text contains multiple strings that resemble human names, including first names and last names.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[SQL EXEC] Retrieved 30 rows\n", "[TRACKING] Saved source columns: ['users.first_name', 'users.last_name', 'users.username']\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.) in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "2: AI -> Retrieved 30 rows\n", - "3: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "4: AI -> Retrieved 30 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : person name\n", - "exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "extraction_sql : SELECT first_name FROM users WHERE first_name REGEXP 
'[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "rows_count : 30\n", - "rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n", - "classification : {'found': True, 'confidence': 0.95, 'reason': 'The text contains multiple strings that resemble human names, including first names and last names.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.first_name', 'users.last_name', 'users.username']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.) 
in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "2: AI -> Retrieved 30 rows\n", - "3: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "4: AI -> Retrieved 30 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : person name\n", - "exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "extraction_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "rows_count : 30\n", - "rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n", - "classification : {'found': True, 
'confidence': 0.95, 'reason': 'The text contains multiple strings that resemble human names, including first names and last names.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.first_name', 'users.last_name', 'users.username']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.) in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "2: AI -> Retrieved 30 rows\n", - "3: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "4: AI -> Retrieved 30 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : person name\n", - "exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "extraction_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT last_name FROM users WHERE last_name REGEXP 
'[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n", - "rows_count : 30\n", - "rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n", - "classification : {'found': True, 'confidence': 0.95, 'reason': 'The text contains multiple strings that resemble human names, including first names and last names.'}\n", - "evidence_count : 30\n", - "evidence_sample : ['Alice', 'Brian', 'Carol', 'David', 'Emma', 'Frank', 'Grace', 'Henry', 'Irene', 'Jack']\n", - "source_columns : ['users.first_name', 'users.last_name', 'users.username']\n", - "\n", - "--- END METADATA ---\n", " Processing: POSTAL_ADDRESS\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. 
Box 123') in the database\n", - "1: AI -> SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : US postal address\n", - "exploration_sql : SELECT street FROM users WHERE street REGEXP 
'(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "extraction_sql : None\n", - "rows_count : 0\n", - "rows_sample : []\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[SQL EXEC] Retrieved 10 rows\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US postal address is a street-level mailing location in the United States, commonly appearing as a 
street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123') in the database\n", - "1: AI -> SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "2: AI -> Retrieved 10 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : US postal address\n", - "exploration_sql : SELECT street 
FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "extraction_sql : None\n", - "rows_count : 10\n", - "rows_sample : [('123 Maple St',), ('456 Oak Ave',), ('789 Pine Rd',), ('321 Birch Blvd',), ('654 Cedar Ln',), ('987 Walnut St',), ('159 Spruce Ct',), ('753 Aspen Way',), ('852 Poplar Dr',), ('951 Cherry Pl',)]\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - 
"\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123') in the database\n", - "1: AI -> SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "2: AI -> Retrieved 10 
rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : US postal address\n", - "exploration_sql : SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "extraction_sql : None\n", - "rows_count : 10\n", - "rows_sample : [('123 Maple St',), ('456 Oak Ave',), ('789 Pine Rd',), ('321 Birch Blvd',), ('654 Cedar Ln',), ('987 Walnut St',), ('159 Spruce Ct',), ('753 Aspen Way',), ('852 
Poplar Dr',), ('951 Cherry Pl',)]\n", - "classification : {'found': True, 'confidence': 0.95, 'reason': 'The text contains multiple street-level addresses in the United States format, including street names and suffixes.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[PHASE] discovery → extraction\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123') in the database\n", - "1: AI -> SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT zip_code FROM users 
WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "2: AI -> Retrieved 10 rows\n", - "3: AI -> SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - 
"\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : US postal address\n", - "exploration_sql : SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "extraction_sql : SELECT street FROM users WHERE street REGEXP 
'(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "rows_count : 10\n", - "rows_sample : [('123 Maple St',), ('456 Oak Ave',), ('789 Pine Rd',), ('321 Birch Blvd',), ('654 Cedar Ln',), ('987 Walnut St',), ('159 Spruce Ct',), ('753 Aspen Way',), ('852 Poplar Dr',), ('951 Cherry Pl',)]\n", - "classification : {'found': True, 'confidence': 0.95, 'reason': 'The text contains multiple street-level addresses in the United States format, including street names and suffixes.'}\n", - "evidence_count : 0\n", - 
"evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[SQL EXEC] Retrieved 10 rows\n", - "[TRACKING] Saved source columns: ['users.street', 'users.city', 'users.state', 'users.zip_code']\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123') in the database\n", - "1: AI -> SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT zip_code FROM users WHERE zip_code REGEXP 
'(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "2: AI -> Retrieved 10 rows\n", - "3: AI -> SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "4: AI -> Retrieved 10 
rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : US postal address\n", - "exploration_sql : SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "extraction_sql : SELECT street FROM users WHERE street REGEXP 
'(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "rows_count : 10\n", - "rows_sample : [('123 Maple St',), ('456 Oak Ave',), ('789 Pine Rd',), ('321 Birch Blvd',), ('654 Cedar Ln',), ('987 Walnut St',), ('159 Spruce Ct',), ('753 Aspen Way',), ('852 Poplar Dr',), ('951 Cherry Pl',)]\n", - "classification : {'found': True, 'confidence': 0.95, 'reason': 'The text contains multiple street-level addresses in the United States format, including street names and suffixes.'}\n", - "evidence_count : 0\n", - 
"evidence_sample : []\n", - "source_columns : ['users.street', 'users.city', 'users.state', 'users.zip_code']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123') in the database\n", - "1: AI -> SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT zip_code FROM users WHERE zip_code REGEXP 
'(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "2: AI -> Retrieved 10 rows\n", - "3: AI -> SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "4: AI -> Retrieved 10 
rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : US postal address\n", - "exploration_sql : SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "extraction_sql : SELECT street FROM users WHERE street REGEXP 
'(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "rows_count : 10\n", - "rows_sample : [('123 Maple St',), ('456 Oak Ave',), ('789 Pine Rd',), ('321 Birch Blvd',), ('654 Cedar Ln',), ('987 Walnut St',), ('159 Spruce Ct',), ('753 Aspen Way',), ('852 Poplar Dr',), ('951 Cherry Pl',)]\n", - "classification : {'found': True, 'confidence': 0.95, 'reason': 'The text contains multiple street-level mailing locations in the United States, each formatted with a street number and street name.'}\n", - 
"evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.street', 'users.city', 'users.state', 'users.zip_code']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123') in the database\n", - "1: AI -> SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT zip_code FROM users WHERE zip_code REGEXP 
'(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "2: AI -> Retrieved 10 rows\n", - "3: AI -> SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "4: AI -> Retrieved 10 
rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : US postal address\n", - "exploration_sql : SELECT street FROM users WHERE street REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b' \n", - "UNION ALL \n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "extraction_sql : SELECT street FROM users WHERE street REGEXP 
'(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT city FROM users WHERE city REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT state FROM users WHERE state REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "UNION ALL\n", - "SELECT zip_code FROM users WHERE zip_code REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b';\n", - "rows_count : 10\n", - "rows_sample : [('123 Maple St',), ('456 Oak Ave',), ('789 Pine Rd',), ('321 Birch Blvd',), ('654 Cedar Ln',), ('987 Walnut St',), ('159 Spruce Ct',), ('753 Aspen Way',), ('852 Poplar Dr',), ('951 Cherry Pl',)]\n", - "classification : {'found': True, 'confidence': 0.95, 'reason': 'The text contains multiple street-level mailing locations in the United States, each formatted with a street number and street name.'}\n", - 
"evidence_count : 10\n", - "evidence_sample : ['123 Maple St', '456 Oak Ave', '789 Pine Rd', '321 Birch Blvd', '654 Cedar Ln', '987 Walnut St', '159 Spruce Ct', '753 Aspen Way', '852 Poplar Dr', '951 Cherry Pl']\n", - "source_columns : ['users.street', 'users.city', 'users.state', 'users.zip_code']\n", - "\n", - "--- END METADATA ---\n", - "Wrote: I:\\project2026\\llmagent\\batch_results\\PII_test2_20260127T153449Z.jsonl\n", + "[TRACKING] Saved source columns: ['users.street', 'users.city', 'users.state', 'users.zip_code', 'users.phone']\n", + "Wrote: I:\\project2026\\llmagent\\batch_results\\PII_test2_20260202T021704Z.jsonl\n", "\n", "Processing DB: selectedDBs\\users.db\n", " Processing: EMAIL\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : email address\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "extraction_sql : None\n", - "rows_count : 0\n", - "rows_sample : []\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[SQL EXEC] Retrieved 3 rows\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org 
in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "2: AI -> Retrieved 3 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : email address\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "extraction_sql : None\n", - "rows_count : 3\n", - "rows_sample : [('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "2: AI -> Retrieved 3 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : email address\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "extraction_sql : None\n", - "rows_count : 3\n", - "rows_sample : [('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.95, 'reason': 'The text contains multiple email addresses formatted correctly.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : 
[]\n", - "\n", - "--- END METADATA ---\n", "[PHASE] discovery → extraction\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "2: AI -> Retrieved 3 rows\n", - "3: AI -> SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : email address\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "extraction_sql : SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "rows_count : 3\n", - "rows_sample : [('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.95, 'reason': 'The text contains multiple email addresses formatted correctly.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[SQL EXEC] Retrieved 3 rows\n", "[TRACKING] Saved source columns: ['users.message']\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org in the database\n", - "1: AI -> SELECT message FROM users WHERE 
message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "2: AI -> Retrieved 3 rows\n", - "3: AI -> SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "4: AI -> Retrieved 3 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : email address\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "extraction_sql : SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "rows_count : 3\n", - "rows_sample : [('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.95, 'reason': 'The text contains multiple email addresses formatted correctly.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.message']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "2: AI -> Retrieved 3 rows\n", - "3: AI -> SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "4: AI -> Retrieved 3 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : email address\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - 
"extraction_sql : SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "rows_count : 3\n", - "rows_sample : [('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.95, 'reason': 'The text contains multiple valid email addresses formatted correctly.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.message']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique identifier for a destination to which electronic mail (email) can be sent and received over the internet; examples include jane.doe@example.com, john.smith@provider.net, dev-team@startup.io, and user.name+label@domain.org in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "2: AI -> Retrieved 3 rows\n", - "3: AI -> SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "4: AI -> Retrieved 3 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : email address\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "extraction_sql : SELECT message FROM users WHERE message REGEXP '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}'\n", - "rows_count : 3\n", - "rows_sample : [('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.95, 'reason': 'The text contains multiple valid email addresses formatted correctly.'}\n", - "evidence_count : 3\n", - 
"evidence_sample : ['brian.smith@example.com', 'alice.johnson@example.com', 'frank_xum@google.com']\n", - "source_columns : ['users.message']\n", - "\n", - "--- END METADATA ---\n", " Processing: PHONE\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725 in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : US phone number\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "extraction_sql : None\n", - "rows_count : 0\n", - "rows_sample : []\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[SQL EXEC] Retrieved 0 rows\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725 in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "2: AI -> Retrieved 0 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : US phone number\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- 
.]?[0-9]{1,9}'\n", - "extraction_sql : None\n", - "rows_count : 0\n", - "rows_sample : []\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US phone number is a 10-digit NANP number (area code + exchange + line) that may be written as 2023133725, 202-313-3725, (202) 313-3725, 202.313.3725, +1 202 313 3725, or 1-202-313-3725 in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "2: AI -> Retrieved 0 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : US phone number\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP '\\+?[0-9]{1,4}[- .]?\\(?[0-9]{1,3}?\\)?[- .]?[0-9]{1,4}[- .]?[0-9]{1,4}[- .]?[0-9]{1,9}'\n", - "extraction_sql : None\n", - "rows_count : 0\n", - "rows_sample : []\n", - "classification : {'found': False, 'confidence': 0, 'reason': 'No phone number provided for analysis.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", " Processing: USERNAME\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique or quasi-unique identifier used to authenticate or reference a user account on an application or online service. Even if pseudonymous, it is treated as PII when it can be linked to a specific individual through account records, cross-app correlation, or supporting metadata. 
Examples include John.smith, ericxu99, marsha_mellos, heisenbergercarro, x7Qp_13, user_482019 in the database\n", - "1: AI -> SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : username\n", - "exploration_sql : SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "extraction_sql : None\n", - "rows_count : 0\n", - "rows_sample : []\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[SQL EXEC] Retrieved 6 rows\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique or quasi-unique identifier used to authenticate or reference a user account on an application or online service. Even if pseudonymous, it is treated as PII when it can be linked to a specific individual through account records, cross-app correlation, or supporting metadata. 
Examples include John.smith, ericxu99, marsha_mellos, heisenbergercarro, x7Qp_13, user_482019 in the database\n", - "1: AI -> SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "2: AI -> Retrieved 6 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : username\n", - "exploration_sql : SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "extraction_sql : None\n", - "rows_count : 6\n", - "rows_sample : [('Alice_name@google.com',), ('Brian',), ('abc\\x0cfrank_xu@google.com\\x0b',), ('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique or quasi-unique identifier used to authenticate or reference a user account on an application or online service. Even if pseudonymous, it is treated as PII when it can be linked to a specific individual through account records, cross-app correlation, or supporting metadata. 
Examples include John.smith, ericxu99, marsha_mellos, heisenbergercarro, x7Qp_13, user_482019 in the database\n", - "1: AI -> SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "2: AI -> Retrieved 6 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : username\n", - "exploration_sql : SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "extraction_sql : None\n", - "rows_count : 6\n", - "rows_sample : [('Alice_name@google.com',), ('Brian',), ('abc\\x0cfrank_xu@google.com\\x0b',), ('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': \"The text contains multiple email addresses and usernames that can be linked to specific individuals, such as 'Alice_name@google.com', 'abcfrank_xu@google.com', 'brian.smith@example.com', and 'alice.johnson@example.com'.\"}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[PHASE] discovery → extraction\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique or quasi-unique identifier used to authenticate or reference a user account on an application or online service. 
Even if pseudonymous, it is treated as PII when it can be linked to a specific individual through account records, cross-app correlation, or supporting metadata. Examples include John.smith, ericxu99, marsha_mellos, heisenbergercarro, x7Qp_13, user_482019 in the database\n", - "1: AI -> SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "2: AI -> Retrieved 6 rows\n", - "3: AI -> SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : username\n", - "exploration_sql : SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "extraction_sql : SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "rows_count : 6\n", - "rows_sample : [('Alice_name@google.com',), ('Brian',), ('abc\\x0cfrank_xu@google.com\\x0b',), ('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, 
MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': \"The text contains multiple email addresses and usernames that can be linked to specific individuals, such as 'Alice_name@google.com', 'abcfrank_xu@google.com', 'brian.smith@example.com', and 'alice.johnson@example.com'.\"}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - "[SQL EXEC] Retrieved 6 rows\n", - "[TRACKING] Saved source columns: ['users.user_id', 'users.first_name', 'users.message']\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique or quasi-unique identifier used to authenticate or reference a user account on an application or online service. Even if pseudonymous, it is treated as PII when it can be linked to a specific individual through account records, cross-app correlation, or supporting metadata. Examples include John.smith, ericxu99, marsha_mellos, heisenbergercarro, x7Qp_13, user_482019 in the database\n", - "1: AI -> SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "2: AI -> Retrieved 6 rows\n", - "3: AI -> SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "4: AI -> Retrieved 6 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : username\n", - "exploration_sql : SELECT user_id FROM users WHERE user_id REGEXP 
'\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "extraction_sql : SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "rows_count : 6\n", - "rows_sample : [('Alice_name@google.com',), ('Brian',), ('abc\\x0cfrank_xu@google.com\\x0b',), ('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': \"The text contains multiple email addresses and usernames that can be linked to specific individuals, such as 'Alice_name@google.com', 'abcfrank_xu@google.com', 'brian.smith@example.com', and 'alice.johnson@example.com'.\"}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.user_id', 'users.first_name', 'users.message']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique or quasi-unique identifier used to authenticate or reference a user account on an application or online service. Even if pseudonymous, it is treated as PII when it can be linked to a specific individual through account records, cross-app correlation, or supporting metadata. 
Examples include John.smith, ericxu99, marsha_mellos, heisenbergercarro, x7Qp_13, user_482019 in the database\n", - "1: AI -> SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "2: AI -> Retrieved 6 rows\n", - "3: AI -> SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "4: AI -> Retrieved 6 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : username\n", - "exploration_sql : SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "extraction_sql : SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "rows_count : 6\n", - "rows_sample : [('Alice_name@google.com',), ('Brian',), ('abc\\x0cfrank_xu@google.com\\x0b',), ('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': \"The text contains 
multiple email addresses and usernames that can be linked to specific individuals, such as 'Alice_name@google.com', 'frank_xu@google.com', 'brian.smith@example.com', and 'alice.johnson@example.com'.\"}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.user_id', 'users.first_name', 'users.message']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a unique or quasi-unique identifier used to authenticate or reference a user account on an application or online service. Even if pseudonymous, it is treated as PII when it can be linked to a specific individual through account records, cross-app correlation, or supporting metadata. Examples include John.smith, ericxu99, marsha_mellos, heisenbergercarro, x7Qp_13, user_482019 in the database\n", - "1: AI -> SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "2: AI -> Retrieved 6 rows\n", - "3: AI -> SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "4: AI -> Retrieved 6 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : username\n", - "exploration_sql : SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message 
REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "extraction_sql : SELECT user_id FROM users WHERE user_id REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT first_name FROM users WHERE first_name REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n", - "rows_count : 6\n", - "rows_sample : [('Alice_name@google.com',), ('Brian',), ('abc\\x0cfrank_xu@google.com\\x0b',), ('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': \"The text contains multiple email addresses and usernames that can be linked to specific individuals, such as 'Alice_name@google.com', 'frank_xu@google.com', 'brian.smith@example.com', and 'alice.johnson@example.com'.\"}\n", - "evidence_count : 6\n", - "evidence_sample : ['Alice_name@google.com', 'Brian', 'frank_xu@google.com', 'brian.smith@example.com', 'alice.johnson@example.com', 'frank_xum@google.com']\n", - "source_columns : ['users.user_id', 'users.first_name', 'users.message']\n", - "\n", - "--- END METADATA ---\n", - " Processing: PERSON_NAME\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.) 
in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : person name\n", - "exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "extraction_sql : None\n", - "rows_count : 0\n", - "rows_sample : []\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - "[SQL EXEC] Retrieved 6 rows\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.) 
in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "2: AI -> Retrieved 6 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : person name\n", - "exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "extraction_sql : None\n", - "rows_count : 6\n", - "rows_sample : [('Alice_name@google.com',), ('Brian',), ('abc\\x0cfrank_xu@google.com\\x0b',), ('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.) 
in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "2: AI -> Retrieved 6 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : person name\n", - "exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "extraction_sql : None\n", - "rows_count : 6\n", - "rows_sample : [('Alice_name@google.com',), ('Brian',), ('abc\\x0cfrank_xu@google.com\\x0b',), ('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': \"The text contains multiple instances of names that resemble human names, such as 'Alice', 'Brian', and 'alice.johnson'.\"}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - "[PHASE] discovery → extraction\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.) 
in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "2: AI -> Retrieved 6 rows\n", - "3: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : person name\n", - "exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "extraction_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "rows_count : 6\n", - "rows_sample : [('Alice_name@google.com',), ('Brian',), ('abc\\x0cfrank_xu@google.com\\x0b',), ('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': \"The text contains multiple instances of names that resemble human names, such as 'Alice', 'Brian', and 'alice.johnson'.\"}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[SQL EXEC] Retrieved 6 rows\n", "[TRACKING] Saved source columns: ['users.first_name', 'users.message']\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle names, initials, 
prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.) in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "2: AI -> Retrieved 6 rows\n", - "3: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "4: AI -> Retrieved 6 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : person name\n", - "exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "extraction_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "rows_count : 6\n", - "rows_sample : [('Alice_name@google.com',), ('Brian',), ('abc\\x0cfrank_xu@google.com\\x0b',), ('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': \"The text contains multiple instances of names that resemble human names, such as 'Alice', 'Brian', and 'alice.johnson'.\"}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.first_name', 'users.message']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also include middle 
names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.) in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "2: AI -> Retrieved 6 rows\n", - "3: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "4: AI -> Retrieved 6 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : person name\n", - "exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "extraction_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "rows_count : 6\n", - "rows_sample : [('Alice_name@google.com',), ('Brian',), ('abc\\x0cfrank_xu@google.com\\x0b',), ('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': \"The text contains multiple instances of names that resemble human names, such as 'Alice', 'Brian', and 'Alice Johnson'.\"}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.first_name', 'users.message']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a loosely structured human name-like strings that typically consist of a first name, a first name and a last name, and may also 
include middle names, initials, prefixes (e.g., Mr., Dr.), and suffixes (e.g., Jr., Sr.) in the database\n", - "1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "2: AI -> Retrieved 6 rows\n", - "3: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "4: AI -> Retrieved 6 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : person name\n", - "exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n", - "UNION ALL \n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "extraction_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "UNION ALL\n", - "SELECT message FROM users WHERE message REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n", - "rows_count : 6\n", - "rows_sample : [('Alice_name@google.com',), ('Brian',), ('abc\\x0cfrank_xu@google.com\\x0b',), ('brian.smith@example.com',), ('hello alice.johnson@example.com',), ('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': \"The text contains multiple instances of names that resemble human names, such as 'Alice', 'Brian', and 'Alice Johnson'.\"}\n", - "evidence_count : 5\n", - "evidence_sample : ['Alice', 'Brian', 'Frank Xu', 'Brian Smith', 'Alice Johnson']\n", - "source_columns : ['users.first_name', 'users.message']\n", - "\n", - "--- END METADATA ---\n", - " Processing: POSTAL_ADDRESS\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US postal address 
is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123') in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : US postal address\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "extraction_sql : None\n", - "rows_count : 0\n", - "rows_sample : []\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - "[SQL EXEC] Retrieved 1 rows\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. 
Box 123') in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "2: AI -> Retrieved 1 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : US postal address\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "extraction_sql : None\n", - "rows_count : 1\n", - "rows_sample : [('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : None\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. 
Box 123') in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "2: AI -> Retrieved 1 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : exploration\n", - "PII type : US postal address\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "extraction_sql : None\n", - "rows_count : 1\n", - "rows_sample : [('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': 'The text contains a street address with a street number, street name, city, and state abbreviation, which fits the criteria for a US postal address.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", + " Processing: PERSON_NAME\n", + "[SQL EXEC] Retrieved 6 rows\n", + "[PHASE] discovery → extraction\n", + "[SQL EXEC] Retrieved 6 rows\n", + "[TRACKING] Saved source columns: ['users.first_name', 'users.message']\n", + " Processing: POSTAL_ADDRESS\n", + "[SQL EXEC] Retrieved 1 rows\n", "[PHASE] discovery → extraction\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US postal address is a street-level mailing location in the United 
States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123') in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "2: AI -> Retrieved 1 rows\n", - "3: AI -> SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : US postal address\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "extraction_sql : SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - 
"rows_count : 1\n", - "rows_sample : [('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': 'The text contains a street address with a street number, street name, city, and state abbreviation, which fits the criteria for a US postal address.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : []\n", - "\n", - "--- END METADATA ---\n", "[SQL EXEC] Retrieved 1 rows\n", "[TRACKING] Saved source columns: ['users.message']\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123') in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "2: AI -> Retrieved 1 rows\n", - "3: AI -> SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "4: AI -> Retrieved 1 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : US postal address\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP 
'(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "extraction_sql : SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "rows_count : 1\n", - "rows_sample : [('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': 'The text contains a street address with a street number, street name, city, and state abbreviation, which fits the criteria for a US postal address.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.message']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. 
Box 123') in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "2: AI -> Retrieved 1 rows\n", - "3: AI -> SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "4: AI -> Retrieved 1 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : US postal address\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "extraction_sql : SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "rows_count : 1\n", - "rows_sample : [('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : 
{'found': True, 'confidence': 0.9, 'reason': 'The text contains a street address with a street number, street name, city, and state abbreviation, which fits the criteria for a US postal address.'}\n", - "evidence_count : 0\n", - "evidence_sample : []\n", - "source_columns : ['users.message']\n", - "\n", - "--- END METADATA ---\n", - "\n", - "=== STATE SNAPSHOT ===\n", - "\n", - "--- MESSAGES ---\n", - "0: HUMAN -> Find a US postal address is a street-level mailing location in the United States, commonly appearing as a street name and suffix (e.g., 'Market St') optionally with a street number (e.g., '1500 Market St'), unit, city/state, ZIP, or a PO Box (e.g., 'P.O. Box 123') in the database\n", - "1: AI -> SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "2: AI -> Retrieved 1 rows\n", - "3: AI -> SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "4: AI -> Retrieved 1 rows\n", - "\n", - "--- BEGIN METADATA ---\n", - "attempt : 2\n", - "max_attempts : 2\n", - "phase : extraction\n", - "PII type : US postal address\n", - "exploration_sql : SELECT message FROM users WHERE message REGEXP 
'(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "extraction_sql : SELECT message FROM users WHERE message REGEXP '(?i)\\b(?:p\\.?\\s*o\\.?\\s*box|post\\s+office\\s+box|ave\\.?|avenue|st\\.?|street|rd\\.?|road|blvd\\.?|boulevard|dr\\.?|drive|ln\\.?|lane|ct\\.?|court|pl\\.?|place|way|pkwy\\.?|parkway|cir\\.?|circle|ter\\.?|terrace|hwy\\.?|highway|trl\\.?|trail|sq\\.?|square|pike|loop|run|walk|path|byp\\.?|bypass|(?:n|s|e|w|ne|nw|se|sw)\\b)\\b'\n", - "rows_count : 1\n", - "rows_sample : [('meet me at 3521 coverage street, Ellicitty City, MD abc\\x0cfrank_xum@google.com\\x0b',)]\n", - "classification : {'found': True, 'confidence': 0.9, 'reason': 'The text contains a street address with a street number, street name, city, and state abbreviation, which fits the criteria for a US postal address.'}\n", - "evidence_count : 1\n", - "evidence_sample : ['3521 Coverage Street, Ellicitty City, MD']\n", - "source_columns : ['users.message']\n", - "\n", - "--- END METADATA ---\n", - "Wrote: I:\\project2026\\llmagent\\batch_results\\PII_users_20260127T153530Z.jsonl\n" + "Wrote: I:\\project2026\\llmagent\\batch_results\\PII_users_20260202T021737Z.jsonl\n" ] } ], @@ -2631,6 +684,13 @@ " print_db_path_report(db_paths, missing, not_sqlite)\n", "\n", " # Now run and save one file per DB (no global aggregation)\n", + " \n", + " enable_observe = bool(cfg.get(\"enable_observe\", False))\n", + " app = build_graph(enable_observe) \n", + " \n", + " print(f\"enable_observe: {enable_observe}\")\n", + " print(f\"pii_targets: {PII_TARGETS}\")\n", + "\n", " run_batch(db_paths, PII_TARGETS, PII_CONFIG, app, OUT_DIR)\n", "\n", "\n", diff --git a/RQs/RQ0/RQ0_1_results_normalization.ipynb 
b/RQs/RQ0/RQ0_1_results_normalization.ipynb index 057e0d9..b90d53a 100644 --- a/RQs/RQ0/RQ0_1_results_normalization.ipynb +++ b/RQs/RQ0/RQ0_1_results_normalization.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "c2d824a6", "metadata": {}, "outputs": [ @@ -139,13 +139,13 @@ "\n", " from stats_utils import normalize_and_slim_record\n", "\n", - " IN_DIR = Path(r\"..\\..\\batch_results_gpt4o\")\n", + " IN_DIR = Path(r\"..\\..\\model_PII_results\\gpt4o\")\n", " RESULTS_DIR = Path(r\"normalized_PII_results\\gpt4o\\db_level\")\n", " OUT_DIR = STATS_DIR/ RESULTS_DIR \n", "\n", " normalize_jsonl_folder(IN_DIR, OUT_DIR, normalize_and_slim_record, delete_out_dir_first=True)\n", " \n", - " IN_DIR = Path(r\"..\\..\\ground_truth\")\n", + " IN_DIR = Path(r\"..\\..\\model_PII_results\\ground_truth\")\n", " RESULTS_DIR = Path(r\"normalized_PII_results\\ground_truth\\db_level\")\n", " OUT_DIR = STATS_DIR/ RESULTS_DIR \n", "\n", diff --git a/RQs/normalized_PII_results/gpt4o/app_level/RQ2_app_level_gpt4o.jsonl b/RQs/normalized_PII_results/gpt4o/app_level/RQ2_app_level_gpt4o.jsonl deleted file mode 100644 index 7208f4e..0000000 --- a/RQs/normalized_PII_results/gpt4o/app_level/RQ2_app_level_gpt4o.jsonl +++ /dev/null @@ -1,50 +0,0 @@ -{"db_path": "selectedDBs\\A1", "PII_type": "EMAIL", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\A1", "PII_type": "PERSON_NAME", "PII_all": ["jim investment team", "professor jim", "jim anderson", "mary garcia", "benjamin", "lorie logan", "biden", "yellen", "ron desantis", "michael saylor", "cathy wood", "harvey jones", "gary gensler", "abbas al qattan", "jw verret", "svetlana chernoff", "ltc whalen", "capt don wayne", "karen tate", "brian reynolds", "goldie kahn", "vladamir stravinsky", "mary garcia", "abe rudder", "russ philby"], "PII_unique": 
["jim investment team", "professor jim", "jim anderson", "mary garcia", "benjamin", "lorie logan", "biden", "yellen", "ron desantis", "michael saylor", "cathy wood", "harvey jones", "gary gensler", "abbas al qattan", "jw verret", "svetlana chernoff", "ltc whalen", "capt don wayne", "karen tate", "brian reynolds", "goldie kahn", "vladamir stravinsky", "abe rudder", "russ philby"], "Num_of_PII_all": 25, "Num_of_PII_unique": 24, "source_columns": ["A1_msgstore.db:chat.subject", "A1_msgstore.db:message.text_data", "A1_msgstore.db:message_text.description", "A1_msgstore.db:message_vcard.vcard", "A1_wa.db:wa_contacts.display_name", "A1_wa.db:wa_contacts.given_name", "A1_wa.db:wa_contacts.family_name", "A1_wa.db:wa_contacts.nickname", "A1_wa.db:wa_address_book.display_name", "A1_wa.db:wa_address_book.given_name", "A1_wa.db:wa_address_book.family_name", "A1_wa.db:wa_address_book.nickname", "A1_wa.db:wa_biz_profiles.business_description", "A1_wa.db:wa_biz_profiles.location_name"], "Num_of_source_columns": 14, "Num_of_source_columns_unique": 14} -{"db_path": "selectedDBs\\A1", "PII_type": "PHONE", "PII_all": ["2023133725", "9106995488", "14244990541", "14359905938", "16467602090", "13346095713", "17622338037", "19199037779", "8085096467", "5713349815", "16263678865", "16106046786", "7034241981", "5715917168", "2065937224", "5713298742", "8624338328", "18056377243", "2028177932", "2025692832", "9735203731", "81367430271", "7423794330"], "PII_unique": ["2023133725", "9106995488", "14244990541", "14359905938", "16467602090", "13346095713", "17622338037", "19199037779", "8085096467", "5713349815", "16263678865", "16106046786", "7034241981", "5715917168", "2065937224", "5713298742", "8624338328", "18056377243", "2028177932", "2025692832", "9735203731", "81367430271", "7423794330"], "Num_of_PII_all": 23, "Num_of_PII_unique": 23, "source_columns": ["A1_msgstore.db:message.text_data", "A1_msgstore.db:chat.subject", "A1_msgstore.db:call_log.call_id", 
"A1_msgstore.db:message_text.description", "A1_wa.db:wa_address_book.number", "A1_wa.db:wa_contacts.number"], "Num_of_source_columns": 6, "Num_of_source_columns_unique": 6} -{"db_path": "selectedDBs\\A1", "PII_type": "POSTAL_ADDRESS", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\A1", "PII_type": "USERNAME", "PII_all": ["wealthbuildersclub", "bitcoinmastersjiminvestmentteam", "marygarcia", "jimanderson", "btcointradingcenter", "100674318659756", "103023648964858", "103929853546678", "103981426733067", "104569854005296", "106940592033823", "107967072444508", "108272048668771", "109766730866918", "110638642757646", "112103243395097", "11356128460981", "115504622653502", "115728095129747", "116032903606475", "12014946184", "12025692832", "12028177932", "12028275725", "120363098389106519", "120363144038483540", "120363159552753674", "120363160757210405", "120363161629617396", "120363161854316501", "120363165358271161", "120363168364416063", "120363169319669622", "120363169975121665", "120363171769248504", "120363189991282340", "120363196338530920", "120363199791777094", "120363236090549442", "120363242831731954", "120363260509199299", "120363283899919854", "120363291157960202", "120363294282893350", "120363310566841953", "12037188989", "120581458518255", "12065937224", "12088549831", "12089234440", "12092759604", "12104598191", "12107693415", "12144349391", "121616528871652", "12164180421", "12182030481", "12193011016", "12242691136", "12289349643", "122939395555337", "12312851586", "123463398334515", "12347552698", "124322341462059", "12485951624", "12512653471", "125177073500366", "12519578826", "12533985096", "12563428154", "12563481926", "12569600342", "12673489822", "12677187343", "12816587274", "12817444681", "129098345132130", "13035026845", "130369789644926", "13078281042", "13086550898", "13124682262", "13135550002", 
"13135550005", "13135550009", "13135550012", "13135550013", "13135550014", "13135550015", "13135550019", "13135550022", "13135550023", "13135550027", "13135550030", "13135550031", "13135550033", "13135550035", "13135550039", "13135550042", "13135550043", "13135550046", "13135550047", "13135550048", "13135550049", "13135550053", "13135550054", "13135550055", "13135550062", "13135550065", "13135550067", "13135550070", "13135550082", "13135550085", "13135550086", "13135550091", "13135550092", "13135550093", "13135550096", "13135550098", "13135550100", "13135550302", "13135550303", "13135550308", "13135550309", "13135550310", "13135550312", "13135550313", "13135550314", "13135550315", "13135550316", "13135550317", "13135550320", "13135550321", "13135550322", "13135550323", "13135550324", "13135550325", "13135550326", "13135550327", "13135550328", "13135550330", "13135550334", "13135550338", "13135559000", "13135559002", "13135559003", "13135559004", "13135559005", "13135559008", "13135559009", "13135559010", "13135559011", "13135559012", "13135559013", "13135559017", "13135559018", "13135559019", "131512150323454", "13179120539", "13179380030", "13187237549", "13188336425", "131988992307398", "13202417349", "132083330588794", "13232178658", "13234051120", "13234459871", "13234851291", "13236779632", "13237429682", "13238260610", "13346095713", "13367064708", "13373930303", "13465929760", "13472347237", "13472909155", "13473104333", "13474564797", "13475168420", "13475880967", "13475936676", "13478787368", "13479516322", "13526015327", "135459007143970", "137512169304289", "13852401837", "13858310873", "139225945108501", "139607945548011", "139998804332546", "14014191471", "14016489408", "14025048783", "14027821192", "14048864781", "14054665132", "14072702768", "14076006497", "14078215797", "14085991338", "14142101524", "14159910786", "14164479785", "141940297347204", "14195158467", "14232189459", "14235022702", "14242362362", "14242363754", "14244990541", 
"14256244006", "143181576454319", "14356776413", "14356776955", "14359905397", "14359905938", "143834445017308", "14438380391", "144792122048696", "14692785231", "147004080578653", "14736234139697", "14805990841", "14806561073", "14847577922", "150053339533511", "15028019221", "150302598627464", "15038471839", "15038640034", "15042332249", "15057175374", "15076217539", "15076975472", "15083061090", "150839452774560", "150925352136784", "15099617093", "15103146582", "15104844091", "15105924830", "15106214216", "15109340002", "15123879210", "15125224259", "15125713374", "15125716517", "15126553035", "15138765723", "15155235640", "15155259124", "15167789277", "15168005517", "15168799747", "15168886942", "15203295777", "15203555498", "152046388932662", "152488787329139", "15302068722", "15404237607", "15595540556", "15597596875", "15598199187", "15612890324", "15618035894", "15623286070", "15673433595", "15704602737", "15704977866", "15705409111", "15705947881", "15713298742", "15713349815", "15713480045", "15713559735", "15715446505", "15715646023", "15715917168", "15715980952", "15716062052", "15739197568", "16012070456", "16017304815", "16017809365", "16028162053", "16029186659", "16073820970", "16083350264", "16083597914", "16093461419", "16093505541", "16099336348", "16104573925", "16104835210", "16106046786", "16123546753", "16143169059", "16143683432", "16144467091", "16145003405", "16145921304", "16146225841", "16152891229", "16153625864", "16155107778", "16158818625", "161688489824333", "16175941188", "16193489516", "161954777829383", "16195989232", "162135283859693", "16235229187", "16262906591", "16263678595", "16263678786", "16263678865", "16264191192", "16266766304", "16269778212", "16302169283", "16304122840", "16308639426", "16319973442", "16462411419", "16463319077", "16467602090", "16468012779", "164939880726537", "16504166544", "16506848121", "16514436258", "16572062143", "16615243843", "166292694802535", "16782038340", "16784370740", "16789395667", 
"16822359596", "168229909594129", "168345789841472", "169548330315856", "17015809945", "17017387027", "170201081483270", "170265724104705", "17034241981", "17034473529", "17036534502", "17043887688", "170480405340393", "17064662606", "17066755548", "17072438970", "17075022524", "17076530463", "17133853574", "17142935893", "17147523683", "17149864683", "17162175691", "17173488600", "17183005254", "17185527136", "17192528628", "17192973293", "17207425163", "17244674519", "17272651415", "17315992272", "17323557893", "17328120994", "17329001069", "17345525321", "17408335494", "17408458768", "17423794330", "174869845135609", "175342325080150", "17547778147", "17622338037", "17654766272", "17655603474", "17694875554", "17694875893", "17706484589", "177159045906534", "17723629659", "17732708847", "17736696040", "17738778243", "178185543102533", "17862039442", "17862532958", "17863899629", "17864023107", "17865717549", "17865718190", "17865821967", "17866412763", "179001368785019", "18014276170", "18022898358", "18024518808", "18025880528", "18027250068", "18045888536", "18047188072", "18052538018", "18053218602", "18056377243", "18067510314", "18085096467", "18129021981", "18134709062", "18135326643", "18135512847", "18139660839", "18165228437", "18173535149", "18175659571", "18188148927", "18189341683", "1821301047437", "182201337524439", "182282774175819", "182390332911843", "182506246656145", "182605131567300", "18284752696", "18286800850", "18312041352", "18313053342", "18317078315", "18317721351", "18328059892", "184052434886731", "184245708431525", "18435802562", "18458017418", "185499872415779", "18562681088", "18565341559", "18565538852", "18566393015", "18569973113", "18588636236", "18602817442", "18603564643", "18605582168", "18624338328", "186302997745696", "18632531704", "18635129576", "18644836084", "18647474777", "186672415256810", "186942763302988", "18702815976", "18722227422", "18747783934027", "18769174892625", "190048343453815", "19014843965", 
"19033263809", "19036468314", "19048785221", "19082801531", "19082968935", "19096144641", "19096309946", "19099140470", "19108982297", "19139153988", "19143864069", "19144139560", "19152920437", "19159792153", "19162048632", "19167706237", "19185653253", "19198413910", "19198856271", "19199037779", "192388983173249", "19254368625", "19254779421", "192573649993951", "19317069676", "193273729683514", "19344140808", "19372305774", "194536433307865", "19494443119", "194944572596311", "19496190099", "195232134082660", "195245220360233", "19542343915", "19549552526", "19566170438", "19567321403", "19567893122", "19568214170", "19719008724", "19735203731", "197362521739402", "19785150566", "197873673183315", "19804012349", "19842441148", "19852369962", "198535098151027", "199252324159534", "199548693651693", "200467799892008", "202907442000051", "204582412144684", "204848750411844", "206150142304493", "206377708470397", "206493689376827", "206781385048068", "208018385956938", "208632583032844", "209925384999102", "215014870888498", "215306978984178", "215319880687714", "215410024661201", "217218222682204", "217239731085336", "217617637859387", "218545384329241", "221667792027769", "221697705824396", "221856770617483", "224523945291787", "225391562211456", "227414475075628", "228548329611468", "22995271712920", "230476769964063", "231623526224069", "231971435364603", "232465323028575", "237915703664686", "239603525148848", "240432336367649", "241282874126532", "241587833577501", "24378335039589", "243958638731459", "245491942039636", "246827693678606", "253772672573507", "253944488009965", "255619357483108", "256993730236621", "257384723259421", "259356130029573", "259652516356294", "260972379022", "262452885336154", "263178684469434", "264020498092255", "266833718431948", "26710653329462", "268246779445272", "268951053439040", "269307569266755", "269586725388507", "271734125146250", "274349860909238", "275866017865913", "276029226676359", "277167376183308", 
"279774371000453", "279783011262693", "28531702653037", "29764358254642", "30464505073828", "32233796698326", "33785650921", "34836714635416", "35639856766996", "39844579401782", "4033209221175", "40832505757728", "40978350141447", "41042975949001", "4264919351425", "43697081192639", "43890354696378", "44418736336924", "447974045725", "45908939030566", "4915257482547", "491747383347", "491747383354", "491747386191", "491748589154", "491748599935", "491748600734", "491748672588", "49826133741698", "50350170058985", "5213311857022", "52600531632270", "54692432318606", "56410419257463", "56616359583845", "58613636853903", "6287745672611", "64360370192402", "65476977782923", "69999628665014", "71983886766200", "72928746037443", "74144305684519", "74917131374625", "75273798172747", "76742727315704", "77554526470395", "79401278517497", "8001775771719", "81367430271", "82506523119859", "85869633482900", "90452212613311", "905338541080", "91057954017452", "923130999544", "923323013167", "9534928064606", "96529440321619", "status", "svetlana chernoff", "ltc whalen", "capt don wayne", "karen tate", "hank", "brian reynolds", "goldie kahn", "vladamir stravinsky", "6️⃣ wealth builders club", "joey", "mary garcia", "abe rudder", "russ philby", "voice mail", "otto", "toks", "meta", "leaura", "faxinezidohne", "patrickh34", "yauyauyauhen", "nathanmorris", "italianmatters", "robm435", "loususi", "lanaire2023", "pet_the_bunny", "madmax_mgm", "reidback", "rennymorales", "yjr_fit.inba", "the_real_flockfam", "humans_of_data", "airwicksol", "homan.jason", "nadhir_chiu_oficial", "brandonmcclainl", "geezdagawd", "superflysugar2024", "curia__", "psychicadvisor345", "lemieuxbrands", "james_macray_"], "PII_unique": ["wealthbuildersclub", "bitcoinmastersjiminvestmentteam", "marygarcia", "jimanderson", "btcointradingcenter", "100674318659756", "103023648964858", "103929853546678", "103981426733067", "104569854005296", "106940592033823", "107967072444508", "108272048668771", "109766730866918", 
"110638642757646", "112103243395097", "11356128460981", "115504622653502", "115728095129747", "116032903606475", "12014946184", "12025692832", "12028177932", "12028275725", "120363098389106519", "120363144038483540", "120363159552753674", "120363160757210405", "120363161629617396", "120363161854316501", "120363165358271161", "120363168364416063", "120363169319669622", "120363169975121665", "120363171769248504", "120363189991282340", "120363196338530920", "120363199791777094", "120363236090549442", "120363242831731954", "120363260509199299", "120363283899919854", "120363291157960202", "120363294282893350", "120363310566841953", "12037188989", "120581458518255", "12065937224", "12088549831", "12089234440", "12092759604", "12104598191", "12107693415", "12144349391", "121616528871652", "12164180421", "12182030481", "12193011016", "12242691136", "12289349643", "122939395555337", "12312851586", "123463398334515", "12347552698", "124322341462059", "12485951624", "12512653471", "125177073500366", "12519578826", "12533985096", "12563428154", "12563481926", "12569600342", "12673489822", "12677187343", "12816587274", "12817444681", "129098345132130", "13035026845", "130369789644926", "13078281042", "13086550898", "13124682262", "13135550002", "13135550005", "13135550009", "13135550012", "13135550013", "13135550014", "13135550015", "13135550019", "13135550022", "13135550023", "13135550027", "13135550030", "13135550031", "13135550033", "13135550035", "13135550039", "13135550042", "13135550043", "13135550046", "13135550047", "13135550048", "13135550049", "13135550053", "13135550054", "13135550055", "13135550062", "13135550065", "13135550067", "13135550070", "13135550082", "13135550085", "13135550086", "13135550091", "13135550092", "13135550093", "13135550096", "13135550098", "13135550100", "13135550302", "13135550303", "13135550308", "13135550309", "13135550310", "13135550312", "13135550313", "13135550314", "13135550315", "13135550316", "13135550317", "13135550320", 
"13135550321", "13135550322", "13135550323", "13135550324", "13135550325", "13135550326", "13135550327", "13135550328", "13135550330", "13135550334", "13135550338", "13135559000", "13135559002", "13135559003", "13135559004", "13135559005", "13135559008", "13135559009", "13135559010", "13135559011", "13135559012", "13135559013", "13135559017", "13135559018", "13135559019", "131512150323454", "13179120539", "13179380030", "13187237549", "13188336425", "131988992307398", "13202417349", "132083330588794", "13232178658", "13234051120", "13234459871", "13234851291", "13236779632", "13237429682", "13238260610", "13346095713", "13367064708", "13373930303", "13465929760", "13472347237", "13472909155", "13473104333", "13474564797", "13475168420", "13475880967", "13475936676", "13478787368", "13479516322", "13526015327", "135459007143970", "137512169304289", "13852401837", "13858310873", "139225945108501", "139607945548011", "139998804332546", "14014191471", "14016489408", "14025048783", "14027821192", "14048864781", "14054665132", "14072702768", "14076006497", "14078215797", "14085991338", "14142101524", "14159910786", "14164479785", "141940297347204", "14195158467", "14232189459", "14235022702", "14242362362", "14242363754", "14244990541", "14256244006", "143181576454319", "14356776413", "14356776955", "14359905397", "14359905938", "143834445017308", "14438380391", "144792122048696", "14692785231", "147004080578653", "14736234139697", "14805990841", "14806561073", "14847577922", "150053339533511", "15028019221", "150302598627464", "15038471839", "15038640034", "15042332249", "15057175374", "15076217539", "15076975472", "15083061090", "150839452774560", "150925352136784", "15099617093", "15103146582", "15104844091", "15105924830", "15106214216", "15109340002", "15123879210", "15125224259", "15125713374", "15125716517", "15126553035", "15138765723", "15155235640", "15155259124", "15167789277", "15168005517", "15168799747", "15168886942", "15203295777", "15203555498", 
"152046388932662", "152488787329139", "15302068722", "15404237607", "15595540556", "15597596875", "15598199187", "15612890324", "15618035894", "15623286070", "15673433595", "15704602737", "15704977866", "15705409111", "15705947881", "15713298742", "15713349815", "15713480045", "15713559735", "15715446505", "15715646023", "15715917168", "15715980952", "15716062052", "15739197568", "16012070456", "16017304815", "16017809365", "16028162053", "16029186659", "16073820970", "16083350264", "16083597914", "16093461419", "16093505541", "16099336348", "16104573925", "16104835210", "16106046786", "16123546753", "16143169059", "16143683432", "16144467091", "16145003405", "16145921304", "16146225841", "16152891229", "16153625864", "16155107778", "16158818625", "161688489824333", "16175941188", "16193489516", "161954777829383", "16195989232", "162135283859693", "16235229187", "16262906591", "16263678595", "16263678786", "16263678865", "16264191192", "16266766304", "16269778212", "16302169283", "16304122840", "16308639426", "16319973442", "16462411419", "16463319077", "16467602090", "16468012779", "164939880726537", "16504166544", "16506848121", "16514436258", "16572062143", "16615243843", "166292694802535", "16782038340", "16784370740", "16789395667", "16822359596", "168229909594129", "168345789841472", "169548330315856", "17015809945", "17017387027", "170201081483270", "170265724104705", "17034241981", "17034473529", "17036534502", "17043887688", "170480405340393", "17064662606", "17066755548", "17072438970", "17075022524", "17076530463", "17133853574", "17142935893", "17147523683", "17149864683", "17162175691", "17173488600", "17183005254", "17185527136", "17192528628", "17192973293", "17207425163", "17244674519", "17272651415", "17315992272", "17323557893", "17328120994", "17329001069", "17345525321", "17408335494", "17408458768", "17423794330", "174869845135609", "175342325080150", "17547778147", "17622338037", "17654766272", "17655603474", "17694875554", "17694875893", 
"17706484589", "177159045906534", "17723629659", "17732708847", "17736696040", "17738778243", "178185543102533", "17862039442", "17862532958", "17863899629", "17864023107", "17865717549", "17865718190", "17865821967", "17866412763", "179001368785019", "18014276170", "18022898358", "18024518808", "18025880528", "18027250068", "18045888536", "18047188072", "18052538018", "18053218602", "18056377243", "18067510314", "18085096467", "18129021981", "18134709062", "18135326643", "18135512847", "18139660839", "18165228437", "18173535149", "18175659571", "18188148927", "18189341683", "1821301047437", "182201337524439", "182282774175819", "182390332911843", "182506246656145", "182605131567300", "18284752696", "18286800850", "18312041352", "18313053342", "18317078315", "18317721351", "18328059892", "184052434886731", "184245708431525", "18435802562", "18458017418", "185499872415779", "18562681088", "18565341559", "18565538852", "18566393015", "18569973113", "18588636236", "18602817442", "18603564643", "18605582168", "18624338328", "186302997745696", "18632531704", "18635129576", "18644836084", "18647474777", "186672415256810", "186942763302988", "18702815976", "18722227422", "18747783934027", "18769174892625", "190048343453815", "19014843965", "19033263809", "19036468314", "19048785221", "19082801531", "19082968935", "19096144641", "19096309946", "19099140470", "19108982297", "19139153988", "19143864069", "19144139560", "19152920437", "19159792153", "19162048632", "19167706237", "19185653253", "19198413910", "19198856271", "19199037779", "192388983173249", "19254368625", "19254779421", "192573649993951", "19317069676", "193273729683514", "19344140808", "19372305774", "194536433307865", "19494443119", "194944572596311", "19496190099", "195232134082660", "195245220360233", "19542343915", "19549552526", "19566170438", "19567321403", "19567893122", "19568214170", "19719008724", "19735203731", "197362521739402", "19785150566", "197873673183315", "19804012349", "19842441148", 
"19852369962", "198535098151027", "199252324159534", "199548693651693", "200467799892008", "202907442000051", "204582412144684", "204848750411844", "206150142304493", "206377708470397", "206493689376827", "206781385048068", "208018385956938", "208632583032844", "209925384999102", "215014870888498", "215306978984178", "215319880687714", "215410024661201", "217218222682204", "217239731085336", "217617637859387", "218545384329241", "221667792027769", "221697705824396", "221856770617483", "224523945291787", "225391562211456", "227414475075628", "228548329611468", "22995271712920", "230476769964063", "231623526224069", "231971435364603", "232465323028575", "237915703664686", "239603525148848", "240432336367649", "241282874126532", "241587833577501", "24378335039589", "243958638731459", "245491942039636", "246827693678606", "253772672573507", "253944488009965", "255619357483108", "256993730236621", "257384723259421", "259356130029573", "259652516356294", "260972379022", "262452885336154", "263178684469434", "264020498092255", "266833718431948", "26710653329462", "268246779445272", "268951053439040", "269307569266755", "269586725388507", "271734125146250", "274349860909238", "275866017865913", "276029226676359", "277167376183308", "279774371000453", "279783011262693", "28531702653037", "29764358254642", "30464505073828", "32233796698326", "33785650921", "34836714635416", "35639856766996", "39844579401782", "4033209221175", "40832505757728", "40978350141447", "41042975949001", "4264919351425", "43697081192639", "43890354696378", "44418736336924", "447974045725", "45908939030566", "4915257482547", "491747383347", "491747383354", "491747386191", "491748589154", "491748599935", "491748600734", "491748672588", "49826133741698", "50350170058985", "5213311857022", "52600531632270", "54692432318606", "56410419257463", "56616359583845", "58613636853903", "6287745672611", "64360370192402", "65476977782923", "69999628665014", "71983886766200", "72928746037443", "74144305684519", 
"74917131374625", "75273798172747", "76742727315704", "77554526470395", "79401278517497", "8001775771719", "81367430271", "82506523119859", "85869633482900", "90452212613311", "905338541080", "91057954017452", "923130999544", "923323013167", "9534928064606", "96529440321619", "status", "svetlana chernoff", "ltc whalen", "capt don wayne", "karen tate", "hank", "brian reynolds", "goldie kahn", "vladamir stravinsky", "6️⃣ wealth builders club", "joey", "mary garcia", "abe rudder", "russ philby", "voice mail", "otto", "toks", "meta", "leaura", "faxinezidohne", "patrickh34", "yauyauyauhen", "nathanmorris", "italianmatters", "robm435", "loususi", "lanaire2023", "pet_the_bunny", "madmax_mgm", "reidback", "rennymorales", "yjr_fit.inba", "the_real_flockfam", "humans_of_data", "airwicksol", "homan.jason", "nadhir_chiu_oficial", "brandonmcclainl", "geezdagawd", "superflysugar2024", "curia__", "psychicadvisor345", "lemieuxbrands", "james_macray_"], "Num_of_PII_all": 685, "Num_of_PII_unique": 685, "source_columns": ["A1_msgstore.db:chat.subject", "A1_msgstore.db:message.text_data", "A1_msgstore.db:message_text.description", "A1_msgstore.db:user_device.user_jid_row_id", "A1_msgstore.db:user_device_info.user_jid_row_id", "A1_wa.db:wa_contacts.jid", "A1_wa.db:wa_contacts.display_name", "A1_wa.db:wa_contacts.nickname", "A1_wa.db:wa_address_book.jid", "A1_wa.db:wa_address_book.display_name", "A1_wa.db:wa_address_book.nickname", "A1_wa.db:wa_biz_profiles.jid", "A1_wa.db:wa_biz_profiles.custom_url", "A1_wa.db:wa_bot_profiles.creator_name"], "Num_of_source_columns": 14, "Num_of_source_columns_unique": 14} -{"db_path": "selectedDBs\\A2", "PII_type": "EMAIL", "PII_all": ["copyright@snap.com"], "PII_unique": ["copyright@snap.com"], "Num_of_PII_all": 1, "Num_of_PII_unique": 1, "source_columns": ["A2_main.db:billboardstrings.message", "A2_main.db:combinedusername.originalusername", "A2_main.db:contact.displayname", "A2_main.db:notificationdata.userid", "A2_main.db:story.userid", 
"A2_main.db:suggestedfriend.userid"], "Num_of_source_columns": 6, "Num_of_source_columns_unique": 6} -{"db_path": "selectedDBs\\A2", "PII_type": "PERSON_NAME", "PII_all": ["sharon oneil", "abe rudder", "karen tate", "russ philby", "capt don wayne", "svetlana chernoff", "brian reynolds", "vladamir stravinsky", "ltc whalen", "goldie kahn", "joey", "mary garcia"], "PII_unique": ["sharon oneil", "abe rudder", "karen tate", "russ philby", "capt don wayne", "svetlana chernoff", "brian reynolds", "vladamir stravinsky", "ltc whalen", "goldie kahn", "joey", "mary garcia"], "Num_of_PII_all": 12, "Num_of_PII_unique": 12, "source_columns": ["A2_core.db:preferences.stringvalue", "A2_core.db:snapuserstore.textval", "A2_core.db:snapchatuserproperties.textval", "A2_main.db:contact.displayname", "A2_main.db:combinedusername.originalusername", "A2_main.db:billboardstrings.stringkey", "A2_main.db:friend.userid", "A2_main.db:story.username", "A2_main.db:storysnap.displayname", "A2_main.db:suggestedfriend.suggestionreason"], "Num_of_source_columns": 10, "Num_of_source_columns_unique": 10} -{"db_path": "selectedDBs\\A2", "PII_type": "PHONE", "PII_all": ["18624338329", "2065937224", "8624338328", "9199037779", "8085096467", "5713298742", "2028177932", "2025692832", "5713349815", "5715917168", "6106046786", "6263678865", "8056377243", "7423794330"], "PII_unique": ["18624338329", "2065937224", "8624338328", "9199037779", "8085096467", "5713298742", "2028177932", "2025692832", "5713349815", "5715917168", "6106046786", "6263678865", "8056377243", "7423794330"], "Num_of_PII_all": 14, "Num_of_PII_unique": 14, "source_columns": ["A2_core.db:configetag.etag", "A2_core.db:configrule.config_id", "A2_core.db:deltaforcesync.client_key", "A2_core.db:preferences.key", "A2_core.db:snapuserstore.textval", "A2_core.db:snapchatuserproperties.textval", "A2_core.db:android_metadata.locale", "A2_main.db:contact.phone", "A2_main.db:contact.rawphone", "A2_main.db:billboardstrings.stringkey", 
"A2_main.db:billboardstrings.message", "A2_main.db:friend.displayname", "A2_main.db:feed.specifiedname", "A2_main.db:storysnap.captiontextdisplay"], "Num_of_source_columns": 14, "Num_of_source_columns_unique": 14} -{"db_path": "selectedDBs\\A2", "PII_type": "POSTAL_ADDRESS", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\A2", "PII_type": "USERNAME", "PII_all": ["oneil3607", "sharononeil368", "no_skin_tone", "static_image", "memories_and_camera_roll"], "PII_unique": ["oneil3607", "sharononeil368", "no_skin_tone", "static_image", "memories_and_camera_roll"], "Num_of_PII_all": 5, "Num_of_PII_unique": 5, "source_columns": ["A2_core.db:snapuserstore.textval", "A2_core.db:snapchatuserproperties.textval"], "Num_of_source_columns": 2, "Num_of_source_columns_unique": 2} -{"db_path": "selectedDBs\\A3", "PII_type": "EMAIL", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\A3", "PII_type": "PERSON_NAME", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\A3", "PII_type": "PHONE", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\A3", "PII_type": "POSTAL_ADDRESS", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\A3", "PII_type": "USERNAME", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, 
"Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\A4", "PII_type": "EMAIL", "PII_all": ["heather@cellebrite.com", "hmahalik@gmail.com"], "PII_unique": ["heather@cellebrite.com", "hmahalik@gmail.com"], "Num_of_PII_all": 2, "Num_of_PII_unique": 2, "source_columns": ["A4_peopleCache_sharononeil368@gmail.com_com.google_14.db:tokens.value", "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db:tokens_content.c1value", "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db:tokens_stat.value"], "Num_of_source_columns": 3, "Num_of_source_columns_unique": 3} -{"db_path": "selectedDBs\\A4", "PII_type": "PERSON_NAME", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": ["A4_peopleCache_sharononeil368@gmail.com_com.google_14.db:tokens.value", "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db:tokens_content.c1value", "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db:tokens_stat.value", "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db:cacheinfo.affinity_response_context", "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db:android_metadata.locale"], "Num_of_source_columns": 5, "Num_of_source_columns_unique": 5} -{"db_path": "selectedDBs\\A4", "PII_type": "PHONE", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": ["A4_peopleCache_sharononeil368@gmail.com_com.google_14.db:tokens.value", "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db:tokens_content.c1value", "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db:cacheinfo.affinity_response_context"], "Num_of_source_columns": 3, "Num_of_source_columns_unique": 3} -{"db_path": "selectedDBs\\A4", "PII_type": "POSTAL_ADDRESS", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\A4", "PII_type": "USERNAME", "PII_all": ["heather", "hmahalik"], 
"PII_unique": ["heather", "hmahalik"], "Num_of_PII_all": 2, "Num_of_PII_unique": 2, "source_columns": ["A4_peopleCache_sharononeil368@gmail.com_com.google_14.db:tokens.value", "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db:tokens_content.c1value"], "Num_of_source_columns": 2, "Num_of_source_columns_unique": 2} -{"db_path": "selectedDBs\\A5", "PII_type": "EMAIL", "PII_all": ["sharononeil368@gmail.com"], "PII_unique": ["sharononeil368@gmail.com"], "Num_of_PII_all": 1, "Num_of_PII_unique": 1, "source_columns": ["A5_SBrowser.db:bookmarks.account_name", "A5_SBrowser.db:internet_sync.sync_key", "A5_SBrowser.db:sync_state.account_name", "A5_SBrowser.db:tabs.account_name"], "Num_of_source_columns": 4, "Num_of_source_columns_unique": 4} -{"db_path": "selectedDBs\\A5", "PII_type": "PERSON_NAME", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": ["A5_SBrowser.db:bookmarks.title", "A5_SBrowser.db:bookmarks.account_name", "A5_SBrowser.db:bookmarks.account_type", "A5_SBrowser.db:sync_state.data", "A5_SBrowser.db:tabs.tab_title", "A5_SBrowser.db:tabs.account_name", "A5_SBrowser.db:tabs.account_type", "A5_SBrowser.db:internet_sync.sync_key", "A5_SBrowser.db:internet_sync.sync_value"], "Num_of_source_columns": 9, "Num_of_source_columns_unique": 9} -{"db_path": "selectedDBs\\A5", "PII_type": "PHONE", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\A5", "PII_type": "POSTAL_ADDRESS", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\A5", "PII_type": "USERNAME", "PII_all": ["sharononeil368", "sync_internet_data", "sync_bookmarks", "sync_open_pages", "sync_saved_pages", "google", "duckduckgo", "yahoo", "youtube", "bing", "so360", "qwant", "toutiao", 
"startpage", "shenma", "naver", "yandex", "mailru", "seznam", "yandexru", "daum", "baidu"], "PII_unique": ["sharononeil368", "sync_internet_data", "sync_bookmarks", "sync_open_pages", "sync_saved_pages", "google", "duckduckgo", "yahoo", "youtube", "bing", "so360", "qwant", "toutiao", "startpage", "shenma", "naver", "yandex", "mailru", "seznam", "yandexru", "daum", "baidu"], "Num_of_PII_all": 22, "Num_of_PII_unique": 22, "source_columns": ["A5_SBrowser.db:bookmarks.account_name", "A5_SBrowser.db:tabs.account_name", "A5_SBrowser.db:sync_state.account_name", "A5_SBrowser.db:internet_sync.sync_key", "A5_searchengine.db:android_metadata.locale", "A5_searchengine.db:searchengine.title", "A5_searchengine.db:searchengine.url", "A5_searchengine.db:searchengine.image_url", "A5_searchengine.db:searchengine.extra1", "A5_searchengine.db:searchengine.extra2", "A5_searchengine.db:searchengine.extra3"], "Num_of_source_columns": 11, "Num_of_source_columns_unique": 11} -{"db_path": "selectedDBs\\I1", "PII_type": "EMAIL", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": ["I1_CallHistory.sqlite:zwaaggregatecallevent.zlinktoken", "I1_CallHistory.sqlite:zwacdcallevent.zcallidstring", "I1_CallHistory.sqlite:zwacdcallevent.zgroupcallcreatoruserjidstring", "I1_CallHistory.sqlite:zwacdcallevent.zgroupjidstring", "I1_CallHistory.sqlite:zwacdcalleventparticipant.zjidstring"], "Num_of_source_columns": 5, "Num_of_source_columns_unique": 5} -{"db_path": "selectedDBs\\I1", "PII_type": "PERSON_NAME", "PII_all": ["rick", "otto", "reynolds", "emily", "sharon", "lisena gocaj", "andy sieg", "christian justiniano", "david wilson", "robechucks raul", "abner", "nia yuniar", "william stevenson", "amit sharma"], "PII_unique": ["rick", "otto", "reynolds", "emily", "sharon", "lisena gocaj", "andy sieg", "christian justiniano", "david wilson", "robechucks raul", "abner", "nia yuniar", "william stevenson", "amit sharma"], "Num_of_PII_all": 14, "Num_of_PII_unique": 
14, "source_columns": ["I1_ChatStorage.sqlite:zwamessage.ztext", "I1_ChatStorage.sqlite:zwamessagedataitem.zcontent1", "I1_ChatStorage.sqlite:zwaprofilepushname.zpushname", "I1_ContactsV2.sqlite:zwaaddressbookcontact.zfullname", "I1_ContactsV2.sqlite:zwaaddressbookcontact.zgivenname", "I1_ContactsV2.sqlite:zwaaddressbookcontact.zlastname", "I1_ContactsV2.sqlite:zwaaddressbookcontact.znotes"], "Num_of_source_columns": 7, "Num_of_source_columns_unique": 7} -{"db_path": "selectedDBs\\I1", "PII_type": "PHONE", "PII_all": ["4847353029", "5162879924", "5712679786", "7852533080", "8283677149", "8624338324", "9195796456", "9199037779", "9735203731", "2185715037", "1003163800", "1003236193", "1001036151", "1001791131", "1003787836", "1004222368", "1004894899", "1004927230", "1008320974", "1008433588", "1008559172", "1000705234", "1001184420", "1001445606", "1002781006", "1005464243", "1005606408", "1006092595", "1006522433", "1007025914", "1219230321", "1230876671", "1236195069", "1237220065", "1237697889", "1246793781", "1252341214", "1263010277", "1266958629", "1274676445", "1293350477", "1309658508", "1311142857", "1317473845", "1335889533", "1345908052", "1347504465", "1352788825", "1355886834", "1359112637", "1371651118", "1374373500", "1383511453", "1394174443", "1394974100", "1400289091", "1438414472", "1456738623", "1466660520", "1477095715", "1486535856", "1489236581", "1522593608", "1523301108", "1523732570", "1527995679", "1530023892", "1532451508", "1534800864", "1553731015", "1561833198", "1567697599", "1580127521", "1589953298", "1590233689", "1592618580", "1598803703", "1615188831", "1620904215", "1621272801", "1622622249", "1626585945", "1648180067", "1652416402", "1657127459", "1659501788", "1662243216", "1678178252", "1680737602", "1700968514", "1727060137", "1729502426", "1729630568", "1730812663", "1732642364", "1732997355", "1743515310", "1749448024", "1749883352", "1753137800", "1791376371", "1796320275", "1797099841", "1797272137", "1800961218", 
"1811487455", "1823675967", "1844949439", "1857964983", "1863428125", "1882931943", "1891372677", "1892088215", "1896686628", "1900788361", "1901187655", "1907972638", "1915204346", "1922733069", "1923628070", "1930557057", "1940605359", "1942666207", "1971402850", "1971801897", "1978482684", "1981414360", "1985050305", "2037185429", "2042244103", "2044704042", "2044749901", "2059475943", "2071281122", "2075045262", "2078750866", "2082116067", "2085129678", "2086948006", "2088752226", "2093942574", "2145253246", "2149843522", "2156423140", "2176748868", "2178257024", "2253177170", "2268809004", "2281070922", "2292940087", "2301217722", "2312936427", "2321532562", "2325049107", "2332301506", "2332765164", "2342380318", "2343649534", "2351866828", "2362242575", "2362917151", "2365477895", "2365633370", "2367244937", "2378997802", "2382583222", "2388902280", "2410427314", "2422119908", "2430705537", "2451448554", "2455485311", "2456987468", "2463468752", "2471186371", "2484345508", "2492332402", "2493771211", "2503573737", "2505329853", "2507461127", "2531252790", "2536230179", "2556195049", "2558034553", "2559092171", "2559407962", "2559967690", "2564993198", "2572804326", "2578572811", "2592311527", "2595052890", "2597420444", "2604459928", "2608942032", "2623332651", "2640199435", "2652792498", "2660188766", "2665354711", "2696089923", "2719028977", "2726922597", "2743635456", "2764615363", "2778473126", "2819596737", "2825030094", "2825497654", "2832911319", "2842304890", "2842392780", "2863369188", "2864011477", "2864039222", "2884748172", "2906531374", "2912304764", "2944638587", "2970271630", "2973746782", "2975502113", "2978352003", "2979962905", "2993468412", "2997037867", "2997475020", "3000889258", "3021618293", "3039574309", "3047937878", "3048753581", "3049204801", "3055206714", "3099087695", "3100323276", "3120501411", "3132605147", "3136250779", "3139456003", "3140627772", "3153400023", "3164595243", "3168868953", "3170085614", "3190937453", 
"3196407425", "3197359876", "3202476492", "3203229772", "3213777995", "3226948630", "3249527269", "3258060262", "3259003679", "3272635469", "3273096477", "3275396020", "3285820993", "3287778065", "3291757192", "3306690787", "3311488256", "3314878934", "3323522961", "3329851083", "3331872313", "3333022413", "3352380095", "3378749007", "3380288987", "3403416170", "3406276217", "3412330186", "3418311244", "3433304040", "3434198587", "3435729809", "3443423694", "3449078256", "3451478294", "3453844772", "3458534531", "3461950559", "3467120676", "3469607558", "3473255915", "3520879371", "3524430291", "3524438840", "3527931077", "3531637114", "3537255693", "3539795102", "3543874608", "3572260235", "3601100531", "3606730050", "3616278479", "3618848403", "3626291646", "3633790198", "3639187706", "3645473856", "3647280401", "3653893440", "3655035155", "3657099311", "3658791335", "3661787908", "3662344648", "3665926016", "3667838107", "3670778378", "3680764419", "3690246064", "3712259481", "3714631461", "3728134597", "3729042737", "3729053347", "3730960981", "3731294796", "3744576258", "3758869332", "3760687074", "3761564707", "3773611810", "3795820295", "3809110296", "3815521996", "3818608047", "3820411872", "3838851482", "3841673422", "3848750442", "3893562612", "3915394979", "3925496348", "3943382145", "3961283407", "3968198702", "3989054329", "3995552200", "4001952668", "4024206763", "4026669008", "4041012598", "4042861944", "4045354583", "4050854239", "4053936716", "4054645214", "4079595480", "4092146095", "4094039925", "4105001931", "4111963419", "4118674545", "4140532237", "4161971607", "4177934766", "4191997026", "4206081952", "4238282686", "4240717202", "4253836288", "4259796297", "4261396630", "4261770155", "4276434026", "4276617580", "4284782351", "4319810941", "4321930503", "4329686772", "4342879509", "4346425508", "4358160889", "4361296548", "4367976121", "4370656751", "4390048884", "4416414388", "4417769422", "4425163055", "4441208502", "4442409520", 
"4456032157", "4457506757", "4466297937", "4470799303", "4471935911", "4483092932", "518394896", "518483084", "520844610", "557225779", "565933133", "572634441", "576321561", "579685722", "591784570", "594625973", "601131360", "606300943", "606545871", "611563102", "614840422", "617088779", "638067303", "645875921", "6462544332", "651936101", "663378838", "682501400", "685336816", "712469999", "719268917", "734146386", "748345955", "7770852396", "7776547639", "7788763011", "806489782", "809601585", "823612200", "847666455", "852548082", "860283792", "878445168", "880368503", "882402752", "888142233", "889729380", "894815671", "897481933", "899862104", "901600498", "908531039", "909553757", "911548389", "916609265", "917201325", "923047852", "933789386", "944124404", "947300164", "952170584", "953550486", "953533056", "9575638913", "979493040", "999237822", "1001620285", "1022062690", "1023650078", "1031071766", "1042039325", "1042910290", "1043426489", "1048384398", "1076379459", "1091083343", "1092946107", "120231451", "124144264", "126459593", "126864025", "127725637", "152893926", "153667307", "1567471465", "172185139", "172857419", "173987257", "176810576", "177534340", "179999966", "18181972794", "18196898549", "18206832115", "18218920769", "18222629445", "18224729603", "18234351244", "18273157236", "18274588287", "18299023831", "18303670821", "18308479159", "18315560126", "18317150217", "18321754999", "18341000671", "18343149116", "18390145077", "18413439870", "18414636547", "18427018040", "18450004370", "18464368869", "18470503609", "18474822177", "18475445147", "18482405884", "18485590895", "18497456935", "18505502420", "18526504555", "18529828469", "18544748924", "18548425658", "18552589885", "18561566410", "18571283112", "18583307047", "18587000437", "18590846370", "18614431876", "18624338329", "18630879952", "18641461192", "18647910745", "18663787877", "18677556618", "18720633029", "18720731648", "18731114314", "18743190561", "18748541927", 
"18754840324", "18759203462", "18774545638", "18794398546", "18794427584", "18796070062", "18797795348", "18805334373", "18808161909", "18808418086", "18810406304", "18824627551", "18832383469", "18835039293", "18840654263", "18851902229", "18854121932", "18869073959", "18874442404", "18877832404", "18921661477", "18924874328", "18925836106", "18926461747", "18926855653", "18928499483", "18939632064", "18942712022", "18953394917", "18959750283", "18960022490", "18960685778", "18964577074", "19010077930", "19026068049", "19029039663", "19036348701", "19043137225", "19074230812", "19075831753", "19085681769", "19087446066", "19089063563", "19106168530", "19110673247", "19112298734", "19117205605", "19117772055", "19151996595", "19183212011", "19215001556", "19218853766", "19241129604", "19254761447", "19262292661", "19289767494", "19290449772", "19292353883", "19307077748", "19320134473", "19325695476", "19330165727", "19331145184", "19361445247", "19362434449", "19387002857", "19387034890", "19389068190", "19412180617", "19423601164", "19455266133", "19457329015", "19468461641", "19475423056", "19480251463", "19502429597", "19510542650", "19538892115", "19543128363", "19567226657", "19570443114", "19572258711", "19593320317", "19599054665", "19600247679", "19604606546", "19617443284", "19617825621", "19630053792", "19638479515", "19639991049", "19653938959", "19658403133", "19661493857", "19670689330", "19679962047", "19683621997", "19692039033", "19700373700", "19708224026", "19717806697", "19721055393", "19724880340", "19779231696", "19792416865", "19800136099", "19809071973", "19809577169", "19840428535", "19863374803", "19877641003", "19891601476", "19891997062", "19921202253", "19930550456", "19932297233", "19937890366", "19950747364", "19953946557", "19962549996", "19981326993", "19982466006", "19997911741"], "PII_unique": ["4847353029", "5162879924", "5712679786", "7852533080", "8283677149", "8624338324", "9195796456", "9199037779", "9735203731", 
"2185715037", "1003163800", "1003236193", "1001036151", "1001791131", "1003787836", "1004222368", "1004894899", "1004927230", "1008320974", "1008433588", "1008559172", "1000705234", "1001184420", "1001445606", "1002781006", "1005464243", "1005606408", "1006092595", "1006522433", "1007025914", "1219230321", "1230876671", "1236195069", "1237220065", "1237697889", "1246793781", "1252341214", "1263010277", "1266958629", "1274676445", "1293350477", "1309658508", "1311142857", "1317473845", "1335889533", "1345908052", "1347504465", "1352788825", "1355886834", "1359112637", "1371651118", "1374373500", "1383511453", "1394174443", "1394974100", "1400289091", "1438414472", "1456738623", "1466660520", "1477095715", "1486535856", "1489236581", "1522593608", "1523301108", "1523732570", "1527995679", "1530023892", "1532451508", "1534800864", "1553731015", "1561833198", "1567697599", "1580127521", "1589953298", "1590233689", "1592618580", "1598803703", "1615188831", "1620904215", "1621272801", "1622622249", "1626585945", "1648180067", "1652416402", "1657127459", "1659501788", "1662243216", "1678178252", "1680737602", "1700968514", "1727060137", "1729502426", "1729630568", "1730812663", "1732642364", "1732997355", "1743515310", "1749448024", "1749883352", "1753137800", "1791376371", "1796320275", "1797099841", "1797272137", "1800961218", "1811487455", "1823675967", "1844949439", "1857964983", "1863428125", "1882931943", "1891372677", "1892088215", "1896686628", "1900788361", "1901187655", "1907972638", "1915204346", "1922733069", "1923628070", "1930557057", "1940605359", "1942666207", "1971402850", "1971801897", "1978482684", "1981414360", "1985050305", "2037185429", "2042244103", "2044704042", "2044749901", "2059475943", "2071281122", "2075045262", "2078750866", "2082116067", "2085129678", "2086948006", "2088752226", "2093942574", "2145253246", "2149843522", "2156423140", "2176748868", "2178257024", "2253177170", "2268809004", "2281070922", "2292940087", "2301217722", 
"2312936427", "2321532562", "2325049107", "2332301506", "2332765164", "2342380318", "2343649534", "2351866828", "2362242575", "2362917151", "2365477895", "2365633370", "2367244937", "2378997802", "2382583222", "2388902280", "2410427314", "2422119908", "2430705537", "2451448554", "2455485311", "2456987468", "2463468752", "2471186371", "2484345508", "2492332402", "2493771211", "2503573737", "2505329853", "2507461127", "2531252790", "2536230179", "2556195049", "2558034553", "2559092171", "2559407962", "2559967690", "2564993198", "2572804326", "2578572811", "2592311527", "2595052890", "2597420444", "2604459928", "2608942032", "2623332651", "2640199435", "2652792498", "2660188766", "2665354711", "2696089923", "2719028977", "2726922597", "2743635456", "2764615363", "2778473126", "2819596737", "2825030094", "2825497654", "2832911319", "2842304890", "2842392780", "2863369188", "2864011477", "2864039222", "2884748172", "2906531374", "2912304764", "2944638587", "2970271630", "2973746782", "2975502113", "2978352003", "2979962905", "2993468412", "2997037867", "2997475020", "3000889258", "3021618293", "3039574309", "3047937878", "3048753581", "3049204801", "3055206714", "3099087695", "3100323276", "3120501411", "3132605147", "3136250779", "3139456003", "3140627772", "3153400023", "3164595243", "3168868953", "3170085614", "3190937453", "3196407425", "3197359876", "3202476492", "3203229772", "3213777995", "3226948630", "3249527269", "3258060262", "3259003679", "3272635469", "3273096477", "3275396020", "3285820993", "3287778065", "3291757192", "3306690787", "3311488256", "3314878934", "3323522961", "3329851083", "3331872313", "3333022413", "3352380095", "3378749007", "3380288987", "3403416170", "3406276217", "3412330186", "3418311244", "3433304040", "3434198587", "3435729809", "3443423694", "3449078256", "3451478294", "3453844772", "3458534531", "3461950559", "3467120676", "3469607558", "3473255915", "3520879371", "3524430291", "3524438840", "3527931077", "3531637114", 
"3537255693", "3539795102", "3543874608", "3572260235", "3601100531", "3606730050", "3616278479", "3618848403", "3626291646", "3633790198", "3639187706", "3645473856", "3647280401", "3653893440", "3655035155", "3657099311", "3658791335", "3661787908", "3662344648", "3665926016", "3667838107", "3670778378", "3680764419", "3690246064", "3712259481", "3714631461", "3728134597", "3729042737", "3729053347", "3730960981", "3731294796", "3744576258", "3758869332", "3760687074", "3761564707", "3773611810", "3795820295", "3809110296", "3815521996", "3818608047", "3820411872", "3838851482", "3841673422", "3848750442", "3893562612", "3915394979", "3925496348", "3943382145", "3961283407", "3968198702", "3989054329", "3995552200", "4001952668", "4024206763", "4026669008", "4041012598", "4042861944", "4045354583", "4050854239", "4053936716", "4054645214", "4079595480", "4092146095", "4094039925", "4105001931", "4111963419", "4118674545", "4140532237", "4161971607", "4177934766", "4191997026", "4206081952", "4238282686", "4240717202", "4253836288", "4259796297", "4261396630", "4261770155", "4276434026", "4276617580", "4284782351", "4319810941", "4321930503", "4329686772", "4342879509", "4346425508", "4358160889", "4361296548", "4367976121", "4370656751", "4390048884", "4416414388", "4417769422", "4425163055", "4441208502", "4442409520", "4456032157", "4457506757", "4466297937", "4470799303", "4471935911", "4483092932", "518394896", "518483084", "520844610", "557225779", "565933133", "572634441", "576321561", "579685722", "591784570", "594625973", "601131360", "606300943", "606545871", "611563102", "614840422", "617088779", "638067303", "645875921", "6462544332", "651936101", "663378838", "682501400", "685336816", "712469999", "719268917", "734146386", "748345955", "7770852396", "7776547639", "7788763011", "806489782", "809601585", "823612200", "847666455", "852548082", "860283792", "878445168", "880368503", "882402752", "888142233", "889729380", "894815671", "897481933", 
"899862104", "901600498", "908531039", "909553757", "911548389", "916609265", "917201325", "923047852", "933789386", "944124404", "947300164", "952170584", "953550486", "953533056", "9575638913", "979493040", "999237822", "1001620285", "1022062690", "1023650078", "1031071766", "1042039325", "1042910290", "1043426489", "1048384398", "1076379459", "1091083343", "1092946107", "120231451", "124144264", "126459593", "126864025", "127725637", "152893926", "153667307", "1567471465", "172185139", "172857419", "173987257", "176810576", "177534340", "179999966", "18181972794", "18196898549", "18206832115", "18218920769", "18222629445", "18224729603", "18234351244", "18273157236", "18274588287", "18299023831", "18303670821", "18308479159", "18315560126", "18317150217", "18321754999", "18341000671", "18343149116", "18390145077", "18413439870", "18414636547", "18427018040", "18450004370", "18464368869", "18470503609", "18474822177", "18475445147", "18482405884", "18485590895", "18497456935", "18505502420", "18526504555", "18529828469", "18544748924", "18548425658", "18552589885", "18561566410", "18571283112", "18583307047", "18587000437", "18590846370", "18614431876", "18624338329", "18630879952", "18641461192", "18647910745", "18663787877", "18677556618", "18720633029", "18720731648", "18731114314", "18743190561", "18748541927", "18754840324", "18759203462", "18774545638", "18794398546", "18794427584", "18796070062", "18797795348", "18805334373", "18808161909", "18808418086", "18810406304", "18824627551", "18832383469", "18835039293", "18840654263", "18851902229", "18854121932", "18869073959", "18874442404", "18877832404", "18921661477", "18924874328", "18925836106", "18926461747", "18926855653", "18928499483", "18939632064", "18942712022", "18953394917", "18959750283", "18960022490", "18960685778", "18964577074", "19010077930", "19026068049", "19029039663", "19036348701", "19043137225", "19074230812", "19075831753", "19085681769", "19087446066", "19089063563", "19106168530", 
"19110673247", "19112298734", "19117205605", "19117772055", "19151996595", "19183212011", "19215001556", "19218853766", "19241129604", "19254761447", "19262292661", "19289767494", "19290449772", "19292353883", "19307077748", "19320134473", "19325695476", "19330165727", "19331145184", "19361445247", "19362434449", "19387002857", "19387034890", "19389068190", "19412180617", "19423601164", "19455266133", "19457329015", "19468461641", "19475423056", "19480251463", "19502429597", "19510542650", "19538892115", "19543128363", "19567226657", "19570443114", "19572258711", "19593320317", "19599054665", "19600247679", "19604606546", "19617443284", "19617825621", "19630053792", "19638479515", "19639991049", "19653938959", "19658403133", "19661493857", "19670689330", "19679962047", "19683621997", "19692039033", "19700373700", "19708224026", "19717806697", "19721055393", "19724880340", "19779231696", "19792416865", "19800136099", "19809071973", "19809577169", "19840428535", "19863374803", "19877641003", "19891601476", "19891997062", "19921202253", "19930550456", "19932297233", "19937890366", "19950747364", "19953946557", "19962549996", "19981326993", "19982466006", "19997911741"], "Num_of_PII_all": 655, "Num_of_PII_unique": 655, "source_columns": ["I1_CallHistory.sqlite:zwacdcallevent.zcallidstring", "I1_CallHistory.sqlite:zwacdcalleventparticipant.zjidstring", "I1_CallHistory.sqlite:zwaaggregatecallevent.zlinktoken", "I1_ContactsV2.sqlite:zwaaddressbookcontact.zphonenumber", "I1_ContactsV2.sqlite:zwaaddressbookcontact.zlocalizedphonenumber", "I1_ContactsV2.sqlite:zwaaddressbookcontact.zabouttext", "I1_ContactsV2.sqlite:zwaaddressbookcontact.znotes"], "Num_of_source_columns": 7, "Num_of_source_columns_unique": 7} -{"db_path": "selectedDBs\\I1", "PII_type": "POSTAL_ADDRESS", "PII_all": ["12503 e via de palmas, chandler, az", "8500 peña blvd, denver, co"], "PII_unique": ["12503 e via de palmas, chandler, az", "8500 peña blvd, denver, co"], "Num_of_PII_all": 2, "Num_of_PII_unique": 
2, "source_columns": ["I1_ChatStorage.sqlite:zwamessage.ztext", "I1_ChatStorage.sqlite:zwamessagedataitem.zcontent1", "I1_ChatStorage.sqlite:zwamessagedataitem.zcontent2"], "Num_of_source_columns": 3, "Num_of_source_columns_unique": 3} -{"db_path": "selectedDBs\\I1", "PII_type": "USERNAME", "PII_all": ["19735203731", "923402582955", "14847353029", "19199037779", "waaggregatecallevent", "wacdcallevent", "wacdcalleventparticipant", "wajoinablecallevent", "wajoinablecalleventparticipant", "waupcomingcallevent"], "PII_unique": ["19735203731", "923402582955", "14847353029", "19199037779", "waaggregatecallevent", "wacdcallevent", "wacdcalleventparticipant", "wajoinablecallevent", "wajoinablecalleventparticipant", "waupcomingcallevent"], "Num_of_PII_all": 10, "Num_of_PII_unique": 10, "source_columns": ["I1_CallHistory.sqlite:zwaaggregatecallevent.zlinktoken", "I1_CallHistory.sqlite:zwacdcallevent.zcallidstring", "I1_CallHistory.sqlite:zwacdcallevent.zgroupcallcreatoruserjidstring", "I1_CallHistory.sqlite:zwacdcallevent.zgroupjidstring", "I1_CallHistory.sqlite:zwacdcalleventparticipant.zjidstring", "I1_CallHistory.sqlite:z_primarykey.z_name"], "Num_of_source_columns": 6, "Num_of_source_columns_unique": 6} -{"db_path": "selectedDBs\\I2", "PII_type": "EMAIL", "PII_all": ["charles.r.finley11@gmail.com", "edventure77@gmail.com", "engleron@gmail.com", "jraynolds.cbdemo@gmail.com", "mederostony035@gmail.com", "ottomatik1234@gmail.com"], "PII_unique": ["charles.r.finley11@gmail.com", "edventure77@gmail.com", "engleron@gmail.com", "jraynolds.cbdemo@gmail.com", "mederostony035@gmail.com", "ottomatik1234@gmail.com"], "Num_of_PII_all": 6, "Num_of_PII_unique": 6, "source_columns": ["I2_AddressBook.sqlitedb:abmultivalue.value", "I2_AddressBook.sqlitedb:abpersonfulltextsearch_content.c17email"], "Num_of_source_columns": 2, "Num_of_source_columns_unique": 2} -{"db_path": "selectedDBs\\I2", "PII_type": "PERSON_NAME", "PII_all": ["otto", "ronen", "john", "charles", "ed", "tony", "colin", 
"carol", "scott", "bethany", "taylor", "luis", "jerry", "ryan", "katherine", "julie", "jonathan", "diane", "jennifer", "natalie", "juan", "melissa", "david", "michael", "joel", "donna", "rachael", "andrea", "leslie", "katie", "caitlyn", "jill", "hailey", "blake", "timothy", "eric", "sherry", "joseph", "jessica", "ronald", "johnathan", "susan", "mary", "william", "wanda", "robert", "erin", "breanna", "christopher", "richard", "joshua", "rebecca", "marilyn", "courtney", "kristen", "jeremy", "shelby", "amanda", "danielle", "melanie", "deborah", "pamela", "danny", "catherine", "clayton", "lindsey", "heather", "frank", "felicia", "kevin", "kathleen", "linda", "sue", "michele", "anthony", "curtis", "kathy", "rachel", "sarah", "cheryl", "james", "thomas", "marcus", "elizabeth", "emily", "alex", "kelly", "teresa", "collin", "anita", "christy", "chase", "nicholas", "brandon", "randy", "aaron", "benjamin", "tammy", "ann", "vicki", "kaitlyn", "brenda", "erika", "daniel", "cynthia", "nathan", "jeanette", "tara", "eugene", "alexa", "jacob", "tristan", "ashley", "meghan", "tyler", "deanna", "stacey", "kerri", "anne", "micheal", "kimberly", "bryan", "desiree", "jeremiah", "willie", "leah", "maria", "carmen", "cassandra", "jamie", "mark", "matthew", "nicole", "amber", "lisa", "leon", "louis", "sydney", "dennis", "wendy", "alison", "angela", "tiffany", "kristi", "alyssa", "stanley", "oscar", "patrick", "craig", "melinda", "lance", "vickie", "raymond", "krystal", "phillip", "samantha", "pedro", "robin", "annette", "christina", "erica", "maureen", "dominic", "steven", "penny", "olivia", "dawn", "carlos", "tanya", "joy", "monique", "erik", "marissa", "patty", "renee", "sara", "norman", "diana", "debra", "roberta", "dana", "manuel", "alexander", "tracy", "hunter", "alicia", "keith", "bruce", "fred", "kenneth", "colleen", "dylan", "christine", "gabrielle", "nancy", "destiny", "mitchell", "adrienne", "anna", "derek", "andrew", "dustin", "jay", "natasha", "andres", "albert", "joe", 
"patricia", "lynn", "peter", "laurie", "sonya", "donald", "denise", "jeffrey", "martin", "chris", "caitlin", "haley", "yvette", "gloria", "sharon", "dean", "zachary", "madeline", "larry", "sheri", "sandra", "shannon", "molly", "judith", "ashlee", "alexis", "edward", "suzanne", "sean", "rick", "russell", "hank", "matik", "engler", "raynolds", "finley", "venture", "mederos", "dacopps", "johnson", "davenport", "brock", "mejia", "gordon", "welch", "davis", "lee", "hernandez", "simmons", "moreno", "mccormick", "diaz", "macdonald", "west", "valenzuela", "rodriguez", "bautista", "cooper", "harrell", "gonzales", "fritz", "smith", "carr", "gonzalez", "goodwin", "dalton", "benson", "flynn", "morris", "rose", "jones", "cunningham", "ramirez", "campbell", "molina", "anderson", "pittman", "butler", "mcdonald", "rush", "burns", "sherman", "poole", "armstrong", "schneider", "rios", "williams", "macias", "holmes", "wong", "lowe", "castro", "powers", "young", "andrews", "franklin", "luna", "berg", "hughes", "hickman", "wilson", "george", "becker", "rivera", "fitzpatrick", "singh", "camacho", "sutton", "gardner", "jordan", "hebert", "moore", "sanford", "weber", "fry", "miller", "dillon", "hutchinson", "reyes", "collins", "thornton", "reid", "pierce", "acosta", "turner", "mendoza", "rowe", "snow", "yates", "long", "hall", "cordova", "garrett", "henderson", "green", "cole", "nunez", "mclaughlin", "cameron", "farmer", "guerrero", "henry", "ramos", "martinez", "baker", "nelson", "white", "barr", "ross", "higgins", "jacobs", "monroe", "rosales", "jarvis", "sanchez", "herring", "ayala", "warner", "roberts", "robinson", "bailey", "fischer", "christensen", "cain", "barnes", "morgan", "ward", "walsh", "rocha", "hill", "weaver", "jackson", "harvey", "schultz", "cuevas", "willis", "lang", "parrish", "carey", "horn", "brown", "riley", "munoz", "fisher", "warren", "orozco", "watson", "rice", "chavez", "murphy", "adkins", "ritter", "gallegos", "petersen", "morrison", "bridges", "estrada", "hunt", 
"boyd", "clay", "sanders", "vazquez", "hodges", "arellano", "powell", "khan", "richardson", "mathis", "meyer", "barber", "bradley", "harris", "richards", "lewis", "cardenas", "gentry", "mccarthy", "dunlap", "casey", "brady", "howard", "flores", "schaefer", "wood", "hopkins", "potter", "holloway", "donaldson", "merritt", "lynch", "barton", "stafford", "meyers", "padilla", "davidson", "randall", "caldwell", "preston", "payne", "peters", "hicks", "cox", "gibson", "clark", "blair", "malone", "washington", "fleming", "stephens", "dickerson", "patton", "booker", "lopez", "parker", "contreras", "yu", "waller", "kennedy", "ibarra", "sandoval", "mccullough", "mccann", "coleman", "crane", "cross", "salas", "howell", "kaufman", "serrano", "knox", "gilbert", "patel", "carroll", "aguirre", "obrien", "wang", "fox", "king", "meadows", "daniels", "mueller", "porter", "blevins", "walls", "evans", "reynolds", "reed", "jacobson", "walker", "rich", "kane", "mora", "rubio", "oconnor", "lucero", "robertson", "savage", "crawford", "lozano", "morales", "price", "mcclain", "compton", "peterson", "clarke", "winters", "little", "sheppard", "owens", "noble", "marsh", "fitzgerald", "wiggins", "lloyd", "wright", "garcia", "melendez", "phillips", "kelley", "stevens", "ellis", "snyder", "crosby", "olsen", "mcbride", "stewart", "santiago", "shepherd", "zavala", "houston", "leblanc", "sullivan", "perez", "lawson", "huff", "osborn", "edwards", "thompson", "shaw", "potts", "hampton", "hendrix", "faulkner", "bush", "pratt", "nichols", "fuller", "guerra", "hanson", "ray", "wells", "carter", "nielsen", "levy", "farley", "stanton", "walton", "bonilla", "hart", "berry", "erickson", "farrell", "marquez", "guzman", "boyer", "hahn", "hanna", "byrd", "daugherty", "palmer", "williamson", "holden", "salinas", "hansen", "cisneros", "zhang", "tucker", "graham", "wilcox", "gray", "brooks", "walters", "roy", "webb", "santos", "wu", "shaffer", "fletcher", "mills", "gould", "mcdaniel", "downs", "olson", "fields", 
"huang", "wolf", "mack", "copeland", "whitehead", "chan", "stone", "jimenez", "hardin", "ferguson", "newton", "webster", "chandler", "bennett", "soto", "morse", "riggs", "torres", "pacheco", "mcguire", "day", "donovan", "chung", "harrison", "kerr", "schroeder", "douglas", "simon", "black", "moran", "cohen", "yoder", "ramsey", "garza", "blankenship", "reese", "acevedo", "vega", "moses", "knight", "mason", "swanson", "dorsey", "mullins", "cochran", "mays", "harrington", "greene", "foster", "lamb", "steele", "larsen", "brennan", "kim", "church", "perry", "villanueva", "schmidt", "jennings", "navarro", "ballard", "wade", "patterson", "suarez", "nicholson", "cantrell", "silva", "cowan", "woods", "gregory", "yang", "adams", "ruiz", "page", "newman", "pham", "montgomery", "koch", "bass", "austin", "vasquez", "frazier", "myers", "robbins", "ferrell", "mckee", "nguyen", "drake", "wheeler", "reilly", "todd", "woodard", "dixon", "york", "good", "morton", "rogers", "rosario", "spears", "leonard", "arias", "medina", "grant", "wise", "hobbs", "rasmussen", "owen", "doyle", "villarreal", "mckenzie", "hood", "clements", "mcmahon", "li", "oneal", "ball", "cooke", "johnston", "mcdowell", "hubbard", "bell", "carson", "glass", "galvan", "hester", "moon", "freeman", "chen", "bentley", "weeks", "archer", "bolton", "stevenson", "duncan", "sims", "garner", "murray", "ho", "garrison", "lester", "ali", "neal", "conley", "hammond", "wilkerson", "roll", "philby"], "PII_unique": ["otto", "ronen", "john", "charles", "ed", "tony", "colin", "carol", "scott", "bethany", "taylor", "luis", "jerry", "ryan", "katherine", "julie", "jonathan", "diane", "jennifer", "natalie", "juan", "melissa", "david", "michael", "joel", "donna", "rachael", "andrea", "leslie", "katie", "caitlyn", "jill", "hailey", "blake", "timothy", "eric", "sherry", "joseph", "jessica", "ronald", "johnathan", "susan", "mary", "william", "wanda", "robert", "erin", "breanna", "christopher", "richard", "joshua", "rebecca", "marilyn", 
"courtney", "kristen", "jeremy", "shelby", "amanda", "danielle", "melanie", "deborah", "pamela", "danny", "catherine", "clayton", "lindsey", "heather", "frank", "felicia", "kevin", "kathleen", "linda", "sue", "michele", "anthony", "curtis", "kathy", "rachel", "sarah", "cheryl", "james", "thomas", "marcus", "elizabeth", "emily", "alex", "kelly", "teresa", "collin", "anita", "christy", "chase", "nicholas", "brandon", "randy", "aaron", "benjamin", "tammy", "ann", "vicki", "kaitlyn", "brenda", "erika", "daniel", "cynthia", "nathan", "jeanette", "tara", "eugene", "alexa", "jacob", "tristan", "ashley", "meghan", "tyler", "deanna", "stacey", "kerri", "anne", "micheal", "kimberly", "bryan", "desiree", "jeremiah", "willie", "leah", "maria", "carmen", "cassandra", "jamie", "mark", "matthew", "nicole", "amber", "lisa", "leon", "louis", "sydney", "dennis", "wendy", "alison", "angela", "tiffany", "kristi", "alyssa", "stanley", "oscar", "patrick", "craig", "melinda", "lance", "vickie", "raymond", "krystal", "phillip", "samantha", "pedro", "robin", "annette", "christina", "erica", "maureen", "dominic", "steven", "penny", "olivia", "dawn", "carlos", "tanya", "joy", "monique", "erik", "marissa", "patty", "renee", "sara", "norman", "diana", "debra", "roberta", "dana", "manuel", "alexander", "tracy", "hunter", "alicia", "keith", "bruce", "fred", "kenneth", "colleen", "dylan", "christine", "gabrielle", "nancy", "destiny", "mitchell", "adrienne", "anna", "derek", "andrew", "dustin", "jay", "natasha", "andres", "albert", "joe", "patricia", "lynn", "peter", "laurie", "sonya", "donald", "denise", "jeffrey", "martin", "chris", "caitlin", "haley", "yvette", "gloria", "sharon", "dean", "zachary", "madeline", "larry", "sheri", "sandra", "shannon", "molly", "judith", "ashlee", "alexis", "edward", "suzanne", "sean", "rick", "russell", "hank", "matik", "engler", "raynolds", "finley", "venture", "mederos", "dacopps", "johnson", "davenport", "brock", "mejia", "gordon", "welch", "davis", "lee", 
"hernandez", "simmons", "moreno", "mccormick", "diaz", "macdonald", "west", "valenzuela", "rodriguez", "bautista", "cooper", "harrell", "gonzales", "fritz", "smith", "carr", "gonzalez", "goodwin", "dalton", "benson", "flynn", "morris", "rose", "jones", "cunningham", "ramirez", "campbell", "molina", "anderson", "pittman", "butler", "mcdonald", "rush", "burns", "sherman", "poole", "armstrong", "schneider", "rios", "williams", "macias", "holmes", "wong", "lowe", "castro", "powers", "young", "andrews", "franklin", "luna", "berg", "hughes", "hickman", "wilson", "george", "becker", "rivera", "fitzpatrick", "singh", "camacho", "sutton", "gardner", "jordan", "hebert", "moore", "sanford", "weber", "fry", "miller", "dillon", "hutchinson", "reyes", "collins", "thornton", "reid", "pierce", "acosta", "turner", "mendoza", "rowe", "snow", "yates", "long", "hall", "cordova", "garrett", "henderson", "green", "cole", "nunez", "mclaughlin", "cameron", "farmer", "guerrero", "henry", "ramos", "martinez", "baker", "nelson", "white", "barr", "ross", "higgins", "jacobs", "monroe", "rosales", "jarvis", "sanchez", "herring", "ayala", "warner", "roberts", "robinson", "bailey", "fischer", "christensen", "cain", "barnes", "morgan", "ward", "walsh", "rocha", "hill", "weaver", "jackson", "harvey", "schultz", "cuevas", "willis", "lang", "parrish", "carey", "horn", "brown", "riley", "munoz", "fisher", "warren", "orozco", "watson", "rice", "chavez", "murphy", "adkins", "ritter", "gallegos", "petersen", "morrison", "bridges", "estrada", "hunt", "boyd", "clay", "sanders", "vazquez", "hodges", "arellano", "powell", "khan", "richardson", "mathis", "meyer", "barber", "bradley", "harris", "richards", "lewis", "cardenas", "gentry", "mccarthy", "dunlap", "casey", "brady", "howard", "flores", "schaefer", "wood", "hopkins", "potter", "holloway", "donaldson", "merritt", "lynch", "barton", "stafford", "meyers", "padilla", "davidson", "randall", "caldwell", "preston", "payne", "peters", "hicks", "cox", 
"gibson", "clark", "blair", "malone", "washington", "fleming", "stephens", "dickerson", "patton", "booker", "lopez", "parker", "contreras", "yu", "waller", "kennedy", "ibarra", "sandoval", "mccullough", "mccann", "coleman", "crane", "cross", "salas", "howell", "kaufman", "serrano", "knox", "gilbert", "patel", "carroll", "aguirre", "obrien", "wang", "fox", "king", "meadows", "daniels", "mueller", "porter", "blevins", "walls", "evans", "reynolds", "reed", "jacobson", "walker", "rich", "kane", "mora", "rubio", "oconnor", "lucero", "robertson", "savage", "crawford", "lozano", "morales", "price", "mcclain", "compton", "peterson", "clarke", "winters", "little", "sheppard", "owens", "noble", "marsh", "fitzgerald", "wiggins", "lloyd", "wright", "garcia", "melendez", "phillips", "kelley", "stevens", "ellis", "snyder", "crosby", "olsen", "mcbride", "stewart", "santiago", "shepherd", "zavala", "houston", "leblanc", "sullivan", "perez", "lawson", "huff", "osborn", "edwards", "thompson", "shaw", "potts", "hampton", "hendrix", "faulkner", "bush", "pratt", "nichols", "fuller", "guerra", "hanson", "ray", "wells", "carter", "nielsen", "levy", "farley", "stanton", "walton", "bonilla", "hart", "berry", "erickson", "farrell", "marquez", "guzman", "boyer", "hahn", "hanna", "byrd", "daugherty", "palmer", "williamson", "holden", "salinas", "hansen", "cisneros", "zhang", "tucker", "graham", "wilcox", "gray", "brooks", "walters", "roy", "webb", "santos", "wu", "shaffer", "fletcher", "mills", "gould", "mcdaniel", "downs", "olson", "fields", "huang", "wolf", "mack", "copeland", "whitehead", "chan", "stone", "jimenez", "hardin", "ferguson", "newton", "webster", "chandler", "bennett", "soto", "morse", "riggs", "torres", "pacheco", "mcguire", "day", "donovan", "chung", "harrison", "kerr", "schroeder", "douglas", "simon", "black", "moran", "cohen", "yoder", "ramsey", "garza", "blankenship", "reese", "acevedo", "vega", "moses", "knight", "mason", "swanson", "dorsey", "mullins", "cochran", "mays", 
"harrington", "greene", "foster", "lamb", "steele", "larsen", "brennan", "kim", "church", "perry", "villanueva", "schmidt", "jennings", "navarro", "ballard", "wade", "patterson", "suarez", "nicholson", "cantrell", "silva", "cowan", "woods", "gregory", "yang", "adams", "ruiz", "page", "newman", "pham", "montgomery", "koch", "bass", "austin", "vasquez", "frazier", "myers", "robbins", "ferrell", "mckee", "nguyen", "drake", "wheeler", "reilly", "todd", "woodard", "dixon", "york", "good", "morton", "rogers", "rosario", "spears", "leonard", "arias", "medina", "grant", "wise", "hobbs", "rasmussen", "owen", "doyle", "villarreal", "mckenzie", "hood", "clements", "mcmahon", "li", "oneal", "ball", "cooke", "johnston", "mcdowell", "hubbard", "bell", "carson", "glass", "galvan", "hester", "moon", "freeman", "chen", "bentley", "weeks", "archer", "bolton", "stevenson", "duncan", "sims", "garner", "murray", "ho", "garrison", "lester", "ali", "neal", "conley", "hammond", "wilkerson", "roll", "philby"], "Num_of_PII_all": 748, "Num_of_PII_unique": 748, "source_columns": ["I2_AddressBook.sqlitedb:abperson.first", "I2_AddressBook.sqlitedb:abperson.last", "I2_AddressBook.sqlitedb:abperson.middle", "I2_AddressBook.sqlitedb:abperson.prefix", "I2_AddressBook.sqlitedb:abperson.suffix", "I2_AddressBook.sqlitedb:abperson.nickname", "I2_AddressBook.sqlitedb:abmultivalue.value", "I2_AddressBook.sqlitedb:abmultivalueentry.value", "I2_AddressBook.sqlitedb:abmultivaluelabel.value", "I2_AddressBook.sqlitedb:abgroup.name"], "Num_of_source_columns": 10, "Num_of_source_columns_unique": 10} -{"db_path": "selectedDBs\\I2", "PII_type": "PHONE", "PII_all": ["4847353029", "5162879924", "5712679786", "7852533080", "8283677149", "8624338324", "9195796456", "9199037779", "9735203731", "2185715037", "1003163800", "1003236193", "10010361518", "10017911312", "10037878368", "10042223682", "10048948999", "10049272303", "10083209744", "10084335884", "10085591720", "1000705234", "1111844206", "1114456067", 
"11127810067", "11154642430", "11156064084", "11160925958", "11165224332", "11170259144", "1219230321", "1230876671", "1236195069", "1237220065", "1237697889", "1246793781", "1252341214", "1263010277", "1266958629", "1274676445", "1293350477", "1309658508", "1311142857", "1317473845", "1335889533", "1345908052", "1347504465", "1352788825", "1355886834", "1359112637", "1371651118", "1374373500", "1383511453", "1394174443", "1394974100", "1400289091", "1438414472", "1456738623", "1466660520", "1477095715", "1486535856", "1489236581", "1522593608", "1523301108", "1523732570", "1527995679", "1530023892", "1532451508", "1534800864", "153731015", "1561833198", "1567697599", "1580127521", "1589953298", "1590233689", "1592618580", "1598803703", "1615188831", "1620904215", "1621272801", "1622622249", "1626585945", "1648180067", "1652416402", "1657127459", "1659501788", "1662243216", "1678178252", "1680737602", "1700968514", "1727060137", "1729502426", "1729630568", "1730812663", "1732642364", "1732997355", "1743515310", "1749448024", "1749883352", "1753137800", "1791376371", "1796320275", "1797099841", "1797272137", "1800961218", "1811487455", "1823675967", "1844949439", "1857964983", "1863428125", "1882931943", "1891372677", "1892088215", "1896686628", "1900788361", "1901187655", "1907972638", "1915204346", "1922733069", "1923628070", "1930557057", "1940605359", "1942666207", "1971402850", "1971801897", "1978482684", "1981414360", "1985050305", "2037185429", "2042244103", "2044704042", "2044749901", "2059475943", "2071281122", "2075045262", "2078750866", "2082116067", "2085129678", "2086948006", "2088752226", "2093942574", "2145253246", "2149843522", "2156423140", "2176748868", "2178257024", "2253177170", "2268809004", "2281070922", "2292940087", "2301217722", "2312936427", "2321532562", "2325049107", "2332301506", "2332765164", "2342380318", "2343649534", "2351866828", "2362242575", "2362917151", "2365477895", "2365633370", "2367244937", "2378997802", "2382583222", 
"2388902280", "2410427314", "2422119908", "2430705537", "2451448554", "2455485311", "2456987468", "2463468752", "2471186371", "2484345508", "2492332402", "2493771211", "2503573737", "2505329853", "2507461127", "2531252790", "2536230179", "2556195049", "2558034553", "2559092171", "2559407962", "2559967690", "2564993198", "2572804326", "2578572811", "2592311527", "2595052890", "2597420444", "2604459928", "2608942032", "2623332651", "2640199435", "2652792498", "2660188766", "2665354711", "2696089923", "2719028977", "2726922597", "2743635456", "2764615363", "2778473126", "2819596737", "2825030094", "2825497654", "2832911319", "2842304890", "2842392780", "2863369188", "2864011477", "2864039222", "2884748172", "2906531374", "2912304764", "2944638587", "2970271630", "2973746782", "2975502113", "2978352003", "2979962905", "2993468412", "2997037867", "2997475020", "3000889258", "3021618293", "3039574309", "3047937878", "3048753581", "3049204801", "3055206714", "3099087695", "3100323276", "3120501411", "3132605147", "3136250779", "3139456003", "3140627772", "3153400023", "3164595243", "3168868953", "3170085614", "3190937453", "3196407425", "3197359876", "3202476492", "3203229772", "3213777995", "3226948630", "3249527269", "3258060262", "3259003679", "3272635469", "3273096477", "3275396020", "3285820993", "3287778065", "3291757192", "3306690787", "3311488256", "3314878934", "3323522961", "3329851083", "3331872313", "3333022413", "3352380095", "3378749007", "3380288987", "3403416170", "3406276217", "3412330186", "3418311244", "3433304040", "3434198587", "3435729809", "3443423694", "3449078256", "3451478294", "3453844772", "3458534531", "3461950559", "3467120676", "3469607558", "3473255915", "3520879371", "3524430291", "3524438840", "3527931077", "3531637114", "3537255693", "3539795102", "3543874608", "3572260235", "3601100531", "3606730050", "3616278479", "3618848403", "3626291646", "3633790198", "3639187706", "3645473856", "3647280401", "3653893440", "3655035155", 
"3657099311", "3658791335", "3661787908", "3662344648", "3665926016", "3667838107", "3670778378", "3680764419", "3690246064", "3712259481", "3714631461", "3728134597", "3729042737", "3729053347", "3730960981", "3731294796", "3744576258", "3758869332", "3760687074", "3761564707", "3773611810", "3795820295", "3809110296", "3815521996", "3818608047", "3820411872", "3838851482", "3841673422", "3848750442", "3893562612", "3915394979", "3925496348", "3943382145", "3961283407", "3968198702", "3989054329", "3995552200", "4001952668", "4024206763", "4026669008", "4041012598", "4042861944", "4045354583", "4050854239", "4053936716", "4054645214", "4079595480", "4092146095", "4094039925", "4105001931", "4111963419", "4118674545", "4140532237", "4161971607", "4177934766", "4191997026", "4206081952", "4238282686", "4240717202", "4253836288", "4261396630", "4261770155", "4276434026", "4276617580", "4284782351", "4319810941", "4321930503", "4329686772", "4342879509", "4346425508", "4358160889", "4361296548", "4367976121", "4370656751", "4390048884", "4416414388", "4417769422", "4425163055", "4441208502", "4442409520", "4456032157", "4457506757", "4466297937", "4470799303", "4471935911", "4483092932", "518394896", "518483084", "520844610", "557225779", "566933133", "572634441", "576321561", "579685722", "591784570", "594625973", "601131360", "606300943", "606545871", "611563102", "614840422", "6170888779", "638067303", "6464587921", "6462544332", "64651936101", "6463378838", "6482501400", "6485336816", "712469999", "719268917", "734146386", "748345955", "7770852396", "7776547639", "7788763011", "806489782", "809601585", "823612200", "847666455", "852548082", "860283792", "878445168", "880368503", "882402752", "888142233", "889729380", "894815671", "897481933", "899862104", "901600498", "908531039", "909553757", "911548389", "916609265", "917201325", "923047852", "933789386", "944124404", "947300164", "952170584", "953550486", "953533056", "9575638913", "979877000", "97943586366", 
"97945755591", "97954254812", "979133175", "9795735938", "9810077930", "9826068049", "929039663", "936348701", "943137225", "974230812", "975831753", "985681769", "908744606", "9089063563", "911068530", "9110673247", "9112298734", "9117205605", "911772055", "9151996595", "9183212011", "9215001556", "9218853766", "9241129604", "9254761447", "9262292661", "9289767494", "9290449772", "9292353883", "9307077748", "9320134473", "9325695476", "9330165727", "9331145184", "9361445247", "9362434449", "9387002857", "9387034890", "9389068190", "9412180617", "9423601164", "9455266133", "9457329015", "9468461641", "9475423056", "9480251463", "9502429597", "9510542650", "9538892115", "9543128363", "9567226657", "9570443114", "9572258711", "9593320317", "9599054665", "9600247679", "9604606546", "9617443284", "9617825621", "9630053792", "9638479515", "9639991049", "9653938959", "9658403133", "9661493857", "9670689330", "9679962047", "9683621997", "9692039033", "9700373700", "9708224026", "9717806697", "9721055393", "9724880340", "9779231696", "9792416865", "9800136099", "9809071973", "9809577169", "9840428535", "9863374803", "9877641003", "9891601476", "9891997062", "9921202253", "9930550456", "9932297233", "9937890366", "9950747364", "9953946557", "9962549996", "9981326993", "9982466006", "9997911741"], "PII_unique": ["4847353029", "5162879924", "5712679786", "7852533080", "8283677149", "8624338324", "9195796456", "9199037779", "9735203731", "2185715037", "1003163800", "1003236193", "10010361518", "10017911312", "10037878368", "10042223682", "10048948999", "10049272303", "10083209744", "10084335884", "10085591720", "1000705234", "1111844206", "1114456067", "11127810067", "11154642430", "11156064084", "11160925958", "11165224332", "11170259144", "1219230321", "1230876671", "1236195069", "1237220065", "1237697889", "1246793781", "1252341214", "1263010277", "1266958629", "1274676445", "1293350477", "1309658508", "1311142857", "1317473845", "1335889533", "1345908052", "1347504465", 
"1352788825", "1355886834", "1359112637", "1371651118", "1374373500", "1383511453", "1394174443", "1394974100", "1400289091", "1438414472", "1456738623", "1466660520", "1477095715", "1486535856", "1489236581", "1522593608", "1523301108", "1523732570", "1527995679", "1530023892", "1532451508", "1534800864", "153731015", "1561833198", "1567697599", "1580127521", "1589953298", "1590233689", "1592618580", "1598803703", "1615188831", "1620904215", "1621272801", "1622622249", "1626585945", "1648180067", "1652416402", "1657127459", "1659501788", "1662243216", "1678178252", "1680737602", "1700968514", "1727060137", "1729502426", "1729630568", "1730812663", "1732642364", "1732997355", "1743515310", "1749448024", "1749883352", "1753137800", "1791376371", "1796320275", "1797099841", "1797272137", "1800961218", "1811487455", "1823675967", "1844949439", "1857964983", "1863428125", "1882931943", "1891372677", "1892088215", "1896686628", "1900788361", "1901187655", "1907972638", "1915204346", "1922733069", "1923628070", "1930557057", "1940605359", "1942666207", "1971402850", "1971801897", "1978482684", "1981414360", "1985050305", "2037185429", "2042244103", "2044704042", "2044749901", "2059475943", "2071281122", "2075045262", "2078750866", "2082116067", "2085129678", "2086948006", "2088752226", "2093942574", "2145253246", "2149843522", "2156423140", "2176748868", "2178257024", "2253177170", "2268809004", "2281070922", "2292940087", "2301217722", "2312936427", "2321532562", "2325049107", "2332301506", "2332765164", "2342380318", "2343649534", "2351866828", "2362242575", "2362917151", "2365477895", "2365633370", "2367244937", "2378997802", "2382583222", "2388902280", "2410427314", "2422119908", "2430705537", "2451448554", "2455485311", "2456987468", "2463468752", "2471186371", "2484345508", "2492332402", "2493771211", "2503573737", "2505329853", "2507461127", "2531252790", "2536230179", "2556195049", "2558034553", "2559092171", "2559407962", "2559967690", "2564993198", 
"2572804326", "2578572811", "2592311527", "2595052890", "2597420444", "2604459928", "2608942032", "2623332651", "2640199435", "2652792498", "2660188766", "2665354711", "2696089923", "2719028977", "2726922597", "2743635456", "2764615363", "2778473126", "2819596737", "2825030094", "2825497654", "2832911319", "2842304890", "2842392780", "2863369188", "2864011477", "2864039222", "2884748172", "2906531374", "2912304764", "2944638587", "2970271630", "2973746782", "2975502113", "2978352003", "2979962905", "2993468412", "2997037867", "2997475020", "3000889258", "3021618293", "3039574309", "3047937878", "3048753581", "3049204801", "3055206714", "3099087695", "3100323276", "3120501411", "3132605147", "3136250779", "3139456003", "3140627772", "3153400023", "3164595243", "3168868953", "3170085614", "3190937453", "3196407425", "3197359876", "3202476492", "3203229772", "3213777995", "3226948630", "3249527269", "3258060262", "3259003679", "3272635469", "3273096477", "3275396020", "3285820993", "3287778065", "3291757192", "3306690787", "3311488256", "3314878934", "3323522961", "3329851083", "3331872313", "3333022413", "3352380095", "3378749007", "3380288987", "3403416170", "3406276217", "3412330186", "3418311244", "3433304040", "3434198587", "3435729809", "3443423694", "3449078256", "3451478294", "3453844772", "3458534531", "3461950559", "3467120676", "3469607558", "3473255915", "3520879371", "3524430291", "3524438840", "3527931077", "3531637114", "3537255693", "3539795102", "3543874608", "3572260235", "3601100531", "3606730050", "3616278479", "3618848403", "3626291646", "3633790198", "3639187706", "3645473856", "3647280401", "3653893440", "3655035155", "3657099311", "3658791335", "3661787908", "3662344648", "3665926016", "3667838107", "3670778378", "3680764419", "3690246064", "3712259481", "3714631461", "3728134597", "3729042737", "3729053347", "3730960981", "3731294796", "3744576258", "3758869332", "3760687074", "3761564707", "3773611810", "3795820295", "3809110296", 
"3815521996", "3818608047", "3820411872", "3838851482", "3841673422", "3848750442", "3893562612", "3915394979", "3925496348", "3943382145", "3961283407", "3968198702", "3989054329", "3995552200", "4001952668", "4024206763", "4026669008", "4041012598", "4042861944", "4045354583", "4050854239", "4053936716", "4054645214", "4079595480", "4092146095", "4094039925", "4105001931", "4111963419", "4118674545", "4140532237", "4161971607", "4177934766", "4191997026", "4206081952", "4238282686", "4240717202", "4253836288", "4261396630", "4261770155", "4276434026", "4276617580", "4284782351", "4319810941", "4321930503", "4329686772", "4342879509", "4346425508", "4358160889", "4361296548", "4367976121", "4370656751", "4390048884", "4416414388", "4417769422", "4425163055", "4441208502", "4442409520", "4456032157", "4457506757", "4466297937", "4470799303", "4471935911", "4483092932", "518394896", "518483084", "520844610", "557225779", "566933133", "572634441", "576321561", "579685722", "591784570", "594625973", "601131360", "606300943", "606545871", "611563102", "614840422", "6170888779", "638067303", "6464587921", "6462544332", "64651936101", "6463378838", "6482501400", "6485336816", "712469999", "719268917", "734146386", "748345955", "7770852396", "7776547639", "7788763011", "806489782", "809601585", "823612200", "847666455", "852548082", "860283792", "878445168", "880368503", "882402752", "888142233", "889729380", "894815671", "897481933", "899862104", "901600498", "908531039", "909553757", "911548389", "916609265", "917201325", "923047852", "933789386", "944124404", "947300164", "952170584", "953550486", "953533056", "9575638913", "979877000", "97943586366", "97945755591", "97954254812", "979133175", "9795735938", "9810077930", "9826068049", "929039663", "936348701", "943137225", "974230812", "975831753", "985681769", "908744606", "9089063563", "911068530", "9110673247", "9112298734", "9117205605", "911772055", "9151996595", "9183212011", "9215001556", "9218853766", 
"9241129604", "9254761447", "9262292661", "9289767494", "9290449772", "9292353883", "9307077748", "9320134473", "9325695476", "9330165727", "9331145184", "9361445247", "9362434449", "9387002857", "9387034890", "9389068190", "9412180617", "9423601164", "9455266133", "9457329015", "9468461641", "9475423056", "9480251463", "9502429597", "9510542650", "9538892115", "9543128363", "9567226657", "9570443114", "9572258711", "9593320317", "9599054665", "9600247679", "9604606546", "9617443284", "9617825621", "9630053792", "9638479515", "9639991049", "9653938959", "9658403133", "9661493857", "9670689330", "9679962047", "9683621997", "9692039033", "9700373700", "9708224026", "9717806697", "9721055393", "9724880340", "9779231696", "9792416865", "9800136099", "9809071973", "9809577169", "9840428535", "9863374803", "9877641003", "9891601476", "9891997062", "9921202253", "9930550456", "9932297233", "9937890366", "9950747364", "9953946557", "9962549996", "9981326993", "9982466006", "9997911741"], "Num_of_PII_all": 548, "Num_of_PII_unique": 548, "source_columns": ["I2_AddressBook.sqlitedb:abmultivalue.value", "I2_AddressBook.sqlitedb:abmultivalueentry.value", "I2_AddressBook.sqlitedb:abperson.first", "I2_AddressBook.sqlitedb:abperson.last", "I2_AddressBook.sqlitedb:abperson.note"], "Num_of_source_columns": 5, "Num_of_source_columns_unique": 5} -{"db_path": "selectedDBs\\I2", "PII_type": "POSTAL_ADDRESS", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\I2", "PII_type": "USERNAME", "PII_all": ["genericgpt"], "PII_unique": ["genericgpt"], "Num_of_PII_all": 1, "Num_of_PII_unique": 1, "source_columns": ["I2_AddressBook.sqlitedb:abaccount.accountidentifier", "I2_AddressBook.sqlitedb:abgroup.name", "I2_AddressBook.sqlitedb:abgroupchanges.externalidentifier", "I2_AddressBook.sqlitedb:abgroupmembers.member_id", 
"I2_AddressBook.sqlitedb:abmultivalue.value", "I2_AddressBook.sqlitedb:abmultivalueentry.value", "I2_AddressBook.sqlitedb:abmultivalueentrykey.value", "I2_AddressBook.sqlitedb:abmultivaluelabel.value", "I2_AddressBook.sqlitedb:abperson.first", "I2_AddressBook.sqlitedb:abperson.last", "I2_AddressBook.sqlitedb:abperson.nickname", "I2_AddressBook.sqlitedb:abpersonchanges.externalidentifier", "I2_AddressBook.sqlitedb:abpersonsearchkey.nameonlysearchkey"], "Num_of_source_columns": 13, "Num_of_source_columns_unique": 13} -{"db_path": "selectedDBs\\I3", "PII_type": "EMAIL", "PII_all": ["ottomatik1234@gmail.com"], "PII_unique": ["ottomatik1234@gmail.com"], "Num_of_PII_all": 1, "Num_of_PII_unique": 1, "source_columns": ["I3_sms.db:message.text", "I3_sms.db:chat.account_login", "I3_sms.db:kvtable.key", "I3_sms.db:attachment.guid", "I3_sms.db:chat.display_name"], "Num_of_source_columns": 5, "Num_of_source_columns_unique": 5} -{"db_path": "selectedDBs\\I3", "PII_type": "PERSON_NAME", "PII_all": ["anya", "william", "eddie v", "otto", "tracy", "anna", "ronen engler", "joe church", "ella rutman doligo", "sharon oneil"], "PII_unique": ["anya", "william", "eddie v", "otto", "tracy", "anna", "ronen engler", "joe church", "ella rutman doligo", "sharon oneil"], "Num_of_PII_all": 10, "Num_of_PII_unique": 10, "source_columns": ["I3_sms.db:chat.display_name", "I3_sms.db:handle.id", "I3_sms.db:message.text"], "Num_of_source_columns": 3, "Num_of_source_columns_unique": 3} -{"db_path": "selectedDBs\\I3", "PII_type": "PHONE", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": ["I3_sms.db:message.text", "I3_sms.db:attachment.guid", "I3_sms.db:chat.chat_identifier", "I3_sms.db:chat.account_login", "I3_sms.db:chat.display_name", "I3_sms.db:handle.person_centric_id"], "Num_of_source_columns": 6, "Num_of_source_columns_unique": 6} -{"db_path": "selectedDBs\\I3", "PII_type": "POSTAL_ADDRESS", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, 
"Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\I3", "PII_type": "USERNAME", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\I4", "PII_type": "EMAIL", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\I4", "PII_type": "PERSON_NAME", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\I4", "PII_type": "PHONE", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\I4", "PII_type": "POSTAL_ADDRESS", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\I4", "PII_type": "USERNAME", "PII_all": ["cellebrite", "falafelhardboiledeggisrael", "dickenscider", "bluebite", "mapat", "snackwithdairykids", "figsgrowinggreennotripen", "hersheypark", "howtochangealightbulb", "tolls", "ravensmanorexperience", "cooldryplacestorage", "haifaairport", "whats thesalestaxinnyconclothing", "canyoucopyamiibowithflipperzero", "jackalope", "ambarrestaurant", "ravensmanor", "usecrashdetectiononiphoneorapplewatch", "figstre won'tripengreen", "airtag", "catfishcops", "myfilesdownload", "serverchangetoken", "usemanateecontainer", "17ebu7rghueusrylzr6u3ccjwmmqprk28k", "a&e nails", "hersheypark hours", "hersheypark tickets"], "PII_unique": ["cellebrite", "falafelhardboiledeggisrael", "dickenscider", "bluebite", "mapat", 
"snackwithdairykids", "figsgrowinggreennotripen", "hersheypark", "howtochangealightbulb", "tolls", "ravensmanorexperience", "cooldryplacestorage", "haifaairport", "whats thesalestaxinnyconclothing", "canyoucopyamiibowithflipperzero", "jackalope", "ambarrestaurant", "ravensmanor", "usecrashdetectiononiphoneorapplewatch", "figstre won'tripengreen", "airtag", "catfishcops", "myfilesdownload", "serverchangetoken", "usemanateecontainer", "17ebu7rghueusrylzr6u3ccjwmmqprk28k", "a&e nails", "hersheypark hours", "hersheypark tickets"], "Num_of_PII_all": 29, "Num_of_PII_unique": 29, "source_columns": ["I4_CloudTabs.db:cloud_tab_devices.device_name", "I4_CloudTabs.db:cloud_tabs.title", "I4_CloudTabs.db:metadata.key", "I4_History.db:history_event_listeners.listener_name", "I4_History.db:history_items.url", "I4_History.db:history_tags.identifier", "I4_History.db:history_visits.title"], "Num_of_source_columns": 7, "Num_of_source_columns_unique": 7} -{"db_path": "selectedDBs\\I5", "PII_type": "EMAIL", "PII_all": ["ottomatik1234@gmail.com"], "PII_unique": ["ottomatik1234@gmail.com"], "Num_of_PII_all": 1, "Num_of_PII_unique": 1, "source_columns": ["I5_Calendar.sqlitedb:alarm.email_address", "I5_Calendar.sqlitedb:calendar.shared_owner_address", "I5_Calendar.sqlitedb:calendar.self_identity_email", "I5_Calendar.sqlitedb:calendar.owner_identity_email"], "Num_of_source_columns": 4, "Num_of_source_columns_unique": 4} -{"db_path": "selectedDBs\\I5", "PII_type": "PERSON_NAME", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\I5", "PII_type": "PHONE", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\I5", "PII_type": "POSTAL_ADDRESS", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, 
"source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} -{"db_path": "selectedDBs\\I5", "PII_type": "USERNAME", "PII_all": [], "PII_unique": [], "Num_of_PII_all": 0, "Num_of_PII_unique": 0, "source_columns": [], "Num_of_source_columns": 0, "Num_of_source_columns_unique": 0} diff --git a/config.yaml b/config.yaml index fc00d3f..d265526 100644 --- a/config.yaml +++ b/config.yaml @@ -1,6 +1,7 @@ db_dir: selectedDBs out_dir: batch_results config_py: my_run_config.py +enable_observe: false pii_targets: - EMAIL - PHONE diff --git a/batch_results_gpt4o/PII_A1_commerce_20260131T203324Z.jsonl b/model_PII_results/gpt4o/PII_A1_commerce_20260131T203324Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_A1_commerce_20260131T203324Z.jsonl rename to model_PII_results/gpt4o/PII_A1_commerce_20260131T203324Z.jsonl diff --git a/batch_results_gpt4o/PII_A1_msgstore_20260131T203502Z.jsonl b/model_PII_results/gpt4o/PII_A1_msgstore_20260131T203502Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_A1_msgstore_20260131T203502Z.jsonl rename to model_PII_results/gpt4o/PII_A1_msgstore_20260131T203502Z.jsonl diff --git a/batch_results_gpt4o/PII_A1_wa_20260131T203943Z.jsonl b/model_PII_results/gpt4o/PII_A1_wa_20260131T203943Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_A1_wa_20260131T203943Z.jsonl rename to model_PII_results/gpt4o/PII_A1_wa_20260131T203943Z.jsonl diff --git a/batch_results_gpt4o/PII_A2_core_20260131T204055Z.jsonl b/model_PII_results/gpt4o/PII_A2_core_20260131T204055Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_A2_core_20260131T204055Z.jsonl rename to model_PII_results/gpt4o/PII_A2_core_20260131T204055Z.jsonl diff --git a/batch_results_gpt4o/PII_A2_journal_20260131T204142Z.jsonl b/model_PII_results/gpt4o/PII_A2_journal_20260131T204142Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_A2_journal_20260131T204142Z.jsonl rename to 
model_PII_results/gpt4o/PII_A2_journal_20260131T204142Z.jsonl diff --git a/batch_results_gpt4o/PII_A2_main_20260131T204345Z.jsonl b/model_PII_results/gpt4o/PII_A2_main_20260131T204345Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_A2_main_20260131T204345Z.jsonl rename to model_PII_results/gpt4o/PII_A2_main_20260131T204345Z.jsonl diff --git a/batch_results_gpt4o/PII_A3_account1cache4_20260131T204410Z.jsonl b/model_PII_results/gpt4o/PII_A3_account1cache4_20260131T204410Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_A3_account1cache4_20260131T204410Z.jsonl rename to model_PII_results/gpt4o/PII_A3_account1cache4_20260131T204410Z.jsonl diff --git a/batch_results_gpt4o/PII_A3_account2cache4_20260131T204617Z.jsonl b/model_PII_results/gpt4o/PII_A3_account2cache4_20260131T204617Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_A3_account2cache4_20260131T204617Z.jsonl rename to model_PII_results/gpt4o/PII_A3_account2cache4_20260131T204617Z.jsonl diff --git a/batch_results_gpt4o/PII_A3_account3cache4_20260131T204642Z.jsonl b/model_PII_results/gpt4o/PII_A3_account3cache4_20260131T204642Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_A3_account3cache4_20260131T204642Z.jsonl rename to model_PII_results/gpt4o/PII_A3_account3cache4_20260131T204642Z.jsonl diff --git a/batch_results_gpt4o/PII_A4_gmm_myplaces_20260131T204709Z.jsonl b/model_PII_results/gpt4o/PII_A4_gmm_myplaces_20260131T204709Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_A4_gmm_myplaces_20260131T204709Z.jsonl rename to model_PII_results/gpt4o/PII_A4_gmm_myplaces_20260131T204709Z.jsonl diff --git a/batch_results_gpt4o/PII_A4_gmm_storage_20260131T204738Z.jsonl b/model_PII_results/gpt4o/PII_A4_gmm_storage_20260131T204738Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_A4_gmm_storage_20260131T204738Z.jsonl rename to model_PII_results/gpt4o/PII_A4_gmm_storage_20260131T204738Z.jsonl diff --git 
a/batch_results_gpt4o/PII_A4_peopleCache_sharononeil368@gmail.com_com.google_14_20260131T204833Z.jsonl b/model_PII_results/gpt4o/PII_A4_peopleCache_sharononeil368@gmail.com_com.google_14_20260131T204833Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_A4_peopleCache_sharononeil368@gmail.com_com.google_14_20260131T204833Z.jsonl rename to model_PII_results/gpt4o/PII_A4_peopleCache_sharononeil368@gmail.com_com.google_14_20260131T204833Z.jsonl diff --git a/batch_results_gpt4o/PII_A5_SBrowser2_20260131T205010Z.jsonl b/model_PII_results/gpt4o/PII_A5_SBrowser2_20260131T205010Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_A5_SBrowser2_20260131T205010Z.jsonl rename to model_PII_results/gpt4o/PII_A5_SBrowser2_20260131T205010Z.jsonl diff --git a/batch_results_gpt4o/PII_A5_SBrowser_20260131T204925Z.jsonl b/model_PII_results/gpt4o/PII_A5_SBrowser_20260131T204925Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_A5_SBrowser_20260131T204925Z.jsonl rename to model_PII_results/gpt4o/PII_A5_SBrowser_20260131T204925Z.jsonl diff --git a/batch_results_gpt4o/PII_A5_searchengine_20260131T205101Z.jsonl b/model_PII_results/gpt4o/PII_A5_searchengine_20260131T205101Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_A5_searchengine_20260131T205101Z.jsonl rename to model_PII_results/gpt4o/PII_A5_searchengine_20260131T205101Z.jsonl diff --git a/batch_results_gpt4o/PII_I1_CallHistory_20260131T205155Z.jsonl b/model_PII_results/gpt4o/PII_I1_CallHistory_20260131T205155Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_I1_CallHistory_20260131T205155Z.jsonl rename to model_PII_results/gpt4o/PII_I1_CallHistory_20260131T205155Z.jsonl diff --git a/batch_results_gpt4o/PII_I1_ChatStorage_20260131T205309Z.jsonl b/model_PII_results/gpt4o/PII_I1_ChatStorage_20260131T205309Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_I1_ChatStorage_20260131T205309Z.jsonl rename to 
model_PII_results/gpt4o/PII_I1_ChatStorage_20260131T205309Z.jsonl diff --git a/batch_results_gpt4o/PII_I1_ContactsV2_20260131T210034Z.jsonl b/model_PII_results/gpt4o/PII_I1_ContactsV2_20260131T210034Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_I1_ContactsV2_20260131T210034Z.jsonl rename to model_PII_results/gpt4o/PII_I1_ContactsV2_20260131T210034Z.jsonl diff --git a/batch_results_gpt4o/PII_I2_AddressBookImages_20260131T210641Z.jsonl b/model_PII_results/gpt4o/PII_I2_AddressBookImages_20260131T210641Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_I2_AddressBookImages_20260131T210641Z.jsonl rename to model_PII_results/gpt4o/PII_I2_AddressBookImages_20260131T210641Z.jsonl diff --git a/batch_results_gpt4o/PII_I2_AddressBook_20260131T210607Z.jsonl b/model_PII_results/gpt4o/PII_I2_AddressBook_20260131T210607Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_I2_AddressBook_20260131T210607Z.jsonl rename to model_PII_results/gpt4o/PII_I2_AddressBook_20260131T210607Z.jsonl diff --git a/batch_results_gpt4o/PII_I3_sms_20260131T210735Z.jsonl b/model_PII_results/gpt4o/PII_I3_sms_20260131T210735Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_I3_sms_20260131T210735Z.jsonl rename to model_PII_results/gpt4o/PII_I3_sms_20260131T210735Z.jsonl diff --git a/batch_results_gpt4o/PII_I4_CloudTabs_20260131T210821Z.jsonl b/model_PII_results/gpt4o/PII_I4_CloudTabs_20260131T210821Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_I4_CloudTabs_20260131T210821Z.jsonl rename to model_PII_results/gpt4o/PII_I4_CloudTabs_20260131T210821Z.jsonl diff --git a/batch_results_gpt4o/PII_I4_History_20260131T210912Z.jsonl b/model_PII_results/gpt4o/PII_I4_History_20260131T210912Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_I4_History_20260131T210912Z.jsonl rename to model_PII_results/gpt4o/PII_I4_History_20260131T210912Z.jsonl diff --git a/batch_results_gpt4o/PII_I5_Calendar_20260131T211008Z.jsonl 
b/model_PII_results/gpt4o/PII_I5_Calendar_20260131T211008Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_I5_Calendar_20260131T211008Z.jsonl rename to model_PII_results/gpt4o/PII_I5_Calendar_20260131T211008Z.jsonl diff --git a/batch_results_gpt4o/PII_I5_Extras_20260131T211054Z.jsonl b/model_PII_results/gpt4o/PII_I5_Extras_20260131T211054Z.jsonl similarity index 100% rename from batch_results_gpt4o/PII_I5_Extras_20260131T211054Z.jsonl rename to model_PII_results/gpt4o/PII_I5_Extras_20260131T211054Z.jsonl diff --git a/ground_truth/PII_A1_commerce_20260127T175911Z.jsonl b/model_PII_results/ground_truth/PII_A1_commerce_20260127T175911Z.jsonl similarity index 100% rename from ground_truth/PII_A1_commerce_20260127T175911Z.jsonl rename to model_PII_results/ground_truth/PII_A1_commerce_20260127T175911Z.jsonl diff --git a/ground_truth/PII_A1_msgstore_20260127T180043Z.jsonl b/model_PII_results/ground_truth/PII_A1_msgstore_20260127T180043Z.jsonl similarity index 100% rename from ground_truth/PII_A1_msgstore_20260127T180043Z.jsonl rename to model_PII_results/ground_truth/PII_A1_msgstore_20260127T180043Z.jsonl diff --git a/ground_truth/PII_A1_wa_20260127T180213Z.jsonl b/model_PII_results/ground_truth/PII_A1_wa_20260127T180213Z.jsonl similarity index 100% rename from ground_truth/PII_A1_wa_20260127T180213Z.jsonl rename to model_PII_results/ground_truth/PII_A1_wa_20260127T180213Z.jsonl diff --git a/ground_truth/PII_A2_core_20260127T180339Z.jsonl b/model_PII_results/ground_truth/PII_A2_core_20260127T180339Z.jsonl similarity index 100% rename from ground_truth/PII_A2_core_20260127T180339Z.jsonl rename to model_PII_results/ground_truth/PII_A2_core_20260127T180339Z.jsonl diff --git a/ground_truth/PII_A2_journal_20260127T180440Z.jsonl b/model_PII_results/ground_truth/PII_A2_journal_20260127T180440Z.jsonl similarity index 100% rename from ground_truth/PII_A2_journal_20260127T180440Z.jsonl rename to model_PII_results/ground_truth/PII_A2_journal_20260127T180440Z.jsonl 
diff --git a/ground_truth/PII_A2_main_20260127T180710Z.jsonl b/model_PII_results/ground_truth/PII_A2_main_20260127T180710Z.jsonl similarity index 100% rename from ground_truth/PII_A2_main_20260127T180710Z.jsonl rename to model_PII_results/ground_truth/PII_A2_main_20260127T180710Z.jsonl diff --git a/ground_truth/PII_A3_account1cache4_20260127T180745Z.jsonl b/model_PII_results/ground_truth/PII_A3_account1cache4_20260127T180745Z.jsonl similarity index 100% rename from ground_truth/PII_A3_account1cache4_20260127T180745Z.jsonl rename to model_PII_results/ground_truth/PII_A3_account1cache4_20260127T180745Z.jsonl diff --git a/ground_truth/PII_A3_account2cache4_20260127T180821Z.jsonl b/model_PII_results/ground_truth/PII_A3_account2cache4_20260127T180821Z.jsonl similarity index 100% rename from ground_truth/PII_A3_account2cache4_20260127T180821Z.jsonl rename to model_PII_results/ground_truth/PII_A3_account2cache4_20260127T180821Z.jsonl diff --git a/ground_truth/PII_A3_account3cache4_20260127T180857Z.jsonl b/model_PII_results/ground_truth/PII_A3_account3cache4_20260127T180857Z.jsonl similarity index 100% rename from ground_truth/PII_A3_account3cache4_20260127T180857Z.jsonl rename to model_PII_results/ground_truth/PII_A3_account3cache4_20260127T180857Z.jsonl diff --git a/ground_truth/PII_A4_gmm_myplaces_20260127T180935Z.jsonl b/model_PII_results/ground_truth/PII_A4_gmm_myplaces_20260127T180935Z.jsonl similarity index 100% rename from ground_truth/PII_A4_gmm_myplaces_20260127T180935Z.jsonl rename to model_PII_results/ground_truth/PII_A4_gmm_myplaces_20260127T180935Z.jsonl diff --git a/ground_truth/PII_A4_gmm_storage_20260127T181014Z.jsonl b/model_PII_results/ground_truth/PII_A4_gmm_storage_20260127T181014Z.jsonl similarity index 100% rename from ground_truth/PII_A4_gmm_storage_20260127T181014Z.jsonl rename to model_PII_results/ground_truth/PII_A4_gmm_storage_20260127T181014Z.jsonl diff --git 
a/ground_truth/PII_A4_peopleCache_sharononeil368@gmail.com_com.google_14_20260127T181121Z.jsonl b/model_PII_results/ground_truth/PII_A4_peopleCache_sharononeil368@gmail.com_com.google_14_20260127T181121Z.jsonl similarity index 100% rename from ground_truth/PII_A4_peopleCache_sharononeil368@gmail.com_com.google_14_20260127T181121Z.jsonl rename to model_PII_results/ground_truth/PII_A4_peopleCache_sharononeil368@gmail.com_com.google_14_20260127T181121Z.jsonl diff --git a/ground_truth/PII_A5_SBrowser2_20260127T181345Z.jsonl b/model_PII_results/ground_truth/PII_A5_SBrowser2_20260127T181345Z.jsonl similarity index 100% rename from ground_truth/PII_A5_SBrowser2_20260127T181345Z.jsonl rename to model_PII_results/ground_truth/PII_A5_SBrowser2_20260127T181345Z.jsonl diff --git a/ground_truth/PII_A5_SBrowser_20260127T181239Z.jsonl b/model_PII_results/ground_truth/PII_A5_SBrowser_20260127T181239Z.jsonl similarity index 100% rename from ground_truth/PII_A5_SBrowser_20260127T181239Z.jsonl rename to model_PII_results/ground_truth/PII_A5_SBrowser_20260127T181239Z.jsonl diff --git a/ground_truth/PII_A5_searchengine_20260127T181446Z.jsonl b/model_PII_results/ground_truth/PII_A5_searchengine_20260127T181446Z.jsonl similarity index 100% rename from ground_truth/PII_A5_searchengine_20260127T181446Z.jsonl rename to model_PII_results/ground_truth/PII_A5_searchengine_20260127T181446Z.jsonl diff --git a/ground_truth/PII_I1_CallHistory_20260127T181557Z.jsonl b/model_PII_results/ground_truth/PII_I1_CallHistory_20260127T181557Z.jsonl similarity index 100% rename from ground_truth/PII_I1_CallHistory_20260127T181557Z.jsonl rename to model_PII_results/ground_truth/PII_I1_CallHistory_20260127T181557Z.jsonl diff --git a/ground_truth/PII_I1_ChatStorage_20260127T181731Z.jsonl b/model_PII_results/ground_truth/PII_I1_ChatStorage_20260127T181731Z.jsonl similarity index 100% rename from ground_truth/PII_I1_ChatStorage_20260127T181731Z.jsonl rename to 
model_PII_results/ground_truth/PII_I1_ChatStorage_20260127T181731Z.jsonl diff --git a/ground_truth/PII_I1_ContactsV2_20260127T182906Z.jsonl b/model_PII_results/ground_truth/PII_I1_ContactsV2_20260127T182906Z.jsonl similarity index 100% rename from ground_truth/PII_I1_ContactsV2_20260127T182906Z.jsonl rename to model_PII_results/ground_truth/PII_I1_ContactsV2_20260127T182906Z.jsonl diff --git a/ground_truth/PII_I2_AddressBookImages_20260127T183526Z.jsonl b/model_PII_results/ground_truth/PII_I2_AddressBookImages_20260127T183526Z.jsonl similarity index 100% rename from ground_truth/PII_I2_AddressBookImages_20260127T183526Z.jsonl rename to model_PII_results/ground_truth/PII_I2_AddressBookImages_20260127T183526Z.jsonl diff --git a/ground_truth/PII_I2_AddressBook_20260127T183457Z.jsonl b/model_PII_results/ground_truth/PII_I2_AddressBook_20260127T183457Z.jsonl similarity index 100% rename from ground_truth/PII_I2_AddressBook_20260127T183457Z.jsonl rename to model_PII_results/ground_truth/PII_I2_AddressBook_20260127T183457Z.jsonl diff --git a/ground_truth/PII_I3_sms_20260127T183606Z.jsonl b/model_PII_results/ground_truth/PII_I3_sms_20260127T183606Z.jsonl similarity index 100% rename from ground_truth/PII_I3_sms_20260127T183606Z.jsonl rename to model_PII_results/ground_truth/PII_I3_sms_20260127T183606Z.jsonl diff --git a/ground_truth/PII_I4_CloudTabs_20260127T183643Z.jsonl b/model_PII_results/ground_truth/PII_I4_CloudTabs_20260127T183643Z.jsonl similarity index 100% rename from ground_truth/PII_I4_CloudTabs_20260127T183643Z.jsonl rename to model_PII_results/ground_truth/PII_I4_CloudTabs_20260127T183643Z.jsonl diff --git a/ground_truth/PII_I4_History_20260127T183727Z.jsonl b/model_PII_results/ground_truth/PII_I4_History_20260127T183727Z.jsonl similarity index 100% rename from ground_truth/PII_I4_History_20260127T183727Z.jsonl rename to model_PII_results/ground_truth/PII_I4_History_20260127T183727Z.jsonl diff --git a/ground_truth/PII_I5_Calendar_20260127T183815Z.jsonl 
b/model_PII_results/ground_truth/PII_I5_Calendar_20260127T183815Z.jsonl similarity index 100% rename from ground_truth/PII_I5_Calendar_20260127T183815Z.jsonl rename to model_PII_results/ground_truth/PII_I5_Calendar_20260127T183815Z.jsonl diff --git a/ground_truth/PII_I5_Extras_20260127T183857Z.jsonl b/model_PII_results/ground_truth/PII_I5_Extras_20260127T183857Z.jsonl similarity index 100% rename from ground_truth/PII_I5_Extras_20260127T183857Z.jsonl rename to model_PII_results/ground_truth/PII_I5_Extras_20260127T183857Z.jsonl diff --git a/my_run_config.py b/my_run_config.py index 577afd9..92c9bb6 100644 --- a/my_run_config.py +++ b/my_run_config.py @@ -1,6 +1,6 @@ db_files = [ - # "test2.db", - # "users.db", + "test2.db", + "users.db", # "A1_commerce.db", # "A1_msgstore.db", # "A1_wa.db", @@ -8,24 +8,24 @@ db_files = [ # "A2_journal.db", # "A2_main.db", # "A3_account1cache4.db", - "A3_account2cache4.db", - "A3_account3cache4.db", - "A4_gmm_myplaces.db", - "A4_gmm_storage.db", - "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db", - "A5_SBrowser.db", - "A5_SBrowser2.db", - "A5_searchengine.db", - "I1_CallHistory.sqlite", - "I1_ChatStorage.sqlite", - "I1_ContactsV2.sqlite", - "I2_AddressBook.sqlitedb", - "I2_AddressBookImages.sqlitedb", - "I3_sms.db", - "I4_CloudTabs.db", - "I4_History.db", - "I5_Calendar.sqlitedb", - "I5_Extras.db", + # "A3_account2cache4.db", + # "A3_account3cache4.db", + # "A4_gmm_myplaces.db", + # "A4_gmm_storage.db", + # "A4_peopleCache_sharononeil368@gmail.com_com.google_14.db", + # "A5_SBrowser.db", + # "A5_SBrowser2.db", + # "A5_searchengine.db", + # "I1_CallHistory.sqlite", + # "I1_ChatStorage.sqlite", + # "I1_ContactsV2.sqlite", + # "I2_AddressBook.sqlitedb", + # "I2_AddressBookImages.sqlitedb", + # "I3_sms.db", + # "I4_CloudTabs.db", + # "I4_History.db", + # "I5_Calendar.sqlitedb", + # "I5_Extras.db", ] PII_CONFIG = {