mirror of
https://github.com/frankwxu/mobile-pii-discovery-agent.git
synced 2026-02-20 13:40:41 +00:00
add automated process (folder level)
This commit is contained in:
@@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": null,
|
||||||
"id": "a10c9a6a",
|
"id": "a10c9a6a",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -21,6 +21,7 @@
|
|||||||
"from langchain_core.messages import HumanMessage\n",
|
"from langchain_core.messages import HumanMessage\n",
|
||||||
"from sql_utils import *\n",
|
"from sql_utils import *\n",
|
||||||
"from datetime import datetime, timezone\n",
|
"from datetime import datetime, timezone\n",
|
||||||
|
"from pathlib import Path\n",
|
||||||
"\n",
|
"\n",
|
||||||
"load_dotenv() # This looks for the .env file and loads it into os.environ\n",
|
"load_dotenv() # This looks for the .env file and loads it into os.environ\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -593,6 +594,7 @@
|
|||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
|
"Will process 1 databases (from db_files list).\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Processing: selectedDBs\\test2.db\n",
|
"Processing: selectedDBs\\test2.db\n",
|
||||||
" Processing: EMAIL\n",
|
" Processing: EMAIL\n",
|
||||||
@@ -1015,7 +1017,7 @@
|
|||||||
"extraction_sql : None\n",
|
"extraction_sql : None\n",
|
||||||
"rows_count : 20\n",
|
"rows_count : 20\n",
|
||||||
"rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n",
|
"rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n",
|
||||||
"classification : {'found': True, 'confidence': 95, 'reason': 'The text contains multiple usernames that are likely application-specific login usernames created by users for login purposes.'}\n",
|
"classification : {'found': True, 'confidence': 95, 'reason': 'The text contains multiple entries that resemble usernames, including both simple usernames and email addresses, which are commonly used for login purposes.'}\n",
|
||||||
"evidence_count : 0\n",
|
"evidence_count : 0\n",
|
||||||
"evidence_sample : []\n",
|
"evidence_sample : []\n",
|
||||||
"source_columns : []\n",
|
"source_columns : []\n",
|
||||||
@@ -1048,7 +1050,7 @@
|
|||||||
"SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n",
|
"SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n",
|
||||||
"rows_count : 20\n",
|
"rows_count : 20\n",
|
||||||
"rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n",
|
"rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n",
|
||||||
"classification : {'found': True, 'confidence': 95, 'reason': 'The text contains multiple usernames that are likely application-specific login usernames created by users for login purposes.'}\n",
|
"classification : {'found': True, 'confidence': 95, 'reason': 'The text contains multiple entries that resemble usernames, including both simple usernames and email addresses, which are commonly used for login purposes.'}\n",
|
||||||
"evidence_count : 0\n",
|
"evidence_count : 0\n",
|
||||||
"evidence_sample : []\n",
|
"evidence_sample : []\n",
|
||||||
"source_columns : []\n",
|
"source_columns : []\n",
|
||||||
@@ -1083,39 +1085,6 @@
|
|||||||
"SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n",
|
"SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n",
|
||||||
"rows_count : 20\n",
|
"rows_count : 20\n",
|
||||||
"rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n",
|
"rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n",
|
||||||
"classification : {'found': True, 'confidence': 95, 'reason': 'The text contains multiple usernames that are likely application-specific login usernames created by users for login purposes.'}\n",
|
|
||||||
"evidence_count : 0\n",
|
|
||||||
"evidence_sample : []\n",
|
|
||||||
"source_columns : ['users.username', 'users.email']\n",
|
|
||||||
"\n",
|
|
||||||
"--- END METADATA ---\n",
|
|
||||||
"\n",
|
|
||||||
"=== STATE SNAPSHOT ===\n",
|
|
||||||
"\n",
|
|
||||||
"--- MESSAGES ---\n",
|
|
||||||
"0: HUMAN -> Find application-specific login usernames created by users for login purposes in the database\n",
|
|
||||||
"1: AI -> SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n",
|
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n",
|
|
||||||
"2: AI -> Retrieved 20 rows\n",
|
|
||||||
"3: AI -> SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n",
|
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n",
|
|
||||||
"4: AI -> Retrieved 20 rows\n",
|
|
||||||
"\n",
|
|
||||||
"--- BEGIN METADATA ---\n",
|
|
||||||
"attempt : 2\n",
|
|
||||||
"max_attempts : 2\n",
|
|
||||||
"phase : extraction\n",
|
|
||||||
"PII type : username\n",
|
|
||||||
"exploration_sql : SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n",
|
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n",
|
|
||||||
"extraction_sql : SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n",
|
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n",
|
|
||||||
"rows_count : 20\n",
|
|
||||||
"rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n",
|
|
||||||
"classification : {'found': True, 'confidence': 95, 'reason': 'The text contains multiple entries that resemble usernames, including both simple usernames and email addresses, which are commonly used for login purposes.'}\n",
|
"classification : {'found': True, 'confidence': 95, 'reason': 'The text contains multiple entries that resemble usernames, including both simple usernames and email addresses, which are commonly used for login purposes.'}\n",
|
||||||
"evidence_count : 0\n",
|
"evidence_count : 0\n",
|
||||||
"evidence_sample : []\n",
|
"evidence_sample : []\n",
|
||||||
@@ -1149,7 +1118,40 @@
|
|||||||
"SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n",
|
"SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n",
|
||||||
"rows_count : 20\n",
|
"rows_count : 20\n",
|
||||||
"rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n",
|
"rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n",
|
||||||
"classification : {'found': True, 'confidence': 95, 'reason': 'The text contains multiple entries that resemble usernames, including both simple usernames and email addresses, which are commonly used for login purposes.'}\n",
|
"classification : {'found': True, 'confidence': 95, 'reason': 'The text contains multiple usernames that appear to be application-specific login usernames created by users for login purposes.'}\n",
|
||||||
|
"evidence_count : 0\n",
|
||||||
|
"evidence_sample : []\n",
|
||||||
|
"source_columns : ['users.username', 'users.email']\n",
|
||||||
|
"\n",
|
||||||
|
"--- END METADATA ---\n",
|
||||||
|
"\n",
|
||||||
|
"=== STATE SNAPSHOT ===\n",
|
||||||
|
"\n",
|
||||||
|
"--- MESSAGES ---\n",
|
||||||
|
"0: HUMAN -> Find application-specific login usernames created by users for login purposes in the database\n",
|
||||||
|
"1: AI -> SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n",
|
||||||
|
"UNION ALL \n",
|
||||||
|
"SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n",
|
||||||
|
"2: AI -> Retrieved 20 rows\n",
|
||||||
|
"3: AI -> SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n",
|
||||||
|
"UNION ALL\n",
|
||||||
|
"SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n",
|
||||||
|
"4: AI -> Retrieved 20 rows\n",
|
||||||
|
"\n",
|
||||||
|
"--- BEGIN METADATA ---\n",
|
||||||
|
"attempt : 2\n",
|
||||||
|
"max_attempts : 2\n",
|
||||||
|
"phase : extraction\n",
|
||||||
|
"PII type : username\n",
|
||||||
|
"exploration_sql : SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b' \n",
|
||||||
|
"UNION ALL \n",
|
||||||
|
"SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n",
|
||||||
|
"extraction_sql : SELECT username FROM users WHERE username REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b'\n",
|
||||||
|
"UNION ALL\n",
|
||||||
|
"SELECT email FROM users WHERE email REGEXP '\\b[a-zA-Z][a-zA-Z0-9._-]{2,51}\\b';\n",
|
||||||
|
"rows_count : 20\n",
|
||||||
|
"rows_sample : [('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',), ('alice.johnson@example.com',), ('brian.smith@example.com',), ('carol.davis@example.com',), ('david.miller@example.com',), ('emma.wilson@example.com',), ('frank.brown@example.com',), ('grace.taylor@example.com',), ('henry.anderson@example.com',), ('irene.thomas@example.com',), ('jack.moore@example.com',)]\n",
|
||||||
|
"classification : {'found': True, 'confidence': 95, 'reason': 'The text contains multiple usernames that appear to be application-specific login usernames created by users for login purposes.'}\n",
|
||||||
"evidence_count : 10\n",
|
"evidence_count : 10\n",
|
||||||
"evidence_sample : ['ajohnson', 'bsmith', 'cdavis', 'dmiller', 'ewilson', 'fbrown', 'gtaylor', 'handerson', 'ithomas', 'jmoore']\n",
|
"evidence_sample : ['ajohnson', 'bsmith', 'cdavis', 'dmiller', 'ewilson', 'fbrown', 'gtaylor', 'handerson', 'ithomas', 'jmoore']\n",
|
||||||
"source_columns : ['users.username', 'users.email']\n",
|
"source_columns : ['users.username', 'users.email']\n",
|
||||||
@@ -1161,22 +1163,18 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"--- MESSAGES ---\n",
|
"--- MESSAGES ---\n",
|
||||||
"0: HUMAN -> Find loosely structured human name-like strings in the database\n",
|
"0: HUMAN -> Find loosely structured human name-like strings in the database\n",
|
||||||
"1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"UNION ALL \n",
|
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"--- BEGIN METADATA ---\n",
|
"--- BEGIN METADATA ---\n",
|
||||||
"attempt : 2\n",
|
"attempt : 2\n",
|
||||||
"max_attempts : 2\n",
|
"max_attempts : 2\n",
|
||||||
"phase : exploration\n",
|
"phase : exploration\n",
|
||||||
"PII type : person name\n",
|
"PII type : person name\n",
|
||||||
"exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"UNION ALL \n",
|
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"extraction_sql : None\n",
|
"extraction_sql : None\n",
|
||||||
"rows_count : 0\n",
|
"rows_count : 0\n",
|
||||||
"rows_sample : []\n",
|
"rows_sample : []\n",
|
||||||
@@ -1192,11 +1190,9 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"--- MESSAGES ---\n",
|
"--- MESSAGES ---\n",
|
||||||
"0: HUMAN -> Find loosely structured human name-like strings in the database\n",
|
"0: HUMAN -> Find loosely structured human name-like strings in the database\n",
|
||||||
"1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"UNION ALL \n",
|
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"2: AI -> Retrieved 30 rows\n",
|
"2: AI -> Retrieved 30 rows\n",
|
||||||
"\n",
|
"\n",
|
||||||
"--- BEGIN METADATA ---\n",
|
"--- BEGIN METADATA ---\n",
|
||||||
@@ -1204,11 +1200,9 @@
|
|||||||
"max_attempts : 2\n",
|
"max_attempts : 2\n",
|
||||||
"phase : exploration\n",
|
"phase : exploration\n",
|
||||||
"PII type : person name\n",
|
"PII type : person name\n",
|
||||||
"exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"UNION ALL \n",
|
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"extraction_sql : None\n",
|
"extraction_sql : None\n",
|
||||||
"rows_count : 30\n",
|
"rows_count : 30\n",
|
||||||
"rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n",
|
"rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n",
|
||||||
@@ -1223,11 +1217,9 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"--- MESSAGES ---\n",
|
"--- MESSAGES ---\n",
|
||||||
"0: HUMAN -> Find loosely structured human name-like strings in the database\n",
|
"0: HUMAN -> Find loosely structured human name-like strings in the database\n",
|
||||||
"1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"UNION ALL \n",
|
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"2: AI -> Retrieved 30 rows\n",
|
"2: AI -> Retrieved 30 rows\n",
|
||||||
"\n",
|
"\n",
|
||||||
"--- BEGIN METADATA ---\n",
|
"--- BEGIN METADATA ---\n",
|
||||||
@@ -1235,11 +1227,9 @@
|
|||||||
"max_attempts : 2\n",
|
"max_attempts : 2\n",
|
||||||
"phase : exploration\n",
|
"phase : exploration\n",
|
||||||
"PII type : person name\n",
|
"PII type : person name\n",
|
||||||
"exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"UNION ALL \n",
|
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"extraction_sql : None\n",
|
"extraction_sql : None\n",
|
||||||
"rows_count : 30\n",
|
"rows_count : 30\n",
|
||||||
"rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n",
|
"rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n",
|
||||||
@@ -1255,33 +1245,25 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"--- MESSAGES ---\n",
|
"--- MESSAGES ---\n",
|
||||||
"0: HUMAN -> Find loosely structured human name-like strings in the database\n",
|
"0: HUMAN -> Find loosely structured human name-like strings in the database\n",
|
||||||
"1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"UNION ALL \n",
|
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"2: AI -> Retrieved 30 rows\n",
|
"2: AI -> Retrieved 30 rows\n",
|
||||||
"3: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
"3: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL\n",
|
||||||
"UNION ALL\n",
|
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL\n",
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"--- BEGIN METADATA ---\n",
|
"--- BEGIN METADATA ---\n",
|
||||||
"attempt : 2\n",
|
"attempt : 2\n",
|
||||||
"max_attempts : 2\n",
|
"max_attempts : 2\n",
|
||||||
"phase : extraction\n",
|
"phase : extraction\n",
|
||||||
"PII type : person name\n",
|
"PII type : person name\n",
|
||||||
"exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"UNION ALL \n",
|
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
||||||
"UNION ALL \n",
|
"extraction_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL\n",
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL\n",
|
||||||
"extraction_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"rows_count : 30\n",
|
"rows_count : 30\n",
|
||||||
"rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n",
|
"rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n",
|
||||||
"classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple strings that are commonly recognized as person names.'}\n",
|
"classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple strings that are commonly recognized as person names.'}\n",
|
||||||
@@ -1297,17 +1279,13 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"--- MESSAGES ---\n",
|
"--- MESSAGES ---\n",
|
||||||
"0: HUMAN -> Find loosely structured human name-like strings in the database\n",
|
"0: HUMAN -> Find loosely structured human name-like strings in the database\n",
|
||||||
"1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"UNION ALL \n",
|
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"2: AI -> Retrieved 30 rows\n",
|
"2: AI -> Retrieved 30 rows\n",
|
||||||
"3: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
"3: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL\n",
|
||||||
"UNION ALL\n",
|
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL\n",
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"4: AI -> Retrieved 30 rows\n",
|
"4: AI -> Retrieved 30 rows\n",
|
||||||
"\n",
|
"\n",
|
||||||
"--- BEGIN METADATA ---\n",
|
"--- BEGIN METADATA ---\n",
|
||||||
@@ -1315,16 +1293,12 @@
|
|||||||
"max_attempts : 2\n",
|
"max_attempts : 2\n",
|
||||||
"phase : extraction\n",
|
"phase : extraction\n",
|
||||||
"PII type : person name\n",
|
"PII type : person name\n",
|
||||||
"exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"UNION ALL \n",
|
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL \n",
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
||||||
"UNION ALL \n",
|
"extraction_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL\n",
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' UNION ALL\n",
|
||||||
"extraction_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"rows_count : 30\n",
|
"rows_count : 30\n",
|
||||||
"rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n",
|
"rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n",
|
||||||
"classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple strings that are commonly recognized as person names.'}\n",
|
"classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple strings that are commonly recognized as person names.'}\n",
|
||||||
@@ -1332,90 +1306,7 @@
|
|||||||
"evidence_sample : []\n",
|
"evidence_sample : []\n",
|
||||||
"source_columns : ['users.first_name', 'users.last_name', 'users.username']\n",
|
"source_columns : ['users.first_name', 'users.last_name', 'users.username']\n",
|
||||||
"\n",
|
"\n",
|
||||||
"--- END METADATA ---\n",
|
"--- END METADATA ---\n"
|
||||||
"\n",
|
|
||||||
"=== STATE SNAPSHOT ===\n",
|
|
||||||
"\n",
|
|
||||||
"--- MESSAGES ---\n",
|
|
||||||
"0: HUMAN -> Find loosely structured human name-like strings in the database\n",
|
|
||||||
"1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"2: AI -> Retrieved 30 rows\n",
|
|
||||||
"3: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"4: AI -> Retrieved 30 rows\n",
|
|
||||||
"\n",
|
|
||||||
"--- BEGIN METADATA ---\n",
|
|
||||||
"attempt : 2\n",
|
|
||||||
"max_attempts : 2\n",
|
|
||||||
"phase : extraction\n",
|
|
||||||
"PII type : person name\n",
|
|
||||||
"exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"extraction_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"rows_count : 30\n",
|
|
||||||
"rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n",
|
|
||||||
"classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple strings that are commonly recognized as person names.'}\n",
|
|
||||||
"evidence_count : 0\n",
|
|
||||||
"evidence_sample : []\n",
|
|
||||||
"source_columns : ['users.first_name', 'users.last_name', 'users.username']\n",
|
|
||||||
"\n",
|
|
||||||
"--- END METADATA ---\n",
|
|
||||||
"\n",
|
|
||||||
"=== STATE SNAPSHOT ===\n",
|
|
||||||
"\n",
|
|
||||||
"--- MESSAGES ---\n",
|
|
||||||
"0: HUMAN -> Find loosely structured human name-like strings in the database\n",
|
|
||||||
"1: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"2: AI -> Retrieved 30 rows\n",
|
|
||||||
"3: AI -> SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"4: AI -> Retrieved 30 rows\n",
|
|
||||||
"\n",
|
|
||||||
"--- BEGIN METADATA ---\n",
|
|
||||||
"attempt : 2\n",
|
|
||||||
"max_attempts : 2\n",
|
|
||||||
"phase : extraction\n",
|
|
||||||
"PII type : person name\n",
|
|
||||||
"exploration_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}' \n",
|
|
||||||
"UNION ALL \n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"extraction_sql : SELECT first_name FROM users WHERE first_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT last_name FROM users WHERE last_name REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}'\n",
|
|
||||||
"UNION ALL\n",
|
|
||||||
"SELECT username FROM users WHERE username REGEXP '[A-Za-z][A-Za-z\\s\\.\\-]{1,50}';\n",
|
|
||||||
"rows_count : 30\n",
|
|
||||||
"rows_sample : [('Alice',), ('Brian',), ('Carol',), ('David',), ('Emma',), ('Frank',), ('Grace',), ('Henry',), ('Irene',), ('Jack',), ('Johnson',), ('Smith',), ('Davis',), ('Miller',), ('Wilson',), ('Brown',), ('Taylor',), ('Anderson',), ('Thomas',), ('Moore',), ('ajohnson',), ('bsmith',), ('cdavis',), ('dmiller',), ('ewilson',), ('fbrown',), ('gtaylor',), ('handerson',), ('ithomas',), ('jmoore',)]\n",
|
|
||||||
"classification : {'found': True, 'confidence': 1.0, 'reason': 'The text contains multiple strings that are commonly recognized as person names.'}\n",
|
|
||||||
"evidence_count : 30\n",
|
|
||||||
"evidence_sample : ['Alice', 'Brian', 'Carol', 'David', 'Emma', 'Frank', 'Grace', 'Henry', 'Irene', 'Jack']\n",
|
|
||||||
"source_columns : ['users.first_name', 'users.last_name', 'users.username']\n",
|
|
||||||
"\n",
|
|
||||||
"--- END METADATA ---\n",
|
|
||||||
"Wrote: I:\\project2026\\llmagent\\batch_results\\evidence_20260120T014007Z.jsonl\n"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -1470,7 +1361,21 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" return all_results\n",
|
" return all_results\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def main():\n",
|
"def main(): \n",
|
||||||
|
" DB_DIR = Path(r\"selectedDBs\") # folder that contains the dbs\n",
|
||||||
|
" OUT_DIR = Path(\"batch_results\")\n",
|
||||||
|
" OUT_DIR.mkdir(exist_ok=True)\n",
|
||||||
|
"\n",
|
||||||
|
" PII_TARGETS = [\"EMAIL\", \"PHONE\", \"USERNAME\", \"PERSON_NAME\"]\n",
|
||||||
|
"\n",
|
||||||
|
" # --- usage ---\n",
|
||||||
|
" DB_FILES_PY = Path(\"db_files.py\")\n",
|
||||||
|
" db_files = load_db_files_list(DB_FILES_PY)\n",
|
||||||
|
"\n",
|
||||||
|
" db_paths, missing, not_sqlite = build_db_paths(DB_DIR, db_files, is_sqlite_file)\n",
|
||||||
|
" print_db_path_report(db_paths, missing, not_sqlite)\n",
|
||||||
|
"\n",
|
||||||
|
" \n",
|
||||||
" all_results = run_batch(db_paths, PII_TARGETS, PII_CONFIG, app)\n",
|
" all_results = run_batch(db_paths, PII_TARGETS, PII_CONFIG, app)\n",
|
||||||
" save_jsonl(all_results, OUT_DIR)\n",
|
" save_jsonl(all_results, OUT_DIR)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|||||||
Reference in New Issue
Block a user