diff --git a/main/dumpparser.y b/main/dumpparser.y index 62c1238..6e6da94 100644 --- a/main/dumpparser.y +++ b/main/dumpparser.y @@ -193,28 +193,27 @@ singlefield : VALUE { Skip anonymisation on NULL values */ if ((found) && (strncmp(dump_text,"NULL",dump_leng))) { bool bDone=false; - bool bFirstSeperatedValue= false; + bool bFirstSeperatedValue=true; cur->infos.nbhits++; char *curfield; int curleng; bool curquoted=false; + char *noquotetext=NULL; + /* Separated mode? */ if (cur->infos.separator[0]) { - curfield=strtok(dump_text,cur->infos.separator); - if (curfield) { - curleng=strlen(curfield); - bFirstSeperatedValue=true; - if (cur->quoted) { - fprintf(stdout, "'"); /* Opening quote for field value */ - } - } else { - fprintf(stderr, "WARNING! Table/field %s: Unable to parse seperated field, skip anonimyzation",cur->key); - quoted_output_helper(dump_text,dump_leng,true); - bDone=true; + /* Handle quoting if present */ + if (cur->quoted) { + /* Remove quoting for working text before split */ + noquotetext = mymalloc(dump_leng+1); + remove_quote(noquotetext,dump_text,dump_leng+1); + curfield=noquotetext; + curquoted=false; } } else { + /* Single value */ curfield=dump_text; curleng=dump_leng; curquoted=cur->quoted; @@ -224,20 +223,51 @@ singlefield : VALUE { while(!bDone) { if (!cur->infos.separator[0]) { bDone=true; /* Single anon */ - } else { - curfield=strtok(NULL,cur->infos.separator); - if (curfield) { - curleng=dump_leng; - if (!bFirstSeperatedValue) { - fprintf(stdout, "%s", cur->infos.separator); - } + } + else + { + if (bFirstSeperatedValue) { bFirstSeperatedValue=false; - } else { - bDone=true; - if (cur->quoted) { - fprintf(stdout, "'"); /* Ending quote for field value */ + /* First extraction on separated values */ + if (noquotetext != NULL) { + curfield = strtok(noquotetext,cur->infos.separator); + } else { + curfield = strtok(dump_text,cur->infos.separator); + } + if (curfield) { + curleng=strlen(curfield); + if (cur->quoted) { + fprintf(stdout, "'"); /* Opening quote for field value */ + } + } + else + { + fprintf(stderr, "WARNING! Table/field %s: Unable to parse seperated field, skip anonimyzation",cur->key); + fwrite(dump_text,dump_leng,1,stdout); + bDone=true; + continue; + } + } + else + { + /* Other extractions on separated values */ + curfield = strtok(NULL,cur->infos.separator); + + if (curfield) { + curleng=strlen(curfield); + if (!bFirstSeperatedValue) { + fprintf(stdout, "%s", cur->infos.separator); + } + bFirstSeperatedValue=false; + } + else + { + bDone=true; + if (cur->quoted) { + fprintf(stdout, "'"); /* Ending quote for field value */ + } + continue; } - continue; } } @@ -322,7 +352,7 @@ singlefield : VALUE { #endif default: - res_st=anonymize_token(cur->quoted,&cur->infos,curfield,curleng); + res_st=anonymize_token(curquoted,&cur->infos,curfield,curleng); quoted_output_helper((char *)&res_st.data[0],res_st.len,curquoted); break; } diff --git a/main/myanon.c b/main/myanon.c index 3c9cbb5..364cfe1 100644 --- a/main/myanon.c +++ b/main/myanon.c @@ -180,6 +180,7 @@ anonymized_res_st anonymize_token(bool quoted, anon_base_st *config, char *token worktoken = token; worktokenlen = tokenlen; } + DEBUG_MSG("--WORKTOKEN %s - %d\n", worktoken, worktokenlen); switch (config->type) { diff --git a/tests/complex_anon.sql b/tests/complex_anon.sql index 581a173..999b399 100644 --- a/tests/complex_anon.sql +++ b/tests/complex_anon.sql @@ -41,7 +41,7 @@ CREATE TABLE `points` ( LOCK TABLES `points` WRITE; /*!40000 ALTER TABLE `points` DISABLE KEYS */; -INSERT INTO `points` VALUES (1,'sgsjq','[\"mdjjoyvdxe@example.com\",\"zlhopsocec@example.com\",\"ugkmsoicdk@example.com\"]','{\"email\":\"piskgwdrsq@example.com\",\"last_name\":\"itrai\",\"first_name\":\"lbnvq\"}','dwghhfltmy@example.com,ctjebpytxb@example.com,gjvptyopbc@example.com',NULL,'2024-04-09 17:58:27.502417','2024-04-09 17:58:27.502417'),(2,'pvkkz','[\"dvuvoetfay@example.com\",\"maefvygipm@example.com\",\"eockxzkwhf@example.com\"]','{\"email\":\"qukrgrahjl@example.com\",\"last_name\":\"yzbhe\",\"first_name\":\"lcxlg\"}','ccrvzstlrl@example.com,iyvsfgvfcf@example.com,foifrlvniu@example.com','{\"email_changes\":[[\"nqtqitrcgi@example.com\"],[\"cvhedfkhxt@example.com\",\"fltphijegq@example.com\"]]}','2024-04-09 17:58:58.337830','2024-04-09 17:58:58.337830'); +INSERT INTO `points` VALUES (1,'sgsjq','[\"mdjjoyvdxe@example.com\",\"zlhopsocec@example.com\",\"ugkmsoicdk@example.com\"]','{\"email\":\"piskgwdrsq@example.com\",\"last_name\":\"itrai\",\"first_name\":\"lbnvq\"}','mdjjoyvdxe@example.com,zlhopsocec@example.com,ugkmsoicdk@example.com,piskgwdrsq@example.com',NULL,'2024-04-09 17:58:27.502417','2024-04-09 17:58:27.502417'),(2,'pvkkz','[\"dvuvoetfay@example.com\",\"maefvygipm@example.com\",\"eockxzkwhf@example.com\"]','{\"email\":\"qukrgrahjl@example.com\",\"last_name\":\"yzbhe\",\"first_name\":\"lcxlg\"}','dvuvoetfay@example.com,maefvygipm@example.com,eockxzkwhf@example.com,qukrgrahjl@example.com','{\"email_changes\":[[\"nqtqitrcgi@example.com\"],[\"cvhedfkhxt@example.com\",\"fltphijegq@example.com\"]]}','2024-04-09 17:58:58.337830','2024-04-09 17:58:58.337830'); /*!40000 ALTER TABLE `points` ENABLE KEYS */; UNLOCK TABLES; /*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;