From 4275b056c0237a69c4d64b01479b313a8d13542d Mon Sep 17 00:00:00 2001 From: sfc-gh-ext-simba-hx Date: Wed, 20 Dec 2023 16:09:56 -0800 Subject: [PATCH] bug fixes --- cpp/FileMetadataInitializer.cpp | 47 +++++++++++++++++--- cpp/FileMetadataInitializer.hpp | 7 +-- cpp/FileTransferAgent.cpp | 35 +++------------ tests/test_simple_put.cpp | 59 ++++++++++++++------------ tests/test_unit_file_metadata_init.cpp | 2 + 5 files changed, 85 insertions(+), 65 deletions(-) diff --git a/cpp/FileMetadataInitializer.cpp b/cpp/FileMetadataInitializer.cpp index 2602345132..c10dcd87ae 100755 --- a/cpp/FileMetadataInitializer.cpp +++ b/cpp/FileMetadataInitializer.cpp @@ -60,11 +60,12 @@ void Snowflake::Client::FileMetadataInitializer::populateSrcLocUploadMetadata(st { // looking for files on disk. std::string srcLocationPlatform = m_stmtPutGet->UTF8ToPlatformString(sourceLocation); + replaceStrAll(srcLocationPlatform, "/", std::string() + PATH_SEP); size_t dirSep = srcLocationPlatform.find_last_of(PATH_SEP); std::string basePath = srcLocationPlatform.substr(0, dirSep + 1); std::vector fileList; - if (!listFilesRecursive(srcLocationPlatform, fileList)) + if (!listFiles(srcLocationPlatform, fileList)) { CXX_LOG_ERROR("Failed on finding files for uploading."); return; @@ -95,7 +96,7 @@ void Snowflake::Client::FileMetadataInitializer::includeSubfolderFilesRecursive( } } -bool Snowflake::Client::FileMetadataInitializer::listFilesRecursive(const std::string &sourceLocation, +bool Snowflake::Client::FileMetadataInitializer::listFiles(const std::string &sourceLocation, std::vector & fileList) { // looking for files on disk. @@ -103,6 +104,7 @@ bool Snowflake::Client::FileMetadataInitializer::listFilesRecursive(const std::s size_t dirSep = srcLocationPlatform.find_last_of(PATH_SEP); std::string dirPath = srcLocationPlatform.substr(0, dirSep + 1); std::string filePattern = srcLocationPlatform.substr(dirSep + 1); + bool includeSubfolder = filePattern == "**"; #ifdef _WIN32 WIN32_FIND_DATA fdd; @@ -132,7 +134,12 @@ bool Snowflake::Client::FileMetadataInitializer::listFilesRecursive(const std::s } else { - includeSubfolderFilesRecursive(dirPath + fdd.cFileName, fileList); + if (includeSubfolder && + (std::string(fdd.cFileName) != ".") && + (std::string(fdd.cFileName) != "..")) + { + includeSubfolderFilesRecursive(dirPath + fdd.cFileName, fileList); + } } } while (FindNextFile(hFind, &fdd) != 0); @@ -162,7 +169,10 @@ bool Snowflake::Client::FileMetadataInitializer::listFilesRecursive(const std::s if (S_ISREG(fileStatus.st_mode)) { fileList.push_back(dirPath + dir_entry->d_name); } - else if (S_ISDIR(fileStatus.st_mode)) + else if (includeSubfolder && + (S_ISDIR(fileStatus.st_mode)) && + (std::string(dir_entry->d_name) != ".") && + (std::string(dir_entry->d_name) != "..")) { includeSubfolderFilesRecursive(dirPath + dir_entry->d_name, fileList); } @@ -201,8 +211,10 @@ void Snowflake::Client::FileMetadataInitializer::initCompressionMetadata( { // guess CXX_LOG_INFO("Auto detect on compression type"); + std::string srcFileNamePlatform = m_stmtPutGet->UTF8ToPlatformString( + fileMetadata.srcFileName); fileMetadata.sourceCompression = FileCompressionType::guessCompressionType( - m_stmtPutGet->UTF8ToPlatformString(fileMetadata.srcFileName)); + srcFileNamePlatform); } else if (!sf_strncasecmp(m_sourceCompression, COMPRESSION_NONE, sizeof(COMPRESSION_NONE))) @@ -319,4 +331,29 @@ populateSrcLocDownloadMetadata(std::string &sourceLocation, return outcome; } +void Snowflake::Client::FileMetadataInitializer:: +replaceStrAll(std::string& stringToReplace, + std::string const& oldValue, + std::string const& newValue) +{ + size_t oldValueLen = oldValue.length(); + size_t newValueLen = newValue.length(); + if (0 == oldValueLen) + { + return; + } + + size_t index = 0; + while (true) { + /* Locate the substring to replace. */ + index = stringToReplace.find(oldValue, index); + if (index == std::string::npos) break; + + /* Make the replacement. */ + stringToReplace.replace(index, oldValueLen, newValue); + + /* Advance index forward so the next iteration doesn't pick it up as well. */ + index += newValueLen; + } +} diff --git a/cpp/FileMetadataInitializer.hpp b/cpp/FileMetadataInitializer.hpp index 21f2cd199b..2a6cb7df9e 100755 --- a/cpp/FileMetadataInitializer.hpp +++ b/cpp/FileMetadataInitializer.hpp @@ -42,8 +42,9 @@ class FileMetadataInitializer static void replaceStrAll(std::string& stringToReplace, std::string const& oldValue, std::string const& newValue); /** - * Given a source location, find all files recursively in subfolders that match the - * location pattern. Utility function called from populateSrcLocUploadMetadata. + * Given a source location, find all files match the partern, recursively include + * all subfolders if the pattern is ** + * Utility function called from populateSrcLocUploadMetadata. * * @param sourceLocation The source location could have pattern at the end. * @param fileList Output the files with the full path. @@ -51,7 +52,7 @@ class FileMetadataInitializer * @return True when succeeded, false when no file matches with the source location. * @throw SnowflakeTransferException on unexpected error. */ - bool listFilesRecursive(const std::string &sourceLocation, std::vector & fileList); + bool listFiles(const std::string &sourceLocation, std::vector & fileList); /** * Given a full path of a folder, add all files in the folder recursively including subfolders. diff --git a/cpp/FileTransferAgent.cpp b/cpp/FileTransferAgent.cpp index fdfd0dec3a..ccc76a106f 100755 --- a/cpp/FileTransferAgent.cpp +++ b/cpp/FileTransferAgent.cpp @@ -33,31 +33,6 @@ using ::Snowflake::Client::RemoteStorageRequestOutcome; namespace { const std::string FILE_PROTOCOL = "file://"; - - void replaceStrAll(std::string& stringToReplace, - std::string const& oldValue, - std::string const& newValue) - { - size_t oldValueLen = oldValue.length(); - size_t newValueLen = newValue.length(); - if (0 == oldValueLen) - { - return; - } - - size_t index = 0; - while (true) { - /* Locate the substring to replace. */ - index = stringToReplace.find(oldValue, index); - if (index == std::string::npos) break; - - /* Make the replacement. */ - stringToReplace.replace(index, oldValueLen, newValue); - - /* Advance index forward so the next iteration doesn't pick it up as well. */ - index += newValueLen; - } - } } Snowflake::Client::FileTransferAgent::FileTransferAgent( @@ -471,7 +446,7 @@ RemoteStorageRequestOutcome Snowflake::Client::FileTransferAgent::uploadSingleFi // after compress replace PATH_SEP with / in destPath as that's // what needed on stage side - replaceStrAll(fileMetadata->destPath, std::string() + PATH_SEP, "/"); + FileMetadataInitializer::replaceStrAll(fileMetadata->destPath, std::string() + PATH_SEP, "/"); fileMetadata->destFileName = fileMetadata->destPath + fileMetadata->destFileName; CXX_LOG_TRACE("Update File digest metadata start"); @@ -624,7 +599,7 @@ void Snowflake::Client::FileTransferAgent::compressSourceFile( if (!fileMetadata->destPath.empty()) { std::string subfolder = fileMetadata->destPath; - replaceStrAll(subfolder, "/", std::string() + PATH_SEP); + FileMetadataInitializer::replaceStrAll(subfolder, "/", std::string() + PATH_SEP); std::string subfolderPlatform = m_stmtPutGet->UTF8ToPlatformString(subfolder); subfolder = std::string(tempDir) + subfolder; subfolderPlatform = std::string(tempDir) + subfolderPlatform; @@ -857,7 +832,7 @@ RemoteStorageRequestOutcome Snowflake::Client::FileTransferAgent::downloadSingle RemoteStorageRequestOutcome outcome = RemoteStorageRequestOutcome::FAILED; // create subfolder first befor adding file name - replaceStrAll(fileMetadata->destPath, "/", std::string() + PATH_SEP); + FileMetadataInitializer::replaceStrAll(fileMetadata->destPath, "/", std::string() + PATH_SEP); fileMetadata->destPath = std::string(response.localLocation) + PATH_SEP + fileMetadata->destPath; std::string destPathPlatform = m_stmtPutGet->UTF8ToPlatformString(fileMetadata->destPath); @@ -934,7 +909,7 @@ void Snowflake::Client::FileTransferAgent::getPresignedUrlForUploading( // need to replace file://mypath/myfile?.csv with file://mypath/myfile1.csv.gz std::string presignedUrlCommand = command; std::string localFilePath = getLocalFilePathFromCommand(command, false); - replaceStrAll(presignedUrlCommand, localFilePath, fileMetadata.destFileName); + FileMetadataInitializer::replaceStrAll(presignedUrlCommand, localFilePath, fileMetadata.destFileName); // then hand that to GS to get the actual presigned URL we'll use PutGetParseResponse rsp; if (!m_stmtPutGet->parsePutGetCommand(&presignedUrlCommand, &rsp)) @@ -980,7 +955,7 @@ std::string Snowflake::Client::FileTransferAgent::getLocalFilePathFromCommand( // unescape backslashes to match the file name from GS if (unescape) { - replaceStrAll(localFilePath, "\\\\\\\\", "\\\\"); + FileMetadataInitializer::replaceStrAll(localFilePath, "\\\\\\\\", "\\\\"); } } else diff --git a/tests/test_simple_put.cpp b/tests/test_simple_put.cpp index 4f881ae6df..0074947a3e 100755 --- a/tests/test_simple_put.cpp +++ b/tests/test_simple_put.cpp @@ -169,6 +169,11 @@ void test_simple_put_core(const char * fileName, std::string dataDir = TestSetup::getDataDir(); std::string file = dataDir + fileName; + if (uploadFolder) + { + file += PATH_SEP; + file += "**"; + } std::string putCommand = "put file://" + file + " @%test_small_put"; if (testUnicode) { @@ -927,34 +932,33 @@ void test_putget_subfolder_core(bool testUnicode) // use get command to have local files in nested subfolder std::string cmd = "get '@%test_small_put/" + filename + ".gz' "; - std::string sub1 = "sub1"; - std::string sub1Platform = sub1; - std::string localPath1 = "file://"; + std::string sub2 = "sub2"; + std::string localPath = "file://"; + + localPath += dataDir + "subfoldertest" + PATH_SEP + "sub1"; + test_simple_get_data((cmd + localPath).c_str(), "48", 0, testUnicode, sf); + if (testUnicode) { - sub1 += UTF8_STR; - sub1Platform += PLATFORM_STR; + sub2 += UTF8_STR; } - localPath1 += dataDir + sub1; - std::string localPath2 = localPath1 + PATH_SEP + "sub2"; + localPath += PATH_SEP; + localPath += sub2; if (testUnicode) { - localPath1 = "'" + localPath1 + "'"; - localPath2 = "'" + localPath2 + "'"; - replaceInPlace(localPath1, "\\", "\\\\"); - replaceInPlace(localPath2, "\\", "\\\\"); + localPath = "'" + localPath + "'"; + replaceInPlace(localPath, "\\", "\\\\"); } - test_simple_get_data((cmd + localPath1).c_str(), "48", 0, testUnicode, sf); - test_simple_get_data((cmd + localPath2).c_str(), "48", 0, testUnicode, sf); + test_simple_get_data((cmd + localPath).c_str(), "48", 0, testUnicode, sf); // the files returned in alphabetical order, use set to get expected files sorted std::set expectedFiles; // put command to upload files with subfolders to stage - // put with createSubfolder=true so all the files in "subfolder" - // which would be easier to check later - // uploadFolder=true to skip filename checking + // uploadFolder=true + // createSubfolder=true as well so the uploaded files will be in path of + // "subfolder" which would be easier to check later std::string basePath = "subfolder/"; - test_simple_put_core(sub1.c_str(), // filename + test_simple_put_core("subfoldertest", // filename "auto", //source compression true, // auto compress false, // copyUploadFile @@ -973,15 +977,17 @@ void test_putget_subfolder_core(bool testUnicode) testUnicode, // testUnicode true // uploadFolder ); - expectedFiles.insert(basePath + sub1 + "/" + filename + ".gz"); - expectedFiles.insert(basePath + sub1 + "/sub2/" + filename + ".gz"); + expectedFiles.insert(basePath + "sub1/" + filename + ".gz"); + expectedFiles.insert(basePath + "sub1/" + sub2 + "/" + filename + ".gz"); // get command to download uploaded files into a new folder sub3 - cmd = "get '@%test_small_put/subfolder/" + sub1 + "' file://" + dataDir + "sub3"; + cmd = "get @%test_small_put/subfolder file://" + + dataDir + "subfoldertest" + PATH_SEP + "put2" + PATH_SEP + "sub3"; test_simple_get_data(cmd.c_str(), "48", 0, testUnicode, sf); // put command to upload downloaded file again - test_simple_put_core("sub3", // filename + std::string put2Path = std::string("subfoldertest") + PATH_SEP + "put2"; + test_simple_put_core(put2Path.c_str(), // filename "auto", //source compression true, // auto compress false, // copyUploadFile @@ -1000,8 +1006,8 @@ void test_putget_subfolder_core(bool testUnicode) testUnicode, // testUnicode true // uploadFolder ); - expectedFiles.insert(basePath + "sub3/subfolder/" + sub1 + "/" + filename + ".gz"); - expectedFiles.insert(basePath + "sub3/subfolder/" + sub1 + "/sub2/" + filename + ".gz"); + expectedFiles.insert(basePath + "sub3/subfolder/sub1/" + filename + ".gz"); + expectedFiles.insert(basePath + "sub3/subfolder/sub1/" + sub2 + "/" + filename + ".gz"); // run list command to check result SF_STMT *sfstmt = NULL; @@ -1029,8 +1035,7 @@ void test_putget_subfolder_core(bool testUnicode) snowflake_stmt_term(sfstmt); snowflake_term(sf); - remove_all(dataDir + sub1Platform); - remove_all(dataDir + "sub3"); + remove_all(dataDir + "subfoldertest"); } void test_putget_subfolder(void **unused) @@ -1834,6 +1839,8 @@ int main(void) { } const struct CMUnitTest tests[] = { + cmocka_unit_test_teardown(test_putget_subfolder, teardown), + cmocka_unit_test_teardown(test_putget_subfolder_with_unicode, teardown), cmocka_unit_test_teardown(test_simple_put_auto_compress, teardown), cmocka_unit_test_teardown(test_simple_put_config_temp_dir, teardown), cmocka_unit_test_teardown(test_simple_put_auto_detect_gzip, teardown), @@ -1865,8 +1872,6 @@ int main(void) { cmocka_unit_test_teardown(test_simple_put_with_noproxy_fromenv, teardown), cmocka_unit_test_teardown(test_upload_file_to_stage_using_stream, donothing), cmocka_unit_test_teardown(test_put_get_with_unicode, teardown), - cmocka_unit_test_teardown(test_putget_subfolder_with_unicode, teardown), - cmocka_unit_test_teardown(test_putget_subfolder, teardown), }; int ret = cmocka_run_group_tests(tests, gr_setup, gr_teardown); return ret; diff --git a/tests/test_unit_file_metadata_init.cpp b/tests/test_unit_file_metadata_init.cpp index 592098da62..c62d548243 100755 --- a/tests/test_unit_file_metadata_init.cpp +++ b/tests/test_unit_file_metadata_init.cpp @@ -101,6 +101,8 @@ void test_file_pattern_match_core(std::vector *expectedFiles, for (auto i = expectedFiles->begin(); i != expectedFiles->end(); i++) { std::string expectedFileFull = testDir + *i; + // the src file path would use platform path separator + replaceStrAll(expectedFileFull, "/", std::string() + PATH_SEP); expectedFilesFull.insert(expectedFileFull); }