Skip to content

Commit

Permalink
bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-ext-simba-hx committed Dec 21, 2023
1 parent b727828 commit 4275b05
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 65 deletions.
47 changes: 42 additions & 5 deletions cpp/FileMetadataInitializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,12 @@ void Snowflake::Client::FileMetadataInitializer::populateSrcLocUploadMetadata(st
{
// looking for files on disk.
std::string srcLocationPlatform = m_stmtPutGet->UTF8ToPlatformString(sourceLocation);
replaceStrAll(srcLocationPlatform, "/", std::string() + PATH_SEP);
size_t dirSep = srcLocationPlatform.find_last_of(PATH_SEP);
std::string basePath = srcLocationPlatform.substr(0, dirSep + 1);

std::vector<std::string> fileList;
if (!listFilesRecursive(srcLocationPlatform, fileList))
if (!listFiles(srcLocationPlatform, fileList))
{
CXX_LOG_ERROR("Failed on finding files for uploading.");
return;
Expand Down Expand Up @@ -95,14 +96,15 @@ void Snowflake::Client::FileMetadataInitializer::includeSubfolderFilesRecursive(
}
}

bool Snowflake::Client::FileMetadataInitializer::listFilesRecursive(const std::string &sourceLocation,
bool Snowflake::Client::FileMetadataInitializer::listFiles(const std::string &sourceLocation,
std::vector<std::string> & fileList)
{
// looking for files on disk.
std::string srcLocationPlatform = m_stmtPutGet->UTF8ToPlatformString(sourceLocation);
size_t dirSep = srcLocationPlatform.find_last_of(PATH_SEP);
std::string dirPath = srcLocationPlatform.substr(0, dirSep + 1);
std::string filePattern = srcLocationPlatform.substr(dirSep + 1);
bool includeSubfolder = filePattern == "**";

#ifdef _WIN32
WIN32_FIND_DATA fdd;
Expand Down Expand Up @@ -132,7 +134,12 @@ bool Snowflake::Client::FileMetadataInitializer::listFilesRecursive(const std::s
}
else
{
includeSubfolderFilesRecursive(dirPath + fdd.cFileName, fileList);
if (includeSubfolder &&
(std::string(fdd.cFileName) != ".") &&
(std::string(fdd.cFileName) != ".."))
{
includeSubfolderFilesRecursive(dirPath + fdd.cFileName, fileList);
}
}
} while (FindNextFile(hFind, &fdd) != 0);

Expand Down Expand Up @@ -162,7 +169,10 @@ bool Snowflake::Client::FileMetadataInitializer::listFilesRecursive(const std::s
if (S_ISREG(fileStatus.st_mode)) {
fileList.push_back(dirPath + dir_entry->d_name);
}
else if (S_ISDIR(fileStatus.st_mode))
else if (includeSubfolder &&
(S_ISDIR(fileStatus.st_mode)) &&
(std::string(dir_entry->d_name) != ".") &&
(std::string(dir_entry->d_name) != ".."))
{
includeSubfolderFilesRecursive(dirPath + dir_entry->d_name, fileList);
}
Expand Down Expand Up @@ -201,8 +211,10 @@ void Snowflake::Client::FileMetadataInitializer::initCompressionMetadata(
{
// guess
CXX_LOG_INFO("Auto detect on compression type");
std::string srcFileNamePlatform = m_stmtPutGet->UTF8ToPlatformString(
fileMetadata.srcFileName);
fileMetadata.sourceCompression = FileCompressionType::guessCompressionType(
m_stmtPutGet->UTF8ToPlatformString(fileMetadata.srcFileName));
srcFileNamePlatform);
}
else if (!sf_strncasecmp(m_sourceCompression, COMPRESSION_NONE,
sizeof(COMPRESSION_NONE)))
Expand Down Expand Up @@ -319,4 +331,29 @@ populateSrcLocDownloadMetadata(std::string &sourceLocation,
return outcome;
}

void Snowflake::Client::FileMetadataInitializer::
replaceStrAll(std::string& stringToReplace,
std::string const& oldValue,
std::string const& newValue)
{
size_t oldValueLen = oldValue.length();
size_t newValueLen = newValue.length();
if (0 == oldValueLen)
{
return;
}

size_t index = 0;
while (true) {
/* Locate the substring to replace. */
index = stringToReplace.find(oldValue, index);
if (index == std::string::npos) break;

/* Make the replacement. */
stringToReplace.replace(index, oldValueLen, newValue);

/* Advance index forward so the next iteration doesn't pick it up as well. */
index += newValueLen;
}
}

7 changes: 4 additions & 3 deletions cpp/FileMetadataInitializer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,17 @@ class FileMetadataInitializer
static void replaceStrAll(std::string& stringToReplace, std::string const& oldValue,
std::string const& newValue);
/**
* Given a source location, find all files recursively in subfolders that match the
* location pattern. Utility function called from populateSrcLocUploadMetadata.
* Given a source location, find all files match the partern, recursively include
* all subfolders if the pattern is **
* Utility function called from populateSrcLocUploadMetadata.
*
* @param sourceLocation The source location could have pattern at the end.
* @param fileList Output the files with the full path.
*
* @return True when succeeded, false when no file matches with the source location.
* @throw SnowflakeTransferException on unexpected error.
*/
bool listFilesRecursive(const std::string &sourceLocation, std::vector<std::string> & fileList);
bool listFiles(const std::string &sourceLocation, std::vector<std::string> & fileList);

/**
* Given a full path of a folder, add all files in the folder recursively including subfolders.
Expand Down
35 changes: 5 additions & 30 deletions cpp/FileTransferAgent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,31 +33,6 @@ using ::Snowflake::Client::RemoteStorageRequestOutcome;
namespace
{
const std::string FILE_PROTOCOL = "file://";

void replaceStrAll(std::string& stringToReplace,
std::string const& oldValue,
std::string const& newValue)
{
size_t oldValueLen = oldValue.length();
size_t newValueLen = newValue.length();
if (0 == oldValueLen)
{
return;
}

size_t index = 0;
while (true) {
/* Locate the substring to replace. */
index = stringToReplace.find(oldValue, index);
if (index == std::string::npos) break;

/* Make the replacement. */
stringToReplace.replace(index, oldValueLen, newValue);

/* Advance index forward so the next iteration doesn't pick it up as well. */
index += newValueLen;
}
}
}

Snowflake::Client::FileTransferAgent::FileTransferAgent(
Expand Down Expand Up @@ -471,7 +446,7 @@ RemoteStorageRequestOutcome Snowflake::Client::FileTransferAgent::uploadSingleFi

// after compress replace PATH_SEP with / in destPath as that's
// what needed on stage side
replaceStrAll(fileMetadata->destPath, std::string() + PATH_SEP, "/");
FileMetadataInitializer::replaceStrAll(fileMetadata->destPath, std::string() + PATH_SEP, "/");
fileMetadata->destFileName = fileMetadata->destPath + fileMetadata->destFileName;

CXX_LOG_TRACE("Update File digest metadata start");
Expand Down Expand Up @@ -624,7 +599,7 @@ void Snowflake::Client::FileTransferAgent::compressSourceFile(
if (!fileMetadata->destPath.empty())
{
std::string subfolder = fileMetadata->destPath;
replaceStrAll(subfolder, "/", std::string() + PATH_SEP);
FileMetadataInitializer::replaceStrAll(subfolder, "/", std::string() + PATH_SEP);
std::string subfolderPlatform = m_stmtPutGet->UTF8ToPlatformString(subfolder);
subfolder = std::string(tempDir) + subfolder;
subfolderPlatform = std::string(tempDir) + subfolderPlatform;
Expand Down Expand Up @@ -857,7 +832,7 @@ RemoteStorageRequestOutcome Snowflake::Client::FileTransferAgent::downloadSingle
RemoteStorageRequestOutcome outcome = RemoteStorageRequestOutcome::FAILED;

// create subfolder first befor adding file name
replaceStrAll(fileMetadata->destPath, "/", std::string() + PATH_SEP);
FileMetadataInitializer::replaceStrAll(fileMetadata->destPath, "/", std::string() + PATH_SEP);
fileMetadata->destPath = std::string(response.localLocation) + PATH_SEP +
fileMetadata->destPath;
std::string destPathPlatform = m_stmtPutGet->UTF8ToPlatformString(fileMetadata->destPath);
Expand Down Expand Up @@ -934,7 +909,7 @@ void Snowflake::Client::FileTransferAgent::getPresignedUrlForUploading(
// need to replace file://mypath/myfile?.csv with file://mypath/myfile1.csv.gz
std::string presignedUrlCommand = command;
std::string localFilePath = getLocalFilePathFromCommand(command, false);
replaceStrAll(presignedUrlCommand, localFilePath, fileMetadata.destFileName);
FileMetadataInitializer::replaceStrAll(presignedUrlCommand, localFilePath, fileMetadata.destFileName);
// then hand that to GS to get the actual presigned URL we'll use
PutGetParseResponse rsp;
if (!m_stmtPutGet->parsePutGetCommand(&presignedUrlCommand, &rsp))
Expand Down Expand Up @@ -980,7 +955,7 @@ std::string Snowflake::Client::FileTransferAgent::getLocalFilePathFromCommand(
// unescape backslashes to match the file name from GS
if (unescape)
{
replaceStrAll(localFilePath, "\\\\\\\\", "\\\\");
FileMetadataInitializer::replaceStrAll(localFilePath, "\\\\\\\\", "\\\\");
}
}
else
Expand Down
59 changes: 32 additions & 27 deletions tests/test_simple_put.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@ void test_simple_put_core(const char * fileName,

std::string dataDir = TestSetup::getDataDir();
std::string file = dataDir + fileName;
if (uploadFolder)
{
file += PATH_SEP;
file += "**";
}
std::string putCommand = "put file://" + file + " @%test_small_put";
if (testUnicode)
{
Expand Down Expand Up @@ -927,34 +932,33 @@ void test_putget_subfolder_core(bool testUnicode)

// use get command to have local files in nested subfolder
std::string cmd = "get '@%test_small_put/" + filename + ".gz' ";
std::string sub1 = "sub1";
std::string sub1Platform = sub1;
std::string localPath1 = "file://";
std::string sub2 = "sub2";
std::string localPath = "file://";

localPath += dataDir + "subfoldertest" + PATH_SEP + "sub1";
test_simple_get_data((cmd + localPath).c_str(), "48", 0, testUnicode, sf);

if (testUnicode)
{
sub1 += UTF8_STR;
sub1Platform += PLATFORM_STR;
sub2 += UTF8_STR;
}
localPath1 += dataDir + sub1;
std::string localPath2 = localPath1 + PATH_SEP + "sub2";
localPath += PATH_SEP;
localPath += sub2;
if (testUnicode)
{
localPath1 = "'" + localPath1 + "'";
localPath2 = "'" + localPath2 + "'";
replaceInPlace(localPath1, "\\", "\\\\");
replaceInPlace(localPath2, "\\", "\\\\");
localPath = "'" + localPath + "'";
replaceInPlace(localPath, "\\", "\\\\");
}
test_simple_get_data((cmd + localPath1).c_str(), "48", 0, testUnicode, sf);
test_simple_get_data((cmd + localPath2).c_str(), "48", 0, testUnicode, sf);
test_simple_get_data((cmd + localPath).c_str(), "48", 0, testUnicode, sf);

// the files returned in alphabetical order, use set to get expected files sorted
std::set<std::string> expectedFiles;
// put command to upload files with subfolders to stage
// put with createSubfolder=true so all the files in "subfolder"
// which would be easier to check later
// uploadFolder=true to skip filename checking
// uploadFolder=true
// createSubfolder=true as well so the uploaded files will be in path of
// "subfolder" which would be easier to check later
std::string basePath = "subfolder/";
test_simple_put_core(sub1.c_str(), // filename
test_simple_put_core("subfoldertest", // filename
"auto", //source compression
true, // auto compress
false, // copyUploadFile
Expand All @@ -973,15 +977,17 @@ void test_putget_subfolder_core(bool testUnicode)
testUnicode, // testUnicode
true // uploadFolder
);
expectedFiles.insert(basePath + sub1 + "/" + filename + ".gz");
expectedFiles.insert(basePath + sub1 + "/sub2/" + filename + ".gz");
expectedFiles.insert(basePath + "sub1/" + filename + ".gz");
expectedFiles.insert(basePath + "sub1/" + sub2 + "/" + filename + ".gz");

// get command to download uploaded files into a new folder sub3
cmd = "get '@%test_small_put/subfolder/" + sub1 + "' file://" + dataDir + "sub3";
cmd = "get @%test_small_put/subfolder file://" +
dataDir + "subfoldertest" + PATH_SEP + "put2" + PATH_SEP + "sub3";
test_simple_get_data(cmd.c_str(), "48", 0, testUnicode, sf);

// put command to upload downloaded file again
test_simple_put_core("sub3", // filename
std::string put2Path = std::string("subfoldertest") + PATH_SEP + "put2";
test_simple_put_core(put2Path.c_str(), // filename
"auto", //source compression
true, // auto compress
false, // copyUploadFile
Expand All @@ -1000,8 +1006,8 @@ void test_putget_subfolder_core(bool testUnicode)
testUnicode, // testUnicode
true // uploadFolder
);
expectedFiles.insert(basePath + "sub3/subfolder/" + sub1 + "/" + filename + ".gz");
expectedFiles.insert(basePath + "sub3/subfolder/" + sub1 + "/sub2/" + filename + ".gz");
expectedFiles.insert(basePath + "sub3/subfolder/sub1/" + filename + ".gz");
expectedFiles.insert(basePath + "sub3/subfolder/sub1/" + sub2 + "/" + filename + ".gz");

// run list command to check result
SF_STMT *sfstmt = NULL;
Expand Down Expand Up @@ -1029,8 +1035,7 @@ void test_putget_subfolder_core(bool testUnicode)

snowflake_stmt_term(sfstmt);
snowflake_term(sf);
remove_all(dataDir + sub1Platform);
remove_all(dataDir + "sub3");
remove_all(dataDir + "subfoldertest");
}

void test_putget_subfolder(void **unused)
Expand Down Expand Up @@ -1834,6 +1839,8 @@ int main(void) {
}

const struct CMUnitTest tests[] = {
cmocka_unit_test_teardown(test_putget_subfolder, teardown),
cmocka_unit_test_teardown(test_putget_subfolder_with_unicode, teardown),
cmocka_unit_test_teardown(test_simple_put_auto_compress, teardown),
cmocka_unit_test_teardown(test_simple_put_config_temp_dir, teardown),
cmocka_unit_test_teardown(test_simple_put_auto_detect_gzip, teardown),
Expand Down Expand Up @@ -1865,8 +1872,6 @@ int main(void) {
cmocka_unit_test_teardown(test_simple_put_with_noproxy_fromenv, teardown),
cmocka_unit_test_teardown(test_upload_file_to_stage_using_stream, donothing),
cmocka_unit_test_teardown(test_put_get_with_unicode, teardown),
cmocka_unit_test_teardown(test_putget_subfolder_with_unicode, teardown),
cmocka_unit_test_teardown(test_putget_subfolder, teardown),
};
int ret = cmocka_run_group_tests(tests, gr_setup, gr_teardown);
return ret;
Expand Down
2 changes: 2 additions & 0 deletions tests/test_unit_file_metadata_init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ void test_file_pattern_match_core(std::vector<std::string> *expectedFiles,
for (auto i = expectedFiles->begin(); i != expectedFiles->end(); i++)
{
std::string expectedFileFull = testDir + *i;
// the src file path would use platform path separator
replaceStrAll(expectedFileFull, "/", std::string() + PATH_SEP);
expectedFilesFull.insert(expectedFileFull);
}

Expand Down

0 comments on commit 4275b05

Please sign in to comment.