From 25df76e12fac1d7e89f626c25c708178fc1531ec Mon Sep 17 00:00:00 2001
From: Max Lepikhin
Date: Sun, 3 Nov 2024 16:13:34 -0700
Subject: [PATCH 1/3] Use longs when splitting model zip file

Signed-off-by: Max Lepikhin
---
 .../java/org/opensearch/ml/engine/utils/FileUtils.java | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/utils/FileUtils.java b/ml-algorithms/src/main/java/org/opensearch/ml/engine/utils/FileUtils.java
index 677ca1aa9d..82551042e1 100644
--- a/ml-algorithms/src/main/java/org/opensearch/ml/engine/utils/FileUtils.java
+++ b/ml-algorithms/src/main/java/org/opensearch/ml/engine/utils/FileUtils.java
@@ -45,16 +45,16 @@ public class FileUtils {
      * @throws IOException
      */
     public static List<String> splitFileIntoChunks(File file, Path outputPath, int chunkSize) throws IOException {
-        int fileSize = (int) file.length();
+        long fileSize = file.length();
         ArrayList<String> nameList = new ArrayList<>();
         try (InputStream inStream = new BufferedInputStream(new FileInputStream(file))) {
             int numberOfChunk = 0;
-            int totalBytesRead = 0;
+            long totalBytesRead = 0;
             while (totalBytesRead < fileSize) {
                 String partName = numberOfChunk + "";
-                int bytesRemaining = fileSize - totalBytesRead;
+                long bytesRemaining = fileSize - totalBytesRead;
                 if (bytesRemaining < chunkSize) {
-                    chunkSize = bytesRemaining;
+                    chunkSize = (int) bytesRemaining;
                 }
                 byte[] temporary = new byte[chunkSize];
                 int bytesRead = inStream.read(temporary, 0, chunkSize);

From 78a7c8870b9f14b2fc8493984e141dfe17b92257 Mon Sep 17 00:00:00 2001
From: Max Lepikhin
Date: Tue, 5 Nov 2024 09:11:13 -0700
Subject: [PATCH 2/3] add test

Signed-off-by: Max Lepikhin
---
 .../ml/engine/utils/FileUtilsTest.java | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 ml-algorithms/src/test/java/org/opensearch/ml/engine/utils/FileUtilsTest.java

diff --git a/ml-algorithms/src/test/java/org/opensearch/ml/engine/utils/FileUtilsTest.java b/ml-algorithms/src/test/java/org/opensearch/ml/engine/utils/FileUtilsTest.java
new file mode 100644
index 0000000000..3959aeffdf
--- /dev/null
+++ b/ml-algorithms/src/test/java/org/opensearch/ml/engine/utils/FileUtilsTest.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.ml.engine.utils;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+public class FileUtilsTest {
+    private final TemporaryFolder tempDir = new TemporaryFolder();
+
+    @Before
+    public void setUp() throws Exception {
+        tempDir.create();
+    }
+
+    @After
+    public void tearUp() {
+        tempDir.delete();
+    }
+
+    @Test
+    public void testSplitFileIntoChunks() throws Exception {
+        // Write file.
+        Random random = new Random();
+        File file = tempDir.newFile("large_file");
+        byte[] data = new byte[1017];
+        random.nextBytes(data);
+        Files.write(file.toPath(), data);
+
+        // Split file into chunks.
+        int chunkSize = 325;
+        List<String> chunkPaths = FileUtils
+            .splitFileIntoChunks(file, tempDir.newFolder().toPath(), chunkSize);
+
+        // Verify.
+        int currentPosition = 0;
+        for (String chunkPath : chunkPaths) {
+            byte[] chunk = Files.readAllBytes(Path.of(chunkPath));
+            assertTrue("Chunk size", currentPosition + chunk.length <= data.length);
+            Assert.assertArrayEquals(Arrays
+                .copyOfRange(data, currentPosition, currentPosition + chunk.length), chunk);
+            currentPosition += chunk.length;
+        }
+        assertEquals(currentPosition, data.length);
+    }
+}

From f6d611e496d13cdf1a2804f693d1140855b48e21 Mon Sep 17 00:00:00 2001
From: Max Lepikhin
Date: Wed, 6 Nov 2024 11:13:11 -0700
Subject: [PATCH 3/3] spotless

Signed-off-by: Max Lepikhin
---
 .../java/org/opensearch/ml/engine/utils/FileUtilsTest.java | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/ml-algorithms/src/test/java/org/opensearch/ml/engine/utils/FileUtilsTest.java b/ml-algorithms/src/test/java/org/opensearch/ml/engine/utils/FileUtilsTest.java
index 3959aeffdf..16138fea8b 100644
--- a/ml-algorithms/src/test/java/org/opensearch/ml/engine/utils/FileUtilsTest.java
+++ b/ml-algorithms/src/test/java/org/opensearch/ml/engine/utils/FileUtilsTest.java
@@ -45,16 +45,14 @@ public void testSplitFileIntoChunks() throws Exception {
 
         // Split file into chunks.
         int chunkSize = 325;
-        List<String> chunkPaths = FileUtils
-            .splitFileIntoChunks(file, tempDir.newFolder().toPath(), chunkSize);
+        List<String> chunkPaths = FileUtils.splitFileIntoChunks(file, tempDir.newFolder().toPath(), chunkSize);
 
         // Verify.
         int currentPosition = 0;
         for (String chunkPath : chunkPaths) {
             byte[] chunk = Files.readAllBytes(Path.of(chunkPath));
             assertTrue("Chunk size", currentPosition + chunk.length <= data.length);
-            Assert.assertArrayEquals(Arrays
-                .copyOfRange(data, currentPosition, currentPosition + chunk.length), chunk);
+            Assert.assertArrayEquals(Arrays.copyOfRange(data, currentPosition, currentPosition + chunk.length), chunk);
             currentPosition += chunk.length;
         }
         assertEquals(currentPosition, data.length);