From 19a4c6c7e460e0822f255abd3344cad0657a48cb Mon Sep 17 00:00:00 2001 From: Diogo Santos Date: Thu, 8 Aug 2024 11:45:22 +0100 Subject: [PATCH] ID-1570 - Byte copying to preserve original stream; Hash kept as state variable; --- .../destination/local/LocalFileAcceptor.java | 5 +- .../destination/s3/S3FileAcceptor.java | 2 +- .../stream/StreamWithMD5Decorator.java | 88 ++++++++++-- .../local/LocalFileAcceptorTest.java | 131 +++++++++++------- .../stream/StreamWithMD5DecoratorTest.java | 57 ++++++-- 5 files changed, 212 insertions(+), 71 deletions(-) diff --git a/dice-where-downloader-lib/src/main/java/technology/dice/dicewhere/downloader/destination/local/LocalFileAcceptor.java b/dice-where-downloader-lib/src/main/java/technology/dice/dicewhere/downloader/destination/local/LocalFileAcceptor.java index a40dc9e..7a5aae2 100644 --- a/dice-where-downloader-lib/src/main/java/technology/dice/dicewhere/downloader/destination/local/LocalFileAcceptor.java +++ b/dice-where-downloader-lib/src/main/java/technology/dice/dicewhere/downloader/destination/local/LocalFileAcceptor.java @@ -39,7 +39,7 @@ public StreamConsumer getStreamConsumer( } catch (FileAlreadyExistsException e) { LOG.debug("Destination directory already exists"); } - Files.copy(stream, destination, StandardCopyOption.REPLACE_EXISTING); + Files.copy(stream.getInputStream(), destination, StandardCopyOption.REPLACE_EXISTING); if ((!noMd5Check) && (!originalFileMd5.matches(stream.md5()))) { LOG.error("MD5 mismatch. Deleting destination file"); Files.delete(destination); @@ -72,9 +72,6 @@ public Optional existingFileMd5() { try (InputStream is = Files.newInputStream(this.destination); BufferedInputStream bis = new BufferedInputStream(is); StreamWithMD5Decorator md5Is = StreamWithMD5Decorator.of(bis)) { - byte[] buffer = new byte[BUFFER]; - while ((md5Is.read(buffer)) != -1) { - } return Optional.of(md5Is.md5()); } catch (IOException | NoSuchAlgorithmException e) { throw new RuntimeException( diff --git a/dice-where-downloader-lib/src/main/java/technology/dice/dicewhere/downloader/destination/s3/S3FileAcceptor.java b/dice-where-downloader-lib/src/main/java/technology/dice/dicewhere/downloader/destination/s3/S3FileAcceptor.java index d145646..0ca5349 100644 --- a/dice-where-downloader-lib/src/main/java/technology/dice/dicewhere/downloader/destination/s3/S3FileAcceptor.java +++ b/dice-where-downloader-lib/src/main/java/technology/dice/dicewhere/downloader/destination/s3/S3FileAcceptor.java @@ -59,7 +59,7 @@ public StreamConsumer getStreamConsumer( .contentLength(size) .storageClass(StorageClass.INTELLIGENT_TIERING) .build(); - client.putObject(putObjectRequest, RequestBody.fromInputStream(stream, size)); + client.putObject(putObjectRequest, RequestBody.fromInputStream(stream.getInputStream(), size)); Latest latest = new Latest(clock.instant(), key); String latestContent = mapper.writeValueAsString(latest); diff --git a/dice-where-downloader-lib/src/main/java/technology/dice/dicewhere/downloader/stream/StreamWithMD5Decorator.java b/dice-where-downloader-lib/src/main/java/technology/dice/dicewhere/downloader/stream/StreamWithMD5Decorator.java index 8164348..f8401e7 100644 --- a/dice-where-downloader-lib/src/main/java/technology/dice/dicewhere/downloader/stream/StreamWithMD5Decorator.java +++ b/dice-where-downloader-lib/src/main/java/technology/dice/dicewhere/downloader/stream/StreamWithMD5Decorator.java @@ -1,43 +1,115 @@ package technology.dice.dicewhere.downloader.stream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.security.DigestInputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.Optional; import javax.xml.bind.annotation.adapters.HexBinaryAdapter; import technology.dice.dicewhere.downloader.md5.MD5Checksum; public class StreamWithMD5Decorator extends InputStream { - private final DigestInputStream inputStream; + private final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); private final MessageDigest md5; + DigestInputStream inputStream; + private boolean consumed = false; + private Optional md5Checksum = Optional.empty(); - private StreamWithMD5Decorator(DigestInputStream inputStream, MessageDigest md5) { + private StreamWithMD5Decorator(DigestInputStream inputStream, MessageDigest md5) + throws IOException { this.inputStream = inputStream; this.md5 = md5; - inputStream.on(false); + consumeStream(); } - - public static StreamWithMD5Decorator of(InputStream inputStream) throws NoSuchAlgorithmException { + public static StreamWithMD5Decorator of(InputStream inputStream) + throws NoSuchAlgorithmException, IOException { MessageDigest md5 = MessageDigest.getInstance("MD5"); DigestInputStream dis = new DigestInputStream(inputStream, md5); return new StreamWithMD5Decorator(dis, md5); } + private void consumeStream() throws IOException { + byte[] data = new byte[8192]; + int bytesRead; + while ((bytesRead = inputStream.read(data)) != -1) { + buffer.write(data, 0, bytesRead); + } + consumed = true; + } + public MD5Checksum md5() { - String hex = (new HexBinaryAdapter()).marshal(md5.digest()); - return MD5Checksum.of(hex); + if (!consumed) { + throw new IllegalStateException("Stream not fully consumed yet."); + } + return md5Checksum.orElseGet( + () -> { + String hex = (new HexBinaryAdapter()).marshal(md5.digest()); + MD5Checksum checksum = MD5Checksum.of(hex); + md5Checksum = Optional.of(checksum); + return checksum; + }); + } + + public InputStream getInputStream() { + return new ByteArrayInputStream(buffer.toByteArray()); } @Override public int read() throws IOException { - return inputStream.read(); + if (!consumed) { + throw new IllegalStateException("Stream not fully consumed yet."); + } + return getInputStream().read(); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + if (!consumed) { + throw new IllegalStateException("Stream not fully consumed yet."); + } + return getInputStream().read(b, off, len); } @Override public void close() throws IOException { + getInputStream().close(); inputStream.close(); } + /* + public static String of1(InputStream inputStream) throws NoSuchAlgorithmException { + return bytesToHex(checksum(inputStream)); + } + + private static byte[] checksum(InputStream is) { + + MessageDigest md; + try { + md = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + throw new IllegalArgumentException(e); + } + + try (DigestInputStream dis = new DigestInputStream(is, md)) { + while (dis.read() != -1) + ; // empty loop to clear the data + md = dis.getMessageDigest(); + } catch (IOException e) { + throw new IllegalArgumentException(e); + } + return md.digest(); + } + + private static String bytesToHex(byte[] bytes) { + StringBuilder sb = new StringBuilder(); + for (byte b : bytes) { + sb.append(String.format("%02x", b)); + } + return sb.toString(); + } + */ } diff --git a/dice-where-downloader-lib/src/test/java/technology/dice/dicewhere/downloader/destination/local/LocalFileAcceptorTest.java b/dice-where-downloader-lib/src/test/java/technology/dice/dicewhere/downloader/destination/local/LocalFileAcceptorTest.java index 121d2f7..c84bf7c 100644 --- a/dice-where-downloader-lib/src/test/java/technology/dice/dicewhere/downloader/destination/local/LocalFileAcceptorTest.java +++ b/dice-where-downloader-lib/src/test/java/technology/dice/dicewhere/downloader/destination/local/LocalFileAcceptorTest.java @@ -33,8 +33,7 @@ public class LocalFileAcceptorTest extends TestCase { private static final int TEST_FILE_SIZE = 1024 * 1024; - @ClassRule - static WireMockRule wireMockRule = new WireMockRule(wireMockConfig().dynamicPort()); + @ClassRule static WireMockRule wireMockRule = new WireMockRule(wireMockConfig().dynamicPort()); @BeforeClass public static void beforeClass() { @@ -45,16 +44,22 @@ public static void beforeClass() { public void corruptedFileEmptyPreexistingSet() throws IOException, NoSuchAlgorithmException { Pair tempFile = generateTempFile(); Path destinationDir = Files.createTempDirectory("dice-where"); - IpInfoSiteSource ipInfoSiteSource = new IpInfoSiteSource( - new URL("http://localhost:" + wireMockRule.port() + "/data/file.mdb")); - wireMockRule.stubFor(WireMock.head(UrlPattern.ANY).willReturn( - aResponse().withStatus(HttpStatus.SC_OK) - .withHeader("Etag", "aaa") - .withHeader("Content-Length", Long.toString(TEST_FILE_SIZE)) - .withHeader("Last-Modified", "Thu, 01 Dec 1994 16:00:00 GMT"))); - wireMockRule.stubFor(WireMock.get(UrlPattern.ANY) - .willReturn(aResponse().withBody( - IOUtils.toByteArray(new FileInputStream(tempFile.getLeft().toFile()))))); + IpInfoSiteSource ipInfoSiteSource = + new IpInfoSiteSource(new URL("http://localhost:" + wireMockRule.port() + "/data/file.mdb")); + wireMockRule.stubFor( + WireMock.head(UrlPattern.ANY) + .willReturn( + aResponse() + .withStatus(HttpStatus.SC_OK) + .withHeader("Etag", "aaa") + .withHeader("Content-Length", Long.toString(TEST_FILE_SIZE)) + .withHeader("Last-Modified", "Thu, 01 Dec 1994 16:00:00 GMT"))); + wireMockRule.stubFor( + WireMock.get(UrlPattern.ANY) + .willReturn( + aResponse() + .withBody( + IOUtils.toByteArray(new FileInputStream(tempFile.getLeft().toFile()))))); FileInfo fileInfo = ipInfoSiteSource.fileInfo(); ipInfoSiteSource.produce(new LocalFileAcceptor(destinationDir.resolve("file.mdb")), false); @@ -68,72 +73,100 @@ public void corruptedFilePreexistingSet() throws IOException, NoSuchAlgorithmExc Pair existingFile = generateTempFile(); Path destinationDir = Files.createTempDirectory("dice-where"); Files.copy(existingFile.getLeft(), destinationDir.resolve("existingFile.mdb")); - IpInfoSiteSource ipInfoSiteSource = new IpInfoSiteSource( - new URL("http://localhost:" + wireMockRule.port() + "/data/file.mdb")); - wireMockRule.stubFor(WireMock.head(UrlPattern.ANY).willReturn( - aResponse().withStatus(HttpStatus.SC_OK) - .withHeader("Etag", "aaa") - .withHeader("Content-Length", Long.toString(TEST_FILE_SIZE)) - .withHeader("Last-Modified", "Thu, 01 Dec 1994 16:00:00 GMT"))); - wireMockRule.stubFor(WireMock.get(UrlPattern.ANY) - .willReturn(aResponse().withBody( - IOUtils.toByteArray(new FileInputStream(tempFile.getLeft().toFile()))))); + IpInfoSiteSource ipInfoSiteSource = + new IpInfoSiteSource(new URL("http://localhost:" + wireMockRule.port() + "/data/file.mdb")); + wireMockRule.stubFor( + WireMock.head(UrlPattern.ANY) + .willReturn( + aResponse() + .withStatus(HttpStatus.SC_OK) + .withHeader("Etag", "aaa") + .withHeader("Content-Length", Long.toString(TEST_FILE_SIZE)) + .withHeader("Last-Modified", "Thu, 01 Dec 1994 16:00:00 GMT"))); + wireMockRule.stubFor( + WireMock.get(UrlPattern.ANY) + .willReturn( + aResponse() + .withBody( + IOUtils.toByteArray(new FileInputStream(tempFile.getLeft().toFile()))))); FileInfo fileInfo = ipInfoSiteSource.fileInfo(); ipInfoSiteSource.produce(new LocalFileAcceptor(destinationDir.resolve("file.mdb")), false); assertNotEquals(tempFile.getRight().toLowerCase(), fileInfo.getMd5Checksum().stringFormat()); assertEquals(Files.list(destinationDir).count(), 1); assertFalse(Files.exists(destinationDir.resolve("file.mdb"))); - assertTrue(Arrays.equals(Files.readAllBytes(existingFile.getLeft()), - Files.readAllBytes(Files.list(destinationDir).findFirst().get()))); + assertTrue( + Arrays.equals( + Files.readAllBytes(existingFile.getLeft()), + Files.readAllBytes(Files.list(destinationDir).findFirst().get()))); } @Test public void goodFileEmptyPreexistingSet() throws IOException, NoSuchAlgorithmException { Pair tempFile = generateTempFile(); Path destinationDir = Files.createTempDirectory("dice-where"); - IpInfoSiteSource ipInfoSiteSource = new IpInfoSiteSource( - new URL("http://localhost:" + wireMockRule.port() + "/data/file.mdb")); - wireMockRule.stubFor(WireMock.head(UrlPattern.ANY).willReturn( - aResponse().withStatus(HttpStatus.SC_OK) - .withHeader("Etag", tempFile.getRight()) - .withHeader("Content-Length", Long.toString(TEST_FILE_SIZE)) - .withHeader("Last-Modified", "Thu, 01 Dec 1994 16:00:00 GMT"))); - wireMockRule.stubFor(WireMock.get(UrlPattern.ANY) - .willReturn(aResponse().withBody( - IOUtils.toByteArray(new FileInputStream(tempFile.getLeft().toFile()))))); + IpInfoSiteSource ipInfoSiteSource = + new IpInfoSiteSource(new URL("http://localhost:" + wireMockRule.port() + "/data/file.mdb")); + wireMockRule.stubFor( + WireMock.head(UrlPattern.ANY) + .willReturn( + aResponse() + .withStatus(HttpStatus.SC_OK) + .withHeader("Etag", tempFile.getRight()) + .withHeader("Content-Length", Long.toString(TEST_FILE_SIZE)) + .withHeader("Last-Modified", "Thu, 01 Dec 1994 16:00:00 GMT"))); + wireMockRule.stubFor( + WireMock.get(UrlPattern.ANY) + .willReturn( + aResponse() + .withBody( + IOUtils.toByteArray(new FileInputStream(tempFile.getLeft().toFile()))))); FileInfo fileInfo = ipInfoSiteSource.fileInfo(); ipInfoSiteSource.produce(new LocalFileAcceptor(destinationDir.resolve("file.mdb")), false); assertEquals(tempFile.getRight().toLowerCase(), fileInfo.getMd5Checksum().stringFormat()); assertEquals(1, Files.list(destinationDir).count()); - assertTrue(Arrays.equals(Files.readAllBytes(tempFile.getLeft()), - Files.readAllBytes(destinationDir.resolve("file.mdb")))); + assertTrue( + Arrays.equals( + Files.readAllBytes(tempFile.getLeft()), + Files.readAllBytes(destinationDir.resolve("file.mdb")))); } @Test public void goodFilePreexistingSet() throws IOException, NoSuchAlgorithmException { Pair tempFile = generateTempFile(); Pair existingFile = generateTempFile(); + Path destinationDir = Files.createTempDirectory("dice-where"); Files.copy(existingFile.getLeft(), destinationDir.resolve("existingFile.mdb")); - IpInfoSiteSource ipInfoSiteSource = new IpInfoSiteSource( - new URL("http://localhost:" + wireMockRule.port() + "/data/file.mdb")); - wireMockRule.stubFor(WireMock.head(UrlPattern.ANY).willReturn( - aResponse().withStatus(HttpStatus.SC_OK) - .withHeader("Etag", tempFile.getRight()) - .withHeader("Content-Length", Long.toString(TEST_FILE_SIZE)) - .withHeader("Last-Modified", "Thu, 01 Dec 1994 16:00:00 GMT"))); - wireMockRule.stubFor(WireMock.get(UrlPattern.ANY) - .willReturn(aResponse().withBody( - IOUtils.toByteArray(new FileInputStream(tempFile.getLeft().toFile()))))); + wireMockRule.stubFor( + WireMock.head(UrlPattern.ANY) + .willReturn( + aResponse() + .withStatus(HttpStatus.SC_OK) + .withHeader("Etag", tempFile.getRight()) + .withHeader("Content-Length", Long.toString(TEST_FILE_SIZE)) + .withHeader("Last-Modified", "Thu, 01 Dec 1994 16:00:00 GMT"))); + + wireMockRule.stubFor( + WireMock.get(UrlPattern.ANY) + .willReturn( + aResponse() + .withBody( + IOUtils.toByteArray(new FileInputStream(tempFile.getLeft().toFile()))))); + + IpInfoSiteSource ipInfoSiteSource = + new IpInfoSiteSource(new URL("http://localhost:" + wireMockRule.port() + "/data/file.mdb")); FileInfo fileInfo = ipInfoSiteSource.fileInfo(); + ipInfoSiteSource.produce(new LocalFileAcceptor(destinationDir.resolve("file.mdb")), false); assertEquals(tempFile.getRight().toLowerCase(), fileInfo.getMd5Checksum().stringFormat()); assertEquals(2, Files.list(destinationDir).count()); - assertTrue(Arrays.equals(Files.readAllBytes(tempFile.getLeft()), - Files.readAllBytes(destinationDir.resolve("file.mdb")))); + assertTrue( + Arrays.equals( + Files.readAllBytes(tempFile.getLeft()), + Files.readAllBytes(destinationDir.resolve("file.mdb")))); } private Pair generateTempFile() throws IOException, NoSuchAlgorithmException { @@ -145,4 +178,4 @@ private Pair generateTempFile() throws IOException, NoSuchAlgorith String hex = (new HexBinaryAdapter()).marshal(md.digest(contents)); return Pair.of(tempFile, hex); } -} \ No newline at end of file +} diff --git a/dice-where-downloader-lib/src/test/java/technology/dice/dicewhere/downloader/stream/StreamWithMD5DecoratorTest.java b/dice-where-downloader-lib/src/test/java/technology/dice/dicewhere/downloader/stream/StreamWithMD5DecoratorTest.java index 0372736..8ccf1a7 100644 --- a/dice-where-downloader-lib/src/test/java/technology/dice/dicewhere/downloader/stream/StreamWithMD5DecoratorTest.java +++ b/dice-where-downloader-lib/src/test/java/technology/dice/dicewhere/downloader/stream/StreamWithMD5DecoratorTest.java @@ -1,21 +1,39 @@ package technology.dice.dicewhere.downloader.stream; -import static org.junit.Assert.assertEquals; +import static com.github.tomakehurst.wiremock.client.WireMock.aResponse; +import static com.github.tomakehurst.wiremock.core.WireMockConfiguration.wireMockConfig; +import com.github.tomakehurst.wiremock.client.WireMock; +import com.github.tomakehurst.wiremock.junit.WireMockRule; +import com.github.tomakehurst.wiremock.matching.UrlPattern; import java.io.FileInputStream; import java.io.IOException; +import java.net.HttpURLConnection; import java.net.URISyntaxException; +import java.net.URL; import java.nio.charset.Charset; import java.nio.file.Path; import java.security.NoSuchAlgorithmException; +import junit.framework.TestCase; import org.apache.commons.io.IOUtils; +import org.junit.BeforeClass; +import org.junit.ClassRule; import org.junit.Test; +import org.junit.internal.runners.JUnit4ClassRunner; +import org.junit.runner.RunWith; - -public class StreamWithMD5DecoratorTest { +@RunWith(JUnit4ClassRunner.class) +public class StreamWithMD5DecoratorTest extends TestCase { private static final String PATH = "/maxmind/maxmind-city-1.zip"; + @ClassRule static WireMockRule wireMockRule = new WireMockRule(wireMockConfig().dynamicPort()); + + @BeforeClass + public static void beforeClass() { + wireMockRule.start(); + } + @Test public void shouldSuccessfullyReadAndCalculateDigestOfStream() throws IOException, NoSuchAlgorithmException, URISyntaxException { @@ -23,14 +41,35 @@ public void shouldSuccessfullyReadAndCalculateDigestOfStream() StreamWithMD5Decorator is = StreamWithMD5Decorator.of(new FileInputStream(path.toFile())); String first = is.md5().stringFormat(); - //Read from the stream - IOUtils.toString(is, Charset.defaultCharset()); + // Read from the stream + IOUtils.toString(is.getInputStream(), Charset.defaultCharset()); - //Calculate multiple Hashes - is.md5().stringFormat(); - is.md5().stringFormat(); + // Assert the Stream Hash before and after + assertEquals(first, is.md5().stringFormat()); + assertEquals(first, "9c7dd68c8352f1c59a33efe0dca04f06"); + } + + @Test + public void shouldSuccessfullyReadAndCalculateDigestOfStreamFromHttp() + throws IOException, NoSuchAlgorithmException, URISyntaxException { + Path path = Path.of(getClass().getResource(PATH).toURI()); + + wireMockRule.stubFor( + WireMock.get(UrlPattern.ANY) + .willReturn( + aResponse().withBody(IOUtils.toByteArray(new FileInputStream(path.toFile()))))); + + URL url = new URL("http://localhost:" + wireMockRule.port() + "/data/file.mdb"); + HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + + StreamWithMD5Decorator is = StreamWithMD5Decorator.of(connection.getInputStream()); + + String first = is.md5().stringFormat(); + // Read from the stream + IOUtils.toString(is.getInputStream(), Charset.defaultCharset()); - //Assert the Stream Hash before and after + // Assert the Stream Hash before and after assertEquals(first, is.md5().stringFormat()); + assertEquals(first, "9c7dd68c8352f1c59a33efe0dca04f06"); } }