Fix #158 - Fix non ascii character handling (#372)

mojohaus · Jul 10, 2023 · d975178 · d975178 · mirabilos · Jul 10, 2023
1 parent c228b51
commit d975178
Show file tree

Hide file tree

Showing 2 changed files with 94 additions and 6 deletions.
diff --git a/src/main/java/org/codehaus/mojo/exec/LineRedirectOutputStream.java b/src/main/java/org/codehaus/mojo/exec/LineRedirectOutputStream.java
@@ -19,7 +19,10 @@
  * under the License.
  */
 
+import java.io.ByteArrayOutputStream;
 import java.io.OutputStream;
+import java.nio.charset.Charset;
+import java.util.Objects;
 import java.util.function.Consumer;
 
 /**
@@ -32,11 +35,17 @@
  */
 class LineRedirectOutputStream extends OutputStream {
 
-    private StringBuilder currentLine = new StringBuilder();
     private final Consumer<String> linePrinter;
+    private final Charset charset;
+    private ByteArrayOutputStream buffer = new ByteArrayOutputStream();
 
     public LineRedirectOutputStream(Consumer<String> linePrinter) {
-        this.linePrinter = linePrinter;
+        this(linePrinter, Charset.defaultCharset());
+    }
+
+    public LineRedirectOutputStream(Consumer<String> linePrinter, Charset charset) {
+        this.linePrinter = Objects.requireNonNull(linePrinter);
+        this.charset = Objects.requireNonNull(charset);
     }
 
     @Override
@@ -45,18 +54,23 @@ public void write(final int b) {
             printAndReset();
             return;
         }
-        currentLine.append((char) b);
+        buffer.write(b);
     }
 
     @Override
     public void flush() {
-        if (currentLine.length() > 0) {
+        if (buffer.size() > 0) {
             printAndReset();
         }
     }
 
+    @Override
+    public void close() {
+        flush();
+    }
+
     private void printAndReset() {
-        linePrinter.accept(currentLine.toString());
-        currentLine = new StringBuilder();
+        linePrinter.accept(new String(buffer.toByteArray(), charset));
+        buffer = new ByteArrayOutputStream();
     }
 }
diff --git a/src/test/java/org/codehaus/mojo/exec/LineRedirectOutputStreamTest.java b/src/test/java/org/codehaus/mojo/exec/LineRedirectOutputStreamTest.java
@@ -0,0 +1,74 @@
+package org.codehaus.mojo.exec;
+
+import org.junit.Assert;
+import org.junit.Assume;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.function.Function;
+
+public class LineRedirectOutputStreamTest {
+
+    @Test
+    public void givenExtendedUnicodeCharacterOutput_whenRedirectingWithUtf8Charset_thenShouldDecodeProperly()
+            throws IOException {
+        internalTestForCharset(StandardCharsets.UTF_8);
+    }
+
+    @Test
+    public void givenExtendedUnicodeCharacterOutput_whenRedirectingWithIso8859Charset_thenShouldDecodeProperly()
+            throws IOException {
+        internalTestForCharset(StandardCharsets.ISO_8859_1);
+    }
+
+    @Test
+    public void givenExtendedUnicodeCharacterOutput_whenRedirectingWithCp1252_thenShouldDecodeProperly()
+            throws IOException {
+        Assume.assumeTrue("The JVM does not contain the cp-1252 charset", Charset.availableCharsets().containsKey("windows-1252"));
+        internalTestForCharset(Charset.forName("windows-1252"));
+    }
+
+    @Test
+    public void givenExtendedUnicodeCharacterOutput_whenRedirectingWithDefaultCharset_thenShouldDecodeProperly()
+            throws IOException {
+        internalTestForCharset(Charset.defaultCharset());
+    }
+
+    @Test
+    public void givenExtendedUnicodeCharacterOutput_whenRedirectingWithCharsetUnspecified_thenShouldDecodeProperly()
+            throws IOException {
+        internalTestForCharset(sb -> new LineRedirectOutputStream(sb::append), Charset.defaultCharset());
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void givenNullCharset_whenInstantiating_thenShouldThrow() {
+        new LineRedirectOutputStream(new StringBuilder()::append, null);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void givenNullStringConsumer_whenInstantiating_thenShouldThrow() {
+        new LineRedirectOutputStream(null, Charset.defaultCharset());
+    }
+
+    private void internalTestForCharset(Charset charset) throws IOException {
+        internalTestForCharset(sb -> new LineRedirectOutputStream(sb::append, charset), charset);
+    }
+
+    private void internalTestForCharset(Function<StringBuilder, LineRedirectOutputStream> lineRedirectOutputStream,
+                                        Charset charset) throws IOException {
+        StringBuilder sb = new StringBuilder();
+        String firstLine = "Hello, 你好, नमस्ते, مرحبا, γεια σας, שלום, こんにちは, 안녕하세요!";
+        String secondLine = "🌍 Welcome to the world! 🌟✨🎉🔥";
+        String expectedString = firstLine + secondLine;
+
+        try (LineRedirectOutputStream os = lineRedirectOutputStream.apply(sb)) {
+            os.write(String.join("\n", firstLine, secondLine).getBytes(charset));
+        }
+
+        // The String to bytes to String is required here because StringCoding uses the Charset.defaultCharset()
+        // internally so it would make the test fail when testing for different charsets.
+        Assert.assertEquals(new String(expectedString.getBytes(charset), charset), sb.toString());
+    }
+}