Skip to content

Commit

Permalink
Fix #545 (encoding handling, not only wrong for HTTP).
Browse files Browse the repository at this point in the history
Spotless always provides a decoded string to WTP.
  • Loading branch information
fvgh committed Mar 24, 2020
1 parent 5acecad commit 94010f4
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 0 deletions.
4 changes: 4 additions & 0 deletions _ext/eclipse-wtp/CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
We adhere to the [keepachangelog](https://keepachangelog.com/en/1.0.0/) format (starting after version `3.15.1`).

## [Unreleased]
### Fixed
* Handling of character encodings which require more than 1 byte. Previously the WTP
decoded input twice, once using the encoding configured by the user, and
once again using the default platform character set ([#545](https://github.com/diffplug/spotless/issues/545)).

## [3.15.2] - 2020-03-04
### Fixed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
Expand Down Expand Up @@ -49,6 +50,7 @@
class ContentTypeManager extends NoContentTypeSpecificHandling {
private final Map<String, IContentType> id2Object;
private final IContentType processorStepType;
private final IContentDescription processorStepDescription;

/**
* Content type manager as required for cleanup steps.
Expand All @@ -66,6 +68,7 @@ class ContentTypeManager extends NoContentTypeSpecificHandling {
if (null == processorStepType) {
throw new IllegalArgumentException("The manager does not support content type " + formatterContentTypeID);
}
processorStepDescription = new StringDescription(processorStepType);
}

@Override
Expand All @@ -83,6 +86,45 @@ public IContentType findContentTypeFor(InputStream contents, String fileName) th
return processorStepType;
}

/**
 * Returns the single content description held by this manager.
 * <p>
 * The given stream, file name and options are not inspected; the same
 * pre-built description instance is returned on every call.
 */
@Override
public IContentDescription getDescriptionFor(InputStream contents, String fileName, QualifiedName[] options) throws IOException {
return processorStepDescription;
}

/**
 * Minimal, read-only content description for input that Spotless has already
 * decoded into a Java string. It carries a fixed content type, exposes no
 * properties and rejects any attempt to set one.
 */
private static class StringDescription implements IContentDescription {

	private final IContentType contentType;

	public StringDescription(IContentType type) {
		this.contentType = type;
	}

	@Override
	public IContentType getContentType() {
		return contentType;
	}

	@Override
	public String getCharset() {
		// Spotless operates on a decoded string, meaning the input always has the "internal" encoding.
		Charset platformCharset = Charset.defaultCharset();
		return platformCharset.name();
	}

	@Override
	public boolean isRequested(QualifiedName key) {
		// No property is ever reported as requested, so setProperty is not expected to be used.
		return false;
	}

	@Override
	public Object getProperty(QualifiedName key) {
		// The property map is treated as empty.
		return null;
	}

	@Override
	public void setProperty(QualifiedName key, Object value) {
		// This description is immutable: every write is rejected.
		throw new IllegalArgumentException("Content description key cannot be set: " + key);
	}
}

/**
* The WTP uses the manager only for ID mapping, so most of the methods are not used.
* Actually it has a hand stitched way for transforming the content type ID
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,28 @@ public void formatCSS() throws Exception {
testData.expected("css.html"), output);
}

/**
 * Checks that the formatter does not decode its (already decoded) input a second time.
 * <p>
 * The {@code file.encoding} system property is temporarily switched to ISO-8859-1 while
 * the UTF-8 sample is formatted. The original value is always restored afterwards, even
 * if formatting or resource loading fails, so the change cannot leak into other tests.
 */
@Test
public void checkNoDoubleEndoding() throws Exception {
	String osEncoding = System.getProperty("file.encoding");
	//Assure that file.encoding is not used during the clean-up.
	System.setProperty("file.encoding", "ISO-8859-1");
	String output;
	try {
		//Check that WTP does not try to do UTF-8 conversion again (since done by Spotless framework)
		String[] input = testData.input("utf-8.html");
		output = formatter.format(input[0]);
	} finally {
		//System.setProperty rejects a null value, so an originally unset property must be cleared instead.
		if (null == osEncoding) {
			System.clearProperty("file.encoding");
		} else {
			System.setProperty("file.encoding", osEncoding);
		}
	}
	assertEquals("Unexpected formatting of UTF-8", testData.expected("utf-8.html"), output);
}

/**
 * Checks that a leading byte order mark in the input does not appear in the
 * formatted output: the BOM sample must format to the same expected result
 * as the BOM-free UTF-8 sample.
 */
@Test
public void checkBOMisStripped() throws Exception {
	String[] withBom = testData.input("bom.html");
	String[] withoutBom = testData.input("utf-8.html");
	//Once decoded, the UTF-8 BOM is a single UTF-16 character, so the inputs differ by exactly one char.
	int lengthIgnoringBom = withBom[0].length() - 1;
	int plainLength = withoutBom[0].length();
	assertEquals("BOM input invalid", lengthIgnoringBom, plainLength);
	String formatted = formatter.format(withBom[0]);
	String expected = testData.expected("utf-8.html");
	assertEquals("BOM is not stripped", expected, formatted);
}

@Test(expected = IllegalArgumentException.class)
public void configurationChange() throws Exception {
new EclipseHtmlFormatterStepImpl(new Properties());
Expand Down
7 changes: 7 additions & 0 deletions _ext/eclipse-wtp/src/test/resources/html/expected/utf-8.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!DOCTYPE html>
<HTML>
<HEAD>
<META charset="UTF-8">
<TITLE>ÄÜ€</TITLE>
</HEAD>
</HTML>
2 changes: 2 additions & 0 deletions _ext/eclipse-wtp/src/test/resources/html/input/bom.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<!DOCTYPE html>
<html><head><meta charset="UTF-8"><title>ÄÜ€</title></head></html>
2 changes: 2 additions & 0 deletions _ext/eclipse-wtp/src/test/resources/html/input/utf-8.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<!DOCTYPE html>
<html><head><meta charset="UTF-8"><title>ÄÜ€</title></head></html>

0 comments on commit 94010f4

Please sign in to comment.