Skip to content

Commit 34652a8

Browse files
author
John J. Aylward
committed
Updates to iterate on code points instead of characters and changes the encoding to only encode control characters as defined by ISO standard.
1 parent a2d3b59 commit 34652a8

File tree

1 file changed

+40
-5
lines changed

1 file changed

+40
-5
lines changed

XML.java

+40-5
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,42 @@ public class XML {
6161

6262
/** The Character '/'. */
6363
public static final Character SLASH = '/';
64+
65+
/**
66+
* Creates an iterator for navigating Code Points in a string instead of
67+
* characters.
68+
*
69+
* @see <a href=
70+
* "http://stackoverflow.com/a/21791059/6030888">http://stackoverflow.com/a/21791059/6030888</a>
71+
*/
72+
private static Iterable<Integer> codePointIterator(final String string) {
73+
return new Iterable<Integer>() {
74+
@Override
75+
public Iterator<Integer> iterator() {
76+
return new Iterator<Integer>() {
77+
private int nextIndex = 0;
78+
private int length = string.length();
79+
80+
@Override
81+
public boolean hasNext() {
82+
return this.nextIndex < this.length;
83+
}
84+
85+
@Override
86+
public Integer next() {
87+
int result = string.codePointAt(this.nextIndex);
88+
this.nextIndex += Character.charCount(result);
89+
return result;
90+
}
91+
92+
@Override
93+
public void remove() {
94+
throw new UnsupportedOperationException();
95+
}
96+
};
97+
}
98+
};
99+
}
64100

65101
/**
66102
* Replace special characters with XML escapes:
@@ -79,8 +115,7 @@ public class XML {
79115
*/
80116
public static String escape(String string) {
81117
StringBuilder sb = new StringBuilder(string.length());
82-
for (int i = 0, length = string.length(); i < length; i++) {
83-
char c = string.charAt(i);
118+
for (final int c : codePointIterator(string)) {
84119
switch (c) {
85120
case '&':
86121
sb.append("&amp;");
@@ -98,18 +133,18 @@ public static String escape(String string) {
98133
sb.append("&apos;");
99134
break;
100135
default:
101-
if (c < ' ' || (c >= '\u0080' && c < '\u00a0') || (c >= '\u2000' && c < '\u2100')) {
136+
if (Character.isISOControl(c)) {
102137
sb.append("&#x");
103138
sb.append(Integer.toHexString(c));
104139
sb.append(";");
105140
} else {
106-
sb.append(c);
141+
sb.append(new String(Character.toChars(c)));
107142
}
108143
}
109144
}
110145
return sb.toString();
111146
}
112-
147+
113148
/**
114149
* Removes XML escapes from the string.
115150
*

0 commit comments

Comments
 (0)