@@ -61,6 +61,42 @@ public class XML {
61
61
62
62
/** The Character '/'. */
63
63
public static final Character SLASH = '/' ;
64
+
65
+ /**
66
+ * Creates an iterator for navigating Code Points in a string instead of
67
+ * characters.
68
+ *
69
+ * @see <a href=
70
+ * "http://stackoverflow.com/a/21791059/6030888">http://stackoverflow.com/a/21791059/6030888</a>
71
+ */
72
+ private static Iterable <Integer > codePointIterator (final String string ) {
73
+ return new Iterable <Integer >() {
74
+ @ Override
75
+ public Iterator <Integer > iterator () {
76
+ return new Iterator <Integer >() {
77
+ private int nextIndex = 0 ;
78
+ private int length = string .length ();
79
+
80
+ @ Override
81
+ public boolean hasNext () {
82
+ return this .nextIndex < this .length ;
83
+ }
84
+
85
+ @ Override
86
+ public Integer next () {
87
+ int result = string .codePointAt (this .nextIndex );
88
+ this .nextIndex += Character .charCount (result );
89
+ return result ;
90
+ }
91
+
92
+ @ Override
93
+ public void remove () {
94
+ throw new UnsupportedOperationException ();
95
+ }
96
+ };
97
+ }
98
+ };
99
+ }
64
100
65
101
/**
66
102
* Replace special characters with XML escapes:
@@ -79,8 +115,7 @@ public class XML {
79
115
*/
80
116
public static String escape (String string ) {
81
117
StringBuilder sb = new StringBuilder (string .length ());
82
- for (int i = 0 , length = string .length (); i < length ; i ++) {
83
- char c = string .charAt (i );
118
+ for (final int c : codePointIterator (string )) {
84
119
switch (c ) {
85
120
case '&' :
86
121
sb .append ("&" );
@@ -98,18 +133,18 @@ public static String escape(String string) {
98
133
sb .append ("'" );
99
134
break ;
100
135
default :
101
- if (c < ' ' || ( c >= '\u0080' && c < '\u00a0' ) || ( c >= '\u2000' && c < '\u2100' )) {
136
+ if (Character . isISOControl ( c )) {
102
137
sb .append ("&#x" );
103
138
sb .append (Integer .toHexString (c ));
104
139
sb .append (";" );
105
140
} else {
106
- sb .append (c );
141
+ sb .append (new String ( Character . toChars ( c )) );
107
142
}
108
143
}
109
144
}
110
145
return sb .toString ();
111
146
}
112
-
147
+
113
148
/**
114
149
* Removes XML escapes from the string.
115
150
*
0 commit comments