Skip to content

Commit de855c5

Browse files
committed
Fixes stleary#361.
* Removes the unescape calls from the XML class * Fixes a bug in the unescape method * Moves the unescape logic into the XMLTokener class for consistency
1 parent 4cb1ae8 commit de855c5

File tree

3 files changed

+37
-36
lines changed

3 files changed

+37
-36
lines changed

JSONML.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ private static Object parse(
174174
if (!(token instanceof String)) {
175175
throw x.syntaxError("Missing value");
176176
}
177-
newjo.accumulate(attribute, keepStrings ? XML.unescape((String)token) :XML.stringToValue((String)token));
177+
newjo.accumulate(attribute, keepStrings ? ((String)token) :XML.stringToValue((String)token));
178178
token = null;
179179
} else {
180180
newjo.accumulate(attribute, "");

XML.java

+5-33
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ public static String escape(String string) {
141141
if (mustEscape(cp)) {
142142
sb.append("&#x");
143143
sb.append(Integer.toHexString(cp));
144-
sb.append(";");
144+
sb.append(';');
145145
} else {
146146
sb.appendCodePoint(cp);
147147
}
@@ -191,31 +191,7 @@ public static String unescape(String string) {
191191
final int semic = string.indexOf(';', i);
192192
if (semic > i) {
193193
final String entity = string.substring(i + 1, semic);
194-
if (entity.charAt(0) == '#') {
195-
int cp;
196-
if (entity.charAt(1) == 'x') {
197-
// hex encoded unicode
198-
cp = Integer.parseInt(entity.substring(2), 16);
199-
} else {
200-
// decimal encoded unicode
201-
cp = Integer.parseInt(entity.substring(1));
202-
}
203-
sb.appendCodePoint(cp);
204-
} else {
205-
if ("quot".equalsIgnoreCase(entity)) {
206-
sb.append('"');
207-
} else if ("amp".equalsIgnoreCase(entity)) {
208-
sb.append('&');
209-
} else if ("apos".equalsIgnoreCase(entity)) {
210-
sb.append('\'');
211-
} else if ("lt".equalsIgnoreCase(entity)) {
212-
sb.append('<');
213-
} else if ("gt".equalsIgnoreCase(entity)) {
214-
sb.append('>');
215-
} else {// unsupported xml entity. leave encoded
216-
sb.append('&').append(entity).append(';');
217-
}
218-
}
194+
sb.append(XMLTokener.unescapeEntity(entity));
219195
// skip past the entity we just parsed.
220196
i += entity.length() + 1;
221197
} else {
@@ -364,7 +340,7 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool
364340
throw x.syntaxError("Missing value");
365341
}
366342
jsonobject.accumulate(string,
367-
keepStrings ? unescape((String)token) : stringToValue((String) token));
343+
keepStrings ? ((String)token) : stringToValue((String) token));
368344
token = null;
369345
} else {
370346
jsonobject.accumulate(string, "");
@@ -396,7 +372,7 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool
396372
string = (String) token;
397373
if (string.length() > 0) {
398374
jsonobject.accumulate("content",
399-
keepStrings ? unescape(string) : stringToValue(string));
375+
keepStrings ? string : stringToValue(string));
400376
}
401377

402378
} else if (token == LT) {
@@ -430,11 +406,7 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool
430406
* @return JSON value of this string or the string
431407
*/
432408
public static Object stringToValue(String string) {
433-
Object ret = JSONObject.stringToValue(string);
434-
if(ret instanceof String){
435-
return unescape((String)ret);
436-
}
437-
return ret;
409+
return JSONObject.stringToValue(string);
438410
}
439411

440412
/**

XMLTokener.java

+31-2
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,37 @@ public Object nextEntity(char ampersand) throws JSONException {
138138
}
139139
}
140140
String string = sb.toString();
141-
Object object = entity.get(string);
142-
return object != null ? object : ampersand + string + ";";
141+
return unescapeEntity(string);
142+
}
143+
144+
/**
145+
* Unescapes an XML entity encoding;
146+
* @param e entity (only the actual entity value, not the preceding & or ending ;
147+
* @return
148+
*/
149+
static String unescapeEntity(String e) {
150+
// validate
151+
if (e == null || e.isEmpty()) {
152+
return "";
153+
}
154+
// if our entity is an encoded unicode point, parse it.
155+
if (e.charAt(0) == '#') {
156+
int cp;
157+
if (e.charAt(1) == 'x') {
158+
// hex encoded unicode
159+
cp = Integer.parseInt(e.substring(2), 16);
160+
} else {
161+
// decimal encoded unicode
162+
cp = Integer.parseInt(e.substring(1));
163+
}
164+
return new String(new int[] {cp},0,1);
165+
}
166+
Character knownEntity = entity.get(e);
167+
if(knownEntity==null) {
168+
// we don't know the entity so keep it encoded
169+
return '&' + e + ';';
170+
}
171+
return knownEntity.toString();
143172
}
144173

145174

0 commit comments

Comments
 (0)