33import java .io .ByteArrayOutputStream ;
44import java .io .StringWriter ;
55import java .io .Writer ;
6+ import java .nio .charset .StandardCharsets ;
67
78import org .junit .jupiter .api .Test ;
89
@@ -17,6 +18,10 @@ class SurrogateWrite223Test extends JUnit5TestBase
1718{
1819 private final JsonFactory DEFAULT_JSON_F = newStreamFactory ();
1920
21+ private final JsonFactory SURROGATE_COMBINING_JSON_F = JsonFactory .builder () // factory with surrogate combining enabled; reused by the tests in this class
22+ .enable (JsonWriteFeature .COMBINE_UNICODE_SURROGATES_IN_UTF8 )
23+ .build ();
24+
2025 // for [core#223]
2126 @ Test
2227 void surrogatesDefaultSetting () throws Exception {
@@ -35,9 +40,7 @@ void surrogatesByteBacked() throws Exception
3540
3641 out = new ByteArrayOutputStream ();
3742
38- JsonFactory f = JsonFactory .builder ()
39- .enable (JsonWriteFeature .COMBINE_UNICODE_SURROGATES_IN_UTF8 )
40- .build ();
43+ JsonFactory f = SURROGATE_COMBINING_JSON_F ;
4144 g = f .createGenerator (out );
4245 g .writeStartArray ();
4346 g .writeString (toQuote );
@@ -96,9 +99,7 @@ void surrogatesCharBacked() throws Exception
9699 //https://github.com/FasterXML/jackson-core/issues/1359
97100 @ Test
98101 void checkNonSurrogates () throws Exception {
99- JsonFactory f = JsonFactory .builder ()
100- .enable (JsonWriteFeature .COMBINE_UNICODE_SURROGATES_IN_UTF8 )
101- .build ();
102+ JsonFactory f = SURROGATE_COMBINING_JSON_F ;
102103 ByteArrayOutputStream out = new ByteArrayOutputStream ();
103104 try (JsonGenerator gen = f .createGenerator (out )) {
104105 gen .writeStartObject ();
@@ -126,9 +127,7 @@ void checkNonSurrogates() throws Exception {
126127
127128 @ Test
128129 void checkSurrogateWithCharacterEscapes () throws Exception {
129- JsonFactory f = JsonFactory .builder ()
130- .enable (JsonWriteFeature .COMBINE_UNICODE_SURROGATES_IN_UTF8 )
131- .build ();
130+ JsonFactory f = SURROGATE_COMBINING_JSON_F ;
132131 f .setCharacterEscapes (JsonpCharacterEscapes .instance ());
133132 ByteArrayOutputStream out = new ByteArrayOutputStream ();
134133 try (JsonGenerator gen = f .createGenerator (out )) {
@@ -140,4 +139,37 @@ void checkSurrogateWithCharacterEscapes() throws Exception {
140139 String json = out .toString ("UTF-8" );
141140 assertEquals ("{\" test_emoji\" :\" \uD83D \uDE0A \" }" , json );
142141 }
142+
143+ // Regression test for https://github.com/FasterXML/jackson-core/issues/1473
144+ @ Test
145+ void surrogateCharSplitInTwoSegments () throws Exception
146+ {
147+ // UTF8JsonGenerator must avoid splitting a surrogate pair
148+ // across separate segments. We want to test the third segment
149+ // split to make sure indexes, offsets, etc. are all correct.
150+ // By default, segments are split every 1000 chars.
151+ // Thus, we need a string of length 2001 whose surrogate pair
152+ // occupies the (1-based) positions 2000 and 2001.
153+ int count = 1999 ;
154+ char [] chars = new char [count ];
155+ java .util .Arrays .fill (chars , 'x' );
156+ String base = new String (chars );
157+
158+ final String VALUE = base + "\uD83E \uDEE1 " ;
159+
160+ ByteArrayOutputStream bb = new ByteArrayOutputStream ();
161+ try (JsonGenerator g = SURROGATE_COMBINING_JSON_F .createGenerator (bb )) {
162+ g .enable (JsonGenerator .Feature .COMBINE_UNICODE_SURROGATES_IN_UTF8 ); // NOTE(review): the factory is already built with JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8, so this looks redundant - confirm
163+
164+ g .writeStartArray ();
165+ g .writeString (VALUE );
166+ g .writeEndArray ();
167+ }
168+
169+ String result = new String (bb .toByteArray (), StandardCharsets .UTF_8 );
170+
171+ // count + 2 skips the leading [" plus the filler chars; length - 2 drops the trailing "]: result should contain ["xxxx....🫡"]
172+ // "\uD83E\uDEE1" is the UTF-16 surrogate pair for that emoji
173+ assertEquals ("\uD83E \uDEE1 " , result .substring (count +2 , result .length ()-2 ));
174+ }
143175}
0 commit comments