6
6
from lmformatenforcer import JsonSchemaParser
7
7
from enum import Enum
8
8
import pytest
9
- from lmformatenforcer .consts import BACKSLASH , BACKSLASH_ESCAPING_CHARACTERS , CONFIG_ENV_VAR_STRICT_JSON_FIELD_ORDER , CONFIG_ENV_VAR_MAX_CONSECUTIVE_WHITESPACES , CONFIG_ENV_VAR_MAX_JSON_ARRAY_LENGTH
9
+ from lmformatenforcer .characterlevelparser import CharacterLevelParserConfig
10
+ from lmformatenforcer .consts import BACKSLASH , BACKSLASH_ESCAPING_CHARACTERS , COMPLETE_ALPHABET , CONFIG_ENV_VAR_STRICT_JSON_FIELD_ORDER , CONFIG_ENV_VAR_MAX_CONSECUTIVE_WHITESPACES , CONFIG_ENV_VAR_MAX_JSON_ARRAY_LENGTH
10
11
11
12
from .common import assert_parser_with_string , CharacterNotAllowedException
12
13
13
14
14
def _test_json_schema_parsing_with_string(string: str,
                                          schema_dict: Optional[dict],
                                          expect_success: bool,
                                          profile_file_path: Optional[str] = None,
                                          ensure_ascii_in_json_dumps: bool = False):
    """Run ``string`` through a ``JsonSchemaParser`` built from ``schema_dict``.

    The parser alphabet is ``COMPLETE_ALPHABET`` extended with every character
    that occurs in ``string`` and, when success is expected, in its minified
    re-serialization (newline is never added).

    When ``expect_success`` is true, the minified and the pretty-printed
    re-serializations of ``string`` are checked against the same parser too.

    Args:
        string: Candidate JSON text to feed to the parser.
        schema_dict: JSON schema passed to ``JsonSchemaParser``; may be None.
        expect_success: Whether the parser is expected to accept ``string``.
        profile_file_path: Forwarded to ``assert_parser_with_string`` for the
            first check only.
        ensure_ascii_in_json_dumps: ``ensure_ascii`` value used when
            re-serializing ``string`` for the minified / pretty-printed checks.
    """
    required_chars = set(string)
    if expect_success:
        try:
            # With ensure_ascii=False, \uXXXX escapes in the input come back as
            # the characters they denote, which must be part of the alphabet.
            required_chars.update(
                json.dumps(json.loads(string), separators=(',', ':'), ensure_ascii=False)
            )
        except ValueError:
            # Not valid JSON (JSONDecodeError is a ValueError): keep going so the
            # assertions below report the failure instead of this setup step.
            pass

    # Sorted so the alphabet does not depend on set iteration order.
    missing_chars = sorted(required_chars - set(COMPLETE_ALPHABET) - {'\n'})
    alphabet = COMPLETE_ALPHABET + ''.join(missing_chars)

    config = CharacterLevelParserConfig(alphabet=alphabet)
    parser = JsonSchemaParser(schema_dict, config=config)
    assert_parser_with_string(string, parser, expect_success, profile_file_path)
    if not expect_success:
        return

    # If expecting success, also check minified and pretty-printed
    parsed = json.loads(string)
    minified = json.dumps(parsed, separators=(',', ':'), ensure_ascii=ensure_ascii_in_json_dumps)
    assert_parser_with_string(minified, parser, expect_success)
    pretty_printed = json.dumps(parsed, indent=2, ensure_ascii=ensure_ascii_in_json_dumps)
    assert_parser_with_string(pretty_printed, parser, expect_success)
23
41
24
42
@@ -190,22 +208,22 @@ class ListOfNoMinLengthModel(BaseModel):
190
208
def test_string_escaping():
    """Check backslash escape handling inside JSON string values.

    Valid single-character escapes and a complete ``\\uXXXX`` escape must be
    accepted; a backslash followed by a non-escaping character, a truncated
    unicode escape and a unicode escape with a non-hex digit must be rejected.
    """

    def check(escape_body: str, expect_success: bool) -> None:
        # The escape body must follow the backslash directly: any character in
        # between (e.g. a space) would itself form an invalid escape sequence.
        test_string = f'{{"num":1,"message":"hello {BACKSLASH}{escape_body} world"}}'
        _test_json_schema_parsing_with_string(test_string,
                                              SampleModel.model_json_schema(),
                                              expect_success,
                                              ensure_ascii_in_json_dumps=True)

    for escaping_character in BACKSLASH_ESCAPING_CHARACTERS:
        check(escaping_character, True)
    for non_escaping_character in 'a1?':
        check(non_escaping_character, False)

    # Unicode
    check('uf9f0', True)

    # Not enough unicode digits
    check('uf9f', False)

    # Unicode digit outside of hex range
    check('uf9fP', False)
209
227
210
228
211
229
def test_comma_after_all_object_keys_fails ():
@@ -774,4 +792,9 @@ def test_invalid_number_formats_with_leading_zeros(test_input):
774
792
('{"value": -9007199254740992}' , True ),
775
793
])
776
794
def test_number_edge_cases (test_input , expected_success ):
777
- _test_json_schema_parsing_with_string (test_input , schema , expected_success )
795
+ _test_json_schema_parsing_with_string (test_input , schema , expected_success )
796
+
797
def test_chinese_oneof_schema():
    """Check that a oneOf array schema with Chinese enum values accepts matching output."""

    def event_variant(event_name, roles):
        # One oneOf alternative: an event object whose event_type and argument
        # roles are restricted to the given enum values.
        return {
            "type": "object",
            "properties": {
                "trigger": {"type": "string"},
                "event_type": {"enum": [event_name]},
                "arguments": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "role": {"enum": list(roles)},
                            "argument": {"type": "string"},
                        },
                        "required": ["role", "argument"],
                    },
                },
            },
            "required": ["trigger", "event_type", "arguments"],
        }

    listing_roles = ["上市公司", "证券代码", "环节", "披露时间", "发行价格", "事件时间", "市值", "募资金额"]
    interview_roles = ["公司名称", "披露时间", "被约谈时间", "约谈机构"]
    test_schema = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "array",
        "items": {
            "oneOf": [
                event_variant("公司上市", listing_roles),
                event_variant("被约谈", interview_roles),
            ],
        },
    }
    correct_output = """[{"trigger": "IPO", "event_type": "公司上市", "arguments": [{"role": "上市公司", "argument": "理想汽车"}, {"role": "披露时间", "argument": "30日"}, {"role": "发行价格", "argument": "8-10美元"}, {"role": "环节", "argument": "筹备上市"}]}]"""
    _test_json_schema_parsing_with_string(correct_output, test_schema, True)
0 commit comments