@@ -83,7 +83,9 @@ def clean_header(header):
83
83
cleaned_header = {}
84
84
for k , v in header .items ():
85
85
# Strip BOM, whitespace, and enclosing quotation marks if present
86
- cleaned_key = k .lstrip ("\ufeff " ).strip ().strip ('"' ) if isinstance (k , str ) else k
86
+ cleaned_key = (
87
+ k .lstrip ("\ufeff " ).strip ().strip ('"' ) if isinstance (k , str ) else k
88
+ )
87
89
cleaned_header [cleaned_key ] = v
88
90
return cleaned_header
89
91
@@ -145,11 +147,19 @@ def process_field_properties(data):
145
147
condition = True
146
148
147
149
# Check Field Annotation for special flags - safely handle non-string values
148
- annotation = str (data .get ("Field Annotation" , "" )).upper () if data .get ("Field Annotation" ) is not None else ""
149
- if condition and isinstance (annotation , str ) and (
150
- "@READONLY" in annotation
151
- or "@HIDDEN" in annotation
152
- or "@CALCTEXT" in annotation
150
+ annotation = (
151
+ str (data .get ("Field Annotation" , "" )).upper ()
152
+ if data .get ("Field Annotation" ) is not None
153
+ else ""
154
+ )
155
+ if (
156
+ condition
157
+ and isinstance (annotation , str )
158
+ and (
159
+ "@READONLY" in annotation
160
+ or "@HIDDEN" in annotation
161
+ or "@CALCTEXT" in annotation
162
+ )
153
163
):
154
164
condition = False
155
165
@@ -158,13 +168,18 @@ def process_field_properties(data):
158
168
"isAbout" : f"items/{ data ['Variable / Field Name' ]} " ,
159
169
"isVis" : condition ,
160
170
}
161
-
171
+
162
172
# Handle Required Field check, accounting for NaN values and empty strings
163
173
required_field = data .get ("Required Field?" )
164
- if pd .notna (required_field ) and str (required_field ).strip (): # Check if value is not NaN and not empty
174
+ if (
175
+ pd .notna (required_field ) and str (required_field ).strip ()
176
+ ): # Check if value is not NaN and not empty
165
177
if str (required_field ).lower () == "y" :
166
178
prop_obj ["valueRequired" ] = True
167
- elif str (required_field ).lower () not in ["" , "n" ]: # Only raise error for unexpected values
179
+ elif str (required_field ).lower () not in [
180
+ "" ,
181
+ "n" ,
182
+ ]: # Only raise error for unexpected values
168
183
raise ValueError (
169
184
f"value { required_field } not supported yet for redcap:Required Field?"
170
185
)
@@ -265,7 +280,7 @@ def process_choices(choices_str, field_name):
265
280
266
281
def parse_html (input_string , default_language = "en" ):
267
282
result = {}
268
-
283
+
269
284
# Handle non-string input
270
285
if not isinstance (input_string , str ):
271
286
if pd .isna (input_string ): # Handle NaN values
@@ -287,7 +302,9 @@ def parse_html(input_string, default_language="en"):
287
302
if not result : # If no text was extracted
288
303
result [default_language ] = soup .get_text (strip = True )
289
304
else :
290
- result [default_language ] = soup .get_text (strip = True ) # Use the entire text as default language text
305
+ result [default_language ] = soup .get_text (
306
+ strip = True
307
+ ) # Use the entire text as default language text
291
308
return result
292
309
293
310
@@ -525,24 +542,26 @@ def parse_language_iso_codes(input_string):
525
542
]
526
543
527
544
528
- def process_csv (
529
- csv_file , abs_folder_path , schema_context_url , protocol_name
530
- ):
545
+ def process_csv (csv_file , abs_folder_path , schema_context_url , protocol_name ):
531
546
datas = {}
532
547
order = {}
533
548
compute = {}
534
549
languages = []
535
550
536
551
# Read CSV with explicit BOM handling, and maintain original order
537
- df = pd .read_csv (csv_file , encoding = "utf-8-sig" ) # utf-8-sig handles BOM automatically
538
-
552
+ df = pd .read_csv (
553
+ csv_file , encoding = "utf-8-sig"
554
+ ) # utf-8-sig handles BOM automatically
555
+
539
556
# Clean column names (headers)
540
- df .columns = df .columns .map (lambda x : x .strip ().strip ('"' ).lstrip ("\ufeff " ))
557
+ df .columns = df .columns .map (
558
+ lambda x : x .strip ().strip ('"' ).lstrip ("\ufeff " )
559
+ )
541
560
542
561
# Clean string values in the dataframe
543
- object_columns = df .select_dtypes (include = [' object' ]).columns
562
+ object_columns = df .select_dtypes (include = [" object" ]).columns
544
563
for col in object_columns :
545
- df [col ] = df [col ].astype (str ).replace (' nan' , '' )
564
+ df [col ] = df [col ].astype (str ).replace (" nan" , "" )
546
565
547
566
# Initialize structures for each unique form
548
567
unique_forms = df ["Form Name" ].unique ()
@@ -557,17 +576,17 @@ def process_csv(
557
576
# TODO: decide whether to restore language detection from the field label (see the commented-out parse_language_iso_codes call below)
558
577
# if not languages:
559
578
# languages = parse_language_iso_codes(row["Field Label"])
560
-
579
+
561
580
# Process rows in original order
562
581
for _ , row in df .iterrows ():
563
582
form_name = row ["Form Name" ]
564
583
field_name = row ["Variable / Field Name" ]
565
584
field_type = row .get ("Field Type" , "" )
566
585
field_annotation = row .get ("Field Annotation" )
567
-
586
+
568
587
# Add row data to datas dictionary
569
588
datas [form_name ].append (row .to_dict ())
570
-
589
+
571
590
if field_type in COMPUTE_LIST :
572
591
condition = normalize_condition (
573
592
row ["Choices, Calculations, OR Slider Labels" ],
@@ -579,7 +598,10 @@ def process_csv(
579
598
"jsExpression" : condition ,
580
599
}
581
600
)
582
- elif isinstance (field_annotation , str ) and "@CALCTEXT" in field_annotation .upper ():
601
+ elif (
602
+ isinstance (field_annotation , str )
603
+ and "@CALCTEXT" in field_annotation .upper ()
604
+ ):
583
605
calc_text = field_annotation
584
606
match = re .search (r"@CALCTEXT\((.*)\)" , calc_text )
585
607
if match :
0 commit comments