
Commit 78ca16a

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent c6aa687 commit 78ca16a

2 files changed: +53 additions, -29 deletions

reproschema/redcap2reproschema.py

Lines changed: 45 additions & 23 deletions
@@ -83,7 +83,9 @@ def clean_header(header):
     cleaned_header = {}
     for k, v in header.items():
         # Strip BOM, whitespace, and enclosing quotation marks if present
-        cleaned_key = k.lstrip("\ufeff").strip().strip('"') if isinstance(k, str) else k
+        cleaned_key = (
+            k.lstrip("\ufeff").strip().strip('"') if isinstance(k, str) else k
+        )
         cleaned_header[cleaned_key] = v
     return cleaned_header
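
For context, a minimal usage sketch (not part of the commit) of what the reformatted clean_header does to a header row from a REDCap CSV export, assuming the package is installed and the function stays module-level:

from reproschema.redcap2reproschema import clean_header

# A key as it can arrive from a CSV export: BOM prefix, stray spaces, enclosing quotes.
header = {'\ufeff"Variable / Field Name"': "record_id", " Form Name ": "demographics"}
print(clean_header(header))
# {'Variable / Field Name': 'record_id', 'Form Name': 'demographics'}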

@@ -145,11 +147,19 @@ def process_field_properties(data):
     condition = True
 
     # Check Field Annotation for special flags - safely handle non-string values
-    annotation = str(data.get("Field Annotation", "")).upper() if data.get("Field Annotation") is not None else ""
-    if condition and isinstance(annotation, str) and (
-        "@READONLY" in annotation
-        or "@HIDDEN" in annotation
-        or "@CALCTEXT" in annotation
+    annotation = (
+        str(data.get("Field Annotation", "")).upper()
+        if data.get("Field Annotation") is not None
+        else ""
+    )
+    if (
+        condition
+        and isinstance(annotation, str)
+        and (
+            "@READONLY" in annotation
+            or "@HIDDEN" in annotation
+            or "@CALCTEXT" in annotation
+        )
     ):
         condition = False
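
The hunk above only reflows the condition; as an illustrative sketch (the helper name below is hypothetical, not part of the module), the visibility check reduces to:

def is_visible(field_annotation):
    # Hypothetical standalone version of the @READONLY/@HIDDEN/@CALCTEXT check above.
    annotation = (
        str(field_annotation).upper() if field_annotation is not None else ""
    )
    return not any(
        flag in annotation for flag in ("@READONLY", "@HIDDEN", "@CALCTEXT")
    )

print(is_visible("@HIDDEN-SURVEY"))       # False
print(is_visible("some free-text note"))  # True
print(is_visible(None))                   # True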

@@ -158,13 +168,18 @@ def process_field_properties(data):
         "isAbout": f"items/{data['Variable / Field Name']}",
         "isVis": condition,
     }
-
+
     # Handle Required Field check, accounting for NaN values and empty strings
     required_field = data.get("Required Field?")
-    if pd.notna(required_field) and str(required_field).strip():  # Check if value is not NaN and not empty
+    if (
+        pd.notna(required_field) and str(required_field).strip()
+    ):  # Check if value is not NaN and not empty
         if str(required_field).lower() == "y":
             prop_obj["valueRequired"] = True
-        elif str(required_field).lower() not in ["", "n"]:  # Only raise error for unexpected values
+        elif str(required_field).lower() not in [
+            "",
+            "n",
+        ]:  # Only raise error for unexpected values
             raise ValueError(
                 f"value {required_field} not supported yet for redcap:Required Field?"
             )
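
Again purely as a sketch (hypothetical standalone helper, same logic as the diff): "y" marks the item required, blanks, NaN, and "n" are ignored, and anything else raises:

import pandas as pd

def required_property(required_field):
    # Hypothetical isolation of the "Required Field?" handling shown above.
    prop_obj = {}
    if pd.notna(required_field) and str(required_field).strip():
        if str(required_field).lower() == "y":
            prop_obj["valueRequired"] = True
        elif str(required_field).lower() not in ["", "n"]:
            raise ValueError(
                f"value {required_field} not supported yet for redcap:Required Field?"
            )
    return prop_obj

print(required_property("y"))           # {'valueRequired': True}
print(required_property(float("nan")))  # {}
print(required_property("n"))           # {}
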
@@ -265,7 +280,7 @@ def process_choices(choices_str, field_name):
 
 def parse_html(input_string, default_language="en"):
     result = {}
-
+
     # Handle non-string input
     if not isinstance(input_string, str):
         if pd.isna(input_string):  # Handle NaN values
@@ -287,7 +302,9 @@ def parse_html(input_string, default_language="en"):
         if not result:  # If no text was extracted
             result[default_language] = soup.get_text(strip=True)
     else:
-        result[default_language] = soup.get_text(strip=True)  # Use the entire text as default language text
+        result[default_language] = soup.get_text(
+            strip=True
+        )  # Use the entire text as default language text
     return result
 
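
A minimal sketch of the fallback branch above (assuming BeautifulSoup is installed; which branch runs depends on code outside this hunk):

from bs4 import BeautifulSoup

soup = BeautifulSoup("<p>  How old are you?  </p>", "html.parser")
# strip=True trims surrounding whitespace from the extracted text.
print(soup.get_text(strip=True))  # How old are you?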

@@ -525,24 +542,26 @@ def parse_language_iso_codes(input_string):
     ]
 
 
-def process_csv(
-    csv_file, abs_folder_path, schema_context_url, protocol_name
-):
+def process_csv(csv_file, abs_folder_path, schema_context_url, protocol_name):
     datas = {}
     order = {}
     compute = {}
     languages = []
 
     # Read CSV with explicit BOM handling, and maintain original order
-    df = pd.read_csv(csv_file, encoding="utf-8-sig")  # utf-8-sig handles BOM automatically
-
+    df = pd.read_csv(
+        csv_file, encoding="utf-8-sig"
+    )  # utf-8-sig handles BOM automatically
+
     # Clean column names (headers)
-    df.columns = df.columns.map(lambda x: x.strip().strip('"').lstrip("\ufeff"))
+    df.columns = df.columns.map(
+        lambda x: x.strip().strip('"').lstrip("\ufeff")
+    )
 
     # Clean string values in the dataframe
-    object_columns = df.select_dtypes(include=['object']).columns
+    object_columns = df.select_dtypes(include=["object"]).columns
     for col in object_columns:
-        df[col] = df[col].astype(str).replace('nan', '')
+        df[col] = df[col].astype(str).replace("nan", "")
 
     # Initialize structures for each unique form
     unique_forms = df["Form Name"].unique()
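
A small sketch (not from the repo) of why encoding="utf-8-sig" matters here: a REDCap data dictionary exported on Windows often starts with a UTF-8 byte-order mark, and "utf-8-sig" drops it during decoding so the first column name comes through clean:

import io

import pandas as pd

# A REDCap-style dictionary export prefixed with a UTF-8 BOM.
raw = "\ufeffVariable / Field Name,Form Name\nrecord_id,demographics\n".encode("utf-8")

df = pd.read_csv(io.BytesIO(raw), encoding="utf-8-sig")
print(list(df.columns))  # ['Variable / Field Name', 'Form Name'] -- no stray '\ufeff'
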
@@ -557,17 +576,17 @@ def process_csv(
     # TODO: should we bring back the language
     # if not languages:
     #     languages = parse_language_iso_codes(row["Field Label"])
-
+
     # Process rows in original order
     for _, row in df.iterrows():
         form_name = row["Form Name"]
         field_name = row["Variable / Field Name"]
         field_type = row.get("Field Type", "")
         field_annotation = row.get("Field Annotation")
-
+
         # Add row data to datas dictionary
         datas[form_name].append(row.to_dict())
-
+
         if field_type in COMPUTE_LIST:
             condition = normalize_condition(
                 row["Choices, Calculations, OR Slider Labels"],
@@ -579,7 +598,10 @@ def process_csv(
                     "jsExpression": condition,
                 }
             )
-        elif isinstance(field_annotation, str) and "@CALCTEXT" in field_annotation.upper():
+        elif (
+            isinstance(field_annotation, str)
+            and "@CALCTEXT" in field_annotation.upper()
+        ):
             calc_text = field_annotation
             match = re.search(r"@CALCTEXT\((.*)\)", calc_text)
             if match:
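
A quick sketch of what the @CALCTEXT regex above captures (the annotation string is made up):

import re

calc_text = "@CALCTEXT(if([age] >= 18, 'adult', 'minor'))"
match = re.search(r"@CALCTEXT\((.*)\)", calc_text)
if match:
    print(match.group(1))  # if([age] >= 18, 'adult', 'minor')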

reproschema/tests/test_redcap2reproschema.py

Lines changed: 8 additions & 6 deletions
@@ -25,11 +25,11 @@ def test_redcap2reproschema(tmpdir):
 
     shutil.copy(CSV_TEST_FILE, str(temp_csv_file))
     shutil.copy(YAML_TEST_FILE, str(temp_yaml_file))
-
+
     # Add debug output to see the content of the CSV file
-    with open(str(temp_csv_file), 'r') as f:
+    with open(str(temp_csv_file), "r") as f:
         print("CSV content:", f.read())
-
+
     with tmpdir.as_cwd():
         # Read YAML to find the expected output directory name
         with open(str(temp_yaml_file), "r") as file:
@@ -44,8 +44,10 @@ def test_redcap2reproschema(tmpdir):
                 str(temp_yaml_file),
             ],
         )
-
+
         print("Command output:", result.output)  # Add debug output
-
+
         assert result.exit_code == 0, f"Command failed with: {result.output}"
-        assert os.path.isdir(protocol_name), f"Expected output directory '{protocol_name}' does not exist"
+        assert os.path.isdir(
+            protocol_name
+        ), f"Expected output directory '{protocol_name}' does not exist"
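
The result.exit_code / result.output pattern above looks like click's test runner (an assumption; the full runner.invoke call sits outside this hunk). A self-contained sketch using a hypothetical hello command rather than the real redcap2reproschema CLI:

import click
from click.testing import CliRunner

@click.command()
@click.argument("name")
def hello(name):
    click.echo(f"hello {name}")

runner = CliRunner()
result = runner.invoke(hello, ["world"])
print("Command output:", result.output)
assert result.exit_code == 0, f"Command failed with: {result.output}"
assert result.output.strip() == "hello world"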

0 commit comments
