From 78ca16acffc6f4905a229e88fb5961dc4f23f9fb Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 4 Dec 2024 01:01:36 +0000
Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 reproschema/redcap2reproschema.py            | 68 +++++++++++++-------
 reproschema/tests/test_redcap2reproschema.py | 14 ++--
 2 files changed, 53 insertions(+), 29 deletions(-)

diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py
index 3ba90d5..d4f0fe1 100644
--- a/reproschema/redcap2reproschema.py
+++ b/reproschema/redcap2reproschema.py
@@ -83,7 +83,9 @@ def clean_header(header):
     cleaned_header = {}
     for k, v in header.items():
         # Strip BOM, whitespace, and enclosing quotation marks if present
-        cleaned_key = k.lstrip("\ufeff").strip().strip('"') if isinstance(k, str) else k
+        cleaned_key = (
+            k.lstrip("\ufeff").strip().strip('"') if isinstance(k, str) else k
+        )
         cleaned_header[cleaned_key] = v
     return cleaned_header
 
@@ -145,11 +147,19 @@ def process_field_properties(data):
     condition = True
 
     # Check Field Annotation for special flags - safely handle non-string values
-    annotation = str(data.get("Field Annotation", "")).upper() if data.get("Field Annotation") is not None else ""
-    if condition and isinstance(annotation, str) and (
-        "@READONLY" in annotation
-        or "@HIDDEN" in annotation
-        or "@CALCTEXT" in annotation
+    annotation = (
+        str(data.get("Field Annotation", "")).upper()
+        if data.get("Field Annotation") is not None
+        else ""
+    )
+    if (
+        condition
+        and isinstance(annotation, str)
+        and (
+            "@READONLY" in annotation
+            or "@HIDDEN" in annotation
+            or "@CALCTEXT" in annotation
+        )
     ):
         condition = False
 
@@ -158,13 +168,18 @@ def process_field_properties(data):
         "isAbout": f"items/{data['Variable / Field Name']}",
         "isVis": condition,
     }
-
+
     # Handle Required Field check, accounting for NaN values and empty strings
     required_field = data.get("Required Field?")
-    if pd.notna(required_field) and str(required_field).strip():  # Check if value is not NaN and not empty
+    if (
+        pd.notna(required_field) and str(required_field).strip()
+    ):  # Check if value is not NaN and not empty
         if str(required_field).lower() == "y":
             prop_obj["valueRequired"] = True
-        elif str(required_field).lower() not in ["", "n"]:  # Only raise error for unexpected values
+        elif str(required_field).lower() not in [
+            "",
+            "n",
+        ]:  # Only raise error for unexpected values
             raise ValueError(
                 f"value {required_field} not supported yet for redcap:Required Field?"
             )
@@ -265,7 +280,7 @@ def process_choices(choices_str, field_name):
 
 def parse_html(input_string, default_language="en"):
     result = {}
-
+
     # Handle non-string input
     if not isinstance(input_string, str):
         if pd.isna(input_string):  # Handle NaN values
@@ -287,7 +302,9 @@ def parse_html(input_string, default_language="en"):
         if not result:  # If no text was extracted
             result[default_language] = soup.get_text(strip=True)
     else:
-        result[default_language] = soup.get_text(strip=True)  # Use the entire text as default language text
+        result[default_language] = soup.get_text(
+            strip=True
+        )  # Use the entire text as default language text
 
     return result
 
@@ -525,24 +542,26 @@ def parse_language_iso_codes(input_string):
     ]
 
 
-def process_csv(
-    csv_file, abs_folder_path, schema_context_url, protocol_name
-):
+def process_csv(csv_file, abs_folder_path, schema_context_url, protocol_name):
     datas = {}
     order = {}
     compute = {}
     languages = []
 
     # Read CSV with explicit BOM handling, and maintain original order
-    df = pd.read_csv(csv_file, encoding="utf-8-sig")  # utf-8-sig handles BOM automatically
-
+    df = pd.read_csv(
+        csv_file, encoding="utf-8-sig"
+    )  # utf-8-sig handles BOM automatically
+
     # Clean column names (headers)
-    df.columns = df.columns.map(lambda x: x.strip().strip('"').lstrip("\ufeff"))
+    df.columns = df.columns.map(
+        lambda x: x.strip().strip('"').lstrip("\ufeff")
+    )
 
     # Clean string values in the dataframe
-    object_columns = df.select_dtypes(include=['object']).columns
+    object_columns = df.select_dtypes(include=["object"]).columns
     for col in object_columns:
-        df[col] = df[col].astype(str).replace('nan', '')
+        df[col] = df[col].astype(str).replace("nan", "")
 
     # Initialize structures for each unique form
     unique_forms = df["Form Name"].unique()
@@ -557,17 +576,17 @@ def process_csv(
         # TODO: should we bring back the language
         # if not languages:
        #     languages = parse_language_iso_codes(row["Field Label"])
-
+
    # Process rows in original order
    for _, row in df.iterrows():
        form_name = row["Form Name"]
        field_name = row["Variable / Field Name"]
        field_type = row.get("Field Type", "")
        field_annotation = row.get("Field Annotation")
-
+
        # Add row data to datas dictionary
        datas[form_name].append(row.to_dict())
-
+
        if field_type in COMPUTE_LIST:
            condition = normalize_condition(
                row["Choices, Calculations, OR Slider Labels"],
@@ -579,7 +598,10 @@ def process_csv(
                    "jsExpression": condition,
                }
            )
-        elif isinstance(field_annotation, str) and "@CALCTEXT" in field_annotation.upper():
+        elif (
+            isinstance(field_annotation, str)
+            and "@CALCTEXT" in field_annotation.upper()
+        ):
            calc_text = field_annotation
            match = re.search(r"@CALCTEXT\((.*)\)", calc_text)
            if match:
diff --git a/reproschema/tests/test_redcap2reproschema.py b/reproschema/tests/test_redcap2reproschema.py
index 634c7f0..ffbbe67 100644
--- a/reproschema/tests/test_redcap2reproschema.py
+++ b/reproschema/tests/test_redcap2reproschema.py
@@ -25,11 +25,11 @@ def test_redcap2reproschema(tmpdir):
 
    shutil.copy(CSV_TEST_FILE, str(temp_csv_file))
    shutil.copy(YAML_TEST_FILE, str(temp_yaml_file))
-
+
    # Add debug output to see the content of the CSV file
-    with open(str(temp_csv_file), 'r') as f:
+    with open(str(temp_csv_file), "r") as f:
        print("CSV content:", f.read())
-
+
    with tmpdir.as_cwd():
        # Read YAML to find the expected output directory name
        with open(str(temp_yaml_file), "r") as file:
@@ -44,8 +44,10 @@ def test_redcap2reproschema(tmpdir):
                str(temp_yaml_file),
            ],
        )
-
+
        print("Command output:", result.output)  # Add debug output
-
+
        assert result.exit_code == 0, f"Command failed with: {result.output}"
-        assert os.path.isdir(protocol_name), f"Expected output directory '{protocol_name}' does not exist"
+        assert os.path.isdir(
+            protocol_name
+        ), f"Expected output directory '{protocol_name}' does not exist"