|
| 1 | +import re |
| 2 | +import sys |
| 3 | +import decimal |
| 4 | +from typing import Dict, Any |
| 5 | + |
class CobolToPythonConverter:
    """Generate a Python payroll-program skeleton from COBOL source text.

    The converter recognises a small subset of COBOL with regular expressions:
    ``PROGRAM-ID``, ``SELECT <name>-FILE ASSIGN TO "<path>"``,
    ``FD <name>-FILE``, ``01 <name>-RECORD.`` groups and elementary items that
    carry a ``PIC`` clause. The PROCEDURE DIVISION is not translated; a fixed
    ``process_files`` template is emitted instead, and that template is
    hard-wired to an ``EMPLOYEE`` file/record with ``emp_*`` fields.
    """

    # Literal emitted as the initial value for each inferred Python type; any
    # other type (e.g. 'decimal.Decimal') is initialised to None.
    _INIT_LITERALS = {'int': '0', 'str': '""', 'float': '0.0'}

    # Lookahead that ends the WORKING-STORAGE body. Each alternative is a full
    # section/division header so that data names which merely contain FILE,
    # REPORT or PROCEDURE (WS-FILE-STATUS, WS-REPORT-LINE, ...) do not cut the
    # section short.
    _WORKING_STORAGE_END = (
        r'(?=PROCEDURE\s+DIVISION|FILE\s+SECTION|LINKAGE\s+SECTION'
        r'|REPORT\s+SECTION|LOCAL-STORAGE\s+SECTION|END\s+PROGRAM|\Z)'
    )

    # Lookahead that ends a record body: the next 01-level item, the next FD,
    # or the next section/division header. The lookbehinds keep "01" and "FD"
    # from matching inside names or pictures such as EMP-FDATE or X(01).
    _RECORD_END = (
        r'(?=(?<![\w-])01\s|(?<![\w-])FD\s|WORKING-STORAGE\s+SECTION'
        r'|PROCEDURE\s+DIVISION|IDENTIFICATION\s+DIVISION|\Z)'
    )

    def __init__(self):
        """
        Initialize the COBOL to Python converter with parsing and conversion rules.
        """
        # Basic mapping of COBOL PIC types to Python types
        self.type_mappings = {
            'PIC 9': 'int',
            'PIC X': 'str',
            'PIC 9(5)': 'int',
            'PIC 9(10)V99': 'float',
            'PIC 9(3)V99': 'float',
            'COMP': 'int',
            'COMP-3': 'decimal.Decimal'
        }

        # Regex patterns for parsing COBOL structures.
        # variable_declaration groups: level, name, picture symbols,
        # "(n)" repeat count, implied-decimal part. An optional sign "S" is
        # accepted but not captured, so signed numerics classify like
        # unsigned ones.
        self.patterns = {
            'variable_declaration': r'(\d+)\s+(\w[\w-]*)\s+PIC\s+S?([X9]+)(\(\d+\))?(\s*V\d+)?',
            'procedure_division': r'PROCEDURE\s+DIVISION',
            'program_id': r'PROGRAM-ID\.\s+(\w[\w-]*)',
            'working_storage': r'WORKING-STORAGE\s+SECTION\.',
            'file_section': r'FILE\s+SECTION\.',
            'record_declaration': r'01\s+(\w+)-RECORD',
            'file_declaration': r'FD\s+(\w+)-FILE',
            'select_file': r'SELECT\s+(\w+)-FILE\s+ASSIGN\s+TO\s+"([^"]+)"'
        }

    def cobol_name_to_python_class(self, name: str) -> str:
        """Return ``name`` with hyphens replaced by underscores, upper-cased."""
        return name.replace('-', '_').upper()

    def cobol_name_to_python_var(self, name: str) -> str:
        """Return ``name`` with hyphens replaced by underscores, lower-cased."""
        return name.replace('-', '_').lower()

    def determine_python_type(self, full_pic_type: str) -> str:
        """Map a picture string such as ``'PIC 9(5)V99'`` to a Python type name.

        Returns one of the values in ``self.type_mappings`` when the picture
        is listed there; otherwise ``'float'`` for numeric pictures with an
        implied decimal point (``V``), ``'int'`` for other numeric pictures
        and ``'str'`` for everything else.
        """
        # Declarations are matched case-insensitively, so the picture may be
        # lower-case; normalise before comparing ("v99" must count as "V99").
        normalized = full_pic_type.upper()
        if normalized in self.type_mappings:
            return self.type_mappings[normalized]

        if normalized.startswith(('PIC 9', 'PIC S9')):
            return 'float' if 'V' in normalized else 'int'
        # PIC X and anything unrecognised default to string
        return 'str'

    def convert_file(self, cobol_file_path: str, output_file_path: str) -> None:
        """
        Convert a full COBOL file to Python.

        Reads ``cobol_file_path``, writes the generated code to
        ``output_file_path`` and prints a confirmation line. Exceptions from
        opening or reading/writing the files propagate to the caller.
        """
        with open(cobol_file_path, 'r') as cobol_file:
            cobol_code = cobol_file.read()

        python_code = self.convert_code(cobol_code)

        with open(output_file_path, 'w') as python_file:
            python_file.write(python_code)

        print(f"Conversion complete. Output saved to {output_file_path}")

    def convert_code(self, cobol_code: str) -> str:
        """
        Convert COBOL code to Python code.

        The result is the source of a module containing one outer class named
        after the PROGRAM-ID (``COBOLPROGRAM`` when none is found), a nested
        ``<NAME>Record`` class per ``01 <NAME>-RECORD`` group, an ``__init__``
        holding WORKING-STORAGE variables and one ``<name>_file_path`` per FD,
        followed by the fixed ``process_files``/``run``/``main`` template.
        """
        program_id_match = re.search(self.patterns['program_id'], cobol_code, re.IGNORECASE)
        program_name = program_id_match.group(1) if program_id_match else 'CobolProgram'
        class_name = self.cobol_name_to_python_class(program_name)

        # SELECT and FD are both matched case-insensitively, so key the
        # assignments by upper-cased name to make the lookup below agree.
        file_assignments = {
            name.upper(): path
            for name, path in self.parse_file_assignments(cobol_code).items()
        }

        python_code_lines = [
            "import decimal",
            "import csv",
            "",
            f"class {class_name}:",
        ]

        # One nested class per record group
        for record_match in re.finditer(self.patterns['record_declaration'], cobol_code, re.IGNORECASE):
            record_name = record_match.group(1)
            record_structure = self.parse_record_structure(cobol_code, record_name)
            # Upper-cased so it matches the EMPLOYEERecord reference in the
            # process_files template regardless of the source's letter case.
            record_class = self.cobol_name_to_python_class(record_name)

            python_code_lines.extend([
                "",
                f"    class {record_class}Record:",
                "        def __init__(self):",
            ])
            field_lines = [
                f"            self.{self.cobol_name_to_python_var(var_name)} = "
                f"{self._init_literal(var_details['type'])}"
                for var_name, var_details in record_structure.items()
            ]
            # A def with no body is a syntax error in the generated module
            python_code_lines.extend(field_lines or ["            pass"])

        # Outer __init__: working-storage variables, then file paths
        python_code_lines.append("\n    def __init__(self):")
        init_lines = [
            f"        self.{self.cobol_name_to_python_var(var_name)} = {self._init_literal(python_type)}"
            for var_name, python_type in self.parse_working_storage(cobol_code).items()
        ]

        for file_match in re.finditer(self.patterns['file_declaration'], cobol_code, re.IGNORECASE):
            file_name = file_match.group(1)
            python_var_name = self.cobol_name_to_python_var(file_name)
            # Use the actual assigned filename if found, else default
            assigned_filename = file_assignments.get(
                f"{file_name}-FILE".upper(), f"{python_var_name}.dat"
            )
            # !r emits a correctly quoted/escaped literal even when the path
            # contains quotes or backslashes.
            init_lines.append(f"        self.{python_var_name}_file_path = {assigned_filename!r}")

        python_code_lines.extend(init_lines or ["        pass"])

        python_code_lines.extend(self._runtime_template(class_name))

        return "\n".join(python_code_lines)

    def _init_literal(self, python_type: str) -> str:
        """Return the source literal used to initialise a field of ``python_type``."""
        return self._INIT_LITERALS.get(python_type, 'None')

    @staticmethod
    def _runtime_template(class_name: str):
        """Return the fixed tail of the generated module as a list of lines.

        The template reads fixed-width employee lines using these slices:
          ID:       chars [0:5],   space at [5]
          NAME:     chars [6:26]  (20 chars), space at [26]
          DEPT:     chars [27:46] (19 chars), space at [46]
          SALARY:   chars [47:55] (8 chars),  space at [55]
          TAX RATE: chars [56:61] (5 chars)
        """
        return [
            "",
            "    def process_files(self):",
            "        # Example file processing logic",
            "        try:",
            "            with open(self.employee_file_path, 'r') as emp_file, open('payroll_report.txt', 'w') as report_file:",
            "                total_payroll = 0.0",
            "                total_tax = 0.0",
            "                total_net = 0.0",
            "",
            "                for line in emp_file:",
            "                    employee = self.EMPLOYEERecord()",
            "                    # Updated slicing according to your specifications",
            "                    try:",
            "                        employee.emp_id = int(line[0:5].strip())",
            "                        employee.emp_name = line[6:26].strip()",
            "                        employee.emp_department = line[27:46].strip()",
            "                        employee.emp_salary = float(line[47:55].strip())",
            "                        employee.emp_tax_rate = float(line[56:61].strip())",
            "                    except ValueError:",
            "                        # If parsing fails, skip this line",
            "                        continue",
            "",
            "                    tax_amount = employee.emp_salary * (employee.emp_tax_rate / 100.0)",
            "                    net_pay = employee.emp_salary - tax_amount",
            "                    total_payroll += employee.emp_salary",
            "                    total_tax += tax_amount",
            "                    total_net += net_pay",
            "",
            "                    report_line = (",
            "                        f'{employee.emp_id} | {employee.emp_name} | {employee.emp_department} | Gross: ${employee.emp_salary:.2f} | Tax: ${tax_amount:.2f} | Net: ${net_pay:.2f}'",
            "                    )",
            "                    report_file.write(report_line + '\\n')",
            "",
            "                # Write summary",
            "                report_file.write('\\n===== PAYROLL SUMMARY =====\\n')",
            "                report_file.write(f'Total Gross Payroll: ${total_payroll:.2f}\\n')",
            "                report_file.write(f'Total Tax Collected: ${total_tax:.2f}\\n')",
            "                report_file.write(f'Total Net Payroll: ${total_net:.2f}\\n')",
            "",
            "        except FileNotFoundError:",
            "            print('Employee file not found.')",
            "",
            "    def run(self):",
            "        self.process_files()",
            "",
            "def main():",
            f"    program = {class_name}()",
            "    program.run()",
            "",
            "if __name__ == '__main__':",
            "    main()"
        ]

    def _iter_declarations(self, section_code: str):
        """Yield ``(name, full_pic_type, python_type)`` for each PIC item in ``section_code``."""
        for match in re.finditer(self.patterns['variable_declaration'], section_code, re.IGNORECASE):
            _, var_name, pic_type, length, decimal_part = match.groups()
            full_pic_type = f"PIC {pic_type}{length or ''}{decimal_part or ''}".strip()
            yield var_name, full_pic_type, self.determine_python_type(full_pic_type)

    def _working_storage_text(self, cobol_code: str) -> str:
        """Return the body of the first WORKING-STORAGE SECTION, or '' if absent."""
        match = re.search(
            r'WORKING-STORAGE\s+SECTION\.(.*?)' + self._WORKING_STORAGE_END,
            cobol_code, re.IGNORECASE | re.DOTALL
        )
        return match.group(1) if match else ''

    def parse_record_structure(self, cobol_code: str, record_name: str) -> Dict[str, Dict]:
        """
        Parse the structure of a specific record in the COBOL code.

        ``record_name`` is the part before ``-RECORD``. Returns a dict mapping
        each COBOL field name to ``{'type': <python type>, 'pic_type':
        <picture string>}``; the dict is empty when the record is not found.
        """
        record_pattern = rf'01\s+{re.escape(record_name)}-RECORD\.(.*?)' + self._RECORD_END
        record_match = re.search(record_pattern, cobol_code, re.IGNORECASE | re.DOTALL)
        if not record_match:
            return {}

        return {
            var_name: {'type': python_type, 'pic_type': full_pic_type}
            for var_name, full_pic_type, python_type in self._iter_declarations(record_match.group(1))
        }

    def parse_working_storage(self, cobol_code: str) -> Dict[str, str]:
        """
        Parse variables declared in the WORKING-STORAGE SECTION.

        Returns a dict mapping each COBOL variable name to its Python type
        name; empty when the section is missing.
        """
        return {
            var_name: python_type
            for var_name, _, python_type in self._iter_declarations(self._working_storage_text(cobol_code))
        }

    def parse_file_assignments(self, cobol_code: str) -> Dict[str, str]:
        """
        Parse file assignments (SELECT ... ASSIGN TO "filename") from the FILE-CONTROL.

        Returns a dict mapping ``<name>-FILE`` (name spelled as in the source)
        to the quoted filename.
        """
        return {
            select_match.group(1) + '-FILE': select_match.group(2)
            for select_match in re.finditer(self.patterns['select_file'], cobol_code, re.IGNORECASE)
        }

    def analyze_cobol_complexity(self, cobol_code: str) -> Dict[str, Any]:
        """
        Analyze the complexity of the COBOL program.

        Returns a dict with ``variable_count`` (PIC items in WORKING-STORAGE,
        using the same section boundaries as ``parse_working_storage``),
        ``record_count`` and three ``has_*`` booleans.
        """
        def present(pattern_key: str) -> bool:
            return bool(re.search(self.patterns[pattern_key], cobol_code, re.IGNORECASE))

        return {
            'variable_count': len(re.findall(
                self.patterns['variable_declaration'],
                self._working_storage_text(cobol_code),
                re.IGNORECASE)),
            'record_count': len(re.findall(self.patterns['record_declaration'], cobol_code, re.IGNORECASE)),
            'has_procedure_division': present('procedure_division'),
            'has_working_storage': present('working_storage'),
            'has_file_section': present('file_section')
        }
| 288 | + |
| 289 | + |
def main():
    """Command-line entry point: convert one COBOL file and print a summary.

    Expects exactly two arguments after the script name (input COBOL path and
    output Python path); otherwise prints a usage line and exits with status
    1. Any exception raised during conversion or analysis is reported as
    "Conversion error: ..." and also ends the process with status 1.
    """
    if len(sys.argv) != 3:
        print("Usage: python cobol_converter.py <input_cobol_file> <output_python_file>")
        sys.exit(1)

    translator = CobolToPythonConverter()
    source_path, target_path = sys.argv[1:3]

    try:
        translator.convert_file(source_path, target_path)

        # Optional: the source is re-read to print a complexity analysis
        with open(source_path, 'r') as handle:
            source_text = handle.read()
        report = translator.analyze_cobol_complexity(source_text)
        print("\nCOBOL Program Complexity Analysis:")
        for metric in report:
            print(f"{metric}: {report[metric]}")

    except Exception as error:
        print(f"Conversion error: {error}")
        sys.exit(1)
| 313 | + |
| 314 | + |
# Script entry point: run the command-line interface only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
0 commit comments