Skip to content

Commit a1d77d2

Browse files
committed
initial commit
0 parents  commit a1d77d2

File tree

5 files changed

+513
-0
lines changed

5 files changed

+513
-0
lines changed

cobol_converter.py

Lines changed: 316 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,316 @@
1+
import re
2+
import sys
3+
import decimal
4+
from typing import Dict, Any
5+
6+
class CobolToPythonConverter:
    """Translate a small subset of COBOL into a Python program skeleton.

    The converter recognises ``PROGRAM-ID``, ``SELECT ... ASSIGN TO``,
    ``FD <name>-FILE``, ``01 <name>-RECORD`` groups and ``PIC`` declarations.
    It emits a Python class with one nested class per record, an ``__init__``
    holding the WORKING-STORAGE variables and file paths, and a fixed
    payroll-style ``process_files`` method.
    """

    # Python source literal used to initialise a generated attribute, keyed by
    # the Python type name produced by determine_python_type().
    _DEFAULT_LITERALS = {'int': '0', 'str': '""', 'float': '0.0'}

    # Headers that terminate the WORKING-STORAGE SECTION. The keyword must be
    # followed by SECTION/DIVISION/PROGRAM so that data names which merely
    # contain FILE, REPORT, ... (e.g. WS-FILE-STATUS) do not end the section.
    _WORKING_STORAGE_END = (
        r'PROCEDURE\s+DIVISION|FILE\s+SECTION|LINKAGE\s+SECTION'
        r'|REPORT\s+SECTION|LOCAL-STORAGE\s+SECTION|END\s+PROGRAM'
    )

    # Tokens that terminate a record group: a stand-alone 01/FD entry or the
    # next section/division header. The look-behind rejects "01"/"FD" that are
    # part of a number, a data name or a PIC length such as X(01).
    _RECORD_END = (
        r'(?<![\w(-])(?:01|FD)\s+[A-Z]'
        r'|WORKING-STORAGE\s+SECTION|PROCEDURE\s+DIVISION'
        r'|IDENTIFICATION\s+DIVISION'
    )

    # Fixed body of the generated ``process_files``/``run`` methods. It reads
    # ``self.employee_file_path`` and instantiates ``self.EMPLOYEERecord``, so
    # the generated program only runs when the COBOL source declares an
    # EMPLOYEE file and an EMPLOYEE record with the emp_* fields used below.
    #
    # Fixed-width layout assumed for each input line:
    #   ID:       chars [0:5],   separator at [5]
    #   NAME:     chars [6:26],  separator at [26]
    #   DEPT:     chars [27:46], separator at [46]
    #   SALARY:   chars [47:55], separator at [55]
    #   TAX RATE: chars [56:61]
    _PROCESS_FILES_TEMPLATE = (
        "",
        "    def process_files(self):",
        "        # Example file processing logic",
        "        try:",
        "            with open(self.employee_file_path, 'r') as emp_file, open('payroll_report.txt', 'w') as report_file:",
        "                total_payroll = 0.0",
        "                total_tax = 0.0",
        "                total_net = 0.0",
        "",
        "                for line in emp_file:",
        "                    employee = self.EMPLOYEERecord()",
        "                    # Updated slicing according to your specifications",
        "                    try:",
        "                        employee.emp_id = int(line[0:5].strip())",
        "                        employee.emp_name = line[6:26].strip()",
        "                        employee.emp_department = line[27:46].strip()",
        "                        employee.emp_salary = float(line[47:55].strip())",
        "                        employee.emp_tax_rate = float(line[56:61].strip())",
        "                    except ValueError:",
        "                        # If parsing fails, skip this line",
        "                        continue",
        "",
        "                    tax_amount = employee.emp_salary * (employee.emp_tax_rate / 100.0)",
        "                    net_pay = employee.emp_salary - tax_amount",
        "                    total_payroll += employee.emp_salary",
        "                    total_tax += tax_amount",
        "                    total_net += net_pay",
        "",
        "                    report_line = (",
        "                        f'{employee.emp_id} | {employee.emp_name} | {employee.emp_department} | Gross: ${employee.emp_salary:.2f} | Tax: ${tax_amount:.2f} | Net: ${net_pay:.2f}'",
        "                    )",
        "                    report_file.write(report_line + '\\n')",
        "",
        "                # Write summary",
        "                report_file.write('\\n===== PAYROLL SUMMARY =====\\n')",
        "                report_file.write(f'Total Gross Payroll: ${total_payroll:.2f}\\n')",
        "                report_file.write(f'Total Tax Collected: ${total_tax:.2f}\\n')",
        "                report_file.write(f'Total Net Payroll: ${total_net:.2f}\\n')",
        "",
        "        except FileNotFoundError:",
        "            print('Employee file not found.')",
        "",
        "    def run(self):",
        "        self.process_files()",
        "",
    )

    def __init__(self):
        """
        Initialize the COBOL to Python converter with parsing and conversion rules.
        """
        # Basic mapping of COBOL PIC types to Python types
        self.type_mappings = {
            'PIC 9': 'int',
            'PIC X': 'str',
            'PIC 9(5)': 'int',
            'PIC 9(10)V99': 'float',
            'PIC 9(3)V99': 'float',
            'COMP': 'int',
            'COMP-3': 'decimal.Decimal'
        }

        # Regex patterns for parsing COBOL structures. All of them are applied
        # with re.IGNORECASE. The \b anchors keep "01"/"FD" from matching the
        # tail of a longer token (e.g. the sequence number "10001").
        self.patterns = {
            'variable_declaration': r'(\d+)\s+(\w[\w-]*)\s+PIC\s+([X9]+)(\(\d+\))?(\s*V\d+)?',
            'procedure_division': r'PROCEDURE\s+DIVISION',
            'program_id': r'PROGRAM-ID\.\s+(\w[\w-]*)',
            'working_storage': r'WORKING-STORAGE\s+SECTION\.',
            'file_section': r'FILE\s+SECTION\.',
            'record_declaration': r'\b01\s+(\w+)-RECORD',
            'file_declaration': r'\bFD\s+(\w+)-FILE',
            'select_file': r'SELECT\s+(\w+)-FILE\s+ASSIGN\s+TO\s+"([^"]+)"'
        }

    def cobol_name_to_python_class(self, name: str) -> str:
        """Return ``name`` with hyphens replaced by underscores, upper-cased."""
        return name.replace('-', '_').upper()

    def cobol_name_to_python_var(self, name: str) -> str:
        """Return ``name`` with hyphens replaced by underscores, lower-cased."""
        return name.replace('-', '_').lower()

    def determine_python_type(self, full_pic_type: str) -> str:
        """Map a ``PIC ...`` clause to the name of a Python type.

        The clause is compared case-insensitively because declarations are
        matched with ``re.IGNORECASE`` and may therefore arrive in lower case.

        Returns:
            The entry from ``self.type_mappings`` when the clause is listed
            there; otherwise ``'float'`` for numeric pictures containing an
            implied decimal point (``V``), ``'int'`` for other numeric
            pictures, and ``'str'`` for everything else.
        """
        normalized = full_pic_type.upper()
        if normalized in self.type_mappings:
            return self.type_mappings[normalized]

        # Heuristic for numeric vs. string
        if normalized.startswith('PIC 9'):
            return 'float' if 'V' in normalized else 'int'
        # PIC X and anything unrecognised default to string
        return 'str'

    def convert_file(self, cobol_file_path: str, output_file_path: str) -> None:
        """
        Convert a full COBOL file to Python.

        Reads ``cobol_file_path``, writes the generated source to
        ``output_file_path`` and prints a confirmation message.
        """
        with open(cobol_file_path, 'r') as cobol_file:
            cobol_code = cobol_file.read()

        python_code = self.convert_code(cobol_code)

        with open(output_file_path, 'w') as python_file:
            python_file.write(python_code)

        print(f"Conversion complete. Output saved to {output_file_path}")

    def convert_code(self, cobol_code: str) -> str:
        """
        Convert COBOL code to Python code.

        Args:
            cobol_code: Full text of the COBOL program.

        Returns:
            Source text of a Python module defining one class (named after the
            PROGRAM-ID, or ``COBOLPROGRAM`` when none is found), a ``main()``
            function and a ``__main__`` guard.
        """
        # Extract program ID
        program_id_match = re.search(self.patterns['program_id'], cobol_code, re.IGNORECASE)
        program_name = program_id_match.group(1) if program_id_match else 'CobolProgram'
        class_name = self.cobol_name_to_python_class(program_name)

        # COBOL names are case-insensitive, so SELECT and FD may spell the
        # same file differently; key the assignments by upper-cased name.
        file_assignments = {
            name.upper(): target
            for name, target in self.parse_file_assignments(cobol_code).items()
        }

        python_code_lines = [
            "import decimal",
            "import csv",
            "",
            f"class {class_name}:",
        ]

        # One nested class per "01 <name>-RECORD" group
        for record_match in re.finditer(self.patterns['record_declaration'], cobol_code, re.IGNORECASE):
            record_name = record_match.group(1)
            record_structure = self.parse_record_structure(cobol_code, record_name)
            python_code_lines.extend(self._render_record_class(record_name, record_structure))

        python_code_lines.extend(self._render_init(cobol_code, file_assignments))
        python_code_lines.extend(self._PROCESS_FILES_TEMPLATE)
        python_code_lines.extend([
            "def main():",
            f"    program = {class_name}()",
            "    program.run()",
            "",
            "if __name__ == '__main__':",
            "    main()"
        ])

        return "\n".join(python_code_lines)

    def _default_literal(self, python_type: str) -> str:
        """Return the source literal used to initialise a ``python_type`` attribute."""
        return self._DEFAULT_LITERALS.get(python_type, 'None')

    def _render_record_class(self, record_name: str, record_structure: Dict[str, Dict]) -> list:
        """Return the source lines of the nested class generated for one record."""
        # Upper-case the name so the class matches the EMPLOYEERecord reference
        # in the process_files template regardless of the COBOL source's case.
        record_class = self.cobol_name_to_python_class(record_name)
        lines = [
            "",
            f"    class {record_class}Record:",
            "        def __init__(self):"
        ]
        body = [
            f"            self.{self.cobol_name_to_python_var(var_name)} = "
            f"{self._default_literal(var_details['type'])}"
            for var_name, var_details in record_structure.items()
        ]
        # A def with no statements is a syntax error in the generated module.
        lines.extend(body or ["            pass"])
        return lines

    def _render_init(self, cobol_code: str, file_assignments: Dict[str, str]) -> list:
        """Return the source lines of the generated top-level ``__init__``.

        ``file_assignments`` must be keyed by upper-cased ``<NAME>-FILE``.
        """
        lines = ["\n    def __init__(self):"]
        body = []

        # Working storage variables
        for var_name, python_type in self.parse_working_storage(cobol_code).items():
            python_var_name = self.cobol_name_to_python_var(var_name)
            body.append(f"        self.{python_var_name} = {self._default_literal(python_type)}")

        # File declarations: use the actual assigned filename if found, else default
        for file_match in re.finditer(self.patterns['file_declaration'], cobol_code, re.IGNORECASE):
            file_name = file_match.group(1)
            python_var_name = self.cobol_name_to_python_var(file_name)
            assigned_filename = file_assignments.get(
                f"{file_name}-FILE".upper(), f"{python_var_name}.dat"
            )
            # repr() keeps the emitted literal valid when the filename contains
            # quotes or backslashes; plain names still render as '<name>'.
            body.append(f"        self.{python_var_name}_file_path = {assigned_filename!r}")

        lines.extend(body or ["        pass"])
        return lines

    def _parse_declarations(self, cobol_text: str) -> list:
        """Return ``(name, full_pic_type, python_type)`` for each PIC declaration in ``cobol_text``."""
        declarations = []
        for match in re.finditer(self.patterns['variable_declaration'], cobol_text, re.IGNORECASE):
            _, var_name, pic_type, length, decimal_part = match.groups()
            full_pic_type = f"PIC {pic_type}{length or ''}{decimal_part or ''}".strip()
            declarations.append((var_name, full_pic_type, self.determine_python_type(full_pic_type)))
        return declarations

    def _working_storage_text(self, cobol_code: str) -> str:
        """Return the text of the WORKING-STORAGE SECTION, or ``''`` when absent."""
        section_match = re.search(
            rf'WORKING-STORAGE\s+SECTION\.(.*?)(?={self._WORKING_STORAGE_END}|\Z)',
            cobol_code, re.IGNORECASE | re.DOTALL
        )
        return section_match.group(1) if section_match else ''

    def parse_record_structure(self, cobol_code: str, record_name: str) -> Dict[str, Dict]:
        """
        Parse the structure of a specific record in the COBOL code.

        Args:
            cobol_code: Full text of the COBOL program.
            record_name: Record name without the ``-RECORD`` suffix.

        Returns:
            A dict mapping each field name (as written in the source) to
            ``{'type': <python type name>, 'pic_type': <PIC clause>}``. Empty
            when the record is not found or declares no PIC fields.
        """
        record_pattern = (
            rf'(?<![\w(-])01\s+{re.escape(record_name)}-RECORD\.(.*?)'
            rf'(?={self._RECORD_END}|\Z)'
        )
        record_match = re.search(record_pattern, cobol_code, re.IGNORECASE | re.DOTALL)
        if not record_match:
            return {}

        return {
            var_name: {'type': python_type, 'pic_type': full_pic_type}
            for var_name, full_pic_type, python_type in self._parse_declarations(record_match.group(1))
        }

    def parse_working_storage(self, cobol_code: str) -> Dict[str, str]:
        """
        Parse variables declared in the WORKING-STORAGE SECTION.

        Returns:
            A dict mapping each variable name (as written in the source) to
            its Python type name. Empty when the section is absent.
        """
        return {
            var_name: python_type
            for var_name, _, python_type in self._parse_declarations(self._working_storage_text(cobol_code))
        }

    def parse_file_assignments(self, cobol_code: str) -> Dict[str, str]:
        """
        Parse file assignments (SELECT ... ASSIGN TO "filename") from the FILE-CONTROL.

        Returns:
            A dict mapping ``<name>-FILE`` (name as written in the source) to
            the quoted filename it is assigned to.
        """
        return {
            select_match.group(1) + '-FILE': select_match.group(2)
            for select_match in re.finditer(self.patterns['select_file'], cobol_code, re.IGNORECASE)
        }

    def analyze_cobol_complexity(self, cobol_code: str) -> Dict[str, Any]:
        """
        Analyze the complexity of the COBOL program.

        Returns:
            A dict with ``variable_count`` (PIC declarations found in the
            WORKING-STORAGE SECTION), ``record_count`` and the boolean flags
            ``has_procedure_division``, ``has_working_storage`` and
            ``has_file_section``.
        """
        def found(pattern_key: str) -> bool:
            return bool(re.search(self.patterns[pattern_key], cobol_code, re.IGNORECASE))

        # Use the same section boundaries as parse_working_storage() so both
        # methods agree on what belongs to working storage.
        working_storage_code = self._working_storage_text(cobol_code)

        return {
            'variable_count': len(re.findall(self.patterns['variable_declaration'], working_storage_code,
                                             re.IGNORECASE)),
            'record_count': len(re.findall(self.patterns['record_declaration'], cobol_code, re.IGNORECASE)),
            'has_procedure_division': found('procedure_division'),
            'has_working_storage': found('working_storage'),
            'has_file_section': found('file_section')
        }
288+
289+
290+
def main():
    """Command-line entry point: convert a COBOL file, then print an analysis.

    Expects exactly two arguments: the input COBOL path and the output Python
    path. Usage and error messages go to stderr so they do not mix with the
    normal output on stdout. Exits with status 1 on a wrong argument count or
    when conversion or analysis raises.
    """
    if len(sys.argv) != 3:
        print("Usage: python cobol_converter.py <input_cobol_file> <output_python_file>", file=sys.stderr)
        sys.exit(1)

    input_file, output_file = sys.argv[1], sys.argv[2]
    converter = CobolToPythonConverter()

    try:
        converter.convert_file(input_file, output_file)

        # Optional: Print complexity analysis
        with open(input_file, 'r') as f:
            cobol_code = f.read()
        complexity = converter.analyze_cobol_complexity(cobol_code)
        print("\nCOBOL Program Complexity Analysis:")
        for key, value in complexity.items():
            print(f"{key}: {value}")

    except Exception as e:
        # Top-level boundary: report any failure and signal it via exit status.
        print(f"Conversion error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()

employees.dat

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
10001 John Smith Sales 50000.00 20.50
2+
10002 Emily Johnson Marketing 65000.00 22.75
3+
10003 Michael Williams Engineering 75000.00 25.00
4+
10004 Sarah Brown Human Resources 55000.00 21.25
5+
10005 David Miller IT 80000.00 26.50

payroll_report.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
10001 | John Smith | Sales | Gross: $50000.00 | Tax: $10250.00 | Net: $39750.00
2+
10002 | Emily Johnson | Marketing | Gross: $65000.00 | Tax: $14787.50 | Net: $50212.50
3+
10003 | Michael Williams | Engineering | Gross: $75000.00 | Tax: $18750.00 | Net: $56250.00
4+
10004 | Sarah Brown | Human Resources | Gross: $55000.00 | Tax: $11687.50 | Net: $43312.50
5+
10005 | David Miller | IT | Gross: $80000.00 | Tax: $21200.00 | Net: $58800.00
6+
7+
===== PAYROLL SUMMARY =====
8+
Total Gross Payroll: $325000.00
9+
Total Tax Collected: $76675.00
10+
Total Net Payroll: $248325.00

0 commit comments

Comments
 (0)