Skip to content

Commit 8473064

Browse files
authored
Merge pull request #790 from Il-Pela/feature/implemented-ebcdic-encoder
Extended EBCDIC Encoder - added support for cp1144
2 parents 6d8a997 + 7133a5b commit 8473064

File tree

7 files changed

+390
-49
lines changed

7 files changed

+390
-49
lines changed

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/EncoderSelector.scala

Lines changed: 2 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,9 @@ object EncoderSelector {
6565
fieldLength: Int
6666
): Option[Encoder] = {
6767
encoding match {
68-
case EBCDIC =>
68+
case EBCDIC if ebcdicCodePage.supportsEncoding =>
6969
val encoder = (a: Any) => {
70-
encodeEbcdicString(a.toString, CodePageCommon.asciiToEbcdicMapping, fieldLength)
70+
ebcdicCodePage.convert(a.toString, fieldLength)
7171
}
7272
Option(encoder)
7373
case ASCII =>
@@ -77,31 +77,6 @@ object EncoderSelector {
7777
}
7878
}
7979

80-
/**
81-
* An encoder from a ASCII basic string to an EBCDIC byte array
82-
*
83-
* @param string An input string
84-
* @param conversionTable A conversion table to use to convert from ASCII to EBCDIC
85-
* @param length The length of the output (in bytes)
86-
* @return A string representation of the binary data
87-
*/
88-
def encodeEbcdicString(string: String, conversionTable: Array[Byte], length: Int): Array[Byte] = {
89-
require(length >= 0, s"Field length cannot be negative, got $length")
90-
91-
var i = 0
92-
val buf = new Array[Byte](length)
93-
94-
// PIC X fields are space-filled on mainframe. Use EBCDIC space 0x40.
95-
util.Arrays.fill(buf, 0x40.toByte)
96-
97-
while (i < string.length && i < length) {
98-
val asciiByte = string(i).toByte
99-
buf(i) = conversionTable((asciiByte + 256) % 256)
100-
i = i + 1
101-
}
102-
buf
103-
}
104-
10580
def getBinaryEncoder(compression: Option[Usage],
10681
precision: Int,
10782
scale: Int,

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage
1818

1919
import za.co.absa.cobrix.cobol.internal.Logging
2020

21+
import java.util
22+
2123
/**
2224
* An abstract base class for generalizing EBCDIC to ASCII conversion tables for different EBCDIC code pages.
2325
*/
@@ -31,6 +33,23 @@ abstract class CodePage extends Serializable {
3133
* Converts an array of bytes to string according to the rules of the code page.
3234
*/
3335
def convert(bytes: Array[Byte]): String
36+
37+
/**
38+
* An encoder from a basic ASCII string to an EBCDIC byte array.
39+
* Callers should check `supportsEncoding` before calling this method, since not every CodePage can encode.
40+
*
41+
* @param string An input string
42+
* @param length The length of the output (in bytes)
43+
* @return The EBCDIC-encoded bytes of the input string
44+
*/
45+
def convert(string: String, length: Int): Array[Byte]
46+
47+
/**
48+
* Returns whether the CodePage is able to encode strings to EBCDIC.
49+
*
50+
* @return `true` if this CodePage can encode strings to EBCDIC, `false` otherwise
51+
*/
52+
def supportsEncoding: Boolean
3453
}
3554

3655
object CodePage extends Logging {

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1144.scala

Lines changed: 264 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage
2222
* It corresponds to code page 280 and only differs from it in position 9F, where the euro sign € is located instead
2323
* of the international currency symbol ¤.
2424
*/
25-
class CodePage1144 extends SingleByteCodePage(CodePage1144.ebcdicToAsciiMapping) {
25+
class CodePage1144 extends SingleByteCodePage(CodePage1144.ebcdicToAsciiMapping, Some(CodePage1144.asciiToEbcdicMapping)) {
2626
override def codePageShortName: String = "cp1144"
2727
}
2828

@@ -53,5 +53,268 @@ object CodePage1144 {
5353
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '³', 'Û', 'Ü', 'Ù', 'Ú', spc) // 240 - 255
5454
}
5555
ebcdic2ascii
56+
57+
}
58+
59+
/**
60+
* To generate the conversion mapping, use the Python script shared in the PR.
61+
*/
62+
val asciiToEbcdicMapping: Int => Byte = {
63+
case 0 => 0x00.toByte
64+
case 1 => 0x01.toByte
65+
case 2 => 0x02.toByte
66+
case 3 => 0x03.toByte
67+
case 156 => 0x04.toByte
68+
case 9 => 0x05.toByte
69+
case 134 => 0x06.toByte
70+
case 127 => 0x07.toByte
71+
case 151 => 0x08.toByte
72+
case 141 => 0x09.toByte
73+
case 142 => 0x0a.toByte
74+
case 11 => 0x0b.toByte
75+
case 12 => 0x0c.toByte
76+
case 13 => 0x0d.toByte
77+
case 14 => 0x0e.toByte
78+
case 15 => 0x0f.toByte
79+
case 16 => 0x10.toByte
80+
case 17 => 0x11.toByte
81+
case 18 => 0x12.toByte
82+
case 19 => 0x13.toByte
83+
case 157 => 0x14.toByte
84+
case 8 => 0x16.toByte
85+
case 135 => 0x17.toByte
86+
case 24 => 0x18.toByte
87+
case 25 => 0x19.toByte
88+
case 146 => 0x1a.toByte
89+
case 143 => 0x1b.toByte
90+
case 28 => 0x1c.toByte
91+
case 29 => 0x1d.toByte
92+
case 30 => 0x1e.toByte
93+
case 31 => 0x1f.toByte
94+
case 128 => 0x20.toByte
95+
case 129 => 0x21.toByte
96+
case 130 => 0x22.toByte
97+
case 131 => 0x23.toByte
98+
case 132 => 0x24.toByte
99+
case 10 => 0x25.toByte //NL and LF EBCDIC representation map to LF in Unicode. Choosing to always map LF to LF
100+
case 23 => 0x26.toByte
101+
case 27 => 0x27.toByte
102+
case 136 => 0x28.toByte
103+
case 137 => 0x29.toByte
104+
case 138 => 0x2a.toByte
105+
case 139 => 0x2b.toByte
106+
case 140 => 0x2c.toByte
107+
case 5 => 0x2d.toByte
108+
case 6 => 0x2e.toByte
109+
case 7 => 0x2f.toByte
110+
case 144 => 0x30.toByte
111+
case 145 => 0x31.toByte
112+
case 22 => 0x32.toByte
113+
case 147 => 0x33.toByte
114+
case 148 => 0x34.toByte
115+
case 149 => 0x35.toByte
116+
case 150 => 0x36.toByte
117+
case 4 => 0x37.toByte
118+
case 152 => 0x38.toByte
119+
case 153 => 0x39.toByte
120+
case 154 => 0x3a.toByte
121+
case 155 => 0x3b.toByte
122+
case 20 => 0x3c.toByte
123+
case 21 => 0x3d.toByte
124+
case 158 => 0x3e.toByte
125+
case 26 => 0x3f.toByte
126+
case 32 => 0x40.toByte
127+
case 160 => 0x41.toByte
128+
case 226 => 0x42.toByte
129+
case 228 => 0x43.toByte
130+
case 123 => 0x44.toByte
131+
case 225 => 0x45.toByte
132+
case 227 => 0x46.toByte
133+
case 229 => 0x47.toByte
134+
case 92 => 0x48.toByte
135+
case 241 => 0x49.toByte
136+
case 176 => 0x4a.toByte
137+
case 46 => 0x4b.toByte
138+
case 60 => 0x4c.toByte
139+
case 40 => 0x4d.toByte
140+
case 43 => 0x4e.toByte
141+
case 33 => 0x4f.toByte
142+
case 38 => 0x50.toByte
143+
case 93 => 0x51.toByte
144+
case 234 => 0x52.toByte
145+
case 235 => 0x53.toByte
146+
case 125 => 0x54.toByte
147+
case 237 => 0x55.toByte
148+
case 238 => 0x56.toByte
149+
case 239 => 0x57.toByte
150+
case 126 => 0x58.toByte
151+
case 223 => 0x59.toByte
152+
case 233 => 0x5a.toByte
153+
case 36 => 0x5b.toByte
154+
case 42 => 0x5c.toByte
155+
case 41 => 0x5d.toByte
156+
case 59 => 0x5e.toByte
157+
case 94 => 0x5f.toByte
158+
case 45 => 0x60.toByte
159+
case 47 => 0x61.toByte
160+
case 194 => 0x62.toByte
161+
case 196 => 0x63.toByte
162+
case 192 => 0x64.toByte
163+
case 193 => 0x65.toByte
164+
case 195 => 0x66.toByte
165+
case 197 => 0x67.toByte
166+
case 199 => 0x68.toByte
167+
case 209 => 0x69.toByte
168+
case 242 => 0x6a.toByte
169+
case 44 => 0x6b.toByte
170+
case 37 => 0x6c.toByte
171+
case 95 => 0x6d.toByte
172+
case 62 => 0x6e.toByte
173+
case 63 => 0x6f.toByte
174+
case 248 => 0x70.toByte
175+
case 201 => 0x71.toByte
176+
case 202 => 0x72.toByte
177+
case 203 => 0x73.toByte
178+
case 200 => 0x74.toByte
179+
case 205 => 0x75.toByte
180+
case 206 => 0x76.toByte
181+
case 207 => 0x77.toByte
182+
case 204 => 0x78.toByte
183+
case 249 => 0x79.toByte
184+
case 58 => 0x7a.toByte
185+
case 163 => 0x7b.toByte
186+
case 167 => 0x7c.toByte
187+
case 39 => 0x7d.toByte
188+
case 61 => 0x7e.toByte
189+
case 34 => 0x7f.toByte
190+
case 216 => 0x80.toByte
191+
case 97 => 0x81.toByte
192+
case 98 => 0x82.toByte
193+
case 99 => 0x83.toByte
194+
case 100 => 0x84.toByte
195+
case 101 => 0x85.toByte
196+
case 102 => 0x86.toByte
197+
case 103 => 0x87.toByte
198+
case 104 => 0x88.toByte
199+
case 105 => 0x89.toByte
200+
case 171 => 0x8a.toByte
201+
case 187 => 0x8b.toByte
202+
case 240 => 0x8c.toByte
203+
case 253 => 0x8d.toByte
204+
case 254 => 0x8e.toByte
205+
case 177 => 0x8f.toByte
206+
case 91 => 0x90.toByte
207+
case 106 => 0x91.toByte
208+
case 107 => 0x92.toByte
209+
case 108 => 0x93.toByte
210+
case 109 => 0x94.toByte
211+
case 110 => 0x95.toByte
212+
case 111 => 0x96.toByte
213+
case 112 => 0x97.toByte
214+
case 113 => 0x98.toByte
215+
case 114 => 0x99.toByte
216+
case 170 => 0x9a.toByte
217+
case 186 => 0x9b.toByte
218+
case 230 => 0x9c.toByte
219+
case 184 => 0x9d.toByte
220+
case 198 => 0x9e.toByte
221+
case 8364 => 0x9f.toByte
222+
case 181 => 0xa0.toByte
223+
case 236 => 0xa1.toByte
224+
case 115 => 0xa2.toByte
225+
case 116 => 0xa3.toByte
226+
case 117 => 0xa4.toByte
227+
case 118 => 0xa5.toByte
228+
case 119 => 0xa6.toByte
229+
case 120 => 0xa7.toByte
230+
case 121 => 0xa8.toByte
231+
case 122 => 0xa9.toByte
232+
case 161 => 0xaa.toByte
233+
case 191 => 0xab.toByte
234+
case 208 => 0xac.toByte
235+
case 221 => 0xad.toByte
236+
case 222 => 0xae.toByte
237+
case 174 => 0xaf.toByte
238+
case 162 => 0xb0.toByte
239+
case 35 => 0xb1.toByte
240+
case 165 => 0xb2.toByte
241+
case 183 => 0xb3.toByte
242+
case 169 => 0xb4.toByte
243+
case 64 => 0xb5.toByte
244+
case 182 => 0xb6.toByte
245+
case 188 => 0xb7.toByte
246+
case 189 => 0xb8.toByte
247+
case 190 => 0xb9.toByte
248+
case 172 => 0xba.toByte
249+
case 124 => 0xbb.toByte
250+
case 175 => 0xbc.toByte
251+
case 168 => 0xbd.toByte
252+
case 180 => 0xbe.toByte
253+
case 215 => 0xbf.toByte
254+
case 224 => 0xc0.toByte
255+
case 65 => 0xc1.toByte
256+
case 66 => 0xc2.toByte
257+
case 67 => 0xc3.toByte
258+
case 68 => 0xc4.toByte
259+
case 69 => 0xc5.toByte
260+
case 70 => 0xc6.toByte
261+
case 71 => 0xc7.toByte
262+
case 72 => 0xc8.toByte
263+
case 73 => 0xc9.toByte
264+
case 173 => 0xca.toByte
265+
case 244 => 0xcb.toByte
266+
case 246 => 0xcc.toByte
267+
case 166 => 0xcd.toByte
268+
case 243 => 0xce.toByte
269+
case 245 => 0xcf.toByte
270+
case 232 => 0xd0.toByte
271+
case 74 => 0xd1.toByte
272+
case 75 => 0xd2.toByte
273+
case 76 => 0xd3.toByte
274+
case 77 => 0xd4.toByte
275+
case 78 => 0xd5.toByte
276+
case 79 => 0xd6.toByte
277+
case 80 => 0xd7.toByte
278+
case 81 => 0xd8.toByte
279+
case 82 => 0xd9.toByte
280+
case 185 => 0xda.toByte
281+
case 251 => 0xdb.toByte
282+
case 252 => 0xdc.toByte
283+
case 96 => 0xdd.toByte
284+
case 250 => 0xde.toByte
285+
case 255 => 0xdf.toByte
286+
case 231 => 0xe0.toByte
287+
case 247 => 0xe1.toByte
288+
case 83 => 0xe2.toByte
289+
case 84 => 0xe3.toByte
290+
case 85 => 0xe4.toByte
291+
case 86 => 0xe5.toByte
292+
case 87 => 0xe6.toByte
293+
case 88 => 0xe7.toByte
294+
case 89 => 0xe8.toByte
295+
case 90 => 0xe9.toByte
296+
case 178 => 0xea.toByte
297+
case 212 => 0xeb.toByte
298+
case 214 => 0xec.toByte
299+
case 210 => 0xed.toByte
300+
case 211 => 0xee.toByte
301+
case 213 => 0xef.toByte
302+
case 48 => 0xf0.toByte
303+
case 49 => 0xf1.toByte
304+
case 50 => 0xf2.toByte
305+
case 51 => 0xf3.toByte
306+
case 52 => 0xf4.toByte
307+
case 53 => 0xf5.toByte
308+
case 54 => 0xf6.toByte
309+
case 55 => 0xf7.toByte
310+
case 56 => 0xf8.toByte
311+
case 57 => 0xf9.toByte
312+
case 179 => 0xfa.toByte
313+
case 219 => 0xfb.toByte
314+
case 220 => 0xfc.toByte
315+
case 217 => 0xfd.toByte
316+
case 218 => 0xfe.toByte
317+
case 159 => 0xff.toByte
318+
case _ => 0x40.toByte // defaults to space if mapping not available.
56319
}
57320
}

0 commit comments

Comments
 (0)