#744 Add the ability to specify default record length for the record length field mapping #745

Merged · 4 commits · Feb 24, 2025
7 changes: 7 additions & 0 deletions README.md
@@ -485,6 +485,13 @@ If the record field contains a string that can be mapped to a record size, you can add the mapping to the `record_length_map` option:
```
.option("record_length_map", """{"SEG1":100,"SEG2":200}""")
```

You can specify the default record size by defining the key "_":
```
.option("record_format", "F")
.option("record_length_field", "FIELD_STR")
.option("record_length_map", """{"SEG1":100,"SEG2":200,"_":100}""")
```
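
As an illustration, a minimal end-to-end read that exercises the default mapping might look like the sketch below (the copybook path, data path, and the `spark` session are assumptions, not part of this change):
```
// FIELD_STR drives the record length; values other than SEG1/SEG2 fall back to 100.
val df = spark.read
  .format("cobol")
  .option("copybook", "/path/to/copybook.cpy")
  .option("record_format", "F")
  .option("record_length_field", "FIELD_STR")
  .option("record_length_map", """{"SEG1":100,"SEG2":200,"_":100}""")
  .load("/path/to/data")
```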

### Use cases for various variable length formats

In order to understand the file format, it is often sufficient to look at the first 4 bytes of the file (in case of RDW-only files),
@@ -32,6 +32,7 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,

final private val lengthField = recordLengthField.map(_.field)
final private val lengthMap = recordLengthField.map(_.valueMap).getOrElse(Map.empty)
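// The special "_" key, when present in the mapping, provides the fallback length for values that are unmapped or null.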
final private val defaultRecordLength = lengthMap.get("_")
final private val isLengthMapEmpty = lengthMap.isEmpty

type RawRecord = (String, Array[Byte])
@@ -131,8 +132,8 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,
case i: Int => getRecordLengthFromMapping(i.toString)
case l: Long => getRecordLengthFromMapping(l.toString)
case s: String => getRecordLengthFromMapping(s)
case null => throw new IllegalStateException(s"Null encountered as a record length field (offset: $byteIndex, raw value: ${getBytesAsHexString(binaryDataStart)}).")
case _ => throw new IllegalStateException(s"Record length value of the field ${lengthAST.name} must be an integral type.")
case null => defaultRecordLength.getOrElse(throw new IllegalStateException(s"Null encountered as a record length field (offset: $byteIndex, raw value: ${getBytesAsHexString(binaryDataStart)})."))
case _ => throw new IllegalStateException(s"Record length value of the field ${lengthAST.name} must be an integral type.")
}
}
length + recordLengthAdjustment
@@ -141,7 +142,7 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,
final private def getRecordLengthFromMapping(v: String): Int = {
lengthMap.get(v) match {
case Some(len) => len
case None => throw new IllegalStateException(s"Record length value '$v' is not mapped to a record length.")
case None => defaultRecordLength.getOrElse(throw new IllegalStateException(s"Record length value '$v' is not mapped to a record length."))
}
}
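// Net effect of the two fallbacks above: an unmapped or null length value resolves to
// the "_" entry when one is defined; the exception is thrown only when it is not.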

@@ -225,6 +225,58 @@ class VRLRecordReaderSpec extends AnyWordSpec {

assert(ex.getMessage == "The record length field LEN must be an integral type or a value mapping must be specified.")
}

"the length mapping with default record length" in {
val copybookWithLengthMap =
""" 01 RECORD.
05 LEN_SPEC PIC X(1).
05 N PIC 9(2).
05 A PIC X(3).
"""

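// Three records, each prefixed by a 1-byte EBCDIC marker in LEN_SPEC:
// 'A' (0xC1) -> mapped length 4, 'B' (0xC2) -> mapped length 5,
// 'C' (0xC3) -> not in the map, so the default "_" length of 6 applies.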
val records = Array(
0xC1, 0xF1, 0xF2, 0xC1,
0xC2, 0xF3, 0xF4, 0xC2, 0xC3,
0xC3, 0xF5, 0xF6, 0xC4, 0xC5, 0xC6
).map(_.toByte)

val streamH = new ByteStreamMock(records)
val streamD = new ByteStreamMock(records)
val context = RawRecordContext(0, streamH, streamD, CopybookParser.parseSimple(copybookWithLengthMap), null, null, "")

val readerParameters = ReaderParameters(
lengthFieldExpression = Some("LEN_SPEC"),
lengthFieldMap = Map("A" -> 4, "B" -> 5, "_" -> 6))

val reader = getUseCase(
copybook = copybookWithLengthMap,
records = records,
lengthFieldExpression = Some("LEN_SPEC"),
recordExtractor = Some(new FixedWithRecordLengthExprRawRecordExtractor(context, readerParameters)))

assert(reader.hasNext)
val (segment1, record1) = reader.next()
assert(reader.hasNext)
val (segment2, record2) = reader.next()
assert(reader.hasNext)
val (segment3, record3) = reader.next()
assert(!reader.hasNext)

assert(segment1.isEmpty)
assert(segment2.isEmpty)
assert(segment3.isEmpty)
assert(record1.length == 4)
assert(record2.length == 5)
assert(record3.length == 6)
assert(record1(0) == 0xC1.toByte)
assert(record1(1) == 0xF1.toByte)
assert(record1(2) == 0xF2.toByte)
assert(record1(3) == 0xC1.toByte)
assert(record2(0) == 0xC2.toByte)
assert(record2(1) == 0xF3.toByte)
assert(record3(0) == 0xC3.toByte)
assert(record3(1) == 0xF5.toByte)
}
}

"work with record length expressions" in {
Expand Down
40 changes: 0 additions & 40 deletions pom.xml
@@ -367,46 +367,6 @@
</dependency>
</dependencies>
</plugin>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<version>${maven.rat.plugin.version}</version>
<executions>
<execution>
<phase>verify</phase>
<goals>
<goal>check</goal>
</goals>
</execution>
</executions>
<configuration>
<excludes>
<exclude>**/*.sbt</exclude>
<exclude>**/*.properties</exclude>
<exclude>**/*.json</exclude>
<exclude>**/*.csv</exclude>
<exclude>**/*.txt</exclude>
<exclude>**/*.bin</exclude>
<exclude>**/*.md</exclude>
<exclude>**/*.iml</exclude>
<exclude>**/*.csv</exclude>
<exclude>**/*.cob</exclude>
<exclude>**/*.cpy</exclude>
<exclude>**/*.svg</exclude>
<exclude>**/*.plot</exclude>
<exclude>**/*.yml</exclude>
<exclude>**/*.interp</exclude>
<exclude>**/*.tokens</exclude>
<exclude>**/_*</exclude>
<exclude>**/dependency-reduced-pom.xml</exclude>
<exclude>**/.idea/**</exclude>
<exclude>**/target/**</exclude>
<exclude>**/org.apache.spark.sql.sources.DataSourceRegister</exclude>
<exclude>dependency-reduced-pom.xml</exclude>
<exclude>.github/CODEOWNERS</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
