#744 Add the ability to specify default record length for the record length field mapping #745

Merged · 4 commits · Feb 24, 2025
7 changes: 7 additions & 0 deletions README.md
@@ -485,6 +485,13 @@ If the record field contains a string that can be mapped to a record size, you can add the mapping to the `record_length_map` option:
```
.option("record_length_map", """{"SEG1":100,"SEG2":200}""")
```

You can specify the default record size by defining the key "_":
```
.option("record_format", "F")
.option("record_length_field", "FIELD_STR")
.option("record_length_map", """{"SEG1":100,"SEG2":200,"_":100}""")
```
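
As an illustration, a minimal end-to-end read that exercises the default mapping might look like the sketch below (the copybook path, data path, and the `spark` session are assumptions, not part of this change):
```
// FIELD_STR drives the record length; values other than SEG1/SEG2 fall back to 100.
val df = spark.read
  .format("cobol")
  .option("copybook", "/path/to/copybook.cpy")
  .option("record_format", "F")
  .option("record_length_field", "FIELD_STR")
  .option("record_length_map", """{"SEG1":100,"SEG2":200,"_":100}""")
  .load("/path/to/data")
```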

### Use cases for various variable length formats

In order to understand the file format, it is often sufficient to look at the first 4 bytes of the file (in case of RDW-only files),
@@ -32,6 +32,7 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,

final private val lengthField = recordLengthField.map(_.field)
final private val lengthMap = recordLengthField.map(_.valueMap).getOrElse(Map.empty)
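// The special "_" key, when present in the mapping, provides the fallback length for values that are unmapped or null.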
final private val defaultRecordLength = lengthMap.get("_")
final private val isLengthMapEmpty = lengthMap.isEmpty

type RawRecord = (String, Array[Byte])
@@ -131,8 +132,8 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,
case i: Int => getRecordLengthFromMapping(i.toString)
case l: Long => getRecordLengthFromMapping(l.toString)
case s: String => getRecordLengthFromMapping(s)
case null => throw new IllegalStateException(s"Null encountered as a record length field (offset: $byteIndex, raw value: ${getBytesAsHexString(binaryDataStart)}).")
case _ => throw new IllegalStateException(s"Record length value of the field ${lengthAST.name} must be an integral type.")
case null => defaultRecordLength.getOrElse(throw new IllegalStateException(s"Null encountered as a record length field (offset: $byteIndex, raw value: ${getBytesAsHexString(binaryDataStart)})."))
case _ => throw new IllegalStateException(s"Record length value of the field ${lengthAST.name} must be an integral type.")
}
}
length + recordLengthAdjustment
@@ -141,7 +142,7 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,
final private def getRecordLengthFromMapping(v: String): Int = {
lengthMap.get(v) match {
case Some(len) => len
case None => throw new IllegalStateException(s"Record length value '$v' is not mapped to a record length.")
case None => defaultRecordLength.getOrElse(throw new IllegalStateException(s"Record length value '$v' is not mapped to a record length."))
}
}
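// Net effect of the two fallbacks above: an unmapped or null length value resolves to
// the "_" entry when one is defined; the exception is thrown only when it is not.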

@@ -225,6 +225,58 @@ class VRLRecordReaderSpec extends AnyWordSpec {

assert(ex.getMessage == "The record length field LEN must be an integral type or a value mapping must be specified.")
}

"the length mapping with default record length" in {
val copybookWithLengthMap =
""" 01 RECORD.
05 LEN_SPEC PIC X(1).
05 N PIC 9(2).
05 A PIC X(3).
"""

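// Three records, each prefixed by a 1-byte EBCDIC marker in LEN_SPEC:
// 'A' (0xC1) -> mapped length 4, 'B' (0xC2) -> mapped length 5,
// 'C' (0xC3) -> not in the map, so the default "_" length of 6 applies.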
val records = Array(
0xC1, 0xF1, 0xF2, 0xC1,
0xC2, 0xF3, 0xF4, 0xC2, 0xC3,
0xC3, 0xF5, 0xF6, 0xC4, 0xC5, 0xC6
).map(_.toByte)

val streamH = new ByteStreamMock(records)
val streamD = new ByteStreamMock(records)
val context = RawRecordContext(0, streamH, streamD, CopybookParser.parseSimple(copybookWithLengthMap), null, null, "")

val readerParameters = ReaderParameters(
lengthFieldExpression = Some("LEN_SPEC"),
lengthFieldMap = Map("A" -> 4, "B" -> 5, "_" -> 6))

val reader = getUseCase(
copybook = copybookWithLengthMap,
records = records,
lengthFieldExpression = Some("LEN_SPEC"),
recordExtractor = Some(new FixedWithRecordLengthExprRawRecordExtractor(context, readerParameters)))

assert(reader.hasNext)
val (segment1, record1) = reader.next()
assert(reader.hasNext)
val (segment2, record2) = reader.next()
assert(reader.hasNext)
val (segment3, record3) = reader.next()
assert(!reader.hasNext)

assert(segment1.isEmpty)
assert(segment2.isEmpty)
assert(segment3.isEmpty)
assert(record1.length == 4)
assert(record2.length == 5)
assert(record3.length == 6)
assert(record1(0) == 0xC1.toByte)
assert(record1(1) == 0xF1.toByte)
assert(record1(2) == 0xF2.toByte)
assert(record1(3) == 0xC1.toByte)
assert(record2(0) == 0xC2.toByte)
assert(record2(1) == 0xF3.toByte)
assert(record3(0) == 0xC3.toByte)
assert(record3(1) == 0xF5.toByte)
}
}

"work with record length expressions" in {
Expand Down
40 changes: 0 additions & 40 deletions pom.xml
@@ -367,46 +367,6 @@
</dependency>
</dependencies>
</plugin>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<version>${maven.rat.plugin.version}</version>
<executions>
<execution>
<phase>verify</phase>
<goals>
<goal>check</goal>
</goals>
</execution>
</executions>
<configuration>
<excludes>
<exclude>**/*.sbt</exclude>
<exclude>**/*.properties</exclude>
<exclude>**/*.json</exclude>
<exclude>**/*.csv</exclude>
<exclude>**/*.txt</exclude>
<exclude>**/*.bin</exclude>
<exclude>**/*.md</exclude>
<exclude>**/*.iml</exclude>
<exclude>**/*.csv</exclude>
<exclude>**/*.cob</exclude>
<exclude>**/*.cpy</exclude>
<exclude>**/*.svg</exclude>
<exclude>**/*.plot</exclude>
<exclude>**/*.yml</exclude>
<exclude>**/*.interp</exclude>
<exclude>**/*.tokens</exclude>
<exclude>**/_*</exclude>
<exclude>**/dependency-reduced-pom.xml</exclude>
<exclude>**/.idea/**</exclude>
<exclude>**/target/**</exclude>
<exclude>**/org.apache.spark.sql.sources.DataSourceRegister</exclude>
<exclude>dependency-reduced-pom.xml</exclude>
<exclude>.github/CODEOWNERS</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
