Skip to content

Commit f0bd143

Browse files
committed
String + byte arrays handling: major rewrite.
* Removed StrByteLimitType, StrEosType and StrZType in favor of StrFromBytesType, which wraps one of a variety of BytesType variants.
* No language uses the `read_str*` runtime API anymore; instead, each one reads a byte array and then wraps it into a string using the translator's `bytesToStr(...)` method.
* Added BytesTerminatedType (to mirror the lost functionality of StrZType, but without the built-in encoding handling).
* Added "pad-right", "terminator" and "include" support to normal strings (currently supported only in Java & Ruby; every other language is expected to implement `bytesPadTermExpr(...)`).
* Fixed GraphvizClassCompiler accordingly (probably not very pretty).
1 parent e74d667 commit f0bd143

File tree

12 files changed

+192
-148
lines changed

12 files changed

+192
-148
lines changed

shared/src/main/scala/io/kaitai/struct/GraphvizClassCompiler.scala

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -249,11 +249,10 @@ class GraphvizClassCompiler(topClass: ClassSpec, out: LanguageOutputWriter) exte
249249
case IntMultiType(_, width, _) => width.width.toString
250250
case FloatMultiType(width, _) => width.width.toString
251251
case FixedBytesType(contents, _) => contents.length.toString
252-
case BytesEosType(_) => END_OF_STREAM
253-
case BytesLimitType(ex, _) => expressionSize(ex, attrName)
254-
case StrByteLimitType(ex, _) => expressionSize(ex, attrName)
255-
case StrEosType(_) => END_OF_STREAM
256-
case _: StrZType => UNKNOWN
252+
case _: BytesEosType => END_OF_STREAM
253+
case blt: BytesLimitType => expressionSize(blt.size, attrName)
254+
case _: BytesTerminatedType => UNKNOWN
255+
case StrFromBytesType(basedOn, _) => dataTypeSizeAsString(basedOn, attrName)
257256
case UserTypeByteLimit(_, ex, _) => expressionSize(ex, attrName)
258257
case _: UserTypeEos => END_OF_STREAM
259258
case UserTypeInstream(_) => UNKNOWN
@@ -412,11 +411,10 @@ object GraphvizClassCompiler extends LanguageCompilerStatic {
412411
case IntMultiType(_, width, _) => Some(width.width)
413412
case FixedBytesType(contents, _) => Some(contents.length)
414413
case FloatMultiType(width, _) => Some(width.width)
415-
case BytesEosType(_) => None
416-
case BytesLimitType(ex, _) => evaluateIntLiteral(ex)
417-
case StrByteLimitType(ex, _) => evaluateIntLiteral(ex)
418-
case StrEosType(_) => None
419-
case _: StrZType => None
414+
case _: BytesEosType => None
415+
case blt: BytesLimitType => evaluateIntLiteral(blt.size)
416+
case _: BytesTerminatedType => None
417+
case StrFromBytesType(basedOn, _) => dataTypeByteSize(basedOn)
420418
case UserTypeByteLimit(_, ex, _) => evaluateIntLiteral(ex)
421419
case _: UserTypeEos => None
422420
case UserTypeInstream(_) => None
@@ -429,11 +427,8 @@ object GraphvizClassCompiler extends LanguageCompilerStatic {
429427
case rt: ReadableType => rt.apiCall
430428
case ut: UserType => type2display(ut.name)
431429
case FixedBytesType(contents, _) => contents.map(_.formatted("%02X")).mkString(" ")
432-
case _: BytesType => ""
433-
case StrByteLimitType(_, encoding) => s"str($encoding)"
434-
case StrEosType(encoding) => s"str($encoding)"
435-
case StrZType(encoding, terminator, include, consume, eosError) =>
436-
val args = ListBuffer(encoding)
430+
case BytesTerminatedType(terminator, include, consume, eosError, _) =>
431+
val args = ListBuffer[String]()
437432
if (terminator != 0)
438433
args += s"term=$terminator"
439434
if (include)
@@ -442,7 +437,12 @@ object GraphvizClassCompiler extends LanguageCompilerStatic {
442437
args += "don't consume"
443438
if (!eosError)
444439
args += "ignore EOS"
445-
s"strz(${args.mkString(", ")})"
440+
args.mkString(", ")
441+
case _: BytesType => ""
442+
case StrFromBytesType(basedOn, encoding) =>
443+
val bytesStr = dataTypeName(basedOn)
444+
val comma = if (bytesStr.isEmpty) "" else ", "
445+
s"str($bytesStr$comma$encoding)"
446446
case EnumType(name, basedOn) =>
447447
s"${dataTypeName(basedOn)}${type2display(name)}"
448448
case BitsType(width) => s"b$width"

shared/src/main/scala/io/kaitai/struct/exprlang/DataType.scala

Lines changed: 73 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package io.kaitai.struct.exprlang
22

3+
import io.kaitai.struct.exprlang.Ast.expr
34
import io.kaitai.struct.format._
45

56
/**
@@ -74,20 +75,30 @@ object DataType {
7475
override def process = None
7576
}
7677
case class FixedBytesType(contents: Array[Byte], override val process: Option[ProcessExpr]) extends BytesType
77-
case class BytesEosType(override val process: Option[ProcessExpr]) extends BytesType
78-
case class BytesLimitType(s: Ast.expr, override val process: Option[ProcessExpr]) extends BytesType
79-
80-
abstract class StrType extends BaseType
81-
case object CalcStrType extends StrType
82-
case class StrEosType(encoding: String) extends StrType
83-
case class StrByteLimitType(s: Ast.expr, encoding: String) extends StrType
84-
case class StrZType(
85-
encoding: String,
78+
case class BytesEosType(
79+
terminator: Option[Int],
80+
include: Boolean,
81+
padRight: Option[Int],
82+
override val process: Option[ProcessExpr]
83+
) extends BytesType
84+
case class BytesLimitType(
85+
size: Ast.expr,
86+
terminator: Option[Int],
87+
include: Boolean,
88+
padRight: Option[Int],
89+
override val process: Option[ProcessExpr]
90+
) extends BytesType
91+
case class BytesTerminatedType(
8692
terminator: Int,
8793
include: Boolean,
8894
consume: Boolean,
89-
eosError: Boolean
90-
) extends StrType
95+
eosError: Boolean,
96+
override val process: Option[ProcessExpr]
97+
) extends BytesType
98+
99+
abstract class StrType extends BaseType
100+
case object CalcStrType extends StrType
101+
case class StrFromBytesType(bytes: BytesType, encoding: String) extends StrType
91102

92103
case object BooleanType extends BaseType
93104
case class ArrayType(elType: BaseType) extends BaseType
@@ -139,10 +150,11 @@ object DataType {
139150
size: Option[Ast.expr],
140151
sizeEos: Boolean,
141152
encoding: Option[String],
142-
terminator: Int,
153+
terminator: Option[Int],
143154
include: Boolean,
144155
consume: Boolean,
145156
eosError: Boolean,
157+
padRight: Option[Int],
146158
contents: Option[Array[Byte]],
147159
enumRef: Option[String],
148160
process: Option[ProcessExpr]
@@ -152,14 +164,12 @@ object DataType {
152164
contents match {
153165
case Some(c) => FixedBytesType(c, process)
154166
case _ =>
155-
(size, sizeEos) match {
156-
case (Some(bs: Ast.expr), false) => BytesLimitType(bs, process)
157-
case (None, true) => BytesEosType(process)
158-
case (None, false) =>
159-
throw new YAMLParseException("no type: either 'size' or 'size-eos' must be specified", path)
160-
case (Some(_), true) =>
161-
throw new YAMLParseException("no type: only one of 'size' or 'size-eos' must be specified", path)
162-
}
167+
getByteArrayType(
168+
size, sizeEos,
169+
terminator, include, consume, eosError,
170+
padRight,
171+
process, path
172+
)
163173
}
164174
case Some(dt) => dt match {
165175
case "u1" => Int1Type(false)
@@ -194,19 +204,23 @@ object DataType {
194204
// either inside enum (any width) or (width != 1)
195205
BitsType(width)
196206
}
197-
case "str" =>
207+
case "str" | "strz" =>
198208
val enc = getEncoding(encoding, metaDef, path)
199-
(size, sizeEos) match {
200-
case (Some(bs: Ast.expr), false) => StrByteLimitType(bs, enc)
201-
case (None, true) => StrEosType(enc)
202-
case (None, false) =>
203-
throw new YAMLParseException(s"type $dt: either 'size' or 'size-eos' must be specified", path)
204-
case (Some(_), true) =>
205-
throw new YAMLParseException(s"type $dt: only one of 'size' or 'size-eos' must be specified", path)
209+
210+
// "strz" makes terminator = 0 by default
211+
val term = if (dt == "strz") {
212+
terminator.orElse(Some(0))
213+
} else {
214+
terminator
206215
}
207-
case "strz" =>
208-
val enc = getEncoding(encoding, metaDef, path)
209-
StrZType(enc, terminator, include, consume, eosError)
216+
217+
val bat = getByteArrayType(
218+
size, sizeEos,
219+
term, include, consume, eosError,
220+
padRight,
221+
process, path
222+
)
223+
StrFromBytesType(bat, enc)
210224
case _ =>
211225
val dtl = classNameToList(dt)
212226
(size, sizeEos) match {
@@ -234,6 +248,34 @@ object DataType {
234248
}
235249
}
236250

251+
private def getByteArrayType(
252+
size: Option[expr],
253+
sizeEos: Boolean,
254+
terminator: Option[Int],
255+
include: Boolean,
256+
consume: Boolean,
257+
eosError: Boolean,
258+
padRight: Option[Int],
259+
process: Option[ProcessExpr],
260+
path: List[String]
261+
) = {
262+
(size, sizeEos) match {
263+
case (Some(bs: expr), false) =>
264+
BytesLimitType(bs, terminator, include, padRight, process)
265+
case (None, true) =>
266+
BytesEosType(terminator, include, padRight, process)
267+
case (None, false) =>
268+
terminator match {
269+
case Some(term) =>
270+
BytesTerminatedType(term, include, consume, eosError, process)
271+
case None =>
272+
throw new YAMLParseException("'size', 'size-eos' or 'terminator' must be specified", path)
273+
}
274+
case (Some(_), true) =>
275+
throw new YAMLParseException("only one of 'size' or 'size-eos' must be specified", path)
276+
}
277+
}
278+
237279
def getEncoding(curEncoding: Option[String], metaDef: MetaDefaults, path: List[String]): String = {
238280
curEncoding.orElse(metaDef.encoding) match {
239281
case Some(enc) => enc

shared/src/main/scala/io/kaitai/struct/format/AttrSpec.scala

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -64,15 +64,12 @@ object AttrSpec {
6464
val LEGAL_KEYS_STR = Set(
6565
"size",
6666
"size-eos",
67-
"encoding"
68-
)
69-
70-
val LEGAL_KEYS_STRZ = Set(
71-
"encoding",
67+
"pad-right",
7268
"terminator",
7369
"consume",
7470
"include",
75-
"eos-error"
71+
"eos-error",
72+
"encoding"
7673
)
7774

7875
val LEGAL_KEYS_ENUM = Set(
@@ -99,10 +96,11 @@ object AttrSpec {
9996
val repeat = ParseUtils.getOptValueStr(srcMap, "repeat", path)
10097
val repeatExpr = ParseUtils.getOptValueStr(srcMap, "repeat-expr", path).map(Expressions.parse)
10198
val repeatUntil = ParseUtils.getOptValueStr(srcMap, "repeat-until", path).map(Expressions.parse)
102-
val terminator = ParseUtils.getOptValueInt(srcMap, "terminator", path).getOrElse(0)
99+
val terminator = ParseUtils.getOptValueInt(srcMap, "terminator", path)
103100
val consume = ParseUtils.getOptValueBool(srcMap, "consume", path).getOrElse(true)
104101
val include = ParseUtils.getOptValueBool(srcMap, "include", path).getOrElse(false)
105102
val eosError = ParseUtils.getOptValueBool(srcMap, "eos-error", path).getOrElse(true)
103+
val padRight = ParseUtils.getOptValueInt(srcMap, "pad-right", path)
106104
val enum = ParseUtils.getOptValueStr(srcMap, "enum", path)
107105

108106
val typObj = srcMap.get("type")
@@ -113,7 +111,7 @@ object AttrSpec {
113111
DataType.fromYaml(
114112
None, path, metaDef,
115113
size, sizeEos,
116-
encoding, terminator, include, consume, eosError,
114+
encoding, terminator, include, consume, eosError, padRight,
117115
contents, enum, process
118116
)
119117
case Some(x) =>
@@ -122,15 +120,15 @@ object AttrSpec {
122120
DataType.fromYaml(
123121
Some(simpleType), path, metaDef,
124122
size, sizeEos,
125-
encoding, terminator, include, consume, eosError,
123+
encoding, terminator, include, consume, eosError, padRight,
126124
contents, enum, process
127125
)
128126
case switchMap: Map[Any, Any] =>
129127
val switchMapStr = ParseUtils.anyMapToStrMap(switchMap, path)
130128
parseSwitch(
131129
switchMapStr, path, metaDef,
132130
size, sizeEos,
133-
encoding, terminator, include, consume, eosError,
131+
encoding, terminator, include, consume, eosError, padRight,
134132
contents, enum, process
135133
)
136134
case unknown =>
@@ -140,8 +138,7 @@ object AttrSpec {
140138

141139
val legalKeys = LEGAL_KEYS ++ (dataType match {
142140
case _: BytesType => LEGAL_KEYS_BYTES
143-
case _: StrEosType | _: StrByteLimitType => LEGAL_KEYS_STR
144-
case _: StrZType => LEGAL_KEYS_STRZ
141+
case _: StrFromBytesType => LEGAL_KEYS_STR
145142
case _: UserType => LEGAL_KEYS_BYTES
146143
case EnumType(_, _) => LEGAL_KEYS_ENUM
147144
case SwitchType(on, cases) => LEGAL_KEYS_BYTES
@@ -189,10 +186,11 @@ object AttrSpec {
189186
size: Option[Ast.expr],
190187
sizeEos: Boolean,
191188
encoding: Option[String],
192-
terminator: Int,
189+
terminator: Option[Int],
193190
include: Boolean,
194191
consume: Boolean,
195192
eosError: Boolean,
193+
padRight: Option[Int],
196194
contents: Option[Array[Byte]],
197195
enumRef: Option[String],
198196
process: Option[ProcessExpr]
@@ -208,7 +206,7 @@ object AttrSpec {
208206
Expressions.parse(condition) -> DataType.fromYaml(
209207
Some(typeName), path ++ List("cases"), metaDef,
210208
size, sizeEos,
211-
encoding, terminator, include, consume, eosError,
209+
encoding, terminator, include, consume, eosError, padRight,
212210
contents, enumRef, process
213211
)
214212
}
@@ -221,9 +219,9 @@ object AttrSpec {
221219
} else {
222220
(size, sizeEos) match {
223221
case (Some(sizeValue), false) =>
224-
Map(SwitchType.ELSE_CONST -> BytesLimitType(sizeValue, process))
222+
Map(SwitchType.ELSE_CONST -> BytesLimitType(sizeValue, None, false, None, process))
225223
case (None, true) =>
226-
Map(SwitchType.ELSE_CONST -> BytesEosType(process))
224+
Map(SwitchType.ELSE_CONST -> BytesEosType(None, false, None, process))
227225
case (None, false) =>
228226
Map()
229227
case (Some(_), true) =>

shared/src/main/scala/io/kaitai/struct/languages/CSharpCompiler.scala

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -226,17 +226,12 @@ class CSharpCompiler(config: RuntimeConfig, out: LanguageOutputWriter)
226226
dataType match {
227227
case t: ReadableType =>
228228
s"$io.Read${Utils.capitalize(t.apiCall)}()"
229-
// Aw, crap, can't use interpolated strings here: https://issues.scala-lang.org/browse/SI-6476
230-
case StrByteLimitType(bs, encoding) =>
231-
s"$io.ReadStrByteLimit(${expression(bs)}, " + '"' + encoding + "\")"
232-
case StrEosType(encoding) =>
233-
io + ".ReadStrEos(\"" + encoding + "\")"
234-
case StrZType(encoding, terminator, include, consume, eosError) =>
235-
io + ".ReadStrz(\"" + encoding + '"' + s", $terminator, $include, $consume, $eosError)"
236-
case BytesLimitType(size, _) =>
237-
s"$io.ReadBytes(${expression(size)})"
238-
case BytesEosType(_) =>
229+
case blt: BytesLimitType =>
230+
s"$io.ReadBytes(${expression(blt.size)})"
231+
case _: BytesEosType =>
239232
s"$io.ReadBytesFull()"
233+
case BytesTerminatedType(terminator, include, consume, eosError, _) =>
234+
s"$io.ReadBytesTerm($terminator, $include, $consume, $eosError)"
240235
case BitsType1 =>
241236
s"$io.ReadBitsInt(1) != 0"
242237
case BitsType(width: Int) =>

shared/src/main/scala/io/kaitai/struct/languages/CppCompiler.scala

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class CppCompiler(config: RuntimeConfig, outSrc: LanguageOutputWriter, outHdr: L
5151
outHdr.puts(s"#if KAITAI_STRUCT_VERSION < ${minVer}L")
5252
outHdr.puts(
5353
"#error \"Incompatible Kaitai Struct C++/STL API: version " +
54-
KSVersion.minimalRuntime + " or later is required"
54+
KSVersion.minimalRuntime + " or later is required\""
5555
)
5656
outHdr.puts("#endif")
5757
}
@@ -392,17 +392,12 @@ class CppCompiler(config: RuntimeConfig, outSrc: LanguageOutputWriter, outHdr: L
392392
dataType match {
393393
case t: ReadableType =>
394394
s"$io->read_${t.apiCall}()"
395-
// Aw, crap, can't use interpolated strings here: https://issues.scala-lang.org/browse/SI-6476
396-
case StrByteLimitType(bs, encoding) =>
397-
s"$io->read_str_byte_limit(${expression(bs)}, ${encodingToStr(encoding)})"
398-
case StrEosType(encoding) =>
399-
s"$io->read_str_eos(${encodingToStr(encoding)})"
400-
case StrZType(encoding, terminator, include, consume, eosError) =>
401-
s"$io->read_strz(${encodingToStr(encoding)}, $terminator, $include, $consume, $eosError)"
402-
case BytesLimitType(size, _) =>
403-
s"$io->read_bytes(${expression(size)})"
404-
case BytesEosType(_) =>
395+
case blt: BytesLimitType =>
396+
s"$io->read_bytes(${expression(blt.size)})"
397+
case _: BytesEosType =>
405398
s"$io->read_bytes_full()"
399+
case BytesTerminatedType(terminator, include, consume, eosError, _) =>
400+
s"$io->read_bytes_term($terminator, $include, $consume, $eosError)"
406401
case BitsType1 =>
407402
s"$io->read_bits_int(1)"
408403
case BitsType(width: Int) =>

0 commit comments

Comments
 (0)