From fcc46ff41bcc36ef28f43fbda6e278c96a86c5ad Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Fri, 7 Feb 2025 03:41:28 +0100 Subject: [PATCH 01/39] Add `arsd.ini` module --- dub.json | 12 ++ ini.d | 563 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 575 insertions(+) create mode 100644 ini.d diff --git a/dub.json b/dub.json index 64da328c..7dfb4c3d 100644 --- a/dub.json +++ b/dub.json @@ -787,6 +787,18 @@ "dflags-dmd": ["-mv=arsd.archive=$PACKAGE_DIR/archive.d"], "dflags-ldc": ["--mv=arsd.archive=$PACKAGE_DIR/archive.d"], "dflags-gdc": ["-fmodule-file=arsd.archive=$PACKAGE_DIR/archive.d"] + }, + { + "name": "ini", + "description": "INI file support - INI parser with support for various dialects.", + "targetType": "library", + "sourceFiles": ["ini.d"], + "dependencies": { + "arsd-official:core":"*" + }, + "dflags-dmd": ["-mv=arsd.ini=$PACKAGE_DIR/ini.d"], + "dflags-ldc": ["--mv=arsd.ini=$PACKAGE_DIR/ini.d"], + "dflags-gdc": ["-fmodule-file=arsd.ini=$PACKAGE_DIR/ini.d"] } ] } diff --git a/ini.d b/ini.d new file mode 100644 index 00000000..4691fba3 --- /dev/null +++ b/ini.d @@ -0,0 +1,563 @@ +/+ + == arsd.ini == + Copyright Elias Batek (0xEAB) 2025. + Distributed under the Boost Software License, Version 1.0. ++/ +/++ + INI configuration file support + +/ +module arsd.ini; + +/++ + Determines whether a type `T` is a string type compatible with this library. 
+ +/ +enum isCompatibleString(T) = (is(T == string) || is(T == const(char)[]) || is(T == char[])); + +//dfmt off +/// +enum IniDialect : ulong { + lite = 0, + lineComments = 0b_0000_0000_0000_0001, + inlineComments = 0b_0000_0000_0000_0011, + hashLineComments = 0b_0000_0000_0000_0100, + hashInLineComments = 0b_0000_0000_0000_1100, + escapeSequences = 0b_0000_0000_0001_0000, + lineFolding = 0b_0000_0000_0010_0000, + quotedStrings = 0b_0000_0000_0100_0000, + arrays = 0b_0000_0000_1000_0000, + colonKeys = 0b_0000_0001_0000_0000, + defaults = (lineComments | quotedStrings), +} +//dfmt on + +private bool hasFeature(ulong dialect, ulong feature) @safe pure nothrow @nogc { + return ((dialect & feature) > 0); +} + +/// +public enum IniTokenType { + invalid = 0, + + whitespace, + bracketOpen, + bracketClose, + keyValueSeparator, + lineBreak, + + comment, + + key, + value, + sectionHeader, +} + +/// +struct IniToken(string) if (isCompatibleString!string) { + + /// + IniTokenType type; + + /// + string data; +} + +private alias TokenType = IniTokenType; +private alias Dialect = IniDialect; + +private enum LocationState { + newLine, + key, + value, + sectionHeader, +} + +/++ + Low-level INI parser + +/ +struct IniParser( + IniDialect dialect = IniDialect.defaults, + string = immutable(char)[] +) if (isCompatibleString!string) { + + public { + /// + alias Token = IniToken!string; + } + + private { + string _source; + Token _front; + bool _empty = true; + + LocationState _locationState = LocationState.newLine; + } + +@safe pure nothrow @nogc: + + /// + public this(string rawIni) { + _source = rawIni; + _empty = false; + + this.popFront(); + } + + // Range API + public { + + /// + bool empty() const { + return _empty; + } + + /// + Token front() inout { + return _front; + } + + /// + void popFront() { + if (_source.length == 0) { + _empty = true; + return; + } + + _front = this.fetchFront(); + } + + /// + typeof(this) save() inout { + return this; + } + } + + // extras + public { 
+ + /++ + Skips tokens that are irrelevant for further processing + + Returns: + true = if there are no further tokens, + i.e. whether the range is empty now + +/ + bool skipIrrelevant(bool skipComments = true) { + static bool isIrrelevant(const TokenType type, const bool skipComments) { + pragma(inline, true); + + final switch (type) with (TokenType) { + case invalid: + return false; + + case whitespace: + case bracketOpen: + case bracketClose: + case keyValueSeparator: + case lineBreak: + return true; + + case comment: + return skipComments; + + case sectionHeader: + case key: + case value: + return false; + } + } + + while (!this.empty) { + const irrelevant = isIrrelevant(_front.type, skipComments); + + if (!irrelevant) { + return false; + } + + this.popFront(); + } + + return true; + } + } + + private { + + bool isOnFinalChar() const { + pragma(inline, true); + return (_source.length == 1); + } + + bool isAtStartOfLineOrEquivalent() { + return (_locationState == LocationState.newLine); + } + + Token makeToken(TokenType type, size_t length) { + auto token = Token(type, _source[0 .. length]); + _source = _source[length .. $]; + return token; + } + + Token makeToken(TokenType type, size_t length, size_t skip) { + _source = _source[skip .. $]; + return this.makeToken(type, length); + } + + Token lexWhitespace() { + foreach (immutable idxM1, const c; _source[1 .. $]) { + switch (c) { + case '\x09': + case '\x0B': + case '\x0C': + case ' ': + break; + + default: + return this.makeToken(TokenType.whitespace, (idxM1 + 1)); + } + } + + // all whitespace + return this.makeToken(TokenType.whitespace, _source.length); + } + + Token lexComment() { + foreach (immutable idxM1, const c; _source[1 .. 
$]) { + switch (c) { + default: + break; + + case '\x0A': + case '\x0D': + return this.makeToken(TokenType.comment, idxM1, 1); + } + } + + return this.makeToken(TokenType.comment, (-1 + _source.length), 1); + } + + Token lexTextImpl(TokenType tokenType)() { + + enum Result { + end, + regular, + whitespace, + } + + static if (dialect.hasFeature(Dialect.quotedStrings)) { + bool inQuotedString = false; + + if (_source[0] == '"') { + inQuotedString = true; + + // chomp quote initiator + _source = _source[1 .. $]; + } + } else { + enum inQuotedString = false; + } + + Result nextChar(const char c) @safe pure nothrow @nogc { + pragma(inline, true); + + switch (c) { + default: + return Result.regular; + + case '\x09': + case '\x0B': + case '\x0C': + case ' ': + return (inQuotedString) ? Result.regular : Result.whitespace; + + case '\x0A': + case '\x0D': + return (inQuotedString) + ? Result.regular : Result.end; + + case '"': + return (inQuotedString) + ? Result.end : Result.regular; + + case '#': + if (dialect.hasFeature(Dialect.hashInLineComments)) { + return (inQuotedString) + ? Result.regular : Result.end; + } else { + return Result.regular; + } + + case ';': + if (dialect.hasFeature(Dialect.inlineComments)) { + return (inQuotedString) + ? Result.regular : Result.end; + } else { + return Result.regular; + } + + case ':': + static if (dialect.hasFeature(Dialect.colonKeys)) { + goto case '='; + } else { + return Result.regular; + } + + case '=': + static if (tokenType == TokenType.key) { + return (inQuotedString) + ? Result.regular : Result.end; + } else { + return Result.regular; + } + + case ']': + static if (tokenType == TokenType.sectionHeader) { + return (inQuotedString) + ? 
Result.regular : Result.end; + } else { + return Result.regular; + } + } + + assert(false, "Bug: This should have been unreachable."); + } + + size_t idxLastText = 0; + foreach (immutable idx, const c; _source) { + const status = nextChar(c); + + if (status == Result.end) { + break; + } else if (status == Result.whitespace) { + continue; + } + + idxLastText = idx; + } + + const idxEOT = (idxLastText + 1); + auto token = Token(tokenType, _source[0 .. idxEOT]); + _source = _source[idxEOT .. $]; + + if (inQuotedString) { + // chomp quote terminator + _source = _source[1 .. $]; + } + + return token; + } + + Token lexText() { + final switch (_locationState) { + case LocationState.newLine: + case LocationState.key: + return this.lexTextImpl!(TokenType.key); + + case LocationState.value: + return this.lexTextImpl!(TokenType.value); + + case LocationState.sectionHeader: + return this.lexTextImpl!(TokenType.sectionHeader); + } + } + + Token fetchFront() { + switch (_source[0]) { + + default: + return this.lexText(); + + case '\x0A': { + _locationState = LocationState.newLine; + return this.makeToken(TokenType.lineBreak, 1); + } + + case '\x0D': { + _locationState = LocationState.newLine; + + // CR? + if (this.isOnFinalChar) { + return this.makeToken(TokenType.lineBreak, 1); + } + + // CRLF? 
+ if (_source[1] == '\x0A') { + return this.makeToken(TokenType.lineBreak, 2); + } + + // CR + return this.makeToken(TokenType.lineBreak, 1); + } + + case '\x09': + case '\x0B': + case '\x0C': + case ' ': + return this.lexWhitespace(); + + case ':': + static if (dialect.hasFeature(Dialect.colonKeys)) { + goto case '='; + } + return this.lexText(); + + case '=': + _locationState = LocationState.value; + return this.makeToken(TokenType.keyValueSeparator, 1); + + case '[': + _locationState = LocationState.sectionHeader; + return this.makeToken(TokenType.bracketOpen, 1); + + case ']': + _locationState = LocationState.key; + return this.makeToken(TokenType.bracketClose, 1); + + case ';': { + static if (dialect.hasFeature(Dialect.inlineComments)) { + return this.lexComment(); + } else static if (dialect.hasFeature(Dialect.lineComments)) { + if (this.isAtStartOfLineOrEquivalent) { + return this.lexComment(); + } + return this.lexText(); + } else { + return this.lexText(); + } + } + + case '#': { + static if (dialect.hasFeature(Dialect.hashInLineComments)) { + return this.lexComment(); + } else static if (dialect.hasFeature(Dialect.hashLineComments)) { + if (this.isAtStartOfLineOrEquivalent) { + return this.lexComment(); + } + return this.lexText(); + } else { + return this.lexText(); + } + } + } + } + } +} + +@safe unittest { + + static immutable document = `; This is a comment. 
+[section1] +s1key1 = value1 +s1key2 = value2 + +; Another comment + +[section no.2] +s2key1 = "value3" +s2key2 = value no.4 +`; + + auto parser = IniParser!()(document); + alias Token = typeof(parser).Token; + + { + assert(!parser.empty); + assert(parser.front == Token(TokenType.comment, " This is a comment.")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.type == TokenType.lineBreak); + } + + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front == Token(TokenType.bracketOpen, "[")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front == Token(TokenType.sectionHeader, "section1")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front == Token(TokenType.bracketClose, "]")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.type == TokenType.lineBreak); + } + + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front == Token(TokenType.key, "s1key1")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front == Token(TokenType.whitespace, " ")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front == Token(TokenType.keyValueSeparator, "=")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front == Token(TokenType.whitespace, " ")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front == Token(TokenType.value, "value1")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.type == TokenType.lineBreak); + } + + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front == Token(TokenType.key, "s1key2")); + + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(!parser.empty); + assert(parser.front == Token(TokenType.value, "value2"), parser.front.data); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.type == TokenType.lineBreak); + } + + { + assert(!parser.skipIrrelevant()); + assert(!parser.empty); + assert(parser.front == 
Token(TokenType.sectionHeader, "section no.2")); + } + + { + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(!parser.empty); + assert(parser.front == Token(TokenType.key, "s2key1")); + + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(!parser.empty); + assert(parser.front == Token(TokenType.value, "value3")); + } + + { + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(!parser.empty); + assert(parser.front == Token(TokenType.key, "s2key2")); + + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(!parser.empty); + assert(parser.front == Token(TokenType.value, "value no.4")); + } + + parser.popFront(); + assert(parser.skipIrrelevant()); + assert(parser.empty()); +} From 2e12f1a8f528bfff451ca6f4dd68e9db5b06f801 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Fri, 7 Feb 2025 05:12:06 +0100 Subject: [PATCH 02/39] Add convenient INI DOM parser --- ini.d | 221 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 218 insertions(+), 3 deletions(-) diff --git a/ini.d b/ini.d index 4691fba3..9202c1c4 100644 --- a/ini.d +++ b/ini.d @@ -73,6 +73,11 @@ private enum LocationState { /++ Low-level INI parser + + See_also: + $(LIST + * [parseIniDocument] + ) +/ struct IniParser( IniDialect dialect = IniDialect.defaults, @@ -126,7 +131,7 @@ struct IniParser( } /// - typeof(this) save() inout { + inout(typeof(this)) save() inout { return this; } } @@ -442,9 +447,50 @@ struct IniParser( } } +/// @safe unittest { + // INI document (demo data) + static immutable string rawIniDocument = `; This is a comment. +[section1] +foo = bar ;another comment +oachkatzl = schwoaf ;try pronouncing that +`; + + // Combine feature flags to build the required dialect. + const myDialect = (Dialect.defaults | Dialect.inlineComments); + + // Instantiate a new parser and supply our document string. 
+ auto parser = IniParser!(myDialect)(rawIniDocument); + + int comments = 0; + int sections = 0; + int keys = 0; + int values = 0; + + // Process token by token. + foreach (const parser.Token token; parser) { + if (token.type == IniTokenType.comment) { + ++comments; + } + if (token.type == IniTokenType.sectionHeader) { + ++sections; + } + if (token.type == IniTokenType.key) { + ++keys; + } + if (token.type == IniTokenType.value) { + ++values; + } + } + + assert(comments == 3); + assert(sections == 1); + assert(keys == 2); + assert(values == 2); +} - static immutable document = `; This is a comment. +@safe unittest { + static immutable string rawIniDocument = `; This is a comment. [section1] s1key1 = value1 s1key2 = value2 @@ -456,7 +502,7 @@ s2key1 = "value3" s2key2 = value no.4 `; - auto parser = IniParser!()(document); + auto parser = IniParser!()(rawIniDocument); alias Token = typeof(parser).Token; { @@ -561,3 +607,172 @@ s2key2 = value no.4 assert(parser.skipIrrelevant()); assert(parser.empty()); } + +/++ + Data entry of an INI document + +/ +struct IniKeyValuePair(string) if (isCompatibleString!string) { + /// + string key; + + /// + string value; +} + +/++ + Section of an INI document + + $(NOTE + Data entries from the document’s root – i.e. those with no designated section – + are stored in a section with its `name` set to `null`. + ) + +/ +struct IniSection(string) if (isCompatibleString!string) { + /// + alias KeyValuePair = IniKeyValuePair!string; + + /++ + Name of the section + + Also known as “key”. + +/ + string name; + + /++ + Data entries of the section + +/ + KeyValuePair[] items; +} + +/++ + DOM representation of an INI document + +/ +struct IniDocument(string) if (isCompatibleString!string) { + /// + alias Section = IniSection!string; + + /++ + Sections of the document + + $(NOTE + Data entries from the document’s root – i.e. those with no designated section – + are stored in a section with its `name` set to `null`. 
+ + If there are no named sections in a document, there will be only a single section with no name (`null`). + ) + +/ + Section[] sections; +} + +/++ + Parses an INI string into a document ("DOM"). + +/ +IniDocument!string parseIniDocument(IniDialect dialect = IniDialect.defaults, string)(string rawIni) @safe pure nothrow +if (isCompatibleString!string) { + alias Document = IniDocument!string; + alias Section = IniSection!string; + alias KeyValuePair = IniKeyValuePair!string; + + auto parser = IniParser!(dialect)(rawIni); + + auto document = Document(null); + auto section = Section(null, null); + auto kvp = KeyValuePair(null, null); + + void commitKeyValuePair(string nextKey = null) { + if (kvp.key !is null) { + section.items ~= kvp; + } + kvp = KeyValuePair(nextKey, null); + } + + void commitSection(string nextSectionName) { + commitKeyValuePair(null); + + const isNamelessAndEmpty = ( + (section.name is null) + && (section.items.length == 0) + ); + + if (!isNamelessAndEmpty) { + document.sections ~= section; + } + + if (nextSectionName !is null) { + section = Section(nextSectionName, null); + } + } + + while (!parser.skipIrrelevant()) { + switch (parser.front.type) with (TokenType) { + + case key: + commitKeyValuePair(parser.front.data); + break; + + case value: + kvp.value = parser.front.data; + break; + + case sectionHeader: + commitSection(parser.front.data); + break; + + default: + assert(false, "Unexpected parsing error."); + } + + parser.popFront(); + } + + commitSection(null); + + return document; +} + +/// +@safe unittest { + // INI document (demo data) + static immutable string iniString = `; This is a comment. 
+ +Oachkatzlschwoaf = Seriously, try pronouncing this :P + +[Section #1] +foo = bar +d = rocks + +; Another comment + +[Section No.2] +name = Walter Bright +company = "Digital Mars" +`; + + // Parse the document + auto doc = parseIniDocument(iniString); + + version (none) // exclude from docs + // …is equivalent to: + auto doc = parseIniDocument!(IniDialect.defaults)(iniString); + + assert(doc.sections.length == 3); + + // "Root" section (no name): + assert(doc.sections[0].name is null); + assert(doc.sections[0].items == [ + IniKeyValuePair!string("Oachkatzlschwoaf", "Seriously, try pronouncing this :P"), + ]); + + // A section with a name: + assert(doc.sections[1].name == "Section #1"); + assert(doc.sections[1].items.length == 2); + assert(doc.sections[1].items[0] == IniKeyValuePair!string("foo", "bar")); + assert(doc.sections[1].items[1] == IniKeyValuePair!string("d", "rocks")); + + // Another section: + assert(doc.sections[2].name == "Section No.2"); + assert(doc.sections[2].items == [ + IniKeyValuePair!string("name", "Walter Bright"), + IniKeyValuePair!string("company", "Digital Mars"), + ]); +} From c5406b1634ab1f50641c6263c4ae3862ff8e5e9d Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Fri, 7 Feb 2025 05:26:39 +0100 Subject: [PATCH 03/39] Fix bugs and add further unittests to `arsd.ini` --- ini.d | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/ini.d b/ini.d index 9202c1c4..57daff1e 100644 --- a/ini.d +++ b/ini.d @@ -116,7 +116,7 @@ struct IniParser( } /// - Token front() inout { + inout(Token) front() inout { return _front; } @@ -673,7 +673,7 @@ if (isCompatibleString!string) { alias Section = IniSection!string; alias KeyValuePair = IniKeyValuePair!string; - auto parser = IniParser!(dialect)(rawIni); + auto parser = IniParser!(dialect, string)(rawIni); auto document = Document(null); auto section = Section(null, null); @@ -776,3 +776,22 @@ company = "Digital Mars" IniKeyValuePair!string("company", "Digital Mars"), 
]); } + +@safe unittest { + auto doc = parseIniDocument(""); + assert(doc.sections == []); + + doc = parseIniDocument(";Comment\n;Comment2\n"); + assert(doc.sections == []); +} + +@safe unittest { + char[] mutable = ['f', 'o', 'o', '=', 'b', 'a', 'r', '\n']; + + auto doc = parseIniDocument(mutable); + assert(doc.sections[0].items[0].key == "foo"); + assert(doc.sections[0].items[0].value == "bar"); + + // is mutable + static assert(is(typeof(doc.sections[0].items[0].value) == char[])); +} From 7d13f7cf22ba70c517876c18e13e81b969bb823f Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Fri, 7 Feb 2025 05:56:56 +0100 Subject: [PATCH 04/39] Add function to parse INI into an AA --- ini.d | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 101 insertions(+), 2 deletions(-) diff --git a/ini.d b/ini.d index 57daff1e..cdbe9e1f 100644 --- a/ini.d +++ b/ini.d @@ -77,6 +77,7 @@ private enum LocationState { See_also: $(LIST * [parseIniDocument] + * [parseIniAA] ) +/ struct IniParser( @@ -666,6 +667,9 @@ struct IniDocument(string) if (isCompatibleString!string) { /++ Parses an INI string into a document ("DOM"). + + See_also: + [parseIniAA] +/ IniDocument!string parseIniDocument(IniDialect dialect = IniDialect.defaults, string)(string rawIni) @safe pure nothrow if (isCompatibleString!string) { @@ -719,7 +723,7 @@ if (isCompatibleString!string) { break; default: - assert(false, "Unexpected parsing error."); + assert(false, "Unexpected parsing error."); // TODO } parser.popFront(); @@ -748,7 +752,7 @@ name = Walter Bright company = "Digital Mars" `; - // Parse the document + // Parse the document. auto doc = parseIniDocument(iniString); version (none) // exclude from docs @@ -795,3 +799,98 @@ company = "Digital Mars" // is mutable static assert(is(typeof(doc.sections[0].items[0].value) == char[])); } + +/++ + Parses an INI string into an associate array. 
+ + See_also: + [parseIniDocument] + +/ +string[string][string] parseIniAA(IniDialect dialect = IniDialect.defaults, string)(string rawIni) @safe pure nothrow { + // TODO: duplicate handling + auto parser = IniParser!(dialect, string)(rawIni); + + string[string][string] document; + string[string] section; + + string sectionName = null; + string keyName = null; + + void commitSection() { + sectionName = null; + } + + while (!parser.skipIrrelevant()) { + switch (parser.front.type) with (TokenType) { + + case key: + keyName = parser.front.data; + break; + + case value: + section[keyName] = parser.front.data; + break; + + case sectionHeader: + if ((sectionName !is null) || (section.length > 0)) { + document[sectionName] = section; + section = null; + } + sectionName = parser.front.data; + break; + + default: + assert(false, "Unexpected parsing error."); // TODO + } + + parser.popFront(); + } + + if ((sectionName !is null) || (section.length > 0)) { + document[sectionName] = section; + } + + return document; +} + +/// +@safe unittest { + // INI document + static immutable string demoData = `; This is a comment. + +Oachkatzlschwoaf = Seriously, try pronouncing this :P + +[Section #1] +foo = bar +d = rocks + +; Another comment + +[Section No.2] +name = Walter Bright +company = "Digital Mars" +website = +;email = "noreply@example.org" +`; + + // Parse the document into an associative array. 
+ auto aa = parseIniAA(demoData); + + assert(aa.length == 3); + + assert(aa[null].length == 1); + assert(aa[null]["Oachkatzlschwoaf"] == "Seriously, try pronouncing this :P"); + + assert(aa["Section #1"].length == 2); + assert(aa["Section #1"]["foo"] == "bar"); + assert(aa["Section #1"]["d"] == "rocks"); + + string[string] section2 = aa["Section No.2"]; + assert(section2.length == 3); + assert(section2["name"] == "Walter Bright"); + assert(section2["company"] == "Digital Mars"); + assert(section2["website"] == ""); + + // "email" is commented out + assert(!("email" in section2)); +} From 5d3a57ea1a932d507fd8e5d823c960a1cb468479 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Fri, 7 Feb 2025 05:57:35 +0100 Subject: [PATCH 05/39] Add further doc comments to `arsd.ini` --- ini.d | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/ini.d b/ini.d index cdbe9e1f..4e2ec6ab 100644 --- a/ini.d +++ b/ini.d @@ -34,30 +34,45 @@ private bool hasFeature(ulong dialect, ulong feature) @safe pure nothrow @nogc { return ((dialect & feature) > 0); } -/// +/++ + Type of a token (as output by the parser) + +/ public enum IniTokenType { + /// indicates an error invalid = 0, + /// insignificant whitespace whitespace, + /// section header opening bracket bracketOpen, + /// section header closing bracket bracketClose, + /// key/value separator, e.g. '=' keyValueSeparator, + /// line break, i.e. 
LF, CRLF or CR lineBreak, + /// text comment comment, + /// item key data key, + /// item value data value, + /// section name data sectionHeader, } -/// +/++ + Token of INI data (as output by the parser) + +/ struct IniToken(string) if (isCompatibleString!string) { - /// IniTokenType type; - /// + /++ + Content + +/ string data; } From 33595b7f87f7f11aa7610640f1f93205a6ef8b1a Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Sat, 8 Feb 2025 02:17:24 +0100 Subject: [PATCH 06/39] Fix minor issues --- ini.d | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ini.d b/ini.d index 4e2ec6ab..a4fee84b 100644 --- a/ini.d +++ b/ini.d @@ -20,13 +20,13 @@ enum IniDialect : ulong { lineComments = 0b_0000_0000_0000_0001, inlineComments = 0b_0000_0000_0000_0011, hashLineComments = 0b_0000_0000_0000_0100, - hashInLineComments = 0b_0000_0000_0000_1100, + hashInlineComments = 0b_0000_0000_0000_1100, escapeSequences = 0b_0000_0000_0001_0000, lineFolding = 0b_0000_0000_0010_0000, quotedStrings = 0b_0000_0000_0100_0000, arrays = 0b_0000_0000_1000_0000, colonKeys = 0b_0000_0001_0000_0000, - defaults = (lineComments | quotedStrings), + defaults = (lineComments | quotedStrings), } //dfmt on @@ -97,7 +97,7 @@ private enum LocationState { +/ struct IniParser( IniDialect dialect = IniDialect.defaults, - string = immutable(char)[] + string = immutable(char)[], ) if (isCompatibleString!string) { public { @@ -300,7 +300,7 @@ struct IniParser( ? Result.end : Result.regular; case '#': - if (dialect.hasFeature(Dialect.hashInLineComments)) { + if (dialect.hasFeature(Dialect.hashInlineComments)) { return (inQuotedString) ? 
Result.regular : Result.end; } else { @@ -447,7 +447,7 @@ struct IniParser( } case '#': { - static if (dialect.hasFeature(Dialect.hashInLineComments)) { + static if (dialect.hasFeature(Dialect.hashInlineComments)) { return this.lexComment(); } else static if (dialect.hasFeature(Dialect.hashLineComments)) { if (this.isAtStartOfLineOrEquivalent) { From 807cc847baea3c35978063436903975662f8684b Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Sat, 8 Feb 2025 02:17:44 +0100 Subject: [PATCH 07/39] Extend testsuite of `arsd.ini` Also adds two convenience functions. --- ini.d | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/ini.d b/ini.d index a4fee84b..b68a9b6b 100644 --- a/ini.d +++ b/ini.d @@ -624,6 +624,143 @@ s2key2 = value no.4 assert(parser.empty()); } +@safe unittest { + static immutable rawIni = "#not-a = comment"; + auto parser = makeIniParser(rawIni); + + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.key, "#not-a")); + + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(parser.front == parser.Token(TokenType.value, "comment")); + + parser.popFront(); + assert(parser.empty); +} + +@safe unittest { + static immutable rawIni = "#actually_a = comment\r\n\t#another one\r\n\t\t ; oh, and a third one"; + enum dialect = (Dialect.hashLineComments | Dialect.lineComments); + auto parser = makeIniParser!dialect(rawIni); + + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.comment, "actually_a = comment")); + + parser.popFront(); + assert(!parser.skipIrrelevant(false)); + assert(parser.front == parser.Token(TokenType.comment, "another one")); + + parser.popFront(); + assert(!parser.skipIrrelevant(false)); + assert(parser.front == parser.Token(TokenType.comment, " oh, and a third one")); + + parser.popFront(); + assert(parser.empty); +} + +@safe unittest { + static immutable rawIni = "key = value ;not-a-comment \nfoo = bar # not a comment\t"; + enum 
dialect = Dialect.lite; + auto parser = makeIniParser!dialect(rawIni); + + { + assert(!parser.empty); + assert(parser.front.type == TokenType.key); + + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(parser.front == parser.Token(TokenType.value, "value ;not-a-comment")); + } + + { + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(parser.front.type == TokenType.key); + + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(parser.front == parser.Token(TokenType.value, "bar # not a comment")); + } +} + +@safe unittest { + static immutable rawIni = "key = value ; comment-1\nfoo = bar #comment 2\n"; + enum dialect = (Dialect.inlineComments | Dialect.hashInlineComments); + auto parser = makeIniParser!dialect(rawIni); + + { + assert(!parser.empty); + assert(parser.front.type == TokenType.key); + + parser.popFront(); + assert(!parser.skipIrrelevant(false)); + assert(parser.front == parser.Token(TokenType.value, "value")); + + parser.popFront(); + assert(!parser.skipIrrelevant(false)); + assert(parser.front == parser.Token(TokenType.comment, " comment-1")); + } + + { + parser.popFront(); + assert(!parser.skipIrrelevant(false)); + assert(parser.front.type == TokenType.key); + + parser.popFront(); + assert(!parser.skipIrrelevant(false)); + assert(parser.front == parser.Token(TokenType.value, "bar")); + + parser.popFront(); + assert(!parser.skipIrrelevant(false)); + assert(parser.front == parser.Token(TokenType.comment, "comment 2")); + } + + parser.popFront(); + assert(parser.skipIrrelevant(false)); +} + +/++ + Convenience function to create a low-level parser + + $(TIP + Unlike with the constructor of [IniParser], + the compiler is able to infer the `string` template parameter. 
+ ) + +/ +IniParser!(dialect, string) makeIniParser( + IniDialect dialect = IniDialect.defaults, + string = immutable(char)[], +)( + string rawIni, +) @safe pure nothrow @nogc if (isCompatibleString!string) { + return IniParser!(dialect, string)(rawIni); +} + +/// +@safe unittest { + string regular; + auto parser1 = makeIniParser(regular); + assert(parser1.empty); // exclude from docs + + char[] mutable; + auto parser2 = makeIniParser(mutable); + assert(parser2.empty); // exclude from docs + + const(char)[] constChars; + auto parser3 = makeIniParser(constChars); + assert(parser3.empty); // exclude from docs +} + +// undocumented +debug void writelnTokens(IniDialect dialect, string)(IniParser!(dialect, string) parser) @safe { + import std.stdio : writeln; + + foreach (token; parser) { + writeln(token); + } +} + /++ Data entry of an INI document +/ From d93dd0d167337ffa16c7eeafd7051c973d275f28 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Sat, 8 Feb 2025 02:36:29 +0100 Subject: [PATCH 08/39] Add further test cases to `arsd.ini` --- ini.d | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/ini.d b/ini.d index b68a9b6b..3a585f7e 100644 --- a/ini.d +++ b/ini.d @@ -660,12 +660,22 @@ s2key2 = value no.4 } @safe unittest { - static immutable rawIni = "key = value ;not-a-comment \nfoo = bar # not a comment\t"; + static immutable rawIni = ";not a = line comment\nkey = value ;not-a-comment \nfoo = bar # not a comment\t"; enum dialect = Dialect.lite; auto parser = makeIniParser!dialect(rawIni); { assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.key, ";not a")); + + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(parser.front == parser.Token(TokenType.value, "line comment")); + } + + { + parser.popFront(); + assert(!parser.skipIrrelevant()); assert(parser.front.type == TokenType.key); parser.popFront(); @@ -685,12 +695,18 @@ s2key2 = value no.4 } @safe unittest { 
- static immutable rawIni = "key = value ; comment-1\nfoo = bar #comment 2\n"; + static immutable rawIni = "; line comment 0\t\n\nkey = value ; comment-1\nfoo = bar #comment 2\n"; enum dialect = (Dialect.inlineComments | Dialect.hashInlineComments); auto parser = makeIniParser!dialect(rawIni); { assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.comment, " line comment 0\t")); + } + + { + parser.popFront(); + assert(!parser.skipIrrelevant(false)); assert(parser.front.type == TokenType.key); parser.popFront(); @@ -720,6 +736,69 @@ s2key2 = value no.4 assert(parser.skipIrrelevant(false)); } +@safe unittest { + static immutable rawIni = "key: value\n" + ~ "foo= bar\n" + ~ "lol :rofl\n" + ~ "Oachkatzl : -Schwoaf\n" + ~ `"Schüler:innen": 10`; + enum dialect = (Dialect.colonKeys | Dialect.quotedStrings); + auto parser = makeIniParser!dialect(rawIni); + + { + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.key, "key")); + + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(parser.front == parser.Token(TokenType.value, "value")); + + } + + { + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(parser.front == parser.Token(TokenType.key, "foo")); + + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(parser.front == parser.Token(TokenType.value, "bar")); + } + + { + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(parser.front == parser.Token(TokenType.key, "lol")); + + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(parser.front == parser.Token(TokenType.value, "rofl")); + } + + { + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(parser.front == parser.Token(TokenType.key, "Oachkatzl")); + + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(parser.front == parser.Token(TokenType.value, "-Schwoaf")); + } + + { + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(parser.front == parser.Token(TokenType.key, 
"Schüler:innen")); + + parser.popFront(); + assert(!parser.skipIrrelevant()); + assert(parser.front == parser.Token(TokenType.value, "10")); + } + + parser.popFront(); + assert(parser.skipIrrelevant()); +} + /++ Convenience function to create a low-level parser From 2c61ff8ab8af0412c40c1891b35f0675f06b7f35 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Sat, 8 Feb 2025 03:13:59 +0100 Subject: [PATCH 09/39] Remove usage of shortened alias from example --- ini.d | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ini.d b/ini.d index 3a585f7e..5106a2d2 100644 --- a/ini.d +++ b/ini.d @@ -473,7 +473,7 @@ oachkatzl = schwoaf ;try pronouncing that `; // Combine feature flags to build the required dialect. - const myDialect = (Dialect.defaults | Dialect.inlineComments); + const myDialect = (IniDialect.defaults | IniDialect.inlineComments); // Instantiate a new parser and supply our document string. auto parser = IniParser!(myDialect)(rawIniDocument); From c0aed7220a820a8b41532d31d8c7502f4342b45b Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Sat, 8 Feb 2025 03:32:02 +0100 Subject: [PATCH 10/39] Add `IniFilteredParser` --- ini.d | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 109 insertions(+), 4 deletions(-) diff --git a/ini.d b/ini.d index 5106a2d2..61085282 100644 --- a/ini.d +++ b/ini.d @@ -91,6 +91,7 @@ private enum LocationState { See_also: $(LIST + * [IniFilteredParser] * [parseIniDocument] * [parseIniAA] ) @@ -463,6 +464,62 @@ struct IniParser( } } +/++ + Low-level INI parser with filtered output + + This wrapper will only supply tokens of these types: + + $(LIST + * IniTokenType.key + * IniTokenType.value + * IniTokenType.sectionHeader + * IniTokenType.invalid + ) + + See_also: + $(LIST + * [IniParser] + * [parseIniDocument] + * [parseIniAA] + ) + +/ +struct IniFilteredParser( + IniDialect dialect = IniDialect.defaults, + string = immutable(char)[], +) { + /// + public alias Token = IniToken!string; + + private 
IniParser!(dialect, string) _parser; + +public @safe pure nothrow @nogc: + + /// + public this(IniParser!(dialect, string) parser) { + _parser = parser; + } + + /// + public this(string rawIni) { + auto parser = IniParser!(dialect, string)(rawIni); + this(parser); + } + + /// + bool empty() => _parser.skipIrrelevant(true); + + /// + inout(Token) front() inout => _parser.front; + + /// + void popFront() => _parser.popFront(); + + /// + inout(typeof(this)) save() inout { + return this; + } +} + /// @safe unittest { // INI document (demo data) @@ -806,6 +863,9 @@ s2key2 = value no.4 Unlike with the constructor of [IniParser], the compiler is able to infer the `string` template parameter. ) + + See_also: + [makeIniFilteredParser] +/ IniParser!(dialect, string) makeIniParser( IniDialect dialect = IniDialect.defaults, @@ -831,12 +891,57 @@ IniParser!(dialect, string) makeIniParser( assert(parser3.empty); // exclude from docs } +/++ + Convenience function to create a low-level filtered parser + + $(TIP + Unlike with the constructor of [IniFilteredParser], + the compiler is able to infer the `string` template parameter. 
+ ) + + See_also: + [makeIniParser] + +/ +IniFilteredParser!(dialect, string) makeIniFilteredParser( + IniDialect dialect = IniDialect.defaults, + string = immutable(char)[], +)( + string rawIni, +) @safe pure nothrow @nogc if (isCompatibleString!string) { + return IniFilteredParser!(dialect, string)(rawIni); +} + +/// +@safe unittest { + string regular; + auto parser1 = makeIniFilteredParser(regular); + assert(parser1.empty); // exclude from docs + + char[] mutable; + auto parser2 = makeIniFilteredParser(mutable); + assert(parser2.empty); // exclude from docs + + const(char)[] constChars; + auto parser3 = makeIniFilteredParser(constChars); + assert(parser3.empty); // exclude from docs +} + // undocumented -debug void writelnTokens(IniDialect dialect, string)(IniParser!(dialect, string) parser) @safe { - import std.stdio : writeln; +debug { + void writelnTokens(IniDialect dialect, string)(IniParser!(dialect, string) parser) @safe { + import std.stdio : writeln; + + foreach (token; parser) { + writeln(token); + } + } - foreach (token; parser) { - writeln(token); + void writelnTokens(IniDialect dialect, string)(IniFilteredParser!(dialect, string) parser) @safe { + import std.stdio : writeln; + + foreach (token; parser) { + writeln(token); + } } } From f821ebdc08af46322fa6bf81b4e37c1b72b169f7 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Sat, 8 Feb 2025 03:32:17 +0100 Subject: [PATCH 11/39] Add support for single-quoted strings to `arsd.ini` --- ini.d | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 134 insertions(+), 18 deletions(-) diff --git a/ini.d b/ini.d index 61085282..713951bd 100644 --- a/ini.d +++ b/ini.d @@ -17,15 +17,19 @@ enum isCompatibleString(T) = (is(T == string) || is(T == const(char)[]) || is(T /// enum IniDialect : ulong { lite = 0, + lineComments = 0b_0000_0000_0000_0001, inlineComments = 0b_0000_0000_0000_0011, hashLineComments = 0b_0000_0000_0000_0100, hashInlineComments = 0b_0000_0000_0000_1100, + 
escapeSequences = 0b_0000_0000_0001_0000, lineFolding = 0b_0000_0000_0010_0000, quotedStrings = 0b_0000_0000_0100_0000, - arrays = 0b_0000_0000_1000_0000, - colonKeys = 0b_0000_0001_0000_0000, + singleQuoteQuotedStrings = 0b_0000_0000_1000_0000, + + arrays = 0b_0000_0001_0000_0000, + colonKeys = 0b_0000_0010_0000_0000, defaults = (lineComments | quotedStrings), } //dfmt on @@ -265,17 +269,40 @@ struct IniParser( whitespace, } - static if (dialect.hasFeature(Dialect.quotedStrings)) { - bool inQuotedString = false; + enum QuotedString : ubyte { + none = 0, + regular, + single, + } + // dfmt off + enum hasAnyQuotedString = ( + dialect.hasFeature(Dialect.quotedStrings) || + dialect.hasFeature(Dialect.singleQuoteQuotedStrings) + ); + // dfmt on + + static if (hasAnyQuotedString) { + auto inQuotedString = QuotedString.none; + } + static if (dialect.hasFeature(Dialect.quotedStrings)) { if (_source[0] == '"') { - inQuotedString = true; + inQuotedString = QuotedString.regular; // chomp quote initiator _source = _source[1 .. $]; } - } else { - enum inQuotedString = false; + } + static if (dialect.hasFeature(Dialect.singleQuoteQuotedStrings)) { + if (_source[0] == '\'') { + inQuotedString = QuotedString.single; + + // chomp quote initiator + _source = _source[1 .. $]; + } + } + static if (!hasAnyQuotedString) { + enum inQuotedString = QuotedString.none; } Result nextChar(const char c) @safe pure nothrow @nogc { @@ -289,20 +316,32 @@ struct IniParser( case '\x0B': case '\x0C': case ' ': - return (inQuotedString) ? Result.regular : Result.whitespace; + return (inQuotedString != QuotedString.none) ? Result.regular : Result.whitespace; case '\x0A': case '\x0D': - return (inQuotedString) + return (inQuotedString != QuotedString.none) ? Result.regular : Result.end; case '"': - return (inQuotedString) - ? Result.end : Result.regular; + static if (dialect.hasFeature(Dialect.quotedStrings)) { + return (inQuotedString == QuotedString.regular) + ? 
Result.end : Result.regular; + } else { + return Result.regular; + } + + case '\'': + static if (dialect.hasFeature(Dialect.singleQuoteQuotedStrings)) { + return (inQuotedString == QuotedString.single) + ? Result.end : Result.regular; + } else { + return Result.regular; + } case '#': if (dialect.hasFeature(Dialect.hashInlineComments)) { - return (inQuotedString) + return (inQuotedString != QuotedString.none) ? Result.regular : Result.end; } else { return Result.regular; @@ -310,7 +349,7 @@ struct IniParser( case ';': if (dialect.hasFeature(Dialect.inlineComments)) { - return (inQuotedString) + return (inQuotedString != QuotedString.none) ? Result.regular : Result.end; } else { return Result.regular; @@ -325,7 +364,7 @@ struct IniParser( case '=': static if (tokenType == TokenType.key) { - return (inQuotedString) + return (inQuotedString != QuotedString.none) ? Result.regular : Result.end; } else { return Result.regular; @@ -333,7 +372,7 @@ struct IniParser( case ']': static if (tokenType == TokenType.sectionHeader) { - return (inQuotedString) + return (inQuotedString != QuotedString.none) ? Result.regular : Result.end; } else { return Result.regular; @@ -360,9 +399,11 @@ struct IniParser( auto token = Token(tokenType, _source[0 .. idxEOT]); _source = _source[idxEOT .. $]; - if (inQuotedString) { - // chomp quote terminator - _source = _source[1 .. $]; + if (inQuotedString != QuotedString.none) { + if (_source.length > 0) { + // chomp quote terminator + _source = _source[1 .. 
$]; + } } return token; @@ -856,6 +897,81 @@ s2key2 = value no.4 assert(parser.skipIrrelevant()); } +@safe unittest { + static immutable rawIni = + "\"foo=bar\"=foobar\n" + ~ "'foo = bar' = foo_bar\n" + ~ "foo = \"bar\"\n" + ~ "foo = 'bar'\n" + ~ "multi_line = 'line1\nline2'\n" + ~ "syntax = \"error"; + enum dialect = (Dialect.quotedStrings | Dialect.singleQuoteQuotedStrings); + auto parser = makeIniFilteredParser!dialect(rawIni); + + { + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.key, "foo=bar")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.value, "foobar")); + + } + + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.key, "foo = bar")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.value, "foo_bar")); + } + + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.key, "foo")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.value, "bar")); + } + + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.key, "foo")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.value, "bar")); + } + + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.key, "multi_line")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.value, "line1\nline2")); + } + + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.key, "syntax")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.value, "error")); + } + + parser.popFront(); + assert(parser.empty); +} + /++ Convenience function to create a low-level parser From f8984fc4b8c453e5663875d4111d65652e7b65ae Mon Sep 17 
00:00:00 2001 From: Elias Batek Date: Sat, 8 Feb 2025 04:07:31 +0100 Subject: [PATCH 12/39] Remove unimplemented `IniDialect` option `arrays` --- ini.d | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ini.d b/ini.d index 713951bd..d60f6078 100644 --- a/ini.d +++ b/ini.d @@ -28,8 +28,8 @@ enum IniDialect : ulong { quotedStrings = 0b_0000_0000_0100_0000, singleQuoteQuotedStrings = 0b_0000_0000_1000_0000, - arrays = 0b_0000_0001_0000_0000, - colonKeys = 0b_0000_0010_0000_0000, + colonKeys = 0b_0000_0001_0000_0000, + defaults = (lineComments | quotedStrings), } //dfmt on From 3caf37fa14a3e4559f6f957f5510f48816cb0236 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Mon, 10 Feb 2025 03:37:42 +0100 Subject: [PATCH 13/39] Add further documentation to and adjust `arsd.ini` --- ini.d | 265 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 252 insertions(+), 13 deletions(-) diff --git a/ini.d b/ini.d index d60f6078..430f3351 100644 --- a/ini.d +++ b/ini.d @@ -5,32 +5,251 @@ +/ /++ INI configuration file support + + This module provides a configurable INI parser with support for multiple + “dialects” of the format. + + --- + import arsd.ini; + + IniDocument!string parseIniFile(string filePath) { + import std.file : readText; + return parseIniDocument(readText(filePath)); + } + --- +/ module arsd.ini; +/// +@safe unittest { + // INI example data (e.g. from an `autorun.inf` file) + static immutable string rawIniData = + "[autorun]\n" + ~ "open=setup.exe\n" + ~ "icon=setup.exe,0\n"; + + // Parse the document into an associative array: + string[string][string] data = parseIniAA(rawIniData); + + string open = data["autorun"]["open"]; + string icon = data["autorun"]["icon"]; + + assert(open == "setup.exe"); + assert(icon == "setup.exe,0"); +} + /++ Determines whether a type `T` is a string type compatible with this library. 
+/ enum isCompatibleString(T) = (is(T == string) || is(T == const(char)[]) || is(T == char[])); //dfmt off -/// +/++ + Feature set to be understood by the parser. + + --- + enum myDialect = (IniDialect.defaults | IniDialect.inlineComments); + --- + +/ enum IniDialect : ulong { + /++ + Minimum feature set. + + No comments, no extras, no nothing. + Only sections, keys and values. + Everything fits into these categories from a certain point of view. + +/ lite = 0, + /++ + Parse line comments (starting with `;`). + + ```ini + ; This is a line comment. + ;This one too. + + key = value ;But this isn't one. + ``` + +/ lineComments = 0b_0000_0000_0000_0001, + + /++ + Parse inline comments (starting with `;`). + + ```ini + key1 = value2 ; Inline comment. + key2 = value2 ;Inline comment. + key3 = value3; Inline comment. + ;Not a true inline comment (but technically equivalent). + ``` + +/ inlineComments = 0b_0000_0000_0000_0011, + + /++ + Parse line comments starting with `#`. + + ```ini + # This is a comment. + #Too. + key = value # Not a line comment. + ``` + +/ hashLineComments = 0b_0000_0000_0000_0100, + + /++ + Parse inline comments starting with `#`. + + ```ini + key1 = value2 # Inline comment. + key2 = value2 #Inline comment. + key3 = value3# Inline comment. + #Not a true inline comment (but technically equivalent). + ``` + +/ hashInlineComments = 0b_0000_0000_0000_1100, - escapeSequences = 0b_0000_0000_0001_0000, - lineFolding = 0b_0000_0000_0010_0000, - quotedStrings = 0b_0000_0000_0100_0000, - singleQuoteQuotedStrings = 0b_0000_0000_1000_0000, + /++ + Parse quoted strings. + + ```ini + key1 = non-quoted value + key2 = "quoted value" + + "quoted key" = value + non-quoted key = value + + "another key" = "another value" + + multi line = "line 1 + line 2" + ``` + +/ + quotedStrings = 0b_0000_0000_0001_0000, + + /++ + Parse quoted strings using single-quotes. 
+ + ```ini + key1 = non-quoted value + key2 = 'quoted value' + + 'quoted key' = value + non-quoted key = value + + 'another key' = 'another value' + + multi line = 'line 1 + line 2' + ``` + +/ + singleQuoteQuotedStrings = 0b_0000_0000_0010_0000, + + /++ + Parse key/value pairs separated with a colon (`:`). + + ```ini + key: value + key= value + ``` + +/ + colonKeys = 0b_0000_0000_0100_0000, + + /++ + Concats substrings and emits them as a single token. + + $(LIST + * For a mutable `char[]` input, + this will rewrite the data in the input array. + * For a non-mutable `immutable(char)[]` (=`string`) or `const(char)[]` input, + this will allocate a new array with the GC. + ) + + ```ini + key = "Value1" "Value2" + ; → Value1Value2 + ``` + +/ + concatSubstrings = 0b_0000_0001_0000_0000, + + /++ + Evaluates escape sequences in the input string. + + $(LIST + * For a mutable `char[]` input, + this will rewrite the data in the input array. + * For a non-mutable `immutable(char)[]` (=`string`) or `const(char)[]` input, + this will allocate a new array with the GC. + ) + + $(SMALL_TABLE + Special escape sequences + `\\` | Backslash + `\0` | Null character + `\n` | Line feed + `\r` | Carriage return + `\t` | Tabulator + ) + + ```ini + key1 = Line 1\nLine 2 + ; → Line 1 + ; Line 2 + + key2 = One \\ and one \; + ; → One \ and one ; + ``` + +/ + escapeSequences = 0b_0000_0010_0000_0000, + + /++ + Folds lines on escaped linebreaks. + + $(LIST + * For a mutable `char[]` input, + this will rewrite the data in the input array. + * For a non-mutable `immutable(char)[]` (=`string`) or `const(char)[]` input, + this will allocate a new array with the GC. + ) + + ```ini + key1 = word1\ + word2 + ; → word1word2 - colonKeys = 0b_0000_0001_0000_0000, + key2 = foo \ + bar + ; → foo bar + ``` + +/ + lineFolding = 0b_0000_0100_0000_0000, + + /++ + Imitates the behavior of the INI parser implementation found in PHP. 
+ + $(WARNING + This preset may be adjusted without further notice in the future + in cases where it increases alignment with PHP’s implementation. + ) + +/ + presetPhp = ( + lineComments + | inlineComments + | hashLineComments + | hashInlineComments + | quotedStrings + | singleQuoteQuotedStrings + | concatSubstrings + ), - defaults = (lineComments | quotedStrings), + /// + presetDefaults = ( + lineComments + | quotedStrings + | singleQuoteQuotedStrings + ), + + /// + defaults = presetDefaults, } //dfmt on @@ -208,27 +427,27 @@ struct IniParser( private { - bool isOnFinalChar() const { + bool isOnFinalChar() const @nogc { pragma(inline, true); return (_source.length == 1); } - bool isAtStartOfLineOrEquivalent() { + bool isAtStartOfLineOrEquivalent() @nogc { return (_locationState == LocationState.newLine); } - Token makeToken(TokenType type, size_t length) { + Token makeToken(TokenType type, size_t length) @nogc { auto token = Token(type, _source[0 .. length]); _source = _source[length .. $]; return token; } - Token makeToken(TokenType type, size_t length, size_t skip) { + Token makeToken(TokenType type, size_t length, size_t skip) @nogc { _source = _source[skip .. $]; return this.makeToken(type, length); } - Token lexWhitespace() { + Token lexWhitespace() @nogc { foreach (immutable idxM1, const c; _source[1 .. $]) { switch (c) { case '\x09': @@ -246,7 +465,7 @@ struct IniParser( return this.makeToken(TokenType.whitespace, _source.length); } - Token lexComment() { + Token lexComment() @nogc { foreach (immutable idxM1, const c; _source[1 .. 
$]) { switch (c) { default: @@ -834,6 +1053,26 @@ s2key2 = value no.4 assert(parser.skipIrrelevant(false)); } +@safe unittest { + static immutable rawIni = "key = value;inline"; + enum dialect = Dialect.inlineComments; + auto parser = makeIniParser!dialect(rawIni); + + assert(!parser.empty); + parser.front == parser.Token(TokenType.key, "key"); + + parser.popFront(); + assert(!parser.skipIrrelevant(false)); + parser.front == parser.Token(TokenType.value, "value"); + + parser.popFront(); + assert(!parser.skipIrrelevant(false)); + parser.front == parser.Token(TokenType.comment, "inline"); + + parser.popFront(); + assert(parser.empty); +} + @safe unittest { static immutable rawIni = "key: value\n" ~ "foo= bar\n" From 89d438982daca5ed1141246925441c90319cd9f2 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Tue, 11 Feb 2025 03:41:08 +0100 Subject: [PATCH 14/39] Implement `Dialect.concatSubstrings` --- ini.d | 257 +++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 229 insertions(+), 28 deletions(-) diff --git a/ini.d b/ini.d index 430f3351..889a013f 100644 --- a/ini.d +++ b/ini.d @@ -305,10 +305,19 @@ private alias Dialect = IniDialect; private enum LocationState { newLine, key, - value, + preValue, + inValue, sectionHeader, } +private enum OperatingMode { + mut, + dup, +} + +private enum OperatingMode operatingMode(string) = (is(string == char[])) + ? OperatingMode.mut : OperatingMode.dup; + /++ Low-level INI parser @@ -337,7 +346,7 @@ struct IniParser( LocationState _locationState = LocationState.newLine; } -@safe pure nothrow @nogc: +@safe pure nothrow: /// public this(string rawIni) { @@ -484,6 +493,7 @@ struct IniParser( enum Result { end, + endChomp, regular, whitespace, } @@ -535,24 +545,30 @@ struct IniParser( case '\x0B': case '\x0C': case ' ': - return (inQuotedString != QuotedString.none) ? Result.regular : Result.whitespace; + return (inQuotedString != QuotedString.none) + ? 
Result.regular : Result.whitespace; case '\x0A': case '\x0D': return (inQuotedString != QuotedString.none) - ? Result.regular : Result.end; + ? Result.regular : Result.endChomp; case '"': static if (dialect.hasFeature(Dialect.quotedStrings)) { + // dfmt off return (inQuotedString == QuotedString.regular) - ? Result.end : Result.regular; + ? Result.end + : (inQuotedString == QuotedString.single) + ? Result.regular + : Result.endChomp; + // dfmt on } else { return Result.regular; } case '\'': static if (dialect.hasFeature(Dialect.singleQuoteQuotedStrings)) { - return (inQuotedString == QuotedString.single) + return (inQuotedString != QuotedString.regular) ? Result.end : Result.regular; } else { return Result.regular; @@ -561,7 +577,7 @@ struct IniParser( case '#': if (dialect.hasFeature(Dialect.hashInlineComments)) { return (inQuotedString != QuotedString.none) - ? Result.regular : Result.end; + ? Result.regular : Result.endChomp; } else { return Result.regular; } @@ -569,7 +585,7 @@ struct IniParser( case ';': if (dialect.hasFeature(Dialect.inlineComments)) { return (inQuotedString != QuotedString.none) - ? Result.regular : Result.end; + ? Result.regular : Result.endChomp; } else { return Result.regular; } @@ -601,11 +617,18 @@ struct IniParser( assert(false, "Bug: This should have been unreachable."); } - size_t idxLastText = 0; + ptrdiff_t idxLastText = -1; + ptrdiff_t idxCutoff = -1; foreach (immutable idx, const c; _source) { const status = nextChar(c); if (status == Result.end) { + if (idxLastText < 0) { + idxLastText = (idx - 1); + } + break; + } else if (status == Result.endChomp) { + idxCutoff = idx; break; } else if (status == Result.whitespace) { continue; @@ -616,7 +639,31 @@ struct IniParser( const idxEOT = (idxLastText + 1); auto token = Token(tokenType, _source[0 .. idxEOT]); - _source = _source[idxEOT .. 
$]; + + // "double-quote quoted": cut off any whitespace afterwards + if (inQuotedString == QuotedString.regular) { + const idxEOQ = (idxEOT + 1); + if (_source.length > idxEOQ) { + foreach (immutable idx, c; _source[idxEOQ .. $]) { + switch (c) { + case '\x09': + case '\x0B': + case '\x0C': + case ' ': + continue; + + default: + // EOT because Q is cut off later + idxCutoff = idxEOT + idx; + break; + } + break; + } + } + } + + const idxNextToken = (idxCutoff >= 0) ? idxCutoff : idxEOT; + _source = _source[idxNextToken .. $]; if (inQuotedString != QuotedString.none) { if (_source.length > 0) { @@ -634,7 +681,11 @@ struct IniParser( case LocationState.key: return this.lexTextImpl!(TokenType.key); - case LocationState.value: + case LocationState.preValue: + _locationState = LocationState.inValue; + goto case LocationState.inValue; + + case LocationState.inValue: return this.lexTextImpl!(TokenType.value); case LocationState.sectionHeader: @@ -674,6 +725,9 @@ struct IniParser( case '\x0B': case '\x0C': case ' ': + if (_locationState == LocationState.inValue) { + return this.lexText(); + } return this.lexWhitespace(); case ':': @@ -683,7 +737,7 @@ struct IniParser( return this.lexText(); case '=': - _locationState = LocationState.value; + _locationState = LocationState.preValue; return this.makeToken(TokenType.keyValueSeparator, 1); case '[': @@ -752,7 +806,7 @@ struct IniFilteredParser( private IniParser!(dialect, string) _parser; -public @safe pure nothrow @nogc: +public @safe pure nothrow: /// public this(IniParser!(dialect, string) parser) { @@ -766,13 +820,16 @@ public @safe pure nothrow @nogc: } /// - bool empty() => _parser.skipIrrelevant(true); + bool empty() const => _parser.empty; /// inout(Token) front() inout => _parser.front; /// - void popFront() => _parser.popFront(); + void popFront() { + _parser.popFront(); + _parser.skipIrrelevant(true); + } /// inout(typeof(this)) save() inout { @@ -1142,6 +1199,8 @@ s2key2 = value no.4 ~ "'foo = bar' = foo_bar\n" ~ 
"foo = \"bar\"\n" ~ "foo = 'bar'\n" + ~ "foo = ' bar '\n" + ~ "foo = \" bar \"\n" ~ "multi_line = 'line1\nline2'\n" ~ "syntax = \"error"; enum dialect = (Dialect.quotedStrings | Dialect.singleQuoteQuotedStrings); @@ -1187,6 +1246,26 @@ s2key2 = value no.4 assert(parser.front == parser.Token(TokenType.value, "bar")); } + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.key, "foo")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.value, " bar ")); + } + + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.key, "foo")); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front == parser.Token(TokenType.value, " bar ")); + } + { parser.popFront(); assert(!parser.empty); @@ -1494,11 +1573,15 @@ company = "Digital Mars" /++ Parses an INI string into an associate array. + $(LIST + * Duplicate keys cause values to get overwritten. + * Sections with the same name are merged. 
+ ) + See_also: [parseIniDocument] +/ string[string][string] parseIniAA(IniDialect dialect = IniDialect.defaults, string)(string rawIni) @safe pure nothrow { - // TODO: duplicate handling auto parser = IniParser!(dialect, string)(rawIni); string[string][string] document; @@ -1506,28 +1589,73 @@ string[string][string] parseIniAA(IniDialect dialect = IniDialect.defaults, stri string sectionName = null; string keyName = null; + string value = null; - void commitSection() { - sectionName = null; + void commitKeyValuePair(string nextKey) { + if (keyName !is null) { + section[keyName] = value; + } + + keyName = nextKey; + value = null; + } + + void addValue(string nextValue) { + static if (dialect.hasFeature(Dialect.concatSubstrings)) { + if (value !is null) { + static if (operatingMode!string == OperatingMode.dup) { + value ~= nextValue; + } + static if (operatingMode!string == OperatingMode.mut) { + // Insane assumptions ahead: + () @trusted { + if (nextValue.ptr <= &value[$ - 1]) { + assert(false, "Memory corruption bug."); + } + const size_t end = (value.length + nextValue.length); + foreach (immutable idx, ref c; value.ptr[value.length .. 
end]) { + nextValue.ptr[idx]; + } + }(); + } + } else { + value = nextValue; + } + } else { + value = nextValue; + } + } + + void commitSection(string nextSection) { + commitKeyValuePair(null); + if ((sectionName !is null) || (section.length > 0)) { + document[sectionName] = section; + section = null; + } + + if (nextSection !is null) { + auto existingSection = nextSection in document; + if (existingSection !is null) { + section = *existingSection; + } + + sectionName = nextSection; + } } while (!parser.skipIrrelevant()) { switch (parser.front.type) with (TokenType) { case key: - keyName = parser.front.data; + commitKeyValuePair(parser.front.data); break; case value: - section[keyName] = parser.front.data; + addValue(parser.front.data); break; case sectionHeader: - if ((sectionName !is null) || (section.length > 0)) { - document[sectionName] = section; - section = null; - } - sectionName = parser.front.data; + commitSection(parser.front.data); break; default: @@ -1537,9 +1665,7 @@ string[string][string] parseIniAA(IniDialect dialect = IniDialect.defaults, stri parser.popFront(); } - if ((sectionName !is null) || (section.length > 0)) { - document[sectionName] = section; - } + commitSection(null); return document; } @@ -1585,3 +1711,78 @@ website = // "email" is commented out assert(!("email" in section2)); } + +@safe unittest { + static immutable string demoData = `[1] +key = "value1" "value2" +[2] +0 = a b +1 = 'a' b +2 = a 'b' +3 = a "b" +4 = "a" 'b' +5 = 'a' "b" +6 = "a" "b" +7 = 'a' 'b' +8 = 'a' "b" 'c' +`; + + enum dialect = (Dialect.concatSubstrings | Dialect.quotedStrings | Dialect.singleQuoteQuotedStrings); + auto aa = parseIniAA!dialect(demoData); + + assert(aa.length == 2); + assert(!(null in aa)); + assert("1" in aa); + assert("2" in aa); + assert(aa["1"]["key"] == "value1value2"); + assert(aa["2"]["0"] == "a b"); + assert(aa["2"]["1"] == "a b"); + assert(aa["2"]["2"] == "a b"); + assert(aa["2"]["3"] == "ab"); + assert(aa["2"]["4"] == "ab"); + 
assert(aa["2"]["5"] == "ab"); + assert(aa["2"]["6"] == "ab"); + assert(aa["2"]["7"] == "a b"); + assert(aa["2"]["8"] == "abc"); +} + +@safe unittest { + static immutable string demoData = ` +0 = "a" b +1 = "a" 'b' +2 = a "b" +3 = 'a' "b" +`; + + enum dialect = (Dialect.concatSubstrings | Dialect.singleQuoteQuotedStrings); + auto aa = parseIniAA!dialect(demoData); + + assert(aa.length == 1); + assert(aa[null]["0"] == `"a" b`); + assert(aa[null]["1"] == `"a" b`); + assert(aa[null]["2"] == `a "b"`); + assert(aa[null]["3"] == `a "b"`); +} + +@safe unittest { + static immutable string demoData = `[1] +key = original +no2 = kept +[2] +key = original +key = overwritten +[1] +key = merged and overwritten +`; + + enum dialect = Dialect.concatSubstrings; + auto aa = parseIniAA!dialect(demoData); + + assert(aa.length == 2); + assert(!(null in aa)); + assert("1" in aa); + assert("2" in aa); + assert(aa["1"]["key"] == "merged and overwritten"); + assert(aa["1"]["no2"] == "kept"); + assert(aa["2"]["key"] == "overwritten"); +} From 533290373e7c1f3a579bcf951004be8858ac9340 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Tue, 11 Feb 2025 04:02:35 +0100 Subject: [PATCH 15/39] Fix `parseIniAA` --- ini.d | 50 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/ini.d b/ini.d index 889a013f..3580c2d9 100644 --- a/ini.d +++ b/ini.d @@ -1582,6 +1582,12 @@ company = "Digital Mars" [parseIniDocument] +/ string[string][string] parseIniAA(IniDialect dialect = IniDialect.defaults, string)(string rawIni) @safe pure nothrow { + static if (is(string == immutable(char)[])) { + immutable(char)[] toString(string key) => key; + } else { + immutable(char)[] toString(string key) => key.idup; + } + auto parser = IniParser!(dialect, string)(rawIni); string[string][string] document; @@ -1593,7 +1599,7 @@ string[string][string] parseIniAA(IniDialect dialect = IniDialect.defaults, stri void commitKeyValuePair(string nextKey) { if (keyName !is 
null) { - section[keyName] = value; + section[toString(keyName)] = value; } keyName = nextKey; @@ -1612,10 +1618,12 @@ string[string][string] parseIniAA(IniDialect dialect = IniDialect.defaults, stri if (nextValue.ptr <= &value[$ - 1]) { assert(false, "Memory corruption bug."); } + const size_t end = (value.length + nextValue.length); foreach (immutable idx, ref c; value.ptr[value.length .. end]) { - nextValue.ptr[idx]; + c = nextValue.ptr[idx]; } + value = value.ptr[0 .. end]; }(); } } else { @@ -1629,7 +1637,7 @@ string[string][string] parseIniAA(IniDialect dialect = IniDialect.defaults, stri void commitSection(string nextSection) { commitKeyValuePair(null); if ((sectionName !is null) || (section.length > 0)) { - document[sectionName] = section; + document[toString(sectionName)] = section; section = null; } @@ -1712,6 +1720,40 @@ website = assert(!("email" in section2)); } +@safe unittest { + char[] demoData = `[1] +key = "value1" "value2" +[2] +0 = a b +1 = 'a' b +2 = a 'b' +3 = a "b" +4 = "a" 'b' +5 = 'a' "b" +6 = "a" "b" +7 = 'a' 'b' +8 = 'a' "b" 'c' +`.dup; + + enum dialect = (Dialect.concatSubstrings | Dialect.quotedStrings | Dialect.singleQuoteQuotedStrings); + auto aa = parseIniAA!dialect(demoData); + + assert(aa.length == 2); + assert(!(null in aa)); + assert("1" in aa); + assert("2" in aa); + assert(aa["1"]["key"] == "value1value2"); + assert(aa["2"]["0"] == "a b"); + assert(aa["2"]["1"] == "a b"); + assert(aa["2"]["2"] == "a b"); + assert(aa["2"]["3"] == "ab"); + assert(aa["2"]["4"] == "ab"); + assert(aa["2"]["5"] == "ab"); + assert(aa["2"]["6"] == "ab"); + assert(aa["2"]["7"] == "a b"); + assert(aa["2"]["8"] == "abc"); +} + @safe unittest { static immutable string demoData = `[1] key = "value1" "value2" @@ -1765,7 +1807,7 @@ key = "value1" "value2" } @safe unittest { - static immutable string demoData = `[1] + static immutable const(char)[] demoData = `[1] key = original no2 = kept [2] From 7e03da94e8ec8cbcc0e00229da2d2db3847c6dae Mon Sep 17 00:00:00 
2001 From: Elias Batek Date: Tue, 11 Feb 2025 05:12:24 +0100 Subject: [PATCH 16/39] Add `isSliceOf` to `arsd.core` --- core.d | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/core.d b/core.d index cde30ebb..f7fe75f5 100644 --- a/core.d +++ b/core.d @@ -271,6 +271,55 @@ auto ref T castTo(T, S)(auto ref S v) { /// alias typeCast = castTo; +/++ + Determines whether `needle` is a slice of `haystack`. + + History: + Added on February 11, 2025. + +/ +bool isSliceOf(T1, T2)(scope const(T1)[] needle, scope const(T2)[] haystack) @trusted pure nothrow @nogc { + return ( + needle.ptr >= haystack.ptr + && ((needle.ptr + needle.length) <= (haystack.ptr + haystack.length)) + ); +} + +@safe unittest { + string s0 = "01234"; + const(char)[] s1 = s0[1 .. $]; + const(void)[] s2 = s1.castTo!(const(void)[]); + string s3 = s1.idup; + + assert( s0.isSliceOf(s0)); + assert( s1.isSliceOf(s0)); + assert( s2.isSliceOf(s0)); + assert(!s3.isSliceOf(s0)); + + assert(!s0.isSliceOf(s1)); + assert( s1.isSliceOf(s1)); + assert( s2.isSliceOf(s1)); + assert(!s3.isSliceOf(s1)); + + assert(!s0.isSliceOf(s2)); + assert( s1.isSliceOf(s2)); + assert( s2.isSliceOf(s2)); + assert(!s3.isSliceOf(s2)); + + assert(!s0.isSliceOf(s3)); + assert(!s1.isSliceOf(s3)); + assert(!s2.isSliceOf(s3)); + assert( s3.isSliceOf(s3)); + + assert(s1.length == 4); + assert(s1[0 .. 0].isSliceOf(s1)); + assert(s1[0 .. 1].isSliceOf(s1)); + assert(s1[1 .. 2].isSliceOf(s1)); + assert(s1[1 .. 3].isSliceOf(s1)); + assert(s1[1 .. $].isSliceOf(s1)); + assert(s1[$ .. $].isSliceOf(s1)); +} + + /++ Does math as a 64 bit number, but saturates at int.min and int.max when converting back to a 32 bit int. 
From af25bbbed4997379bfca3970077d3a94e4f1b99e Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Tue, 11 Feb 2025 05:13:10 +0100 Subject: [PATCH 17/39] Add unittest to documentation --- core.d | 1 + 1 file changed, 1 insertion(+) diff --git a/core.d b/core.d index f7fe75f5..2523a49b 100644 --- a/core.d +++ b/core.d @@ -284,6 +284,7 @@ bool isSliceOf(T1, T2)(scope const(T1)[] needle, scope const(T2)[] haystack) @tr ); } +/// @safe unittest { string s0 = "01234"; const(char)[] s1 = s0[1 .. $]; From 5a3a16a1504be0944798870713ed55ffa46b7748 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Tue, 11 Feb 2025 05:29:55 +0100 Subject: [PATCH 18/39] Fix template instantiation bug --- ini.d | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ini.d b/ini.d index 3580c2d9..573a564d 100644 --- a/ini.d +++ b/ini.d @@ -1303,7 +1303,7 @@ s2key2 = value no.4 +/ IniParser!(dialect, string) makeIniParser( IniDialect dialect = IniDialect.defaults, - string = immutable(char)[], + string, )( string rawIni, ) @safe pure nothrow @nogc if (isCompatibleString!string) { @@ -1338,7 +1338,7 @@ IniParser!(dialect, string) makeIniParser( +/ IniFilteredParser!(dialect, string) makeIniFilteredParser( IniDialect dialect = IniDialect.defaults, - string = immutable(char)[], + string, )( string rawIni, ) @safe pure nothrow @nogc if (isCompatibleString!string) { From 51d51e5a9832565856941bf66a5393bf1c674959 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Tue, 11 Feb 2025 06:35:08 +0100 Subject: [PATCH 19/39] Implement `Dialect.concatSubstrings` in low-level `IniParser` --- ini.d | 147 +++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 110 insertions(+), 37 deletions(-) diff --git a/ini.d b/ini.d index 573a564d..a514c668 100644 --- a/ini.d +++ b/ini.d @@ -344,6 +344,10 @@ struct IniParser( bool _empty = true; LocationState _locationState = LocationState.newLine; + + static if (dialect.hasFeature(Dialect.concatSubstrings)) { + bool _bypassConcatSubstrings = false; + 
} } @safe pure nothrow: @@ -489,7 +493,7 @@ struct IniParser( return this.makeToken(TokenType.comment, (-1 + _source.length), 1); } - Token lexTextImpl(TokenType tokenType)() { + Token lexSubstringImpl(TokenType tokenType)() { enum Result { end, @@ -675,24 +679,101 @@ struct IniParser( return token; } - Token lexText() { + Token lexSubstring() { final switch (_locationState) { case LocationState.newLine: case LocationState.key: - return this.lexTextImpl!(TokenType.key); + return this.lexSubstringImpl!(TokenType.key); case LocationState.preValue: _locationState = LocationState.inValue; goto case LocationState.inValue; case LocationState.inValue: - return this.lexTextImpl!(TokenType.value); + return this.lexSubstringImpl!(TokenType.value); case LocationState.sectionHeader: - return this.lexTextImpl!(TokenType.sectionHeader); + return this.lexSubstringImpl!(TokenType.sectionHeader); } } + static if (dialect.hasFeature(Dialect.concatSubstrings)) { + Token lexSubstringsImpl(TokenType tokenType)() { + static if (operatingMode!string == OperatingMode.mut) { + auto originalSource = _source; + } + + Token token = this.lexSubstringImpl!tokenType(); + + auto next = this.save(); + next._bypassConcatSubstrings = true; + next.popFront(); + + static if (operatingMode!string == OperatingMode.mut) { + import arsd.core : isSliceOf; + + if (!token.data.isSliceOf(originalSource)) { + assert(false, "Memory corruption bug."); + } + + const ptrdiff_t tokenDataOffset = (() @trusted => token.data.ptr - originalSource.ptr)(); + auto mutSource = originalSource[tokenDataOffset .. $]; + size_t mutOffset = token.data.length; + } + + while (!next.empty) { + if (next.front.type != tokenType) { + break; + } + + static if (operatingMode!string == OperatingMode.dup) { + token.data ~= next.front.data; + } + static if (operatingMode!string == OperatingMode.mut) { + foreach (const c; next.front.data) { + mutSource[mutOffset] = c; + ++mutOffset; + } + token.data = mutSource[0 .. 
mutOffset]; + } + + _source = next._source; + _locationState = next._locationState; + next.popFront(); + } + + return token; + } + + Token lexSubstrings() { + final switch (_locationState) { + case LocationState.newLine: + case LocationState.key: + return this.lexSubstringsImpl!(TokenType.key); + + case LocationState.preValue: + _locationState = LocationState.inValue; + goto case LocationState.inValue; + + case LocationState.inValue: + return this.lexSubstringsImpl!(TokenType.value); + + case LocationState.sectionHeader: + return this.lexSubstringsImpl!(TokenType.sectionHeader); + } + } + } + + Token lexText() { + static if (dialect.hasFeature(Dialect.concatSubstrings)) { + if (!_bypassConcatSubstrings) { + return this.lexSubstrings(); + } + } + + return this.lexSubstring(); + } + Token fetchFront() { switch (_source[0]) { @@ -1306,7 +1387,7 @@ IniParser!(dialect, string) makeIniParser( string, )( string rawIni, -) @safe pure nothrow @nogc if (isCompatibleString!string) { +) @safe pure nothrow if (isCompatibleString!string) { return IniParser!(dialect, string)(rawIni); } @@ -1341,7 +1422,7 @@ IniFilteredParser!(dialect, string) makeIniFilteredParser( string, )( string rawIni, -) @safe pure nothrow @nogc if (isCompatibleString!string) { +) @safe pure nothrow if (isCompatibleString!string) { return IniFilteredParser!(dialect, string)(rawIni); } @@ -1570,6 +1651,22 @@ company = "Digital Mars" static assert(is(typeof(doc.sections[0].items[0].value) == char[])); } +@safe unittest { + static immutable demoData = ` +0 = a 'b' +1 = a "b" +2 = 'a' b +3 = "a" b +`; + + enum dialect = (Dialect.concatSubstrings | Dialect.quotedStrings | Dialect.singleQuoteQuotedStrings); + auto doc = parseIniDocument!dialect(demoData); + assert(doc.sections[0].items[0].value == "a b"); + assert(doc.sections[0].items[1].value == "ab"); + assert(doc.sections[0].items[2].value == "a b"); + assert(doc.sections[0].items[3].value == "ab"); +} + /++ Parses an INI string into an associate array. 
@@ -1606,32 +1703,8 @@ string[string][string] parseIniAA(IniDialect dialect = IniDialect.defaults, stri value = null; } - void addValue(string nextValue) { - static if (dialect.hasFeature(Dialect.concatSubstrings)) { - if (value !is null) { - static if (operatingMode!string == OperatingMode.dup) { - value ~= nextValue; - } - static if (operatingMode!string == OperatingMode.mut) { - // Insane assumptions ahead: - () @trusted { - if (nextValue.ptr <= &value[$ - 1]) { - assert(false, "Memory corruption bug."); - } - - const size_t end = (value.length + nextValue.length); - foreach (immutable idx, ref c; value.ptr[value.length .. end]) { - c = nextValue.ptr[idx]; - } - value = value.ptr[0 .. end]; - }(); - } - } else { - value = nextValue; - } - } else { - value = nextValue; - } + void setValue(string nextValue) { + value = nextValue; } void commitSection(string nextSection) { @@ -1659,7 +1732,7 @@ string[string][string] parseIniAA(IniDialect dialect = IniDialect.defaults, stri break; case value: - addValue(parser.front.data); + setValue(parser.front.data); break; case sectionHeader: @@ -1721,7 +1794,7 @@ website = } @safe unittest { - char[] demoData = `[1] + static immutable demoData = `[1] key = "value1" "value2" [2] 0 = a b @@ -1733,10 +1806,10 @@ key = "value1" "value2" 6 = "a" "b" 7 = 'a' 'b' 8 = 'a' "b" 'c' -`.dup; +`; enum dialect = (Dialect.concatSubstrings | Dialect.quotedStrings | Dialect.singleQuoteQuotedStrings); - auto aa = parseIniAA!dialect(demoData); + auto aa = parseIniAA!(dialect, char[])(demoData.dup); assert(aa.length == 2); assert(!(null in aa)); From eacf7987889bbd753c0b45c962ddbfa46dd56287 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Wed, 12 Feb 2025 01:07:18 +0100 Subject: [PATCH 20/39] Rename `mut` and `dup` to `destructive` and `nonDestructive` --- ini.d | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ini.d b/ini.d index a514c668..9a2dcdc7 100644 --- a/ini.d +++ b/ini.d @@ -311,12 +311,12 @@ private enum 
LocationState { } private enum OperatingMode { - mut, - dup, + nonDestructive, + destructive, } private enum OperatingMode operatingMode(string) = (is(string == char[])) - ? OperatingMode.mut : OperatingMode.dup; + ? OperatingMode.destructive : OperatingMode.nonDestructive; /++ Low-level INI parser @@ -699,7 +699,7 @@ struct IniParser( static if (dialect.hasFeature(Dialect.concatSubstrings)) { Token lexSubstringsImpl(TokenType tokenType)() { - static if (operatingMode!string == OperatingMode.mut) { + static if (operatingMode!string == OperatingMode.destructive) { auto originalSource = _source; } @@ -709,7 +709,7 @@ struct IniParser( next._bypassConcatSubstrings = true; next.popFront(); - static if (operatingMode!string == OperatingMode.mut) { + static if (operatingMode!string == OperatingMode.destructive) { import arsd.core : isSliceOf; if (!token.data.isSliceOf(originalSource)) { @@ -726,10 +726,10 @@ struct IniParser( break; } - static if (operatingMode!string == OperatingMode.dup) { + static if (operatingMode!string == OperatingMode.nonDestructive) { token.data ~= next.front.data; } - static if (operatingMode!string == OperatingMode.mut) { + static if (operatingMode!string == OperatingMode.destructive) { foreach (const c; next.front.data) { mutSource[mutOffset] = c; ++mutOffset; From 5c7538421f9aa6ba0f1c0fd6729503b3cb478f39 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Wed, 12 Feb 2025 01:49:32 +0100 Subject: [PATCH 21/39] Chomp chomp --- core.d | 1 - 1 file changed, 1 deletion(-) diff --git a/core.d b/core.d index 2523a49b..a1625e37 100644 --- a/core.d +++ b/core.d @@ -320,7 +320,6 @@ bool isSliceOf(T1, T2)(scope const(T1)[] needle, scope const(T2)[] haystack) @tr assert(s1[$ .. $].isSliceOf(s1)); } - /++ Does math as a 64 bit number, but saturates at int.min and int.max when converting back to a 32 bit int. 
From a2fe6f1fb497ed0e8a4e00fedfac3928c2226471 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Wed, 12 Feb 2025 02:13:34 +0100 Subject: [PATCH 22/39] Remove destructiveness footguns and add further documentation --- ini.d | 150 ++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 126 insertions(+), 24 deletions(-) diff --git a/ini.d b/ini.d index 9a2dcdc7..8d96f28a 100644 --- a/ini.d +++ b/ini.d @@ -17,6 +17,51 @@ return parseIniDocument(readText(filePath)); } --- + + + ### On destructiveness and GC usage + + Depending on the dialect and string type, + [IniParser] can operate in one of these three modes: + + $(LIST + * Non-destructive with no heap alloc (incl. `@nogc`) + * Non-destructive (uses the GC) + * Destructive with no heap alloc (incl. `@nogc`) + ) + + a) If a given dialect requests no mutation of the input data + (i.e. no escape sequences, no concatenation of substrings etc.) + and is therefore possible to implement with slicing operations only, + the parser will be non-destructive and not do any heap allocations. + Such a parser is verifiably `@nogc`, too. + + b) In cases where a dialect requires data-mutating operations, + there are two ways for a parser to implement them: + + b.0) Either perform those mutations on the input data itself + and alter the contents of that buffer. + Because of the destructive nature of this operation, + it can be performed only once safely. + (Such an implementation could optionally fix up the modified data + to become valid and parsable again. + Though doing so would come with a performance overhead.) + + b.1) Or allocate a new buffer for the result of the operation. + This also has the advantage that it works with `immutable` and `const` + input data. + For convenience reasons the GC is used to perform such allocations. + + Use [IniParser.isDestructive] to check for the operating mode. 
+ + To construct a non-destructive parser despite mutable input data, + specify `const(char)[]` as the value of the `string` template parameter. + + --- + char[] mutableInput = [ /* … */ ]; + auto parser = makeIniParser!(dialect, const(char)[])(mutableInput); + assert(parser.isDestructive == false); + --- +/ module arsd.ini; @@ -336,6 +381,18 @@ struct IniParser( public { /// alias Token = IniToken!string; + + // dfmt off + /// + enum isDestructive = ( + (operatingMode!string == OperatingMode.destructive) + && ( + dialect.hasFeature(Dialect.concatSubstrings) + || dialect.hasFeature(Dialect.escapeSequences) + || dialect.hasFeature(Dialect.lineFolding) + ) + ); + // dfmt on } private { @@ -364,17 +421,16 @@ struct IniParser( public { /// - bool empty() const { + bool empty() const @nogc { return _empty; } /// - inout(Token) front() inout { + inout(Token) front() inout @nogc { return _front; } - /// - void popFront() { + private void popFrontImpl() { if (_source.length == 0) { _empty = true; return; @@ -383,9 +439,35 @@ struct IniParser( _front = this.fetchFront(); } - /// - inout(typeof(this)) save() inout { - return this; + /* + This is a workaround. + The compiler doesn’t feel like inferring `@nogc` properly otherwise. + + → cannot call non-@nogc function + `arsd.ini.makeIniParser!(IniDialect.concatSubstrings, char[]).makeIniParser` + → which calls + `arsd.ini.IniParser!(IniDialect.concatSubstrings, char[]).IniParser.this` + → which calls + `arsd.ini.IniParser!(IniDialect.concatSubstrings, char[]).IniParser.popFront` + */ + static if (isDestructive) { + /// + void popFront() @nogc { + popFrontImpl(); + } + } else { + /// + void popFront() { + popFrontImpl(); + } + } + + // Destructive parsers make very poor Forward Ranges. 
+ static if (!isDestructive) { + /// + inout(typeof(this)) save() inout @nogc { + return this; + } } } @@ -705,7 +787,7 @@ struct IniParser( Token token = this.lexSubstringImpl!tokenType(); - auto next = this.save(); + auto next = this; // copy next._bypassConcatSubstrings = true; next.popFront(); @@ -885,6 +967,9 @@ struct IniFilteredParser( /// public alias Token = IniToken!string; + /// + public enum isDestructive = IniParser!(dialect, string).isDestructive; + private IniParser!(dialect, string) _parser; public @safe pure nothrow: @@ -901,10 +986,10 @@ public @safe pure nothrow: } /// - bool empty() const => _parser.empty; + bool empty() const @nogc => _parser.empty; /// - inout(Token) front() inout => _parser.front; + inout(Token) front() inout @nogc => _parser.front; /// void popFront() { @@ -912,14 +997,16 @@ public @safe pure nothrow: _parser.skipIrrelevant(true); } - /// - inout(typeof(this)) save() inout { - return this; + static if (!isDestructive) { + /// + inout(typeof(this)) save() inout @nogc { + return this; + } } } /// -@safe unittest { +@safe @nogc unittest { // INI document (demo data) static immutable string rawIniDocument = `; This is a comment. [section1] @@ -960,7 +1047,7 @@ oachkatzl = schwoaf ;try pronouncing that assert(values == 2); } -@safe unittest { +@safe @nogc unittest { static immutable string rawIniDocument = `; This is a comment. 
[section1] s1key1 = value1 @@ -1079,7 +1166,7 @@ s2key2 = value no.4 assert(parser.empty()); } -@safe unittest { +@safe @nogc unittest { static immutable rawIni = "#not-a = comment"; auto parser = makeIniParser(rawIni); @@ -1094,7 +1181,7 @@ s2key2 = value no.4 assert(parser.empty); } -@safe unittest { +@safe @nogc unittest { static immutable rawIni = "#actually_a = comment\r\n\t#another one\r\n\t\t ; oh, and a third one"; enum dialect = (Dialect.hashLineComments | Dialect.lineComments); auto parser = makeIniParser!dialect(rawIni); @@ -1114,7 +1201,7 @@ s2key2 = value no.4 assert(parser.empty); } -@safe unittest { +@safe @nogc unittest { static immutable rawIni = ";not a = line comment\nkey = value ;not-a-comment \nfoo = bar # not a comment\t"; enum dialect = Dialect.lite; auto parser = makeIniParser!dialect(rawIni); @@ -1149,7 +1236,7 @@ s2key2 = value no.4 } } -@safe unittest { +@safe @nogc unittest { static immutable rawIni = "; line comment 0\t\n\nkey = value ; comment-1\nfoo = bar #comment 2\n"; enum dialect = (Dialect.inlineComments | Dialect.hashInlineComments); auto parser = makeIniParser!dialect(rawIni); @@ -1191,7 +1278,7 @@ s2key2 = value no.4 assert(parser.skipIrrelevant(false)); } -@safe unittest { +@safe @nogc unittest { static immutable rawIni = "key = value;inline"; enum dialect = Dialect.inlineComments; auto parser = makeIniParser!dialect(rawIni); @@ -1211,7 +1298,7 @@ s2key2 = value no.4 assert(parser.empty); } -@safe unittest { +@safe @nogc unittest { static immutable rawIni = "key: value\n" ~ "foo= bar\n" ~ "lol :rofl\n" @@ -1274,7 +1361,7 @@ s2key2 = value no.4 assert(parser.skipIrrelevant()); } -@safe unittest { +@safe @nogc unittest { static immutable rawIni = "\"foo=bar\"=foobar\n" ~ "'foo = bar' = foo_bar\n" @@ -1392,7 +1479,7 @@ IniParser!(dialect, string) makeIniParser( } /// -@safe unittest { +@safe @nogc unittest { string regular; auto parser1 = makeIniParser(regular); assert(parser1.empty); // exclude from docs @@ -1404,6 +1491,21 @@ 
IniParser!(dialect, string) makeIniParser( const(char)[] constChars; auto parser3 = makeIniParser(constChars); assert(parser3.empty); // exclude from docs + + assert(!parser1.isDestructive); // exclude from docs + assert(!parser2.isDestructive); // exclude from docs + assert(!parser3.isDestructive); // exclude from docs +} + +@safe unittest { + char[] mutableInput; + enum dialect = Dialect.concatSubstrings; + + auto parser1 = makeIniParser!(dialect, const(char)[])(mutableInput); + auto parser2 = (() @nogc => makeIniParser!(dialect)(mutableInput))(); + + assert(!parser1.isDestructive); + assert(parser2.isDestructive); } /++ @@ -1427,7 +1529,7 @@ IniFilteredParser!(dialect, string) makeIniFilteredParser( } /// -@safe unittest { +@safe @nogc unittest { string regular; auto parser1 = makeIniFilteredParser(regular); assert(parser1.empty); // exclude from docs From 88b50feef159b88e8aab49ff9da9101ef33c4d58 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Wed, 12 Feb 2025 02:40:51 +0100 Subject: [PATCH 23/39] Refactor AA to have keys typed as `immutable(char)[]` --- ini.d | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ini.d b/ini.d index 8d96f28a..058d23f5 100644 --- a/ini.d +++ b/ini.d @@ -1780,7 +1780,12 @@ company = "Digital Mars" See_also: [parseIniDocument] +/ -string[string][string] parseIniAA(IniDialect dialect = IniDialect.defaults, string)(string rawIni) @safe pure nothrow { +string[immutable(char)[]][immutable(char)[]] parseIniAA( + IniDialect dialect = IniDialect.defaults, + string, +)( + string rawIni, +) @safe pure nothrow { static if (is(string == immutable(char)[])) { immutable(char)[] toString(string key) => key; } else { @@ -1789,8 +1794,8 @@ string[string][string] parseIniAA(IniDialect dialect = IniDialect.defaults, stri auto parser = IniParser!(dialect, string)(rawIni); - string[string][string] document; - string[string] section; + string[immutable(char)[]][immutable(char)[]] document; + string[immutable(char)[]] section; 
string sectionName = null; string keyName = null; From 723fa5be40f16ce6bd4fd0facdb555a3bd90dfc9 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Thu, 13 Feb 2025 02:55:22 +0100 Subject: [PATCH 24/39] Fix filtering of first token in `IniFilteredParser` --- ini.d | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ini.d b/ini.d index 058d23f5..d1bdf9c2 100644 --- a/ini.d +++ b/ini.d @@ -977,6 +977,7 @@ public @safe pure nothrow: /// public this(IniParser!(dialect, string) parser) { _parser = parser; + _parser.skipIrrelevant(true); } /// @@ -1181,6 +1182,16 @@ s2key2 = value no.4 assert(parser.empty); } +@safe @nogc unittest { + static immutable rawIni = "; only a comment"; + + auto regularParser = makeIniParser(rawIni); + auto filteredParser = makeIniFilteredParser(rawIni); + + assert(!regularParser.empty); + assert(filteredParser.empty); +} + @safe @nogc unittest { static immutable rawIni = "#actually_a = comment\r\n\t#another one\r\n\t\t ; oh, and a third one"; enum dialect = (Dialect.hashLineComments | Dialect.lineComments); From 08f9ba3c95be2295f4eabf617e4ebc6729551fb0 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Thu, 13 Feb 2025 05:40:03 +0100 Subject: [PATCH 25/39] Implement escape sequences + line folding --- ini.d | 503 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 498 insertions(+), 5 deletions(-) diff --git a/ini.d b/ini.d index d1bdf9c2..10c5d304 100644 --- a/ini.d +++ b/ini.d @@ -302,6 +302,254 @@ private bool hasFeature(ulong dialect, ulong feature) @safe pure nothrow @nogc { return ((dialect & feature) > 0); } +private T[] spliceImpl(T)(T[] array, size_t at, size_t count) @safe pure nothrow @nogc +in (at < array.length) +in (count <= array.length) +in (at + count <= array.length) { + const upper = array.length - count; + + for (size_t idx = at; idx < upper; ++idx) { + array[idx] = array[idx + count]; + } + + return array[0 .. 
($ - count)]; +} + +private T[] splice(T)(auto ref scope T[] array, size_t at, size_t count) @safe pure nothrow @nogc { + static if (__traits(isRef, array)) { + array = spliceImpl(array, at, count); // @suppress(dscanner.suspicious.auto_ref_assignment) + return array; + } else { + return spliceImpl(array, at, count); + } +} + +@safe unittest { + assert("foobar".dup.splice(0, 0) == "foobar"); + assert("foobar".dup.splice(0, 6) == ""); + assert("foobar".dup.splice(0, 1) == "oobar"); + assert("foobar".dup.splice(1, 5) == "f"); + assert("foobar".dup.splice(1, 4) == "fr"); + assert("foobar".dup.splice(4, 1) == "foobr"); + assert("foobar".dup.splice(4, 2) == "foob"); +} + +@safe unittest { + char[] array = ['a', 's', 'd', 'f']; + array.splice(1, 2); + assert(array == "af"); +} + +/// +char resolveIniEscapeSequence(char c) @safe pure nothrow @nogc { + switch (c) { + case 'n': + return '\x0A'; + case 'r': + return '\x0D'; + case 't': + return '\x09'; + case '\\': + return '\\'; + case '0': + return '\x00'; + + default: + return c; + } +} + +/// +@safe unittest { + assert(resolveIniEscapeSequence('n') == '\n'); + assert(resolveIniEscapeSequence('r') == '\r'); + assert(resolveIniEscapeSequence('t') == '\t'); + assert(resolveIniEscapeSequence('\\') == '\\'); + assert(resolveIniEscapeSequence('0') == '\0'); + + // Unsupported characters are preserved. + assert(resolveIniEscapeSequence('a') == 'a'); + assert(resolveIniEscapeSequence('Z') == 'Z'); + assert(resolveIniEscapeSequence('1') == '1'); + // Unsupported special characters are preserved. + assert(resolveIniEscapeSequence('@') == '@'); + // Line breaks are preserved. + assert(resolveIniEscapeSequence('\n') == '\n'); + assert(resolveIniEscapeSequence('\r') == '\r'); + // UTF-8 is preserved. 
+ assert(resolveIniEscapeSequence("ü"[0]) == "ü"[0]); +} + +private struct StringRange { + private { + const(char)[] _data; + } + +@safe pure nothrow @nogc: + + public this(const(char)[] data) { + _data = data; + } + + bool empty() const { + return (_data.length == 0); + } + + char front() const { + return _data[0]; + } + + void popFront() { + _data = _data[1 .. $]; + } +} + +/++ + Resolves escape sequences and performs line folding. + + Feature set depends on the [Dialect]. + +/ +string resolveIniEscapeSequences(Dialect dialect)(const(char)[] input) @safe pure nothrow { + size_t irrelevant = 0; + + auto source = StringRange(input); + determineIrrelevantLoop: while (!source.empty) { + if (source.front != '\\') { + source.popFront(); + continue; + } + + source.popFront(); + if (source.empty) { + break; + } + + static if (dialect.hasFeature(Dialect.lineFolding)) { + switch (source.front) { + case '\n': + source.popFront(); + irrelevant += 2; + continue determineIrrelevantLoop; + + case '\r': + source.popFront(); + irrelevant += 2; + if (source.empty) { + break determineIrrelevantLoop; + } + // CRLF? + if (source.front == '\n') { + source.popFront(); + ++irrelevant; + } + continue determineIrrelevantLoop; + + default: + break; + } + } + + static if (dialect.hasFeature(Dialect.escapeSequences)) { + source.popFront(); + ++irrelevant; + } + } + + const escapedSize = input.length - irrelevant; + auto result = new char[](escapedSize); + + size_t cursor = 0; + source = StringRange(input); + buildResultLoop: while (!source.empty) { + if (source.front != '\\') { + result[cursor++] = source.front; + source.popFront(); + continue; + } + + source.popFront(); + if (source.empty) { + result[cursor] = '\\'; + break; + } + + static if (dialect.hasFeature(Dialect.lineFolding)) { + switch (source.front) { + case '\n': + source.popFront(); + continue buildResultLoop; + + case '\r': + source.popFront(); + if (source.empty) { + break buildResultLoop; + } + // CRLF? 
+ if (source.front == '\n') { + source.popFront(); + } + continue buildResultLoop; + + default: + break; + } + } + + static if (dialect.hasFeature(Dialect.escapeSequences)) { + result[cursor++] = resolveIniEscapeSequence(source.front); + source.popFront(); + continue; + } else { + result[cursor++] = '\\'; + } + } + + return result; +} + +/// +@safe unittest { + enum none = Dialect.lite; + enum escp = Dialect.escapeSequences; + enum fold = Dialect.lineFolding; + enum both = Dialect.escapeSequences | Dialect.lineFolding; + + assert(resolveIniEscapeSequences!none("foo\\nbar") == "foo\\nbar"); + assert(resolveIniEscapeSequences!escp("foo\\nbar") == "foo\nbar"); + assert(resolveIniEscapeSequences!fold("foo\\nbar") == "foo\\nbar"); + assert(resolveIniEscapeSequences!both("foo\\nbar") == "foo\nbar"); + + assert(resolveIniEscapeSequences!none("foo\\\nbar") == "foo\\\nbar"); + assert(resolveIniEscapeSequences!escp("foo\\\nbar") == "foo\nbar"); + assert(resolveIniEscapeSequences!fold("foo\\\nbar") == "foobar"); + assert(resolveIniEscapeSequences!both("foo\\\nbar") == "foobar"); + + assert(resolveIniEscapeSequences!none("foo\\\n\\nbar") == "foo\\\n\\nbar"); + assert(resolveIniEscapeSequences!escp("foo\\\n\\nbar") == "foo\n\nbar"); + assert(resolveIniEscapeSequences!fold("foo\\\n\\nbar") == "foo\\nbar"); + assert(resolveIniEscapeSequences!both("foo\\\n\\nbar") == "foo\nbar"); + + assert(resolveIniEscapeSequences!none("foobar\\") == "foobar\\"); + assert(resolveIniEscapeSequences!escp("foobar\\") == "foobar\\"); + assert(resolveIniEscapeSequences!fold("foobar\\") == "foobar\\"); + assert(resolveIniEscapeSequences!both("foobar\\") == "foobar\\"); + + assert(resolveIniEscapeSequences!none("foo\\\r\nbar") == "foo\\\r\nbar"); + assert(resolveIniEscapeSequences!escp("foo\\\r\nbar") == "foo\r\nbar"); + assert(resolveIniEscapeSequences!fold("foo\\\r\nbar") == "foobar"); + assert(resolveIniEscapeSequences!both("foo\\\r\nbar") == "foobar"); + + 
assert(resolveIniEscapeSequences!none(`\nfoobar\n`) == "\\nfoobar\\n"); + assert(resolveIniEscapeSequences!escp(`\nfoobar\n`) == "\nfoobar\n"); + assert(resolveIniEscapeSequences!fold(`\nfoobar\n`) == "\\nfoobar\\n"); + assert(resolveIniEscapeSequences!both(`\nfoobar\n`) == "\nfoobar\n"); + + assert(resolveIniEscapeSequences!none("\\\nfoo \\\rba\\\r\nr") == "\\\nfoo \\\rba\\\r\nr"); + assert(resolveIniEscapeSequences!escp("\\\nfoo \\\rba\\\r\nr") == "\nfoo \rba\r\nr"); + assert(resolveIniEscapeSequences!fold("\\\nfoo \\\rba\\\r\nr") == "foo bar"); + assert(resolveIniEscapeSequences!both("\\\nfoo \\\rba\\\r\nr") == "foo bar"); +} + /++ Type of a token (as output by the parser) +/ @@ -582,6 +830,7 @@ struct IniParser( endChomp, regular, whitespace, + sequence, } enum QuotedString : ubyte { @@ -591,9 +840,14 @@ struct IniParser( } // dfmt off - enum hasAnyQuotedString = ( - dialect.hasFeature(Dialect.quotedStrings) || - dialect.hasFeature(Dialect.singleQuoteQuotedStrings) + enum bool hasAnyQuotedString = ( + dialect.hasFeature(Dialect.quotedStrings) + || dialect.hasFeature(Dialect.singleQuoteQuotedStrings) + ); + + enum bool hasAnyEscaping = ( + dialect.hasFeature(Dialect.lineFolding) + || dialect.hasFeature(Dialect.escapeSequences) ); // dfmt on @@ -691,6 +945,13 @@ struct IniParser( return Result.regular; } + case '\\': + static if (hasAnyEscaping) { + return Result.sequence; + } else { + goto default; + } + case ']': static if (tokenType == TokenType.sectionHeader) { return (inQuotedString != QuotedString.none) @@ -705,7 +966,9 @@ struct IniParser( ptrdiff_t idxLastText = -1; ptrdiff_t idxCutoff = -1; - foreach (immutable idx, const c; _source) { + + for (size_t idx = 0; idx < _source.length; ++idx) { + const c = _source[idx]; const status = nextChar(c); if (status == Result.end) { @@ -718,6 +981,62 @@ struct IniParser( break; } else if (status == Result.whitespace) { continue; + } else if (status == Result.sequence) { + static if (hasAnyEscaping) { + const idxNext 
= idx + 1; + if (idxNext < _source.length) { + static if (dialect.hasFeature(Dialect.lineFolding)) { + size_t determineFoldingCount() { + switch (_source[idxNext]) { + case '\n': + return 2; + + case '\r': + const idxAfterNext = idxNext + 1; + + // CRLF? + if (idxAfterNext < _source.length) { + if (_source[idxAfterNext] == '\n') { + return 3; + } + } + + return 2; + + default: + return 0; + } + + assert(false, "Bug: This should have been unreachable."); + } + + const foldingCount = determineFoldingCount(); + if (foldingCount > 0) { + static if (operatingMode!string == OperatingMode.nonDestructive) { + idx += (foldingCount - 1); + idxLastText = idx; + } + static if (operatingMode!string == OperatingMode.destructive) { + _source.splice(idx, foldingCount); + idx -= (foldingCount - 1); + } + continue; + } + } + static if (dialect.hasFeature(Dialect.escapeSequences)) { + static if (operatingMode!string == OperatingMode.nonDestructive) { + ++idx; + } + static if (operatingMode!string == OperatingMode.destructive) { + _source[idx] = resolveIniEscapeSequence(_source[idxNext]); + _source.splice(idxNext, 1); + } + + idxLastText = idx; + continue; + } + } + } } idxLastText = idx; @@ -726,6 +1045,12 @@ struct IniParser( const idxEOT = (idxLastText + 1); auto token = Token(tokenType, _source[0 .. 
idxEOT]); + static if (hasAnyEscaping) { + static if (operatingMode!string == OperatingMode.nonDestructive) { + token.data = resolveIniEscapeSequences!dialect(token.data); + } + } + // "double-quote quoted": cut off any whitespace afterwards if (inQuotedString == QuotedString.regular) { const idxEOQ = (idxEOT + 1); @@ -896,8 +1221,9 @@ struct IniParser( case ':': static if (dialect.hasFeature(Dialect.colonKeys)) { goto case '='; + } else { + return this.lexText(); } - return this.lexText(); case '=': _locationState = LocationState.preValue; @@ -1469,6 +1795,173 @@ s2key2 = value no.4 assert(parser.empty); } +@safe unittest { + char[] rawIni = ` +key = \nvalue\n +key = foo\t bar +key\0key = value +key \= = value +`.dup; + enum dialect = Dialect.escapeSequences; + auto parser = makeIniFilteredParser!dialect(rawIni); + + { + assert(!parser.empty); + assert(parser.front.data == "key"); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "\nvalue\n"); + } + + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "key"); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "foo\t bar"); + } + + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "key\0key"); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "value"); + } + + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "key ="); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "value"); + } + + parser.popFront(); + assert(parser.empty); +} + +@safe unittest { + static immutable string rawIni = ` +key = \nvalue\n +key = foo\t bar +key\0key = value +key \= = value +`; + enum dialect = Dialect.escapeSequences; + auto parser = makeIniFilteredParser!dialect(rawIni); + + { + assert(!parser.empty); + assert(parser.front.data == "key"); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "\nvalue\n"); + } + + { + 
parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "key"); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "foo\t bar"); + } + + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "key\0key"); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "value"); + } + + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "key ="); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "value"); + } + + parser.popFront(); + assert(parser.empty); +} + +@safe unittest { + char[] rawIni = "key = val\\\nue\nkey \\\n= \\\nvalue \\\rvalu\\\r\ne\n".dup; + enum dialect = Dialect.lineFolding; + auto parser = makeIniFilteredParser!dialect(rawIni); + + { + assert(!parser.empty); + assert(parser.front.data == "key"); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "value"); + } + + { + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "key"); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "value value"); + } + + parser.popFront(); + assert(parser.empty); +} + +@safe unittest { + static immutable string rawIni = "key = val\\\nue\nkey \\\n= \\\nvalue \\\rvalu\\\r\ne\n"; + enum dialect = Dialect.lineFolding; + auto parser = makeIniFilteredParser!dialect(rawIni); + + { + assert(!parser.empty); + assert(parser.front.data == "key"); + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "value"); + } + + { + parser.popFront(); + assert(!parser.empty); + // FIXME: Line folding does not interact properly with keys. 
+ version (none) { + assert(parser.front.data == "key"); + } else { + assert(parser.front.data == "key "); // bug + } + + parser.popFront(); + assert(!parser.empty); + assert(parser.front.data == "value value"); + } + + parser.popFront(); + assert(parser.empty); +} + /++ Convenience function to create a low-level parser From c9198a4e790d06d74f4419602019b078696206f3 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Thu, 13 Feb 2025 05:49:30 +0100 Subject: [PATCH 26/39] Mention `arsd.ini` in README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 88450cf1..633cff85 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ Future release, likely May 2026 or later. Nothing is planned for it at this time. +arsd.ini was added. + ## 12.0 Released: January 2025 From 433593db485806f3632a4d93f8b9661060398888 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Thu, 13 Feb 2025 06:29:05 +0100 Subject: [PATCH 27/39] Fix known bug with line-folding keys or sections --- ini.d | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/ini.d b/ini.d index 10c5d304..686af1c5 100644 --- a/ini.d +++ b/ini.d @@ -993,14 +993,12 @@ struct IniParser( case '\r': const idxAfterNext = idxNext + 1; - // CRLF? if (idxAfterNext < _source.length) { if (_source[idxAfterNext] == '\n') { return 3; } } - return 2; default: @@ -1014,7 +1012,7 @@ struct IniParser( if (foldingCount > 0) { static if (operatingMode!string == OperatingMode.nonDestructive) { idx += (foldingCount - 1); - idxLastText = idx; + idxCutoff = idx; } static if (operatingMode!string == OperatingMode.destructive) { _source.splice(idx, foldingCount); @@ -1073,7 +1071,7 @@ struct IniParser( } } - const idxNextToken = (idxCutoff >= 0) ? idxCutoff : idxEOT; + const idxNextToken = (idxCutoff >= idxLastText) ? idxCutoff : idxEOT; _source = _source[idxNextToken .. 
$]; if (inQuotedString != QuotedString.none) { @@ -1946,12 +1944,7 @@ key \= = value { parser.popFront(); assert(!parser.empty); - // FIXME: Line folding does not interact properly with keys. - version (none) { - assert(parser.front.data == "key"); - } else { - assert(parser.front.data == "key "); // bug - } + assert(parser.front.data == "key"); parser.popFront(); assert(!parser.empty); From a1a96a44cd84531677b0e5d5f6dacb7e7e3553e9 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Fri, 14 Feb 2025 04:16:51 +0100 Subject: [PATCH 28/39] Implement INI string serializer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Used to “stringify” INI documents. --- ini.d | 394 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 394 insertions(+) diff --git a/ini.d b/ini.d index 686af1c5..34bf6edb 100644 --- a/ini.d +++ b/ini.d @@ -404,6 +404,30 @@ private struct StringRange { } } +private struct StringSliceRange { + private { + const(char)[] _data; + } + +@safe pure nothrow @nogc: + + public this(const(char)[] data) { + _data = data; + } + + bool empty() const { + return (_data.length == 0); + } + + const(char)[] front() const { + return _data[0 .. 1]; + } + + void popFront() { + _data = _data[1 .. $]; + } +} + /++ Resolves escape sequences and performs line folding. 
@@ -2505,3 +2529,373 @@ key = merged and overwritten assert(aa["1"]["no2"] == "kept"); assert(aa["2"]["key"] == "overwritten"); } + +private void stringifyIniString(string, OutputRange)(string data, OutputRange output) { + if (data is null) { + output.put("\"\""); + return; + } + + size_t nQuotes = 0; + size_t nSingleQuotes = 0; + bool hasLineBreaks = false; + + foreach (const c; data) { + switch (c) { + default: + break; + + case '"': + ++nQuotes; + break; + case '\'': + ++nSingleQuotes; + break; + + case '\n': + case '\r': + hasLineBreaks = true; + break; + } + } + + const hasQuotes = (nQuotes > 0); + const hasSingleQuotes = (nSingleQuotes > 0); + + if (hasQuotes && !hasSingleQuotes) { + output.put("'"); + output.put(data); + output.put("'"); + return; + } + + if (!hasQuotes && hasSingleQuotes) { + output.put("\""); + output.put(data); + output.put("\""); + return; + } + + if (hasQuotes && hasSingleQuotes) { + if (nQuotes <= nSingleQuotes) { + output.put("\""); + + foreach (const c; StringSliceRange(data)) { + if (c == "\"") { + output.put("\" '\"' \""); + continue; + } + + output.put(c); + } + + output.put("\""); + return; + } + + if ( /*nQuotes > nSingleQuotes*/ true) { + output.put("'"); + + foreach (const c; StringSliceRange(data)) { + if (c == "'") { + output.put("' \"'\" '"); + continue; + } + + output.put(c); + } + + output.put("'"); + return; + } + } + + if ( /*!hasQuotes && !hasSingleQuotes*/ true) { + if (hasLineBreaks) { + output.put("\""); + } + + output.put(data); + + if (hasLineBreaks) { + output.put("\""); + } + } +} + +private void stringifyIni(StringKey, StringValue, OutputRange)(StringKey key, StringValue value, OutputRange output) { + stringifyIniString(key, output); + output.put(" = "); + stringifyIniString(value, output); + output.put("\n"); +} + +private void stringifyIni(string, OutputRange)(const IniKeyValuePair!string kvp, OutputRange output) { + return stringifyIni(kvp.key, kvp.value, output); +} + +private void 
stringifyIniSectionHeader(string, OutputRange)(string sectionName, OutputRange output) { + if (sectionName !is null) { + output.put("["); + stringifyIniString(sectionName, output); + output.put("]\n"); + } +} + +private void stringifyIni(string, OutputRange)(const IniSection!string section, OutputRange output) { + stringifyIniSectionHeader(section.name, output); + foreach (const item; section.items) { + stringifyIni(item, output); + } +} + +/++ + Serializes an [IniDocument] to a string in INI format. + +/ +void stringifyIni(string, OutputRange)(IniDocument!string document, OutputRange output) { + bool anySectionsWritten = false; + + foreach (const section; document.sections) { + if (section.name is null) { + if (anySectionsWritten) { + output.put("\n"); + } + + stringifyIni(section, output); + + if (section.items.length > 0) { + anySectionsWritten = true; + } + } + } + + foreach (const section; document.sections) { + if (section.name is null) { + continue; + } + + if (!anySectionsWritten) { + anySectionsWritten = true; + } else { + output.put("\n"); + } + + stringifyIni(section, output); + } +} + +/// ditto +string stringifyIni(string)(IniDocument!string document) { + import std.array : appender; + + auto output = appender!string(); + stringifyIni(document, output); + return output[]; +} + +/// +@safe unittest { + auto doc = IniDocument!string([ + IniSection!string(null, [ + IniKeyValuePair!string("key", "value"), + ]), + IniSection!string("Section 1", [ + IniKeyValuePair!string("key1", "value1"), + IniKeyValuePair!string("key2", "foo'bar"), + ]), + ]); + + // Serialize + string ini = stringifyIni(doc); + + static immutable expected = + "key = value\n" + ~ "\n" + ~ "[Section 1]\n" + ~ "key1 = value1\n" + ~ "key2 = \"foo'bar\"\n"; + assert(ini == expected); +} + +@safe unittest { + auto doc = IniDocument!string([ + IniSection!string("Oachkatzlschwoaf", [ + IniKeyValuePair!string("key1", "value1"), + IniKeyValuePair!string("key2", "value2"), + 
IniKeyValuePair!string("key3", "foo bar"), + ]), + IniSection!string(null, [ + IniKeyValuePair!string("key", "value"), + ]), + IniSection!string("Kaiserschmarrn", [ + IniKeyValuePair!string("1", "value\n1"), + IniKeyValuePair!string("2", "\"value\t2"), + IniKeyValuePair!string("3", "\"foo'bar\""), + IniKeyValuePair!string("4", "'foo\"bar'"), + ]), + ]); + + string ini = stringifyIni(doc); + + static immutable expected = "key = value\n" + ~ "\n" + ~ "[Oachkatzlschwoaf]\n" + ~ "key1 = value1\n" + ~ "key2 = value2\n" + ~ "key3 = foo bar\n" + ~ "\n" + ~ "[Kaiserschmarrn]\n" + ~ "1 = \"value\n1\"\n" + ~ "2 = '\"value\t2'\n" + ~ "3 = '\"foo' \"'\" 'bar\"'\n" + ~ "4 = \"'foo\" '\"' \"bar'\"\n"; + assert(ini == expected); +} + +/++ + Serializes an AA to a string in INI format. + +/ +void stringifyIni( + StringKey, + StringValue, + OutputRange, +)( + const StringValue[StringKey] sectionItems, + OutputRange output, +) if (isCompatibleString!StringKey && isCompatibleString!StringValue) { + foreach (key, value; sectionItems) { + stringifyIni(key, value, output); + } +} + +/// ditto +string stringifyIni( + StringKey, + StringValue, +)( + const StringValue[StringKey] sectionItems +) if (isCompatibleString!StringKey && isCompatibleString!StringValue) { + import std.array : appender; + + auto output = appender!string(); + stringifyIni(sectionItems, output); + return output[]; +} + +/// +@safe unittest { + string[string] doc; + doc["1"] = "value1"; + doc["2"] = "foo'bar"; + + // Serialize AA to INI + string ini = stringifyIni(doc); + + // dfmt off + static immutable expectedEither = "1 = value1\n" ~ "2 = \"foo'bar\"\n"; // exclude from docs + static immutable expectedOr = "2 = \"foo'bar\"\n" ~ "1 = value1\n" ; // exclude from docs + // dfmt on + + assert(ini == expectedEither || ini == expectedOr); // exclude from docs +} + +/++ + Serializes a nested AA to a string in INI format. 
+ +/ +void stringifyIni( + StringSection, + StringKey, + StringValue, + OutputRange, +)( + const StringValue[StringKey][StringSection] document, + OutputRange output, +) if (isCompatibleString!StringSection && isCompatibleString!StringKey && isCompatibleString!StringValue) { + bool anySectionsWritten = false; + + const rootSection = null in document; + if (rootSection !is null) { + stringifyIni(*rootSection, output); + anySectionsWritten = true; + } + + foreach (sectionName, items; document) { + if (sectionName is null) { + continue; + } + + if (!anySectionsWritten) { + anySectionsWritten = true; + } else { + output.put("\n"); + } + + stringifyIniSectionHeader(sectionName, output); + foreach (key, value; items) { + stringifyIni(key, value, output); + } + } +} + +/// ditto +string stringifyIni( + StringSection, + StringKey, + StringValue, +)( + const StringValue[StringKey][StringSection] document, +) if (isCompatibleString!StringSection && isCompatibleString!StringKey && isCompatibleString!StringValue) { + import std.array : appender; + + auto output = appender!string(); + stringifyIni(document, output); + return output[]; +} + +/// +@safe unittest { + string[string][string] doc; + + doc[null]["key"] = "value"; + doc[null]["foo"] = "bar"; + + doc["Section 1"]["firstname"] = "Walter"; + doc["Section 1"]["lastname"] = "Bright"; + doc["Section 1"]["language"] = "'D'"; + + doc["Section 2"]["Oachkatzl"] = "Schwoaf"; + + // Serialize AA to INI + string ini = stringifyIni(doc); + + import std.string : indexOf, startsWith; // exclude from docs + + assert(ini.startsWith("key = value\n") || ini.startsWith("foo = bar\n")); // exclude from docs + assert(ini.indexOf("\n[Section 1]\n") > 0); // exclude from docs + assert(ini.indexOf("\nfirstname = Walter\n") > 0); // exclude from docs + assert(ini.indexOf("\nlastname = Bright\n") > 0); // exclude from docs + assert(ini.indexOf("\nlanguage = \"'D'\"\n") > 0); // exclude from docs + assert(ini.indexOf("\n[Section 2]\n") > 0); // 
exclude from docs + assert(ini.indexOf("\nOachkatzl = Schwoaf\n") > 0); // exclude from docs +} + +@safe unittest { + string[string][string] doc; + doc[null]["key"] = "value"; + doc["S1"]["1"] = "value1"; + doc["S1"]["2"] = "value2"; + doc["S2"]["x"] = "foo'bar"; + doc["S2"][null] = "bamboozled"; + + string ini = stringifyIni(doc); + + import std.string : indexOf, startsWith; + + assert(ini.startsWith("key = value\n")); + assert(ini.indexOf("\n[S1]\n") > 0); + assert(ini.indexOf("\n1 = value1\n") > 0); + assert(ini.indexOf("\n2 = value2\n") > 0); + assert(ini.indexOf("\n[S2]\n") > 0); + assert(ini.indexOf("\nx = \"foo'bar\"\n") > 0); + assert(ini.indexOf("\n\"\" = bamboozled\n") > 0); +} From 007a637559bb39c69ef418533f6d5d2d7f3a657e Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Fri, 14 Feb 2025 04:20:13 +0100 Subject: [PATCH 29/39] Update sub-package description in DUB recipe --- dub.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dub.json b/dub.json index 7dfb4c3d..aa89f3ac 100644 --- a/dub.json +++ b/dub.json @@ -790,7 +790,7 @@ }, { "name": "ini", - "description": "INI file support - INI parser with support for various dialects.", + "description": "INI configuration file support - configurable INI parser and serializer with support for various dialects.", "targetType": "library", "sourceFiles": ["ini.d"], "dependencies": { From 7e793993b94eccd9dfb7283f5054caac1c3ab3ac Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Fri, 14 Feb 2025 04:24:13 +0100 Subject: [PATCH 30/39] Add `writeIniFile` example --- ini.d | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ini.d b/ini.d index 34bf6edb..4e7ab0e5 100644 --- a/ini.d +++ b/ini.d @@ -18,6 +18,15 @@ } --- + --- + import arsd.ini; + + void writeIniFile(string filePath, IniDocument!string document) { + import std.file : write; + return write(filePath, stringifyIni(document)); + } + --- + ### On destructiveness and GC usage From 2aa7a7573cc32bd2264478e1f0735c418b18e039 Mon Sep 17 
00:00:00 2001 From: Elias Batek Date: Fri, 14 Feb 2025 04:38:16 +0100 Subject: [PATCH 31/39] Add `parseIniDocument` example --- ini.d | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/ini.d b/ini.d index 4e7ab0e5..0e240199 100644 --- a/ini.d +++ b/ini.d @@ -92,6 +92,48 @@ module arsd.ini; assert(icon == "setup.exe,0"); } +/// +@safe unittest { + // INI example data (e.g. from an `autorun.inf` file): + static immutable string rawIniData = + "[autorun]\n" + ~ "open=setup.exe\n" + ~ "icon=setup.exe,0\n"; + + // Parse the document. + IniDocument!string document = parseIniDocument(rawIniData); + + // Let’s search for the value of an entry `icon` in the `autorun` section. + static string searchAutorunIcon(IniDocument!string document) { + // Iterate over all sections. + foreach (IniSection!string section; document.sections) { + + // Search for the `[autorun]` section. + if (section.name == "autorun") { + + // Iterate over all items in the section. + foreach (IniKeyValuePair!string item; section.items) { + + // Search for the `icon` entry. + if (item.key == "icon") { + // Found! + return item.value; + } + } + } + } + + // Not found! + return null; + } + + // Call our search function. + string icon = searchAutorunIcon(document); + + // Finally, verify the result. + assert(icon == "setup.exe,0"); +} + /++ Determines whether a type `T` is a string type compatible with this library. +/ From b9ea9562fc8c0b012592acc74a8da5bc327397f9 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Sat, 15 Feb 2025 21:31:57 +0100 Subject: [PATCH 32/39] Clarify `immutable(char)[]` string type --- ini.d | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ini.d b/ini.d index 0e240199..ddb08848 100644 --- a/ini.d +++ b/ini.d @@ -137,7 +137,7 @@ module arsd.ini; /++ Determines whether a type `T` is a string type compatible with this library. 
+/ -enum isCompatibleString(T) = (is(T == string) || is(T == const(char)[]) || is(T == char[])); +enum isCompatibleString(T) = (is(T == immutable(char)[]) || is(T == const(char)[]) || is(T == char[])); //dfmt off /++ From cb781b853d1c18d19ee05c5b4e34bc4ee16ed8b4 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Sat, 15 Feb 2025 21:45:12 +0100 Subject: [PATCH 33/39] =?UTF-8?q?Add=20additional=20=E2=80=9Cgetting=20sta?= =?UTF-8?q?rted=E2=80=9D=20section?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ini.d | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ini.d b/ini.d index ddb08848..ce444601 100644 --- a/ini.d +++ b/ini.d @@ -9,6 +9,15 @@ This module provides a configurable INI parser with support for multiple “dialects” of the format. + ### Getting started + + $(LIST + * [parseIniDocument] – Parse INI string into a DOM-like document. + * [parseIniAA] – Parse INI string into an associative array. + * [stringifyIni] – Serialize an [IniDocument] or an associative array + to a string of data in INI format. + ) + --- import arsd.ini; From 6f59ff160cc03befc9b8f1ed8b5db60064dc37e3 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Sat, 15 Feb 2025 22:16:50 +0100 Subject: [PATCH 34/39] Link `IniDocument` in getting started section --- ini.d | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ini.d b/ini.d index ce444601..2a9b72d4 100644 --- a/ini.d +++ b/ini.d @@ -12,7 +12,7 @@ ### Getting started $(LIST - * [parseIniDocument] – Parse INI string into a DOM-like document. + * [parseIniDocument] – Parse INI string into a DOM-like [IniDocument]. * [parseIniAA] – Parse INI string into an associative array. * [stringifyIni] – Serialize an [IniDocument] or an associative array to a string of data in INI format. 
From c300956cf7c9068579cea24ccd496d44ffa2ed05 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Sun, 16 Feb 2025 00:43:19 +0100 Subject: [PATCH 35/39] Make `stringifyIni` overloads public --- ini.d | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/ini.d b/ini.d index 2a9b72d4..b560867c 100644 --- a/ini.d +++ b/ini.d @@ -2683,17 +2683,43 @@ private void stringifyIniString(string, OutputRange)(string data, OutputRange ou } } -private void stringifyIni(StringKey, StringValue, OutputRange)(StringKey key, StringValue value, OutputRange output) { +/++ + Serializes a `key` + `value` pair to a string in INI format. + +/ +void stringifyIni(StringKey, StringValue, OutputRange)(StringKey key, StringValue value, OutputRange output) + if (isCompatibleString!StringKey && isCompatibleString!StringValue) { stringifyIniString(key, output); output.put(" = "); stringifyIniString(value, output); output.put("\n"); } -private void stringifyIni(string, OutputRange)(const IniKeyValuePair!string kvp, OutputRange output) { +/// ditto +string stringifyIni(StringKey, StringValue)(StringKey key, StringValue value) + if (isCompatibleString!StringKey && isCompatibleString!StringValue) { + import std.array : appender; + + auto output = appender!string(); + stringifyIni(key, value, output); + return output[]; +} + +/++ + Serializes an [IniKeyValuePair] to a string in INI format. 
+ +/ +void stringifyIni(string, OutputRange)(const IniKeyValuePair!string kvp, OutputRange output) { return stringifyIni(kvp.key, kvp.value, output); } +/// ditto +string stringifyIni(string)(const IniKeyValuePair!string kvp) { + import std.array : appender; + + auto output = appender!string(); + stringifyIni(kvp, output); + return output[]; +} + private void stringifyIniSectionHeader(string, OutputRange)(string sectionName, OutputRange output) { if (sectionName !is null) { output.put("["); @@ -2702,13 +2728,25 @@ private void stringifyIniSectionHeader(string, OutputRange)(string sectionName, } } -private void stringifyIni(string, OutputRange)(const IniSection!string section, OutputRange output) { +/++ + Serializes an [IniSection] to a string in INI format. + +/ +void stringifyIni(string, OutputRange)(const IniSection!string section, OutputRange output) { stringifyIniSectionHeader(section.name, output); foreach (const item; section.items) { stringifyIni(item, output); } } +/// ditto +string stringifyIni(string)(const IniSection!string section) { + import std.array : appender; + + auto output = appender!string(); + stringifyIni(section, output); + return output[]; +} + /++ Serializes an [IniDocument] to a string in INI format. 
+/ @@ -2959,3 +2997,25 @@ string stringifyIni( assert(ini.indexOf("\nx = \"foo'bar\"\n") > 0); assert(ini.indexOf("\n\"\" = bamboozled\n") > 0); } + +@safe unittest { + const section = IniSection!string("Section Name", [ + IniKeyValuePair!string("monkyyy", "business"), + IniKeyValuePair!string("Oachkatzl", "Schwoaf"), + ]); + + static immutable expected = "[Section Name]\n" + ~ "monkyyy = business\n" + ~ "Oachkatzl = Schwoaf\n"; + + assert(stringifyIni(section) == expected); +} + +@safe unittest { + const kvp = IniKeyValuePair!string("Key", "Value"); + assert(stringifyIni(kvp) == "Key = Value\n"); +} + +@safe unittest { + assert(stringifyIni("monkyyy", "business lol") == "monkyyy = business lol\n"); +} From c38b37cce9bcdf4d8e4900ad4128d9f108df70a9 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Sun, 16 Feb 2025 01:04:05 +0100 Subject: [PATCH 36/39] Implement function `parseIniMergedAA` --- ini.d | 116 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 2 deletions(-) diff --git a/ini.d b/ini.d index b560867c..a09cd1d8 100644 --- a/ini.d +++ b/ini.d @@ -703,6 +703,7 @@ private enum OperatingMode operatingMode(string) = (is(string == char[])) * [IniFilteredParser] * [parseIniDocument] * [parseIniAA] + * [parseIniMergedAA] ) +/ struct IniParser( @@ -1366,6 +1367,7 @@ struct IniParser( * [IniParser] * [parseIniDocument] * [parseIniAA] + * [parseIniMergedAA] ) +/ struct IniFilteredParser( @@ -2203,7 +2205,10 @@ struct IniDocument(string) if (isCompatibleString!string) { Parses an INI string into a document ("DOM"). 
See_also: - [parseIniAA] + $(LIST + * [parseIniAA] + * [parseIniMergedAA] + ) +/ IniDocument!string parseIniDocument(IniDialect dialect = IniDialect.defaults, string)(string rawIni) @safe pure nothrow if (isCompatibleString!string) { @@ -2359,7 +2364,10 @@ company = "Digital Mars" ) See_also: - [parseIniDocument] + $(LIST + * [parseIniMergedAA] + * [parseIniDocument] + ) +/ string[immutable(char)[]][immutable(char)[]] parseIniAA( IniDialect dialect = IniDialect.defaults, @@ -2590,6 +2598,110 @@ key = merged and overwritten assert(aa["2"]["key"] == "overwritten"); } +/++ + Parses an INI string into a section-less associative array. + All sections are merged. + + $(LIST + * Section names are discarded. + * Duplicate keys cause values to get overwritten. + ) + + See_also: + $(LIST + * [parseIniAA] + * [parseIniDocument] + ) + +/ +string[immutable(char)[]] parseIniMergedAA( + IniDialect dialect = IniDialect.defaults, + string, +)( + string rawIni, +) @safe pure nothrow { + static if (is(string == immutable(char)[])) { + immutable(char)[] toString(string key) => key; + } else { + immutable(char)[] toString(string key) => key.idup; + } + + auto parser = IniParser!(dialect, string)(rawIni); + + string[immutable(char)[]] section; + + string keyName = null; + string value = null; + + void commitKeyValuePair(string nextKey) { + if (keyName !is null) { + section[toString(keyName)] = value; + } + + keyName = nextKey; + value = null; + } + + void setValue(string nextValue) { + value = nextValue; + } + + while (!parser.skipIrrelevant()) { + switch (parser.front.type) with (TokenType) { + + case key: + commitKeyValuePair(parser.front.data); + break; + + case value: + setValue(parser.front.data); + break; + + case sectionHeader: + // nothing to do + break; + + default: + assert(false, "Unexpected parsing error."); // TODO + } + + parser.popFront(); + } + + commitKeyValuePair(null); + + return section; +} + +/// +@safe unittest { + static immutable demoData = ` +key0 = value0 + +[1] 
+key1 = value1 +key2 = other value + +[2] +key1 = value2 +key3 = yet another value`; + + // Parse INI file into an associative array with merged sections. + string[string] aa = parseIniMergedAA(demoData); + + // As sections were merged, entries sharing the same key got overridden. + // Hence, there are only four entries left. + assert(aa.length == 4); + + // The "key1" entry of the first section got overruled + // by the "key1" entry of the second section that came later. + assert(aa["key1"] == "value2"); + + // Entries with unique keys got through unaffected. + assert(aa["key0"] == "value0"); + assert(aa["key2"] == "other value"); + assert(aa["key3"] == "yet another value"); +} + private void stringifyIniString(string, OutputRange)(string data, OutputRange output) { if (data is null) { output.put("\"\""); From fd5dab8c43a7b33a9a95dd6f4a2be3c666f60e4e Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Sun, 16 Feb 2025 01:09:17 +0100 Subject: [PATCH 37/39] Move `arsd.ini` changelog entry to v12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adam said the v12 release hasn’t happened yet. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 633cff85..9835cf25 100644 --- a/README.md +++ b/README.md @@ -28,8 +28,6 @@ Future release, likely May 2026 or later. Nothing is planned for it at this time. -arsd.ini was added. - ## 12.0 Released: January 2025 @@ -44,6 +42,8 @@ Go to the file+line number from the error message and change `Event` to `FocusIn arsd.pixmappresenter, arsd.pixmappaint and arsd.pixmaprecorder were added. +arsd.ini was added. + ## 11.0 Released: Planned for May 2023, actually out August 2023. 
From e29d8fcd2220e63d76187e39aa658f8ca54798c4 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Sun, 16 Feb 2025 21:31:25 +0100 Subject: [PATCH 38/39] Improve getting started section of `arsd.ini` --- ini.d | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ini.d b/ini.d index a09cd1d8..6314d8c7 100644 --- a/ini.d +++ b/ini.d @@ -12,9 +12,14 @@ ### Getting started $(LIST - * [parseIniDocument] – Parse INI string into a DOM-like [IniDocument]. - * [parseIniAA] – Parse INI string into an associative array. - * [stringifyIni] – Serialize an [IniDocument] or an associative array + * [parseIniDocument] – Parses a string of INI data and stores the + result in a DOM-inspired [IniDocument] structure. + * [parseIniAA] – Parses a string of INI data and stores the result + in an associative array (named sections) of associative arrays + (key/value pairs of the section). + * [parseIniMergedAA] – Parses a string of INI data and stores the + result in a flat associative array (with all sections merged). + * [stringifyIni] – Serializes an [IniDocument] or an associative array to a string of data in INI format. ) From 5d31192edbdd316df3f8ac5ba44a0f8195fcde60 Mon Sep 17 00:00:00 2001 From: Elias Batek Date: Sun, 16 Feb 2025 21:35:39 +0100 Subject: [PATCH 39/39] Add `parseIniMergedAA` example --- ini.d | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ini.d b/ini.d index 6314d8c7..f4d83af1 100644 --- a/ini.d +++ b/ini.d @@ -106,6 +106,26 @@ module arsd.ini; assert(icon == "setup.exe,0"); } +/// +@safe unittest { + // INI example data (e.g. from an `autorun.inf` file) + static immutable string rawIniData = + "[autorun]\n" + ~ "open=setup.exe\n" + ~ "icon=setup.exe,0\n"; + + // Parse the document into a flat associative array. + // (Sections would get merged, but there is only one section in the + // example anyway.) 
+ string[string] data = parseIniMergedAA(rawIniData); + + string open = data["open"]; + string icon = data["icon"]; + + assert(open == "setup.exe"); + assert(icon == "setup.exe,0"); +} + /// @safe unittest { // INI example data (e.g. from an `autorun.inf` file):