Add addr-spec (#4)

* npm init project * Add 'addr-spec' grammar, from rfc * Add a few tests for the addr-spec * Refactor part of spec to use hex values directly, and start updating README * Finish first real text for dot-atom-text * Update .gitattributes file to not show diffs for generated code * Add CFWS tests * Add tests for quoted-string * Add better whitespace around brackets for readability * Add dot-atom tests for domain as well as a 'parts' computed field on dot-atom types * Add domain-literal test
asimpletune · Jun 15, 2023 · 098d89d · 098d89d
1 parent f3fcd0a
commit 098d89d
Show file tree

Hide file tree

Showing 9 changed files with 5,583 additions and 2 deletions.
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1,2 @@
+package-lock.json -diff
+*.parser.ts -diff
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+node_modules/
+.DS_Store
diff --git a/README.md b/README.md
@@ -1,2 +1,94 @@
-# rfc5322-header-parser_ts
-Parse rfc5322 email headers into typed objects
+# rfc5322 Headers Parser (typescript)
+
+Parse [RFC5322](https://datatracker.ietf.org/doc/html/rfc5322#section-4.1) email headers into typed objects.
+
+This project is new and still a work in progress, and it is not yet well tested. The strategy is to take the specification, more or less word for word, and convert it into a PEG grammar. Many tools can turn such a grammar into a parser that produces an AST. This project uses [tsPEG](https://github.com/EoinDavey/tsPEG), which generates a parser that can be used from TypeScript.
+
+## Help Wanted
+
+I think the most useful help I could get at this point is writing more tests.
+
+## The Grammar
+
+Here is a copy of the grammar so far:
+
+```peg
+// RFC 5322
+// See: https://datatracker.ietf.org/doc/html/rfc5322
+// The following is the RFC5322 "Internet Message Format" specification
+// using tspeg to represent the grammar and to generate a parser in TS
+// Rule names follow the RFC's ABNF names with '-' written as '_'
+// (e.g. the RFC's obs-ctext is the rule obs_ctext here).
+
+// Note: this first rule is the entrypoint for the rest of the grammar
+input           :=    addr_spec
+
+// "Core Rules" from RFC5234
+// See: https://datatracker.ietf.org/doc/html/rfc5234#appendix-B.1
+CR              :=    '\x0D'        // carriage return, i.e. '\r'
+CRLF            :=    CR LF         // Internet standard newline
+DQUOTE          :=    '\x22'        // double quote, i.e. '"'
+HTAB            :=    '\x09'        // horizontal tab, i.e. 'TAB'
+LF              :=    '\x0A'        // linefeed, i.e. '\n'
+SP              :=    '\x20'        // space, i.e. 'Space'
+VCHAR           :=    '[\x21-\x7E]' // visible (printing) characters
+WSP             :=    SP | HTAB     // white space
+
+// § 3.2.1 Quoted Characters
+// See: https://datatracker.ietf.org/doc/html/rfc5322#section-3.2.1
+quoted_pair     :=    {'\\' {VCHAR | WSP}} | obs_qp
+
+// § 3.2.2 Folding White Space and Comments
+// See: https://datatracker.ietf.org/doc/html/rfc5322#section-3.2.2
+FWS             :=    {{WSP* CRLF}? WSP+} |  obs_FWS
+// (Printable US-ASCII characters not including '(', ')', or the backslash '\')
+ctext           :=    '[\x21-\x27]' | '[\x2a-\x5b]' | '[\x5d-\x7e]' | obs_ctext
+ccontent        :=    ctext | quoted_pair | comment
+comment         :=    '\(' {FWS? ccontent}* FWS? '\)'
+CFWS            :=    {{FWS? comment}+ FWS?} | FWS
+
+// § 3.2.3 Atom
+// See: https://datatracker.ietf.org/doc/html/rfc5322#section-3.2.3
+// (Printable US-ASCII characters not including specials. Used for atoms.)
+atext           :=    atext='[A-Za-z0-9!#$%&\x27*+\-\/=?^_`{|}~]'
+atom            :=    CFWS? atext+ CFWS?
+dot_atom_text   :=    head_atext = atext+ {'\.' tail_atext = atext+}*
+dot_atom        :=    CFWS? dot_atom_text = dot_atom_text CFWS?
+// (Special characters that do not appear in atext)
+specials        :=    '\(' | '\)' | '[<>]' | '\[' | '\]' | '[:;@]' | '\\' | ',' | '\.' | DQUOTE
+
+// § 3.2.4 Quoted Strings
+// See: https://datatracker.ietf.org/doc/html/rfc5322#section-3.2.4
+// (Printable US-ASCII characters not including the backslash '\' or the quote character)
+qtext           :=    '\x21' | '[\x23-\x5b]' | '[\x5d-\x7e]' | obs_qtext
+qcontent        :=    qtext | quoted_pair
+quoted_string   :=    CFWS? DQUOTE {FWS? qcontent = qcontent}* FWS? DQUOTE CFWS?
+
+// § 3.2.5 Miscellaneous Tokens
+// See: https://datatracker.ietf.org/doc/html/rfc5322#section-3.2.5
+word            :=    atom | quoted_string
+
+// § 3.4.1 Addr-Spec Specification
+// See: https://datatracker.ietf.org/doc/html/rfc5322#section-3.4.1
+addr_spec       :=    local_part = local_part '@' domain = domain
+local_part      :=    dot_atom = dot_atom | quoted_string = quoted_string | obs_local_part = obs_local_part
+domain          :=    dot_atom = dot_atom | domain_literal = domain_literal | obs_domain = obs_domain
+domain_literal  :=    CFWS? '\[' {FWS? dtext=dtext}* FWS? '\]' CFWS?
+// (Printable US-ASCII characters not including '[', ']', or the backslash '\')
+dtext           :=    '[\x21-\x5a]' | '[\x5e-\x7e]' | obs_dtext = obs_dtext
+
+// § 4.1 Miscellaneous Obsolete Tokens
+// See: https://datatracker.ietf.org/doc/html/rfc5322#section-4.1
+// (US-ASCII control characters that do not include the carriage return, line feed, and white space characters)
+obs_NO_WS_CTL   :=    '[\x01-\x08]' | '\x0B' | '\x0C' | '[\x0E-\x1F]' | '\x7F'
+obs_qtext       :=    obs_NO_WS_CTL
+obs_ctext       :=    obs_NO_WS_CTL
+obs_qp          :=    '\\' {'\x00' | obs_NO_WS_CTL | LF | CR}
+
+// § 4.2 Obsolete Folding White Space
+// See: https://datatracker.ietf.org/doc/html/rfc5322#section-4.2
+obs_FWS         :=    WSP+ {CRLF WSP+}*
+
+// § 4.4 Obsolete Addressing
+// See: https://datatracker.ietf.org/doc/html/rfc5322#section-4.4
+obs_local_part  :=    word {'\.' word}*
+obs_domain      :=    head_atom = atom {'\.' tail_atom = atom}*
+obs_dtext       :=    obs_NO_WS_CTL | quoted_pair
+```
diff --git a/jest.config.ts b/jest.config.ts
@@ -0,0 +1,195 @@
+/*
+ * For a detailed explanation regarding each configuration property, visit:
+ * https://jestjs.io/docs/configuration
+ */
+
+module.exports = {
+  // Jest configuration object exported for the test runner.
+  // Only three options are actually set in this file: `clearMocks`,
+  // `coverageProvider`, and `preset`. Every other entry below is commented
+  // out and kept purely as a reference; this file does not set it.
+
+  // All imported modules in your tests should be mocked automatically
+  // automock: false,
+
+  // Stop running tests after `n` failures
+  // bail: 0,
+
+  // The directory where Jest should store its cached dependency information
+  // cacheDirectory: "/private/var/folders/4k/xcj2g_4j3lgftc237gs1h0th0000gn/T/jest_dx",
+
+  // Automatically clear mock calls, instances, contexts and results before every test
+  clearMocks: true,
+
+  // Indicates whether the coverage information should be collected while executing the test
+  // collectCoverage: false,
+
+  // An array of glob patterns indicating a set of files for which coverage information should be collected
+  // collectCoverageFrom: undefined,
+
+  // The directory where Jest should output its coverage files
+  // coverageDirectory: undefined,
+
+  // An array of regexp pattern strings used to skip coverage collection
+  // coveragePathIgnorePatterns: [
+  //   "/node_modules/"
+  // ],
+
+  // Indicates which provider should be used to instrument code for coverage
+  coverageProvider: "v8",
+
+  // A list of reporter names that Jest uses when writing coverage reports
+  // coverageReporters: [
+  //   "json",
+  //   "text",
+  //   "lcov",
+  //   "clover"
+  // ],
+
+  // An object that configures minimum threshold enforcement for coverage results
+  // coverageThreshold: undefined,
+
+  // A path to a custom dependency extractor
+  // dependencyExtractor: undefined,
+
+  // Make calling deprecated APIs throw helpful error messages
+  // errorOnDeprecated: false,
+
+  // The default configuration for fake timers
+  // fakeTimers: {
+  //   "enableGlobally": false
+  // },
+
+  // Force coverage collection from ignored files using an array of glob patterns
+  // forceCoverageMatch: [],
+
+  // A path to a module which exports an async function that is triggered once before all test suites
+  // globalSetup: undefined,
+
+  // A path to a module which exports an async function that is triggered once after all test suites
+  // globalTeardown: undefined,
+
+  // A set of global variables that need to be available in all test environments
+  // globals: {},
+
+  // The maximum amount of workers used to run your tests. Can be specified as % or a number. E.g. maxWorkers: 10% will use 10% of your CPU amount + 1 as the maximum worker number. maxWorkers: 2 will use a maximum of 2 workers.
+  // maxWorkers: "50%",
+
+  // An array of directory names to be searched recursively up from the requiring module's location
+  // moduleDirectories: [
+  //   "node_modules"
+  // ],
+
+  // An array of file extensions your modules use
+  // moduleFileExtensions: [
+  //   "js",
+  //   "mjs",
+  //   "cjs",
+  //   "jsx",
+  //   "ts",
+  //   "tsx",
+  //   "json",
+  //   "node"
+  // ],
+
+  // A map from regular expressions to module names or to arrays of module names that allow to stub out resources with a single module
+  // moduleNameMapper: {},
+
+  // An array of regexp pattern strings, matched against all module paths before considered 'visible' to the module loader
+  // modulePathIgnorePatterns: [],
+
+  // Activates notifications for test results
+  // notify: false,
+
+  // An enum that specifies notification mode. Requires { notify: true }
+  // notifyMode: "failure-change",
+
+  // A preset that is used as a base for Jest's configuration
+  // NOTE: this is the last active property and has no trailing comma, so a
+  // comma must be added here before uncommenting any option further down.
+  preset: "ts-jest"
+
+  // Run tests from one or more projects
+  // projects: undefined,
+
+  // Use this configuration option to add custom reporters to Jest
+  // reporters: undefined,
+
+  // Automatically reset mock state before every test
+  // resetMocks: false,
+
+  // Reset the module registry before running each individual test
+  // resetModules: false,
+
+  // A path to a custom resolver
+  // resolver: undefined,
+
+  // Automatically restore mock state and implementation before every test
+  // restoreMocks: false,
+
+  // The root directory that Jest should scan for tests and modules within
+  // rootDir: undefined,
+
+  // A list of paths to directories that Jest should use to search for files in
+  // roots: [
+  //   "<rootDir>"
+  // ],
+
+  // Allows you to use a custom runner instead of Jest's default test runner
+  // runner: "jest-runner",
+
+  // The paths to modules that run some code to configure or set up the testing environment before each test
+  // setupFiles: [],
+
+  // A list of paths to modules that run some code to configure or set up the testing framework before each test
+  // setupFilesAfterEnv: [],
+
+  // The number of seconds after which a test is considered as slow and reported as such in the results.
+  // slowTestThreshold: 5,
+
+  // A list of paths to snapshot serializer modules Jest should use for snapshot testing
+  // snapshotSerializers: [],
+
+  // The test environment that will be used for testing
+  // testEnvironment: "jest-environment-node",
+
+  // Options that will be passed to the testEnvironment
+  // testEnvironmentOptions: {},
+
+  // Adds a location field to test results
+  // testLocationInResults: false,
+
+  // The glob patterns Jest uses to detect test files
+  // testMatch: [
+  //   "**/__tests__/**/*.[jt]s?(x)",
+  //   "**/?(*.)+(spec|test).[tj]s?(x)"
+  // ],
+
+  // An array of regexp pattern strings that are matched against all test paths, matched tests are skipped
+  // testPathIgnorePatterns: [
+  //   "/node_modules/"
+  // ],
+
+  // The regexp pattern or array of patterns that Jest uses to detect test files
+  // testRegex: [],
+
+  // This option allows the use of a custom results processor
+  // testResultsProcessor: undefined,
+
+  // This option allows use of a custom test runner
+  // testRunner: "jest-circus/runner",
+
+  // A map from regular expressions to paths to transformers
+  // transform: undefined,
+
+  // An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation
+  // transformIgnorePatterns: [
+  //   "/node_modules/",
+  //   "\\.pnp\\.[^\\/]+$"
+  // ],
+
+  // An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them
+  // unmockedModulePathPatterns: undefined,
+
+  // Indicates whether each individual test should be reported during the run
+  // verbose: undefined,
+
+  // An array of regexp patterns that are matched against all source file paths before re-running tests in watch mode
+  // watchPathIgnorePatterns: [],
+
+  // Whether to use watchman for file crawling
+  // watchman: true,
+};