Skip to content

Handle unknown tags via PassthroughConstructor. #38

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,25 @@ add `:keywords false` parameters to the `parse-string` function:
" :keywords false)
```

Unknown tags can be handled by passing a handler function via the
`:unknown-tag-fn` parameter. The handler function is passed a map
with `:tag` and `:value` keys. Note that the value passed to
the `:unknown-tag-fn` handler is a string if it's a scalar, regardless of the
quoting (or lack thereof) of the scalar value.

```clojure
;; drop tags
(yaml/parse-string "!Base12 10" :unknown-tag-fn :value)
;; => "10"
(yaml/parse-string "!Base12 10"
:unknown-tag-fn (fn [{:keys [tag value]}]
(if (= "!Base12" tag)
(Integer/parseInt value 12)
value)))
;; => 12
```


Different flow styles (`:auto`, `:block`, `:flow`) allow customization of how YAML is rendered:


Expand Down Expand Up @@ -95,3 +114,9 @@ clj-commons/clj-yaml {:mvn/version "0.7.0"}
$ lein deps
$ lein test
$ lein install

## License

(c) Lance Bradley - Licensed under the same terms as clojure itself. See LICENCE file for details.

Portions (c) Owain Lewis as marked.
69 changes: 40 additions & 29 deletions src/clojure/clj_yaml/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
(org.yaml.snakeyaml.constructor Constructor SafeConstructor BaseConstructor)
(org.yaml.snakeyaml.representer Representer)
(org.yaml.snakeyaml.error Mark)
(clj_yaml MarkedConstructor)
(clj_yaml MarkedConstructor UnknownTagsConstructor)
(java.util LinkedHashMap)))

(def flow-styles
Expand Down Expand Up @@ -50,19 +50,24 @@

;; Builds the snakeyaml `Yaml` instance used for both parsing and dumping.
;; Loader options are assembled from the :max-aliases-for-collections,
;; :allow-recursive-keys and :allow-duplicate-keys options; dumper options come
;; from :dumper-options. The constructor is chosen from :unsafe, :mark and
;; :unknown-tag-fn.
(defn ^Yaml make-yaml
"Make a yaml encoder/decoder with some given options."
[& {:keys [dumper-options unsafe mark max-aliases-for-collections allow-recursive-keys allow-duplicate-keys]}]
[& {:keys [unknown-tag-fn dumper-options unsafe mark max-aliases-for-collections allow-recursive-keys allow-duplicate-keys]}]
(let [loader (make-loader-options :max-aliases-for-collections max-aliases-for-collections
:allow-recursive-keys allow-recursive-keys
:allow-duplicate-keys allow-duplicate-keys)
^BaseConstructor constructor
(if unsafe (Constructor. loader)
(if mark
;; construct2ndStep isn't implemented by MarkedConstructor,
;; causing an exception to be thrown before loader options are
;; used
(MarkedConstructor.)
(SafeConstructor. loader)))
;; TODO: unsafe marked constructor
;; The first truthy option wins: :unsafe, then :mark, then :unknown-tag-fn.
;; NOTE(review): when :unknown-tag-fn is combined with :unsafe or :mark,
;; UnknownTagsConstructor is never selected here -- confirm that is intended.
(cond
unsafe (Constructor. loader)

;; construct2ndStep isn't implemented by MarkedConstructor,
;; causing an exception to be thrown before loader options
;; are used
mark (MarkedConstructor.)

;; NOTE(review): unlike the Constructor/SafeConstructor branches, this
;; constructor is created without `loader` -- confirm whether
;; constructor-level loader options should apply here as well.
unknown-tag-fn (UnknownTagsConstructor.)

;; TODO: unsafe marked constructor
:else (SafeConstructor. loader))

dumper (make-dumper-options dumper-options)]
;; `loader` is also handed to the Yaml instance itself.
(Yaml. constructor (Representer.) dumper loader)))

Expand Down Expand Up @@ -90,11 +95,11 @@
"A protocol for things that can be coerced to and from the types
that snakeyaml knows how to encode and decode."
(encode [data])
(decode [data keywords]))
(decode [data keywords unknown-tag-fn]))

(extend-protocol YAMLCodec
clj_yaml.MarkedConstructor$Marked
(decode [data keywords]
(decode [data keywords unknown-tag-fn]
(letfn [(from-Mark [^Mark mark]
{:line (.getLine mark)
:index (.getIndex mark)
Expand All @@ -103,7 +108,12 @@
(mark (-> data .start from-Mark)
(-> data .end from-Mark)
(-> data .marked
(decode keywords)))))
(decode keywords unknown-tag-fn)))))

clj_yaml.UnknownTagsConstructor$UnknownTag
(decode [data keywords unknown-tag-fn]
(unknown-tag-fn {:tag (str (.tag data))
:value (-> (.value data) (decode keywords unknown-tag-fn))}))

clojure.lang.IPersistentMap
(encode [data]
Expand All @@ -122,45 +132,46 @@
(subs (str data) 1))

java.util.LinkedHashMap
(decode [data keywords]
(decode [data keywords unknown-tag-fn]
(letfn [(decode-key [k]
(if keywords
;; (keyword k) is nil for numbers etc
(or (keyword k) k)
k))]
(into (ordered-map)
(for [[k v] data]
[(-> k (decode keywords) decode-key) (decode v keywords)]))))
[(-> k (decode keywords unknown-tag-fn) decode-key) (decode v keywords unknown-tag-fn)]))))

java.util.LinkedHashSet
(decode [data keywords]
(decode [data _keywords _unknown-tag-fn]
(into (ordered-set) data))

java.util.ArrayList
(decode [data keywords]
(map #(decode % keywords) data))
(decode [data keywords unknown-tag-fn]
(map #(decode % keywords unknown-tag-fn) data))

Object
(encode [data] data)
(decode [data keywords] data)
(decode [data _keywords _unknown-tag-fn] data)

nil
(encode [data] data)
(decode [data keywords] data))
(decode [data _keywords _unknown-tag-fn] data))


(defn generate-string
  "Encodes `data` and dumps it to a YAML string. Any trailing `opts` are
  passed unchanged to `make-yaml`."
  [data & opts]
  (let [^Yaml yaml (apply make-yaml opts)
        encoded (encode data)]
    (.dump yaml encoded)))

;; Parses a YAML string into Clojure data.
;; The loading options (:unsafe, :mark, :unknown-tag-fn, :max-aliases-for-collections,
;; :allow-recursive-keys, :allow-duplicate-keys) are forwarded to `make-yaml`.
;; The loaded value is then passed through `decode` together with `keywords`
;; (defaults to true) and `unknown-tag-fn`.
(defn parse-string
[^String string & {:keys [unsafe mark keywords max-aliases-for-collections allow-recursive-keys allow-duplicate-keys] :or {keywords true}}]
(decode (.load (make-yaml :unsafe unsafe
:mark mark
:max-aliases-for-collections max-aliases-for-collections
:allow-recursive-keys allow-recursive-keys
:allow-duplicate-keys allow-duplicate-keys)
string) keywords))
[^String string & {:keys [unknown-tag-fn unsafe mark keywords max-aliases-for-collections allow-recursive-keys allow-duplicate-keys] :or {keywords true}}]
(let [yaml (make-yaml :unsafe unsafe
:mark mark
:unknown-tag-fn unknown-tag-fn
:max-aliases-for-collections max-aliases-for-collections
:allow-recursive-keys allow-recursive-keys
:allow-duplicate-keys allow-duplicate-keys)]
;; unknown-tag-fn is needed twice: make-yaml uses it to pick the constructor,
;; and decode calls it on each UnknownTag wrapper in the loaded data.
(decode (.load yaml string) keywords unknown-tag-fn)))

;; From https://github.com/metosin/muuntaja/pull/94/files
(defn generate-stream
Expand All @@ -169,6 +180,6 @@
(.dump ^Yaml (apply make-yaml opts) (encode data) writer))

;; Parses YAML read from `reader` into Clojure data.
;; All options are forwarded to `make-yaml`; `keywords` (defaults to true) and
;; `unknown-tag-fn` are additionally passed to `decode`.
(defn parse-stream
[^java.io.Reader reader & {:keys [keywords] :or {keywords true} :as opts}]
[^java.io.Reader reader & {:keys [keywords unknown-tag-fn] :or {keywords true} :as opts}]
;; (into [] cat opts) flattens the options map into a key/value vector so it
;; can be applied to make-yaml's keyword-argument signature.
(decode (.load ^Yaml (apply make-yaml (into [] cat opts))
reader) keywords))
reader) keywords unknown-tag-fn))
48 changes: 48 additions & 0 deletions src/java/clj_yaml/UnknownTagsConstructor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package clj_yaml;

import org.yaml.snakeyaml.constructor.Construct;
import org.yaml.snakeyaml.constructor.Constructor;
import org.yaml.snakeyaml.constructor.SafeConstructor;
import org.yaml.snakeyaml.constructor.AbstractConstruct;
import org.yaml.snakeyaml.nodes.Node;
import org.yaml.snakeyaml.nodes.Tag;

public class UnknownTagsConstructor extends SafeConstructor {

public UnknownTagsConstructor() {
this.yamlMultiConstructors.put("", new UnknownTagConstruct());
}

public class UnknownTagConstruct extends AbstractConstruct {

public Object construct(Node node) {
Tag unknownTag = node.getTag();

Tag newTag = null;
switch (node.getNodeId()) {
case scalar:
newTag = Tag.STR;
break;
case sequence:
newTag = Tag.SEQ;
break;
default:
newTag = Tag.MAP;
break;
}
node.setTag(newTag);

return new UnknownTag(unknownTag, getConstructor(node).construct(node));
}
}

public static class UnknownTag {
public Tag tag;
public Object value;

public UnknownTag(Tag unknownTag, Object taggedValue) {
this.tag = unknownTag;
this.value = taggedValue;
}
}
}
44 changes: 38 additions & 6 deletions test/clj_yaml/core_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
(:require [clojure.test :refer (deftest testing is)]
[clojure.string :as string]
[clojure.java.io :as io]
[clj-yaml.core :refer [parse-string unmark generate-string
parse-stream generate-stream]])
[clj-yaml.core :as yaml :refer [parse-string unmark generate-string
parse-stream generate-stream]])
(:import [java.util Date]
(java.io ByteArrayOutputStream OutputStreamWriter ByteArrayInputStream)
java.nio.charset.StandardCharsets
(org.yaml.snakeyaml.error YAMLException)
(org.yaml.snakeyaml.constructor DuplicateKeyException)))
(org.yaml.snakeyaml.constructor ConstructorException DuplicateKeyException)))

(def nested-hash-yaml
"root:\n childa: a\n childb: \n grandchild: \n greatgrandchild: bar\n")
Expand Down Expand Up @@ -166,8 +166,8 @@ the-bin: !!binary 0101")
;; This test ensures that generate-string uses the older behavior by default, for the sake
;; of stability, i.e. backwards compatibility.
(is
(= "{description: Big-picture diagram showing how our top-level systems and stakeholders interact}\n"
(generate-string data))))))
(= "{description: Big-picture diagram showing how our top-level systems and stakeholders interact}\n"
(generate-string data))))))

(deftest dump-opts
(let [data [{:age 33 :name "jon"} {:age 44 :name "boo"}]]
Expand Down Expand Up @@ -290,4 +290,36 @@ foo/bar: 42
(generate-string (parse-string indented-yaml)
:dumper-options {:indent 5
:indicator-indent 2
:flow-style :block})))))
:flow-style :block})))))

;; Fixture: a YAML document with one custom-tagged scalar (!CustomScalar),
;; one custom-tagged mapping (!CustomMapping) and one custom-tagged sequence
;; (!CustomSequence).
(def yaml-with-unknown-tags "---
scalar: !CustomScalar some-scalar
mapping: !CustomMapping
x: foo
y: bar
sequence: !CustomSequence
- a
- b
- z
")

;; Covers parsing of documents that contain tags snakeyaml does not know:
;; the default failure, stripping tags, keeping tags, and converting values.
(deftest unknown-tags-test
  (testing "Throws with unknown tags and default constructor"
    (is (thrown-with-msg? ConstructorException
                          #"^could not determine a constructor for the tag !CustomScalar"
                          (parse-string yaml-with-unknown-tags))))
  ;; :value as the handler returns only the tagged value, dropping the tag.
  (testing "Can strip unknown tags with :unknown-tag-fn :value"
    (is (= {:scalar "some-scalar"
            :mapping {:x "foo" :y "bar"}
            :sequence ["a" "b" "z"]}
           (parse-string yaml-with-unknown-tags :unknown-tag-fn :value))))
  ;; identity as the handler exposes the raw {:tag ... :value ...} map.
  (testing "Can process unknown tags with :unknown-tag-fn as identity"
    (is (= {:scalar {:tag "!CustomScalar" :value "some-scalar"}
            :mapping {:tag "!CustomMapping" :value {:x "foo" :y "bar"}}
            :sequence {:tag "!CustomSequence" :value ["a" "b" "z"]}}
           (parse-string yaml-with-unknown-tags :unknown-tag-fn identity))))
  ;; A handler can dispatch on the tag; scalar values arrive as strings.
  (testing "Can convert tagged values with a custom :unknown-tag-fn"
    (is (= {:base-12 12 :base-10 "10"}
           (parse-string "{base-12: !Base12 10, base-10: !Base10 10}"
                         :unknown-tag-fn (fn [{:keys [tag value]}]
                                           (if (= "!Base12" tag)
                                             (Integer/parseInt value 12) value)))))))