|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | + |
| 3 | +""" |
| 4 | +Python Frontmatter: Parse and manage posts with YAML frontmatter |
| 5 | +
|
| 6 | +Based on and modified from ``python-frontmatter`` version 1.0.0. |
| 7 | +
|
| 8 | +license: mit. See frontmatter.ABOUT file for details. |
| 9 | +""" |
| 10 | + |
| 11 | + |
| 12 | +import codecs |
| 13 | +import saneyaml |
| 14 | +import re |
| 15 | + |
| 16 | +from frontmatter import detect_format |
| 17 | +from frontmatter import handlers |
| 18 | +from frontmatter import Post as FrontmatterPost |
| 19 | +from frontmatter.default_handlers import BaseHandler |
| 20 | +from frontmatter.util import u |
| 21 | + |
| 22 | +from licensedcode.tokenize import query_lines |
| 23 | + |
| 24 | + |
| 25 | + |
class SaneYAMLHandler(BaseHandler):
    """
    Handler that loads and exports YAML frontmatter metadata.

    It fills the same role as frontmatter.default_handlers.YAMLHandler but
    delegates parsing and serialization to nexB/saneyaml instead of pyyaml.
    """
    # A boundary is a line made only of three or more dashes, optionally
    # followed by trailing whitespace.
    FM_BOUNDARY = re.compile(r"^-{3,}\s*$", re.MULTILINE)
    START_DELIMITER = "---"
    END_DELIMITER = START_DELIMITER

    def load(self, fm, **kwargs):
        """
        Return the data parsed from the ``fm`` YAML frontmatter text.

        ``allow_duplicate_keys=False`` is always passed to ``saneyaml.load``;
        extra ``kwargs`` are forwarded as-is.
        """
        parsed = saneyaml.load(fm, allow_duplicate_keys=False, **kwargs)
        return parsed

    def export(self, metadata, **kwargs):
        """
        Return ``metadata`` serialized as YAML text, stripped of leading and
        trailing whitespace.
        """
        dumped = saneyaml.dump(metadata, indent=4, encoding='utf-8', **kwargs)
        # ``u`` ensures the stripped dump is returned as unicode
        return u(dumped.strip())
| 48 | + |
| 49 | + |
def get_rule_text(location=None, text=None):
    """
    Return the rule ``text`` prepared for indexing, as a single string of
    stripped lines joined with newlines.

    IMPORTANT: the lines come from ``query_lines``, the same process used to
    load query text, so that rules and queries are prepared symmetrically.
    """
    stripped_lines = []
    numbered_lines = query_lines(location=location, query_string=text, plain_text=True)
    # each item is a (line number, line) pair; only the line text is kept
    for _line_number, line in numbered_lines:
        stripped_lines.append(line.strip())
    return '\n'.join(stripped_lines)
| 59 | + |
| 60 | + |
def parse_frontmatter(text, encoding="utf-8", handler=None, **defaults):
    """
    Return a (metadata, content) tuple parsed from ``text`` with frontmatter.
    Optional metadata defaults can be passed as keyword arguments.

    When no handler is available, or the text cannot be split on the handler
    delimiters, return the defaults-only metadata together with the decoded
    and stripped text.

    This is similar to frontmatter.parse, but the content is passed through
    `get_rule_text` so that it goes through the same process as loading
    query text, for symmetry.
    """
    # work on stripped unicode text from here on
    text = u(text, encoding).strip()

    # the defaults seed the returned metadata
    metadata = defaults.copy()

    # only detect a handler when the caller did not provide one
    if not handler:
        handler = detect_format(text, handlers)
    if handler is None:
        return metadata, text

    try:
        raw_frontmatter, content = handler.split(text)
    except ValueError:
        # the text cannot be split on the delimiters: treat it as plain text
        return metadata, text

    # only mapping-like frontmatter contributes to the metadata
    loaded = handler.load(raw_frontmatter)
    if isinstance(loaded, dict):
        metadata.update(loaded)

    return metadata, get_rule_text(text=content)
| 98 | + |
| 99 | + |
def loads_frontmatter(text, encoding="utf-8", handler=None, **defaults):
    """
    Return a :py:class:`post <frontmatter.Post>` built from ``text``
    (binary or unicode).

    This is similar to frontmatter.loads but relies on the
    `parse_frontmatter` function defined in this module.
    """
    unicode_text = u(text, encoding)
    # only detect a handler when the caller did not provide one
    if not handler:
        handler = detect_format(unicode_text, handlers)
    metadata, content = parse_frontmatter(unicode_text, encoding, handler, **defaults)
    return FrontmatterPost(content, handler, **metadata)
| 111 | + |
| 112 | + |
def load_frontmatter(fd, encoding="utf-8", handler=None, **defaults):
    """
    Load and parse a file-like object or filename,
    return a :py:class:`post <frontmatter.Post>`.

    ``fd`` is either an object with a ``read`` method (its content is used
    as-is, bytes or unicode) or a path that is opened and decoded with
    ``encoding``. ``handler`` and ``defaults`` are passed through unchanged.

    This is similar to the frontmatter.load but is using the
    `loads_frontmatter` function defined in this module.
    """
    if hasattr(fd, "read"):
        text = fd.read()
    else:
        with codecs.open(fd, "r", encoding) as f:
            text = f.read()

    # Handler detection is intentionally left to ``loads_frontmatter``, which
    # decodes ``text`` to unicode before detecting. Detecting here on the raw
    # content would match str boundary patterns (such as FM_BOUNDARY in this
    # module) against bytes when ``fd`` is a binary file object, which raises
    # a TypeError.
    return loads_frontmatter(text, encoding, handler, **defaults)
| 130 | + |
| 131 | + |
def dumps_frontmatter(post, handler=None, **kwargs):
    """
    Return the text serialization of a :py:class:`post <frontmatter.Post>`,
    as produced by the selected handler's ``format`` method.

    The handler is chosen in this order: the ``handler`` argument if it is
    not None, then a truthy ``post.handler`` attribute, and finally a new
    :py:class:`SaneYAMLHandler <frontmatter.SaneYAMLHandler>`. Extra
    ``kwargs`` are forwarded to ``format``.

    This is similar to the frontmatter.dumps but is using the `SaneYAMLHandler`
    of this module as default instead of frontmatter.default_handlers.YAMLHandler.
    """
    serializer = handler
    if serializer is None:
        # fall back to the handler attached to the post, if any
        serializer = getattr(post, "handler", None)
        if not serializer:
            serializer = SaneYAMLHandler()

    return serializer.format(post, **kwargs)
0 commit comments