Skip to content

Commit

Permalink
Merge pull request datacarpentry#234 from mr-c/py34
Browse files Browse the repository at this point in the history
convert to py3.4 syntax, a few cleanups
  • Loading branch information
rgaiacs authored May 8, 2018
2 parents e9acce2 + 93eb566 commit 079bbc2
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 103 deletions.
91 changes: 43 additions & 48 deletions bin/lesson_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@
Check lesson files and their contents.
"""

from __future__ import print_function
import sys

import os
import glob
import json
import re
from optparse import OptionParser

from util import Reporter, read_markdown, load_yaml, check_unwanted_files, require, IMAGE_FILE_SUFFIX
from util import (Reporter, read_markdown, load_yaml, check_unwanted_files,
require)

__version__ = '0.3'

Expand All @@ -23,8 +22,9 @@
# FIXME: We do not yet validate whether any files have the required
# YAML headers, but should in the future.
# The '%' is replaced with the source directory path for checking.
# Episodes are handled specially, and extra files in '_extras' are also handled
# specially. This list must include all the Markdown files listed in the
# 'bin/initialize' script.
REQUIRED_FILES = {
'%/CONDUCT.md': True,
'%/CONTRIBUTING.md': False,
Expand Down Expand Up @@ -101,6 +101,7 @@
# How long are lines allowed to be?
MAX_LINE_LEN = 100


def main():
"""Main driver."""

Expand All @@ -110,9 +111,9 @@ def main():
args.references = read_references(args.reporter, args.reference_path)

docs = read_all_markdown(args.source_dir, args.parser)
check_fileset(args.source_dir, args.reporter, docs.keys())
check_fileset(args.source_dir, args.reporter, list(docs.keys()))
check_unwanted_files(args.source_dir, args.reporter)
for filename in docs.keys():
for filename in list(docs.keys()):
checker = create_checker(args, filename, docs[filename])
checker.check()

Expand Down Expand Up @@ -160,8 +161,10 @@ def check_config(reporter, source_dir):

config_file = os.path.join(source_dir, '_config.yml')
config = load_yaml(config_file)
reporter.check_field(config_file, 'configuration', config, 'kind', 'lesson')
reporter.check_field(config_file, 'configuration', config, 'carpentry', ('swc', 'dc', 'lc'))
reporter.check_field(config_file, 'configuration',
config, 'kind', 'lesson')
reporter.check_field(config_file, 'configuration',
config, 'carpentry', ('swc', 'dc', 'lc'))
reporter.check_field(config_file, 'configuration', config, 'title')
reporter.check_field(config_file, 'configuration', config, 'email')

Expand Down Expand Up @@ -235,17 +238,17 @@ def check_fileset(source_dir, reporter, filenames_present):
if m and m.group(1):
seen.append(m.group(1))
else:
reporter.add(None, 'Episode {0} has badly-formatted filename', filename)
reporter.add(
None, 'Episode {0} has badly-formatted filename', filename)

# Check for duplicate episode numbers.
reporter.check(len(seen) == len(set(seen)),
None,
'Duplicate episode numbers {0} vs {1}',
sorted(seen), sorted(set(seen)))
None,
'Duplicate episode numbers {0} vs {1}',
sorted(seen), sorted(set(seen)))

# Check that numbers are consecutive.
seen = [int(s) for s in seen]
seen.sort()
seen = sorted([int(s) for s in seen])
clean = True
for i in range(len(seen) - 1):
clean = clean and ((seen[i+1] - seen[i]) == 1)
Expand All @@ -271,7 +274,7 @@ def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):

super(CheckBase, self).__init__()
self.args = args
self.reporter = self.args.reporter # for convenience
self.reporter = self.args.reporter # for convenience
self.filename = filename
self.metadata = metadata
self.metadata_len = metadata_len
Expand All @@ -281,7 +284,6 @@ def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):

self.layout = None


def check(self):
"""Run tests."""

Expand All @@ -292,7 +294,6 @@ def check(self):
self.check_codeblock_classes()
self.check_defined_link_references()


def check_metadata(self):
"""Check the YAML metadata."""

Expand All @@ -301,61 +302,59 @@ def check_metadata(self):
'Missing metadata entirely')

if self.metadata and (self.layout is not None):
self.reporter.check_field(self.filename, 'metadata', self.metadata, 'layout', self.layout)

self.reporter.check_field(
self.filename, 'metadata', self.metadata, 'layout', self.layout)

def check_line_lengths(self):
    """Check the raw text of the lesson body for over-long lines.

    Does nothing unless ``self.args.line_lengths`` is truthy.  Each entry
    of ``self.lines`` is unpacked as a 3-tuple; the third item is compared
    against ``MAX_LINE_LEN``, the second is the line text, and the first is
    what gets reported (presumably the line number -- confirm against the
    code that builds ``self.lines``).
    """

    if not self.args.line_lengths:
        return

    # Lines whose text starts with '!' are exempt from the length limit.
    over = [
        number
        for (number, text, length) in self.lines
        if length > MAX_LINE_LEN and not text.startswith('!')
    ]
    # The reporter receives the condition "no offending lines" together
    # with the message template and the comma-separated offenders.
    self.reporter.check(not over,
                        self.filename,
                        'Line(s) are too long: {0}',
                        ', '.join(str(number) for number in over))


def check_trailing_whitespace(self):
    """Check for whitespace at the ends of lines.

    Does nothing unless ``self.args.trailing_whitespace`` is truthy.  Each
    entry of ``self.lines`` is unpacked as a 3-tuple; the second item (the
    line text) is tested with ``P_TRAILING_WHITESPACE.match`` and the first
    item of every matching entry is reported.
    """

    if not self.args.trailing_whitespace:
        return

    trailing = [
        number
        for (number, text, _length) in self.lines
        if P_TRAILING_WHITESPACE.match(text)
    ]
    # The reporter receives the condition "no offending lines" together
    # with the message template and the comma-separated offenders.
    self.reporter.check(not trailing,
                        self.filename,
                        'Line(s) end with whitespace: {0}',
                        ', '.join(str(number) for number in trailing))


def check_blockquote_classes(self):
    """Check that all blockquotes have known classes.

    Every node found in ``self.doc`` whose ``'type'`` is ``'blockquote'``
    has its ``attr`` -> ``class`` value looked up with ``get_val`` and
    checked for membership in ``KNOWN_BLOCKQUOTES``.
    """

    for node in self.find_all(self.doc, {'type': 'blockquote'}):
        cls = self.get_val(node, 'attr', 'class')
        # The location passed to the reporter is (filename, node location).
        self.reporter.check(cls in KNOWN_BLOCKQUOTES,
                            (self.filename, self.get_loc(node)),
                            'Unknown or missing blockquote type {0}',
                            cls)


def check_codeblock_classes(self):
    """Check that all code blocks have known classes.

    Every node found in ``self.doc`` whose ``'type'`` is ``'codeblock'``
    has its ``attr`` -> ``class`` value looked up with ``get_val`` and
    checked for membership in ``KNOWN_CODEBLOCKS``.
    """

    for node in self.find_all(self.doc, {'type': 'codeblock'}):
        cls = self.get_val(node, 'attr', 'class')
        # The location passed to the reporter is (filename, node location).
        self.reporter.check(cls in KNOWN_CODEBLOCKS,
                            (self.filename, self.get_loc(node)),
                            'Unknown or missing code block type {0}',
                            cls)


def check_defined_link_references(self):
"""Check that defined links resolve in the file.
Internally-defined links match the pattern [text][label].
"""

result = set()
for node in self.find_all(self.doc, {'type' : 'text'}):
for node in self.find_all(self.doc, {'type': 'text'}):
for match in P_INTERNAL_LINK_REF.findall(node['value']):
text = match[0]
link = match[1]
Expand All @@ -366,11 +365,10 @@ def check_defined_link_references(self):
'Internally-defined links may be missing definitions: {0}',
', '.join(sorted(result)))


def find_all(self, node, pattern, accum=None):
"""Find all matches for a pattern."""

assert type(pattern) == dict, 'Patterns must be dictionaries'
assert isinstance(pattern, dict), 'Patterns must be dictionaries'
if accum is None:
accum = []
if self.match(node, pattern):
Expand All @@ -379,23 +377,21 @@ def find_all(self, node, pattern, accum=None):
self.find_all(child, pattern, accum)
return accum


def match(self, node, pattern):
    """Does this node match the given pattern?

    ``pattern`` maps keys to expected values.  Every key must be present
    in ``node``.  A string value must equal ``node[key]``; a dict value is
    matched recursively against ``node[key]``; a value of any other type
    only requires the key to be present.

    Returns True when every key in ``pattern`` is satisfied (an empty
    pattern matches anything), False otherwise.
    """

    for key, expected in pattern.items():
        if key not in node:
            return False
        if isinstance(expected, str):
            if node[key] != expected:
                return False
        elif isinstance(expected, dict):
            # Nested pattern: descend into the corresponding sub-node.
            if not self.match(node[key], expected):
                return False
    return True


def get_val(self, node, *chain):
"""Get value one or more levels down."""

Expand All @@ -406,7 +402,6 @@ def get_val(self, node, *chain):
break
return curr


def get_loc(self, node):
"""Convenience method to get node's line number."""

Expand All @@ -420,8 +415,8 @@ class CheckNonJekyll(CheckBase):
"""Check a file that isn't translated by Jekyll."""

def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):
super(CheckNonJekyll, self).__init__(args, filename, metadata, metadata_len, text, lines, doc)

super(CheckNonJekyll, self).__init__(
args, filename, metadata, metadata_len, text, lines, doc)

def check_metadata(self):
self.reporter.check(self.metadata is None,
Expand All @@ -433,7 +428,8 @@ class CheckIndex(CheckBase):
"""Check the main index page."""

def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):
super(CheckIndex, self).__init__(args, filename, metadata, metadata_len, text, lines, doc)
super(CheckIndex, self).__init__(args, filename,
metadata, metadata_len, text, lines, doc)
self.layout = 'lesson'

def check_metadata(self):
Expand All @@ -447,16 +443,15 @@ class CheckEpisode(CheckBase):
"""Check an episode page."""

def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):
super(CheckEpisode, self).__init__(args, filename, metadata, metadata_len, text, lines, doc)

super(CheckEpisode, self).__init__(args, filename,
metadata, metadata_len, text, lines, doc)

def check(self):
    """Run the inherited tests, then the reference-inclusion test."""

    super(CheckEpisode, self).check()
    self.check_reference_inclusion()


def check_metadata(self):
super(CheckEpisode, self).check_metadata()
if self.metadata:
Expand All @@ -470,19 +465,17 @@ def check_metadata(self):
else:
self.check_metadata_fields(TEACHING_METADATA_FIELDS)


def check_metadata_fields(self, expected):
    """Check that required metadata fields are present and well-typed.

    ``expected`` is an iterable of ``(name, type_)`` pairs.  For each pair
    a problem is added to the reporter when ``name`` is absent from
    ``self.metadata``, or when the stored value is not an instance of
    ``type_``.
    """

    for (name, type_) in expected:
        if name not in self.metadata:
            self.reporter.add(self.filename,
                              'Missing metadata field {0}',
                              name)
            continue

        value = self.metadata[name]
        if not isinstance(value, type_):
            self.reporter.add(self.filename,
                              '"{0}" has wrong type in metadata ({1} instead of {2})',
                              name, type(value), type_)


def check_reference_inclusion(self):
"""Check that links file has been included."""

Expand All @@ -507,15 +500,17 @@ class CheckReference(CheckBase):
"""Check the reference page."""

def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):
super(CheckReference, self).__init__(args, filename, metadata, metadata_len, text, lines, doc)
super(CheckReference, self).__init__(
args, filename, metadata, metadata_len, text, lines, doc)
self.layout = 'reference'


class CheckGeneric(CheckBase):
    """Check a generic page."""

    def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):
        # All constructor arguments are forwarded unchanged to CheckBase.
        super(CheckGeneric, self).__init__(
            args, filename, metadata, metadata_len, text, lines, doc)
        # Expected value of the 'layout' metadata field for this page kind.
        self.layout = 'page'


Expand Down
5 changes: 2 additions & 3 deletions bin/lesson_initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""Initialize a newly-created repository."""


from __future__ import print_function
import sys
import os

Expand Down Expand Up @@ -121,7 +120,7 @@
If you choose to contribute via GitHub, you may want to look at
[How to Contribute to an Open Source Project on GitHub][how-contribute].
To manage changes, we follow [GitHub flow][github-flow].
To manage changes, we follow [GitHub flow][github-flow].
Each lesson has two maintainers who review issues and pull requests or encourage others to do so.
The maintainers are community volunteers and have final say over what gets merged into the lesson.
To use the web interface for contributing to a lesson:
Expand Down Expand Up @@ -279,7 +278,7 @@

ROOT_AIO_MD = '''\
---
layout: page
layout: page
root: .
---
<script>
Expand Down
Loading

0 comments on commit 079bbc2

Please sign in to comment.