Merge pull request #257 from commonsense/reformat
Meaningless formatting cleanups
Robyn Speer authored Apr 9, 2019
2 parents 0cbdffb + dd5ef67 commit 895ea23
Showing 63 changed files with 1,859 additions and 1,193 deletions.
2 changes: 1 addition & 1 deletion conceptnet5/api.py
@@ -2,8 +2,8 @@
 This file defines the ConceptNet web API responses.
 """
 
+from conceptnet5.nodes import ld_node, standardized_concept_uri
 from conceptnet5.vectors.query import VectorSpaceWrapper
-from conceptnet5.nodes import standardized_concept_uri, ld_node
 
 VECTORS = VectorSpaceWrapper()
 FINDER = VECTORS.finder
11 changes: 7 additions & 4 deletions conceptnet5/builders/cli.py
@@ -1,7 +1,8 @@
 import click
+
 from .combine_assertions import combine_assertions
-from .reduce_assoc import reduce_assoc
 from .morphology import prepare_vocab_for_morphology, subwords_to_edges
+from .reduce_assoc import reduce_assoc
 
 
 @click.group()
@@ -25,12 +26,14 @@ def run_combine(input, output):
 
 @cli.command(name='reduce_assoc')
 @click.argument('assoc_filename', type=click.Path(readable=True, dir_okay=False))
-@click.argument('embedding_filenames', nargs=-1, type=click.Path(readable=True, dir_okay=False))
+@click.argument(
+    'embedding_filenames', nargs=-1, type=click.Path(readable=True, dir_okay=False)
+)
 @click.argument('output', type=click.Path(writable=True, dir_okay=False))
 def run_reduce_assoc(assoc_filename, embedding_filenames, output):
     """
-    Takes in a file of tab-separated simple associations, one or more
-    hdf5 files defining vector embeddings, and removes from the associations
+    Takes in a file of tab-separated simple associations, one or more
+    hdf5 files defining vector embeddings, and removes from the associations
     low-frequency terms and associations that are judged unlikely to be
     useful by various filters.
     """
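The reduce_assoc command above takes a tab-separated association file, one or more HDF5 embedding files, and an output path. A minimal sketch of driving it through click's test runner, assuming placeholder file names that are not part of this repository:

# Hypothetical invocation of the reduce_assoc command defined above.
# The input and output file names are placeholders for this sketch.
from click.testing import CliRunner

from conceptnet5.builders.cli import cli

runner = CliRunner()
result = runner.invoke(
    cli,
    ['reduce_assoc', 'assoc.csv', 'embeddings-1.h5', 'embeddings-2.h5', 'reduced.csv'],
)
print(result.exit_code, result.output)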
24 changes: 16 additions & 8 deletions conceptnet5/builders/combine_assertions.py
@@ -1,16 +1,18 @@
-from __future__ import unicode_literals, print_function
-
 import itertools
 import json
-
 import os
 
 from conceptnet5.edges import make_edge
 from conceptnet5.formats.msgpack_stream import MsgpackStreamWriter
 from conceptnet5.languages import ALL_LANGUAGES
 from conceptnet5.readers.wiktionary import valid_language
-from conceptnet5.uri import conjunction_uri,get_uri_language, is_absolute_url, Licenses, \
-    split_uri, uri_prefix
+from conceptnet5.uri import (
+    Licenses,
+    conjunction_uri,
+    get_uri_language,
+    is_absolute_url,
+    split_uri,
+)
 from conceptnet5.util import get_support_data_filename
 
 N = 100
@@ -91,9 +93,14 @@ def make_assertion(line_group):
     license = Licenses.cc_attribution
 
     return make_edge(
-        rel=rel, start=start, end=end, weight=weight,
-        dataset=dataset, license=license, sources=sources,
-        surfaceText=surface_text
+        rel=rel,
+        start=start,
+        end=end,
+        weight=weight,
+        dataset=dataset,
+        license=license,
+        sources=sources,
+        surfaceText=surface_text,
     )
 
 
@@ -113,6 +120,7 @@ def combine_assertions(input_filename, output_filename):
     This process requires its input to be a sorted CSV so that all edges for
     the same assertion will appear consecutively.
     """
+
     def group_func(line):
         "Group lines by their URI (their first column)."
         return line.split('\t', 1)[0]
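The group_func helper above is why combine_assertions needs sorted input: itertools.groupby only merges lines that are already consecutive. A small illustration, with invented assertion lines:

# Illustration of grouping sorted assertion lines by their first column (the URI).
# The URIs and weights here are invented for the example.
import itertools


def group_func(line):
    "Group lines by their URI (their first column)."
    return line.split('\t', 1)[0]


lines = [
    '/a/[/r/IsA/,/c/en/cat/,/c/en/animal/]\t1.0',
    '/a/[/r/IsA/,/c/en/cat/,/c/en/animal/]\t0.5',
    '/a/[/r/IsA/,/c/en/dog/,/c/en/animal/]\t2.0',
]
for uri, group in itertools.groupby(lines, group_func):
    print(uri, len(list(group)))
# Only consecutive lines merge, which is why the input CSV must be sorted by URI.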
8 changes: 5 additions & 3 deletions conceptnet5/builders/morphology.py
@@ -4,7 +4,7 @@
 from conceptnet5.formats.msgpack_stream import MsgpackStreamWriter
 from conceptnet5.languages import ATOMIC_SPACE_LANGUAGES
 from conceptnet5.nodes import split_uri
-from conceptnet5.uri import get_uri_language, join_uri, Licenses
+from conceptnet5.uri import Licenses, get_uri_language, join_uri
 
 
 def prepare_vocab_for_morphology(language, input, output):
@@ -61,11 +61,13 @@ def subwords_to_edges(language, input, output):
            if chunk != '_':
                start = join_uri('x', language, chunk.strip('_'))
                edge = make_edge(
-                    '/r/SubwordOf', start, end,
+                    '/r/SubwordOf',
+                    start,
+                    end,
                     dataset='/d/morphology',
                     license=Licenses.cc_attribution,
                     sources=MORPH_SOURCES,
-                    weight=0.01
+                    weight=0.01,
                 )
                writer.write(edge)
     writer.close()
104 changes: 54 additions & 50 deletions conceptnet5/builders/reduce_assoc.py
@@ -5,10 +5,11 @@
 
 from collections import defaultdict
 
+import pandas as pd
+
 from conceptnet5.relations import is_negative_relation
 from conceptnet5.uri import is_concept, uri_prefix
 from conceptnet5.vectors.formats import load_hdf
-import pandas as pd
 
 
 def concept_is_bad(uri):
@@ -19,14 +20,19 @@
     specific phrase, possibly mis-parsed. A concept with a colon is probably
     detritus from a wiki.
     """
-    return (':' in uri or uri.count('_') >= 3 or
-            uri.startswith('/a/') or uri.count('/') <= 2)
+    return (
+        ':' in uri
+        or uri.count('_') >= 3
+        or uri.startswith('/a/')
+        or uri.count('/') <= 2
+    )
 
 
 class ConceptNetAssociationGraph:
     '''
     Class to hold the concept-association edge graph.
     '''
+
     def __init__(self):
         '''Construct a graph with no vertices or edges.'''
         self.vertex_to_neighbors = defaultdict(set)
@@ -43,15 +49,15 @@ def vertices(self):
 
     def find_components(self):
         '''
-        Returns a dict mapping the vertices of the graph to labels,
-        such that two vertices map to the same label if and only if
-        they belong to the same connected component of the undirected
-        graph obtained by adding the reversal of every edge to the
-        graph. (But note that this function does not modify the graph,
+        Returns a dict mapping the vertices of the graph to labels,
+        such that two vertices map to the same label if and only if
+        they belong to the same connected component of the undirected
+        graph obtained by adding the reversal of every edge to the
+        graph. (But note that this function does not modify the graph,
         i.e. it does not add any edges.)
         '''
-        component_labels = {vertex : -1 for vertex in self.vertices()}
-
+        component_labels = {vertex: -1 for vertex in self.vertices()}
         vertices_to_examine = set(self.vertices())
         new_label = -1
         while len(vertices_to_examine) > 0:
@@ -72,19 +78,18 @@ def find_components(self):
         return component_labels
 
     @classmethod
-    def from_csv(cls, filename, filtered_concepts=None,
-                 reject_negative_relations=True):
+    def from_csv(cls, filename, filtered_concepts=None, reject_negative_relations=True):
         """
-        Reads an association file and builds an (undirected) graph from it.
+        Reads an association file and builds an (undirected) graph from it.
 
-        If filtered_concepts isn't None, it should be a collection of concepts,
-        and only vertices from this collection and edges that link two such
-        vertices will be added to the graph. If it _is_ None (the default),
-        however, please note that no such filtering will be done (i.e. the
-        effective filter collection is then the universal set of concepts, not
+        If filtered_concepts isn't None, it should be a collection of concepts,
+        and only vertices from this collection and edges that link two such
+        vertices will be added to the graph.  If it _is_ None (the default),
+        however, please note that no such filtering will be done (i.e. the
+        effective filter collection is then the universal set of concepts, not
         the empty set).
 
-        If reject_negative_relations is True (the default), only edges not
+        If reject_negative_relations is True (the default), only edges not
         corresponding to negative relations will be added to the graph.
         """
         graph = cls()
@@ -119,29 +124,29 @@ def from_csv(cls, filename, filtered_concepts=None,
 
 class ConceptNetAssociationGraphForReduction(ConceptNetAssociationGraph):
     """
-    Subclass of ConceptNetAssociationGraph specialized for use in making
+    Subclass of ConceptNetAssociationGraph specialized for use in making
     the reduced subgraph of a full set of associations.
     """
+
     def __init__(self):
         super().__init__()
         self.edges = []
 
     def add_edge(self, left, right, value, dataset, relation):
         """
-        In addition to the superclass's handling of a new edge,
+        In addition to the superclass's handling of a new edge,
         saves the full edge data.
         """
         super().add_edge(left, right, value, dataset, relation)
         self.edges.append((left, right, value, dataset, relation))
 
 
-
 def make_filtered_concepts(filename, cutoff=3, en_cutoff=3):
     """
-    Takes in a file of tab-separated associations, and returns a set of
-    concepts from which those which are unlikely to be useful have been
-    removed.
+    Takes in a file of tab-separated associations, and returns a set of
+    concepts from which those which are unlikely to be useful have been
+    removed.
     All concepts that occur fewer than `cutoff` times will be removed.
     All English concepts that occur fewer than `en_cutoff` times will be removed.
     """
@@ -160,20 +165,18 @@ def make_filtered_concepts(filename, cutoff=3, en_cutoff=3):
            counts[gright] += 1
 
     filtered_concepts = {
-        concept for (concept, count) in counts.items()
-        if (
-            count >= en_cutoff or
-            (not is_concept(concept) and count >= cutoff)
-        )
+        concept
+        for (concept, count) in counts.items()
+        if (count >= en_cutoff or (not is_concept(concept) and count >= cutoff))
     }
     return filtered_concepts
 
 
 def read_embedding_vocabularies(filenames):
     """
-    Reads every vector embedding file in the given collection of
-    filenames, and returns the union of their vocabularies. (The
-    files are assumed to be hdf5 files containing dataframes, and
+    Reads every vector embedding file in the given collection of
+    filenames, and returns the union of their vocabularies. (The
+    files are assumed to be hdf5 files containing dataframes, and
     the vocabularies are their indices.
     """
     result = pd.Index([])
@@ -183,28 +186,29 @@ def read_embedding_vocabularies(filenames):
     return result
 
 
-
-def reduce_assoc(assoc_filename, embedding_filenames, output_filename,
-                 cutoff=3, en_cutoff=3):
+def reduce_assoc(
+    assoc_filename, embedding_filenames, output_filename, cutoff=3, en_cutoff=3
+):
     """
     Takes in a file of tab-separated simple associations, and removes
-    uncommon associations and associations unlikely to be useful. Also
-    requires one or more vector embedding files (from which only the
-    vocabularies are used; associations involving terms that have no
-    connection, no matter how distant, to the union of those vocabularies
+    uncommon associations and associations unlikely to be useful. Also
+    requires one or more vector embedding files (from which only the
+    vocabularies are used; associations involving terms that have no
+    connection, no matter how distant, to the union of those vocabularies
     will be removed).
     All concepts that occur fewer than `cutoff` times will be removed.
     All English concepts that occur fewer than `en_cutoff` times will be removed.
     """
 
-    filtered_concepts = make_filtered_concepts(assoc_filename, cutoff=cutoff,
-                                               en_cutoff=en_cutoff)
+    filtered_concepts = make_filtered_concepts(
+        assoc_filename, cutoff=cutoff, en_cutoff=en_cutoff
+    )
 
     graph = ConceptNetAssociationGraphForReduction.from_csv(
         assoc_filename,
         filtered_concepts=filtered_concepts,
-        reject_negative_relations=True
+        reject_negative_relations=True,
     )
 
     component_labels = graph.find_components()
@@ -215,11 +219,11 @@ def reduce_assoc(assoc_filename, embedding_filenames, output_filename,
     # from any of the embedding vocabularies, there will be no way to assign
     # vectors to any of its vertices, so we remove that component from the
     # output.
-    good_component_labels = set(label for term, label
-                                in component_labels.items()
-                                if term in embedding_vocab)
-
+    good_component_labels = set(
+        label for term, label in component_labels.items() if term in embedding_vocab
+    )
+
     with open(output_filename, 'w', encoding='utf-8') as out:
         for gleft, gright, value, dataset, rel in graph.edges:
             if component_labels[gleft] not in good_component_labels:
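As a rough illustration of the two filters whose docstrings appear above, concept_is_bad for single URIs and find_components for whole connected components, here is a hedged sketch. It assumes the graph class behaves as those docstrings describe; the URIs, weights, and dataset labels are made up.

# Sketch only; all URIs, weights, and dataset labels are illustrative.
from conceptnet5.builders.reduce_assoc import (
    ConceptNetAssociationGraphForReduction,
    concept_is_bad,
)

# concept_is_bad rejects URIs with colons, three or more underscores,
# an /a/ prefix, or too few path components.
print(concept_is_bad('/c/en/example'))                   # False: an ordinary concept
print(concept_is_bad('/c/en/a_very_long_phrase_here'))   # True: too many underscores
print(concept_is_bad('/c/en'))                           # True: too few path components

# find_components should give two vertices the same label exactly when they are
# connected, treating every edge as undirected.
graph = ConceptNetAssociationGraphForReduction()
graph.add_edge('/c/en/cat', '/c/en/pet', 1.0, '/d/example', '/r/RelatedTo')
graph.add_edge('/c/en/pet', '/c/en/dog', 1.0, '/d/example', '/r/RelatedTo')
graph.add_edge('/c/en/island', '/c/en/isolated', 1.0, '/d/example', '/r/RelatedTo')

labels = graph.find_components()
print(labels['/c/en/cat'] == labels['/c/en/dog'])     # expected True: same component
print(labels['/c/en/cat'] == labels['/c/en/island'])  # expected False: different components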
14 changes: 10 additions & 4 deletions conceptnet5/db/cli.py
@@ -1,7 +1,8 @@
 import click
-from .connection import get_db_connection, check_db_connection
+
+from .connection import check_db_connection, get_db_connection
 from .prepare_data import assertions_to_sql_csv, load_sql_csv
-from .schema import create_tables, create_indices
+from .schema import create_indices, create_tables
 
 
 @click.group()
@@ -11,13 +12,18 @@ def cli():
 
 @cli.command(name='prepare_data')
 @click.argument('input_filename', type=click.Path(readable=True, dir_okay=False))
-@click.argument('output_dir', type=click.Path(writable=True, dir_okay=True, file_okay=False))
+@click.argument(
+    'output_dir', type=click.Path(writable=True, dir_okay=True, file_okay=False)
+)
 def prepare_data(input_filename, output_dir):
     assertions_to_sql_csv(input_filename, output_dir)
 
 
 @cli.command(name='load_data')
-@click.argument('input_dir', type=click.Path(readable=True, writable=True, dir_okay=True, file_okay=False))
+@click.argument(
+    'input_dir',
+    type=click.Path(readable=True, writable=True, dir_okay=True, file_okay=False),
+)
 def load_data(input_dir):
     conn = get_db_connection()
     create_tables(conn)
1 change: 1 addition & 0 deletions conceptnet5/db/connection.py
@@ -1,4 +1,5 @@
 import psycopg2
+
 from conceptnet5.db import config
 
 _CONNECTIONS = {}
5 changes: 3 additions & 2 deletions conceptnet5/db/prepare_data.py
@@ -1,8 +1,9 @@
+import json
+
 from conceptnet5.formats.msgpack_stream import read_msgpack_stream
-from conceptnet5.uri import uri_prefixes
 from conceptnet5.relations import SYMMETRIC_RELATIONS
+from conceptnet5.uri import uri_prefixes
 from ordered_set import OrderedSet
-import json
 
 
 def write_row(outfile, items):