From 8e1e823139d212849a37ee9898da2ba02c29bbe1 Mon Sep 17 00:00:00 2001
From: Christian Clauss
Date: Thu, 22 Aug 2019 00:04:31 +0200
Subject: [PATCH] Lint Python code for undefined names (#1721)

* Lint Python code for undefined names

* Lint Python code for undefined names

* Exclude tfdv.py to workaround an overzealous pytest

* Fixup for tfdv.py

* Fixup for tfdv.py

* Fixup for tfdv.py
---
 .travis.yml                                   |  6 +++-
 .../src/apiserver/visualization/roc_curve.py  |  4 ++-
 .../snapshots/snap_test_exporter.py           | 21 +++++-------
 .../apiserver/visualization/test_exporter.py  |  2 +-
 backend/src/apiserver/visualization/tfdv.py   |  2 +-
 .../python/arena/_arena_distributed_tf_op.py  | 33 ++++++++++---------
 .../emr/create_cluster/src/create_cluster.py  |  6 ++++
 .../src/submit_pyspark_job.py                 |  5 +++
 .../submit_spark_job/src/submit_spark_job.py  |  5 +++
 .../batch_transform/src/batch_transform.py    |  6 ++++
 proxy/get_proxy_url.py                        |  5 +++
 .../mini-image-classification-pipeline.py     |  2 +-
 .../resnet-cmle/resnet-train-pipeline.py      |  1 +
 sdk/python/kfp/components/_dynamic.py         |  8 ++---
 sdk/python/kfp/dsl/_pipeline.py               |  6 ++--
 sdk/python/tests/dsl/component_tests.py       |  2 +-
 16 files changed, 73 insertions(+), 41 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index fef38aaceca..1c646db8227 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -94,5 +94,9 @@ matrix:
     - language: python
      python: "3.7"
      env: TOXENV=py37
-      dist: xenial # required for Python >= 3.7
      script: *1
+    - name: "Lint Python code with flake8"
+      language: python
+      python: "3.7"
+      install: pip install flake8
+      script: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
diff --git a/backend/src/apiserver/visualization/roc_curve.py b/backend/src/apiserver/visualization/roc_curve.py
index be38bc0a27c..a9142cca17f 100644
--- a/backend/src/apiserver/visualization/roc_curve.py
+++ b/backend/src/apiserver/visualization/roc_curve.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# flake8: noqa TODO
+
 import json
 from pathlib import Path
 from bokeh.layouts import row
@@ -34,7 +36,7 @@
 # trueclass
 # true_score_column
 
-if "is_generated" is not in variables or variables["is_generated"] is False:
+if not variables.get("is_generated"):
     # Create data from specified csv file(s).
     # The schema file provides column names for the csv file that will be used
     # to generate the roc curve.
diff --git a/backend/src/apiserver/visualization/snapshots/snap_test_exporter.py b/backend/src/apiserver/visualization/snapshots/snap_test_exporter.py
index b4955ec2df3..9db40f393e1 100644
--- a/backend/src/apiserver/visualization/snapshots/snap_test_exporter.py
+++ b/backend/src/apiserver/visualization/snapshots/snap_test_exporter.py
@@ -76,7 +76,13 @@
 '''
 
-snapshots['TestExporterMethods::test_create_cell_from_file 1'] = '''# Copyright 2019 Google LLC
+snapshots['TestExporterMethods::test_create_cell_from_file 1'] = '''"""
+test.py is used for test_server.py as a way to test the tornado web server
+without having a reliance on the validity of visualizations. It does not serve
+as a valid visualization and is only used for testing purposes.
+"""
+
+# Copyright 2019 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -90,17 +96,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import tensorflow_data_validation as tfdv
-
-# The following variables are provided through dependency injection. These
-# variables come from the specified input path and arguments provided by the
-# API post request.
-#
-# source
-
-train_stats = tfdv.generate_statistics_from_csv(data_location=source)
-
-tfdv.visualize_statistics(train_stats)
+x = 2
+print(x)
 '''
 
 snapshots['TestExporterMethods::test_generate_html_from_notebook 1'] = '''
diff --git a/backend/src/apiserver/visualization/test_exporter.py b/backend/src/apiserver/visualization/test_exporter.py
index c490b26d44a..f7447b932e4 100644
--- a/backend/src/apiserver/visualization/test_exporter.py
+++ b/backend/src/apiserver/visualization/test_exporter.py
@@ -58,7 +58,7 @@ def test_create_cell_from_args_with_multiple_args(self):
 
     def test_create_cell_from_file(self):
         self.maxDiff = None
-        cell = self.exporter.create_cell_from_file("tfdv.py")
+        cell = self.exporter.create_cell_from_file("test.py")
         self.assertMatchSnapshot(cell.source)
 
     def test_generate_html_from_notebook(self):
diff --git a/backend/src/apiserver/visualization/tfdv.py b/backend/src/apiserver/visualization/tfdv.py
index 0190f434e6d..b01f4336fa5 100644
--- a/backend/src/apiserver/visualization/tfdv.py
+++ b/backend/src/apiserver/visualization/tfdv.py
@@ -20,6 +20,6 @@
 #
 # source
 
-train_stats = tfdv.generate_statistics_from_csv(data_location=source)
+train_stats = tfdv.generate_statistics_from_csv(data_location=source) # noqa: F821
 
 tfdv.visualize_statistics(train_stats)
diff --git a/components/arena/python/arena/_arena_distributed_tf_op.py b/components/arena/python/arena/_arena_distributed_tf_op.py
index e98d01a38e1..f67175cb20f 100644
--- a/components/arena/python/arena/_arena_distributed_tf_op.py
+++ b/components/arena/python/arena/_arena_distributed_tf_op.py
@@ -13,20 +13,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# flake8: noqa TODO
 import kfp.dsl as dsl
 import datetime
 import logging
 
-def estimator_op(name, image, command, 
+def estimator_op(name, image, command,
                  chief_cpu_limit, chief_memory_limit, chief_port,
                  workers, worker_image, worker_cpu_limit, worker_memory_limit,
-                 parameter_servers, ps_image, ps_cpu_limit, ps_memory_limit, ps_port, 
-                 gpus, rdma, 
-                 tensorboard, 
+                 parameter_servers, ps_image, ps_cpu_limit, ps_memory_limit, ps_port,
+                 gpus, rdma,
+                 tensorboard,
                  worker_port, annotations=[],
-                 evaluator=False, evaluator_cpu_limit='0', evaluator_memory_limit='0', 
+                 evaluator=False, evaluator_cpu_limit='0', evaluator_memory_limit='0',
                  env=[], data=[], sync_source=None,
                  metrics=['Train-accuracy:PERCENTAGE'],
                  arena_image='cheyang/arena_launcher:v0.7',
@@ -44,11 +45,11 @@ def estimator_op(name, image, command,
                  workers=workers, worker_image=worker_image, worker_cpu_limit=worker_cpu_limit, worker_memory_limit=worker_memory,
                  parameter_servers=parameter_servers, ps_image=ps_image, ps_cpu_limit=ps_cpu_limit, ps_memory_limit=ps_memory_limit,
                  gpus=gpus, rdma=rdma,
-                 chief=True, 
+                 chief=True,
                  chief_cpu_limit=chief_cpu_limit,
                  worker_port=worker_port,
                  ps_port=ps_port,
-                 tensorboard=tensorboard, 
+                 tensorboard=tensorboard,
                  metrics=metrics,
                  arena_image=arena_image,
                  timeout_hours=timeout_hours)
@@ -58,9 +59,9 @@ def estimator_op(name, image, command,
 def parameter_servers_op(name, image, command, env, data, sync_source, annotations,
                          workers, worker_image, worker_cpu_limit, worker_memory,
                          parameter_servers, ps_image, ps_cpu_limit, ps_memory_limit,
-                         gpus, rdma, 
-                         tensorboard, 
-                         worker_port, ps_port, 
+                         gpus, rdma,
+                         tensorboard,
+                         worker_port, ps_port,
                          metrics=['Train-accuracy:PERCENTAGE'],
                          arena_image='cheyang/arena_launcher:v0.7',
                          timeout_hours=240):
@@ -76,10 +77,10 @@ def parameter_servers_op(name, image, command, env, data, sync_source, annotatio
   return distributed_tf_op(name=name, image=image, command=command, envs=envs, data=data, sync_source=sync_source,
                            workers=workers, worker_image=worker_image, worker_cpu_limit=worker_cpu_limit, worker_memory_limit=worker_memory,
                            parameter_servers=parameter_servers, ps_image=ps_image, ps_cpu_limit=ps_cpu_limit, ps_memory_limit=ps_memory_limit,
-                           gpus=gpus, rdma=rdma, 
+                           gpus=gpus, rdma=rdma,
                            worker_port=worker_port, ps_port=ps_port,
-                           tensorboard=tensorboard, 
+                           tensorboard=tensorboard,
                            metrics=metrics,
                            arena_image=arena_image,
                            timeout_hours=timeout_hours)
@@ -87,15 +88,15 @@ def parameter_servers_op(name, image, command, env, data, sync_source, annotatio
 
 def distributed_tf_op(name, image, command, env=[], data=[], sync_source=None,
-                      chief=False, chief_cpu_limit='0', chief_memory_limit='0', 
+                      chief=False, chief_cpu_limit='0', chief_memory_limit='0',
                       workers=0, worker_image=None, worker_cpu_limit='0', worker_memory_limit='0',
                       parameter_servers=0, ps_image=None, ps_cpu_limit='0', ps_memory_limit='0',
-                      evaluator=False, evaluator_cpu_limit='0', evaluator_memory_limit='0', 
-                      gpus=0, rdma=False, 
+                      evaluator=False, evaluator_cpu_limit='0', evaluator_memory_limit='0',
+                      gpus=0, rdma=False,
                       chief_port=22222,
                       worker_port=22222,
                       ps_port=22224,
-                      tensorboard=False, 
+                      tensorboard=False,
                       metrics=['Train-accuracy:PERCENTAGE'],
                       arena_image='cheyang/arena_launcher:v0.7',
                       timeout_hours=240):
diff --git a/components/aws/emr/create_cluster/src/create_cluster.py b/components/aws/emr/create_cluster/src/create_cluster.py
index 9885158464a..a570042db47 100644
--- a/components/aws/emr/create_cluster/src/create_cluster.py
+++ b/components/aws/emr/create_cluster/src/create_cluster.py
@@ -17,6 +17,12 @@
 from common import _utils
 
+try:
+  unicode
+except NameError:
+  unicode = str
+
+
 def main(argv=None):
   parser = argparse.ArgumentParser(description='Create EMR Cluster')
   parser.add_argument('--region', type=str, help='EMR Cluster region.')
diff --git a/components/aws/emr/submit_pyspark_job/src/submit_pyspark_job.py b/components/aws/emr/submit_pyspark_job/src/submit_pyspark_job.py
index a3a668ff158..c4a5acdc0e3 100644
--- a/components/aws/emr/submit_pyspark_job/src/submit_pyspark_job.py
+++ b/components/aws/emr/submit_pyspark_job/src/submit_pyspark_job.py
@@ -29,6 +29,11 @@
 from common import _utils
 
+try:
+  unicode
+except NameError:
+  unicode = str
+
 
 def main(argv=None):
   parser = argparse.ArgumentParser(description='Submit PySpark Job')
diff --git a/components/aws/emr/submit_spark_job/src/submit_spark_job.py b/components/aws/emr/submit_spark_job/src/submit_spark_job.py
index 3ed852f0676..0d6630031e3 100644
--- a/components/aws/emr/submit_spark_job/src/submit_spark_job.py
+++ b/components/aws/emr/submit_spark_job/src/submit_spark_job.py
@@ -30,6 +30,11 @@
 from common import _utils
 
+try:
+  unicode
+except NameError:
+  unicode = str
+
 
 def main(argv=None):
   parser = argparse.ArgumentParser(description='Submit Spark Job')
diff --git a/components/aws/sagemaker/batch_transform/src/batch_transform.py b/components/aws/sagemaker/batch_transform/src/batch_transform.py
index f75bb65996c..f5c6b8f6fd2 100644
--- a/components/aws/sagemaker/batch_transform/src/batch_transform.py
+++ b/components/aws/sagemaker/batch_transform/src/batch_transform.py
@@ -16,6 +16,12 @@
 from common import _utils
 
+try:
+  unicode
+except NameError:
+  unicode = str
+
+
 def main(argv=None):
   parser = argparse.ArgumentParser(description='SageMaker Batch Transformation Job')
   parser.add_argument('--region', type=str.strip, required=True, help='The region where the cluster launches.')
diff --git a/proxy/get_proxy_url.py b/proxy/get_proxy_url.py
index 9c8cfa69f25..a8022428855 100644
--- a/proxy/get_proxy_url.py
+++ b/proxy/get_proxy_url.py
@@ -21,6 +21,11 @@
 import re
 import requests
 
+try:
+  unicode
+except NameError:
+  unicode = str
+
 
 def urls_for_zone(zone, location_to_urls_map):
   """Returns list of potential proxy URLs for a given zone.
diff --git a/samples/contrib/aws-samples/ground_truth_pipeline_demo/mini-image-classification-pipeline.py b/samples/contrib/aws-samples/ground_truth_pipeline_demo/mini-image-classification-pipeline.py
index 0fdc016e293..988a125ed3c 100644
--- a/samples/contrib/aws-samples/ground_truth_pipeline_demo/mini-image-classification-pipeline.py
+++ b/samples/contrib/aws-samples/ground_truth_pipeline_demo/mini-image-classification-pipeline.py
@@ -136,4 +136,4 @@ def ground_truth_test(region='us-west-2',
     ).apply(use_aws_secret('aws-secret', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'))
 
 if __name__ == '__main__':
-  kfp.compiler.Compiler().compile(hpo_test, __file__ + '.zip')
+  kfp.compiler.Compiler().compile(hpo_test, __file__ + '.zip') # noqa: F821 TODO
diff --git a/samples/contrib/resnet-cmle/resnet-train-pipeline.py b/samples/contrib/resnet-cmle/resnet-train-pipeline.py
index 4075a396303..2e01e878e10 100644
--- a/samples/contrib/resnet-cmle/resnet-train-pipeline.py
+++ b/samples/contrib/resnet-cmle/resnet-train-pipeline.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# flake8: noqa TODO
 
 import kfp.dsl as dsl
 import kfp.gcp as gcp
diff --git a/sdk/python/kfp/components/_dynamic.py b/sdk/python/kfp/components/_dynamic.py
index f35a693d19d..1a1086bd535 100644
--- a/sdk/python/kfp/components/_dynamic.py
+++ b/sdk/python/kfp/components/_dynamic.py
@@ -28,9 +28,9 @@ def create_function_from_parameter_names(func: Callable[[Mapping[str, Any]], Any
 
 def create_function_from_parameters(func: Callable[[Mapping[str, Any]], Any], parameters: Sequence[Parameter], documentation=None, func_name=None, func_filename=None):
     new_signature = Signature(parameters) # Checks the parameter consistency
-    
+
     def pass_locals():
-        return dict_func(locals())
+        return dict_func(locals()) # noqa: F821 TODO
 
     code = pass_locals.__code__
     mod_co_argcount = len(parameters)
@@ -59,10 +59,10 @@ def pass_locals():
         mod_co_firstlineno,
         code.co_lnotab
     )
-    
+
     default_arg_values = tuple( p.default for p in parameters if p.default != Parameter.empty ) #!argdefs "starts from the right"/"is right-aligned"
     modified_func = types.FunctionType(modified_code, {'dict_func': func, 'locals': locals}, name=func_name, argdefs=default_arg_values)
     modified_func.__doc__ = documentation
     modified_func.__signature__ = new_signature
-    
+
     return modified_func
diff --git a/sdk/python/kfp/dsl/_pipeline.py b/sdk/python/kfp/dsl/_pipeline.py
index 902fc0286f7..12216c2a8dd 100644
--- a/sdk/python/kfp/dsl/_pipeline.py
+++ b/sdk/python/kfp/dsl/_pipeline.py
@@ -47,7 +47,7 @@ def _pipeline(func):
     if _pipeline_decorator_handler:
       return _pipeline_decorator_handler(func) or func
     else:
-      return func 
+      return func
 
   return _pipeline
 
@@ -88,7 +88,7 @@ def set_ttl_seconds_after_finished(self, seconds: int):
       seconds: number of seconds for the workflow to be garbage collected after it is finished.
     """
     self.ttl_seconds_after_finished = seconds
-    return self 
+    return self
 
   def set_artifact_location(self, artifact_location):
     """Configures the pipeline level artifact location.
@@ -243,7 +243,7 @@ def _set_metadata(self, metadata):
     Args:
      metadata (ComponentMeta): component metadata
     '''
-    if not isinstance(metadata, PipelineMeta): 
+    if not isinstance(metadata, PipelineMeta): # noqa: F821 TODO
      raise ValueError('_set_medata is expecting PipelineMeta.')
     self._metadata = metadata
diff --git a/sdk/python/tests/dsl/component_tests.py b/sdk/python/tests/dsl/component_tests.py
index 05c3b6cd75f..73599c2faf8 100644
--- a/sdk/python/tests/dsl/component_tests.py
+++ b/sdk/python/tests/dsl/component_tests.py
@@ -63,7 +63,7 @@ def a_op(field_l: Integer()) -> {'field_m': GCSPath(), 'field_n': {'customized_t
 
   @component
   def b_op(field_x: {'customized_type': {'property_a': 'value_a', 'property_b': 'value_b'}},
-      field_y: 'GcsUri', 
+      field_y: 'GcsUri', # noqa: F821 TODO
      field_z: GCSPath()) -> {'output_model_uri': 'GcsUri'}:
     return ContainerOp(
         name = 'operator b',
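
A note on the new Travis stage: the --select list limits the build-breaking checks to hard errors. E9 is pycodestyle's syntax and indentation errors, F63 is pyflakes' family of suspicious comparisons (such as `is` against a literal), F7 covers statements in impossible positions (`return` outside a function and the like), and F82 covers undefined names. A minimal sketch of the failure mode F821 exists to catch, using a hypothetical file that is not part of this patch:

    # lint_example.py - hypothetical, for illustration only
    def scale(value):
        return value * 2       # fine: `value` is a bound name

    def broken(value):
        # flake8 reports "F821 undefined name 'factor'";
        # calling broken() would raise NameError at run time.
        return value * factor

Running `flake8 lint_example.py --count --select=E9,F63,F7,F82` exits non-zero on the F821, which is what fails the build. Names that only exist at run time, like the injected `source` in tfdv.py above, are flagged the same way, hence the per-line `# noqa: F821` there instead of a code change.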
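The roc_curve.py change is worth spelling out: the old line was itself a syntax error (`is not in` is not valid Python), exactly the class of bug this lint stage exists to stop. The replacement folds the two intended branches, missing key and explicit False, into one truthiness test via dict.get. A small sketch of the equivalence, assuming the flag is only ever a boolean as the surrounding comments suggest:

    variables = {}
    assert not variables.get("is_generated")   # key absent -> None -> falsy

    variables["is_generated"] = False
    assert not variables.get("is_generated")   # explicit False -> falsy

    variables["is_generated"] = True
    assert variables.get("is_generated")       # truthy -> skip csv generation

Both falsy cases take the data-generation path, matching the intent of the original two-branch condition.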
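The try/except block repeated across the EMR, SageMaker, and proxy files is the standard Python 2/3 bridge: Python 3 removed the `unicode` builtin (`str` is already unicode), so flake8 running under Python 3.7 reports every use of it as F821. Binding the name once at import time fixes both the lint error and the latent NameError on Python 3. The pattern in isolation:

    try:
        unicode            # a builtin on Python 2
    except NameError:      # Python 3: point the old name at str
        unicode = str

    # Call sites written for Python 2 keep working on either interpreter
    # ("my-emr-cluster" is an arbitrary placeholder value):
    assert unicode("my-emr-cluster") == "my-emr-cluster"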
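The `# noqa: F821 TODO` in _dynamic.py is the subtlest suppression. `dict_func` really is undefined where `pass_locals` is written; it only resolves because the module later rebuilds the function with `types.FunctionType(modified_code, {'dict_func': func, 'locals': locals}, ...)`, supplying `dict_func` through the replacement globals. A stripped-down sketch of that mechanism, with hypothetical names rather than the kfp ones:

    import types

    def template():
        return greeting  # flake8 would flag F821: `greeting` is unbound here

    # Rebuild the function around a custom globals dict; the "undefined"
    # name now resolves, just as dict_func does in _dynamic.py:
    rebuilt = types.FunctionType(template.__code__, {"greeting": "hello"})
    assert rebuilt() == "hello"

Static analysis cannot see through that indirection, so a per-line suppression is the right call there.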