diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 868e4a5d23ed7..39a308b07d886 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -31,9 +31,10 @@ class Module(object):
     files have changed.
     """
 
-    def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=(), environ={},
-                 sbt_test_goals=(), python_test_goals=(), excluded_python_implementations=(),
-                 test_tags=(), should_run_r_tests=False, should_run_build_tests=False):
+    def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=(),
+                 environ=None, sbt_test_goals=(), python_test_goals=(),
+                 excluded_python_implementations=(), test_tags=(), should_run_r_tests=False,
+                 should_run_build_tests=False):
         """
         Define a new module.
 
@@ -62,7 +63,7 @@ def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=
         self.source_file_prefixes = source_file_regexes
         self.sbt_test_goals = sbt_test_goals
         self.build_profile_flags = build_profile_flags
-        self.environ = environ
+        self.environ = environ or {}
         self.python_test_goals = python_test_goals
         self.excluded_python_implementations = excluded_python_implementations
         self.test_tags = test_tags
diff --git a/dev/tox.ini b/dev/tox.ini
index 7edf7d597fb58..43cd5877dfdb8 100644
--- a/dev/tox.ini
+++ b/dev/tox.ini
@@ -19,6 +19,6 @@ max-line-length=100
 exclude=python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*
 
 [flake8]
-select = E901,E999,F821,F822,F823,F401,F405
+select = E901,E999,F821,F822,F823,F401,F405,B006
 exclude = python/pyspark/cloudpickle/*.py,shared.py*,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*,python/out,python/pyspark/sql/pandas/functions.pyi,python/pyspark/sql/column.pyi,python/pyspark/worker.pyi,python/pyspark/java_gateway.pyi
 max-line-length = 100
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index ba0127c938642..68c32b938c2f1 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -47,6 +47,8 @@
            'RandomForestRegressor', 'RandomForestRegressionModel',
           'FMRegressor', 'FMRegressionModel']
 
+DEFAULT_QUANTILE_PROBABILITIES = [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]
+
 
 class Regressor(Predictor, _PredictorParams, metaclass=ABCMeta):
     """
@@ -1654,7 +1656,7 @@ class _AFTSurvivalRegressionParams(_PredictorParams, HasMaxIter, HasTol, HasFitI
     def __init__(self, *args):
         super(_AFTSurvivalRegressionParams, self).__init__(*args)
         self._setDefault(censorCol="censor",
-                         quantileProbabilities=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99],
+                         quantileProbabilities=DEFAULT_QUANTILE_PROBABILITIES,
                          maxIter=100, tol=1E-6, blockSize=1)
 
     @since("1.6.0")
@@ -1740,7 +1742,7 @@ class AFTSurvivalRegression(_JavaRegressor, _AFTSurvivalRegressionParams,
     @keyword_only
     def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="prediction",
                  fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor",
-                 quantileProbabilities=list([0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]),
+                 quantileProbabilities=DEFAULT_QUANTILE_PROBABILITIES,
                  quantilesCol=None, aggregationDepth=2, blockSize=1):
         """
         __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \
@@ -1758,7 +1760,7 @@ def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="p
     @since("1.6.0")
     def setParams(self, *, featuresCol="features", labelCol="label", predictionCol="prediction",
                   fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor",
-                  quantileProbabilities=list([0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]),
+                  quantileProbabilities=DEFAULT_QUANTILE_PROBABILITIES,
                   quantilesCol=None, aggregationDepth=2, blockSize=1):
         """
         setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \
diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py
index 21bbe1f96758b..8b7045a10ed9e 100644
--- a/python/pyspark/ml/tuning.py
+++ b/python/pyspark/ml/tuning.py
@@ -508,13 +508,13 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable):
     .. versionadded:: 1.4.0
     """
 
-    def __init__(self, bestModel, avgMetrics=[], subModels=None):
+    def __init__(self, bestModel, avgMetrics=None, subModels=None):
         super(CrossValidatorModel, self).__init__()
         #: best model from cross validation
         self.bestModel = bestModel
         #: Average cross-validation metrics for each paramMap in
         #: CrossValidator.estimatorParamMaps, in the corresponding order.
-        self.avgMetrics = avgMetrics
+        self.avgMetrics = avgMetrics or []
         #: sub model list from cross validation
         self.subModels = subModels
 
@@ -868,12 +868,12 @@ class TrainValidationSplitModel(Model, _TrainValidationSplitParams, MLReadable,
     .. versionadded:: 2.0.0
     """
 
-    def __init__(self, bestModel, validationMetrics=[], subModels=None):
+    def __init__(self, bestModel, validationMetrics=None, subModels=None):
         super(TrainValidationSplitModel, self).__init__()
         #: best model from train validation split
         self.bestModel = bestModel
         #: evaluated validation metrics
-        self.validationMetrics = validationMetrics
+        self.validationMetrics = validationMetrics or []
         #: sub models from train validation split
         self.subModels = subModels
 
diff --git a/python/pyspark/resource/profile.py b/python/pyspark/resource/profile.py
index 3f6ae1ddd5e30..6a2be76b8bc30 100644
--- a/python/pyspark/resource/profile.py
+++ b/python/pyspark/resource/profile.py
@@ -32,13 +32,13 @@ class ResourceProfile(object):
     .. versionadded:: 3.1.0
     """
 
-    def __init__(self, _java_resource_profile=None, _exec_req={}, _task_req={}):
+    def __init__(self, _java_resource_profile=None, _exec_req=None, _task_req=None):
         if _java_resource_profile is not None:
             self._java_resource_profile = _java_resource_profile
         else:
             self._java_resource_profile = None
-            self._executor_resource_requests = _exec_req
-            self._task_resource_requests = _task_req
+            self._executor_resource_requests = _exec_req or {}
+            self._task_resource_requests = _task_req or {}
 
     @property
     def id(self):
diff --git a/python/pyspark/sql/avro/functions.py b/python/pyspark/sql/avro/functions.py
index 75fe4eaa078a1..e907958088124 100644
--- a/python/pyspark/sql/avro/functions.py
+++ b/python/pyspark/sql/avro/functions.py
@@ -26,7 +26,7 @@
 
 
 @since(3.0)
-def from_avro(data, jsonFormatSchema, options={}):
+def from_avro(data, jsonFormatSchema, options=None):
     """
     Converts a binary column of Avro format into its corresponding catalyst value.
     The specified schema must match the read data, otherwise the behavior is undefined:
@@ -59,7 +59,7 @@ def from_avro(data, jsonFormatSchema, options={}):
     sc = SparkContext._active_spark_context
     try:
         jc = sc._jvm.org.apache.spark.sql.avro.functions.from_avro(
-            _to_java_column(data), jsonFormatSchema, options)
+            _to_java_column(data), jsonFormatSchema, options or {})
     except TypeError as e:
         if str(e) == "'JavaPackage' object is not callable":
             _print_missing_jar("Avro", "avro", "avro", sc.version)
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 97146fdb804ab..a20c34cd3ee20 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -113,8 +113,10 @@ def _():
     return _
 
 
-def _options_to_str(options):
-    return {key: to_str(value) for (key, value) in options.items()}
+def _options_to_str(options=None):
+    if options:
+        return {key: to_str(value) for (key, value) in options.items()}
+    return {}
 
 _lit_doc = """
     Creates a :class:`Column` of literal value.
@@ -2476,7 +2478,7 @@ def json_tuple(col, *fields):
 
 
 @since(2.1)
-def from_json(col, schema, options={}):
+def from_json(col, schema, options=None):
     """
     Parses a column containing a JSON string into a :class:`MapType` with
     :class:`StringType` as keys type, :class:`StructType` or :class:`ArrayType` with
@@ -2524,7 +2526,7 @@
 
 
 @since(2.1)
-def to_json(col, options={}):
+def to_json(col, options=None):
     """
     Converts a column containing a :class:`StructType`, :class:`ArrayType` or a :class:`MapType`
     into a JSON string. Throws an exception, in the case of an unsupported type.
@@ -2564,7 +2566,7 @@
 
 
 @since(2.4)
-def schema_of_json(json, options={}):
+def schema_of_json(json, options=None):
     """
     Parses a JSON string and infers its schema in DDL format.
 
@@ -2594,7 +2596,7 @@
 
 
 @since(3.0)
-def schema_of_csv(csv, options={}):
+def schema_of_csv(csv, options=None):
     """
     Parses a CSV string and infers its schema in DDL format.
 
@@ -2620,7 +2622,7 @@
 
 
 @since(3.0)
-def to_csv(col, options={}):
+def to_csv(col, options=None):
     """
     Converts a column containing a :class:`StructType` into a CSV string.
     Throws an exception, in the case of an unsupported type.
@@ -2931,7 +2933,7 @@ def sequence(start, stop, step=None):
 
 
 @since(3.0)
-def from_csv(col, schema, options={}):
+def from_csv(col, schema, options=None):
     """
     Parses a column containing a CSV string to a row with the specified schema.
     Returns `null`, in the case of an unparseable string.
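
All of the hunks above apply the same pattern: a mutable default argument (a `[]` or `{}` literal, which Python evaluates once at function-definition time and then shares across every call) is replaced by a `None` sentinel, a fresh container is materialized inside the function (`environ or {}`, `avgMetrics or []`, `options or {}`), and flake8's `B006` check is enabled in dev/tox.ini so new violations are flagged. The standalone sketch below is not part of the patch; `append_bad` and `append_good` are hypothetical names used only to illustrate the bug and the fix.

# Minimal sketch (assumed names, not from the patch) of the pitfall B006 catches.

def append_bad(item, bucket=[]):
    # `bucket` is created once when the function is defined,
    # so every call without an explicit argument mutates the same list.
    bucket.append(item)
    return bucket


def append_good(item, bucket=None):
    # A None sentinel defers creation to call time, so each call
    # gets a fresh list -- the idiom applied throughout this diff.
    bucket = bucket if bucket is not None else []
    bucket.append(item)
    return bucket


print(append_bad(1))   # [1]
print(append_bad(2))   # [1, 2]  <- state leaks across calls
print(append_good(1))  # [1]
print(append_good(2))  # [2]

Note that the `x or {}` variant used in the patch also replaces an explicitly passed empty container with a new one; for these options/metrics parameters, which are only read, that behavior is equivalent to an `is None` check.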