@@ -124,7 +124,10 @@ def getConf(self, key, defaultValue):
     @property
     @since("1.3.1")
     def udf(self):
-        """Returns a :class:`UDFRegistration` for UDF registration."""
+        """Returns a :class:`UDFRegistration` for UDF registration.
+
+        :return: :class:`UDFRegistration`
+        """
         return UDFRegistration(self)

     @since(1.4)
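For orientation, a minimal usage sketch for the property documented above (assuming the `sqlContext` fixture used by this file's doctests; `strlen` is an illustrative name, and `returnType` defaults to `StringType`):

    # Register a Python lambda through the UDFRegistration returned by
    # sqlContext.udf, then call it from SQL.
    from pyspark.sql.types import IntegerType
    sqlContext.udf.register("strlen", lambda s: len(s), IntegerType())
    sqlContext.sql("SELECT strlen('test')").collect()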
@@ -138,7 +141,7 @@ def range(self, start, end, step=1, numPartitions=None):
         :param end: the end value (exclusive)
         :param step: the incremental step (default: 1)
         :param numPartitions: the number of partitions of the DataFrame
-        :return: A new DataFrame
+        :return: :class:`DataFrame`

         >>> sqlContext.range(1, 7, 2).collect()
         [Row(id=1), Row(id=3), Row(id=5)]
@@ -196,7 +199,7 @@ def _inferSchema(self, rdd, samplingRatio=None):
                              "can not infer schema")
         if type(first) is dict:
             warnings.warn("Using RDD of dict to inferSchema is deprecated,"
-                          "please use pyspark.sql.Row instead")
+                          "please use pyspark.sql.Row instead", DeprecationWarning)

         if samplingRatio is None:
             schema = _infer_schema(first)
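Passing `DeprecationWarning` as the category matters: CPython silences that category by default, so callers only see these messages after opting in. A minimal sketch of how a user would surface them:

    import warnings

    # DeprecationWarning is ignored by default, so enable it explicitly
    # to see the messages added in this patch.
    warnings.simplefilter("always", DeprecationWarning)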
@@ -219,7 +222,8 @@ def inferSchema(self, rdd, samplingRatio=None):
         """
         .. note:: Deprecated in 1.3, use :func:`createDataFrame` instead.
         """
-        warnings.warn("inferSchema is deprecated, please use createDataFrame instead")
+        warnings.warn(
+            "inferSchema is deprecated, please use createDataFrame instead.", DeprecationWarning)

         if isinstance(rdd, DataFrame):
             raise TypeError("Cannot apply schema to DataFrame")
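The replacement named in the message is a drop-in; a hedged sketch under the same doctest fixtures (`sc` and `sqlContext`):

    # Instead of sqlContext.inferSchema(rdd), pass the RDD of Rows to
    # createDataFrame, which performs the same schema inference.
    from pyspark.sql import Row
    rdd = sc.parallelize([Row(name="Alice", age=1)])
    df = sqlContext.createDataFrame(rdd)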
@@ -231,7 +235,8 @@ def applySchema(self, rdd, schema):
         """
         .. note:: Deprecated in 1.3, use :func:`createDataFrame` instead.
         """
-        warnings.warn("applySchema is deprecated, please use createDataFrame instead")
+        warnings.warn(
+            "applySchema is deprecated, please use createDataFrame instead", DeprecationWarning)

         if isinstance(rdd, DataFrame):
             raise TypeError("Cannot apply schema to DataFrame")
@@ -262,6 +267,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
             :class:`list`, or :class:`pandas.DataFrame`.
         :param schema: a :class:`StructType` or list of column names. default None.
         :param samplingRatio: the sample ratio of rows used for inferring
+        :return: :class:`DataFrame`

         >>> l = [('Alice', 1)]
         >>> sqlContext.createDataFrame(l).collect()
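Since this hunk documents the `schema` parameter, a short sketch with an explicit `StructType` (field names are illustrative):

    from pyspark.sql.types import StructType, StructField, StringType, IntegerType

    # An explicit schema skips sampling-based inference entirely.
    schema = StructType([
        StructField("name", StringType(), True),
        StructField("age", IntegerType(), True)])
    df = sqlContext.createDataFrame([('Alice', 1)], schema)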
@@ -359,58 +365,25 @@ def registerDataFrameAsTable(self, df, tableName):
         else:
             raise ValueError("Can only register DataFrame as table")

-    @since(1.0)
     def parquetFile(self, *paths):
         """Loads a Parquet file, returning the result as a :class:`DataFrame`.

-        >>> import tempfile, shutil
-        >>> parquetFile = tempfile.mkdtemp()
-        >>> shutil.rmtree(parquetFile)
-        >>> df.saveAsParquetFile(parquetFile)
-        >>> df2 = sqlContext.parquetFile(parquetFile)
-        >>> sorted(df.collect()) == sorted(df2.collect())
-        True
+        .. note:: Deprecated in 1.4, use :func:`DataFrameReader.parquet` instead.
         """
+        warnings.warn("parquetFile is deprecated. Use read.parquet() instead.", DeprecationWarning)
         gateway = self._sc._gateway
         jpaths = gateway.new_array(gateway.jvm.java.lang.String, len(paths))
         for i in range(0, len(paths)):
             jpaths[i] = paths[i]
         jdf = self._ssql_ctx.parquetFile(jpaths)
         return DataFrame(jdf, self)

-    @since(1.0)
     def jsonFile(self, path, schema=None, samplingRatio=1.0):
         """Loads a text file storing one JSON object per line as a :class:`DataFrame`.

-        If the schema is provided, applies the given schema to this JSON dataset.
-        Otherwise, it samples the dataset with ratio ``samplingRatio`` to determine the schema.
-
-        >>> import tempfile, shutil
-        >>> jsonFile = tempfile.mkdtemp()
-        >>> shutil.rmtree(jsonFile)
-        >>> with open(jsonFile, 'w') as f:
-        ...     f.writelines(jsonStrings)
-        >>> df1 = sqlContext.jsonFile(jsonFile)
-        >>> df1.printSchema()
-        root
-         |-- field1: long (nullable = true)
-         |-- field2: string (nullable = true)
-         |-- field3: struct (nullable = true)
-         |    |-- field4: long (nullable = true)
-
-        >>> from pyspark.sql.types import *
-        >>> schema = StructType([
-        ...     StructField("field2", StringType()),
-        ...     StructField("field3",
-        ...         StructType([StructField("field5", ArrayType(IntegerType()))]))])
-        >>> df2 = sqlContext.jsonFile(jsonFile, schema)
-        >>> df2.printSchema()
-        root
-         |-- field2: string (nullable = true)
-         |-- field3: struct (nullable = true)
-         |    |-- field5: array (nullable = true)
-         |    |    |-- element: integer (containsNull = true)
+        .. note:: Deprecated in 1.4, use :func:`DataFrameReader.json` instead.
         """
+        warnings.warn("jsonFile is deprecated. Use read.json() instead.", DeprecationWarning)
         if schema is None:
             df = self._ssql_ctx.jsonFile(path, samplingRatio)
         else:
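The replacements named in the two warnings are one-liners on the new reader; a sketch with placeholder paths:

    # DataFrameReader equivalents of the deprecated calls above.
    df_parquet = sqlContext.read.parquet("/path/to/data.parquet")
    df_json = sqlContext.read.json("/path/to/data.json")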
@@ -462,21 +435,16 @@ def func(iterator):
             df = self._ssql_ctx.jsonRDD(jrdd.rdd(), scala_datatype)
         return DataFrame(df, self)

-    @since(1.3)
     def load(self, path=None, source=None, schema=None, **options):
         """Returns the dataset in a data source as a :class:`DataFrame`.

-        The data source is specified by the ``source`` and a set of ``options``.
-        If ``source`` is not specified, the default data source configured by
-        ``spark.sql.sources.default`` will be used.
-
-        Optionally, a schema can be provided as the schema of the returned DataFrame.
+        .. note:: Deprecated in 1.4, use :func:`DataFrameReader.load` instead.
         """
+        warnings.warn("load is deprecated. Use read.load() instead.", DeprecationWarning)
         return self.read.load(path, source, schema, **options)

     @since(1.3)
-    def createExternalTable(self, tableName, path=None, source=None,
-                            schema=None, **options):
+    def createExternalTable(self, tableName, path=None, source=None, schema=None, **options):
         """Creates an external table based on the dataset in a data source.

         It returns the DataFrame associated with the external table.
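For the `load` deprecation above, a sketch of the suggested replacement (path and format name are placeholders; `read.load` accepts the same optional path, source/format name, schema, and free-form options):

    # Equivalent of the deprecated sqlContext.load(path, source, schema, **options).
    df = sqlContext.read.load("/path/to/data", format="json")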
@@ -487,6 +455,8 @@ def createExternalTable(self, tableName, path=None, source=None,

         Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and
         created external table.
+
+        :return: :class:`DataFrame`
         """
         if path is not None:
             options["path"] = path
@@ -508,6 +478,8 @@ def createExternalTable(self, tableName, path=None, source=None,
     def sql(self, sqlQuery):
         """Returns a :class:`DataFrame` representing the result of the given query.

+        :return: :class:`DataFrame`
+
         >>> sqlContext.registerDataFrameAsTable(df, "table1")
         >>> df2 = sqlContext.sql("SELECT field1 AS f1, field2 as f2 from table1")
         >>> df2.collect()
@@ -519,6 +491,8 @@ def sql(self, sqlQuery):
     def table(self, tableName):
         """Returns the specified table as a :class:`DataFrame`.

+        :return: :class:`DataFrame`
+
         >>> sqlContext.registerDataFrameAsTable(df, "table1")
         >>> df2 = sqlContext.table("table1")
         >>> sorted(df.collect()) == sorted(df2.collect())
@@ -536,6 +510,9 @@ def tables(self, dbName=None):
         The returned DataFrame has two columns: ``tableName`` and ``isTemporary``
         (a column with :class:`BooleanType` indicating if a table is a temporary one or not).

+        :param dbName: string, name of the database to use.
+        :return: :class:`DataFrame`
+
         >>> sqlContext.registerDataFrameAsTable(df, "table1")
         >>> df2 = sqlContext.tables()
         >>> df2.filter("tableName = 'table1'").first()
@@ -550,7 +527,8 @@ def tables(self, dbName=None):
     def tableNames(self, dbName=None):
         """Returns a list of names of tables in the database ``dbName``.

-        If ``dbName`` is not specified, the current database will be used.
+        :param dbName: string, name of the database to use. Defaults to the current database.
+        :return: list of table names, as strings

         >>> sqlContext.registerDataFrameAsTable(df, "table1")
         >>> "table1" in sqlContext.tableNames()
@@ -585,8 +563,7 @@ def read(self):
         Returns a :class:`DataFrameReader` that can be used to read data
         in as a :class:`DataFrame`.

-        >>> sqlContext.read
-        <pyspark.sql.readwriter.DataFrameReader object at ...>
+        :return: :class:`DataFrameReader`
         """
         return DataFrameReader(self)

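A sketch of the reader returned by the property, used in the usual chained style (format name and path are placeholders):

    # The DataFrameReader supports format(...).load(...) chaining.
    df = sqlContext.read.format("json").load("/path/to/data.json")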