Skip to content

Commit d08ed6b

Browse files
committed
[SPARK-23081][PYTHON] Add colRegex API to PySpark
1 parent 6f0ba84 commit d08ed6b

File tree

2 files changed

+13
-0
lines changed

2 files changed

+13
-0
lines changed

python/pyspark/sql/dataframe.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1881,6 +1881,15 @@ def toDF(self, *cols):
18811881
jdf = self._jdf.toDF(self._jseq(cols))
18821882
return DataFrame(jdf, self.sql_ctx)
18831883

1884+
@since(2.3)
1885+
def colRegex(self, colName):
1886+
"""
1887+
Selects column based on the column name specified as a regex and returns it
1888+
as :class:`Column`.
1889+
"""
1890+
jc = self._jdf.colRegex(colName)
1891+
return Column(jc)
1892+
18841893
@since(1.3)
18851894
def toPandas(self):
18861895
"""

python/pyspark/sql/tests.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2855,6 +2855,10 @@ def test_create_dataframe_from_old_pandas(self):
28552855
with self.assertRaisesRegexp(ImportError, 'Pandas >= .* must be installed'):
28562856
self.spark.createDataFrame(pdf)
28572857

2858+
def test_colRegex(self):
2859+
df = self.spark.createDataFrame([("a", 1), ("b", 2), ("c", 3)])
2860+
self.assertEqual(df.select(df.colRegex("`(_1)?+.+`")).collect(), df.select("_2").collect())
2861+
28582862

28592863
class HiveSparkSubmitTests(SparkSubmitTests):
28602864

0 commit comments

Comments
 (0)