
Commit c346387

Merge branch 'master' into SPARK-29375-SPARK-28940-whole-plan-reuse
# Conflicts:
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/explain.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/simplified.txt
#	sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt
2 parents bf29f1a + abf9675 commit c346387


140 files changed: +8276, -8188 lines


common/sketch/src/main/java/org/apache/spark/util/sketch/BitArray.java

Lines changed: 11 additions & 0 deletions
@@ -85,6 +85,17 @@ void putAll(BitArray array) {
     this.bitCount = bitCount;
   }
 
+  /** Combines the two BitArrays using bitwise AND. */
+  void and(BitArray array) {
+    assert data.length == array.data.length : "BitArrays must be of equal length when merging";
+    long bitCount = 0;
+    for (int i = 0; i < data.length; i++) {
+      data[i] &= array.data[i];
+      bitCount += Long.bitCount(data[i]);
+    }
+    this.bitCount = bitCount;
+  }
+
   void writeTo(DataOutputStream out) throws IOException {
     out.writeInt(data.length);
     for (long datum : data) {
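As an illustrative aside (not part of this commit's diff), here is a minimal standalone Java sketch of the word-wise AND semantics the new `and` method relies on: only bit positions set in both inputs survive, and the population count is recomputed from the surviving words.

public class BitwiseAndDemo {
  public static void main(String[] args) {
    // Two 64-bit words standing in for one word of each backing long[] array.
    long a = 0b1011L; // bits 0, 1 and 3 set
    long b = 0b0110L; // bits 1 and 2 set

    long intersection = a & b;                   // only bit 1 survives
    long bitCount = Long.bitCount(intersection); // popcount of the surviving word

    System.out.println(Long.toBinaryString(intersection)); // prints "10"
    System.out.println(bitCount);                          // prints 1
  }
}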

common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java

Lines changed: 10 additions & 0 deletions
@@ -126,6 +126,16 @@ int getVersionNumber() {
    */
   public abstract BloomFilter mergeInPlace(BloomFilter other) throws IncompatibleMergeException;
 
+  /**
+   * Combines this bloom filter with another bloom filter by performing a bitwise AND of the
+   * underlying data. The mutations happen to <b>this</b> instance. Callers must ensure the
+   * bloom filters are appropriately sized to avoid saturating them.
+   *
+   * @param other The bloom filter to combine this bloom filter with. It is not mutated.
+   * @throws IncompatibleMergeException if {@code isCompatible(other) == false}
+   */
+  public abstract BloomFilter intersectInPlace(BloomFilter other) throws IncompatibleMergeException;
+
   /**
    * Returns {@code true} if the element <i>might</i> have been put in this Bloom filter,
    * {@code false} if this is <i>definitely</i> not the case.
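A hedged usage sketch of the new API, relying only on the existing BloomFilter factory and query methods; the item counts and keys below are illustrative, not taken from the commit.

import org.apache.spark.util.sketch.BloomFilter;
import org.apache.spark.util.sketch.IncompatibleMergeException;

public class IntersectInPlaceDemo {
  public static void main(String[] args) throws IncompatibleMergeException {
    // Filters created with the same expected item count (and default FPP) are compatible.
    BloomFilter seenYesterday = BloomFilter.create(1000);
    BloomFilter seenToday = BloomFilter.create(1000);

    seenYesterday.put("alice");
    seenYesterday.put("bob");
    seenToday.put("bob");
    seenToday.put("carol");

    // Mutates seenYesterday so it (probabilistically) keeps only items put into both filters.
    seenYesterday.intersectInPlace(seenToday);

    System.out.println(seenYesterday.mightContain("bob"));   // true
    System.out.println(seenYesterday.mightContain("alice")); // very likely false
  }
}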

common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilterImpl.java

Lines changed: 17 additions & 3 deletions
@@ -193,6 +193,22 @@ public boolean isCompatible(BloomFilter other) {
 
   @Override
   public BloomFilter mergeInPlace(BloomFilter other) throws IncompatibleMergeException {
+    BloomFilterImpl otherImplInstance = checkCompatibilityForMerge(other);
+
+    this.bits.putAll(otherImplInstance.bits);
+    return this;
+  }
+
+  @Override
+  public BloomFilter intersectInPlace(BloomFilter other) throws IncompatibleMergeException {
+    BloomFilterImpl otherImplInstance = checkCompatibilityForMerge(other);
+
+    this.bits.and(otherImplInstance.bits);
+    return this;
+  }
+
+  private BloomFilterImpl checkCompatibilityForMerge(BloomFilter other)
+      throws IncompatibleMergeException {
     // Duplicates the logic of `isCompatible` here to provide better error message.
     if (other == null) {
       throw new IncompatibleMergeException("Cannot merge null bloom filter");
@@ -215,9 +231,7 @@ public BloomFilter mergeInPlace(BloomFilter other) throws IncompatibleMergeExcep
         "Cannot merge bloom filters with different number of hash functions"
       );
     }
-
-    this.bits.putAll(that.bits);
-    return this;
+    return that;
   }
 
   @Override
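Because both merge paths now go through the shared checkCompatibilityForMerge helper, intersecting incompatible filters fails the same way merging does. A small sketch under assumed sizes (with the default FPP, these expected item counts yield bit arrays of different lengths):

import org.apache.spark.util.sketch.BloomFilter;
import org.apache.spark.util.sketch.IncompatibleMergeException;

public class IncompatibleIntersectDemo {
  public static void main(String[] args) {
    // Very different expected item counts lead to different underlying bit sizes.
    BloomFilter small = BloomFilter.create(100);
    BloomFilter large = BloomFilter.create(1_000_000);

    try {
      small.intersectInPlace(large);
    } catch (IncompatibleMergeException e) {
      // Same error reporting as mergeInPlace, e.g. a "different bit size" message.
      System.out.println("Rejected: " + e.getMessage());
    }
  }
}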

common/sketch/src/test/scala/org/apache/spark/util/sketch/BloomFilterSuite.scala

Lines changed: 30 additions & 0 deletions
@@ -99,9 +99,39 @@ class BloomFilterSuite extends AnyFunSuite { // scalastyle:ignore funsuite
     }
   }
 
+  def testIntersectInPlace[T: ClassTag]
+      (typeName: String, numItems: Int)(itemGen: Random => T): Unit = {
+    test(s"intersectInPlace - $typeName") {
+      // use a fixed seed to make the test predictable.
+      val r = new Random(37)
+
+      val items1 = Array.fill(numItems / 2)(itemGen(r))
+      val items2 = Array.fill(numItems / 2)(itemGen(r))
+
+      val filter1 = BloomFilter.create(numItems / 2)
+      items1.foreach(filter1.put)
+
+      val filter2 = BloomFilter.create(numItems / 2)
+      items2.foreach(filter2.put)
+
+      filter1.intersectInPlace(filter2)
+
+      val common_items = items1.intersect(items2)
+      common_items.foreach(i => assert(filter1.mightContain(i)))
+
+      // After intersect, `filter1` still has `numItems/2` items
+      // which doesn't exceed `expectedNumItems`,
+      // so the `expectedFpp` should not be higher than the default one.
+      assert(filter1.expectedFpp() - BloomFilter.DEFAULT_FPP < EPSILON)
+
+      checkSerDe(filter1)
+    }
+  }
+
   def testItemType[T: ClassTag](typeName: String, numItems: Int)(itemGen: Random => T): Unit = {
     testAccuracy[T](typeName, numItems)(itemGen)
     testMergeInPlace[T](typeName, numItems)(itemGen)
+    testIntersectInPlace[T](typeName, numItems)(itemGen)
   }
 
   testItemType[Byte]("Byte", 160) { _.nextInt().toByte }

dev/deps/spark-deps-hadoop-3.2-hive-2.3

Lines changed: 4 additions & 3 deletions
@@ -57,9 +57,10 @@ flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar
 generex/1.0.2//generex-1.0.2.jar
 gson/2.2.4//gson-2.2.4.jar
 guava/14.0.1//guava-14.0.1.jar
-hadoop-client-api/3.2.2//hadoop-client-api-3.2.2.jar
-hadoop-client-runtime/3.2.2//hadoop-client-runtime-3.2.2.jar
-hadoop-yarn-server-web-proxy/3.2.2//hadoop-yarn-server-web-proxy-3.2.2.jar
+hadoop-client-api/3.3.1//hadoop-client-api-3.3.1.jar
+hadoop-client-runtime/3.3.1//hadoop-client-runtime-3.3.1.jar
+hadoop-shaded-guava/1.1.1//hadoop-shaded-guava-1.1.1.jar
+hadoop-yarn-server-web-proxy/3.3.1//hadoop-yarn-server-web-proxy-3.3.1.jar
 hive-beeline/2.3.9//hive-beeline-2.3.9.jar
 hive-cli/2.3.9//hive-cli-2.3.9.jar
 hive-common/2.3.9//hive-common-2.3.9.jar

docs/sql-migration-guide.md

Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ license: |
 
 - In Spark 3.2, `FloatType` is mapped to `FLOAT` in MySQL. Prior to this, it used to be mapped to `REAL`, which is by default a synonym to `DOUBLE PRECISION` in MySQL.
 
-- In Spark 3.2, the query executions triggered by `DataFrameWriter` are always named `command` when being sent to `QueryExecutionListener`. In Spark 3.1 and earlier, the name is one of `save`, `insertInto`, `saveAsTable`, `create`, `append`, `overwrite`, `overwritePartitions`, `replace`.
+- In Spark 3.2, the query executions triggered by `DataFrameWriter` are always named `command` when being sent to `QueryExecutionListener`. In Spark 3.1 and earlier, the name is one of `save`, `insertInto`, `saveAsTable`.
 
 ## Upgrading from Spark SQL 3.0 to 3.1
 

pom.xml

Lines changed: 14 additions & 1 deletion
@@ -120,7 +120,7 @@
     <sbt.project.name>spark</sbt.project.name>
     <slf4j.version>1.7.30</slf4j.version>
     <log4j.version>1.2.17</log4j.version>
-    <hadoop.version>3.2.2</hadoop.version>
+    <hadoop.version>3.3.1</hadoop.version>
     <protobuf.version>2.5.0</protobuf.version>
     <yarn.version>${hadoop.version}</yarn.version>
     <zookeeper.version>3.6.2</zookeeper.version>
@@ -195,6 +195,7 @@
     <maven-antrun.version>1.8</maven-antrun.version>
     <commons-crypto.version>1.1.0</commons-crypto.version>
     <commons-cli.version>1.2</commons-cli.version>
+    <bouncycastle.version>1.60</bouncycastle.version>
     <!--
       If you are changing Arrow version specification, please check
       ./python/pyspark/sql/pandas/utils.py, and ./python/setup.py too.
@@ -1192,6 +1193,18 @@
         </exclusion>
       </exclusions>
     </dependency>
+    <dependency>
+      <groupId>org.bouncycastle</groupId>
+      <artifactId>bcprov-jdk15on</artifactId>
+      <version>${bouncycastle.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.bouncycastle</groupId>
+      <artifactId>bcpkix-jdk15on</artifactId>
+      <version>${bouncycastle.version}</version>
+      <scope>test</scope>
+    </dependency>
     <!-- Managed up to match Hadoop in HADOOP-16530 -->
     <dependency>
       <groupId>xerces</groupId>

project/MimaExcludes.scala

Lines changed: 4 additions & 1 deletion
@@ -58,7 +58,10 @@ object MimaExcludes {
     ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.vectorized.ColumnVector.getChild"),
 
     // [SPARK-35135][CORE] Turn WritablePartitionedIterator from trait into a default implementation class
-    ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.util.collection.WritablePartitionedIterator")
+    ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.util.collection.WritablePartitionedIterator"),
+
+    // [SPARK-35757][CORE] Add bitwise AND operation and functionality for intersecting bloom filters
+    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.util.sketch.BloomFilter.intersectInPlace")
   )
 
   // Exclude rules for 3.1.x

python/docs/source/development/contributing.rst

Lines changed: 111 additions & 12 deletions
@@ -72,17 +72,94 @@ Preparing to Contribute Code Changes
 ------------------------------------
 
 Before starting to work on codes in PySpark, it is recommended to read `the general guidelines <https://spark.apache.org/contributing.html>`_.
-There are a couple of additional notes to keep in mind when contributing to codes in PySpark:
+Additionally, there are a couple of additional notes to keep in mind when contributing to codes in PySpark:
+
+* Be Pythonic
+    See `The Zen of Python <https://www.python.org/dev/peps/pep-0020/>`_.
+
+* Match APIs with Scala and Java sides
+    Apache Spark is an unified engine that provides a consistent API layer. In general, the APIs are consistently supported across other languages.
+
+* PySpark-specific APIs can be accepted
+    As long as they are Pythonic and do not conflict with other existent APIs, it is fine to raise a API request, for example, decorator usage of UDFs.
+
+* Adjust the corresponding type hints if you extend or modify public API
+    See `Contributing and Maintaining Type Hints`_ for details.
+
+If you are fixing pandas API on Spark (``pyspark.pandas``) package, please consider the design principles below:
+
+* Return pandas-on-Spark data structure for big data, and pandas data structure for small data
+    Often developers face the question whether a particular function should return a pandas-on-Spark DataFrame/Series, or a pandas DataFrame/Series. The principle is: if the returned object can be large, use a pandas-on-Spark DataFrame/Series. If the data is bound to be small, use a pandas DataFrame/Series. For example, ``DataFrame.dtypes`` return a pandas Series, because the number of columns in a DataFrame is bounded and small, whereas ``DataFrame.head()`` or ``Series.unique()`` returns a pandas-on-Spark DataFrame/Series, because the resulting object can be large.
+
+* Provide discoverable APIs for common data science tasks
+    At the risk of overgeneralization, there are two API design approaches: the first focuses on providing APIs for common tasks; the second starts with abstractions, and enables users to accomplish their tasks by composing primitives. While the world is not black and white, pandas takes more of the former approach, while Spark has taken more of the latter.
+
+    One example is value count (count by some key column), one of the most common operations in data science. pandas ``DataFrame.value_count`` returns the result in sorted order, which in 90% of the cases is what users prefer when exploring data, whereas Spark's does not sort, which is more desirable when building data pipelines, as users can accomplish the pandas behavior by adding an explicit ``orderBy``.
+
+    Similar to pandas, pandas API on Spark should also lean more towards the former, providing discoverable APIs for common data science tasks. In most cases, this principle is well taken care of by simply implementing pandas' APIs. However, there will be circumstances in which pandas' APIs don't address a specific need, e.g. plotting for big data.
+
+* Guardrails to prevent users from shooting themselves in the foot
+    Certain operations in pandas are prohibitively expensive as data scales, and we don't want to give users the illusion that they can rely on such operations in pandas API on Spark. That is to say, methods implemented in pandas API on Spark should be safe to perform by default on large datasets. As a result, the following capabilities are not implemented in pandas API on Spark:
+
+    * Capabilities that are fundamentally not parallelizable: e.g. imperatively looping over each element
+    * Capabilities that require materializing the entire working set in a single node's memory. This is why we do not implement `pandas.DataFrame.to_xarray <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_xarray.html>`_. Another example is the ``_repr_html_`` call caps the total number of records shown to a maximum of 1000, to prevent users from blowing up their driver node simply by typing the name of the DataFrame in a notebook.
+
+    A few exceptions, however, exist. One common pattern with "big data science" is that while the initial dataset is large, the working set becomes smaller as the analysis goes deeper. For example, data scientists often perform aggregation on datasets and want to then convert the aggregated dataset to some local data structure. To help data scientists, we offer the following:
+
+    * ``DataFrame.to_pandas``: returns a pandas DataFrame (pandas-on-Spark only)
+    * ``DataFrame.to_numpy``: returns a numpy array, works with both pandas and pandas API on Spark
+
+    Note that it is clear from the names that these functions return some local data structure that would require materializing data in a single node's memory. For these functions, we also explicitly document them with a warning note that the resulting data structure must be small.
+
+
+Environment Setup
+-----------------
+
+Prerequisite
+~~~~~~~~~~~~
+
+PySpark development requires to build Spark that needs a proper JDK installed, etc. See `Building Spark <https://spark.apache.org/docs/latest/building-spark.html>`_ for more details.
+
+Conda
+~~~~~
+
+If you are using Conda, the development environment can be set as follows.
+
+.. code-block:: bash
+
+    # Python 3.6+ is required
+    conda create --name pyspark-dev-env python=3.9
+    conda activate pyspark-dev-env
+    pip install -r dev/requirements.txt
+
+Once it is set up, make sure you switch to `pyspark-dev-env` before starting the development:
+
+.. code-block:: bash
+
+    conda activate pyspark-dev-env
+
+Now, you can start developing and `running the tests <testing.rst>`_.
+
+pip
+~~~
+
+With Python 3.6+, pip can be used as below to install and set up the development environment.
+
+.. code-block:: bash
+
+    pip install -r dev/requirements.txt
+
+Now, you can start developing and `running the tests <testing.rst>`_.
 
-* Be Pythonic.
-* APIs are matched with Scala and Java sides in general.
-* PySpark specific APIs can still be considered as long as they are Pythonic and do not conflict with other existent APIs, for example, decorator usage of UDFs.
-* If you extend or modify public API, please adjust corresponding type hints. See `Contributing and Maintaining Type Hints`_ for details.
 
 Contributing and Maintaining Type Hints
 ----------------------------------------
 
-PySpark type hints are provided using stub files, placed in the same directory as the annotated module, with exception to ``# type: ignore`` in modules which don't have their own stubs (tests, examples and non-public API).
+PySpark type hints are provided using stub files, placed in the same directory as the annotated module, with exception to:
+
+* ``# type: ignore`` in modules which don't have their own stubs (tests, examples and non-public API).
+* pandas API on Spark (``pyspark.pandas`` package) where the type hints are inlined.
+
 As a rule of thumb, only public API is annotated.
 
 Annotations should, when possible:
@@ -122,16 +199,38 @@ Annotations can be validated using ``dev/lint-python`` script or by invoking myp
     mypy --config python/mypy.ini python/pyspark
 
 
-
 Code and Docstring Guide
-----------------------------------
+------------------------
+
+Code Conventions
+~~~~~~~~~~~~~~~~
 
 Please follow the style of the existing codebase as is, which is virtually PEP 8 with one exception: lines can be up
 to 100 characters in length, not 79.
-For the docstring style, PySpark follows `NumPy documentation style <https://numpydoc.readthedocs.io/en/latest/format.html>`_.
 
-Note that the method and variable names in PySpark are the similar case is ``threading`` library in Python itself where
-the APIs were inspired by Java. PySpark also follows `camelCase` for exposed APIs that match with Scala and Java.
-There is an exception ``functions.py`` that uses `snake_case`. It was in order to make APIs SQL (and Python) friendly.
+Note that:
+
+* the method and variable names in PySpark are the similar case is ``threading`` library in Python itself where the APIs were inspired by Java. PySpark also follows `camelCase` for exposed APIs that match with Scala and Java.
+
+* In contrast, ``functions.py`` uses `snake_case` in order to make APIs SQL (and Python) friendly.
+
+* In addition, pandas-on-Spark (``pyspark.pandas``) also uses `snake_case` because this package is free from API consistency with other languages.
 
 PySpark leverages linters such as `pycodestyle <https://pycodestyle.pycqa.org/en/latest/>`_ and `flake8 <https://flake8.pycqa.org/en/latest/>`_, which ``dev/lint-python`` runs. Therefore, make sure to run that script to double check.
+
+
+Docstring Conventions
+~~~~~~~~~~~~~~~~~~~~~
+
+PySpark follows `NumPy documentation style <https://numpydoc.readthedocs.io/en/latest/format.html>`_.
+
+
+Doctest Conventions
+~~~~~~~~~~~~~~~~~~~
+
+In general, doctests should be grouped logically by separating a newline.
+
+For instance, the first block is for the statements for preparation, the second block is for using the function with a specific argument,
+and third block is for another argument. As a example, please refer `DataFrame.rsub <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rsub.html#pandas.DataFrame.rsub>`_ in pandas.
+
+These blocks should be consistently separated in PySpark doctests, and more doctests should be added if the coverage of the doctests or the number of examples to show is not enough.

python/docs/source/development/index.rst

Lines changed: 0 additions & 7 deletions
@@ -27,10 +27,3 @@ Development
    debugging
    setting_ide
 
-For pandas API on Spark:
-
-.. toctree::
-   :maxdepth: 2
-
-   ps_contributing
-   ps_design
