
Commit 6226ae0

Merge pull request #1661 from mathbunnyru/asalikhov/python_docs
Run black pre-commit on python code in docs
2 parents: a9c2837 + 610b058 · commit 6226ae0

4 files changed: 51 additions, 27 deletions

.pre-commit-config.yaml

Lines changed: 12 additions & 3 deletions
@@ -107,9 +107,7 @@ repos:
       - id: markdownlint
         args: ["--fix"]

-  # Run tools on Jupyter notebooks
-
-  # strip output from Jupyter notebooks
+  # Strip output from Jupyter notebooks
   - repo: https://github.com/kynan/nbstripout
     rev: 0.5.0
     hooks:
@@ -127,6 +125,17 @@ repos:
         args: [--target-version=py39]
       - id: nbqa-flake8

+  # Run black on python code blocks in documentation files.
+  - repo: https://github.com/asottile/blacken-docs
+    rev: v1.12.1
+    hooks:
+      - id: blacken-docs
+        # --skip-errors is added to allow us to have python syntax highlighting even if
+        # the python code blocks includes jupyter specific additions such as % or !
+        # See https://github.com/asottile/blacken-docs/issues/127 for an upstream
+        # feature request about this.
+        args: [--target-version=py39, --skip-errors]
+
 # Docker hooks do not work in pre-commit.ci
 # See: <https://github.com/pre-commit-ci/issues/issues/11>
 ci:
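For reference, the formatting the new hook applies to each fenced python snippet is plain black formatting. A minimal sketch of that behaviour (not part of this commit; it assumes a recent `black` release is importable) is:

```python
import black

# Reformat one documentation snippet the way blacken-docs would:
# single quotes become double quotes, long lines are re-wrapped,
# and the Python 3.9 grammar is targeted.
snippet = "spark = SparkSession.builder.master('local').getOrCreate()\n"
mode = black.Mode(target_versions={black.TargetVersion.PY39})
print(black.format_str(snippet, mode=mode))
# spark = SparkSession.builder.master("local").getOrCreate()
```

blacken-docs applies this per code block; with `--skip-errors`, blocks containing IPython-only syntax such as `%` magics or `!` shell escapes are skipped instead of failing the hook.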

docs/using/recipes.md

Lines changed: 28 additions & 12 deletions
@@ -300,34 +300,44 @@ A few suggestions have been made regarding using Docker Stacks with spark.

 Using Spark session for hadoop 2.7.3

-```py
+```python
 import os
+
 # !ls /usr/local/spark/jars/hadoop* # to figure out what version of hadoop
-os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages "org.apache.hadoop:hadoop-aws:2.7.3" pyspark-shell'
+os.environ[
+    "PYSPARK_SUBMIT_ARGS"
+] = '--packages "org.apache.hadoop:hadoop-aws:2.7.3" pyspark-shell'

 import pyspark
+
 myAccessKey = input()
 mySecretKey = input()

-spark = pyspark.sql.SparkSession.builder \
-    .master("local[*]") \
-    .config("spark.hadoop.fs.s3a.access.key", myAccessKey) \
-    .config("spark.hadoop.fs.s3a.secret.key", mySecretKey) \
-    .getOrCreate()
+spark = (
+    pyspark.sql.SparkSession.builder.master("local[*]")
+    .config("spark.hadoop.fs.s3a.access.key", myAccessKey)
+    .config("spark.hadoop.fs.s3a.secret.key", mySecretKey)
+    .getOrCreate()
+)

 df = spark.read.parquet("s3://myBucket/myKey")
 ```

 Using Spark context for hadoop 2.6.0

-```py
+```python
 import os
-os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.amazonaws:aws-java-sdk:1.10.34,org.apache.hadoop:hadoop-aws:2.6.0 pyspark-shell'
+
+os.environ[
+    "PYSPARK_SUBMIT_ARGS"
+] = "--packages com.amazonaws:aws-java-sdk:1.10.34,org.apache.hadoop:hadoop-aws:2.6.0 pyspark-shell"

 import pyspark
+
 sc = pyspark.SparkContext("local[*]")

 from pyspark.sql import SQLContext
+
 sqlContext = SQLContext(sc)

 hadoopConf = sc._jsc.hadoopConfiguration()
@@ -346,14 +356,20 @@ Ref: <https://github.com/jupyter/docker-stacks/issues/127>

 ```python
 import os
-os.environ['PYSPARK_SUBMIT_ARGS'] = '--jars /home/jovyan/spark-streaming-kafka-assembly_2.10-1.6.1.jar pyspark-shell'
+
+os.environ[
+    "PYSPARK_SUBMIT_ARGS"
+] = "--jars /home/jovyan/spark-streaming-kafka-assembly_2.10-1.6.1.jar pyspark-shell"
 import pyspark
 from pyspark.streaming.kafka import KafkaUtils
 from pyspark.streaming import StreamingContext
+
 sc = pyspark.SparkContext()
-ssc = StreamingContext(sc,1)
+ssc = StreamingContext(sc, 1)
 broker = "<my_broker_ip>"
-directKafkaStream = KafkaUtils.createDirectStream(ssc, ["test1"], {"metadata.broker.list": broker})
+directKafkaStream = KafkaUtils.createDirectStream(
+    ssc, ["test1"], {"metadata.broker.list": broker}
+)
 directKafkaStream.pprint()
 ssc.start()
 ```
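These recipe changes are formatting-only; black's multi-line subscript assignment sets the same environment variable as the original one-liner. A standalone check (illustrative values, not taken from the docs):

```python
import os

# The wrapped form black produces for long assignments is the same statement
# as the original single line, only split to respect the line-length limit.
os.environ[
    "PYSPARK_SUBMIT_ARGS"
] = "--packages org.apache.hadoop:hadoop-aws:2.7.3 pyspark-shell"
assert os.environ["PYSPARK_SUBMIT_ARGS"].endswith("pyspark-shell")
```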

docs/using/specifics.md

Lines changed: 6 additions & 7 deletions
@@ -25,7 +25,7 @@ Those logs are still available but only in the container's logs.
 If you want to make them appear in the notebook, you can overwrite the configuration in a user level IPython kernel profile.
 To do that you have to uncomment the following line in your `~/.ipython/profile_default/ipython_kernel_config.py` and restart the kernel.

-```Python
+```python
 c.IPKernelApp.capture_fd_output = True
 ```

@@ -91,7 +91,7 @@ In a Python notebook.
 from pyspark.sql import SparkSession

 # Spark session & context
-spark = SparkSession.builder.master('local').getOrCreate()
+spark = SparkSession.builder.master("local").getOrCreate()
 sc = spark.sparkContext

 # Sum of the first 100 whole numbers
@@ -183,7 +183,7 @@ see [Spark Configuration][spark-conf] for more information.
 from pyspark.sql import SparkSession

 # Spark session & context
-spark = SparkSession.builder.master('spark://master:7077').getOrCreate()
+spark = SparkSession.builder.master("spark://master:7077").getOrCreate()
 sc = spark.sparkContext

 # Sum of the first 100 whole numbers
@@ -264,8 +264,7 @@ from pyspark.sql import SparkSession
 spark = (
     SparkSession.builder.appName("elasticsearch")
     .config(
-        "spark.jars.packages",
-        "org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0"
+        "spark.jars.packages", "org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0"
     )
     .getOrCreate()
 )
@@ -292,7 +291,7 @@ The `jupyter/tensorflow-notebook` image supports the use of
 ```python
 import tensorflow as tf

-hello = tf.Variable('Hello World!')
+hello = tf.Variable("Hello World!")

 sess = tf.Session()
 init = tf.global_variables_initializer()
@@ -306,7 +305,7 @@ sess.run(hello)
 ```python
 import tensorflow as tf

-hello = tf.Variable('Hello Distributed World!')
+hello = tf.Variable("Hello Distributed World!")

 server = tf.train.Server.create_local_server()
 sess = tf.Session(server.target)

tagging/README.md

Lines changed: 5 additions & 5 deletions
@@ -55,6 +55,7 @@ All the taggers are inherited from `TaggerInterface`:
 ```python
 class TaggerInterface:
     """Common interface for all taggers"""
+
     @staticmethod
     def tag_value(container) -> str:
         raise NotImplementedError
@@ -84,6 +85,7 @@ All the other manifest classes are inherited from `ManifestInterface`:
 ```python
 class ManifestInterface:
     """Common interface for all manifests"""
+
     @staticmethod
     def markdown_piece(container) -> str:
         raise NotImplementedError
@@ -97,11 +99,9 @@ class ManifestInterface:
 class AptPackagesManifest(ManifestInterface):
     @staticmethod
     def markdown_piece(container) -> str:
-        return "\n".join([
-            "## Apt Packages",
-            "",
-            quoted_output(container, "apt list --installed")
-        ])
+        return "\n".join(
+            ["## Apt Packages", "", quoted_output(container, "apt list --installed")]
+        )
 ```

 - `quoted_output` simply runs the command inside container using `DockerRunner.run_simple_command` and wraps it to triple quotes to create a valid markdown piece of file.
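As a usage note, new manifest classes follow the same shape as `AptPackagesManifest` above. A hypothetical example (not part of this commit; it assumes `ManifestInterface` and `quoted_output` from the README snippets are in scope):

```python
# Hypothetical manifest listing pip packages, mirroring AptPackagesManifest.
# quoted_output runs the command inside the container and wraps the output in
# triple quotes so the returned string is a valid markdown fragment.
class PipPackagesManifest(ManifestInterface):
    @staticmethod
    def markdown_piece(container) -> str:
        return "\n".join(
            ["## Pip Packages", "", quoted_output(container, "pip list")]
        )
```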
