Skip to content

Commit 63fbcf3

Browse files
committed
Enable path ruff check
1 parent b063f59 commit 63fbcf3

File tree

11 files changed

+35
-36
lines changed

11 files changed

+35
-36
lines changed

benchmarks/db-benchmark/groupby-datafusion.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import gc
1919
import os
2020
import timeit
21+
from pathlib import Path
2122

2223
import datafusion as df
2324
import pyarrow as pa
@@ -34,7 +35,7 @@
3435

3536
print("# groupby-datafusion.py", flush=True)
3637

37-
exec(open("./_helpers/helpers.py").read())
38+
exec(Path("./_helpers/helpers.py").read_text())
3839

3940

4041
def ans_shape(batches) -> tuple[int, int]:
@@ -65,7 +66,7 @@ def execute(df) -> list:
6566
sql = True
6667

6768
data_name = os.environ["SRC_DATANAME"]
68-
src_grp = os.path.join("data", data_name + ".csv")
69+
src_grp = Path("data") / f"{data_name}.csv"
6970
print("loading dataset %s" % src_grp, flush=True)
7071

7172
schema = pa.schema(

benchmarks/db-benchmark/join-datafusion.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import gc
1919
import os
2020
import timeit
21+
from pathlib import Path
2122

2223
import datafusion as df
2324
from datafusion import col
@@ -26,7 +27,7 @@
2627

2728
print("# join-datafusion.py", flush=True)
2829

29-
exec(open("./_helpers/helpers.py").read())
30+
exec(Path("./_helpers/helpers.py").read_text())
3031

3132

3233
def ans_shape(batches) -> tuple[int, int]:
@@ -49,12 +50,12 @@ def ans_shape(batches) -> tuple[int, int]:
4950
on_disk = "FALSE"
5051

5152
data_name = os.environ["SRC_DATANAME"]
52-
src_jn_x = os.path.join("data", data_name + ".csv")
53+
src_jn_x = Path("data") / f"{data_name}.csv"
5354
y_data_name = join_to_tbls(data_name)
5455
src_jn_y = [
55-
os.path.join("data", y_data_name[0] + ".csv"),
56-
os.path.join("data", y_data_name[1] + ".csv"),
57-
os.path.join("data", y_data_name[2] + ".csv"),
56+
Path("data") / f"{y_data_name[0]}.csv",
57+
Path("data") / f"{y_data_name[1]}.csv",
58+
Path("data") / f"{y_data_name[2]}.csv",
5859
]
5960
if len(src_jn_y) != 3:
6061
error_msg = "Something went wrong in preparing files used for join"

benchmarks/tpch/tpch.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,13 @@
1717

1818
import argparse
1919
import time
20+
from pathlib import Path
2021

2122
from datafusion import SessionContext
2223

2324

2425
def bench(data_path, query_path) -> None:
25-
with open("results.csv", "w") as results:
26+
with Path("results.csv").open("w") as results:
2627
# register tables
2728
start = time.time()
2829
total_time_millis = 0
@@ -45,7 +46,7 @@ def bench(data_path, query_path) -> None:
4546
print("Configuration:\n", ctx)
4647

4748
# register tables
48-
with open("create_tables.sql") as f:
49+
with Path("create_tables.sql").open() as f:
4950
sql = ""
5051
for line in f.readlines():
5152
if line.startswith("--"):
@@ -65,7 +66,7 @@ def bench(data_path, query_path) -> None:
6566

6667
# run queries
6768
for query in range(1, 23):
68-
with open(f"{query_path}/q{query}.sql") as f:
69+
with Path(f"{query_path}/q{query}.sql").open() as f:
6970
text = f.read()
7071
tmp = text.split(";")
7172
queries = [s.strip() for s in tmp if len(s.strip()) > 0]

dev/create_license.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import json
2222
import subprocess
23+
from pathlib import Path
2324

2425
subprocess.check_output(["cargo", "install", "cargo-license"])
2526
data = subprocess.check_output(
@@ -248,5 +249,5 @@
248249
result += "------------------\n\n"
249250
result += f"### {name} {version}\n* source: [{repository}]({repository})\n* license: {license}\n\n"
250251

251-
with open("LICENSE.txt", "w") as f:
252+
with Path("LICENSE.txt").open("w") as f:
252253
f.write(result)

dev/release/check-rat-report.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import re
2222
import sys
2323
import xml.etree.ElementTree as ET
24+
from pathlib import Path
2425

2526
if len(sys.argv) != 3:
2627
sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" % sys.argv[0])
@@ -29,7 +30,7 @@
2930
exclude_globs_filename = sys.argv[1]
3031
xml_filename = sys.argv[2]
3132

32-
globs = [line.strip() for line in open(exclude_globs_filename)]
33+
globs = [line.strip() for line in Path(exclude_globs_filename).read_text().splitlines()]
3334

3435
tree = ET.parse(xml_filename)
3536
root = tree.getroot()

examples/python-udf-comparisons.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,16 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
import os
1918
import time
19+
from pathlib import Path
2020

2121
import pyarrow as pa
2222
import pyarrow.compute as pc
2323
from datafusion import SessionContext, col, lit, udf
2424
from datafusion import functions as F
2525

26-
path = os.path.dirname(os.path.abspath(__file__))
27-
filepath = os.path.join(path, "./tpch/data/lineitem.parquet")
26+
path = Path(__file__).parent.resolve()
27+
filepath = path / "./tpch/data/lineitem.parquet"
2828

2929
# This example serves to demonstrate alternate approaches to answering the
3030
# question "return all of the rows that have a specific combination of these

examples/tpch/convert_data_to_parquet.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
as will be generated by the script provided in this repository.
2323
"""
2424

25-
import os
25+
from pathlib import Path
2626

2727
import datafusion
2828
import pyarrow as pa
@@ -116,7 +116,7 @@
116116
("S_COMMENT", pa.string()),
117117
]
118118

119-
curr_dir = os.path.dirname(os.path.abspath(__file__))
119+
curr_dir = Path(__file__).resolve().parent
120120
for filename, curr_schema_val in all_schemas.items():
121121
# For convenience, go ahead and convert the schema column names to lowercase
122122
curr_schema = [(s[0].lower(), s[1]) for s in curr_schema_val]
@@ -132,10 +132,8 @@
132132

133133
schema = pa.schema(curr_schema)
134134

135-
source_file = os.path.abspath(
136-
os.path.join(curr_dir, f"../../benchmarks/tpch/data/{filename}.csv")
137-
)
138-
dest_file = os.path.abspath(os.path.join(curr_dir, f"./data/{filename}.parquet"))
135+
source_file = (curr_dir / f"../../benchmarks/tpch/data/{filename}.csv").resolve()
136+
dest_file = (curr_dir / f"./data/{filename}.parquet").resolve()
139137

140138
df = ctx.read_csv(source_file, schema=schema, has_header=False, delimiter="|")
141139

examples/tpch/util.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,16 @@
1919
Common utilities for running TPC-H examples.
2020
"""
2121

22-
import os
22+
from pathlib import Path
2323

2424

25-
def get_data_path(filename: str) -> str:
26-
path = os.path.dirname(os.path.abspath(__file__))
25+
def get_data_path(filename: str) -> Path:
26+
path = Path(__file__).resolve().parent
2727

28-
return os.path.join(path, "data", filename)
28+
return path / "data" / filename
2929

3030

31-
def get_answer_file(answer_file: str) -> str:
32-
path = os.path.dirname(os.path.abspath(__file__))
31+
def get_answer_file(answer_file: str) -> Path:
32+
path = Path(__file__).resolve().parent
3333

34-
return os.path.join(
35-
path, "../../benchmarks/tpch/data/answers", f"{answer_file}.out"
36-
)
34+
return path / "../../benchmarks/tpch/data/answers" / f"{answer_file}.out"

pyproject.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,6 @@ ignore = [
9090
"PD901", # Allow variable name df
9191
"N812", # Allow importing functions as `F`
9292
"A005", # Allow module named io
93-
# TODO: Enable all of the following, but this PR is getting too large already
94-
"PTH",
9593
]
9694

9795
[tool.ruff.lint.pydocstyle]

python/datafusion/input/location.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
"""The default input source for DataFusion."""
1919

20-
import glob
2120
from pathlib import Path
2221
from typing import Any
2322

@@ -84,6 +83,6 @@ def build_table(
8483
raise RuntimeError(msg)
8584

8685
# Input could possibly be multiple files. Create a list if so
87-
input_files = glob.glob(input_item)
86+
glob_path = Path(input_item)
input_files = [str(match) for match in glob_path.parent.glob(glob_path.name)]
8887

8988
return SqlTable(table_name, columns, num_rows, input_files)

0 commit comments

Comments
 (0)