Commit 1cb37db: make lint
1 parent 8ba8693

23 files changed, +1507 -1289 lines
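
Every hunk in this commit applies the same mechanical reflow: a call whose last argument was a bracket-hugging list, dict, or comprehension is expanded so the inner bracket sits on its own indentation level inside the parentheses. A minimal before/after sketch of the pattern (placeholder names, not from the PyIceberg codebase):

# A sketch of the reflow only; behavior is unchanged.
items = [1, 2, 3]

# Before: the comprehension hugs the call's opening bracket.
doubled = sum([
    x * 2
    for x in items
])

# After: the bracket moves to its own line, one level deeper.
doubled = sum(
    [
        x * 2
        for x in items
    ]
)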

pyiceberg/cli/output.py

Lines changed: 7 additions & 5 deletions
@@ -242,8 +242,10 @@ def version(self, version: str) -> None:
         self._out({"version": version})

     def describe_refs(self, refs: List[Tuple[str, SnapshotRefType, Dict[str, str]]]) -> None:
-        self._out([
-            {"name": name, "type": type, detail_key: detail_val}
-            for name, type, detail in refs
-            for detail_key, detail_val in detail.items()
-        ])
+        self._out(
+            [
+                {"name": name, "type": type, detail_key: detail_val}
+                for name, type, detail in refs
+                for detail_key, detail_val in detail.items()
+            ]
+        )
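
The nested comprehension above emits one output row per (ref, detail) pair, each row carrying the ref's name and type. A self-contained sketch of that flattening, with a stand-in for the real _out sink:

from typing import Dict, List, Tuple

def _out(rows: List[Dict[str, str]]) -> None:
    # Stand-in for the real Console output sink.
    for row in rows:
        print(row)

refs: List[Tuple[str, str, Dict[str, str]]] = [
    ("main", "branch", {"snapshot_id": "123", "max_ref_age_ms": "500"}),
]

# One row per detail entry; fabricated values for illustration.
_out(
    [
        {"name": name, "type": type, detail_key: detail_val}
        for name, type, detail in refs
        for detail_key, detail_val in detail.items()
    ]
)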

pyiceberg/io/pyarrow.py

Lines changed: 25 additions & 19 deletions
@@ -2449,27 +2449,31 @@ def _dataframe_to_data_files(
         yield from write_file(
             io=io,
             table_metadata=table_metadata,
-            tasks=iter([
-                WriteTask(write_uuid=write_uuid, task_id=next(counter), record_batches=batches, schema=task_schema)
-                for batches in bin_pack_arrow_table(df, target_file_size)
-            ]),
+            tasks=iter(
+                [
+                    WriteTask(write_uuid=write_uuid, task_id=next(counter), record_batches=batches, schema=task_schema)
+                    for batches in bin_pack_arrow_table(df, target_file_size)
+                ]
+            ),
         )
     else:
         partitions = _determine_partitions(spec=table_metadata.spec(), schema=table_metadata.schema(), arrow_table=df)
         yield from write_file(
             io=io,
             table_metadata=table_metadata,
-            tasks=iter([
-                WriteTask(
-                    write_uuid=write_uuid,
-                    task_id=next(counter),
-                    record_batches=batches,
-                    partition_key=partition.partition_key,
-                    schema=task_schema,
-                )
-                for partition in partitions
-                for batches in bin_pack_arrow_table(partition.arrow_table_partition, target_file_size)
-            ]),
+            tasks=iter(
+                [
+                    WriteTask(
+                        write_uuid=write_uuid,
+                        task_id=next(counter),
+                        record_batches=batches,
+                        partition_key=partition.partition_key,
+                        schema=task_schema,
+                    )
+                    for partition in partitions
+                    for batches in bin_pack_arrow_table(partition.arrow_table_partition, target_file_size)
+                ]
+            ),
         )

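Worth noting when reading the hunk above: iter() over a list comprehension builds every WriteTask up front and only then hands an iterator to write_file, so the reflow changes layout, not this eager evaluation. A minimal illustration of the same shape:

from itertools import count

# The list is fully built before iter() wraps it, so side effects in the
# comprehension (like next(counter) above) all run before iteration starts.
counter = count(1)
tasks = iter([next(counter) for _ in range(3)])
print(next(counter))  # 4: the counter was already consumed three times
print(list(tasks))    # [1, 2, 3]
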
@@ -2534,10 +2538,12 @@ def _determine_partitions(spec: PartitionSpec, schema: Schema, arrow_table: pa.T
     partition_columns: List[Tuple[PartitionField, NestedField]] = [
         (partition_field, schema.find_field(partition_field.source_id)) for partition_field in spec.fields
     ]
-    partition_values_table = pa.table({
-        str(partition.field_id): partition.transform.pyarrow_transform(field.field_type)(arrow_table[field.name])
-        for partition, field in partition_columns
-    })
+    partition_values_table = pa.table(
+        {
+            str(partition.field_id): partition.transform.pyarrow_transform(field.field_type)(arrow_table[field.name])
+            for partition, field in partition_columns
+        }
+    )

     # Sort by partitions
     sort_indices = pa.compute.sort_indices(
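
For context, pa.table() accepts a mapping from column name to array, and pa.compute.sort_indices() can then order rows by those columns. A toy version of the step above, with a made-up field id and fabricated values:

import pyarrow as pa
import pyarrow.compute as pc

# "1000" stands in for a partition field id; the values are fabricated.
partition_values_table = pa.table({"1000": pa.array(["b", "a", "b"])})
sort_indices = pc.sort_indices(
    partition_values_table, sort_keys=[("1000", "ascending")]
)
print(sort_indices)  # row order that sorts by the partition column: 1, 0, 2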

pyiceberg/manifest.py

Lines changed: 31 additions & 25 deletions
@@ -292,28 +292,32 @@ def __repr__(self) -> str:


 def data_file_with_partition(partition_type: StructType, format_version: TableVersion) -> StructType:
-    data_file_partition_type = StructType(*[
-        NestedField(
-            field_id=field.field_id,
-            name=field.name,
-            field_type=field.field_type,
-            required=field.required,
-        )
-        for field in partition_type.fields
-    ])
+    data_file_partition_type = StructType(
+        *[
+            NestedField(
+                field_id=field.field_id,
+                name=field.name,
+                field_type=field.field_type,
+                required=field.required,
+            )
+            for field in partition_type.fields
+        ]
+    )

-    return StructType(*[
-        NestedField(
-            field_id=102,
-            name="partition",
-            field_type=data_file_partition_type,
-            required=True,
-            doc="Partition data tuple, schema based on the partition spec",
-        )
-        if field.field_id == 102
-        else field
-        for field in DATA_FILE_TYPE[format_version].fields
-    ])
+    return StructType(
+        *[
+            NestedField(
+                field_id=102,
+                name="partition",
+                field_type=data_file_partition_type,
+                required=True,
+                doc="Partition data tuple, schema based on the partition spec",
+            )
+            if field.field_id == 102
+            else field
+            for field in DATA_FILE_TYPE[format_version].fields
+        ]
+    )


 class DataFile(Record):
@@ -398,10 +402,12 @@ def __eq__(self, other: Any) -> bool:


 def manifest_entry_schema_with_data_file(format_version: TableVersion, data_file: StructType) -> Schema:
-    return Schema(*[
-        NestedField(2, "data_file", data_file, required=True) if field.field_id == 2 else field
-        for field in MANIFEST_ENTRY_SCHEMAS[format_version].fields
-    ])
+    return Schema(
+        *[
+            NestedField(2, "data_file", data_file, required=True) if field.field_id == 2 else field
+            for field in MANIFEST_ENTRY_SCHEMAS[format_version].fields
+        ]
+    )


 class ManifestEntry(Record):
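
Both manifest.py hunks use the same idiom: rebuild a field sequence with a conditional expression that swaps out one field by id and keeps the rest untouched. A generic, self-contained version of the pattern (the field names here are placeholders):

from dataclasses import dataclass, replace

@dataclass(frozen=True)
class Field:
    field_id: int
    name: str

fields = [Field(1, "status"), Field(2, "data_file"), Field(3, "sequence_number")]

# Swap the field with id 2 for a new definition; leave every other field as-is.
updated = [
    replace(field, name="data_file_v2") if field.field_id == 2 else field
    for field in fields
]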

pyiceberg/schema.py

Lines changed: 8 additions & 6 deletions
@@ -1707,12 +1707,14 @@ def list(self, list_type: ListType, element_result: Callable[[], bool]) -> bool:
         return self._is_field_compatible(list_type.element_field) and element_result()

     def map(self, map_type: MapType, key_result: Callable[[], bool], value_result: Callable[[], bool]) -> bool:
-        return all([
-            self._is_field_compatible(map_type.key_field),
-            self._is_field_compatible(map_type.value_field),
-            key_result(),
-            value_result(),
-        ])
+        return all(
+            [
+                self._is_field_compatible(map_type.key_field),
+                self._is_field_compatible(map_type.value_field),
+                key_result(),
+                value_result(),
+            ]
+        )

     def primitive(self, primitive: PrimitiveType) -> bool:
         return True
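
One behavioral detail the reflow preserves: because the argument to all() is a list, all four checks run before all() sees any of them; only a generator argument would short-circuit. A quick demonstration:

def check(label: str, value: bool) -> bool:
    print(f"ran {label}")
    return value

# List argument: both checks run even though the first is already False.
all([check("key", False), check("value", True)])  # prints "ran key", "ran value"

# Generator argument: all() stops at the first False.
all(check(label, v) for label, v in [("key", False), ("value", True)])  # prints only "ran key"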

pyiceberg/table/__init__.py

Lines changed: 17 additions & 15 deletions
@@ -629,18 +629,20 @@ def delete(
             if len(filtered_df) == 0:
                 replaced_files.append((original_file.file, []))
             elif len(df) != len(filtered_df):
-                replaced_files.append((
-                    original_file.file,
-                    list(
-                        _dataframe_to_data_files(
-                            io=self._table.io,
-                            df=filtered_df,
-                            table_metadata=self.table_metadata,
-                            write_uuid=commit_uuid,
-                            counter=counter,
-                        )
-                    ),
-                ))
+                replaced_files.append(
+                    (
+                        original_file.file,
+                        list(
+                            _dataframe_to_data_files(
+                                io=self._table.io,
+                                df=filtered_df,
+                                table_metadata=self.table_metadata,
+                                write_uuid=commit_uuid,
+                                counter=counter,
+                            )
+                        ),
+                    )
+                )

         if len(replaced_files) > 0:
             with self.update_snapshot(snapshot_properties=snapshot_properties).overwrite() as overwrite_snapshot:
@@ -680,9 +682,9 @@ def add_files(
             raise ValueError(f"Cannot add files that are already referenced by table, files: {', '.join(referenced_files)}")

         if self.table_metadata.name_mapping() is None:
-            self.set_properties(**{
-                TableProperties.DEFAULT_NAME_MAPPING: self.table_metadata.schema().name_mapping.model_dump_json()
-            })
+            self.set_properties(
+                **{TableProperties.DEFAULT_NAME_MAPPING: self.table_metadata.schema().name_mapping.model_dump_json()}
+            )
         with self.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot:
             data_files = _parquet_files_to_data_files(
                 table_metadata=self.table_metadata, file_paths=file_paths, io=self._table.io
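
The **{...} unpacking in the add_files hunk exists because the property key is a dotted string, not a valid Python identifier, so it cannot be passed as a plain keyword argument. A sketch of the call shape, assuming a key like PyIceberg's "schema.name-mapping.default":

def set_properties(**updates: str) -> None:
    # Stand-in for Transaction.set_properties; only shows the call shape.
    print(updates)

# Works: dict unpacking allows arbitrary string keys into **kwargs.
set_properties(**{"schema.name-mapping.default": '{"field-id": 1}'})

# Would not even parse: dots and hyphens are illegal in keyword names.
# set_properties(schema.name-mapping.default="...")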
