Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions python/ray/data/datasource/partitioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dataclasses import dataclass
from enum import Enum
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Type, Union
import urllib.parse

from ray.util.annotations import DeveloperAPI, PublicAPI

Expand Down Expand Up @@ -277,7 +278,10 @@ def _parse_hive_path(self, dir_path: str) -> Dict[str, str]:
dictionary for unpartitioned files.
"""
dirs = [d for d in dir_path.split("/") if d and (d.count("=") == 1)]
kv_pairs = [d.split("=") for d in dirs] if dirs else []
kv_pairs = dict([d.split("=") for d in dirs] if dirs else [])
# URL-decode the partition values (percent-escapes such as "%20" become literal characters).
kv_pairs = {k: urllib.parse.unquote(v) for k, v in kv_pairs.items()}

field_names = self._scheme.field_names
if field_names and kv_pairs:
if len(kv_pairs) != len(field_names):
Expand All @@ -291,7 +295,7 @@ def _parse_hive_path(self, dir_path: str) -> Dict[str, str]:
f"Expected partition key {field_name} but found "
f"{kv_pairs[i][0]}"
)
return dict(kv_pairs)
return kv_pairs

def _parse_dir_path(self, dir_path: str) -> Dict[str, str]:
"""Directory partition path parser.
Expand Down