Skip to content

glob is not reporting paths when a folder doesn't contain objects #935

Open
@NicholasFiorentini

Description

@NicholasFiorentini

Given the following S3 bucket structure:

root_bucket/
       something/
           target/
               another_folder/
                   object.txt
       target/
           object.txt
           another_folder/

Given the glob pattern s3://root_bucket/**/target, I expect to obtain s3://root_bucket/something/target and s3://root_bucket/target. However, only s3://root_bucket/target is returned.

This happens on AWS S3, and it is easily reproducible with moto:

from typing import Generator, NamedTuple

import pytest
import s3fs


class Fixture(NamedTuple):
    """Values that the ``build`` fixture hands to each test."""

    # S3 URL prefix under which the test objects are created.
    base_url: str
    # Filesystem client the test uses to query the prepared bucket.
    mock_s3_client: s3fs.S3FileSystem


@pytest.fixture(scope="function")
def build(mock_s3_client: s3fs.S3FileSystem) -> Generator[Fixture, None, None]:
    """Create the mock S3 layout and yield it to the test.

    Two ``target`` folders are prepared under the base URL:
    ``something/target`` holds only sub-folders (each with one file), while
    the top-level ``target`` also holds a file of its own.
    """
    base_url = "s3://mock-bucket/fake-project"
    fs = mock_s3_client
    fs.mkdir(base_url)

    # Each entry is a folder to create (relative to base_url) plus the name of
    # a file to touch inside it, or None when the folder gets no direct object.
    layout = (
        # First subtree: nothing sits directly inside something/target.
        ("something", None),
        ("something/target", None),
        ("something/target/folder1", "file.xml"),
        ("something/target/folder2", "file.xml"),
        # Second subtree: target has an object of its own.
        ("target", "example.txt"),
        ("target/folder3", "file.xml"),
        ("target/folder4", "file.xml"),
    )
    for folder, filename in layout:
        folder_url = f"{base_url}/{folder}"
        fs.mkdir(folder_url)
        if filename is not None:
            fs.touch(f"{folder_url}/{filename}")

    # Sanity-check the top level before handing control to the test.
    top_level = set(fs.ls(base_url))
    assert top_level == {
        "mock-bucket/fake-project/something",
        "mock-bucket/fake-project/target",
    }

    yield Fixture(base_url=base_url, mock_s3_client=fs)


def test_find_subfolders(build: Fixture) -> None:
    """Check that ``**/target`` matches every ``target`` folder under the base URL.

    Both ``something/target`` (which holds only sub-folders) and the
    top-level ``target`` (which also holds an object) are expected.
    """
    glob_pattern = f"{build.base_url}/**/target"
    result = build.mock_s3_client.glob(glob_pattern)

    # s3fs reports paths without the "s3://" scheme (as ``ls`` does), so the
    # expected values are written in the same bucket-relative form; with the
    # scheme included the comparison could never succeed, even when glob
    # finds both folders.
    expected = {
        "mock-bucket/fake-project/something/target",
        "mock-bucket/fake-project/target",
    }

    assert len(result) == 2
    assert set(result) == expected

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions