Skip to content

Commit 13da220

Browse files
committed
Add CID to sha256 digest conversion support
1 parent 66ec1e4 commit 13da220

File tree

9 files changed

+168
-79
lines changed

9 files changed

+168
-79
lines changed

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ assert result == "bafkreide5semuafsnds3ugrvm6fbwuyw2ijpj43gwjdxemstjkfozi37hq"
4040

4141
### Wrap an existing SHA 256 checksum as a CID
4242

43+
**WARNING:** This will lead to an invalid CID if an invalid digest is provided.
44+
The digest cannot be validated without the original data.
45+
4346
```python
4447
from hashlib import sha256
4548
from ipfs_cid import cid_sha256_wrap_digest
@@ -50,6 +53,23 @@ result = cid_sha256_wrap_digest(digest)
5053
assert result == "bafkreide5semuafsnds3ugrvm6fbwuyw2ijpj43gwjdxemstjkfozi37hq"
5154
```
5255

56+
### Unwrap a compatible CID to a sha256 digest
57+
58+
**NOTE:** The `cid_sha256_unwrap_digest` function will raise an `AttributeError`
59+
if the input CID is not using the same encoding parameters.
60+
61+
```python
62+
from hashlib import sha256
63+
from ipfs_cid import cid_sha256_unwrap_digest
64+
65+
data = b"Hello world"
66+
digest = sha256(data).digest()
67+
68+
cid = "bafkreide5semuafsnds3ugrvm6fbwuyw2ijpj43gwjdxemstjkfozi37hq"
69+
result = cid_sha256_unwrap_digest(cid)
70+
assert result == digest
71+
```
72+
5373
## Encoding Format
5474

5575
[The CID spec](https://github.com/multiformats/cid) supports multiple different

ipfs_cid/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
from .encode import cid_sha256_hash, cid_sha256_hash_chunked, cid_sha256_wrap_digest
1+
from .cid import cid_sha256_unwrap_digest, cid_sha256_wrap_digest
2+
from .sha256 import cid_sha256_hash, cid_sha256_hash_chunked

ipfs_cid/cid.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from ipfs_cid.base32 import Base32Multibase
2+
3+
# Single-byte fields that make up the header of the one CID layout handled
# here: CID version 1, raw binary content, sha2-256 hash, 0x20-byte digest.
MULTICODEC_CIDV1 = b"\x01"
MULTICODEC_RAW_BINARY = b"\x55"
MULTICODEC_SHA_2_256 = b"\x12"
MULTICODEC_SHA_2_256_LENGTH = b"\x20"

# Binary header placed in front of the digest before base32 encoding.
CID_PREFIX = b"".join(
    [
        MULTICODEC_CIDV1,
        MULTICODEC_RAW_BINARY,
        MULTICODEC_SHA_2_256,
        MULTICODEC_SHA_2_256_LENGTH,
    ],
)

# Integer form of the length byte. Lengths are compared as integers because
# ``len(x).to_bytes(1, "big")`` raises OverflowError once ``len(x)`` exceeds 255.
_SHA_2_256_DIGEST_SIZE = int.from_bytes(MULTICODEC_SHA_2_256_LENGTH, "big")


def cid_sha256_wrap_digest(digest: bytes) -> str:
    """Wrap an existing sha256 digest into a CID string.

    Only the length of ``digest`` is checked; whether it really is the sha256
    of some data cannot be verified here.

    Args:
        digest: The raw sha256 digest bytes.

    Returns:
        ``CID_PREFIX + digest`` encoded with ``Base32Multibase.encode``.

    Raises:
        AttributeError: If ``digest`` does not have the sha256 digest length,
            including inputs of 256 bytes or more.
    """
    if len(digest) != _SHA_2_256_DIGEST_SIZE:
        raise AttributeError("Invalid digest length")
    return Base32Multibase.encode(CID_PREFIX + digest)


def cid_sha256_unwrap_digest(cid: str) -> bytes:
    """Extract the sha256 digest from a CID produced with ``CID_PREFIX``.

    Args:
        cid: The CID string, decoded with ``Base32Multibase.decode``.

    Returns:
        The bytes that follow ``CID_PREFIX`` in the decoded CID.

    Raises:
        AttributeError: If the decoded CID does not start with ``CID_PREFIX``
            or the remaining bytes do not have the sha256 digest length,
            including payloads of 256 bytes or more.
    """
    decoded = Base32Multibase.decode(cid)
    prefix_len = len(CID_PREFIX)
    if decoded[:prefix_len] != CID_PREFIX:
        raise AttributeError("Unsupported CID format")
    digest = decoded[prefix_len:]
    if len(digest) != _SHA_2_256_DIGEST_SIZE:
        raise AttributeError("Unsupported CID format")
    return digest

ipfs_cid/encode.py

Lines changed: 0 additions & 33 deletions
This file was deleted.

ipfs_cid/sha256.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import array
2+
from hashlib import sha256
3+
from typing import Iterable, Union
4+
5+
from ipfs_cid.cid import cid_sha256_wrap_digest
6+
7+
# Buffer types accepted as input by the sha256 hashing helpers in this module.
ENCODEABLE_TYPES = Union[bytes, bytearray, memoryview, array.array]


def cid_sha256_hash(data: ENCODEABLE_TYPES) -> str:
    """Hash ``data`` with sha256 and return the digest wrapped as a CID string.

    Args:
        data: The complete payload to hash in a single call.

    Returns:
        The result of ``cid_sha256_wrap_digest`` for the payload's digest.
    """
    digest = sha256(data).digest()
    return cid_sha256_wrap_digest(digest)
13+
14+
15+
def cid_sha256_hash_chunked(data: Iterable[ENCODEABLE_TYPES]) -> str:
    """Hash an iterable of chunks with sha256 and return the digest as a CID.

    The chunks are fed to one hash object in iteration order.

    Args:
        data: Iterable yielding the payload piece by piece.

    Returns:
        The result of ``cid_sha256_wrap_digest`` for the accumulated digest.
    """
    hasher = sha256()
    for piece in data:
        hasher.update(piece)
    final_digest = hasher.digest()
    return cid_sha256_wrap_digest(final_digest)

tests/data.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Comparison targets were generated with Kubo CLI.
# Each entry is (payload factory, expected CID); a payload is only built when
# its factory is called.
TEST_SETS = [
    (
        lambda: b"Hello world",
        "bafkreide5semuafsnds3ugrvm6fbwuyw2ijpj43gwjdxemstjkfozi37hq",
    ),
    (
        # bytes(n) creates n zero bytes
        lambda: bytes(200000),
        "bafkreicmxpm34df2nbmdk5k7qj3vq4c5wwsbhrkjjq2cmlgslfdkoptvqi",
    ),
    (
        lambda: bytes(500000),
        "bafkreidlw2xpv2ve4girfzlgwrt4imaumorqwcqvxhecjcqa5wonrzmunm",
    ),
    (
        # repeating 0x00..0xff pattern
        lambda: bytes(value % 256 for value in range(200000)),
        "bafkreighu7ltw2gscebl67lntprhwqigjf7pzaizejf6x66snm3vkqn544",
    ),
    (
        lambda: bytes(value % 256 for value in range(500000)),
        "bafkreia7ldhwiil4nra2ymrkz66tyq2zzdwlypgeqvunj6d7sqd6gpmko4",
    ),
]

tests/test_cid.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from hashlib import sha256
2+
from typing import Callable
3+
4+
import pytest
5+
6+
from ipfs_cid import cid_sha256_unwrap_digest, cid_sha256_wrap_digest
7+
from tests.data import TEST_SETS
8+
9+
10+
def test_cid_sha256_wrap_digest_invalid_length():
    """A 40-byte input is rejected as an invalid digest length."""
    wrong_size_digest = b"\x00" * 40
    with pytest.raises(AttributeError, match="Invalid digest length"):
        cid_sha256_wrap_digest(wrong_size_digest)
13+
14+
15+
@pytest.mark.parametrize(
    "cid",
    [
        # NOTE(review): the "bafybei" CIDs presumably use a different content
        # codec than the supported prefix - confirm.
        "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi",
        "bafybeibnqmhw7bhvyalczhxzcgdhwk2lm3pgi4n2ctmpqgbnzyz6hpkkji",
        # Longer than the CIDs in the shared test data.
        "bafkreif2why4bwbiadz3gfjoik2ywzmumcnmebmec4evy4spctrxxd3m5ysa",
    ],
)
def test_cid_sha256_unwrap_digest_unsupported_format(cid: str):
    """Each listed CID is rejected with the "Unsupported CID format" error."""
    expect_rejection = pytest.raises(AttributeError, match="Unsupported CID format")
    with expect_rejection:
        cid_sha256_unwrap_digest(cid)
26+
27+
28+
# Comparison targets were generated with Kubo CLI
@pytest.mark.parametrize(("data_generator", "expected"), TEST_SETS)
def test_cid_sha256_wrap_unwrap_digest(
    data_generator: Callable[[], bytes],
    expected: str,
):
    """Wrapping a digest gives the expected CID; unwrapping gives it back."""
    payload = data_generator()
    digest = sha256(payload).digest()

    cid = cid_sha256_wrap_digest(digest)
    assert cid == expected

    round_tripped = cid_sha256_unwrap_digest(cid)
    assert round_tripped == digest

tests/test_encode.py

Lines changed: 0 additions & 45 deletions
This file was deleted.

tests/test_sha256.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from hashlib import sha256
2+
from io import BytesIO
3+
from typing import Callable, Iterable
4+
5+
import pytest
6+
7+
from ipfs_cid import cid_sha256_hash, cid_sha256_hash_chunked, cid_sha256_unwrap_digest
8+
from tests.data import TEST_SETS
9+
10+
11+
@pytest.mark.parametrize(("data_generator", "expected"), TEST_SETS)
def test_cid_sha256_hash(data_generator: Callable[[], bytes], expected: str) -> None:
    """Hashing a payload gives the expected CID, which unwraps to its digest."""
    payload = data_generator()
    reference_digest = sha256(payload).digest()

    hashed_cid = cid_sha256_hash(payload)

    assert hashed_cid == expected
    assert cid_sha256_unwrap_digest(hashed_cid) == reference_digest
18+
19+
20+
@pytest.mark.parametrize(("data_generator", "expected"), TEST_SETS)
def test_cid_sha256_hash_chunked(
    data_generator: Callable[[], bytes],
    expected: str,
) -> None:
    """Hashing a payload in 100-byte chunks gives the expected CID and digest."""

    def as_chunks(stream: BytesIO, chunk_size: int) -> Iterable[bytes]:
        # read() returns b"" at end of stream, which is falsy and ends the loop
        while chunk := stream.read(chunk_size):
            yield chunk

    payload = data_generator()
    reference_digest = sha256(payload).digest()

    chunk_source = as_chunks(BytesIO(payload), 100)
    chunked_cid = cid_sha256_hash_chunked(chunk_source)

    assert chunked_cid == expected
    assert cid_sha256_unwrap_digest(chunked_cid) == reference_digest

0 commit comments

Comments
 (0)