Skip to content

Commit

Permalink
changeset
Browse files Browse the repository at this point in the history
  • Loading branch information
manzt committed Aug 22, 2023
1 parent b3a914e commit 177262b
Show file tree
Hide file tree
Showing 11 changed files with 75 additions and 32 deletions.
38 changes: 38 additions & 0 deletions .changeset/nice-jars-explode.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---
"@zarrita/indexing": patch
"@zarrita/ndarray": patch
"zarrita": patch
"@zarrita/core": patch
---

feat: Support `VLenUTF8` codec in v2 and introduce a strided JS "object" Array.

```python
import zarr
import numcodecs
import numpy as np

zarr.create_dataset(
"data.zarr",
data=np.array(
[[["a", "aa"], ["aaa", "aaaa"]],
[["b", "bb"], ["bbb", "bbbb"]]],
dtype=object
),
dtype="|O",
object_codec=numcodecs.VLenUTF8(),
chunks=(1, 1, 2),
)
```

```typescript
import * as zarr from "zarrita";

let store = new zarr.FetchStore("http://localhost:8080/data.zarr");
let arr = await zarr.open.v2(store, { kind: "array" });
let result = await zarr.get(arr);
// {
// data: ["a", "aa", "aaa", "aaaa", "b", "bb", "bbb", "bbbb"],
// shape: [2, 2, 2],
// stride: [4, 2, 1],
// }
```
1 change: 0 additions & 1 deletion packages/core/__tests__/consolidated.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,5 +86,4 @@ describe("openConsolidated", () => {
let arr = h.open(grp.resolve("1d.chunked.i2"), { kind: "array" });
expect(arr.kind).toBe("array");
});

});
6 changes: 2 additions & 4 deletions packages/core/__tests__/open.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -299,17 +299,16 @@ describe("v2", () => {
[[0, 0, 0], ["a", "aa"]],
[[1, 0, 0], ["b", "bb"]],
[[0, 1, 0], ["aaa", "aaaa"]],
[[1, 1, 0], ["bbb", "bbbb"]]
[[1, 1, 0], ["bbb", "bbbb"]],
])(`getChunk(%j) -> %j`, async (index, expected) => {
expect(await arr.getChunk(index)).toStrictEqual({
data: expected,
shape: [1, 1, 2],
stride: [2, 2, 1],
});
})
});
});


describe("3d.chunked.mixed.i2.C", async () => {
let arr = await open.v2(store.resolve("/3d.chunked.mixed.i2.C"), {
kind: "array",
Expand Down Expand Up @@ -891,5 +890,4 @@ describe("v3", () => {
});
});
});

});
5 changes: 5 additions & 0 deletions packages/core/__tests__/util.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ describe("is_dtype", () => {
["uint64", false],
["v2:U6", false],
["v2:S6", false],
["v2:object", false],
])("is_dtype(%s, 'number') -> %s", (dtype, expected) => {
expect(is_dtype(dtype, "number")).toBe(expected);
});
Expand All @@ -104,6 +105,7 @@ describe("is_dtype", () => {
["uint64", false],
["v2:U6", false],
["v2:S6", false],
["v2:object", false],
])("is_dtype(%s, 'boolean') -> %s", (dtype, expected) => {
expect(is_dtype(dtype, "boolean")).toBe(expected);
});
Expand All @@ -122,6 +124,7 @@ describe("is_dtype", () => {
["uint64", true],
["v2:U6", false],
["v2:S6", false],
["v2:object", false],
])("is_dtype(%s, 'bigint') -> %s", (dtype, expected) => {
expect(is_dtype(dtype, "bigint")).toBe(expected);
});
Expand All @@ -140,6 +143,7 @@ describe("is_dtype", () => {
["uint64", false],
["v2:U6", true],
["v2:S6", true],
["v2:object", false],
])("is_dtype(%s, 'string') -> %s", (dtype, expected) => {
expect(is_dtype(dtype, "string")).toBe(expected);
});
Expand All @@ -158,6 +162,7 @@ describe("is_dtype", () => {
"uint64",
"v2:U6",
"v2:S6",
"v2:object",
])("is_dtype(%s, %s) -> true", (dtype) => {
expect(is_dtype(dtype, dtype)).toBe(true);
});
Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/codecs/endian.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ function bytes_per_element<D extends DataType>(
return 4;
}

type SupportedDataType = Exclude<DataType, "v2:object:string">;
type SupportedDataType = Exclude<DataType, "v2:object">;

export class EndianCodec<D extends SupportedDataType> {
kind = "array_to_bytes";
Expand Down
6 changes: 3 additions & 3 deletions packages/core/src/codecs/vlen-utf8.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { Chunk, ObjectStr } from "../metadata.js";
import type { Chunk, ObjectType } from "../metadata.js";
import { get_strides } from "../util.js";

export class VLenUTF8 {
Expand All @@ -14,11 +14,11 @@ export class VLenUTF8 {
return new VLenUTF8(meta.shape);
}

encode(_chunk: Chunk<ObjectStr>): Uint8Array {
encode(_chunk: Chunk<ObjectType>): Uint8Array {
throw new Error("Method not implemented.");
}

decode(bytes: Uint8Array): Chunk<ObjectStr> {
decode(bytes: Uint8Array): Chunk<ObjectType> {
let decoder = new TextDecoder();
let view = new DataView(bytes.buffer);
let data = Array(view.getUint32(0, true));
Expand Down
8 changes: 5 additions & 3 deletions packages/core/src/metadata.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ export type UnicodeStr = `v2:U${number}`;
export type ByteStr = `v2:S${number}`;

/** @category Object */
export type ObjectStr = "v2:object:string";
export type ObjectType = "v2:object";

export type NumberDataType =
| Int8
Expand All @@ -54,12 +54,13 @@ export type NumberDataType =

export type BigintDataType = Int64 | Uint64;

export type StringDataType = UnicodeStr | ByteStr | ObjectStr;
export type StringDataType = UnicodeStr | ByteStr;

export type DataType =
| NumberDataType
| BigintDataType
| StringDataType
| ObjectType
| Bool;

export type Attributes = Record<string, unknown>;
Expand All @@ -69,6 +70,7 @@ export type Scalar<D extends DataType> = D extends Bool ? boolean
: D extends BigintDataType ? bigint
: D extends StringDataType ? string
: D extends NumberDataType ? number
: D extends ObjectType ? any
: never;

export type CodecMetadata = {
Expand Down Expand Up @@ -138,7 +140,7 @@ export type TypedArray<D extends DataType> = D extends Int8 ? Int8Array
: D extends Bool ? BoolArray
: D extends UnicodeStr ? UnicodeStringArray
: D extends ByteStr ? ByteStringArray
: D extends ObjectStr ? Array<string>
: D extends ObjectType ? Array<any>
: never;

export type TypedArrayConstructor<D extends DataType> = {
Expand Down
3 changes: 1 addition & 2 deletions packages/core/src/open.ts
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ export function open<Store extends Readable>(

export function open<Store extends Readable>(
location: Location<Store> | Store,
options: { kind: "auto" },
): Promise<Array<DataType, Store> | Group<Store>>;

export function open<Store extends Readable>(
Expand All @@ -158,7 +157,7 @@ export function open<Store extends Readable>(

export async function open<Store extends Readable>(
location: Location<Store> | Store,
options: { kind: "auto" | "array" | "group" } = { kind: "auto" },
options: { kind?: "array" | "group" } = {},
): Promise<Array<DataType, Store> | Group<Store>> {
return open_v3(location, options as any).catch((err) => {
if (err instanceof NodeNotFoundError) {
Expand Down
28 changes: 15 additions & 13 deletions packages/core/src/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import type {
DataType,
GroupMetadata,
NumberDataType,
ObjectType,
Scalar,
StringDataType,
TypedArrayConstructor,
Expand Down Expand Up @@ -59,7 +60,7 @@ const V2_STRING_REGEX = /v2:([US])(\d+)/;
export function get_ctr<D extends DataType>(
data_type: D,
): TypedArrayConstructor<D> {
if (data_type === "v2:object:string") {
if (data_type === "v2:object") {
return globalThis.Array as any;
}
let match = data_type.match(V2_STRING_REGEX);
Expand Down Expand Up @@ -125,13 +126,9 @@ const endian_regex = /^([<|>])(.*)$/;

function coerce_dtype(
dtype: string,
meta: ArrayMetadataV2,
): { data_type: DataType } | { data_type: DataType; endian: "little" | "big" } {
if (
dtype === "|O" &&
meta.filters?.[0]?.id === "vlen-utf8"
) {
return { data_type: "v2:object:string" };
if (dtype === "|O") {
return { data_type: "v2:object" };
}

let match = dtype.match(endian_regex);
Expand Down Expand Up @@ -169,13 +166,13 @@ export function v2_to_v3_array_metadata(
attributes: Record<string, unknown> = {},
): ArrayMetadata<DataType> {
let codecs: CodecMetadata[] = [];
let dtype = coerce_dtype(meta.dtype, meta);
let dtype = coerce_dtype(meta.dtype);
if (meta.order === "F") {
codecs.push({ name: "transpose", configuration: { order: "F" } });
}
if ("endian" in dtype && dtype.endian === "big") {
codecs.push({ name: "endian", configuration: { endian: "big" } });
}
}
for (let { id, ...configuration } of meta.filters ?? []) {
codecs.push({ name: id, configuration });
}
Expand Down Expand Up @@ -222,6 +219,7 @@ export type DataTypeQuery =
| "boolean"
| "number"
| "bigint"
| "object"
| "string";

export type NarrowDataType<
Expand All @@ -230,6 +228,7 @@ export type NarrowDataType<
> = Query extends "number" ? NumberDataType
: Query extends "bigint" ? BigintDataType
: Query extends "string" ? StringDataType
: Query extends "object" ? ObjectType
: Extract<Query, Dtype>;

export function is_dtype<Query extends DataTypeQuery>(
Expand All @@ -240,17 +239,20 @@ export function is_dtype<Query extends DataTypeQuery>(
query !== "number" &&
query !== "bigint" &&
query !== "boolean" &&
query !== "object" &&
query !== "string"
) {
return dtype === query;
}
const is_boolean = dtype === "bool";
let is_boolean = dtype === "bool";
if (query === "boolean") return is_boolean;
const is_string = dtype.startsWith("v2:U") || dtype.startsWith("v2:S");
let is_string = dtype.startsWith("v2:U") || dtype.startsWith("v2:S");
if (query === "string") return is_string;
const is_bigint = dtype === "int64" || dtype === "uint64";
let is_bigint = dtype === "int64" || dtype === "uint64";
if (query === "bigint") return is_bigint;
return !is_string && !is_bigint && !is_boolean;
let is_object = dtype === "v2:object";
if (query === "object") return is_object;
return !is_string && !is_bigint && !is_boolean && !is_object;
}

export type ShardingCodecMetadata = {
Expand Down
3 changes: 0 additions & 3 deletions packages/indexing/__tests__/get.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ async function get_v2(
}

describe("get v2", () => {

it("1d.contiguous.zlib.i2", async () => {
expect(await get_v2("/1d.contiguous.zlib.i2")).toMatchInlineSnapshot(`
{
Expand Down Expand Up @@ -568,7 +567,6 @@ describe("get v2", () => {
stride: [4, 2, 1],
});
});

});

async function get_v3(
Expand Down Expand Up @@ -684,7 +682,6 @@ describe("get v3", () => {
expect(res.shape).toStrictEqual([3, 3, 3]);
});


it("3d.chunked.mixed.i2.C", async () => {
let res = await get_v3("/3d.chunked.mixed.i2.C");
expect(res.data).toStrictEqual(new Int16Array(range(27)));
Expand Down
7 changes: 5 additions & 2 deletions packages/indexing/src/ops.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@ type CompatChunk<D extends core.DataType> = {
stride: number[];
};

function object_array_proxy<T extends core.DataType>(arr: T[], offset = 0, lengthArg?: number): TypedArrayProxy<T> {
function object_array_proxy<T extends core.DataType>(
arr: T[],
offset = 0,
lengthArg?: number,
): TypedArrayProxy<T> {
let length = lengthArg ?? arr.length - offset;
return new Proxy(arr, {
get(target, prop: string) {
Expand All @@ -57,7 +61,6 @@ function object_array_proxy<T extends core.DataType>(arr: T[], offset = 0, lengt
return true;
},
}) as any;

}

function string_array_proxy<D extends core.ByteStr | core.UnicodeStr>(
Expand Down

0 comments on commit 177262b

Please sign in to comment.