Skip to content

Commit

Permalink
Add an initial tensorstore cli tool.
Browse files Browse the repository at this point in the history
Currently supports:
* tscli list  --source [globs...]
* tscli copy  --source --target
* tscli search --source
* tscli print_spec --spec
* tscli print_stats --spec [boxes...]

PiperOrigin-RevId: 684912204
Change-Id: I13229643fc65d3352670dbdc5a62fdd016259ec6
  • Loading branch information
laramiel authored and copybara-github committed Oct 11, 2024
1 parent b2b7c21 commit 22b804b
Show file tree
Hide file tree
Showing 11 changed files with 1,203 additions and 0 deletions.
84 changes: 84 additions & 0 deletions tensorstore/tscli/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
load("//bazel:tensorstore.bzl", "tensorstore_cc_binary", "tensorstore_cc_library", "tensorstore_cc_test")

package(default_visibility = ["//visibility:public"])

licenses(["notice"])

# Unit test for the helpers declared in args.h; links against :tsclilib.
tensorstore_cc_test(
name = "args_test",
srcs = ["args_test.cc"],
deps = [
":tsclilib",
"@com_google_googletest//:gtest_main",
],
)

# Library with the tscli argument-parsing helpers (args.cc) and the
# per-command sources (kvstore_*.cc, ts_*.cc). Shared by the :tscli binary
# and :args_test.
tensorstore_cc_library(
name = "tsclilib",
srcs = [
"args.cc",
"kvstore_copy.cc",
"kvstore_list.cc",
"ts_print_spec.cc",
"ts_print_stats.cc",
"ts_search.cc",
],
hdrs = [
"args.h",
"cli.h",
],
deps = [
"//tensorstore",
"//tensorstore:array_storage_statistics",
"//tensorstore:box",
"//tensorstore:context",
"//tensorstore:open",
"//tensorstore:open_mode",
"//tensorstore:spec",
"//tensorstore/index_space:dim_expression",
"//tensorstore/internal:path",
"//tensorstore/internal/json_binding",
"//tensorstore/kvstore",
"//tensorstore/kvstore:generation",
"//tensorstore/kvstore:key_range",
"//tensorstore/util:executor",
"//tensorstore/util:future",
"//tensorstore/util:json_absl_flag",
"//tensorstore/util:quote_string",
"//tensorstore/util:result",
"//tensorstore/util:span",
"//tensorstore/util:status",
"@com_github_nlohmann_json//:json",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/flags:parse",
"@com_google_absl//absl/functional:function_ref",
"@com_google_absl//absl/log",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:string_view",
"@com_google_absl//absl/synchronization",
"@com_google_re2//:re2",
],
)

# The tscli command-line binary: main.cc plus :tsclilib, linked with the
# all_drivers targets for both tensorstore drivers and kvstore drivers.
tensorstore_cc_binary(
name = "tscli",
srcs = [
"main.cc",
],
deps = [
":tsclilib",
"//tensorstore:context",
"//tensorstore/driver:all_drivers",
"//tensorstore/internal/metrics:collect",
"//tensorstore/internal/metrics:registry",
"//tensorstore/kvstore:all_drivers",
"//tensorstore/util:json_absl_flag",
"@com_google_absl//absl/base:log_severity",
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/flags:parse",
"@com_google_absl//absl/log:globals",
"@com_google_absl//absl/log:initialize",
"@com_google_absl//absl/status",
],
)
198 changes: 198 additions & 0 deletions tensorstore/tscli/args.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
// Copyright 2024 The TensorStore Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "tensorstore/tscli/args.h"

#include <cassert>
#include <cstdint>
#include <string>
#include <string_view>
#include <vector>

#include "absl/container/flat_hash_set.h"
#include "absl/status/status.h"
#include "absl/strings/ascii.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
#include "tensorstore/util/span.h"
#include "tensorstore/util/status.h"

namespace tensorstore {
namespace cli {

// Parses the long options described by `options` out of `flags.argv`.
//
// Every element of `flags.argv` after the first is compared against each
// option's `longname` (options with an empty `longname` are skipped).  Two
// spellings are accepted:
//
//   --name value   The next argv element, if any, is passed to `parse` and is
//                  consumed as well.  When `--name` is the last element,
//                  `parse` receives an empty string_view.
//   --name=value   The text following '=' is passed to `parse`.
//
// The first non-OK status returned by an option's `parse` callback is
// returned immediately.  On success, every entry of `flags.positional_args`
// whose `data()` pointer equals a consumed argv element is removed, and
// `absl::OkStatus()` is returned.
absl::Status TryParseOptions(CommandFlags& flags,
                             tensorstore::span<Option> options) {
  // With fewer than two elements there is nothing after argv[0] to scan.
  // Returning early also avoids forming `begin() + 1` on an empty vector,
  // which would be undefined behavior.
  if (flags.argv.size() < 2) return absl::OkStatus();

  // Addresses of the argv strings consumed as an option or an option value.
  absl::flat_hash_set<uintptr_t> handled;
  const auto mark_handled = [&handled](const char* consumed) {
    handled.insert(reinterpret_cast<uintptr_t>(consumed));
  };

  for (auto it = flags.argv.begin() + 1; it != flags.argv.end(); ++it) {
    const std::string_view it_str = *it;

    // Try each option in order; the first one that parses wins.
    for (auto& opt : options) {
      if (opt.longname.empty()) continue;
      std::string_view arg = it_str;
      if (!absl::ConsumePrefix(&arg, opt.longname)) continue;

      if (arg.empty()) {
        // `--name value`: the value, when present, is the next argv element.
        mark_handled(*it);
        std::string_view value;
        if (it + 1 != flags.argv.end()) {
          ++it;
          value = *it;
          mark_handled(*it);
        }
        TENSORSTORE_RETURN_IF_ERROR(opt.parse(value));
        break;
      }
      if (absl::ConsumePrefix(&arg, "=")) {
        // `--name=value`: the value is the remainder of this element.
        TENSORSTORE_RETURN_IF_ERROR(opt.parse(arg));
        mark_handled(*it);
        break;
      }
      // `longname` was only a prefix of a longer argument (for example
      // `--source` against `--sources`); keep trying the other options.
    }
  }

  // Compact positional_args in place, dropping every consumed argument.
  auto keep = flags.positional_args.begin();
  for (auto cur = flags.positional_args.begin();
       cur != flags.positional_args.end(); ++cur) {
    if (handled.contains(reinterpret_cast<uintptr_t>(cur->data()))) {
      continue;
    }
    *keep++ = *cur;
  }
  flags.positional_args.erase(keep, flags.positional_args.end());
  return absl::OkStatus();
}

// Converts a glob pattern into an anchored regular expression string.
//
// Translation rules:
//   *       -> [^/]*  any run of characters other than '/'.
//   **...   -> .*     two or more consecutive '*'; may match '/'.
//   ?       -> [^/]   exactly one character other than '/'.
//   [...]   -> a character class that never matches '/'.  A leading '!' or
//              '^' negates the class.  A '[' that has no closing ']' is
//              emitted as a literal '['.
//   \x      -> escaped x when x is not alphanumeric; before an alphanumeric
//              the backslash is dropped; a trailing backslash is a literal
//              backslash.
//   . + { } ( ) | ^ $ are backslash-escaped; any other character is copied.
//
// The result always starts with '^' and ends with '$'.
std::string GlobToRegex(std::string_view glob) {
  // Character class that matches nothing.  It replaces a bracket expression
  // whose only members were '/' (for example "[/]" or "[/-/]"): emitting a
  // bare "[]" would not be an empty class, because a ']' directly after '['
  // is read as a literal member and the class would swallow the text that
  // follows.
  static constexpr char kMatchNothing[] = "[^\\s\\S]";

  std::string re;
  re.reserve(glob.size() * 2);
  re.push_back('^');

  while (!glob.empty()) {
    const char c = glob[0];
    glob.remove_prefix(1);
    switch (c) {
      case '*': {
        bool is_star_star = false;
        while (!glob.empty() && glob[0] == '*') {
          glob.remove_prefix(1);
          is_star_star = true;
        }
        // TODO: Handle **? / **/, etc.
        re.append(is_star_star ? ".*" : "[^/]*");
        break;
      }
      case '?':
        re.append("[^/]");
        break;
      case '[': {
        if (glob.size() < 2 || glob.find(']', 1) == std::string_view::npos) {
          // No closing bracket: treat as a literal '['.
          re.append("\\[");
          break;
        }
        // From here on a ']' is known to lie ahead of every position examined
        // inside the loop below, which keeps glob[1] / glob[2] in bounds.
        const auto class_start = re.size();
        re.push_back('[');
        bool is_exclude = false;
        if (glob[0] == '!' || glob[0] == '^') {
          is_exclude = true;
          // A negated class must still refuse to match '/'.
          re.append("^/");
          glob.remove_prefix(1);
        }
        const auto members_start = re.size();
        const auto unread_before = glob.size();

        // Appends a class member.  A '^' that would become the first member
        // of a non-negated class (possible once a preceding '/' has been
        // dropped) is escaped so that it does not negate the class.
        const auto push_member = [&](char member) {
          if (member == '^' && !is_exclude && re.size() == members_start) {
            re.push_back('\\');
          }
          re.push_back(member);
        };

        // Copy the characters.
        while (glob[0] != ']') {
          if (glob[0] == '[' && glob[1] == ':') {
            // Escape '[' to avoid character classes.
            re.append("\\[");
            glob.remove_prefix(1);
          } else if (glob[1] != '-' || glob[2] == ']') {
            // Not a range, so copy the character unless it is '/'.
            if (glob[0] != '/') push_member(glob[0]);
            glob.remove_prefix(1);
          } else if (!is_exclude && glob[0] <= '/' && '/' <= glob[2]) {
            // The included range spans '/': split it into the part below
            // '/' and the part above it.  "/-/" contributes nothing.
            if (glob[0] < '/') {
              re.push_back(glob[0]);
              re.push_back('-');
              re.push_back(static_cast<char>('/' - 1));
            }
            if ('/' < glob[2]) {
              re.push_back(static_cast<char>('/' + 1));
              re.push_back('-');
              re.push_back(glob[2]);
            }
            glob.remove_prefix(3);
          } else {
            // Range cannot match '/', so copy it as-is.
            push_member(glob[0]);
            re.push_back('-');
            re.push_back(glob[2]);
            glob.remove_prefix(3);
          }
        }

        const bool all_members_dropped = !is_exclude &&
                                         re.size() == members_start &&
                                         glob.size() != unread_before;
        if (all_members_dropped) {
          // Every member was '/', so no path character can match here.
          re.resize(class_start);
          re.append(kMatchNothing);
        } else {
          re.push_back(']');
        }
        glob.remove_prefix(1);
        break;
      }
      case '.':
      case '+':
      case '{':
      case '}':
      case '(':
      case ')':
      case '|':
      case '^':
      case '$': {
        // Escape special characters.
        re.push_back('\\');
        re.push_back(c);
        break;
      }
      case '\\': {
        if (glob.empty()) {
          // Trailing backslash: match a literal backslash.
          re.append("\\\\");
          break;
        }
        const char next = glob[0];
        const bool next_is_ascii_alnum = (next >= '0' && next <= '9') ||
                                         (next >= 'a' && next <= 'z') ||
                                         (next >= 'A' && next <= 'Z');
        if (!next_is_ascii_alnum) {
          re.push_back('\\');
          re.push_back(next);
          glob.remove_prefix(1);
        }
        // Otherwise the backslash is dropped and the alphanumeric character
        // is handled by the next iteration; keeping the backslash could form
        // a regex escape such as \d or \w.
        break;
      }
      default:
        re.push_back(c);
        break;
    }
  }
  re.push_back('$');
  return re;
}

} // namespace cli
} // namespace tensorstore
56 changes: 56 additions & 0 deletions tensorstore/tscli/args.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright 2024 The TensorStore Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef TENSORSTORE_TSCLI_ARGS_H_
#define TENSORSTORE_TSCLI_ARGS_H_

#include <string>
#include <string_view>
#include <vector>

#include "absl/flags/parse.h"
#include "absl/functional/function_ref.h"
#include "absl/status/status.h"
#include "absl/strings/string_view.h"
#include "tensorstore/util/span.h"

namespace tensorstore {
namespace cli {

/// Flag representation after calling absl::ParseAbseilFlagsOnly.
struct CommandFlags {
// Raw argument vector. TryParseOptions scans it starting at index 1, so
// index 0 is presumably the program/command name (confirm against caller).
std::vector<char*> argv;
// Flags reported as unrecognized; presumably filled in by
// absl::ParseAbseilFlagsOnly (confirm against caller).
std::vector<absl::UnrecognizedFlag> unrecognized_flags;
// Positional arguments. TryParseOptions removes every entry whose data()
// pointer equals an argv element that it consumed as an option or as an
// option value, so these views are expected to alias the argv strings.
std::vector<std::string_view> positional_args;
};

/// Representation of a cli option.
struct Option {
// Long option name. Should begin with a "--" prefix. TryParseOptions skips
// options whose longname is empty.
std::string_view longname;
// Parsing function. An error status indicates an invalid argument.
// It receives the option's value: the text after '=' for `--name=value`, or
// the following argument for `--name value`.
// NOTE: absl::FunctionRef does not own the callable it refers to, so the
// callable must outlive every use of this Option.
absl::FunctionRef<absl::Status(std::string_view)> parse;
};

// Try to parse cli options.
//
// Scans `flags.argv` (starting at index 1) for arguments that match an
// option's `longname`, written either as `--name value` or `--name=value`,
// and calls that option's `parse` with the value. Returns the first error
// produced by a `parse` call. On success, the arguments consumed as options
// or option values are removed from `flags.positional_args`.
absl::Status TryParseOptions(CommandFlags& flags,
tensorstore::span<Option> options);

// Convert a glob pattern to a regular expression.
//
// The returned expression is anchored with '^' and '$'. `*` becomes `[^/]*`,
// two or more consecutive `*` become `.*`, `?` becomes `[^/]`, and bracket
// expressions are rewritten so that they do not match '/'. For example,
// "a*b" yields "^a[^/]*b$" and "a[!A-Z]b" yields "^a[^/A-Z]b$".
std::string GlobToRegex(std::string_view glob);

} // namespace cli
} // namespace tensorstore

#endif // TENSORSTORE_TSCLI_ARGS_H_
37 changes: 37 additions & 0 deletions tensorstore/tscli/args_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright 2024 The TensorStore Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "tensorstore/tscli/args.h"

#include <gtest/gtest.h>

using ::tensorstore::cli::GlobToRegex;

namespace {

// Checks the exact regex text produced by GlobToRegex for wildcards, bracket
// expressions, and escaping.
TEST(GlobToRegex, Basic) {
// Wildcards: `*` and `?` exclude '/', while `**` maps to `.*`.
EXPECT_EQ(GlobToRegex("a*b"), "^a[^/]*b$");
EXPECT_EQ(GlobToRegex("a**b"), "^a.*b$");
EXPECT_EQ(GlobToRegex("a?b"), "^a[^/]b$");

// Bracket expressions: an unterminated '[' is a literal, ranges are copied,
// a negated class additionally excludes '/', and a trailing '-' is a member.
EXPECT_EQ(GlobToRegex("a[b"), "^a\\[b$");
EXPECT_EQ(GlobToRegex("a[A-Z]b"), "^a[A-Z]b$");
EXPECT_EQ(GlobToRegex("a[!A-Z]b"), "^a[^/A-Z]b$");
EXPECT_EQ(GlobToRegex("a[A-]b"), "^a[A-]b$");

// Regex metacharacters are escaped; a backslash escapes a following
// non-alphanumeric character, and a trailing backslash is a literal.
EXPECT_EQ(GlobToRegex("a.+{}()|^$b"), "^a\\.\\+\\{\\}\\(\\)\\|\\^\\$b$");
EXPECT_EQ(GlobToRegex("a\\-b\\"), "^a\\-b\\\\$");
}

} // namespace
Loading

0 comments on commit 22b804b

Please sign in to comment.