Skip to content

Commit

Permalink
Merge pull request #85 from paulsengroup/refactor/pythonic-api
Browse files Browse the repository at this point in the history
Make the public API more Pythonic
  • Loading branch information
robomics authored Oct 18, 2024
2 parents 41bd79f + ba54cda commit 79df47c
Show file tree
Hide file tree
Showing 25 changed files with 156 additions and 129 deletions.
20 changes: 11 additions & 9 deletions src/cooler_file_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
#include "hictkpy/cooler_file_writer.hpp"

#include <fmt/format.h>
#include <fmt/std.h>
#include <spdlog/spdlog.h>

#include <cassert>
#include <cstdint>
#include <filesystem>
#include <hictk/cooler/cooler.hpp>
#include <hictk/reference.hpp>
#include <hictk/tmpdir.hpp>
#include <hictk/type_traits.hpp>
#include <stdexcept>
#include <string>
Expand All @@ -28,22 +30,22 @@ namespace nb = nanobind;

namespace hictkpy {

CoolerFileWriter::CoolerFileWriter(std::string_view path_, const nb::dict &chromosomes_,
CoolerFileWriter::CoolerFileWriter(std::filesystem::path path_, const ChromosomeDict &chromosomes_,
std::uint32_t resolution_, std::string_view assembly,
const std::filesystem::path &tmpdir,
std::uint32_t compression_lvl)
: _path(std::string{path_}),
: _path(std::move(path_)),
_tmpdir(tmpdir, true),
_w(create_file(_path, chromosome_dict_to_reference(chromosomes_), resolution_, assembly,
_tmpdir())),
_w(create_file(_path.string(), chromosome_dict_to_reference(chromosomes_), resolution_,
assembly, _tmpdir())),
_compression_lvl(compression_lvl) {
if (std::filesystem::exists(_path)) {
throw std::runtime_error(
fmt::format(FMT_STRING("unable to create .cool file \"{}\": file already exists"), path()));
}
}

std::string_view CoolerFileWriter::path() const noexcept { return _path; }
const std::filesystem::path &CoolerFileWriter::path() const noexcept { return _path; }

std::uint32_t CoolerFileWriter::resolution() const noexcept {
if (_w.has_value()) {
Expand Down Expand Up @@ -111,7 +113,7 @@ void CoolerFileWriter::serialize([[maybe_unused]] const std::string &log_lvl_str
std::visit(
[&](const auto &num) {
using N = hictk::remove_cvref_t<decltype(num)>;
_w->aggregate<N>(_path, false, _compression_lvl);
_w->aggregate<N>(_path.string(), false, _compression_lvl);
},
_w->open("0").pixel_variant());

Expand Down Expand Up @@ -150,11 +152,11 @@ void CoolerFileWriter::bind(nb::module_ &m) {
cooler, "FileWriter", "Class representing a file handle to create .cool files.");

// NOLINTBEGIN(*-avoid-magic-numbers)
writer.def(nb::init<std::string_view, nb::dict, std::uint32_t, std::string_view,
const std::filesystem::path &, std::uint32_t>(),
writer.def(nb::init<std::filesystem::path, const ChromosomeDict &, std::uint32_t,
std::string_view, const std::filesystem::path &, std::uint32_t>(),
nb::arg("path"), nb::arg("chromosomes"), nb::arg("resolution"),
nb::arg("assembly") = "unknown",
nb::arg("tmpdir") = std::filesystem::temp_directory_path().string(),
nb::arg("tmpdir") = hictk::internal::TmpDir::default_temp_directory_path(),
nb::arg("compression_lvl") = 6, "Open a .cool file for writing.");
// NOLINTEND(*-avoid-magic-numbers)

Expand Down
42 changes: 25 additions & 17 deletions src/file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <filesystem>
#include <hictk/balancing/methods.hpp>
#include <hictk/balancing/weights.hpp>
#include <hictk/bin_table.hpp>
Expand Down Expand Up @@ -39,23 +40,28 @@
namespace nb = nanobind;

namespace hictkpy::file {
void ctor(hictk::File *fp, std::string_view path, std::optional<std::int32_t> resolution,
std::string_view matrix_type, std::string_view matrix_unit) {
new (fp) hictk::File{std::string{path},
!!resolution ? static_cast<std::uint32_t>(*resolution) : std::uint32_t{0},
static void ctor(hictk::File *fp, const std::filesystem::path &path,
std::optional<std::int32_t> resolution, std::string_view matrix_type,
std::string_view matrix_unit) {
new (fp) hictk::File{path.string(), static_cast<std::uint32_t>(resolution.value_or(0)),
hictk::hic::ParseMatrixTypeStr(std::string{matrix_type}),
hictk::hic::ParseUnitStr(std::string{matrix_unit})};
}

std::string repr(const hictk::File &f) { return fmt::format(FMT_STRING("File({})"), f.uri()); }
static std::string repr(const hictk::File &f) {
return fmt::format(FMT_STRING("File({})"), f.uri());
}

bool is_cooler(std::string_view uri) { return bool(hictk::cooler::utils::is_cooler(uri)); }
bool is_cooler(const std::filesystem::path &uri) {
return bool(hictk::cooler::utils::is_cooler(uri.string()));
}

bool is_hic(std::string_view uri) { return hictk::hic::utils::is_hic_file(std::string{uri}); }
bool is_hic(const std::filesystem::path &uri) { return hictk::hic::utils::is_hic_file(uri); }

hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range1, std::string_view range2,
std::string_view normalization, std::string_view count_type, bool join,
std::string_view query_type) {
static hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range1,
std::string_view range2, std::string_view normalization,
std::string_view count_type, bool join,
std::string_view query_type) {
if (count_type != "float" && count_type != "int") {
throw std::runtime_error(R"(count_type should be either "float" or "int")");
}
Expand Down Expand Up @@ -102,7 +108,7 @@ hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range1, std:
f.get());
}

[[nodiscard]] inline nb::dict get_cooler_attrs(const hictk::cooler::File &clr) {
static nb::dict get_cooler_attrs(const hictk::cooler::File &clr) {
nb::dict py_attrs;
const auto &attrs = clr.attributes();

Expand Down Expand Up @@ -153,7 +159,7 @@ hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range1, std:
return py_attrs;
}

[[nodiscard]] inline nb::dict get_hic_attrs(const hictk::hic::File &hf) {
static nb::dict get_hic_attrs(const hictk::hic::File &hf) {
nb::dict py_attrs;

py_attrs["bin_size"] = hf.resolution();
Expand All @@ -167,14 +173,14 @@ hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range1, std:
return py_attrs;
}

nb::dict attributes(const hictk::File &f) {
static nb::dict attributes(const hictk::File &f) {
if (f.is_cooler()) {
return get_cooler_attrs(f.get<hictk::cooler::File>());
}
return get_hic_attrs(f.get<hictk::hic::File>());
}

std::vector<std::string> avail_normalizations(const hictk::File &f) {
static std::vector<std::string> avail_normalizations(const hictk::File &f) {
const auto norms_ = f.avail_normalizations();
std::vector<std::string> norms{norms_.size()};
std::transform(norms_.begin(), norms_.end(), norms.begin(),
Expand All @@ -183,13 +189,15 @@ std::vector<std::string> avail_normalizations(const hictk::File &f) {
return norms;
}

[[nodiscard]] std::vector<double> weights(const hictk::File &f, std::string_view normalization,
bool divisive) {
static std::vector<double> weights(const hictk::File &f, std::string_view normalization,
bool divisive) {
const auto type = divisive ? hictk::balancing::Weights::Type::DIVISIVE
: hictk::balancing::Weights::Type::MULTIPLICATIVE;
return f.normalization(normalization).to_vector(type);
}

static std::filesystem::path get_path(const hictk::File &f) { return f.path(); }

void declare_file_class(nb::module_ &m) {
auto file = nb::class_<hictk::File>(m, "File",
"Class representing a file handle to a .cool or .hic file.");
Expand All @@ -203,7 +211,7 @@ void declare_file_class(nb::module_ &m) {
file.def("__repr__", &file::repr);

file.def("uri", &hictk::File::uri, "Return the file URI.");
file.def("path", &hictk::File::path, "Return the file path.");
file.def("path", &file::get_path, "Return the file path.");

file.def("is_hic", &hictk::File::is_hic, "Test whether file is in .hic format.");
file.def("is_cooler", &hictk::File::is_cooler, "Test whether file is in .cool format.");
Expand Down
41 changes: 23 additions & 18 deletions src/hic_file_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <cstdint>
#include <filesystem>
#include <hictk/reference.hpp>
#include <hictk/tmpdir.hpp>
#include <stdexcept>
#include <string>
#include <string_view>
Expand All @@ -25,21 +26,21 @@ namespace nb = nanobind;

namespace hictkpy {

HiCFileWriter::HiCFileWriter(std::string_view path, const nb::dict &chromosomes,
const std::vector<std::uint32_t> &resolutions,
HiCFileWriter::HiCFileWriter(const std::filesystem::path &path_, const ChromosomeDict &chromosomes,
const std::vector<std::uint32_t> &resolutions_,
std::string_view assembly, std::size_t n_threads,
std::size_t chunk_size, const std::filesystem::path &tmpdir,
std::uint32_t compression_lvl, bool skip_all_vs_all_matrix)
: _tmpdir(tmpdir, true),
_w(path, chromosome_dict_to_reference(chromosomes), resolutions, assembly, n_threads,
chunk_size, _tmpdir(), compression_lvl, skip_all_vs_all_matrix) {}
_w(path_.string(), chromosome_dict_to_reference(chromosomes), resolutions_, assembly,
n_threads, chunk_size, _tmpdir(), compression_lvl, skip_all_vs_all_matrix) {}

HiCFileWriter::HiCFileWriter(std::string_view path, const nb::dict &chromosomes,
HiCFileWriter::HiCFileWriter(const std::filesystem::path &path_, const ChromosomeDict &chromosomes,
std::uint32_t resolution, std::string_view assembly,
std::size_t n_threads, std::size_t chunk_size,
const std::filesystem::path &tmpdir, std::uint32_t compression_lvl,
bool skip_all_vs_all_matrix)
: HiCFileWriter(path, chromosomes, std::vector<std::uint32_t>{resolution}, assembly, n_threads,
: HiCFileWriter(path_, chromosomes, std::vector<std::uint32_t>{resolution}, assembly, n_threads,
chunk_size, tmpdir, compression_lvl, skip_all_vs_all_matrix) {}

void HiCFileWriter::serialize([[maybe_unused]] const std::string &log_lvl_str) {
Expand All @@ -63,7 +64,9 @@ void HiCFileWriter::serialize([[maybe_unused]] const std::string &log_lvl_str) {
#endif
}

std::string_view HiCFileWriter::path() const noexcept { return _w.path(); }
std::filesystem::path HiCFileWriter::path() const noexcept {
return std::filesystem::path{_w.path()};
}

const std::vector<std::uint32_t> &HiCFileWriter::resolutions() const noexcept {
return _w.resolutions();
Expand Down Expand Up @@ -95,23 +98,25 @@ void HiCFileWriter::bind(nb::module_ &m) {
hic, "FileWriter", "Class representing a file handle to create .hic files.");

// NOLINTBEGIN(*-avoid-magic-numbers)
writer.def(nb::init<std::string_view, nb::dict, std::uint32_t, std::string_view, std::size_t,
std::size_t, const std::filesystem::path &, std::uint32_t, bool>(),
writer.def(nb::init<const std::filesystem::path &, const ChromosomeDict &, std::uint32_t,
std::string_view, std::size_t, std::size_t, const std::filesystem::path &,
std::uint32_t, bool>(),
nb::arg("path"), nb::arg("chromosomes"), nb::arg("resolution"),
nb::arg("assembly") = "unknown", nb::arg("n_threads") = 1,
nb::arg("chunk_size") = 10'000'000,
nb::arg("tmpdir") = std::filesystem::temp_directory_path().string(),
nb::arg("tmpdir") = hictk::internal::TmpDir::default_temp_directory_path(),
nb::arg("compression_lvl") = 10, nb::arg("skip_all_vs_all_matrix") = false,
"Open a .hic file for writing.");

writer.def(
nb::init<std::string_view, nb::dict, const std::vector<std::uint32_t> &, std::string_view,
std::size_t, std::size_t, const std::filesystem::path &, std::uint32_t, bool>(),
nb::arg("path"), nb::arg("chromosomes"), nb::arg("resolutions"),
nb::arg("assembly") = "unknown", nb::arg("n_threads") = 1, nb::arg("chunk_size") = 10'000'000,
nb::arg("tmpdir") = std::filesystem::temp_directory_path().string(),
nb::arg("compression_lvl") = 10, nb::arg("skip_all_vs_all_matrix") = false,
"Open a .hic file for writing.");
writer.def(nb::init<const std::filesystem::path &, const ChromosomeDict &,
const std::vector<std::uint32_t> &, std::string_view, std::size_t,
std::size_t, const std::filesystem::path &, std::uint32_t, bool>(),
nb::arg("path"), nb::arg("chromosomes"), nb::arg("resolutions"),
nb::arg("assembly") = "unknown", nb::arg("n_threads") = 1,
nb::arg("chunk_size") = 10'000'000,
nb::arg("tmpdir") = hictk::internal::TmpDir::default_temp_directory_path(),
nb::arg("compression_lvl") = 10, nb::arg("skip_all_vs_all_matrix") = false,
"Open a .hic file for writing.");
// NOLINTEND(*-avoid-magic-numbers)

writer.def("__repr__", &hictkpy::HiCFileWriter::repr);
Expand Down
9 changes: 5 additions & 4 deletions src/include/hictkpy/cooler_file_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,24 @@
#include <string_view>

#include "hictkpy/nanobind.hpp"
#include "hictkpy/reference.hpp"

namespace hictkpy {

class CoolerFileWriter {
std::string _path{};
std::filesystem::path _path{};
hictk::internal::TmpDir _tmpdir{};
std::optional<hictk::cooler::SingleCellFile> _w{};
std::uint32_t _compression_lvl{};
bool _finalized{false};

public:
CoolerFileWriter() = delete;
CoolerFileWriter(std::string_view path_, const nanobind::dict& chromosomes_,
CoolerFileWriter(std::filesystem::path path_, const ChromosomeDict& chromosomes_,
std::uint32_t resolution_, std::string_view assembly,
const std::filesystem::path& tmpdir, std::uint32_t);
const std::filesystem::path& tmpdir, std::uint32_t compression_lvl);

[[nodiscard]] std::string_view path() const noexcept;
[[nodiscard]] const std::filesystem::path& path() const noexcept;
[[nodiscard]] std::uint32_t resolution() const noexcept;

[[nodiscard]] const hictk::Reference& chromosomes() const;
Expand Down
10 changes: 6 additions & 4 deletions src/include/hictkpy/file.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#pragma once

#include <cstdint>
#include <filesystem>
#include <hictk/file.hpp>
#include <optional>
#include <string>
Expand All @@ -15,13 +16,14 @@
#include "hictkpy/pixel_selector.hpp"

namespace hictkpy::file {
void ctor(hictk::File *fp, std::string_view path, std::optional<std::int32_t> resolution,
std::string_view matrix_type, std::string_view matrix_unit);
void ctor(hictk::File *fp, const std::filesystem::path &path,
std::optional<std::int32_t> resolution, std::string_view matrix_type,
std::string_view matrix_unit);

[[nodiscard]] std::string repr(const hictk::File &f);

[[nodiscard]] bool is_cooler(std::string_view uri);
[[nodiscard]] bool is_hic(std::string_view uri);
[[nodiscard]] bool is_cooler(const std::filesystem::path &uri);
[[nodiscard]] bool is_hic(const std::filesystem::path &uri);

[[nodiscard]] hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range1,
std::string_view range2, std::string_view normalization,
Expand Down
15 changes: 8 additions & 7 deletions src/include/hictkpy/hic_file_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <vector>

#include "hictkpy/nanobind.hpp"
#include "hictkpy/reference.hpp"

namespace hictkpy {

Expand All @@ -24,16 +25,16 @@ class HiCFileWriter {
bool _finalized{false};

public:
HiCFileWriter(std::string_view path, const nanobind::dict& chromosomes,
const std::vector<std::uint32_t>& resolutions, std::string_view assembly,
HiCFileWriter(const std::filesystem::path& path_, const ChromosomeDict& chromosomes,
const std::vector<std::uint32_t>& resolutions_, std::string_view assembly,
std::size_t n_threads, std::size_t chunk_size, const std::filesystem::path& tmpdir,
std::uint32_t compression_lvl, bool skip_all_vs_all_matrix);
HiCFileWriter(std::string_view path, const nanobind::dict& chromosomes, std::uint32_t resolution,
std::string_view assembly, std::size_t n_threads, std::size_t chunk_size,
const std::filesystem::path& tmpdir, std::uint32_t compression_lvl,
bool skip_all_vs_all_matrix);
HiCFileWriter(const std::filesystem::path& path_, const ChromosomeDict& chromosomes,
std::uint32_t resolution, std::string_view assembly, std::size_t n_threads,
std::size_t chunk_size, const std::filesystem::path& tmpdir,
std::uint32_t compression_lvl, bool skip_all_vs_all_matrix);

[[nodiscard]] std::string_view path() const noexcept;
[[nodiscard]] std::filesystem::path path() const noexcept;
[[nodiscard]] const std::vector<std::uint32_t>& resolutions() const noexcept;

[[nodiscard]] const hictk::Reference& chromosomes() const;
Expand Down
4 changes: 2 additions & 2 deletions src/include/hictkpy/multires_file.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

#pragma once

#include <string_view>
#include <filesystem>

#include "hictkpy/nanobind.hpp"

namespace hictkpy::multires_file {

[[nodiscard]] bool is_mcool_file(std::string_view path);
[[nodiscard]] bool is_mcool_file(const std::filesystem::path& path);

void declare_multires_file_class(nanobind::module_& m);

Expand Down
4 changes: 3 additions & 1 deletion src/include/hictkpy/reference.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@

namespace hictkpy {

[[nodiscard]] hictk::Reference chromosome_dict_to_reference(const nanobind::dict &chromosomes);
using ChromosomeDict = nanobind::typed<nanobind::dict, std::string, std::uint32_t>;

[[nodiscard]] hictk::Reference chromosome_dict_to_reference(const ChromosomeDict &chromosomes);

template <typename File>
inline nanobind::typed<nanobind::dict, std::string, std::uint32_t> get_chromosomes_from_file(
Expand Down
4 changes: 2 additions & 2 deletions src/include/hictkpy/singlecell_file.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

#pragma once

#include <string_view>
#include <filesystem>

#include "hictkpy/nanobind.hpp"

namespace hictkpy::singlecell_file {

[[nodiscard]] bool is_scool_file(std::string_view path);
[[nodiscard]] bool is_scool_file(const std::filesystem::path& path);

void declare_singlecell_file_class(nanobind::module_& m);

Expand Down
Loading

0 comments on commit 79df47c

Please sign in to comment.