Skip to content

Commit

Permalink
datalake: moved some types to separate file
Browse files Browse the repository at this point in the history
Moved basic type definitions to a separate file to loosen the dependencies
on `data_writer_interface.h`.

Signed-off-by: Michał Maślanka <michal@redpanda.com>
  • Loading branch information
mmaslankaprv committed Oct 17, 2024
1 parent 96d8358 commit 24742dd
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 31 deletions.
17 changes: 17 additions & 0 deletions src/v/datalake/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ redpanda_cc_library(
include_prefix = "datalake",
visibility = [":__subpackages__"],
deps = [
":base_types",
":table_definition",
"//src/v/base",
"//src/v/datalake/coordinator:data_file",
Expand Down Expand Up @@ -161,3 +162,19 @@ redpanda_cc_library(
"@seastar",
],
)

# Library holding the basic datalake type definitions (base_types.h /
# base_types.cc). Its only dependencies are the named_type utility and fmt,
# and it is visible to subpackages of this package.
redpanda_cc_library(
name = "base_types",
srcs = [
"base_types.cc",
],
hdrs = [
"base_types.h",
],
include_prefix = "datalake",
visibility = [":__subpackages__"],
deps = [
"//src/v/utils:named_type",
"@fmt",
],
)
1 change: 1 addition & 0 deletions src/v/datalake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ v_cc_library(
schema_protobuf.cc
protobuf_utils.cc
values_protobuf.cc
base_types.cc
DEPS
v::datalake_common
v::datalake_coordinator
Expand Down
24 changes: 24 additions & 0 deletions src/v/datalake/base_types.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright 2024 Redpanda Data, Inc.
*
* Licensed as a Redpanda Enterprise file under the Redpanda Community
* License (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* https://github.com/redpanda-data/redpanda/blob/master/licenses/rcl.md
*/
#include "datalake/base_types.h"

#include <fmt/core.h>
namespace datalake {
/**
 * Writes a single-line, brace-delimited summary of the given file metadata to
 * the stream and returns the stream to allow chaining.
 *
 * The `path` member is printed under the `relative_path` label, and
 * `size_bytes` is printed before `row_count`.
 */
std::ostream& operator<<(std::ostream& o, const local_file_metadata& f_meta) {
    // Bind each member by name so the argument list below mirrors the order
    // of the placeholders in the format string.
    const auto& file_path = f_meta.path;
    const auto& bytes = f_meta.size_bytes;
    const auto& rows = f_meta.row_count;
    const auto& file_hour = f_meta.hour;

    fmt::print(
      o,
      "{{relative_path: {}, size_bytes: {}, row_count: {}, hour: {}}}",
      file_path,
      bytes,
      rows,
      file_hour);
    return o;
}
} // namespace datalake
36 changes: 36 additions & 0 deletions src/v/datalake/base_types.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Copyright 2024 Redpanda Data, Inc.
*
* Licensed as a Redpanda Enterprise file under the Redpanda Community
* License (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* https://github.com/redpanda-data/redpanda/blob/master/licenses/rcl.md
*/
#pragma once
#include "utils/named_type.h"

#include <filesystem>
namespace datalake {
/**
* Definitions of local and remote paths. As the names indicate, the local
* path always points to a location on local disk, whereas the remote path is
* the path of an object in the object store.
*
* Both aliases wrap std::filesystem::path; the distinct tag types make
* local_path and remote_path different types.
*/
using local_path = named_type<std::filesystem::path, struct local_path_tag>;
using remote_path = named_type<std::filesystem::path, struct remote_path_tag>;

/**
* Simple type describing local parquet file metadata with its path and basic
* statistics
*/
struct local_file_metadata {
// Path of the file, expressed with the local_path type.
local_path path;
// Number of rows in the file; defaults to 0.
size_t row_count = 0;
// Size of the file in bytes; defaults to 0.
size_t size_bytes = 0;
// NOTE(review): presumably the hour bucket the file's data belongs to -
// the exact meaning is not visible here, confirm against the writers.
int hour = 0;

// Stream insertion operator for printing the metadata; only declared here,
// the definition lives out of line.
friend std::ostream&
operator<<(std::ostream& o, const local_file_metadata& r);
};
} // namespace datalake
10 changes: 0 additions & 10 deletions src/v/datalake/data_writer_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,4 @@ std::string data_writer_error_category::message(int ev) const {
}
}

/**
 * Writes a single-line, brace-delimited summary of the given file metadata to
 * the stream and returns the stream to allow chaining.
 *
 * The `path` member is printed under the `relative_path` label, and
 * `size_bytes` is printed before `row_count`.
 */
std::ostream& operator<<(std::ostream& o, const local_file_metadata& f_meta) {
    // Bind each member by name so the argument list below mirrors the order
    // of the placeholders in the format string.
    const auto& file_path = f_meta.path;
    const auto& bytes = f_meta.size_bytes;
    const auto& rows = f_meta.row_count;
    const auto& file_hour = f_meta.hour;

    fmt::print(
      o,
      "{{relative_path: {}, size_bytes: {}, row_count: {}, hour: {}}}",
      file_path,
      bytes,
      rows,
      file_hour);
    return o;
}
} // namespace datalake
22 changes: 1 addition & 21 deletions src/v/datalake/data_writer_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,13 @@
#pragma once

#include "base/outcome.h"
#include "datalake/base_types.h"
#include "iceberg/datatypes.h"
#include "iceberg/values.h"

#include <cstddef>

namespace datalake {
/**
* Definitions of local and remote paths. As the names indicate, the local
* path always points to a location on local disk, whereas the remote path is
* the path of an object in the object store.
*
* Both aliases wrap std::filesystem::path; the distinct tag types make
* local_path and remote_path different types.
*/
using local_path = named_type<std::filesystem::path, struct local_path_tag>;
using remote_path = named_type<std::filesystem::path, struct remote_path_tag>;

/**
* Simple type describing local parquet file metadata with its path and basic
* statistics
*/
struct local_file_metadata {
// Path of the file, expressed with the local_path type.
local_path path;
// Number of rows in the file; defaults to 0.
size_t row_count = 0;
// Size of the file in bytes; defaults to 0.
size_t size_bytes = 0;
// NOTE(review): presumably the hour bucket the file's data belongs to -
// the exact meaning is not visible here, confirm against the writers.
int hour = 0;

// Stream insertion operator for printing the metadata; only declared here,
// the definition lives out of line.
friend std::ostream&
operator<<(std::ostream& o, const local_file_metadata& r);
};

enum class data_writer_error {
ok = 0,
Expand Down
1 change: 1 addition & 0 deletions src/v/datalake/tests/gtest_record_multiplexer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*
* https://github.com/redpanda-data/redpanda/blob/master/licenses/rcl.md
*/
#include "datalake/base_types.h"
#include "datalake/batching_parquet_writer.h"
#include "datalake/record_multiplexer.h"
#include "datalake/tests/test_data_writer.h"
Expand Down

0 comments on commit 24742dd

Please sign in to comment.