Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# GitHub Actions workflow for FishStore: configures, builds, and runs all CMake-registered tests.

name: CMake

on: [ push ]

env:
  # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
  BUILD_TYPE: Release

jobs:
  build:
    # The CMake configure and build commands are platform agnostic and should work equally
    # well on Windows or Mac. You can convert this to a matrix build if you need
    # cross-platform coverage.
    # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
    runs-on: ubuntu-22.04

    steps:
      - name: Checkout repository
        # Pin to a released major version instead of a moving branch so that
        # upstream changes to the action cannot silently alter this workflow.
        uses: actions/checkout@v4

      - name: Checkout submodules
        run: git submodule update --init --recursive

      - name: Install dependencies
        # Refresh the package index first; a stale index on the runner image can
        # make the install fail with 404s for superseded package versions.
        run: |
          sudo apt-get update
          sudo apt-get install -y g++ libaio-dev uuid-dev libtbb-dev

      - name: Create Build Environment
        # Some projects don't allow in-source building, so create a separate build directory
        # We'll use this as our working directory for all subsequent commands
        run: cmake -E make_directory ${{runner.workspace}}/build

      - name: Configure CMake
        # Use a bash shell so we can use the same syntax for environment variable
        # access regardless of the host operating system
        shell: bash
        working-directory: ${{runner.workspace}}/build
        # The -S/-B options (CMake 3.13+) are the current convention for naming the
        # source and build directories. The positional source-directory form is kept
        # here because it works on every CMake version (older runner images shipped
        # 3.12) and working-directory already points at the build directory.
        run: cmake "$GITHUB_WORKSPACE" -DCMAKE_BUILD_TYPE="$BUILD_TYPE"

      - name: Build
        working-directory: ${{runner.workspace}}/build
        shell: bash
        # Execute the build. You can specify a specific target with "--target <NAME>"
        run: cmake --build . --config "$BUILD_TYPE"

      - name: Test
        working-directory: ${{runner.workspace}}/build
        shell: bash
        # Execute tests defined by the CMake configuration.
        # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
        # The regex is quoted so that bash passes it to ctest verbatim instead of
        # attempting filename expansion on ".*_test".
        run: ctest -j 1 --interactive-debug-mode 0 --output-on-failure -R '.*_test'
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -330,4 +330,8 @@ ASALocalRun/

# MFractors (Xamarin productivity tool) working folder
.mfractor/

# Build folders
/build
/cmake-build-debug
/cmake-build-release
35 changes: 11 additions & 24 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,39 +18,26 @@ if (MSVC)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG /OPT:REF /OPT:NOICF /INCREMENTAL:NO")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /DEBUG /OPT:REF /OPT:NOICF /INCREMENTAL:NO")
else()
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -g -D_DEBUG")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og -g -D_DEBUG -fPIC")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -fPIC")
endif()

# Always define _DEBUG when compiling the Debug configuration, regardless of target OS.
# The per-configuration COMPILE_DEFINITIONS_DEBUG directory property is ignored when
# policy CMP0043 is NEW (the default once cmake_minimum_required is 3.0 or newer),
# so express the same intent with a generator expression on COMPILE_DEFINITIONS.
# APPEND keeps any definitions already set on this directory.
set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS "$<$<CONFIG:Debug>:_DEBUG>")

##### BEGIN GOOGLE TEST INSTALLATION #####
# Copied from https://github.com/google/googletest/tree/master/googletest#incorporating-into-an-existing-cmake-project
# Copied from http://google.github.io/googletest/quickstart-cmake.html
# Download and unpack googletest at configure time.
# configure_file() instantiates the CMakeLists.txt.in template as
# googletest-download/CMakeLists.txt (a relative output path is resolved
# against the current binary directory).
configure_file(CMakeLists.txt.in googletest-download/CMakeLists.txt)
# Configure the generated download project with the same generator as this build.
execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
RESULT_VARIABLE result
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download )
# A non-zero result means the configure step failed; abort this configure run.
if(result)
message(FATAL_ERROR "CMake step for googletest failed: ${result}")
endif()
# Build the download project; presumably this is what fetches and unpacks the
# googletest sources (the template is not visible here).
execute_process(COMMAND ${CMAKE_COMMAND} --build .
RESULT_VARIABLE result
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download )
# A non-zero result means the build step failed; abort this configure run.
if(result)
message(FATAL_ERROR "Build step for googletest failed: ${result}")
endif()

# Prevent overriding the parent project's compiler/linker
# settings on Windows
# Declare googletest as external content, fetched from an archive of one
# specific commit so that the test framework version is reproducible.
include(FetchContent)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
# Populate the declared content and add it to this build.
FetchContent_MakeAvailable(googletest)

# Add googletest directly to our build. This defines
# the gtest and gtest_main targets.
# EXCLUDE_FROM_ALL keeps googletest's own targets out of the default build
# unless one of our targets depends on them.
add_subdirectory(${CMAKE_BINARY_DIR}/googletest-src
${CMAKE_BINARY_DIR}/googletest-build
EXCLUDE_FROM_ALL)
# Load CMake's GoogleTest module (provides gtest_discover_tests and gtest_add_tests).
include(GoogleTest)

##### END GOOGLE TEST INSTALLATION #####

Expand Down
2 changes: 1 addition & 1 deletion examples/online_demo-dir/online_demo.cc
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ int main(int argc, char* argv[]) {
int n_threads = atoi(argv[2]);
std::ifstream fin(argv[1]);
std::vector<std::string> batches;
const uint32_t json_batch_size = 1;
const uint32_t json_batch_size = 8;
uint32_t json_batch_cnt = 0;
size_t line_cnt = 0;
size_t record_cnt = 0;
Expand Down
4 changes: 2 additions & 2 deletions src/adapters/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ using store_t = fishstore::core::FishStore<FooAdapter, disk_t>;
# General Parser Interface
[`parser_api.h`](parser_api.h) provides a general parser interface that extension developers should comply with. Generally speaking, a parser should be constructible from a given list of field names, and should parse a batch of documents (or a single document), returning an iterator over all the records and all required fields. For each parsed field, users should be able to get the value in its corresponding format through interfaces like `GetAsInt()` or `GetAsDouble()`. Such functions return values as `NullableInt` and `NullableDouble`, defined under the `fishstore::adapter` scope.

**Note that `NullableInt` and `NullableStringRef` defined under `fishstore::core` scope has different interfaces thatn that in `fishstore::adapter`. Please do not be confused with them.**
**Note that `NullableInt` and `NullableStringRef` defined under the `fishstore::core` scope have different interfaces than those in `fishstore::adapter`. Please do not confuse them.**

# Parser Adapter

Expand Down Expand Up @@ -37,4 +37,4 @@ User should explicitly define the parser type in `parser_t`, parsed field type i
There are a few known limitations with simdjson parser wrapper and adapter:

- Note that simdjson currently only supports parsing one JSON record at a time. Thus, users can only feed one record in raw text to `BatchInsert()` at a time. As a result, users need to implement their own logic to delimit record boundaries within a batch at the application level.
- `SIMDJsonParser` and `SIMDJsonAdapter` only supports object-based field names (e.g., `actor.id`, `payload.action.type`). Arrays (like `a[0].b`) and wildcards `a.*.b` are not supported.
- `SIMDJsonParser` and `SIMDJsonAdapter` only support object-based field names (e.g., `actor.id`, `payload.action.type`) and array-indexed field names (like `a[0].b`); every field name must start with an object key (`[0].xyz` is not allowed). Wildcards (`a.*.b`) are not supported.
98 changes: 53 additions & 45 deletions src/adapters/common_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,53 +2,61 @@
// Licensed under the MIT license.

#pragma once

#include <cstddef>
#include <cstdint>
#include <string>

namespace fishstore {
namespace adapter {

// Non-owning reference to a character sequence: a pointer plus a length.
// The class only stores the pointer; it never copies or frees the characters.
class StringRef {
 public:
  // Constructs an empty reference (null pointer, zero length).
  StringRef() : ptr(nullptr), size(0) {}
  // Constructs a reference to `size_` characters starting at `ptr_`.
  StringRef(const char* ptr_, size_t size_) : ptr(ptr_), size(size_) {}

  // Returns the pointer given at construction (nullptr when default-constructed).
  const char* Data() const {
    return ptr;
  }

  // Returns the length given at construction (0 when default-constructed).
  // Returned as a plain value: a top-level `const` on a by-value return has no
  // effect and only triggers -Wignored-qualifiers warnings.
  size_t Length() const {
    return size;
  }

 private:
  const char* ptr;
  size_t size;
};

// Value wrapper that records whether a value is present.
// T must be default-constructible, since the empty state still holds a
// value-initialized T.
template<typename T>
struct Nullable {
  // Constructs an empty Nullable: HasValue() is false and the stored value is
  // value-initialized.
  Nullable() : has_value(false), value() {}
  // Constructs a Nullable holding a copy of `value_`. Intentionally implicit so
  // that a T can be returned where a Nullable<T> is expected.
  Nullable(const T& value_) : has_value(true), value(value_) {}

  // Returns true when this Nullable was constructed with a value.
  bool HasValue() const {
    return has_value;
  }
  // Returns the stored value. Does not check HasValue(); for an empty Nullable
  // this is the value-initialized T.
  // Const-qualified so it can be called on const objects and const references.
  const T& Value() const {
    return value;
  }

 private:
  bool has_value;
  T value;
};

// Convenience aliases of Nullable for common value types.
// NullableStringRef wraps a non-owning StringRef, whereas NullableString wraps
// a std::string and therefore holds its own copy of the characters.
using NullableInt = Nullable<int32_t>;
using NullableLong = Nullable<int64_t>;
using NullableFloat = Nullable<float>;
using NullableDouble = Nullable<double>;
using NullableBool = Nullable<bool>;
using NullableStringRef = Nullable<StringRef>;
using NullableString = Nullable<std::string>;
namespace adapter {

}
// Non-owning reference to a character sequence: a pointer plus a length.
// The class only stores the pointer; it never copies or frees the characters.
class StringRef {
public:
    // Constructs an empty reference (null pointer, zero length).
    StringRef() : ptr(nullptr), size(0) {}

    // Constructs a reference to `size_` characters starting at `ptr_`.
    StringRef(const char *ptr_, size_t size_) : ptr(ptr_), size(size_) {}

    // Returns the pointer given at construction (nullptr when default-constructed).
    const char *Data() const {
        return ptr;
    }

    // Returns the length given at construction (0 when default-constructed).
    // Returned as a plain value: a top-level `const` on a by-value return has no
    // effect and only triggers -Wignored-qualifiers warnings.
    size_t Length() const {
        return size;
    }

private:
    const char *ptr;
    size_t size;
};

// Value wrapper that records whether a value is present.
// T must be default-constructible, since the empty state still holds a
// value-initialized T.
template<typename T>
struct Nullable {
    // Constructs an empty Nullable: HasValue() is false and the stored value is
    // value-initialized.
    Nullable() : has_value(false), value() {}

    // Constructs a Nullable holding a copy of `value_`. Intentionally implicit so
    // that a T can be returned where a Nullable<T> is expected.
    Nullable(const T &value_) : has_value(true), value(value_) {}

    // Constructs a Nullable, which may have a value, and if it does, then
    // it will have the given value. `value_` is stored even when `has_value_`
    // is false.
    Nullable(const bool has_value_, const T &value_) : has_value(has_value_), value(value_) {}

    // Returns true when this Nullable was constructed as holding a value.
    bool HasValue() const {
        return has_value;
    }

    // Returns the stored value. Does not check HasValue().
    // Const-qualified so it can be called on const objects and const references.
    const T &Value() const {
        return value;
    }

private:
    bool has_value;
    T value;
};

// Convenience aliases of Nullable for common value types.
// NullableStringRef wraps a non-owning StringRef, whereas NullableString wraps
// a std::string and therefore holds its own copy of the characters.
using NullableInt = Nullable<int32_t>;
using NullableLong = Nullable<int64_t>;
using NullableFloat = Nullable<float>;
using NullableDouble = Nullable<double>;
using NullableBool = Nullable<bool>;
using NullableStringRef = Nullable<StringRef>;
using NullableString = Nullable<std::string>;

}
}
Loading