Skip to content

Commit

Permalink
Python wrapper. (apache#112)
Browse files Browse the repository at this point in the history
* Add TsFile Python.

* Fix macos and windows build

* fix import error in windows python over 3.8.

* refine python code.

---------

Co-authored-by: Haonan <hhaonan@outlook.com>
  • Loading branch information
ColinLeeo and HTHou authored Jul 3, 2024
1 parent 6a5f833 commit ff5617b
Show file tree
Hide file tree
Showing 15 changed files with 1,279 additions and 12 deletions.
13 changes: 8 additions & 5 deletions .github/workflows/unit-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,16 @@ jobs:
core.setOutput('platform_suffix', ``)
}
- name: Install python packages
if: ${{ matrix.os == 'ubuntu-latest'}}
run: pip3 install cython pandas numpy

# Use Python 3.12 to work around Cython files failing to compile on Mingw-w64 64-bit
# https://bugs.python.org/issue40167
- name: Set up python 3.12 for windows
if: ${{ matrix.os == 'windows-latest'}}
uses: actions/setup-python@v5
with:
python-version: '3.12'

# Run the actual maven build including all unit- and integration-tests.
- name: Build and test with Maven (All others)
shell: bash
run: |
./mvnw${{ steps.platform_suffix.outputs.platform_suffix }} -P with-java,with-cpp clean verify
./mvnw${{ steps.platform_suffix.outputs.platform_suffix }} -P with-java,with-cpp,with-python clean verify
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,14 @@ docs/node_modules/
docs/src/.vuepress/.cache/
docs/src/.vuepress/.temp/
docs/src/.vuepress/dist/

# python files
python/build
python/tsfile/__pycache__
python/tsfile/*so*
python/tsfile/*dll*
python/tsfile/*dylib*
python/tsfile/*.h
python/tsfile/*.cpp
python/data
python/venv/*
16 changes: 9 additions & 7 deletions cpp/src/cwrapper/TsFile-cwrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,10 +206,6 @@ ErrorCode tsfile_register_table_column(CTsFileWriter writer,
get_datatype(schema->column_def),
get_data_encoding(schema->column_def),
get_data_compression(schema->column_def));
std::cout << "register table column name" << table_name << std::endl;
std::cout << "register column name" << schema->name << std::endl;
std::cout << "register column type" << get_datatype(schema->column_def)
<< std::endl;
return ret;
}

Expand Down Expand Up @@ -644,7 +640,9 @@ QueryDataRet ts_reader_begin_end(CTsFileReader reader, const char* table_name,
ret->data = qds;
ret->column_num = column_num;
ret->column_names = (char**)malloc(column_num * sizeof(char*));
memcpy(ret->column_names, columns_name, column_num * sizeof(char*));
for (int i = 0; i < column_num; i++) {
ret->column_names[i] = strdup(columns_name[i]);
}
storage::QueryExpression::destory(query_expr);
return ret;
}
Expand All @@ -666,14 +664,19 @@ QueryDataRet ts_reader_read(CTsFileReader reader, const char* table_name,
ret->data = qds;
ret->column_names = (char**)malloc(column_num * sizeof(char*));
ret->column_num = column_num;
memcpy(ret->column_names, columns_name, column_num * sizeof(char*));
for (int i = 0; i < column_num; i++) {
ret->column_names[i] = strdup(columns_name[i]);
}
storage::QueryExpression::destory(query_expr);
return ret;
}

ErrorCode destory_query_dataret(QueryDataRet data) {
storage::QueryDataSet* qds = (storage::QueryDataSet*)data->data;
delete qds;
for (int i = 0; i < data->column_num; i++) {
free(data->column_names[i]);
}
free(data->column_names);
free(data);
return E_OK;
Expand All @@ -699,7 +702,6 @@ DataResult* ts_next(QueryDataRet data, int expect_line_count) {
get_schema_info(field->type_));
}
init_tablet = true;
std::cout << "init finished" << std::endl;
}
for (int col = 0; col < column_num; col++) {
storage::Field* field = record->get_field(col);
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/reader/tsfile_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,14 @@ int TsFileExecutor::execute(QueryExpression *query_expr,
}

if (regular_expr == nullptr || regular_expr->type_ == GLOBALTIME_EXPR) {
#if DEBUG_SE
std::cout << "got into 1 path" << std::endl;
#endif
return execute_may_with_global_timefilter(query_exprs_, ret_qds);
} else {
#if DEBUG_SE
std::cout << "got into 2 path" << std::endl;
#endif
// no filter or just global time filter
return execute_with_timegenerator(query_exprs_, ret_qds);
}
Expand Down
37 changes: 37 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
<exclude>**/.clang-format</exclude>
<!-- generated by cython-->
<exclude>**/tsfile/tsfile_pywrapper.cpp</exclude>
<exclude>**/venv/**</exclude>
</excludes>
</configuration>
</plugin>
Expand Down Expand Up @@ -191,6 +192,11 @@
<artifactId>build-helper-maven-plugin</artifactId>
<version>3.5.0</version>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.6.0</version>
</plugin>
<plugin>
<groupId>org.apache.felix</groupId>
<artifactId>maven-bundle-plugin</artifactId>
Expand Down Expand Up @@ -579,6 +585,13 @@
<module>cpp</module>
</modules>
</profile>
<!-- Build the python version of TsFile -->
<profile>
<id>with-python</id>
<modules>
<module>python</module>
</modules>
</profile>
<profile>
<id>.java-9-and-above</id>
<activation>
Expand Down Expand Up @@ -653,6 +666,9 @@
<os.suffix>linux</os.suffix>
<os.classifier>linux-x86_64</os.classifier>
<cmake.generator>Unix Makefiles</cmake.generator>
<python.venv.bin>venv/bin/</python.venv.bin>
<python.exe.bin>python3</python.exe.bin>
<python.compiler.argument> </python.compiler.argument>
</properties>
</profile>
<!-- Profile for linux amd64 (mainly AMD Processors) (Self-Enabling) -->
Expand All @@ -669,6 +685,9 @@
<os.suffix>linux</os.suffix>
<os.classifier>linux-amd64</os.classifier>
<cmake.generator>Unix Makefiles</cmake.generator>
<python.venv.bin>venv/bin/</python.venv.bin>
<python.exe.bin>python3</python.exe.bin>
<python.compiler.argument> </python.compiler.argument>
</properties>
</profile>
<!-- Profile for linux aarch64 (mainly newer Mac or Raspberry PI Processors) (Self-Enabling) -->
Expand All @@ -685,6 +704,9 @@
<os.suffix>linux</os.suffix>
<os.classifier>linux-${os.arch}</os.classifier>
<cmake.generator>Unix Makefiles</cmake.generator>
<python.venv.bin>venv/bin/</python.venv.bin>
<python.exe.bin>python3</python.exe.bin>
<python.compiler.argument> </python.compiler.argument>
</properties>
</profile>
<!-- Profile for mac x86_64 (mainly Intel Processors) (Self-Enabling) -->
Expand All @@ -700,6 +722,9 @@
<os.suffix>mac</os.suffix>
<os.classifier>mac-x86_64</os.classifier>
<cmake.generator>Unix Makefiles</cmake.generator>
<python.venv.bin>venv/bin/</python.venv.bin>
<python.exe.bin>python3</python.exe.bin>
<python.compiler.argument> </python.compiler.argument>
</properties>
</profile>
<!-- Profile for mac aarch64 (mainly Apple Silicon Processors) (Self-Enabling) -->
Expand All @@ -715,6 +740,9 @@
<os.suffix>mac</os.suffix>
<os.classifier>mac-aarch64</os.classifier>
<cmake.generator>Unix Makefiles</cmake.generator>
<python.venv.bin>venv/bin/</python.venv.bin>
<python.exe.bin>python3</python.exe.bin>
<python.compiler.argument> </python.compiler.argument>
</properties>
</profile>
<!-- profile for windows x86_64 (mainly Intel Processors) (Self-Enabling) -->
Expand All @@ -732,6 +760,9 @@
<cmake.generator>MinGW Makefiles</cmake.generator>
<!-- The generated code relied on Boost and that relies on VS and can't be built with MinGW -->
<!--cmake.generator>Visual Studio 17 2022</cmake.generator-->
<python.venv.bin>venv/Scripts/</python.venv.bin>
<python.exe.bin>python</python.exe.bin>
<python.compiler.argument>--compiler=mingw32</python.compiler.argument>
</properties>
</profile>
<!-- profile for windows amd64 (mainly AMD Processors) (Self-Enabling) -->
Expand All @@ -749,6 +780,9 @@
<cmake.generator>MinGW Makefiles</cmake.generator>
<!-- The generated code relied on Boost and that relies on VS and can't be built with MinGW -->
<!--cmake.generator>Visual Studio 17 2022</cmake.generator-->
<python.venv.bin>venv/Scripts/</python.venv.bin>
<python.exe.bin>python</python.exe.bin>
<python.compiler.argument>--compiler=mingw32</python.compiler.argument>
</properties>
</profile>
<!-- profile for windows aarch64 (mainly newer Mac or Raspberry PI Processors) (Self-Enabling) -->
Expand All @@ -766,6 +800,9 @@
<cmake.generator>MinGW Makefiles</cmake.generator>
<!-- The generated code relied on Boost and that relies on VS and can't be built with MinGW -->
<!--cmake.generator>Visual Studio 17 2022</cmake.generator-->
<python.venv.bin>venv/Scripts/</python.venv.bin>
<python.exe.bin>python</python.exe.bin>
<python.compiler.argument>--compiler=mingw32</python.compiler.argument>
</properties>
</profile>
<!-- Little helper profile that will disable running the cmake tests when the maven tests are being skipped -->
Expand Down
66 changes: 66 additions & 0 deletions python/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# TsFile Python Document

<pre>
___________ ___________.__.__
\__ ___/____\_ _____/|__| | ____
| | / ___/| __) | | | _/ __ \
| | \___ \ | \ | | |_\ ___/
|____|/____ >\___ / |__|____/\___ > version 1.0.0
\/ \/ \/
</pre>


## Introduction

This directory contains the Python implementation of TsFile. The Python version is built on the CPP version and uses the Cython package to integrate TsFile's read and write capabilities into the Python environment. Users can read and write TsFile as easily as they use read_csv and write_csv in Pandas.

The source code can be found in the `./tsfile` directory. Files ending with `.pyx` and `.pxd` are wrapper code written in Cython. `tsfile/tsfile.py` defines the user-facing interfaces. Examples of reading and writing can be found in `./examlpes.py`.


## How to make contributions

Using pylint to check Python code is recommended. However, there is no suitable style checking tool for Cython code, and this part of the code should be consistent with the Python style required by pylint.

**Feature List**
- [ ] In pywrapper, invoke the batch reading interface implemented in CPP version of TsFile.
- [ ] Support writing multiple DataFrames into a single TsFile.



## Build

Before building the Python version of TsFile, you must first build the [CPP version of TsFile](../cpp/README.md), because the Python version relies on the shared library files that the CPP version provides.

Build by mvn in root directory:

```sh
mvn -P with-cpp,with-python clean verify
```

Build by python command:

```sh
python setup.py build_ext --inplace
```

82 changes: 82 additions & 0 deletions python/examlpes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

import numpy as np
import pandas as pd
import os

import tsfile as ts


# ---------------------------------------------------------------------------
# Part 1: write a sample TsFile
# ---------------------------------------------------------------------------

# Table (device) name the sample rows are written under and read back from.
DEVICE_NAME = "test_table"

# The sample file is placed next to this script.
tsfile_path = os.path.join(os.path.dirname(__file__), "test.tsfile")

# 1000 rows: an int64 "Time" column, a float32 "level" column and an
# int64 "num" column.
df = pd.DataFrame(
    {
        "Time": np.arange(1, 1001, dtype=np.int64),
        "level": np.linspace(2000, 3000, num=1000, dtype=np.float32),
        "num": np.arange(10000, 11000, dtype=np.int64),
    }
)

# Remove any file left over from a previous run before writing.
if os.path.exists(tsfile_path):
    os.remove(tsfile_path)
ts.write_tsfile(tsfile_path, DEVICE_NAME, df)


# ---------------------------------------------------------------------------
# Part 2: read back the data written above
# ---------------------------------------------------------------------------

# chunksize=20
frame = ts.read_tsfile(tsfile_path, DEVICE_NAME, ["level", "num"], chunksize=20)
print(frame.shape)

# chunksize=100
frame = ts.read_tsfile(tsfile_path, DEVICE_NAME, ["level", "num"], chunksize=100)
print(frame.shape)

# no chunksize: get all data
frame = ts.read_tsfile(tsfile_path, DEVICE_NAME, ["level", "num"])
print(frame.shape)

# iterator=True: the result is used as a context manager and iterated
# chunk by chunk.
with ts.read_tsfile(
    tsfile_path,
    DEVICE_NAME,
    ["level", "num"],
    iterator=True,
    chunksize=100,
) as reader:
    for chunk in reader:
        print(chunk.shape)

# time range [start_time, end_time] combined with chunksize
frame = ts.read_tsfile(
    tsfile_path,
    DEVICE_NAME,
    ["level"],
    start_time=50,
    end_time=100,
    chunksize=10,
)
print(frame.shape)

# time range only
frame = ts.read_tsfile(
    tsfile_path, DEVICE_NAME, ["num"], start_time=50, end_time=100
)
print(frame.shape)

# time range, iterator and chunksize together
with ts.read_tsfile(
    tsfile_path,
    DEVICE_NAME,
    ["level", "num"],
    iterator=True,
    start_time=100,
    end_time=500,
    chunksize=100,
) as reader:
    for chunk in reader:
        print(chunk.shape)
Loading

0 comments on commit ff5617b

Please sign in to comment.