Skip to content

Commit

Permalink
[MISC] Add Dockerfile and docker_build, docker_run make commands
Browse files Browse the repository at this point in the history
  • Loading branch information
NicolaDonelli committed Jan 14, 2023
1 parent 7406be0 commit 25b63b4
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 38 deletions.
36 changes: 36 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# syntax=docker/dockerfile:1

# Multi-stage build: the "builder" stage runs the QA suite (`make checks`) and
# produces the sdist under dist/; the slim runtime stage only installs that
# artifact, keeping the final image small.
ARG PY_VERSION=3.10
FROM python:${PY_VERSION}-buster AS builder

WORKDIR /py4ai-data

# Upgrade base packages; remove the apt lists afterwards so they do not
# bloat the layer.
RUN apt-get update && apt-get upgrade -y && rm -rf /var/lib/apt/lists/*

COPY LICENSE MANIFEST.in versioneer.py setup.py pyproject.toml README.md Makefile ./
COPY requirements requirements
COPY py4ai py4ai
COPY tests tests

# Switch to a non-root user: running build/test steps as root is a container
# anti-pattern. pip --user installs land in ~/.local/bin, hence the PATH entry.
RUN addgroup --system tester && adduser --system --group tester
RUN chown -R tester:tester /py4ai-data
ENV PATH="${PATH}:/home/tester/.local/bin"
USER tester

# Run the full QA suite; this is expected to also build the distribution
# consumed by the runtime stage below — TODO confirm `make checks` emits dist/.
RUN make checks

FROM python:${PY_VERSION}-slim-buster
WORKDIR /py4ai-data
COPY --from=builder /py4ai-data/dist /py4ai-data/dist

# gcc/libc6-dev are needed to compile any sdist C extensions at install time.
RUN apt-get update && apt-get upgrade -y \
    && apt-get install -y --no-install-recommends --fix-missing gcc libc6-dev \
    && rm -rf /var/lib/apt/lists/*

# Non-root runtime user, mirroring the builder-stage setup.
RUN addgroup --system runner && adduser --system --group runner
RUN chown -R runner:runner /py4ai-data
ENV PATH="${PATH}:/home/runner/.local/bin"
USER runner

RUN pip install --upgrade pip
# Install the most recently built sdist from the builder stage.
RUN ls -t ./dist/*.tar.gz | xargs pip install
ENTRYPOINT ["python"]
38 changes: 31 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,22 @@ files := $(shell find . -name "*.py")
doc_files := $(shell find sphinx -name "*.*")

# Uncomment to store cache installation in the environment
# package_dir := $(shell python -c 'import site; print(site.getsitepackages()[0])')
package_dir := .make_cache
# cache_dir := $(shell python -c 'import site; print(site.getsitepackages()[0])')
cache_dir := .make_cache
package_name=$(shell python -c "import tomli;from pathlib import Path;print(tomli.loads(Path('pyproject.toml').read_text(encoding='utf-8'))['project']['name'])")

$(shell mkdir -p $(package_dir))
$(shell mkdir -p $(cache_dir))

pre_deps_tag := $(cache_dir)/.pre_deps
env_tag := $(cache_dir)/.env_tag
env_dev_tag := $(cache_dir)/.env_dev_tag
install_tag := $(cache_dir)/.install_tag
docker_build_tag := $(cache_dir)/.docker_build_tag

project_name := py4ai-data
registry := ghcr.io
image_name := $(registry)/nicoladonelli/$(project_name)

pre_deps_tag := $(package_dir)/.pre_deps
env_tag := $(package_dir)/.env_tag
env_dev_tag := $(package_dir)/.env_dev_tag
install_tag := $(package_dir)/.install_tag

# ======================
# Rules and Dependencies
Expand Down Expand Up @@ -49,6 +55,8 @@ help:
@echo " - docs to produce documentation in html format using sphinx as configured in pyproject.toml"
@echo " - checks to run mypy, lint, bandit, licensecheck, tests and check formatting altogether"
@echo " - clean to remove cache file"
@echo " - docker_build to build docker image according to Dockerfile, tagged with app version"
@echo " - docker_run to run latest built docker image"
@echo "------------------------------------"

$(pre_deps_tag):
Expand Down Expand Up @@ -148,3 +156,19 @@ clean:
rm -rf sphinx/source/api
rm -rf $(shell find . -name "*.pyc") $(shell find . -name "__pycache__")
rm -rf *.egg-info .mypy_cache .pytest_cache .make_cache $(env_tag) $(env_dev_tag) $(install_tag)

.PHONY: docker_build docker_run

# Stamp file recording the app version of the last docker image built.
# Rebuilds whenever the Dockerfile, pinned requirements, package sources or
# project metadata change. NOTE: `py4ai` is a directory prerequisite, so only
# top-level additions/removals retrigger — TODO confirm this is intended.
$(docker_build_tag): Dockerfile requirements/requirements.txt py4ai pyproject.toml
	@echo "==Building docker container=="
	TAG=$$(${PYTHON} py4ai/data/_version.py); \
	PYTHON_VERSION=$$(python --version); \
	PYTHON_VERSION="$${PYTHON_VERSION#Python }"; \
	PYTHON_VERSION="$${PYTHON_VERSION%.*}"; \
	docker build -t $(image_name):"$${TAG}" --build-arg PY_VERSION=$${PYTHON_VERSION} .; \
	VERSION=$$(cat $(docker_build_tag) 2>/dev/null); \
	if [ "$${VERSION}" != "$${TAG}" ]; then echo "==Updating docker version tag=="; echo "$${TAG}" > $(docker_build_tag); fi

# Build the image only if the stamp is out of date.
docker_build: $(docker_build_tag)

# Run the last built image interactively; the container is removed on exit.
docker_run: $(docker_build_tag)
	@echo "==Run detached docker image '$(project_name)' from '$(image_name):$$(cat $(docker_build_tag))' container=="
	docker run --rm -it --name $(project_name) $(image_name):$$(cat $(docker_build_tag))
4 changes: 4 additions & 0 deletions py4ai/data/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -691,3 +691,7 @@ def get_versions() -> Dict[str, Any]:
"error": "unable to compute version",
"date": None,
}


if __name__ == "__main__":
    # CLI entry point used by the Makefile docker_build rule: print a version
    # string safe for docker image tags ("+" is not a legal tag character,
    # so local-version separators are rewritten as ".").
    raw_version: str = get_versions()["version"]
    print(raw_version.replace("+", "."))
29 changes: 12 additions & 17 deletions py4ai/data/model/ml.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Module for specifying data-models to be used in modelling."""

import sys
from abc import ABC, abstractmethod
from itertools import islice
from typing import (
Any,
Dict,
Expand All @@ -20,6 +20,7 @@

import numpy as np
import pandas as pd
from numpy.typing import NDArray
from pandas import DataFrame, Series
from py4ai.core.types import T
from py4ai.core.utils.decorators import lazyproperty as lazy
Expand All @@ -36,18 +37,12 @@
RegisterLazyCachedIterables,
)

if sys.version_info[0] < 3:
from itertools import islice
from itertools import izip as zip
else:
from itertools import islice

TPandasDataset = TypeVar("TPandasDataset", bound="PandasDataset") # type: ignore
TDatasetUtilsMixin = TypeVar("TDatasetUtilsMixin", bound="DatasetUtilsMixin") # type: ignore

FeatType = TypeVar(
"FeatType",
bound=Union[List[Any], Tuple[Any], np.ndarray[Any, np.dtype[Any]], Dict[str, Any]],
bound=Union[List[Any], Tuple[Any], NDArray[Any], Dict[str, Any]],
)
LabType = TypeVar("LabType", int, float, None)
FeaturesType = Union[
Expand Down Expand Up @@ -118,11 +113,11 @@ def __init__(
self.name: Optional[Union[str, int, Any]] = name


class MultiFeatureSample(Sample[List[np.ndarray[Any, Any]], LabType]):
class MultiFeatureSample(Sample[List[NDArray[Any]], LabType]):
"""Class representing an observation defined by a nested list of arrays."""

@staticmethod
def _check_features(features: List[np.ndarray[Any, Any]]) -> None:
def _check_features(features: List[NDArray[Any]]) -> None:
"""
Check that features is list of lists.
Expand All @@ -138,7 +133,7 @@ def _check_features(features: List[np.ndarray[Any, Any]]) -> None:

def __init__(
self,
features: List[np.ndarray[Any, Any]],
features: List[NDArray[Any]],
label: Optional[LabType] = None,
name: Optional[str] = None,
) -> None:
Expand Down Expand Up @@ -189,7 +184,7 @@ def checkNames(x: Optional[Union[str, int, Any]]) -> Union[str, int]:
return x if isinstance(x, int) else str(x)

@overload
def getFeaturesAs(self, type: Literal["array"]) -> np.ndarray[Any, Any]:
def getFeaturesAs(self, type: Literal["array"]) -> NDArray[Any]:
...

@overload
Expand Down Expand Up @@ -244,7 +239,7 @@ def getFeaturesAs(self, type: AllowedTypes = "array") -> FeaturesType[FeatType]:
raise ValueError(f"Type {type} not allowed")

@overload
def getLabelsAs(self, type: Literal["array"]) -> np.ndarray[Any, Any]:
def getLabelsAs(self, type: Literal["array"]) -> NDArray[Any]:
...

@overload
Expand Down Expand Up @@ -393,7 +388,7 @@ def labels(self) -> Iterator[LabType]:
return self.getLabelsAs("lazy")

@overload
def getFeaturesAs(self, type: Literal["array"]) -> np.ndarray[Any, Any]:
def getFeaturesAs(self, type: Literal["array"]) -> NDArray[Any]:
...

@overload
Expand Down Expand Up @@ -422,7 +417,7 @@ def getFeaturesAs(self, type: AllowedTypes = "lazy") -> FeaturesType[FeatType]:
return super(LazyDataset, self).getFeaturesAs(type)

@overload
def getLabelsAs(self, type: Literal["array"]) -> np.ndarray[Any, Any]:
def getLabelsAs(self, type: Literal["array"]) -> NDArray[Any]:
...

@overload
Expand Down Expand Up @@ -666,7 +661,7 @@ def intersection(self: TPandasDataset) -> TPandasDataset:
return self.loc(idx)

@overload
def getFeaturesAs(self, type: Literal["array"]) -> np.ndarray[Any, Any]:
def getFeaturesAs(self, type: Literal["array"]) -> NDArray[Any]:
...

@overload
Expand Down Expand Up @@ -708,7 +703,7 @@ def getFeaturesAs(self, type: AllowedTypes = "array") -> FeaturesType[FeatType]:
)

@overload
def getLabelsAs(self, type: Literal["array"]) -> np.ndarray[Any, Any]:
def getLabelsAs(self, type: Literal["array"]) -> NDArray[Any]:
...

@overload
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ disallow_incomplete_defs = true
disallow_any_generics = true
warn_redundant_casts = true
strict_equality = false
plugins = ["sqlalchemy.ext.mypy.plugin"]
plugins = ["sqlalchemy.ext.mypy.plugin", "numpy.typing.mypy_plugin"]
exclude = ['_version.py']

[[tool.mypy.overrides]]
Expand Down
25 changes: 12 additions & 13 deletions tests/data/model/test_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import numpy as np
import pandas as pd
from numpy.typing import NDArray
from py4ai.core.tests.core import TestCase, logTest
from py4ai.core.utils.fs import create_dir_if_not_exists

Expand Down Expand Up @@ -157,16 +158,16 @@ def samples_gen() -> Iterator[MultiFeatureSample[float]]:
lookback = 3
batch_size = 4

lazyDat: LazyDataset[List[np.ndarray[Any, np.dtype[Any]]], float] = LazyDataset(
lazyDat: LazyDataset[List[NDArray[Any]], float] = LazyDataset(
IterGenerator(samples_gen)
)
lookbackDat: LazyDataset[
List[np.ndarray[Any, np.dtype[Any]]], float
] = lazyDat.withLookback(lookback)
lookbackDat: LazyDataset[List[NDArray[Any]], float] = lazyDat.withLookback(
lookback
)
batch_gen = lookbackDat.batch(batch_size)

batch1: CachedDataset[List[np.ndarray[Any, Any]], float] = next(batch_gen)
batch2: CachedDataset[List[np.ndarray[Any, Any]], float] = next(batch_gen)
batch1: CachedDataset[List[NDArray[Any]], float] = next(batch_gen)
batch2: CachedDataset[List[NDArray[Any]], float] = next(batch_gen)

tmp1 = batch1.getFeaturesAs("array")
temp1X = np.array(list(map(lambda x: np.stack(x), tmp1[:, :, 0])))
Expand Down Expand Up @@ -207,7 +208,7 @@ def test_withLookback_ArrayFeatureSample(self) -> None:
Sample(features=np.array([116, 117]), label=9),
]

def samples_gen() -> Iterator[Sample[np.ndarray[Any, np.dtype[Any]], int]]:
def samples_gen() -> Iterator[Sample[NDArray[Any], int]]:
for sample in samples:
if not any([np.isnan(x).any() for x in sample.features]):
yield sample
Expand All @@ -233,16 +234,14 @@ def samples_gen() -> Iterator[Sample[np.ndarray[Any, np.dtype[Any]], int]]:
lookback = 3
batch_size = 4

lazyDat: LazyDataset[np.ndarray[Any, np.dtype[Any]], int] = LazyDataset(
lazyDat: LazyDataset[NDArray[Any], int] = LazyDataset(
IterGenerator(samples_gen)
)
lookbackDat: LazyDataset[
np.ndarray[Any, np.dtype[Any]], int
] = lazyDat.withLookback(lookback)
lookbackDat: LazyDataset[NDArray[Any], int] = lazyDat.withLookback(lookback)
batch_gen = lookbackDat.batch(batch_size)

batch1: CachedDataset[np.ndarray[Any, Any], int] = next(batch_gen)
batch2: CachedDataset[np.ndarray[Any, Any], int] = next(batch_gen)
batch1: CachedDataset[NDArray[Any], int] = next(batch_gen)
batch2: CachedDataset[NDArray[Any], int] = next(batch_gen)

tmp1 = batch1.getFeaturesAs("array")
tmp1lab = batch1.getLabelsAs("array")
Expand Down

0 comments on commit 25b63b4

Please sign in to comment.