Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Haruki Inoue committed Dec 1, 2022
0 parents commit d6ad4f4
Show file tree
Hide file tree
Showing 115 changed files with 35,326 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
.git
.cache
.mypy_cache
.venv
.vscode
.ipynb_checkpoints
6 changes: 6 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[flake8]
max-line-length = 80
filename = *.py
max-complexity = 10
exclude = .venv,venv,tests/__init__.py
ignore = D107, D401, D100, D104, W293, W503, C901
34 changes: 34 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
*.pyc
.DS_Store
.ipynb_checkpoints
/data
/features
connect
start
/logs
/output
/img
.metaflow/
temp/
.mypy*
*.npy

/venv
.venv
*.egg-info
.python-version
.coverage
.idea
.local
.jupyter
.cache
.config
.ipython
.vscode
.pyarmor
.bash_history
.gitconfig
.pytest_cache
docker/modules

Tissue
52 changes: 52 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# pre-commit hook configuration.
# NOTE: every `rev` below is an empty placeholder; set it to the tag / sha you
# want to pin (or run `pre-commit autoupdate`) before installing the hooks.
repos:
  # - repo: https://github.com/pre-commit/mirrors-yapf
  #   rev: ''  # Use the sha / tag you want to point at
  #   hooks:
  #     - id: yapf
  #       args:
  #         - --style blank_line_before_nested_class_or_def=True

  # - repo: https://github.com/pre-commit/mirrors-isort
  #   rev: ''  # Use the revision sha / tag you want to point at
  #   hooks:
  #     - id: isort

  # flake8 is hosted on GitHub; the former GitLab location is no longer
  # available, so the hook must be fetched from here.
  - repo: https://github.com/PyCQA/flake8
    rev: ''  # Use the revision sha / tag you want to point at
    hooks:
      - id: flake8
        additional_dependencies: [flake8-docstrings]

  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: ''  # Use the revision sha / tag you want to point at
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-docstring-first
      - id: check-json
      - id: check-yaml
      - id: requirements-txt-fixer

  # - repo: https://github.com/PyCQA/pylint
  #   rev: ''  # Use the revision sha / tag you want to point at
  #   hooks:
  #     - id: pylint

  # NOTE
  # Uncomment this when you have the full setup already
  # - repo: local
  #   hooks:
  #     - id: pytest
  #       name: Running PyTest with coverage
  #       entry: poetry run coverage run -m pytest
  #       language: system
  #       always_run: true
  #       pass_filenames: false
  #     - id: coverage
  #       name: Running coverage summary
  #       entry: poetry run coverage report -m
  #       types: [python]
  #       language: system
  #       always_run: true
  #       verbose: true
  #       pass_filenames: false
85 changes: 85 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
FROM nvidia/cuda:11.4.0-devel-ubuntu20.04
LABEL maintainer="haruki@hacarus.com"

# Build-time only: keeps apt non-interactive during the build without baking
# the setting into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# bash with pipefail, so a failed download in a `curl ... | python3 -` pipe
# aborts the build instead of being masked by the last command's exit status.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
ENV HOME=/home \
    WORKDIR=/work
# WORKDIR creates the directory when it does not exist yet.
WORKDIR $WORKDIR

# for CUDA
RUN apt-key adv --fetch-keys https://developer.download.nvidia.cn/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub

# All OS packages are installed in a single layer; the apt caches are removed
# in the same layer so they never end up in the image.
RUN apt-get update \
    && apt-get install --no-install-recommends -y \
        # general tools
        curl \
        fonts-ipaexfont \
        gcc \
        git \
        libglib2.0-0 \
        pip \
        vim \
        wget \
        # for opencv
        build-essential \
        libffi-dev \
        libgl1-mesa-dev \
        libsm-dev \
        libssl-dev \
        libxext-dev \
        libxrender-dev \
        python-dev \
        python3-dev \
        python3-pip \
        python3-setuptools \
        python3-venv \
        python3-wheel \
        # for pyenv
        libbz2-dev \
        liblzma-dev \
        libncurses5-dev \
        libreadline-dev \
        libsqlite3-dev \
        libxml2-dev \
        libxmlsec1-dev \
        llvm \
        make \
        xz-utils \
        zlib1g-dev \
        # openslide
        python3-openslide \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

# install CUDA Toolkit for CuPy
# RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \
#     && dpkg -i cuda-keyring_1.0-1_all.deb \
#     && apt-get update \
#     && apt-get -y install cuda

# Build Python 3.8.6 from source; it is installed under /usr/local and the
# source tree is removed in the same layer.
RUN wget https://www.python.org/ftp/python/3.8.6/Python-3.8.6.tar.xz \
    && tar xJf Python-3.8.6.tar.xz \
    && cd Python-3.8.6 \
    && ./configure \
    && make -j"$(nproc)" \
    && make install \
    && cd ../ \
    && rm -rf Python*

# poetry
# Only the dependency manifests are copied here, so this layer stays cached
# until pyproject.toml / poetry.lock change.
COPY pyproject.toml poetry.lock ./
ENV POETRY_HOME=/usr/local/poetry \
    POETRY_VERSION=1.1.13
# The official installer honours POETRY_HOME and POETRY_VERSION; `curl -f`
# makes HTTP errors fail the pipe instead of feeding an error page to Python.
RUN /usr/local/bin/python3.8 -m pip install --no-cache-dir --upgrade pip \
    && curl -fsSL https://install.python-poetry.org | python3 - \
    && echo 'export PATH="/usr/local/poetry/bin:$PATH"' >> "$HOME/.bashrc" \
    && "$POETRY_HOME/bin/poetry" config virtualenvs.create false \
    && "$POETRY_HOME/bin/poetry" install --no-root

# install original packages with separate cache
COPY modules ./modules
# NOTE(review): the world-writable permissions are kept from the original
# image (presumably so containers started with an arbitrary UID can write to
# these paths); tighten them if the runtime user is known.
RUN "$POETRY_HOME/bin/poetry" install \
    && chmod -R 777 "$POETRY_HOME" \
    && chmod -R 777 "$HOME"

EXPOSE 22
EXPOSE 8888

COPY entrypoint.sh ./entrypoint.sh
# Exec form: no intermediate `sh -c` wrapper, so the entrypoint script receives
# signals from `docker stop` directly. The relative path resolves against
# WORKDIR (/work), where the script was copied.
ENTRYPOINT ["/bin/bash", "entrypoint.sh"]
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Create Docker image
```bash
sh build_docker.sh
```
# Run notebook

```bash
sh start_notebook.sh <port_number>
```
715 changes: 715 additions & 0 deletions Submission/UICC_classification/01-tma_meta.ipynb

Large diffs are not rendered by default.

125 changes: 125 additions & 0 deletions Submission/cluster_fraction/calculate_fraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
from typing import Dict
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from omegaconf import DictConfig
import hydra

from eda import git_root
from hpi.uicc import extract_pM, extract_pN, extract_pT, classify_stage
from logger import setup_logger


# Module-level logger: the log file is named after this script and lives in a
# "log" directory next to it.
script_path = Path(__file__)
log_dir = script_path.parent.joinpath("log")
logger = setup_logger(
    script_path.name,
    log_dir.joinpath(f"{script_path.stem}.log"),
)


def sort(x: Path) -> int:
    """Sort key: the integer after the last underscore of the file stem.

    Parameters
    ----------
    x: Path
        path whose stem ends with ``_<integer>`` (or is an integer itself).

    Returns
    -------
    int
        the trailing number of the stem.

    Raises
    ------
    ValueError
        if the last underscore-separated piece is not an integer.
    """
    *_, trailing = x.stem.split("_")
    return int(trailing)


def get_cluster_fraction(img: np.ndarray, norm: bool = False) -> Dict[int, int]:
    """Count the pixels belonging to each positive label of an image.

    Pixels whose value is not greater than 0 are ignored.

    Parameters
    ----------
    img: np.ndarray
        labeled image.
    norm: bool
        if True, each count is divided by the total number of counted
        pixels, so the values are fractions instead of pixel counts.

    Returns
    -------
    Dict[cluster_id, pixel count]
        one entry per distinct positive label found in ``img``.
    """
    foreground = img[img > 0]
    labels, sizes = np.unique(foreground, return_counts=True)
    values = sizes / sizes.sum() if norm else sizes
    return {label: value for label, value in zip(labels, values)}


def add_uicc_classification(df):
    """Return a copy of ``df`` extended with T, N, M and stage columns.

    Parameters
    ----------
    df: pd.DataFrame
        must contain a "pTNM" column.

    Returns
    -------
    pd.DataFrame
        ``df`` plus the columns "T", "N" and "M" (``extract_pT``,
        ``extract_pN`` and ``extract_pM`` applied to "pTNM") and "stage"
        (``classify_stage`` applied to each row's T..M values).
    """
    ptnm = df["pTNM"]
    df = df.assign(T=ptnm.apply(extract_pT))
    df = df.assign(N=ptnm.apply(extract_pN))
    df = df.assign(M=ptnm.apply(extract_pM))

    # stages
    # The label slice "T":"M" picks every column from T through M, i.e. the
    # three columns that were just appended next to each other.
    stage_per_row = []
    for row_idx in range(len(df)):
        t_n_m = df.iloc[row_idx].loc["T":"M"]
        stage_per_row.append(classify_stage(*t_n_m))
    return df.assign(stage=stage_per_row)


@hydra.main(config_name="config", config_path="config",
            version_base=None)
def main(cfg: DictConfig):
    """Compute the area fraction of each cluster and save a table and a plot.

    Loads the metadata CSV and every ``*klabels*.npy`` file in the input
    directory, counts the labeled pixels per cluster, drops the clusters
    listed in ``cfg.cluster.ignore`` and the rows whose stage is "Unknown",
    then writes ``fraction.csv`` and ``fraction.png`` to the output directory.

    Parameters
    ----------
    cfg: DictConfig
        hydra config; ``data.INPUT_DIR``, ``data.OUTPUT_DIR``, ``data.META``
        (all relative to the git root) and ``cluster.ignore`` are read.
    """
    ROOT = git_root(absolute=True)
    input_dir = ROOT / cfg.data.INPUT_DIR
    output_dir = ROOT / cfg.data.OUTPUT_DIR
    output_dir.mkdir(mode=0o775, parents=True, exist_ok=True)
    path_meta = ROOT / cfg.data.META

    # get clinical information
    df_meta = pd.read_csv(path_meta, index_col=0)
    # Assign the result back instead of calling replace(inplace=True) on the
    # selected column: the chained in-place form is not guaranteed to update
    # df_meta (it does not under pandas Copy-on-Write).
    df_meta["sex"] = df_meta["sex"].replace({"F": "Female", "M": "Male"})

    # NOTE: each patient has two samples; rows are not de-duplicated here.
    _df_meta_undrop = add_uicc_classification(df_meta)
    _df_meta_undrop = _df_meta_undrop.ffill()

    paths_human_cluster = sorted(input_dir.glob("*klabels*.npy"), key=sort)

    # calculate pixel-counts list
    logger.info("calculate pixel-counts")
    pixel_counts = [get_cluster_fraction(np.load(path), norm=False)
                    for path in paths_human_cluster]

    # list -> dataframe, drop noise
    logger.info("drop unknown label")
    df_pixel_counts = pd.DataFrame(pixel_counts)
    # NOTE(review): assign() aligns "stage" on index labels, so this assumes
    # the index of the meta CSV matches the 0..n-1 positions of the sorted
    # label files -- confirm against the data.
    df_pixel_counts = df_pixel_counts.assign(stage=_df_meta_undrop["stage"])
    # clusters absent from an image become 0 pixels
    df_pixel_counts = df_pixel_counts.fillna(0)
    df_pixel_counts = df_pixel_counts.drop(cfg.cluster.ignore, axis=1)
    df_pixel_counts = (df_pixel_counts
                       .query('not stage == "Unknown"')
                       .drop('stage', axis=1))

    # pixel count to ratio
    logger.info("calculate fraction")
    area_per_cluster = df_pixel_counts.sum()
    all_area = area_per_cluster.sum()
    df_fraction = pd.DataFrame({
        'cluster': df_pixel_counts.columns,
        'area': area_per_cluster,
        'fraction': area_per_cluster / all_area
    })
    df_fraction = df_fraction.sort_index()

    logger.info(f"save: {output_dir}/fraction.csv")
    df_fraction.to_csv(output_dir / "fraction.csv")

    logger.info(f"plot: {output_dir}/fraction.png")

    # bars are ordered by ascending fraction
    df_fraction = df_fraction.sort_values("fraction")
    order = df_fraction["cluster"]
    sns.barplot(data=df_fraction, x="cluster", y="fraction", order=order,
                color="gray")
    fig = plt.gcf()
    fig.savefig(output_dir / "fraction.png")
    plt.close("all")


if __name__ == "__main__":
    main()
22 changes: 22 additions & 0 deletions Submission/cluster_fraction/config/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# https://github.com/knazeri/edge-connect/blob/master/src/config.py
# hydra's settings ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
defaults:
- _self_
# - override hydra/hydra_logging: disabled
- override hydra/job_logging: custom

hydra:
output_subdir: null
run:
dir: .
sweep:
dir: .
subdir: .

data:
INPUT_DIR: data/submission/clustering/human/3_7
OUTPUT_DIR: data/submission/cluster_fraction/human/3_7
META: data/submission/meta/human_tma_20220112.csv

cluster:
ignore: [3, 6, 7, 8, 12, 16, 21, 26, 27, 29, 30]
13 changes: 13 additions & 0 deletions Submission/cluster_fraction/config/hydra/job_logging/custom.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
version: 1
formatters:
console:
format: "%(asctime)s - %(levelname)s - %(filename)s - %(message)s"
handlers:
console:
class: logging.StreamHandler
formatter: console
stream: ext://sys.stdout
root:
handlers: [console]

disable_existing_loggers: false
10 changes: 10 additions & 0 deletions Submission/cluster_fraction/log/classify_stage.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
2022-09-02 07:48:17,585 - INFO - classify_stage.py - main - calculate pixel-counts
2022-09-02 07:48:19,763 - INFO - classify_stage.py - main - drop unknown label
2022-09-02 07:48:19,802 - INFO - classify_stage.py - main - calculate fraction
2022-09-02 07:48:19,806 - INFO - classify_stage.py - main - save: /work/data/submission/cluster_fraction/human/3_7/fraction.csv
2022-09-02 07:48:19,808 - INFO - classify_stage.py - main - plot: /work/data/submission/cluster_fraction/human/3_7/fraction.png
2022-09-02 07:51:34,187 - INFO - classify_stage.py - main - calculate pixel-counts
2022-09-02 07:51:34,777 - INFO - classify_stage.py - main - drop unknown label
2022-09-02 07:51:34,805 - INFO - classify_stage.py - main - calculate fraction
2022-09-02 07:51:34,808 - INFO - classify_stage.py - main - save: /work/data/submission/cluster_fraction/human/3_7/fraction.csv
2022-09-02 07:51:34,810 - INFO - classify_stage.py - main - plot: /work/data/submission/cluster_fraction/human/3_7/fraction.png
14 changes: 14 additions & 0 deletions Submission/cluster_fraction/log/classify_stage.log.2022-08-27
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
2022-08-27 17:30:28,714 - INFO - classify_stage.py - main - calculate pixel-counts
2022-08-27 17:30:41,285 - INFO - classify_stage.py - main - drop unknown label
2022-08-27 17:31:51,597 - INFO - classify_stage.py - main - calculate pixel-counts
2022-08-27 17:31:52,163 - INFO - classify_stage.py - main - drop unknown label
2022-08-27 17:40:11,327 - INFO - classify_stage.py - main - calculate pixel-counts
2022-08-27 17:40:12,610 - INFO - classify_stage.py - main - drop unknown label
2022-08-27 17:40:12,645 - INFO - classify_stage.py - main - calculate fraction
2022-08-27 17:40:12,649 - INFO - classify_stage.py - main - save: /work/data/submission/cluster_fraction/human/3_7/fraction.csv
2022-08-27 17:40:12,657 - INFO - classify_stage.py - main - plot: /work/data/submission/cluster_fraction/human/3_7/fraction.png
2022-08-27 17:40:33,282 - INFO - classify_stage.py - main - calculate pixel-counts
2022-08-27 17:40:33,872 - INFO - classify_stage.py - main - drop unknown label
2022-08-27 17:40:33,895 - INFO - classify_stage.py - main - calculate fraction
2022-08-27 17:40:33,898 - INFO - classify_stage.py - main - save: /work/data/submission/cluster_fraction/human/3_7/fraction.csv
2022-08-27 17:40:33,901 - INFO - classify_stage.py - main - plot: /work/data/submission/cluster_fraction/human/3_7/fraction.png
10 changes: 10 additions & 0 deletions Submission/cluster_fraction/log/classify_stage.log.2022-08-28
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
2022-08-28 07:35:44,145 - INFO - classify_stage.py - main - calculate pixel-counts
2022-08-28 07:35:44,809 - INFO - classify_stage.py - main - drop unknown label
2022-08-28 07:35:44,832 - INFO - classify_stage.py - main - calculate fraction
2022-08-28 07:35:44,835 - INFO - classify_stage.py - main - save: /work/data/submission/cluster_fraction/human/3_7/fraction.csv
2022-08-28 07:35:44,837 - INFO - classify_stage.py - main - plot: /work/data/submission/cluster_fraction/human/3_7/fraction.png
2022-08-28 08:06:27,208 - INFO - classify_stage.py - main - calculate pixel-counts
2022-08-28 08:06:27,766 - INFO - classify_stage.py - main - drop unknown label
2022-08-28 08:06:27,789 - INFO - classify_stage.py - main - calculate fraction
2022-08-28 08:06:27,792 - INFO - classify_stage.py - main - save: /work/data/submission/cluster_fraction/human/3_7/fraction.csv
2022-08-28 08:06:27,794 - INFO - classify_stage.py - main - plot: /work/data/submission/cluster_fraction/human/3_7/fraction.png
Loading

0 comments on commit d6ad4f4

Please sign in to comment.