-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Haruki Inoue
committed
Dec 1, 2022
0 parents
commit d6ad4f4
Showing
115 changed files
with
35,326 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
.git | ||
.cache | ||
.mypy_cache | ||
.venv | ||
.vscode | ||
.ipynb_checkpoints |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
[flake8] | ||
max-line-length = 80 | ||
filename = *.py | ||
max-complexity = 10 | ||
exclude = .venv,venv,tests/__init__.py | ||
ignore = D107, D401, D100, D104, W293, W503, C901 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
*.pyc | ||
.DS_Store | ||
.ipynb_checkpoints | ||
/data | ||
/features | ||
connect | ||
start | ||
/logs | ||
/output | ||
/img | ||
.metaflow/ | ||
temp/ | ||
.mypy* | ||
*.npy | ||
|
||
/venv | ||
.venv | ||
*.egg-info | ||
.python-version | ||
.coverage | ||
.idea | ||
.local | ||
.jupyter | ||
.cache | ||
.config | ||
.ipython | ||
.vscode | ||
.pyarmor | ||
.bash_history | ||
.gitconfig | ||
.pytest_cache | ||
docker/modules | ||
|
||
Tissue |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
repos: | ||
# - repo: https://github.com/pre-commit/mirrors-yapf | ||
# rev: '' # Use the sha / tag you want to point at | ||
# hooks: | ||
# - id: yapf | ||
# args: | ||
# - --style blank_line_before_nested_class_or_def=True | ||
|
||
# - repo: https://github.com/pre-commit/mirrors-isort | ||
# rev: '' # Use the revision sha / tag you want to point at | ||
# hooks: | ||
# - id: isort | ||
|
||
# flake8 is hosted on GitHub (PyCQA/flake8); the former GitLab location is
# no longer served, so pre-commit cannot clone it from there.
- repo: https://github.com/PyCQA/flake8
  rev: ''  # Use the revision sha / tag you want to point at
  hooks:
  - id: flake8
    # flake8-docstrings supplies the D1xx/D4xx checks.
    additional_dependencies: [flake8-docstrings]
|
||
- repo: https://github.com/pre-commit/pre-commit-hooks | ||
rev: '' # Use the revision sha / tag you want to point at | ||
hooks: | ||
- id: trailing-whitespace | ||
- id: end-of-file-fixer | ||
- id: check-docstring-first | ||
- id: check-json | ||
- id: check-yaml | ||
- id: requirements-txt-fixer | ||
|
||
# - repo: https://github.com/PyCQA/pylint | ||
# rev: '' # Use the revision sha / tag you want to point at | ||
# hooks: | ||
# - id: pylint | ||
|
||
# NOTE | ||
# Uncomment this when you have the full setup already | ||
#- repo: local | ||
# hooks: | ||
# - id: pytest | ||
# name: Running PyTest with coverage | ||
# entry: poetry run coverage run -m pytest | ||
# language: system | ||
# always_run: true | ||
# pass_filenames: false | ||
# - id: coverage | ||
# name: Running coverage summary | ||
# entry: poetry run coverage report -m | ||
# types: [python] | ||
# language: system | ||
# always_run: true | ||
# verbose: true | ||
# pass_filenames: false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# Development image: CUDA 11.4 toolchain on Ubuntu 20.04, Python 3.8.6 built
# from source, and the project's dependencies installed with Poetry.
FROM nvidia/cuda:11.4.0-devel-ubuntu20.04
LABEL maintainer="haruki@hacarus.com"
# Keep apt from prompting during the build.
ENV DEBIAN_FRONTEND=noninteractive

# Run every RUN instruction through bash (needed for `set -o pipefail` below).
SHELL ["/bin/bash", "-c"]
ENV HOME=/home \
    WORKDIR=/work
RUN mkdir -p $WORKDIR
WORKDIR $WORKDIR

# for CUDA: register the NVIDIA repository key before the first apt-get update
RUN apt-key adv --fetch-keys https://developer.download.nvidia.cn/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
RUN apt-get update \
    && apt-get install --no-install-recommends -y \
        fonts-ipaexfont libglib2.0-0 git gcc vim pip curl wget \
    # for opencv
    && apt-get install --no-install-recommends -y \
        build-essential \
        libsm-dev \
        libxrender-dev \
        libxext-dev \
        libgl1-mesa-dev \
        python3-dev \
        python3-pip \
        python3-wheel \
        python3-setuptools \
        python-dev \
        python3-venv \
        libssl-dev \
        libffi-dev \
    # for pyenv (gcc, wget and libffi-dev are already installed above)
    && apt-get install --no-install-recommends -y \
        make \
        zlib1g-dev \
        libbz2-dev \
        libreadline-dev \
        libsqlite3-dev \
        llvm \
        libncurses5-dev \
        xz-utils \
        libxml2-dev \
        libxmlsec1-dev \
        liblzma-dev \
    # openslide
    && apt-get install --no-install-recommends -y python3-openslide \
    # drop package lists and cached archives to keep the layer small
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

# install CUDA Toolkit for CuPy
# RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \
#     && dpkg -i cuda-keyring_1.0-1_all.deb \
#     && apt-get update \
#     && apt-get -y install cuda

# Build Python 3.8.6 from source; `make install` places it under /usr/local.
RUN wget https://www.python.org/ftp/python/3.8.6/Python-3.8.6.tar.xz \
    && tar xJf Python-3.8.6.tar.xz \
    && cd Python-3.8.6 \
    && ./configure \
    && make \
    && make install \
    && cd ../ \
    && rm -rf Python*

# poetry
# Only the dependency manifests are copied first so this layer stays cached
# until pyproject.toml or poetry.lock changes.
COPY pyproject.toml poetry.lock ./
ENV POETRY_HOME=/usr/local/poetry \
    POETRY_VERSION=1.1.13
# `set -o pipefail` plus `curl -f` make the build stop when the installer
# download fails, instead of piping an HTTP error page into python3.
# NOTE(review): this URL tracks the `master` branch; the Poetry project
# documents https://install.python-poetry.org as the installer location -
# confirm the URL still resolves and consider pinning it.
RUN set -o pipefail \
    && /usr/local/bin/python3.8 -m pip install --upgrade pip \
    && curl -fsSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python3 - \
    && echo 'export PATH="/usr/local/poetry/bin:$PATH"' >> $HOME/.bashrc \
    # install into the system interpreter rather than a virtualenv
    && /usr/local/poetry/bin/poetry config virtualenvs.create false \
    && /usr/local/poetry/bin/poetry install --no-root

# install original packages with separate cache
COPY modules ./modules
# NOTE(review): mode 777 makes these trees writable by every user in the
# container - confirm this is required (e.g. for running as a non-root uid).
RUN /usr/local/poetry/bin/poetry install \
    && chmod -R 777 $POETRY_HOME \
    && chmod -R 777 $HOME

EXPOSE 22
EXPOSE 8888

COPY entrypoint.sh ./entrypoint.sh
# Exec form: no wrapping shell, so the script's bash process receives signals
# such as the SIGTERM sent by `docker stop`, and arguments given to
# `docker run` are passed on to entrypoint.sh.
ENTRYPOINT ["/bin/bash", "entrypoint.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Create Docker image | ||
```bash | ||
sh build_docker.sh | ||
``` | ||
# Run notebook | ||
|
||
```bash | ||
sh start_notebook.sh <port_number> | ||
``` |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
from typing import Dict | ||
from pathlib import Path | ||
import numpy as np | ||
import pandas as pd | ||
import matplotlib.pyplot as plt | ||
import seaborn as sns | ||
from omegaconf import DictConfig | ||
import hydra | ||
|
||
from eda import git_root | ||
from hpi.uicc import extract_pM, extract_pN, extract_pT, classify_stage | ||
from logger import setup_logger | ||
|
||
|
||
# Module-level logger; setup_logger receives the script's file name and the
# path <script directory>/log/<script stem>.log.
script_path = Path(__file__)
log_dir = script_path.parent.joinpath("log")
logger = setup_logger(
    script_path.name,
    log_dir.joinpath(f"{script_path.stem}.log"),
)
|
||
|
||
def sort(x: Path) -> int:
    """Return the integer that follows the last underscore of the file stem.

    Parameters
    ----------
    x: Path
        path whose stem ends in ``_<integer>`` (or is an integer itself).

    Returns
    -------
    int
        the trailing number, usable as a numeric sort key.
    """
    _, _, trailing = x.stem.rpartition("_")
    return int(trailing)
|
||
|
||
def get_cluster_fraction(img: np.ndarray, norm: bool = False) -> Dict[int, int]:
    """Count the pixels of every positive label in a labeled image.

    Pixels whose value is not greater than zero are left out.

    Parameters
    ----------
    img: np.ndarray
        labeled image.
    norm: bool
        when True, each count is divided by the total number of counted
        pixels, so the values are fractions instead of counts.

    Returns
    -------
    Dict[cluster_id, pixel count]
        pixel count (or fraction when ``norm`` is True) keyed by label.
    """
    foreground = img[img > 0]
    labels, values = np.unique(foreground, return_counts=True)
    if norm:
        values = values / values.sum()
    return {label: value for label, value in zip(labels, values)}
|
||
|
||
def add_uicc_classification(df):
    """Return a copy of ``df`` extended with T, N, M and stage columns.

    The ``pTNM`` column is passed value by value to ``extract_pT``,
    ``extract_pN`` and ``extract_pM`` to build the ``T``, ``N`` and ``M``
    columns. ``classify_stage`` is then called once per row with the values
    of the columns from ``T`` through ``M`` and its results are stored in
    ``stage``. The input dataframe itself is not modified.
    """
    ptnm = df["pTNM"]
    df = df.assign(
        T=ptnm.apply(extract_pT),
        N=ptnm.apply(extract_pN),
        M=ptnm.apply(extract_pM),
    )
    # stages
    stages = []
    for i in range(df.shape[0]):
        # label slice: every column from "T" up to and including "M"
        tnm = df.iloc[i].loc["T":"M"]
        stages.append(classify_stage(*tnm))
    return df.assign(stage=stages)
|
||
|
||
@hydra.main(config_name="config", config_path="config",
            version_base=None)
def main(cfg: DictConfig):
    """Compute the area fraction of each cluster and save a table and a plot.

    Steps:

    1. read the clinical metadata CSV and add UICC T/N/M/stage columns,
    2. count the pixels per label in every ``*klabels*.npy`` file,
    3. drop the labels listed in ``cfg.cluster.ignore`` and the rows whose
       stage is "Unknown",
    4. write ``fraction.csv`` and ``fraction.png`` to the output directory.

    Parameters
    ----------
    cfg: DictConfig
        hydra configuration; reads ``data.INPUT_DIR``, ``data.OUTPUT_DIR``,
        ``data.META`` (all relative to the git root) and ``cluster.ignore``.
    """
    ROOT = git_root(absolute=True)
    input_dir = ROOT / cfg.data.INPUT_DIR
    output_dir = ROOT / cfg.data.OUTPUT_DIR
    output_dir.mkdir(mode=0o775, parents=True, exist_ok=True)
    path_meta = ROOT / cfg.data.META

    # get clinical information
    df_meta = pd.read_csv(path_meta, index_col=0)
    # Assign the result back instead of calling replace(..., inplace=True)
    # on the selected column: the chained in-place form raises a
    # FutureWarning in pandas 2.x and leaves df_meta unchanged under
    # copy-on-write.
    df_meta["sex"] = df_meta["sex"].replace({"F": "Female", "M": "Male"})
    # each patient has two samples.
    # _df_meta = df_meta.drop_duplicates(["TMA_ID"])
    # _df_meta = add_uicc_classification(_df_meta)

    # NOTE: each patient has two samples.
    _df_meta_undrop = add_uicc_classification(df_meta)
    _df_meta_undrop = _df_meta_undrop.ffill()

    # order the label files by the number at the end of their stem
    paths_human_cluster = sorted(input_dir.glob("*klabels*.npy"), key=sort)

    # calculate pixel-counts list
    logger.info("calculate pixel-counts")
    pixel_counts = []
    for path in paths_human_cluster:
        img = np.load(path)
        pixel_counts.append(get_cluster_fraction(img, norm=False))

    # list -> dataframe, drop noise
    logger.info("drop unknown label")
    df_pixel_counts = pd.DataFrame(pixel_counts)
    # NOTE(review): assign() aligns on the index. The pixel-count rows are
    # numbered 0..n-1 while the metadata index comes from the first CSV
    # column - confirm the two line up, otherwise stage becomes NaN here.
    df_pixel_counts = df_pixel_counts.assign(stage=_df_meta_undrop["stage"])
    # labels missing from an image count as zero pixels
    df_pixel_counts = df_pixel_counts.fillna(0)
    # raises KeyError when an ignored label is not among the columns
    df_pixel_counts = df_pixel_counts.drop(cfg.cluster.ignore, axis=1)
    df_pixel_counts = (df_pixel_counts
                       .query('not stage == "Unknown"')
                       .drop('stage', axis=1))

    # pixel count to ratio
    logger.info("calculate fraction")
    area_per_cluster = df_pixel_counts.sum()
    all_area = area_per_cluster.sum()
    df_fraction = pd.DataFrame({
        'cluster': df_pixel_counts.columns,
        'area': area_per_cluster,
        'fraction': area_per_cluster / all_area
    })
    df_fraction = df_fraction.sort_index()

    logger.info(f"save: {output_dir}/fraction.csv")
    df_fraction.to_csv(output_dir / "fraction.csv")

    logger.info(f"plot: {output_dir}/fraction.png")

    # bars are drawn in ascending order of fraction
    df_fraction = df_fraction.sort_values("fraction")
    order = df_fraction["cluster"]
    sns.barplot(data=df_fraction, x="cluster", y="fraction", order=order,
                color="gray")
    fig = plt.gcf()
    fig.savefig(output_dir / "fraction.png")
    plt.close("all")


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# https://github.com/knazeri/edge-connect/blob/master/src/config.py | ||
# hydra's settings ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | ||
defaults: | ||
- _self_ | ||
# - override hydra/hydra_logging: disabled | ||
- override hydra/job_logging: custom | ||
|
||
hydra: | ||
output_subdir: null | ||
run: | ||
dir: . | ||
sweep: | ||
dir: . | ||
subdir: . | ||
|
||
data: | ||
INPUT_DIR: data/submission/clustering/human/3_7 | ||
OUTPUT_DIR: data/submission/cluster_fraction/human/3_7 | ||
META: data/submission/meta/human_tma_20220112.csv | ||
|
||
cluster: | ||
ignore: [3, 6, 7, 8, 12, 16, 21, 26, 27, 29, 30] |
13 changes: 13 additions & 0 deletions
13
Submission/cluster_fraction/config/hydra/job_logging/custom.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
version: 1 | ||
formatters: | ||
console: | ||
format: "%(asctime)s - %(levelname)s - %(filename)s - %(message)s" | ||
handlers: | ||
console: | ||
class: logging.StreamHandler | ||
formatter: console | ||
stream: ext://sys.stdout | ||
root: | ||
handlers: [console] | ||
|
||
disable_existing_loggers: false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
2022-09-02 07:48:17,585 - INFO - classify_stage.py - main - calculate pixel-counts | ||
2022-09-02 07:48:19,763 - INFO - classify_stage.py - main - drop unknown label | ||
2022-09-02 07:48:19,802 - INFO - classify_stage.py - main - calculate fraction | ||
2022-09-02 07:48:19,806 - INFO - classify_stage.py - main - save: /work/data/submission/cluster_fraction/human/3_7/fraction.csv | ||
2022-09-02 07:48:19,808 - INFO - classify_stage.py - main - plot: /work/data/submission/cluster_fraction/human/3_7/fraction.png | ||
2022-09-02 07:51:34,187 - INFO - classify_stage.py - main - calculate pixel-counts | ||
2022-09-02 07:51:34,777 - INFO - classify_stage.py - main - drop unknown label | ||
2022-09-02 07:51:34,805 - INFO - classify_stage.py - main - calculate fraction | ||
2022-09-02 07:51:34,808 - INFO - classify_stage.py - main - save: /work/data/submission/cluster_fraction/human/3_7/fraction.csv | ||
2022-09-02 07:51:34,810 - INFO - classify_stage.py - main - plot: /work/data/submission/cluster_fraction/human/3_7/fraction.png |
14 changes: 14 additions & 0 deletions
14
Submission/cluster_fraction/log/classify_stage.log.2022-08-27
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
2022-08-27 17:30:28,714 - INFO - classify_stage.py - main - calculate pixel-counts | ||
2022-08-27 17:30:41,285 - INFO - classify_stage.py - main - drop unknown label | ||
2022-08-27 17:31:51,597 - INFO - classify_stage.py - main - calculate pixel-counts | ||
2022-08-27 17:31:52,163 - INFO - classify_stage.py - main - drop unknown label | ||
2022-08-27 17:40:11,327 - INFO - classify_stage.py - main - calculate pixel-counts | ||
2022-08-27 17:40:12,610 - INFO - classify_stage.py - main - drop unknown label | ||
2022-08-27 17:40:12,645 - INFO - classify_stage.py - main - calculate fraction | ||
2022-08-27 17:40:12,649 - INFO - classify_stage.py - main - save: /work/data/submission/cluster_fraction/human/3_7/fraction.csv | ||
2022-08-27 17:40:12,657 - INFO - classify_stage.py - main - plot: /work/data/submission/cluster_fraction/human/3_7/fraction.png | ||
2022-08-27 17:40:33,282 - INFO - classify_stage.py - main - calculate pixel-counts | ||
2022-08-27 17:40:33,872 - INFO - classify_stage.py - main - drop unknown label | ||
2022-08-27 17:40:33,895 - INFO - classify_stage.py - main - calculate fraction | ||
2022-08-27 17:40:33,898 - INFO - classify_stage.py - main - save: /work/data/submission/cluster_fraction/human/3_7/fraction.csv | ||
2022-08-27 17:40:33,901 - INFO - classify_stage.py - main - plot: /work/data/submission/cluster_fraction/human/3_7/fraction.png |
10 changes: 10 additions & 0 deletions
10
Submission/cluster_fraction/log/classify_stage.log.2022-08-28
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
2022-08-28 07:35:44,145 - INFO - classify_stage.py - main - calculate pixel-counts | ||
2022-08-28 07:35:44,809 - INFO - classify_stage.py - main - drop unknown label | ||
2022-08-28 07:35:44,832 - INFO - classify_stage.py - main - calculate fraction | ||
2022-08-28 07:35:44,835 - INFO - classify_stage.py - main - save: /work/data/submission/cluster_fraction/human/3_7/fraction.csv | ||
2022-08-28 07:35:44,837 - INFO - classify_stage.py - main - plot: /work/data/submission/cluster_fraction/human/3_7/fraction.png | ||
2022-08-28 08:06:27,208 - INFO - classify_stage.py - main - calculate pixel-counts | ||
2022-08-28 08:06:27,766 - INFO - classify_stage.py - main - drop unknown label | ||
2022-08-28 08:06:27,789 - INFO - classify_stage.py - main - calculate fraction | ||
2022-08-28 08:06:27,792 - INFO - classify_stage.py - main - save: /work/data/submission/cluster_fraction/human/3_7/fraction.csv | ||
2022-08-28 08:06:27,794 - INFO - classify_stage.py - main - plot: /work/data/submission/cluster_fraction/human/3_7/fraction.png |
Oops, something went wrong.