# new dataset 0390 #160
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Preprocessing workflow.
#
# Triggered when a pull request touching raw_data/ or processed_data/ is
# opened, reopened, or receives new commits. The single job checks out the
# pull request's head repository, derives the changed dataset folders
# (parent directories whose name consists only of digits), runs the R and
# Python preprocessing / validation scripts on them, commits and pushes the
# results to `origin`, posts a report comment on the PR, and labels the PR
# according to the outcome.
name: Preprocessing

on:
  # SECURITY NOTE(review): per the GitHub Actions documentation,
  # `pull_request_target` runs in the context of the base repository. This
  # job checks out the PR head repository and then executes scripts and
  # installs dependencies from that checkout, so code from the fork runs with
  # the job's token in its environment (see GITHUB_PAT below). Confirm this
  # trade-off is intended and that PRs are reviewed before the job may run.
  pull_request_target:
    # (re)opened PR or new commit in fork
    types: [opened, synchronize, reopened]
    paths:
      - 'raw_data/**'
      - 'processed_data/**'

jobs:
  preprocess:
    name: Preprocess raw data
    # NOTE: on windows as computing of descriptors has a bug on linux right now
    # NOTE(review): verify that the windows-2019 hosted runner image is still
    # available; hosted images are retired over time.
    runs-on: windows-2019
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}  # needed for pulling R packages from github
    steps:
      # Full history (fetch-depth: 0) and LFS objects of the PR head branch.
      - name: Checkout fork repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          ref: ${{ github.head_ref }}
          lfs: true

      - name: Get changed files
        id: files
        uses: Ana06/get-changed-files@v2.2.0

      # Reduce the changed file list to the names of their parent directories,
      # keep only purely numeric names, and expose them (sorted, unique,
      # space-separated) as the step output `files`.
      - name: Get new/changed datasets
        id: filesfolders
        shell: bash {0}
        env:
          # File names come from the pull request and are untrusted: hand them
          # to the script through the environment instead of interpolating
          # them into the script text, where they would be parsed as shell.
          ALL_CHANGED_FILES: ${{ steps.files.outputs.all }}
        run: |
          # Word splitting on whitespace is intended (the list is
          # space-separated); globbing is not, so switch it off.
          set -f
          files="$(
            for f in $ALL_CHANGED_FILES; do
              basename "$(dirname "$f")"
            done | grep -E '^[0-9]+$' | sort | uniq | tr '\n' ' '
          )"
          echo "files=$files" >> "$GITHUB_OUTPUT"

      # `steps.filesfolders.outputs.files` only ever contains digit-only
      # tokens (see the grep above), so it is interpolated directly below.
      - name: List all added files
        shell: bash {0}
        run: |
          for f in ${{ steps.filesfolders.outputs.files }}; do
            ls -lh raw_data/$f
          done

      - name: Python dependencies
        run: pip install -r scripts/Python/requirements.txt

      - name: Setup java
        uses: actions/setup-java@v3
        with:
          distribution: 'temurin'
          java-version: '17'

      # Exported via GITHUB_ENV so the cache step below and later R steps see
      # the same renv root.
      - name: Set RENV_PATHS_ROOT
        shell: bash
        run: |
          echo "RENV_PATHS_ROOT=${{ runner.temp }}/renv" >> $GITHUB_ENV

      - name: Setup R
        uses: r-lib/actions/setup-r@v2

      # Cache is keyed on the OS and the hash of all renv.lock files, with a
      # fallback to any earlier renv cache for the same OS.
      - name: Restore Renv package cache
        uses: actions/cache@v3
        with:
          path: ${{ env.RENV_PATHS_ROOT }}
          key: ${{ runner.os }}-renv-${{ hashFiles('**/renv.lock') }}
          restore-keys: |
            ${{ runner.os }}-renv-

      - name: Install and activate renv
        shell: Rscript {0}
        run: |
          install.packages("renv")
          renv::restore()

      # --- Preprocessing: each script receives the changed dataset ids. ---
      - name: Standardize compounds
        run: Rscript scripts/R_ci/compounds_standardize.R ${{ steps.filesfolders.outputs.files }}

      - name: Compounds classyfire classes
        run: Rscript scripts/R_ci/compounds_classyfire.R ${{ steps.filesfolders.outputs.files }}

      - name: Compounds descriptors
        run: Rscript scripts/R_ci/compounds_descriptors.R ${{ steps.filesfolders.outputs.files }}

      - name: Compounds fingerprints
        run: Rscript scripts/R_ci/compounds_fingerprints.R ${{ steps.filesfolders.outputs.files }}

      - name: Metadata standardization
        run: Rscript scripts/R_ci/metadata_standardize.R ${{ steps.filesfolders.outputs.files }}

      - name: Generate dataset reports
        run: Rscript scripts/R_ci/compounds_overview.R ${{ steps.filesfolders.outputs.files }}

      - name: Verify that required files are present
        run: Rscript scripts/R_ci/files_complete.R ${{ steps.filesfolders.outputs.files }}

      # --- Overview and validation: best effort (continue-on-error), a ---
      # --- failure here does not fail the job.                         ---
      - name: Update overview table of all datasets
        run: python3 scripts/Python/datasets_overview.py
        continue-on-error: true

      - name: QSPR-based validation
        run: python3 scripts/Python/validation_qspr.py ${{ steps.filesfolders.outputs.files }}
        continue-on-error: true

      - name: Retention order-based validation for datasets with nominally identical setups
        run: python3 scripts/Python/validation_order.py --mode same_condition ${{ steps.filesfolders.outputs.files }}
        continue-on-error: true

      - name: Retention order-based validation for datasets of systematic measurements
        run: python3 scripts/Python/validation_order.py --mode systematic ${{ steps.filesfolders.outputs.files }}
        continue-on-error: true

      # Commit everything under processed_data/ and raw_data/ and push it to
      # `origin` (the repository checked out in the first step).
      - name: Commit preprocessing
        run: |
          git config --global user.email 'actions@github.com'
          git config --global user.name 'Github Actions'
          # Use LFS storage of main repository: no push access to fork LFS storage
          # TODO: change once repository is moved
          git config lfs.url 'https://github.com/michaelwitting/RepoRT.git/info/lfs'
          git add processed_data raw_data
          git commit -m "Preprocessing ${{ steps.filesfolders.outputs.files }}"
          git lfs push origin HEAD # first push LFS, otherwise failure because of lfs.url
          git push origin HEAD

      # Post the stdout of scripts/Python/report.py as a PR comment.
      - name: Add comment with report to PR
        uses: actions/github-script@v6
        env:
          DATASETS: ${{ steps.filesfolders.outputs.files }}
        with:
          script: |
            // Dataset ids arrive via the environment; empty tokens are dropped
            // so an empty list does not become a single "" argument.
            const datasets = (process.env.DATASETS || '').trim().split(/\s+/).filter(Boolean)
            const report = await exec.getExecOutput('python3 scripts/Python/report.py', datasets)
            // Await the request so a rejected API call surfaces as a failure
            // of this step instead of an unhandled promise rejection.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: report.stdout
            })
        continue-on-error: true

      # NOTE(review): `@master` is a mutable ref; consider pinning the labeler
      # action to a release tag or commit SHA.
      - name: Label as successfully preprocessed
        if: ${{ success() }}
        uses: andymckay/labeler@master
        with:
          add-labels: "preprocessing successful"
          remove-labels: "preprocessing failed"

      # Runs before the debug step so the label is applied immediately and is
      # not lost if the job is cancelled while a debug session is still open.
      - name: Label as failed
        if: ${{ failure() }}
        uses: andymckay/labeler@master
        with:
          add-labels: "preprocessing failed"
          remove-labels: "preprocessing successful"

      # NOTE(review): this keeps the job running until the tmate session ends.
      # Consider a step-level `timeout-minutes` and restricting who may
      # connect (see the action's `limit-access-to-actor` input).
      - name: Debug with tmate on failure
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3