diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 00000000..5dce83ad --- /dev/null +++ b/.codecov.yml @@ -0,0 +1,14 @@ +coverage: + status: + project: + default: + # Commits pushed to main should not make the overall + # project coverage decrease by more than 1% + target: auto + threshold: 1% + patch: + default: + # Be tolerant on slight code coverage diff on PRs to limit + # noisy red coverage status on github PRs. + target: auto + threshold: 1% diff --git a/.github/workflows/build-release-candidate.yaml b/.github/workflows/build-release-candidate.yaml new file mode 100644 index 00000000..fbdabcac --- /dev/null +++ b/.github/workflows/build-release-candidate.yaml @@ -0,0 +1,185 @@ +# Build wheels and executables for Windows + +name: build release candidate + +on: + push: + branches: + # Release branches. + # Examples: "v1", "v3.0", "v1.2.x", "1.5.0", "1.2rc0" + # Expected usage is (for example) a branch named "v1.2.x" which contains + # the latest release in the 1.2 series. 
+ - 'v[0-9]+' + - 'v?[0-9]+.[0-9x]+' + - 'v?[0-9]+.[0-9]+.[0-9x]+' + - 'v?[0-9]+.[0-9]+.[0-9]+.[0-9x]+' + - 'v?[0-9]+.[0-9x]+rc[0-9]*' + tags: + # Run whenever any tag is created + - '**' + pull_request: + branches: + # Release branches + - 'v[0-9]+' + - 'v?[0-9]+.[0-9x]+' + - 'v?[0-9]+.[0-9]+.[0-9x]+' + - 'v?[0-9]+.[0-9]+.[0-9]+.[0-9x]+' + - 'v?[0-9]+.[0-9x]+rc[0-9]*' + release: + # Run on a new release + types: [created, edited, published] + +jobs: + build-executable: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [windows-latest] + python-version: ["3.7"] + frozen: ["frozen"] + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python-version }} + + defaults: + run: + shell: cmd + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + + - name: System information + run: python .github/workflows/system_info.py + + - name: Install common dependencies + run: | + python -m pip install --upgrade pip wheel + python -m pip install --upgrade setuptools twine + python -m pip install flake8 + + - name: Install frozen dependencies + if: ${{ matrix.frozen }} + run: | + python -m pip install torch==1.4.0+cpu torchvision==0.5.0+cpu -f https://download.pytorch.org/whl/torch_stable.html + python -m pip install -r frozen_requirements.txt + + - name: Install package + run: python -m pip install .[dev,test] + + - name: Uninstall typing (for pyinstaller) + # Otherwise we get the following error on Python 3.7: + # The 'typing' package is an obsolete backport of a standard library package and + # is incompatible with PyInstaller. Please `pip uninstall typing` then try again. + run: pip uninstall -y typing + + - name: Sanity check with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + python -m flake8 . 
--count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings + python -m flake8 . --count --exit-zero --statistics + + - name: Debug environment + run: python -m pip freeze + + - name: Test with pytest + run: | + python -m pytest --cov=echofilter --cov-report term --cov-report xml --cov-config .coveragerc --junitxml=testresults.xml + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 + with: + token: ${{ secrets.CODECOV_TOKEN }} + flags: unittests + env_vars: OS,PYTHON + name: Python ${{ matrix.python-version }} on ${{ runner.os }} + + - name: Build wheels + run: | + python setup.py sdist bdist_wheel + + - name: Compile echofilter inference exe + run: | + pyinstaller bin\inference.py ^ + --name echofilter ^ + --add-data echofilter\checkpoints.yaml;. ^ + --icon images/deepsense.ico ^ + --hidden-import pkg_resources.py2_warn ^ + --hidden-import colorama ^ + --hidden-import torch ^ + --hidden-import torchvision ^ + --hidden-import Pillow ^ + --hidden-import numpy ^ + --hidden-import matplotlib ^ + --hidden-import scipy ^ + --hidden-import scipy.spatial ^ + --hidden-import pandas ^ + --hidden-import scikit-image ^ + --hidden-import tqdm ^ + --hidden-import echofilter ^ + --hidden-import echofilter.data ^ + --hidden-import echofilter.nn ^ + --hidden-import echofilter.optim ^ + --hidden-import echofilter.raw ^ + --hidden-import echofilter.path ^ + --hidden-import echofilter.win ^ + -y --clean ^ + --distpath executable + + - name: Compile ev2csv exe + run: | + pyinstaller echofilter\ev2csv.py ^ + --icon images/deepsense.ico ^ + --hidden-import pkg_resources.py2_warn ^ + --hidden-import echofilter.path ^ + --hidden-import echofilter.win ^ + --hidden-import tqdm ^ + -y --clean ^ + --distpath executable + move executable\ev2csv\ev2csv.exe executable\echofilter\ + rd /s /q executable\ev2csv + + - name: Test show inference help + run: executable\echofilter\echofilter.exe --help + + - name: Test show inference version 
+ run: executable\echofilter\echofilter.exe --version + + - name: Test list-checkpoints + run: executable\echofilter\echofilter.exe --list-checkpoints + + - name: Test list-colors + run: executable\echofilter\echofilter.exe --list-colors + + - name: Test dry-run + run: executable\echofilter\echofilter.exe test-resources -n + + - name: Test run + run: executable\echofilter\echofilter.exe test-resources + + - name: Test skip after already ran + run: executable\echofilter\echofilter.exe test-resources -s + + - name: Test show ev2csv help + run: executable\echofilter\ev2csv.exe --help + + - name: Test show ev2csv version + run: executable\echofilter\ev2csv.exe --version + + - name: Store wheel artifacts + uses: actions/upload-artifact@v3 + with: + name: wheel-${{ matrix.os }}-py${{ matrix.python-version }} + path: dist/* + + - name: Store executable artifacts + uses: actions/upload-artifact@v3 + with: + name: executable-windows-py${{ matrix.python-version }} + path: executable/* diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c91f681a..5c273f25 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -23,7 +23,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: ["3.6", "3.7"] + python-version: ["3.6", "3.10"] env: OS: ${{ matrix.os }} PYTHON: ${{ matrix.python-version }} @@ -50,13 +50,19 @@ jobs: key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-${{ hashFiles('**/requirements*.txt') }} restore-keys: ${{ runner.os }}-pip- - - name: Install dependencies + - name: Install common dependencies run: | - python -m pip install --upgrade pip + python -m pip install --upgrade pip wheel python -m pip install flake8 + + - name: Install frozen dependencies + if: ${{ matrix.frozen }} + run: | python -m pip install torch==1.4.0+cpu torchvision==0.5.0+cpu -f https://download.pytorch.org/whl/torch_stable.html python -m pip install -r frozen_requirements.txt - python -m pip install .[test] + + - name: Install 
package + run: python -m pip install .[test] - name: Get appdirs cache dir id: appdirs-cache @@ -84,8 +90,9 @@ jobs: python -m pytest --cov=echofilter --cov-report term --cov-report xml --cov-config .coveragerc --junitxml=testresults.xml - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v3 with: + token: ${{ secrets.CODECOV_TOKEN }} flags: unittests env_vars: OS,PYTHON name: Python ${{ matrix.python-version }} on ${{ runner.os }} diff --git a/.gitignore b/.gitignore index 5bb35a0e..4a4688eb 100644 --- a/.gitignore +++ b/.gitignore @@ -23,8 +23,12 @@ models/ notes/ processed/ results/ +test-resources/*.evl +test-resources/*.evr test-output/ test-resources-source/ +executable/ +executable2/ ##--------------------------------------------------- diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index aa7ac4b3..2383727b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,7 @@ repos: - id: nbstripout - repo: https://github.com/nbQA-dev/nbQA - rev: 1.5.2 + rev: 1.5.3 hooks: - id: nbqa-isort args: ["--profile=black"] @@ -88,7 +88,7 @@ repos: exclude: \.(html|svg)$ - repo: https://github.com/asottile/setup-cfg-fmt - rev: v2.0.0 + rev: v2.2.0 hooks: - id: setup-cfg-fmt diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a7391081..e9f60eea 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -13,6 +13,37 @@ Categories for changes are: Added, Changed, Deprecated, Removed, Fixed, Security. +Version `1.1.1 `__ +------------------------------------------------------------------------ + +Release date: 2022-11-16. +`Full commit changelog `__. + + +.. _v1.1.1 Fixed: + +Fixed +~~~~~~~ + +.. _v1.1.1 Fixed Inference: + +Inference +^^^^^^^^^ + +- EVL final value pad was for a timestamp in between the preceding two, not extending forward in time by half a timepoint. + (`#300 `__) + +.. _v1.1.1 Fixed Metadata: + +Metadata +^^^^^^^^ + +- Declare ``python_requires<3.11`` requirement. 
+ (`#302 `__) +- Declare ``torch<1.12.0`` requirement. + (`#302 `__) + + Version `1.1.0 `__ ------------------------------------------------------------------------ @@ -130,32 +161,31 @@ Internal - Add EVR reader ``echofilter.raw.loader.evr_reader``. (`#280 `__) -- Refactor ``fixup_dataset_sample``, moved into its own function - (`#279 `__) -.. _v1.1.0 Added Documentation: -Documentation -^^^^^^^^^^^^^ +Version `1.0.3 `__ +------------------------------------------------------------------------ -- Change installation instructions in Usage Guide to point to GitHub releases as the source of exe files. - (`#265 `__) -- Improve docstrings. - (`#287 `__) -- Improve README. - (`#269 `__, - `#284 `__, - `#285 `__) +Release date: 2022-11-15. +`Full commit changelog `__. +This minor patch fix addresses package metadata. -.. _v1.1.0 Miscellaneous: +.. _v1.0.3 Fixed: -Miscellaneous -~~~~~~~~~~~~~ +Fixed +~~~~~ -- Update black version, blacken notebooks, fix flake8 errors. - (`#283 `__) +.. _v1.0.3 Fixed Metadata: +Metadata +^^^^^^^^ + +- Declare ``python_requires>=3.6,<3.11`` requirement. + (`#264 `__, + `#302 `__) +- Declare ``torch<1.12.0`` requirement. + (`#302 `__) Version `1.0.2 `__ @@ -187,6 +217,7 @@ Training ^^^^^^^^ - Default optimizer changed from ``"rangerva"`` to ``"adam"``. + If you have manually installed `ranger `__ you can still use the ``"rangerva"`` optimizer if you specify it. (`#261 `__) @@ -199,21 +230,6 @@ Release date: 2022-11-06. This patch fix addresses requirement inconsistencies and documentation building. This release is provided under the `AGPLv3 `__ license. -.. _v1.0.1 Added: - -Added -~~~~~ - -.. _v1.0.1 Added Documentation: - -Documentation -^^^^^^^^^^^^^ - -- Deploy documentation on github pages. - (`#251 `__) -- Include link to built documentation in README. - (`#253 `__) - .. _v1.0.1 Changed: Changed @@ -229,14 +245,6 @@ Requirements and remove it from the requirements. (`#249 `__) -.. 
_v1.0.1 Changed Checkpoints: - -Checkpoints -^^^^^^^^^^^ - -- Look for checkpoints.yaml in repo/executable dir as well as package dir. - (`#256 `__) - .. _v1.0.1 Fixed: Fixed @@ -254,16 +262,6 @@ Release - Support for pytorch>=1.11 by dropping import of ``torch._six.container_abcs``. (`#250 `__) -.. _v1.0.1 Fixed Documentation: - -Documentation -^^^^^^^^^^^^^ - -- Fix some API docstrings and CLI help text - (`#241 `__, - `#243 `__, - `#251 `__) - Version `1.0.0 `__ ------------------------------------------------------------------------ @@ -301,21 +299,6 @@ Documentation `#234 `__, `#235 `__) -.. _v1.0.0 Fixed: - -Fixed -~~~~~ - -.. _v1.0.0 Fixed Documentation: - -Documentation -^^^^^^^^^^^^^ - -- Fix formatting of some CHANGELOG and docstrings - (`#230 `__, - `#231 `__, - `#235 `__) - Version `1.0.0rc3 `__ ------------------------------------------------------------------------------ @@ -487,10 +470,10 @@ Training - Don't record fraction of image which is active during training. (`#206 `__) -.. _v1.0.0b4 Changed General: +.. _v1.0.0b4 Changed Miscellaneous: -General -^^^^^^^ +Miscellaneous +^^^^^^^^^^^^^ - Rename top->turbulence, bot->bottom surf->surface, throughout all code. (`#190 `__) @@ -810,10 +793,10 @@ Inference `#137 `__, `#145 `__) -.. _v1.0.0b1 Changed General: +.. _v1.0.0b1 Changed Miscellaneous: -General -^^^^^^^ +Miscellaneous +^^^^^^^^^^^^^ - Set Sv values outside the range (-1e37, 1e37) to be NaN (previously values lower than -1e6 were set to NaN). (`#140 `__) @@ -860,10 +843,10 @@ Inference - Exporting raw data in ev2csv required more Echoview parameters to be disabled, such as the minimum value threshold. (`#100 `__) -.. _v1.0.0b1 Fixed General: +.. _v1.0.0b1 Fixed Miscellaneous: -General -^^^^^^^ +Miscellaneous +^^^^^^^^^^^^^ - Fixed behaviour when loading data from CSVs with different number of depth samples and range of depths for different rows in the CSV file. 
(`#102 `__, @@ -952,10 +935,10 @@ Inference - Add ``--suffix-file`` argument, will allows a suffix common to all the output files to be set. (`#152 `__) -.. _v1.0.0b1 Added General: +.. _v1.0.0b1 Added Miscellaneous: -General -^^^^^^^ +Miscellaneous +^^^^^^^^^^^^^ - Add ``-V`` alias for ``--version`` to all command line interfaces. (`#84 `__) diff --git a/README.rst b/README.rst index 472066a9..597024fd 100644 --- a/README.rst +++ b/README.rst @@ -8,7 +8,7 @@ echofilter +------------------+----------------------------------------------------------------------+ | Documentation | |readthedocs| | +------------------+----------------------------------------------------------------------+ -| Build Status | |Documentation| |GHA tests| |pre-commit-status| | +| Build Status | |Documentation| |GHA tests| |Codecov| |pre-commit-status| | +------------------+----------------------------------------------------------------------+ | Code style | |black| |pre-commit| | +------------------+----------------------------------------------------------------------+ @@ -51,7 +51,7 @@ If you encounter a specific problem please `open a new issue`_. .. _Echoview: https://www.echoview.com/ .. _doi: https://www.doi.org/10.3389/fmars.2022.867857 -.. _readthedocs: https://echofilter.readthedocs.io +.. _readthedocs: https://echofilter.readthedocs.io/en/latest/usage/ .. _open a new issue: https://github.com/DeepSenseCA/echofilter/issues/new Usage @@ -76,9 +76,9 @@ argument. echofilter --help For more details, see the -`Usage Guide `__, +`Usage Guide `__, and the -`command line interface (CLI) reference `__ +`command line interface (CLI) reference `__ documentation. @@ -94,10 +94,10 @@ To install this, download and unzip the echofilter-executable-M.N.P.zip file from the latest release in the `releases tab `__. For example: -`echofilter-executable-1.1.0.zip `__ +`echofilter-executable-1.1.1.zip `__ For more details, see the step-by-step instructions in the -`Usage Guide `__. +`Usage Guide `__. 
Note: The precompiled executable has only CPU support, and does not support running on GPU. @@ -133,12 +133,15 @@ Then install the rest of the requirements. Citing Echofilter ----------------- -If you use Echofilter for your research, we would be grateful if you could cite our -paper on echofilter in any resulting publications: +For technical details about how the Echofilter model was trained, and our +findings about its empirical results, please consult our companion paper: SC Lowe, LP McGarry, J Douglas, J Newport, S Oore, C Whidden, DJ Hasselman (2022). Echofilter: A Deep Learning Segmention Model Improves the Automation, Standardization, and Timeliness for Post-Processing Echosounder Data in Tidal Energy Streams. *Front. Mar. Sci.*, **9**, 1–21. doi: |nbsp| `10.3389/fmars.2022.867857 `_. +If you use Echofilter for your research, we would be grateful if you could cite +this paper in any resulting publications. + For your convenience, we provide a copy of this citation in `bibtex`_ format. .. _bibtex: https://raw.githubusercontent.com/DeepSenseCA/echofilter/master/CITATION.bib @@ -175,11 +178,14 @@ along with this program. If not, see . :target: https://github.com/DeepSenseCA/echofilter/actions?query=workflow%3Atest :alt: GHA Status .. |readthedocs| image:: https://img.shields.io/badge/docs-readthedocs-blue - :target: https://echofilter.readthedocs.io/ + :target: readthedocs_ :alt: Documentation .. |Documentation| image:: https://readthedocs.org/projects/echofilter/badge/ - :target: https://echofilter.readthedocs.io/ + :target: readthedocs_ :alt: Documentation Status +.. |Codecov| image:: https://codecov.io/gh/DeepSenseCA/echofilter/branch/master/graph/badge.svg?token=BGX2EJ0SSI + :target: https://codecov.io/gh/DeepSenseCA/echofilter + :alt: Coverage .. 
|DOI badge| image:: https://img.shields.io/badge/DOI-10.3389/fmars.2022.867857-blue.svg :target: doi_ :alt: DOI diff --git a/docs/Makefile b/docs/Makefile index 51285967..14ebbfec 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -4,7 +4,7 @@ # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build -SOURCEDIR = . +SOURCEDIR = source BUILDDIR = _build # Put it first so that "make" without argument is like "make help". diff --git a/docs/export_pdf.sh b/docs/export_pdf.sh index f4601d58..5f9e3fc0 100755 --- a/docs/export_pdf.sh +++ b/docs/export_pdf.sh @@ -10,38 +10,43 @@ cd "${0%/*}" #------------------ # Remove previous export rm -vf -r _build_guide +# Move guide into source directory +cp guide.rst source/ # sphinx-build does not give precedence to a master_doc argument on the # command line over the arugment in conf.py, so we have to edit the conf.py -sed -i 's/master_doc = "index"/master_doc = "guide"/' conf.py -sed -i 's/" Documentation"/" Usage Guide"/' conf.py +sed -i 's/master_doc = "index"/master_doc = "guide"/' source/conf.py +sed -i 's/" Documentation"/" Usage Guide"/' source/conf.py # Export rST to LaTeX with sphinx -sphinx-build -b latex -D master_doc='guide' -D latex_show_urls='footnote' . 
./_build_guide guide.rst +sphinx-build -b latex -D master_doc='guide' -D latex_show_urls='footnote' source ./_build_guide source/guide.rst # Remove docstring formatting indicators, which aren't stripped by # sphinx-argparse sed -in 's+^\\item\s*\[{[Rd]|}\]+\\item\[\]+' _build_guide/Echofilter.tex # Build PDF from LaTeX using the make file provided by sphinx make -C _build_guide # Restore conf.py to processing index.rst instead of guide.rst -sed -i 's/master_doc = "guide"/master_doc = "index"/' conf.py -sed -i 's/" Usage Guide"/" Documentation"/' conf.py +sed -i 's/master_doc = "guide"/master_doc = "index"/' source/conf.py +sed -i 's/" Usage Guide"/" Documentation"/' source/conf.py +rm source/guide.rst +mv _build_guide/Echofilter.pdf _build_guide/Echofilter_Usage-Guide.pdf # Build full documentation #------------------------- # Remove previous export rm -vf -r _build_pdf # Make a copy of index.rst with indices removed from TOC -cp index.rst index_pdf.rst -sed -i '/py-modindex/d' ./index_pdf.rst -sed -i '/genindex/d' ./index_pdf.rst +cp source/index.rst source/index_pdf.rst +sed -i '/py-modindex/d' source/index_pdf.rst +sed -i '/genindex/d' source/index_pdf.rst # sphinx-build does not give precedence to a master_doc argument on the # command line over the arugment in conf.py, so we have to edit the conf.py -sed -i 's/master_doc = "index"/master_doc = "index_pdf"/' conf.py +sed -i 's/master_doc = "index"/master_doc = "index_pdf"/' source/conf.py # Export rST to LaTeX with sphinx -sphinx-build -b latex . 
./_build_pdf index.rst +sphinx-build -b latex source ./_build_pdf # Remove docstring formatting indicators, which aren't stripped by # sphinx-argparse sed -in 's+^\\item\s*\[{[Rd]|}\]+\\item\[\]+' _build_pdf/Echofilter.tex # Build PDF from LaTeX using the make file provided by sphinx make -C _build_pdf # Restore conf.py to processing index.rst instead of index_pdf.rst -sed -i 's/master_doc = "index_pdf"/master_doc = "index"/' conf.py +sed -i 's/master_doc = "index_pdf"/master_doc = "index"/' source/conf.py +rm source/index_pdf.rst diff --git a/docs/guide.rst b/docs/guide.rst index 188d9b4b..002e607b 100644 --- a/docs/guide.rst +++ b/docs/guide.rst @@ -5,5 +5,5 @@ Echofilter Usage Guide :maxdepth: 3 :caption: Contents: - source/usage_guide - source/programs/programs2 + usage/index + programs/programs2 diff --git a/docs/conf.py b/docs/source/conf.py similarity index 96% rename from docs/conf.py rename to docs/source/conf.py index e95fac5e..dae093c5 100644 --- a/docs/conf.py +++ b/docs/source/conf.py @@ -15,9 +15,13 @@ import datetime import os import sys +from inspect import getsourcefile -sys.path.insert(0, os.path.abspath(".")) -sys.path.insert(0, os.path.abspath("../")) +REPO_DIR = os.path.dirname( + os.path.dirname(os.path.dirname(os.path.abspath(getsourcefile(lambda: 0)))) +) + +sys.path.insert(0, REPO_DIR) from echofilter import __meta__ as meta # noqa: E402 @@ -31,6 +35,7 @@ author = meta.author copyright = "{}, {}".format(now.year, author) +PACKAGE_DIR = os.path.join(REPO_DIR, project_path) # The full version, including alpha/beta/rc tags release = meta.version @@ -43,7 +48,7 @@ def run_apidoc(_): ignore_paths = [ - os.path.join("..", project_path, "tests"), + os.path.join(PACKAGE_DIR, "tests"), ] argv = [ @@ -53,7 +58,7 @@ def run_apidoc(_): "--module-first", # Put module documentation before submodule "-o", "source/packages", # Output path - os.path.join("..", project_path), + os.path.join(PACKAGE_DIR), ] + ignore_paths try: diff --git a/docs/genindex.rst 
b/docs/source/genindex.rst similarity index 100% rename from docs/genindex.rst rename to docs/source/genindex.rst diff --git a/docs/index.rst b/docs/source/index.rst similarity index 50% rename from docs/index.rst rename to docs/source/index.rst index 21d750d4..6de43df2 100644 --- a/docs/index.rst +++ b/docs/source/index.rst @@ -2,12 +2,12 @@ Echofilter documentation ======================== .. toctree:: - :maxdepth: 3 + :maxdepth: 2 :caption: Contents: - source/usage_guide - source/programs/programs - source/packages/modules - source/changelog + usage/index + programs/programs + packages/modules + changelog py-modindex genindex diff --git a/docs/source/programs/ev2csv.rst b/docs/source/programs/ev2csv.rst index 1752650a..f8d63f6a 100644 --- a/docs/source/programs/ev2csv.rst +++ b/docs/source/programs/ev2csv.rst @@ -1,4 +1,4 @@ -.. highlight:: powershell +.. highlight:: winbatch .. _ev2csv CLI: diff --git a/docs/source/programs/generate_shards.rst b/docs/source/programs/generate_shards.rst index 799686e7..8e300098 100644 --- a/docs/source/programs/generate_shards.rst +++ b/docs/source/programs/generate_shards.rst @@ -1,4 +1,4 @@ -.. highlight:: powershell +.. highlight:: winbatch .. _echofilter-generate-shards: diff --git a/docs/source/programs/inference.rst b/docs/source/programs/inference.rst index ab8f060d..44ba475c 100644 --- a/docs/source/programs/inference.rst +++ b/docs/source/programs/inference.rst @@ -1,4 +1,4 @@ -.. highlight:: powershell +.. highlight:: winbatch .. _echofilter CLI: diff --git a/docs/source/programs/train.rst b/docs/source/programs/train.rst index cd835c75..bb58880e 100644 --- a/docs/source/programs/train.rst +++ b/docs/source/programs/train.rst @@ -1,4 +1,4 @@ -.. highlight:: powershell +.. highlight:: winbatch .. 
_echofilter-train: diff --git a/docs/py-modindex.rst b/docs/source/py-modindex.rst similarity index 100% rename from docs/py-modindex.rst rename to docs/source/py-modindex.rst diff --git a/docs/source/readme.rst b/docs/source/readme.rst deleted file mode 100644 index a6210d3d..00000000 --- a/docs/source/readme.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../../README.rst diff --git a/docs/source/usage/citing.rst b/docs/source/usage/citing.rst new file mode 100644 index 00000000..888adf2f --- /dev/null +++ b/docs/source/usage/citing.rst @@ -0,0 +1,21 @@ +Citing Echofilter +----------------- + +For technical details about how the Echofilter model was trained, and our +findings about its empirical results, please consult our companion paper: + + SC Lowe, LP McGarry, J Douglas, J Newport, S Oore, C Whidden, DJ Hasselman (2022). Echofilter: A Deep Learning Segmention Model Improves the Automation, Standardization, and Timeliness for Post-Processing Echosounder Data in Tidal Energy Streams. *Front. Mar. Sci.*, **9**, 1–21. + doi: |nbsp| `10.3389/fmars.2022.867857 `_. + +If you use Echofilter for your research, we would be grateful if you could cite +this paper in any resulting publications. + +For your convenience, we provide a copy of this citation in `bibtex`_ format. + +You can browse papers which utilise Echofilter `here `_. + +.. |nbsp| unicode:: 0xA0 + :trim: +.. _doi: https://www.doi.org/10.3389/fmars.2022.867857 +.. _bibtex: https://raw.githubusercontent.com/DeepSenseCA/echofilter/master/CITATION.bib +.. _gscholarcitations: https://scholar.google.com/scholar?cites=18122679926970563847 diff --git a/docs/source/usage/command_line_primer.rst b/docs/source/usage/command_line_primer.rst new file mode 100644 index 00000000..cd6b2371 --- /dev/null +++ b/docs/source/usage/command_line_primer.rst @@ -0,0 +1,146 @@ +Command line interface primer +----------------------------- + +.. 
highlight:: winbatch + +In this section, we provide some pointers for users new to using the +command prompt. + +Spaces in file names +~~~~~~~~~~~~~~~~~~~~ + +Running commands on files with spaces in their file names is +problematic. This is because spaces are used to separate arguments from +each other, so for instance:: + + command-name some path with spaces + +is actually running the command ``command-name`` with four arguments: +``some``, ``path``, ``with``, and ``spaces``. + +You can run commands on paths containing spaces by encapsulating the path +in quotes (either single, ``'``, or double ``"`` quotes), so it becomes +a single string. For instance:: + + command-name "some path with spaces" + +In the long run, you may find it easier to change your directory +structure to not include any spaces in any of the names of directories +used for the data. + +Trailing backslash +~~~~~~~~~~~~~~~~~~ + +The backslash (``\``) character is an +`escape character `__, +used to give alternative meanings to symbols with special meanings. +For example, the quote characters ``"`` and ``'`` indicate the start or end +of a string but can be escaped to obtain a literal quote character. + +On Windows, ``\`` is also used to denote directories. This overloads +the ``\`` symbol with multiple meanings. For this reason, you should not +include a trailing ``\`` when specifying directory inputs. Otherwise, if you +provide the path in quotes, an input of ``"some\path\"`` will not be +registered correctly, and will include a literal ``"`` character, with +the end of the string implicitly indicated by the end of the input. +Instead, you should use ``"some\path"``. + +Alternatively, you could escape the backslash character to ensure +it is a literal backslash with ``"some\path\\"``, or use a forward +slash with ``"some/path/"`` since :ref:`echofilter` +also understands forward slashes as a directory separator. 
+ +Argument types +~~~~~~~~~~~~~~ + +Commands at the command prompt can take arguments. There are a couple of +types of arguments: + +- mandatory, positional arguments + +- optional arguments + + - shorthand arguments which start with a single hyphen (``-v``) + + - longhand arguments which start with two hyphens (``--verbose``) + +For :ref:`echofilter`, the only positional argument is +the path to the file(s) or directory(ies) to process. + +Arguments take differing numbers of parameters. +For :ref:`echofilter` the positional argument (files to +process) must have at least one entry and can contain as many as you like. + +Arguments which take zero parameters are sometimes called flags, such as +the flag ``--skip-existing`` + +Shorthand arguments can be given together, such as ``-vvfsn``, which is the +same as all of ``--verbose --verbose --force --skip --dry-run``. + +In the help documentation, arguments which require at least one value to +be supplied have text in capitals after the argument, such as +``--suffix-var SUFFIX_VAR``. Arguments which have synonyms are listed +together in one entry, such as ``--skip-existing``, ``--skip``, ``-s``; and +``--output-dir OUTPUT_DIR``, ``-o OUTPUT_DIR``. Arguments where a variable is +optional have it shown in square brackets, such as +``--cache-csv [CSV_DIR]``. Arguments which accept a variable number of values +are shown such as ``--extension SEARCH_EXTENSION [SEARCH_EXTENSION ...]``. +Arguments whose value can only take one of a set number of options are +shown in curly brackets, such as ``--facing {downward,upward,auto}``. + + +Breaking up long lines +~~~~~~~~~~~~~~~~~~~~~~ + +To increase readability, long lines for commands at the command prompt (or in +scripts) can be broken up into multiple lines by using a continuation character. 
+Writing the continuation character at the very end of a line indicates that the +new line character which follows it should be ignored, and both lines should +be treated together as if they were one line. + +On Linux, the line continuation character is ``\`` (backslash). + +.. code-block:: bash + + cp "path/to/source/file_with_a_very_very_long_filename" \ + "path/to/destination/location/" + +On Windows, the line continuation character depends on the command prompt being used. + +In the `Windows command prompt (cmd.exe) application `_, which is used to +run Windows batch (.bat) files, the line continuation character is ``^`` (caret). + +.. code-block:: winbatch + + copy "path\to\source\file_with_a_very_very_long_filename" ^ + "path\to\destination\location\" + +In the Windows command prompt, when you are separating out arguments you must +make sure you include at least one space at the start of the second line. +There must be spaces between arguments for them to be registered as distinct +arguments, and for some reason only having a space before the ``^`` on the +preceding line does not work. + +In the Windows `PowerShell application `_, the line continuation +character is ````` (backtick). + +.. code-block:: powershell + + copy "path\to\source\file_with_a_very_very_long_filename" ` + "path\to\destination\location\" + +Please note that, in all cases, the line continuation character must be the very +final character on the line. If there is whitespace after the continuation +character, that will stop the line continuation character from actually merging +the lines together. In that case, the two lines will be executed as separate +commands (which may result in an error, or if not will not result in the +expected behaviour). + +.. _cmd: https://learn.microsoft.com/en-us/windows-server/administration/windows-commands/windows-commands +.. _PowerShell: https://learn.microsoft.com/en-us/powershell/ + +.. highlight:: python + +.. 
raw:: latex + + \clearpage diff --git a/docs/source/usage/glossary.rst b/docs/source/usage/glossary.rst new file mode 100644 index 00000000..1c413042 --- /dev/null +++ b/docs/source/usage/glossary.rst @@ -0,0 +1,207 @@ +Glossary +-------- + +.. glossary:: + + Active data + Data collected while the :term:`echosounder` is emitting sonar pulses + (":term:`pings`") at regular intervals. This is the normal + operating mode for data in this project. + + Algorithm + A finite sequence of well-defined, unambiguous, + computer-implementable operations. + + Bad data regions + Regions of data which must be excluded from analysis in their entirety. + Bad data regions identified by :ref:`echofilter` come + in two forms: rectangular regions covering the full depth-extend of the + echogram for a period of time, and polygonal or contour regions + encompassing a localised area. + + Bottom line + A line separating the seafloor from the :term:`water column`. + + Checkpoint + A checkpoint file defines the weights for a particular + :term:`neural network` :term:`model`. + + Conditional model + A :term:`model` which outputs conditional probabilities. In the context + of an :term:`echofilter` model, the conditional probabilities are + :math:`p(x|\text{upfacing})` and :math:`p(x|\text{downfacing})`, + where :math:`x` is any of the :term:`model` output + types; conditional models are necessarily hybrid models. + + CSV + A comma-separated values file. The :term:`Sv` data can be exported + into this format by :term:`Echoview`. + + Dataset + A collection of data :term:`samples`. In this + project, the datasets are :term:`Sv` recordings from multiple surveys. + + Downfacing + The orientation of an :term:`echosounder` when it is located at the + surface and records from the :term:`water column` below it. + + Echofilter + A software package for defining the placement of the boundary lines + and regions required to post-process :term:`echosounder` data. + The topic of this usage guide. 
+ + echofilter.exe + The compiled :ref:`echofilter` program which can be + run on a Windows machine. + + Echogram + The two-dimensional representation of a temporal series of + :term:`echosounder`-collected data. Time is along the x-axis, and depth + along the y-axis. A common way of plotting :term:`echosounder` + recordings. + + Echosounder + An electronic system that includes a computer, transceiver, and + :term:`transducer`. The system emits sonar :term:`pings` and + records the intensity of the reflected echos at some fixed sampling + rate. + + Echoview + A Windows software application (`Echoview `__ + Software Pty Ltd, Tasmania, Australia) for hydroacoustic data + post-processing. + + Entrained air + Bubbles of air which have been submerged into the ocean by waves or + by the strong :term:`turbulence` commonly found in tidal energy + channels. + + EV file + An :term:`Echoview` file bundling :term:`Sv` data together with + associated lines and regions produced by processing. + + EVL + The :term:`Echoview` line file format. + + EVR + The :term:`Echoview` region file format. + + Inference + The procedure of using a :term:`model` to generate output predictions + based on a particular input. + + Hybrid model + A :term:`model` which has been trained on both :term:`downfacing` and + :term:`upfacing` data. + + Machine learning (ML) + The process by which an :term:`algorithm` builds a mathematical model + based on :term:`sample` data + (":term:`training data`"), in order to make predictions or decisions + without being explicitly programmed to do so. A subset of the field of + Artificial Intelligence. + + Mobile + A mobile :term:`echosounder` is one which is moving (relative to the + ocean floor) during its period of operation. + + Model + A mathematical model of a particular type of data. 
In our context, + the model understands an echogram-like input + :term:`sample` of :term:`Sv` data + (which is its input) and outputs a probability distribution for + where it predicts the :term:`turbulence` (:term:`entrained air`) + boundary, :term:`bottom boundary`, and + :term:`surface boundary` to be located, and the + probability of :term:`passive` periods and + :term:`bad data`. + + Nearfield + The region of space too close to the :term:`echosounder` to collect + viable data. + + Nearfield distance + The maximum distance which is too close to the :term:`echosounder` to + be viable for data collection. + + Nearfield line + A line placed at the :term:`nearfield distance`. + + Neural network + An artificial neural network contains layers of interconnected + neurons with weights between them. The weights are learned through a + :term:`machine learning` process. After + :term:`training`, the network is a :term:`model` mapping inputs to + outputs. + + Passive data + Data collected while the :term:`echosounder` is silent. Since the sonar + pulses are not being generated, only ambient sounds are collected. + This package is designed for analysing :term:`active data`, and hence + :term:`passive data` is marked for removal. + + Ping + An :term:`echosounder` sonar pulse event. + + Sample (model input) + A single echogram-like matrix of :term:`Sv` values. + + Sample (ping) + A single datapoint recorded at a certain temporal latency in response + to a particular :term:`ping`. + + Stationary + A stationary :term:`echosounder` is at a fixed location (relative to + the ocean floor) during its period of operation. + + Surface line + Separates atmosphere and water at the ocean surface. + + Sv + The volume backscattering strength. + + Test set + Data which was used to evaluate the ability of the :term:`model` to + generalise to novel, unseen data. + + Training + The process by which a :term:`model` is iteratively improved. 
+ + Training data + Data which was used to train the :term:`model(s)`. + + Training set + A subset (partition) of the :term:`dataset` which was used to train + the :term:`model`. + + Transducer + An underwater electronic device that converts electrical energy to + sound pressure energy. The emitted sound pulse is called a + ":term:`ping`". The device converts the returning sound pressure + energy to electrical energy, which is then recorded. + + Turbulence + In contrast to laminar flow, fluid motion in turbulent regions are + characterized by chaotic fluctuations in flow speed and direction. + Air is often entrained into the :term:`water column` in regions of + strong turbulence. + + Turbulence line + A line demarcating the depth of the end-boundary of air entrained + into the :term:`water column` by :term:`turbulence` at the sea + surface. + + Upfacing + The orientation of an :term:`echosounder` when it is located at the + seabed and records from the :term:`water column` above it. + + Validation set + Data which was used during the :term:`training` process to evaluate the + ability of the :term:`model` to generalise to novel, unseen data. + + Water column + The body of water between seafloor and ocean surface. + +.. raw:: latex + + \clearpage diff --git a/docs/source/usage/index.rst b/docs/source/usage/index.rst new file mode 100644 index 00000000..e157d8e2 --- /dev/null +++ b/docs/source/usage/index.rst @@ -0,0 +1,68 @@ +Usage Guide +########### + +.. highlight:: winbatch + +:term:`Echofilter` is an application for segmenting an echogram. 
It takes as +its input an :term:`Echoview` .EV file, and produces as its output several +lines and regions: + +- :term:`turbulence` (:term:`entrained air`) line + +- :term:`bottom (seafloor) line` + +- :term:`surface line` + +- :term:`nearfield line` + +- :term:`passive data` regions + +- \*bad data regions for entirely removed periods of time, in the form + of boxes covering the entire vertical depth + +- \*bad data regions for localised anomalies, in the form of polygonal + contour patches + +:term:`Echofilter` uses a :term:`machine learning` +:term:`model` to complete this task. The machine learning model was trained on +:term:`upfacing` :term:`stationary` and :term:`downfacing` :term:`mobile` data +provided by Fundy Ocean Research Centre for Energy +(`FORCE `__.). + +**Disclaimer** + +- The :term:`model` is only confirmed to work reliably with :term:`upfacing` + data recorded at the same location and with the same instrumentation as + the data it was trained on. It is expected to work well on a wider + range of data, but this has not been confirmed. Even on data similar + to the :term:`training data`, the :term:`model` is not perfect and it is + recommended that a human analyst manually inspects the results it generates + to confirm they are correct. + +- \* :term:`Bad data regions` are particularly challenging for the + :term:`model` to generate. Consequently, the bad data region outputs are + not reliable and should be considered experimental. By default, these + outputs are disabled. + +- Integration with :term:`Echoview` was tested for Echoview 10 and 11. + +.. raw:: latex + + \clearpage + + +.. toctree:: + :maxdepth: 3 + :caption: Contents: + + installation + command_line_primer + quick_start + inference_steps + models + citing + issues + glossary + + +.. 
highlight:: python diff --git a/docs/source/usage/inference_steps.rst b/docs/source/usage/inference_steps.rst new file mode 100644 index 00000000..2295f291 --- /dev/null +++ b/docs/source/usage/inference_steps.rst @@ -0,0 +1,278 @@ +Inference operations +-------------------- + +In this section, we describe the :term:`inference` process, its outputs and +inputs. Inference is the process of generating predictions from the +:term:`model`, and is the principal functionality of +:ref:`echofilter`. + +Processing overview +~~~~~~~~~~~~~~~~~~~ + +This is an overview of how files are processed in the :term:`inference` +pipeline. + +First, the setup: + +- If a directory input was given, determine list of files to process. + +- Download the model :term:`checkpoint`, if necessary. + +- Load the :term:`model` from the :term:`checkpoint` into memory. + +- If any file to process is an :term:`EV file`, open :term:`Echoview`. + +- If it was not already open, hide the Echoview window. + +After the :term:`model` is loaded from its checkpoint, each file is processed +in turn. The processing time for an individual file scales linearly with +the number of :term:`pings` in the file (twice as many pings = twice as +long to process). + +Each file is processed in the following steps: + +- If the input is an :term:`EV file`, export the :term:`Sv` data to + :term:`CSV` format. + + - By default, the :term:`Sv` data is taken from ``"Fileset1: Sv pings T1"``. + + - Unless ``--cache-csv`` is provided, the :term:`CSV file` is + output to a temporary file, which is deleted after the + :term:`CSV file` is imported. + +- Import the :term:`Sv` data from the :term:`CSV file`. (If the + input was a :term:`CSV file`, this is the input; if the input + was an :term:`EV file` this is the :term:`CSV file` generated + from the :term:`EV file` in the preceding step.) + +- Rescale the height of the :term:`Sv` input to have the number of pixels + expected by the :term:`model`. 
+ +- Automatically determine whether the :term:`echosounder` recording is + :term:`upfacing` or :term:`downfacing`, based on the order of the Depths + data in the :term:`CSV file`. + + - If the orientation was manually specified, issue a warning if it + does not match the detected orientation. + + - Reflect the data in the Depth dimension if it is :term:`upfacing`, so + that the shallowest :term:`samples` always occur first, + and deepest last. + +- Normalise the distribution of the :term:`Sv` intensities to match that + expected by the :term:`model`. + +- Split the input data into segments + + - Detect temporal discontinuities between :term:`pings`. + + - Split the input :term:`Sv` data into segments such that each segment + contains contiguous :term:`pings`. + +- Pass each segment of the input through the :term:`model` to generate + output probabilities. + +- Crop the depth dimension down to zoom in on the most salient data. + + - If :term:`upfacing`, crop the top off the echogram to show only 2m above + the shallowest estimated :term:`surface line` depth. + + - If :term:`downfacing`, crop the bottom off the echogram to show only 2m below the + deepest estimated :term:`bottom line` depth. + + - If more than 35% of the echogram's height (threshold value set + with ``--autocrop-threshold``) was cropped away, pass the cropped + :term:`Sv` data through the :term:`model` to get better predictions + based on the zoomed in data. + +- Line boundary probabilities are converted into output depths. + + - The boundary probabilities at each pixel are integrated to make a + cumulative probability distribution across depth, + :math:`p(\text{depth} > \text{boundary location})`. + + - The output boundary depth is estimated as the depth at which the + cumulative probability distribution first exceeds 50%. + +- Bottom, surface, and turbulence lines are output to :term:`EVL` files. 
+ + - Note: there is no EVL file for the :term:`nearfield line` since it + is at a constant depth as provided by the user and not generated by + the :term:`model`. + +- Regions are generated: + + - Regions are collated if there is a small gap between consecutive + :term:`passive data` or :term:`bad data regions`. + + - Regions which are too small (fewer than 10 pings for rectangles) + are dropped. + + - All regions are written to a single :term:`EVR` file. + +- If the input was an :term:`EV file`, the lines and regions are imported into + the :term:`EV file`, and a :term:`nearfield line` is added. + +Simulating processing +~~~~~~~~~~~~~~~~~~~~~ + +To see which files will be processed by a command and what the output +will be, run :ref:`echofilter` with the ``--dry-run`` argument. + +Input +~~~~~ + +:ref:`Echofilter` can process two types of file as its +input: .EV files and .CSV files. The :term:`EV file` input is more +user-friendly, but requires the Windows operating system, and a fully +operational :term:`Echoview` application (i.e. with an Echoview dongle). +The :term:`CSV file` format can be processed without Echoview, but +must be generated in advance from the .EV file on a system with Echoview. +The :term:`CSV files` must contain raw :term:`Sv` data (without +thresholding or masking) and in the format produced by exporting +:term:`Sv` data from Echoview. These raw :term:`CSV files` can be +exported using the utility :ref:`ev2csv`, which is provided +as a separate executable in the :ref:`echofilter` package. + +If the input path is a directory, all files in the directory are +processed. By default, all subdirectories are recursively processed; +this behaviour can be disabled with the ``--no-recursive-dir-search`` +argument. All files in the directory (and subdirectories) with an +appropriate file extension will be processed. By default, files with a +.CSV or .EV file extension (case insensitive) will be processed. 
+The file extensions to include can be set with the ``--extension`` argument. + +Multiple input files or directories can also be specified (each +separated by a space). + +By default, when processing an :term:`EV file`, the :term:`Sv` data is taken +from the ``"Fileset1: Sv pings T1"`` variable. This can be changed with the +``--variable-name`` argument. + +Loading model +~~~~~~~~~~~~~ + +The :term:`model` used to process the data is loaded from a :term:`checkpoint` +file. The executable :term:`echofilter.exe` comes with its default model +checkpoint bundled as part of the release. Aside from this, the first time a +particular model is used, the checkpoint file will be downloaded over the +internet. The checkpoint file will be cached on your system and will not need +to be downloaded again unless you clear your cache. + +Multiple models are available to select from. These can be shown by +running the command ``echofilter --list-checkpoints``. The default model +will be highlighted in the output. In general, it is recommended to use +the default checkpoint. See :ref:`Model checkpoints` below for more details. + +When running :ref:`echofilter` for :term:`inference`, the +checkpoint can be specified with the ``--checkpoint`` argument. + +If you wish to use a custom model which is not built in to :term:`echofilter`, +specify a path to the checkpoint file using the ``--checkpoint`` argument. + +Output +~~~~~~ + +Output files +^^^^^^^^^^^^ + +For each input file, :ref:`echofilter` produces the +following output files: + +.bottom.evl + An Echoview line file containing the depth of the + :term:`bottom line`. + +.regions.evr + An Echoview region file containing + spatiotemporal definitions of :term:`passive` recording + rectangle regions, :term:`bad data` full-vertical depth + rectangle regions, and :term:`bad data` anomaly + polygonal (contour) regions. + +.surface.evl + An Echoview line file containing the depth of + the :term:`surface line`. 
+ +.turbulence.evl + An Echoview line file containing the depth of + the :term:`turbulence line`. + +where ``<input>`` is the path to an input file, stripped of its file +extension. There is no :term:`EVL` file for the :term:`nearfield line`, since +it is a virtual line of fixed depth added to the :term:`EV file` during the +:ref:`Importing outputs into EV file` step. + +By default, the output files are located in the same directory as the +file being processed. The output directory can be changed with the +``--output-dir`` argument, and a user-defined suffix can be added to the +output file names using the ``--suffix`` argument. + +If the output files already exist, by default :ref:`echofilter` +will stop running and raise an error. If you want to overwrite output files +which already exist, supply the ``--overwrite-files`` argument. If you want to +skip inputs whose output files all already exist, supply the ``--skip`` +argument. Note: if both ``--skip`` and ``--overwrite-files`` are supplied, +inputs whose outputs all exist will be skipped and those inputs for +which only some of the outputs exist will have existing outputs +overwritten. + +Specific outputs can be dropped by supplying the corresponding argument +``--no-bottom-line``, ``--no-surface-line``, or ``--no-turbulence-line`` +respectively. To drop particular types of region entirely from the :term:`EVR` +output, use ``--minimum-passive-length -1``, ``--minimum-removed-length -1``, +or ``--minimum-patch-area -1`` respectively. By default, +:term:`bad data` regions (rectangles and contours) are not +included in the :term:`EVR` file. To include these, set +``--minimum-removed-length`` and ``--minimum-patch-area`` to non-negative +values. + +The lines written to the :term:`EVL` files are the raw output from the model +and do not include any offset. + +.. 
_Importing outputs into EV file: + +Importing outputs into EV file +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If the input file is an Echoview :term:`EV file`, by default +:ref:`echofilter` will import the output files into the +:term:`EV file` and save the :term:`EV file` (overwriting the original +:term:`EV file`). The behaviour can be disabled by supplying the +``--no-ev-import`` argument. + +All lines will be imported twice: once at the original depth and a +second time with an offset included. This offset ensures the exclusion +of data biased by the acoustic deadzone, and provides a margin of safety +at the bottom depth of the :term:`entrained air`. The offset moves the +:term:`surface` and :term:`turbulence` lines +downwards (deeper), and the :term:`bottom line` upwards (shallower). +The default offset is 1m for all three lines, and can be +set using the ``--offset`` argument. A different offset can be used for each +line by providing the ``--offset-bottom``, ``--offset-surface``, and +``--offset-turbulence`` arguments. + +The names of the objects imported into the :term:`EV file` have the suffix +``"_echofilter"`` appended to them, to indicate the source of the +line/region. However, if the ``--suffix`` argument was provided, that suffix +is used instead. A custom suffix for the variable names within the EV +file can be specified using the ``--suffix-var`` argument. + +If the variable name to be used for a line is already in use, the +default behaviour is to append the current datetime to the new variable +name. To instead overwrite existing line variables, supply the +``--overwrite-ev-lines`` argument. Note that existing regions will not be +overwritten (only lines). + +By default, a :term:`nearfield line` is also added to the :term:`EV file` +at a fixed range of 1.7m from the :term:`transducer` position. +The :term:`nearfield distance` can be changed as appropriate for the +:term:`echosounder` in use by setting the ``--nearfield`` parameter. 
+ +The colour and thickness of the lines can be customised using the +``--color-surface``, ``--thickness-surface`` (etc) arguments. +See ``echofilter --list-colors`` to see the list of supported colour names. + +.. raw:: latex + + \clearpage diff --git a/docs/source/usage/installation.rst b/docs/source/usage/installation.rst new file mode 100644 index 00000000..b923950a --- /dev/null +++ b/docs/source/usage/installation.rst @@ -0,0 +1,112 @@ +Installation +------------ + +.. highlight:: winbatch + +Installing as an executable file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:ref:`Echofilter` is distributed as an +:term:`executable binary file` for Windows. All +dependencies are packaged as part of the distribution. + +1. Download the zip file containing the echofilter executable as follows: + + a. Go to the `releases tab `__ of the echofilter repository. + + b. Select the release to download. It is recommended to use the latest + version, with the highest release number. + + c. Click on the file named echofilter-executable-M.N.P.zip, where M.N.P is + replaced with the version number, to download it. + For example: + `echofilter-executable-1.1.1.zip `__ + + Alternatively, the zipped executables can be downloaded from a mirror on + `GDrive `__. + +2. Unzip the zip file, and put the directory contained within it + wherever you like on your Windows machine. It is recommended to put + it as an "echofilter" directory within your Programs folder, or + similar. (You may need the + `WinZip `__ or + `7z `__ application to unzip + the .zip file.) + +3. In File Explorer, + + a. navigate to the echofilter directory you unzipped. This directory + contains a file named :term:`echofilter.exe`. + + b. left click on the echofilter directory containing the + :term:`echofilter.exe` file + + c. Shift+Right click on the echofilter directory + + d. select "Copy as path" + + e. paste the path into a text editor of your choice (e.g. Notepad) + +4. 
Find and open the Command Prompt application (your Windows machine + comes with this pre-installed). That application is also called + cmd.exe. It will open a window containing a terminal within which + there is a command prompt where you can type to enter commands. + +5. Within the Command Prompt window (the terminal window): + + a. type: ``"cd "`` (without quote marks, with a trailing space) and + then right click and select paste in order to paste the full path + to the echofilter directory, which you copied to the clipboard + in step 3d. + + b. press enter to run this command, which will change the current + working directory of the terminal to the echofilter directory. + + c. type: ``echofilter --version`` + + d. press enter to run this command + + e. you will see the version number of echofilter printed in the + terminal window + + f. type: ``echofilter --help`` + + g. press enter to run this command + + h. you will see the help for echofilter printed in the terminal + window + +6. (Optional) So that you can just run :ref:`echofilter` + without having to change directory (using the ``cd`` command) to the + directory containing :term:`echofilter.exe`, or use the full path to + :term:`echofilter.exe`, every time you want to use it, it is useful to + add echofilter to the PATH environment variable. This step is entirely + optional and for your convenience only. The PATH environment variable + tells the terminal where it should look for executable commands. + + a. Instructions for how to do this depend on your version of Windows + and can be found here: + `https://www.computerhope.com/issues/ch000549.htm `__. + + b. An environment variable named PATH (case-insensitive) should + already exist. + + c. If this is a string, you need to edit the string and prepend the + path from 3e, plus a semicolon. 
For example, change the + current value of + ``C:\Program Files;C:\Winnt;C:\Winnt\System32`` + into + ``C:\Program Files\echofilter;C:\Program Files;C:\Winnt;C:\Winnt\System32`` + + d. If this is a list of strings (without semicolons), add your path + from 3e (e.g. ``C:\Program Files\echofilter``) to the list + +7. You can now run :ref:`echofilter` on some files, by using + the echofilter command in the terminal. :ref:`Example commands` are shown + below. + +.. highlight:: python + +.. raw:: latex + + \clearpage diff --git a/docs/source/usage/issues.rst b/docs/source/usage/issues.rst new file mode 100644 index 00000000..82ba40a6 --- /dev/null +++ b/docs/source/usage/issues.rst @@ -0,0 +1,79 @@ +Issues +------ + +Known issues +~~~~~~~~~~~~ + +There is a memory leak somewhere in :ref:`echofilter`. +Consequently, its memory usage will slowly rise while it is in use. +When processing a very large number of files, you may eventually run out +of memory. In this case, you must close the Command Window (to release +the memory). You can then restart :ref:`echofilter` +from where it was up to, or run the same command with the ``--skip`` +argument, to process the rest of the files. + +Troubleshooting +~~~~~~~~~~~~~~~ + +- If you run out of memory after processing a single file, consider + closing other programs to free up some memory. If this does not help, + report the issue. + +- If you run out of memory when part way through processing a large + number of files, restart the process by running the same command with + the ``--skip`` argument. See the known issues section above. + +- If you have a problem using a :term:`checkpoint` for the first time: + + - check your internet connection + + - check that you have at least 100MB of hard-drive space available + to download the new checkpoint + + - if you have an error saying the checkpoint was not recognised, + check the spelling of the checkpoint name. 
+ +- If you receive error messages about writing or loading + :term:`CSV files` automatically generated from + :term:`EV files`, check that sufficient hard-drive space is + available. + +- If you experience problems with operations which occur inside + :term:`Echoview`, please re-run the code but manually open Echoview + before running :ref:`echofilter`. This will leave the + Echoview window open and you will be able to read the error message + within Echoview. + +Reporting an issue +~~~~~~~~~~~~~~~~~~ + +If you experience a problem with :term:`echofilter`, please report it by +`creating a new issue on our repository `__ +if possible, or otherwise by emailing scottclowe@gmail.com. + +Please include: + +- Which version of echofilter you are using. This is found by running + the command ``echofilter --version``. + +- The operating system you are using. + On Windows 10, system information can be found by going to + ``Start > Settings > System > About``. + Instructions for other Windows versions can be + `found here `__. + +- If you are using Echoview integration, your Echoview version number + (which can be found by going to ``Help > About`` in Echoview), and + whether you have and are using an Echoview HASP USB dongle. + +- What you expected to happen. + +- What actually happened. + +- All steps/details necessary to reproduce the issue. + +- Any error messages which were produced. + +.. raw:: latex + + \clearpage diff --git a/docs/source/usage/models.rst b/docs/source/usage/models.rst new file mode 100644 index 00000000..29ef6d92 --- /dev/null +++ b/docs/source/usage/models.rst @@ -0,0 +1,135 @@ +Pre-trained models +------------------ + +The currently available model checkpoints can be seen by running the +command:: + + echofilter --list-checkpoints + +All current checkpoints were trained on data acquired by +`FORCE `__. 
+ +Training Datasets +~~~~~~~~~~~~~~~~~ + +Stationary +^^^^^^^^^^ + +:data collection: + bottom-mounted :term:`stationary`, autonomous + +:orientation: + uplooking + +:echosounder: + 120 kHz Simrad WBAT + +:locations: + + - FORCE tidal power demonstration site, Minas Passage + + - 45°21'47.34"N 64°25'38.94"W + - December 2017 through November 2018 + + - SMEC, Grand Passage + + - 44°15'49.80"N 66°20'12.60"W + - December 2019 through January 2020 + +:organization: + FORCE + +Mobile +^^^^^^ + +:data collection: + vessel-based 24-hour transect surveys + +:orientation: + downlooking + +:echosounder: + 120 kHz Simrad EK80 + +:locations: + + - FORCE tidal power demonstration site, Minas Passage + + - 45°21'57.58"N 64°25'50.97"W + - May 2016 through October 2018 + +:organization: + FORCE + +.. _Model checkpoints: + +Model checkpoints +~~~~~~~~~~~~~~~~~ + +The architecture used for all current models is a U-Net with a backbone +of 6 EfficientNet blocks in each direction (encoding and decoding). +There are horizontal skip connections between compression and expansion +blocks at the same spatial scale and a latent space of 32 channels +throughout the network. The depth dimension of the input is halved +(doubled) after each block, whilst the time dimension is halved +(doubled) every other block. + +Details for notable model checkpoints are provided below. + +:conditional_mobile-stationary2_effunet6x2-1_lc32_v2.2: + + - Trained on both :term:`upfacing` :term:`stationary` and + :term:`downfacing` :term:`mobile` data. + + - Jaccard Index of **96.84%** on :term:`downfacing` :term:`mobile` and + **94.51%** on :term:`upfacing` :term:`stationary` + :term:`validation` data. + + - Default model checkpoint. + +:conditional_mobile-stationary2_effunet6x2-1_lc32_v2.1: + + - Trained on both :term:`upfacing` :term:`stationary` and + :term:`downfacing` :term:`mobile` data. 
+ + - Jaccard Index of 96.8% on :term:`downfacing` :term:`mobile` and + 94.4% on :term:`upfacing` :term:`stationary` + :term:`validation` data. + +:conditional_mobile-stationary2_effunet6x2-1_lc32_v2.0: + + - Trained on both :term:`upfacing` :term:`stationary` and + :term:`downfacing` :term:`mobile` data. + + - Jaccard Index of 96.62% on :term:`downfacing` :term:`mobile` and + 94.29% on :term:`upfacing` :term:`stationary` + :term:`validation` data. + + - :term:`Sample` outputs on :term:`upfacing` + :term:`stationary` data were thoroughly verified via manual inspection + by trained analysts. + +:stationary2_effunet6x2-1_lc32_v2.1: + + - Trained on :term:`upfacing` :term:`stationary` data only. + + - Jaccard Index of 94.4% on :term:`upfacing` :term:`stationary` + :term:`validation` data. + +:stationary2_effunet6x2-1_lc32_v2.0: + + - Trained on :term:`upfacing` :term:`stationary` data only. + + - Jaccard Index of 94.41% on :term:`upfacing` :term:`stationary` + :term:`validation` data. + + - :term:`Sample` outputs were thoroughly + verified via manual inspection by trained analysts. + +:mobile_effunet6x2-1_lc32_v1.0: + + - Trained on :term:`downfacing` :term:`mobile` data only. + +.. raw:: latex + + \clearpage diff --git a/docs/source/usage/quick_start.rst b/docs/source/usage/quick_start.rst new file mode 100644 index 00000000..66cd079e --- /dev/null +++ b/docs/source/usage/quick_start.rst @@ -0,0 +1,236 @@ +Quick Start +----------- + +.. highlight:: winbatch + +Note that it is recommended to close :term:`Echoview` before running +:ref:`echofilter` so that :ref:`echofilter` +can run its own Echoview instance in the background. +After :ref:`echofilter` has started processing the files, +you can open Echoview again for your own use without interrupting +:ref:`echofilter`. 
+ +Recommended first time usage +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The first time you use :ref:`echofilter`, you should run +it in simulation mode (by supplying the ``--dry-run`` argument) +before-hand so you can see what it will do:: + + echofilter some/path/to/directory_or_file --dry-run + +The path you supply to :ref:`echofilter` can be an +absolute path, or a relative path. If it is a relative path, it should be +relative to the current working directory of the command prompt. + +.. _Example commands: + +Example commands +~~~~~~~~~~~~~~~~ + +Review echofilter's documentation help within the terminal:: + + echofilter --help + +Specifying a single file to process, using an absolute path:: + + echofilter "C:\Users\Bob\Desktop\MinasPassage\2020\20200801_SiteA.EV" + +Specifying a single file to process, using a path relative to the +current directory of the command prompt:: + + echofilter "MinasPassage\2020\20200801_SiteA.EV" + +Simulating processing of a single file, using a relative path:: + + echofilter "MinasPassage\2020\20200801_SiteA.EV" --dry-run + +Specifying a directory of :term:`upfacing` :term:`stationary` data to process, +and excluding the bottom line from the output:: + + echofilter "C:\Users\Bob\OneDrive\Desktop\MinasPassage\2020" --no-bottom-line + +Specifying a directory of :term:`downfacing` :term:`mobile` data to process, +and excluding the surface line from the output:: + + echofilter "C:\Users\Bob\Documents\MobileSurveyData\Survey11" --no-surface-line + +Processing the same directory after some files were added to it, +skipping files already processed:: + + echofilter "C:\Users\Bob\Documents\MobileSurveyData\Survey11" --no-surface --skip + +Processing the same directory after some files were added to it, +overwriting files already processed:: + + echofilter "C:\Users\Bob\Documents\MobileSurveyData\Survey11" --no-surface --force + +Ignoring all :term:`bad data regions` (default), +using ``^`` to break up the long command into multiple lines for 
Windows cmd:: + + echofilter "path/to/file_or_directory" ^ + --minimum-removed-length -1 ^ + --minimum-patch-area -1 + +Including :term:`bad data regions` in the :term:`EVR` output:: + + echofilter "path/to/file_or_directory" ^ + --minimum-removed-length 10 ^ + --minimum-patch-area 25 + +Keep line predictions during :term:`passive` periods (default +is to linearly interpolate lines during passive data collection):: + + echofilter "path/to/file_or_directory" --lines-during-passive predict + +Specifying file and variable suffix, and line colours and thickness:: + + echofilter "path/to/file_or_directory" ^ + --suffix "_echofilter-model" ^ + --color-surface "green" --thickness-surface 4 ^ + --color-nearfield "red" --thickness-nearfield 3 + +Processing a file with more output messages displayed in the terminal:: + + echofilter "path/to/file_or_directory" --verbose + +Processing a file and sending the output to a log file instead of the +terminal:: + + echofilter "path/to/file_or_directory" -v > path/to/log_file.txt 2>&1 + + +Config file +~~~~~~~~~~~ + +You may find that you are setting some parameters every time you call +echofilter, to consistently tweak the input or output processing settings in the +same way. +If this is the case, you can save these arguments to a configuration file, +and pass the configuration file to echofilter instead. + +For example, if you have a file named ``"echofilter_params.cfg"`` with the following contents: + +.. code-block:: winbatch + :caption: echofilter_params.cfg + :name: echofilter_params.cfg + + --suffix "_echofilter-model" + --color-surface "green" + --thickness-surface 4 + --color-nearfield "red" + --thickness-nearfield 3 + +then you can call echofilter with this configuration file as follows:: + + echofilter "file_or_dir" --config "path/to/echofilter_params.cfg" + +and it will use the parameters specified in your config file. 
+The format of the parameters is the same as they would be on the command prompt,
+except in the config file each parameter must be on its own line.
+
+The parameters in the config file also can be added to, or even overridden, at
+the command prompt.
+For example::
+
+    echofilter "file_or_dir" --config "path/to/echofilter_params.cfg" --suffix "_test"
+
+will use the ``--suffix "_test"`` argument from the command prompt instead of
+the value set in the file ``"echofilter_params.cfg"``, but will still use the
+other parameters as per the config file.
+
+If you have several different workflows or protocols which you need to use,
+you can create multiple config files corresponding to each of these workflows
+and choose which one to use with the ``--config`` argument.
+
+Common configuration options which you want to always be enabled can be set in
+a special default config file in your home directory named ``".echofilter"``.
+The path to your home directory, and hence to the default config file,
+depends on your operating system.
+On Windows it is typically ``"C:\Users\USERNAME\.echofilter"``, whilst on Linux
+it is typically ``"/home/USERNAME/.echofilter"``, where ``"USERNAME"`` is
+replaced with your username.
+If it exists, the default config file is always loaded every time you run
+echofilter.
+
+If a config file is manually provided with the ``--config`` argument, any
+parameters set in the manually provided config file override those in the
+default config file ("~/.echofilter").
+
+With the default verbosity settings, at the start of the inference routine
+echofilter outputs the set of parameters it is using, and the source for each
+of these parameters (command line, manual config file, default config file, or
+program defaults).
+
+You can read more about the `syntax for the configuration files here `__.
+
+
+Argument documentation
+~~~~~~~~~~~~~~~~~~~~~~
+
+:ref:`Echofilter` has a large number of customisation options.
+The complete list of argument options available to the user can be seen in the +:ref:`CLI Reference`, or by consulting the help for +:ref:`echofilter`. The help documentation is output to the +terminal when you run the command ``echofilter --help``. + + +Actions +~~~~~~~ + +The main :ref:`echofilter` action is to perform +:term:`inference` on a file or collection of files. However, certain +arguments trigger different actions. + +help +^^^^ + +Show :ref:`echofilter` documentation and all possible +arguments. + +.. code-block:: winbatch + + echofilter --help + +version +^^^^^^^ + +Show program's version number. + +.. code-block:: winbatch + + echofilter --version + + +list checkpoints +^^^^^^^^^^^^^^^^ + +Show the available model checkpoints and exit. + +.. code-block:: winbatch + + echofilter --list-checkpoints + +list colours +^^^^^^^^^^^^ + +List the available (main) colour options for lines. The palette can be +viewed at https://matplotlib.org/gallery/color/named_colors.html + +.. code-block:: winbatch + + echofilter --list-colors + +List all available colour options (very long list) including the XKCD +colour palette of 954 colours, which can be viewed at +https://xkcd.com/color/rgb/ + +.. code-block:: winbatch + + echofilter --list-colors full + +.. highlight:: python + +.. raw:: latex + + \clearpage diff --git a/docs/source/usage_guide.rst b/docs/source/usage_guide.rst deleted file mode 100644 index 8119dcc0..00000000 --- a/docs/source/usage_guide.rst +++ /dev/null @@ -1,1129 +0,0 @@ -Usage Guide -########### - -Authors - Scott C. Lowe, Louise McGarry - -.. highlight:: powershell - -.. raw:: latex - - \clearpage - -Introduction ------------- - -:term:`Echofilter` is an application for segmenting an echogram. 
It takes as -its input an :term:`Echoview` .EV file, and produces as its output several -lines and regions: - -- :term:`turbulence` (:term:`entrained air`) line - -- :term:`bottom (seafloor) line` - -- :term:`surface line` - -- :term:`nearfield line` - -- :term:`passive data` regions - -- \*bad data regions for entirely removed periods of time, in the form - of boxes covering the entire vertical depth - -- \*bad data regions for localised anomalies, in the form of polygonal - contour patches - -:term:`Echofilter` uses a :term:`machine learning` -:term:`model` to complete this task. The machine learning model was trained on -:term:`upfacing` :term:`stationary` and :term:`downfacing` :term:`mobile` data -provided by Fundy Ocean Research Centre for Energy -(`FORCE `__.). - -Disclaimers -~~~~~~~~~~~ - -- The :term:`model` is only confirmed to work reliably with :term:`upfacing` - data recorded at the same location and with the same instrumentation as - the data it was trained on. It is expected to work well on a wider - range of data, but this has not been confirmed. Even on data similar - to the :term:`training data`, the :term:`model` is not perfect and it is - recommended that a human analyst manually inspects the results it generates - to confirm they are correct. - -- \* :term:`Bad data regions` are particularly challenging for the - :term:`model` to generate. Consequently, the bad data region outputs are - not reliable and should be considered experimental. By default, these - outputs are disabled. - -- Integration with :term:`Echoview` was tested for Echoview 10 and 11. - -.. raw:: latex - - \clearpage - - -Glossary --------- - -.. glossary:: - - Active data - Data collected while the :term:`echosounder` is emitting sonar pulses - (":term:`pings`") at regular intervals. This is the normal - operating mode for data in this project. - - Algorithm - A finite sequence of well-defined, unambiguous, - computer-implementable operations. 
- - Bad data regions - Regions of data which must be excluded from analysis in their entirety. - Bad data regions identified by :ref:`echofilter` come - in two forms: rectangular regions covering the full depth-extend of the - echogram for a period of time, and polygonal or contour regions - encompassing a localised area. - - Bottom line - A line separating the seafloor from the :term:`water column`. - - Checkpoint - A checkpoint file defines the weights for a particular - :term:`neural network` :term:`model`. - - Conditional model - A :term:`model` which outputs conditional probabilities. In the context - of an :term:`echofilter` model, the conditional probabilities are - :math:`p(x|\text{upfacing})` and :math:`p(x|\text{downfacing})`, - where :math:`x` is any of the :term:`model` output - types; conditional models are necessarily hybrid models. - - CSV - A comma-separated values file. The :term:`Sv` data can be exported - into this format by :term:`Echoview`. - - Dataset - A collection of data :term:`samples`. In this - project, the datasets are :term:`Sv` recordings from multiple surveys. - - Downfacing - The orientation of an :term:`echosounder` when it is located at the - surface and records from the :term:`water column` below it. - - Echofilter - A software package for defining the placement of the boundary lines - and regions required to post-process :term:`echosounder` data. - The topic of this usage guide. - - echofilter.exe - The compiled :ref:`echofilter` program which can be - run on a Windows machine. - - Echogram - The two-dimensional representation of a temporal series of - :term:`echosounder`-collected data. Time is along the x-axis, and depth - along the y-axis. A common way of plotting :term:`echosounder` - recordings. - - Echosounder - An electronic system that includes a computer, transceiver, and - :term:`transducer`. The system emits sonar :term:`pings` and - records the intensity of the reflected echos at some fixed sampling - rate. 
- - Echoview - A Windows software application (`Echoview `__ - Software Pty Ltd, Tasmania, Australia) for hydroacoustic data - post-processing. - - Entrained air - Bubbles of air which have been submerged into the ocean by waves or - by the strong :term:`turbulence` commonly found in tidal energy - channels. - - EV file - An :term:`Echoview` file bundling :term:`Sv` data together with - associated lines and regions produced by processing. - - EVL - The :term:`Echoview` line file format. - - EVR - The :term:`Echoview` region file format. - - Inference - The procedure of using a :term:`model` to generate output predictions - based on a particular input. - - Hybrid model - A :term:`model` which has been trained on both :term:`downfacing` and - :term:`upfacing` data. - - Machine learning (ML) - The process by which an :term:`algorithm` builds a mathematical model - based on :term:`sample` data - (":term:`training data`"), in order to make predictions or decisions - without being explicitly programmed to do so. A subset of the field of - Artificial Intelligence. - - Mobile - A mobile :term:`echosounder` is one which is moving (relative to the - ocean floor) during its period of operation. - - Model - A mathematical model of a particular type of data. In our context, - the model understands an echogram-like input - :term:`sample` of :term:`Sv` data - (which is its input) and outputs a probability distribution for - where it predicts the :term:`turbulence` (:term:`entrained air`) - boundary, :term:`bottom boundary`, and - :term:`surface boundary` to be located, and the - probability of :term:`passive` periods and - :term:`bad data`. - - Nearfield - The region of space too close to the :term:`echosounder` to collect - viable data. - - Nearfield distance - The maximum distance which is too close to the :term:`echosounder` to - be viable for data collection. - - Nearfield line - A line placed at the :term:`nearfield distance`. 
- - Neural network - An artificial neural network contains layers of interconnected - neurons with weights between them. The weights are learned through a - :term:`machine learning` process. After - :term:`training`, the network is a :term:`model` mapping inputs to - outputs. - - Passive data - Data collected while the :term:`echosounder` is silent. Since the sonar - pulses are not being generated, only ambient sounds are collected. - This package is designed for analysing :term:`active data`, and hence - :term:`passive data` is marked for removal. - - Ping - An :term:`echosounder` sonar pulse event. - - Sample (model input) - A single echogram-like matrix of :term:`Sv` values. - - Sample (ping) - A single datapoint recorded at a certain temporal latency in response - to a particular :term:`ping`. - - Stationary - A stationary :term:`echosounder` is at a fixed location (relative to - the ocean floor) during its period of operation. - - Surface line - Separates atmosphere and water at the ocean surface. - - Sv - The volume backscattering strength. - - Test set - Data which was used to evaluate the ability of the :term:`model` to - generalise to novel, unseen data. - - Training - The process by which a :term:`model` is iteratively improved. - - Training data - Data which was used to train the :term:`model(s)`. - - Training set - A subset (partition) of the :term:`dataset` which was used to train - the :term:`model`. - - Transducer - An underwater electronic device that converts electrical energy to - sound pressure energy. The emitted sound pulse is called a - ":term:`ping`". The device converts the returning sound pressure - energy to electrical energy, which is then recorded. - - Turbulence - In contrast to laminar flow, fluid motion in turbulent regions are - characterized by chaotic fluctuations in flow speed and direction. - Air is often entrained into the :term:`water column` in regions of - strong turbulence. 
- - Turbulence line - A line demarcating the depth of the end-boundary of air entrained - into the :term:`water column` by :term:`turbulence` at the sea - surface. - - Upfacing - The orientation of an :term:`echosounder` when it is located at the - seabed and records from the :term:`water column` above it. - - Validation set - Data which was used during the :term:`training` process to evaluate the - ability of the :term:`model` to generalise to novel, unseen data. - - Water column - The body of water between seafloor and ocean surface. - -.. raw:: latex - - \clearpage - - -Inference operations --------------------- - -In this section, we describe the :term:`inference` process, its outputs and -inputs. Inference is the process of generating predictions from the -:term:`model`, and is the principal functionality of -:ref:`echofilter`. - -Processing overview -~~~~~~~~~~~~~~~~~~~ - -This is an overview of how files are processed in the :term:`inference` -pipeline. - -First, the setup: - -- If a directory input was given, determine list of files to process. - -- Download the model :term:`checkpoint`, if necessary. - -- Load the :term:`model` from the :term:`checkpoint` into memory. - -- If any file to process is an :term:`EV file`, open :term:`Echoview`. - -- If it was not already open, hide the Echoview window. - -After the :term:`model` is loaded from its checkpoint, each file is processed -in turn. The processing time for an individual file scales linearly with -the number of :term:`pings` in the file (twice as many pings = twice as -long to process). - -Each file is processed in the following steps: - -- If the input is an :term:`EV file`, export the :term:`Sv` data to - :term:`CSV` format. - - - By default, the :term:`Sv` data is taken from ``"Fileset1: Sv pings T1"``. - - - Unless ``--cache-csv`` is provided, the :term:`CSV file` is - output to a temporary file, which is deleted after the - :term:`CSV file` is imported. 
- -- Import the :term:`Sv` data from the :term:`CSV file`. (If the - input was a :term:`CSV file`, this is the input; if the input - was an :term:`EV file` this is the :term:`CSV file` generated - from the :term:`EV file` in the preceding step.) - -- Rescale the height of the :term:`Sv` input to have the number of pixels - expected by the :term:`model`. - -- Automatically determine whether the :term:`echosounder` recording is - :term:`upfacing` or :term:`downfacing`, based on the order of the Depths - data in the :term:`CSV file`. - - - If the orientation was manually specified, issue a warning if it - does not match the detected orientation. - - - Reflect the data in the Depth dimension if it is :term:`upfacing`, so - that the shallowest :term:`samples` always occur first, - and deepest last. - -- Normalise the distribution of the :term:`Sv` intensities to match that - expected by the :term:`model`. - -- Split the input data into segments - - - Detect temporal discontinuities between :term:`pings`. - - - Split the input :term:`Sv` data into segments such that each segment - contains contiguous :term:`pings`. - -- Pass the each segment of the input through the :term:`model` to generate - output probabilities. - -- Crop the depth dimension down to zoom in on the most salient data. - - - If :term:`upfacing`, crop the top off the echogram to show only 2m above - the shallowest estimated :term:`surface line` depth. - - - If :term:`downfacing`, crop the bottom off the echogram only 2m below the - deepest estimated :term:`bottom line` depth. - - - If more than 35% of the echogram's height (threshold value set - with ``--autocrop-threshold``) was cropped away, pass the cropped - :term:`Sv` data through the :term:`model` to get better predictions - based on the zoomed in data. - -- Line boundary probabilities are converted into output depths. 
- - - The boundary probabilities at each pixel are integrated to make a - cumulative probability distribution across depth, - :math:`p(\text{depth} > \text{boundary location})`. - - - The output boundary depth is estimated as the depth at which the - cumulative probability distribution first exceeds 50%. - -- Bottom, surface, and turbulence lines are output to :term:`EVL` files. - - - Note: there is no EVL file for the :term:`nearfield line` since it - is at a constant depth as provided by the user and not generated by - the :term:`model`. - -- Regions are generated: - - - Regions are collated if there is a small gap between consecutive - :term:`passive data` or :term:`bad data regions`. - - - Regions which are too small (fewer than 10 pings for rectangles) - are dropped. - - - All regions are written to a single :term:`EVR` file. - -- If the input was an :term:`EV file`, the lines and regions are imported into - the :term:`EV file`, and a :term:`nearfield line` is added. - -Simulating processing -~~~~~~~~~~~~~~~~~~~~~ - -To see which files will be processed by a command and what the output -will be, run :ref:`echofilter` with the ``--dry-run`` argument. - -Input -~~~~~ - -:ref:`Echofilter` can process two types of file as its -input: .EV files and .CSV files. The :term:`EV file` input is more -user-friendly, but requires the Windows operating system, and a fully -operational :term:`Echoview` application (i.e. with an Echoview dongle). -The :term:`CSV file` format can be processed without Echoview, but -must be generated in advance from the .EV file on a system with Echoview. -The :term:`CSV files` must contain raw :term:`Sv` data (without -thresholding or masking) and in the format produced by exporting -:term:`Sv` data from Echoview. These raw :term:`CSV files` can be -exported using the utility :ref:`ev2csv`, which is provided -as a separate executable in the :ref:`echofilter` package. 
- -If the input path is a directory, all files in the directory are -processed. By default, all subdirectories are recursively processed; -this behaviour can be disabled with the ``--no-recursive-dir-search`` -argument. All files in the directory (and subdirectories) with an -appropriate file extension will be processed. By default, files with a -.CSV or .EV file extension (case insensitive) which will be processed. -The file extensions to include can be set with the ``--extension`` argument. - -Multiple input files or directories can also be specified (each -separated by a space). - -By default, when processing an :term:`EV file`, the :term:`Sv` data is taken -from the ``"Fileset1: Sv pings T1"`` variable. This can be changed with the -``--variable-name`` argument. - -Loading model -~~~~~~~~~~~~~ - -The :term:`model` used to process the data is loaded from a :term:`checkpoint` -file. The executable :term:`echofilter.exe` comes with its default model -checkpoint bundled as part of the release. Aside from this, the first time a -particular model is used, the checkpoint file will be downloaded over the -internet. The checkpoint file will be cached on your system and will not need -to be downloaded again unless you clear your cache. - -Multiple models are available to select from. These can be shown by -running the command ``echofilter --list-checkpoints``. The default model -will be highlighted in the output. In general, it is recommended to use -the default checkpoint. See :ref:`Model checkpoints` below for more details. - -When running :ref:`echofilter` for :term:`inference`, the -checkpoint can be specified with the ``--checkpoint`` argument. - -If you wish to use a custom model which is not built in to :term:`echofilter`, -specify a path to the checkpoint file using the ``--checkpoint`` argument. 
- -Output -~~~~~~ - -Output files -^^^^^^^^^^^^ - -For each input file, :ref:`echofilter` produces the -following output files: - -.bottom.evl - An Echoview line file containing the depth of the - :term:`bottom line`. - -.regions.evr - An Echoview region file containing - spatiotemporal definitions of :term:`passive` recording - rectangle regions, :term:`bad data` full-vertical depth - rectangle regions, and :term:`bad data` anomaly - polygonal (contour) regions. - -.surface.evl - An Echoview line file containing the depth of - the :term:`surface line`. - -.turbulence.evl - An Echoview line file containing the depth of - the :term:`turbulence line`. - -where is the path to an input file, stripped of its file -extension. There is no :term:`EVL` file for the :term:`nearfield line`, since -it is a virtual line of fixed depth added to the :term:`EV file` during the -:ref:`Importing outputs into EV file` step. - -By default, the output files are located in the same directory as the -file being processed. The output directory can be changed with the -``--output-dir`` argument, and a user-defined suffix can be added to the -output file names using the ``--suffix`` argument. - -If the output files already exist, by default :ref:`echofilter` -will stop running and raise an error. If you want to overwrite output files -which already exist, supply the ``--overwrite-files`` argument. If you want to -skip inputs whose output files all already exist, supply the ``--skip`` -argument. Note: if both ``--skip`` and ``--overwrite-files`` are supplied, -inputs whose outputs all exist will be skipped and those inputs for -which only some of the outputs exist will have existing outputs -overwritten. - -Specific outputs can be dropped by supplying the corresponding argument -``--no-bottom-line``, ``--no-surface-line``, or ``--no-turbulence-line`` -respectively. 
To drop particular types of region entirely from the :term:`EVR` -output, use ``--minimum-passive-length -1``, ``--minimum-removed-length -1``, -or ``--minimum-patch-area -1`` respectively. By default, -:term:`bad data` regions (rectangles and contours) are not -included in the :term:`EVR` file. To include these, set -``--minimum-removed-length`` and ``--minimum-patch-area`` to non-negative -values. - -The lines written to the :term:`EVL` files are the raw output from the model -and do not include any offset. - -.. _Importing outputs into EV file: - -Importing outputs into EV file -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If the input file is an Echoview :term:`EV file`, by default -:ref:`echofilter` will import the output files into the -:term:`EV file` and save the :term:`EV file` (overwriting the original -:term:`EV file`). The behaviour can be disabled by supplying the -``--no-ev-import`` argument. - -All lines will be imported twice: once at the original depth and a -second time with an offset included. This offset ensures the exclusion -of data biased by the acoustic deadzone, and provides a margin of safety -at the bottom depth of the :term:`entrained air`. The offset moves the -:term:`surface` and :term:`turbulence` lines -downwards (deeper), and the :term:`bottom line` upwards (shallower). -The default offset is 1m for all three lines, and can be -set using the ``--offset`` argument. A different offset can be used for each -line by providing the ``--offset-bottom``, ``--offset-surface``, and -``--offset-turbulence`` arguments. - -The names of the objects imported into the :term:`EV file` have the suffix -``"_echofilter"`` appended to them, to indicate the source of the -line/region. However, if the ``--suffix`` argument was provided, that suffix -is used instead. A custom suffix for the variable names within the EV -file can be specified using the ``--suffix-var`` argument. 
- -If the variable name to be used for a line is already in use, the -default behaviour is to append the current datetime to the new variable -name. To instead overwrite existing line variables, supply the -``--overwrite-ev-lines`` argument. Note that existing regions will not be -overwritten (only lines). - -By default, a :term:`nearfield line` is also added to the :term:`EV file` -at a fixed range of 1.7m from the :term:`transducer` position. -The :term:`nearfield distance` can be changed as appropriate for the -:term:`echosounder` in use by setting the ``--nearfield`` parameter. - -The colour and thickness of the lines can be customised using the -``--color-surface``, ``--thickness-surface`` (etc) arguments. -See ``echofilter --list-colors`` to see the list of supported colour names. - - -Installation ------------- - -Installing as an executable file -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:ref:`Echofilter` is distributed as an -:term:`executable binary file` for Windows. All -dependencies are packaged as part of the distribution. - -1. Download the zip file containing the echofilter executable as follows: - - a. Go to the `releases tab `__ of the echofilter repository. - - b. Select the release to download. It is recommended to use the latest - version, with the highest release number. - - c. Click on the file named echofilter-executable-M.N.P.zip, where M.N.P is - replaced with the version number, to download it. - For example: - `echofilter-executable-1.1.0.zip `__ - - Alternatively, the zipped executables can be downloaded from a mirror on - `GDrive `__. - -2. Unzip the zip file, and put the directory contained within it - wherever you like on your Windows machine. It is recommended to put - it as an "echofilter" directory within your Programs folder, or - similar. (You may need the - `WinZip `__ or - `7z `__ application to unzip - the .zip file.) - -3. In File Explorer, - - a. navigate to the echofilter directory you unzipped. 
This directory - contains a file named :term:`echofilter.exe`. - - b. left click on the echofilter directory containing the - :term:`echofilter.exe` file - - c. Shift+Right click on the echofilter directory - - d. select "Copy as path" - - e. paste the path into a text editor of your choice (e.g. Notepad) - -4. Find and open the Command Prompt application (your Windows machine - comes with this pre-installed). That application is also called - cmd.exe. It will open a window containing a terminal within which - there is a command prompt where you can type to enter commands. - -5. Within the Command Prompt window (the terminal window): - - a. type: ``"cd "`` (without quote marks, with a trailing space) and - then right click and select paste in order to paste the full path - to the echofilter directory, which you copied to the clipboard - in step 3d. - - b. press enter to run this command, which will change the current - working directory of the terminal to the echofilter directory. - - c. type: ``echofilter --version`` - - d. press enter to run this command - - e. you will see the version number of echofilter printed in the - terminal window - - f. type: ``echofilter --help`` - - g. press enter to run this command - - h. you will see the help for echofilter printed in the terminal - window - -6. (Optional) So that you can just run :ref:`echofilter` - without having to change directory (using the ``cd`` command) to the - directory containing :term:`echofilter.exe`, or use the full path to - :term:`echofilter.exe`, every time you want to use it, it is useful to - add echofilter to the PATH environment variable. This step is entirely - optional and for your convenience only. The PATH environment variable - tells the terminal where it should look for executable commands. - - a. Instructions for how to do this depend on your version of Windows - and can be found here: - `https://www.computerhope.com/issues/ch000549.htm `__. - - b. 
An environment variable named PATH (case-insensitive) should - already exist. - - c. If this is a string, you need to edit the string and prepend the - path from 3e, plus a semicolon. For example, change the - current value of - ``C:\Program Files;C:\Winnt;C:\Winnt\System32`` - into - ``C:\Program Files\echofilter;C:\Program Files;C:\Winnt;C:\Winnt\System32`` - - d. If this is a list of strings (without semicolons), add your path - from 3e (e.g. ``C:\Program Files\echofilter``) to the list - -7. You can now run :ref:`echofilter` on some files, by using - the echofilter command in the terminal. :ref:`Example commands` are shown - below. - -.. raw:: latex - - \clearpage - - -Quick Start ------------ - -Note that it is recommended to close :term:`Echoview` before running -:ref:`echofilter` so that :ref:`echofilter` -can run its own Echoview instance in the background. -After :ref:`echofilter` has started processing the files, -you can open Echoview again for your own use without interrupting -:ref:`echofilter`. - -Recommended first time usage -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The first time you use :ref:`echofilter`, you should run -it in simulation mode (by supplying the ``--dry-run`` argument) -before-hand so you can see what it will do:: - - echofilter some/path/to/directory_or_file --dry-run - -The path you supply to :ref:`echofilter` can be an -absolute path, or a relative path. If it is a relative path, it should be -relative to the current working directory of the command prompt. - -.. 
_Example commands: - -Example commands -~~~~~~~~~~~~~~~~ - -Review echofilter's documentation help within the terminal:: - - echofilter --help - -Specifying a single file to process, using an absolute path:: - - echofilter "C:\Users\Bob\Desktop\MinasPassage\2020\20200801_SiteA.EV" - -Specifying a single file to process, using a path relative to the -current directory of the command prompt:: - - echofilter "MinasPassage\2020\20200801_SiteA.EV" - -Simulating processing of a single file, using a relative path:: - - echofilter "MinasPassage\2020\20200801_SiteA.EV" --dry-run - -Specifying a directory of :term:`upfacing` :term:`stationary` data to process, -and excluding the bottom line from the output:: - - echofilter "C:\Users\Bob\OneDrive\Desktop\MinasPassage\2020" --no-bottom-line - -Specifying a directory of :term:`downfacing` :term:`mobile` data to process, -and excluding the surface line from the output:: - - echofilter "C:\Users\Bob\Documents\MobileSurveyData\Survey11" --no-surface-line - -Processing the same directory after some files were added to it, -skipping files already processed:: - - echofilter "C:\Users\Bob\Documents\MobileSurveyData\Survey11" --no-surface --skip - -Processing the same directory after some files were added to it, -overwriting files already processed:: - - echofilter "C:\Users\Bob\Documents\MobileSurveyData\Survey11" --no-surface --force - -Ignoring all :term:`bad data regions` (default), -using ``^`` to break up the long command into multiple lines:: - - echofilter "path/to/file_or_directory" ^ - --minimum-removed-length -1 ^ - --minimum-patch-area -1 - -Including :term:`bad data regions` in the :term:`EVR` output:: - - echofilter "path/to/file_or_directory" ^ - --minimum-removed-length 10 ^ - --minimum-patch-area 25 - -Keep line predictions during :term:`passive` periods (default -is to linearly interpolate lines during passive data collection):: - - echofilter "path/to/file_or_directory" --lines-during-passive predict - -Specifying 
file and variable suffix, and line colours and thickness:: - - echofilter "path/to/file_or_directory" ^ - --suffix "_echofilter_stationary-model" ^ - --color-surface "green" --thickness-surface 4 ^ - --color-nearfield "red" --thickness-nearfield 3 - -Processing a file with more output messages displayed in the terminal:: - - echofilter "path/to/file_or_directory" --verbose - -Processing a file and sending the output to a log file instead of the -terminal:: - - echofilter "path/to/file_or_directory" -v > path/to/log_file.txt 2>&1 - - -Argument documentation -~~~~~~~~~~~~~~~~~~~~~~ - -:ref:`Echofilter` has a large number of customisation options. -The complete list of argument options available to the user can be seen in the -:ref:`CLI Reference`, or by consulting the help for -:ref:`echofilter`. The help documentation is output to the -terminal when you run the command ``echofilter --help``. - - -Actions -~~~~~~~ - -The main :ref:`echofilter` action is to perform -:term:`inference` on a file or collection of files. However, certain -arguments trigger different actions. - -help -^^^^ - -Show :ref:`echofilter` documentation and all possible -arguments. - -.. code-block:: powershell - - echofilter --help - -version -^^^^^^^ - -Show program's version number. - -.. code-block:: powershell - - echofilter --version - - -list checkpoints -^^^^^^^^^^^^^^^^ - -Show the available model checkpoints and exit. - -.. code-block:: powershell - - echofilter --list-checkpoints - -list colours -^^^^^^^^^^^^ - -List the available (main) colour options for lines. The palette can be -viewed at https://matplotlib.org/gallery/color/named_colors.html - -.. code-block:: powershell - - echofilter --list-colors - -List all available colour options (very long list) including the XKCD -colour palette of 954 colours, which can be viewed at -https://xkcd.com/color/rgb/ - -.. 
code-block:: powershell - - echofilter --list-colors full - - -Command line interface primer ------------------------------ - -In this section, we provide some pointers for users new to using the -command prompt. - -Spaces in file names -~~~~~~~~~~~~~~~~~~~~ - -Running commands on files with spaces in their file names is -problematic. This is because spaces are used to separate arguments from -each other, so for instance:: - - command-name some path with spaces - -is actually running the command ``command-name`` with four arguments: -``some``, ``path``, ``with``, and ``spaces``. - -You can run commands on paths containing spaces by encapsulating the path -in quotes (either single, ``'``, or double ``"`` quotes), so it becomes -a single string. For instance:: - - command-name "some path with spaces" - -In the long run, you may find it easier to change your directory -structure to not include any spaces in any of the names of directories -used for the data. - -Trailing backslash -~~~~~~~~~~~~~~~~~~ - -The backslash (``\``) character is an -`escape character `__, -used to give alternative meanings to symbols with special meanings. -For example, the quote characters ``"`` and ``'`` indicate the start or end -of a string but can be escaped to obtain a literal quote character. - -On Windows, ``\`` is also used to denote directories. This overloads -the ``\`` symbol with multiple meanings. For this reason, you should not -include a trailing ``\`` when specifying directory inputs. Otherwise, if you -provide the path in quotes, an input of ``"some\path\"`` will not be -registered correctly, and will include a literal ``"`` character, with -the end of the string implicitly indicated by the end of the input. -Instead, you should use ``"some\path"``. 
- -Alternatively, you could escape the backslash character to ensure -it is a literal backslash with ``"some\path\\"``, or use a forward -slash with ``"some/path/"`` since :ref:`echofilter` -also understands forward slashes as a directory separator. - -Argument types -~~~~~~~~~~~~~~ - -Commands at the command prompt can take arguments. There are a couple of -types of arguments: - -- mandatory, positional arguments - -- optional arguments - - - shorthand arguments which start with a single hyphen (``-v``) - - - longhand arguments which start with two hyphens (``--verbose``) - -For :ref:`echofilter`, the only positional argument is -the path to the file(s) or directory(ies) to process. - -Arguments take differing numbers of parameters. -For :ref:`echofilter` the positional argument (files to -process) must have at least one entry and can contain as many as you like. - -Arguments which take zero parameters are sometimes called flags, such as -the flag ``--skip-existing`` - -Shorthand arguments can be given together, such as ``-vvfsn``, which is the -same as all of ``--verbose --verbose --force --skip --dry-run``. - -In the help documentation, arguments which require at least one value to -be supplied have text in capitals after the argument, such as -``--suffix-var SUFFIX_VAR``. Arguments which have synonyms are listed -together in one entry, such as ``--skip-existing``, ``--skip``, ``-s``; and -``--output-dir OUTPUT_DIR``, ``-o OUTPUT_DIR``. Arguments where a variable is -optional have it shown in square brackets, such as -``--cache-csv [CSV_DIR]``. Arguments which accept a variable number of values -are shown such as ``--extension SEARCH_EXTENSION [SEARCH_EXTENSION ...]``. -Arguments whose value can only take one of a set number of options are -shown in curly brackets, such as ``--facing {downward,upward,auto}``. - -Long lines for commands at the command prompt can be broken up into -multiple lines by using a continuation character. 
On Windows, the line -continuation character is ``^``, the caret symbol. When specifying optional -arguments requires that the command be continued on the next line, -finish the current line with ``^`` and begin the subsequent line at the -start of the next line. - -Pre-trained models ------------------- - -The currently available model checkpoints can be seen by running the -command:: - - echofilter --list-checkpoints - -All current checkpoints were trained on data acquired by -`FORCE `__. - -Training Datasets -~~~~~~~~~~~~~~~~~ - -Stationary -^^^^^^^^^^ - -:data collection: - bottom-mounted :term:`stationary`, autonomous - -:orientation: - uplooking - -:echosounder: - 120 kHz Simrad WBAT - -:locations: - - - FORCE tidal power demonstration site, Minas Passage - - - 45°21'47.34"N 64°25'38.94"W - - December 2017 through November 2018 - - - SMEC, Grand Passage - - - 44°15'49.80"N 66°20'12.60"W - - December 2019 through January 2020 - -:organization: - FORCE - -Mobile -^^^^^^ - -:data collection: - vessel-based 24-hour transect surveys - -:orientation: - downlooking - -:echosounder: - 120 kHz Simrad EK80 - -:locations: - - - FORCE tidal power demonstration site, Minas Passage - - - 45°21'57.58"N 64°25'50.97"W - - May 2016 through October 2018 - -:organization: - FORCE - -.. _Model checkpoints: - -Model checkpoints -~~~~~~~~~~~~~~~~~ - -The architecture used for all current models is a U-Net with a backbone -of 6 EfficientNet blocks in each direction (encoding and decoding). -There are horizontal skip connections between compression and expansion -blocks at the same spatial scale and a latent space of 32 channels -throughout the network. The depth dimension of the input is halved -(doubled) after each block, whilst the time dimension is halved -(doubled) every other block. - -Details for notable model checkpoints are provided below. 
- -:conditional_mobile-stationary2_effunet6x2-1_lc32_v2.2: - - - Trained on both :term:`upfacing` :term:`stationary` and - :term:`downfacing` :term:`mobile` data. - - - Jaccard Index of **96.84%** on :term:`downfacing` :term:`mobile` and - **94.51%** on :term:`upfacing` :term:`stationary` - :term:`validation` data. - - - Default model checkpoint. - -:conditional_mobile-stationary2_effunet6x2-1_lc32_v2.1: - - - Trained on both :term:`upfacing` :term:`stationary` and - :term:`downfacing` :term:`mobile` data. - - - Jaccard Index of 96.8% on :term:`downfacing` :term:`mobile` and - 94.4% on :term:`upfacing` :term:`stationary` - :term:`validation` data. - -:conditional_mobile-stationary2_effunet6x2-1_lc32_v2.0: - - - Trained on both :term:`upfacing` :term:`stationary` and - :term:`downfacing` :term:`mobile` data. - - - Jaccard Index of 96.62% on :term:`downfacing` :term:`mobile` and - 94.29% on :term:`upfacing` :term:`stationary` - :term:`validation` data. - - - :term:`Sample` outputs on :term:`upfacing` - :term:`stationary` data were thoroughly verified via manual inspection - by trained analysts. - -:stationary2_effunet6x2-1_lc32_v2.1: - - - Trained on :term:`upfacing` :term:`stationary` data only. - - - Jaccard Index of 94.4% on :term:`upfacing` :term:`stationary` - :term:`validation` data. - -:stationary2_effunet6x2-1_lc32_v2.0: - - - Trained on :term:`upfacing` :term:`stationary` data only. - - - Jaccard Index of 94.41% on :term:`upfacing` :term:`stationary` - :term:`validation` data. - - - :term:`Sample` outputs were thoroughly - verified via manual inspection by trained analysts. - -:mobile_effunet6x2-1_lc32_v1.0: - - - Trained on :term:`downfacing` :term:`mobile` data only. - - -Issues ------- - -Known issues -~~~~~~~~~~~~ - -There is a memory leak somewhere in :ref:`echofilter`. -Consequently, its memory usage will slowly rise while it is in use. -When processing a very large number of files, you may eventually run out -of memory. 
In this case, you must close the Command Window (to release -the memory). You can then restart :ref:`echofilter` -from where it was up to, or run the same command with the ``--skip`` -argument, to process the rest of the files. - -Troubleshooting -~~~~~~~~~~~~~~~ - -- If you run out of memory after processing a single file, consider - closing other programs to free up some memory. If this does not help, - report the issue. - -- If you run out of memory when part way through processing a large - number of files, restart the process by running the same command with - the ``--skip`` argument. See the known issues section above. - -- If you have a problem using a :term:`checkpoint` for the first time: - - - check your internet connection - - - check that you have at least 100MB of hard-drive space available - to download the new checkpoint - - - if you have an error saying the checkpoint was not recognised, - check the spelling of the checkpoint name. - -- If you receive error messages about writing or loading - :term:`CSV files` automatically generated from - :term:`EV files`, check that sufficient hard-drive space is - available. - -- If you experience problems with operations which occur inside - :term:`Echoview`, please re-run the code but manually open Echoview - before running :ref:`echofilter`. This will leave the - Echoview window open and you will be able to read the error message - within Echoview. - -Reporting an issue -~~~~~~~~~~~~~~~~~~ - -If you experience a problem with :term:`echofilter`, please report it by -`creating a new issue on our repository `__ -if possible, or otherwise by emailing scottclowe@gmail.com. - -Please include: - -- Which version of echofilter you are using. This is found by running - the command ``echofilter --version``. - -- The operating system you are using. - On Windows 10, system information can be found by going to - ``Start > Settings > System > About``. 
- Instructions for other Windows versions can be - `found here `__. - -- If you are using Echoview integration, your Echoview version number - (which can be found by going to ``Help > About`` in Echoview), and - whether you have and are using an Echoview HASP USB dongle. - -- What you expected to happen. - -- What actually happened. - -- All steps/details necessary to reproduce the issue. - -- Any error messages which were produced. - -.. highlight:: python diff --git a/echofilter/__meta__.py b/echofilter/__meta__.py index 9c9a667c..b08b9a83 100644 --- a/echofilter/__meta__.py +++ b/echofilter/__meta__.py @@ -1,6 +1,6 @@ name = "echofilter" path = name.lower().replace("-", "_").replace(" ", "_") -version = "1.1.0" +version = "1.1.1" author = "Scott C. Lowe" author_email = "scott.code.lowe@gmail.com" description = "Remove echosounder noise by identifying the ocean floor and entrained air at the ocean surface." diff --git a/echofilter/nn/utils.py b/echofilter/nn/utils.py index 8593616c..c2fb8bae 100644 --- a/echofilter/nn/utils.py +++ b/echofilter/nn/utils.py @@ -219,6 +219,8 @@ def seed_all(seed=None, only_current_gpu=False, mirror_gpus=False): :attr:`seed` is not ``None``, each device receives a different but deterministically determined seed. Default is ``False``. + Notes + ----- Note that we override the settings for the cudnn backend whenever this function is called. 
If :attr:`seed` is not ``None``, we set:: diff --git a/echofilter/raw/loader.py b/echofilter/raw/loader.py index 53f629f0..c2500399 100644 --- a/echofilter/raw/loader.py +++ b/echofilter/raw/loader.py @@ -753,7 +753,7 @@ def evl_writer(fname, timestamps, depths, status=1, line_ending="\r\n", pad=Fals timestamps = np.r_[ timestamps[0] - (timestamps[1] - timestamps[0]) / 2, timestamps, - timestamps[-1] + (timestamps[-2] - timestamps[-1]) / 2, + timestamps[-1] + (timestamps[-1] - timestamps[-2]) / 2, ] depths = np.r_[depths[0], depths, depths[-1]] # The file object will automatically replace \n with our chosen line ending diff --git a/echofilter/tests/test_inference.py b/echofilter/tests/test_inference.py index 17088a3f..22642b01 100644 --- a/echofilter/tests/test_inference.py +++ b/echofilter/tests/test_inference.py @@ -23,11 +23,52 @@ import tempfile import pytest +from parametrize import parametrize from .. import inference +from ..raw.loader import evl_loader from ..ui import inference_cli from .base_test import BaseTestCase +EXPECTED_STATS = { + "GP_20200125T160020_first240_Sv_raw.csv": { + "timestamps": 242, + "surface_depths": [31, 32], + "turbulence_depths": [35, 41], + "bottom_depths": [49, 50], + }, + "Survey17_GR4_N5W_E_first240_Sv_raw.csv": { + "timestamps": 242, + "surface_depths": [0, 1], + "turbulence_depths": [0, 19], + "bottom_depths": [49, 57], + }, + "Survey17_GR4_N5W_E_first50-redact_Sv_raw.csv": { + "timestamps": 52, + "surface_depths": [0, 1], + "turbulence_depths": [0, 19], + "bottom_depths": [49, 57], + }, + "dec2017_20180108T045216_first600_Sv_raw.csv": { + "timestamps": 602, + "surface_depths": [15, 17], + "turbulence_depths": [22, 47], + "bottom_depths": [49, 50], + }, + "mar2018_20180513T015216_first120_Sv_raw.csv": { + "timestamps": 122, + "surface_depths": [6, 8], + "turbulence_depths": [7, 16], + "bottom_depths": [49, 50], + }, + "sep2018_20181027T022221_first720_Sv_raw.csv": { + "timestamps": 722, + "surface_depths": [11, 14], + 
"turbulence_depths": [20, 48], + "bottom_depths": [49, 50], + }, +} + class test_get_color_palette(BaseTestCase): """ @@ -72,87 +113,93 @@ class test_run_inference(BaseTestCase): Tests for run_inference. """ + def check_lines(self, input_fname, output_dirname, lines=None): + stats = EXPECTED_STATS[input_fname] + if lines is None: + lines = [ + k.replace("_depths", "") for k in stats.keys() if k != "timestamps" + ] + basefile = os.path.splitext(input_fname)[0] + for line_name in lines: + fname = os.path.join(output_dirname, f"{basefile}.{line_name}.evl") + ts, depths = evl_loader(fname) + self.assertEqual(len(ts), len(depths)) + self.assertEqual(len(ts), stats["timestamps"]) + self.assertGreaterEqual(min(depths), stats[f"{line_name}_depths"][0]) + self.assertLessEqual(max(depths), stats[f"{line_name}_depths"][1]) + def test_dryrun(self): inference.run_inference( self.resource_directory, dry_run=True, ) - def test_run_downfacing(self): - with tempfile.TemporaryDirectory() as outdirname: - inference.run_inference( - self.testfile_downfacing, - source_dir=self.resource_directory, - output_dir=outdirname, - ) - basefile = os.path.splitext(self.testfile_downfacing)[0] - self.assert_file_exists(os.path.join(outdirname, basefile + ".bottom.evl")) - self.assert_file_exists(os.path.join(outdirname, basefile + ".surface.evl")) - self.assert_file_exists( - os.path.join(outdirname, basefile + ".turbulence.evl") - ) - self.assert_file_exists(os.path.join(outdirname, basefile + ".regions.evr")) - - def test_run_upfacing(self): + @parametrize("test_fname", EXPECTED_STATS.keys()) + def test_run_files(self, test_fname): with tempfile.TemporaryDirectory() as outdirname: inference.run_inference( - self.testfile_upfacing, + test_fname, source_dir=self.resource_directory, output_dir=outdirname, ) - basefile = os.path.splitext(self.testfile_upfacing)[0] + basefile = os.path.splitext(test_fname)[0] self.assert_file_exists(os.path.join(outdirname, basefile + ".bottom.evl")) 
self.assert_file_exists(os.path.join(outdirname, basefile + ".surface.evl")) self.assert_file_exists( os.path.join(outdirname, basefile + ".turbulence.evl") ) self.assert_file_exists(os.path.join(outdirname, basefile + ".regions.evr")) + self.check_lines(test_fname, outdirname) def test_noclobber_bottom(self): with tempfile.TemporaryDirectory() as outdirname: - basefile = os.path.splitext(self.testfile_upfacing)[0] + test_fname = self.testfile_upfacing + basefile = os.path.splitext(test_fname)[0] fname = os.path.join(outdirname, basefile + ".bottom.evl") pathlib.Path(fname).touch() with pytest.raises(EnvironmentError): inference.run_inference( - self.testfile_upfacing, + test_fname, source_dir=self.resource_directory, output_dir=outdirname, ) def test_noclobber_surface(self): with tempfile.TemporaryDirectory() as outdirname: - basefile = os.path.splitext(self.testfile_upfacing)[0] + test_fname = self.testfile_upfacing + basefile = os.path.splitext(test_fname)[0] fname = os.path.join(outdirname, basefile + ".surface.evl") pathlib.Path(fname).touch() with pytest.raises(EnvironmentError): inference.run_inference( - self.testfile_upfacing, + test_fname, source_dir=self.resource_directory, output_dir=outdirname, ) def test_noclobber_turbulence(self): with tempfile.TemporaryDirectory() as outdirname: - basefile = os.path.splitext(self.testfile_upfacing)[0] + test_fname = self.testfile_upfacing + basefile = os.path.splitext(test_fname)[0] fname = os.path.join(outdirname, basefile + ".turbulence.evl") pathlib.Path(fname).touch() with pytest.raises(EnvironmentError): inference.run_inference( - self.testfile_upfacing, + test_fname, source_dir=self.resource_directory, output_dir=outdirname, ) def test_rerun_skip(self): with tempfile.TemporaryDirectory() as outdirname: - basefile = os.path.splitext(self.testfile_upfacing)[0] + test_fname = self.testfile_upfacing + basefile = os.path.splitext(test_fname)[0] pathlib.Path(os.path.join(outdirname, basefile + 
".bottom.evl")).touch() pathlib.Path(os.path.join(outdirname, basefile + ".surface.evl")).touch() pathlib.Path(os.path.join(outdirname, basefile + ".turbulence.evl")).touch() pathlib.Path(os.path.join(outdirname, basefile + ".regions.evr")).touch() inference.run_inference( - self.testfile_upfacing, + test_fname, source_dir=self.resource_directory, output_dir=outdirname, skip_existing=True, @@ -160,27 +207,30 @@ def test_rerun_skip(self): def test_rerun_overwrite(self): with tempfile.TemporaryDirectory() as outdirname: - basefile = os.path.splitext(self.testfile_upfacing)[0] + test_fname = self.testfile_upfacing + basefile = os.path.splitext(test_fname)[0] pathlib.Path(os.path.join(outdirname, basefile + ".bottom.evl")).touch() pathlib.Path(os.path.join(outdirname, basefile + ".surface.evl")).touch() pathlib.Path(os.path.join(outdirname, basefile + ".turbulence.evl")).touch() pathlib.Path(os.path.join(outdirname, basefile + ".regions.evr")).touch() inference.run_inference( - self.testfile_upfacing, + test_fname, source_dir=self.resource_directory, output_dir=outdirname, overwrite_existing=True, ) + self.check_lines(test_fname, outdirname) def test_no_bottom(self): with tempfile.TemporaryDirectory() as outdirname: + test_fname = self.testfile_upfacing inference.run_inference( - self.testfile_upfacing, + test_fname, source_dir=self.resource_directory, output_dir=outdirname, generate_bottom_line=False, ) - basefile = os.path.splitext(self.testfile_upfacing)[0] + basefile = os.path.splitext(test_fname)[0] self.assert_file_absent(os.path.join(outdirname, basefile + ".bottom.evl")) self.assert_file_exists(os.path.join(outdirname, basefile + ".surface.evl")) self.assert_file_exists( @@ -190,13 +240,14 @@ def test_no_bottom(self): def test_no_surface(self): with tempfile.TemporaryDirectory() as outdirname: + test_fname = self.testfile_upfacing inference.run_inference( - self.testfile_upfacing, + test_fname, source_dir=self.resource_directory, output_dir=outdirname, 
generate_surface_line=False, ) - basefile = os.path.splitext(self.testfile_upfacing)[0] + basefile = os.path.splitext(test_fname)[0] self.assert_file_exists(os.path.join(outdirname, basefile + ".bottom.evl")) self.assert_file_absent(os.path.join(outdirname, basefile + ".surface.evl")) self.assert_file_exists( @@ -206,13 +257,14 @@ def test_no_surface(self): def test_no_turbulence(self): with tempfile.TemporaryDirectory() as outdirname: + test_fname = self.testfile_upfacing inference.run_inference( - self.testfile_upfacing, + test_fname, source_dir=self.resource_directory, output_dir=outdirname, generate_turbulence_line=False, ) - basefile = os.path.splitext(self.testfile_upfacing)[0] + basefile = os.path.splitext(test_fname)[0] self.assert_file_exists(os.path.join(outdirname, basefile + ".bottom.evl")) self.assert_file_exists(os.path.join(outdirname, basefile + ".surface.evl")) self.assert_file_absent( @@ -222,13 +274,14 @@ def test_no_turbulence(self): def test_with_patches(self): with tempfile.TemporaryDirectory() as outdirname: + test_fname = self.testfile_upfacing inference.run_inference( - self.testfile_upfacing, + test_fname, source_dir=self.resource_directory, output_dir=outdirname, minimum_patch_area=25, ) - basefile = os.path.splitext(self.testfile_upfacing)[0] + basefile = os.path.splitext(test_fname)[0] self.assert_file_exists(os.path.join(outdirname, basefile + ".bottom.evl")) self.assert_file_exists(os.path.join(outdirname, basefile + ".surface.evl")) self.assert_file_exists( @@ -238,13 +291,14 @@ def test_with_patches(self): def test_with_logitsmoothing(self): with tempfile.TemporaryDirectory() as outdirname: + test_fname = self.testfile_upfacing inference.run_inference( - self.testfile_upfacing, + test_fname, source_dir=self.resource_directory, output_dir=outdirname, logit_smoothing_sigma=2, ) - basefile = os.path.splitext(self.testfile_upfacing)[0] + basefile = os.path.splitext(test_fname)[0] self.assert_file_exists(os.path.join(outdirname, basefile 
+ ".bottom.evl")) self.assert_file_exists(os.path.join(outdirname, basefile + ".surface.evl")) self.assert_file_exists( @@ -254,13 +308,14 @@ def test_with_logitsmoothing(self): def test_run_verbose(self): with tempfile.TemporaryDirectory() as outdirname: + test_fname = self.testfile_upfacing inference.run_inference( - self.testfile_upfacing, + test_fname, source_dir=self.resource_directory, output_dir=outdirname, verbose=10, ) - basefile = os.path.splitext(self.testfile_upfacing)[0] + basefile = os.path.splitext(test_fname)[0] self.assert_file_exists(os.path.join(outdirname, basefile + ".bottom.evl")) self.assert_file_exists(os.path.join(outdirname, basefile + ".surface.evl")) self.assert_file_exists( @@ -270,13 +325,14 @@ def test_run_verbose(self): def test_run_quiet(self): with tempfile.TemporaryDirectory() as outdirname: + test_fname = self.testfile_upfacing inference.run_inference( - self.testfile_upfacing, + test_fname, source_dir=self.resource_directory, output_dir=outdirname, verbose=0, ) - basefile = os.path.splitext(self.testfile_upfacing)[0] + basefile = os.path.splitext(test_fname)[0] self.assert_file_exists(os.path.join(outdirname, basefile + ".bottom.evl")) self.assert_file_exists(os.path.join(outdirname, basefile + ".surface.evl")) self.assert_file_exists( @@ -285,11 +341,27 @@ def test_run_quiet(self): self.assert_file_exists(os.path.join(outdirname, basefile + ".regions.evr")) def test_run_directory(self): - with tempfile.TemporaryDirectory() as outdirname: - inference.run_inference( - self.resource_directory, - output_dir=outdirname, - ) + with tempfile.TemporaryDirectory() as tempdir: + fnames = [self.testfile_upfacing, self.testfile_downfacing] + for fname in fnames: + os.symlink( + os.path.join(self.resource_directory, fname), + os.path.join(tempdir, fname), + ) + inference.run_inference(tempdir) + for test_fname in fnames: + basefile = os.path.splitext(test_fname)[0] + self.assert_file_exists(os.path.join(tempdir, basefile + ".bottom.evl")) + 
self.assert_file_exists( + os.path.join(tempdir, basefile + ".surface.evl") + ) + self.assert_file_exists( + os.path.join(tempdir, basefile + ".turbulence.evl") + ) + self.assert_file_exists( + os.path.join(tempdir, basefile + ".regions.evr") + ) + self.check_lines(test_fname, tempdir) class test_cli(BaseTestCase): diff --git a/images/deepsense.ico b/images/deepsense.ico new file mode 100644 index 00000000..ab9a817e Binary files /dev/null and b/images/deepsense.ico differ diff --git a/requirements-dev.txt b/requirements-dev.txt index 4b0738fe..7cb8f5f1 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,4 @@ black==22.10.0 identify>=1.4.20 pre-commit +pyinstaller diff --git a/requirements-test.txt b/requirements-test.txt index 376d5007..3326c209 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,2 +1,3 @@ +parametrize pytest>=3.5.0 pytest-cov diff --git a/requirements.txt b/requirements.txt index 61fa3e82..ed6e7f16 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,6 @@ pywin32; platform_system=="Windows" pyyaml scikit-image scipy -torch -torchvision +torch<1.12.0 +torchvision<0.13.0 tqdm diff --git a/setup.py b/setup.py index ae33c750..22adfaa9 100755 --- a/setup.py +++ b/setup.py @@ -116,7 +116,7 @@ def run_tests(self): setup( # Essential details on the package and its dependencies name=meta["name"], - python_requires=">=3.6", + python_requires=">=3.6,<3.11", version=meta["version"], packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]), package_dir={meta["name"]: os.path.join(".", meta["path"])}, @@ -136,10 +136,22 @@ def run_tests(self): classifiers=[ # Trove classifiers # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Science/Research", "License :: OSI Approved :: GNU Affero General Public License v3", "Natural Language :: English", "Programming Language :: Python", "Programming Language :: 
Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Scientific/Engineering :: Hydrology", + "Topic :: Scientific/Engineering :: Image Processing", + "Topic :: Scientific/Engineering :: Image Recognition", ], # Could also include keywords, download_url, project_urls, etc. entry_points={ @@ -150,6 +162,12 @@ def run_tests(self): "ev2csv=echofilter.ev2csv:main", ], }, + project_urls={ + "Documentation": "https://echofilter.readthedocs.io", + "Source Code": "https://github.com/DeepSenseCA/echofilter", + "Bug Tracker": "https://github.com/DeepSenseCA/echofilter/issues", + "Citation": "https://www.doi.org/10.3389/fmars.2022.867857", + }, # Custom commands cmdclass={"test": PyTest}, )