From 05645b2138530779f2168bb857935267e90fba8b Mon Sep 17 00:00:00 2001 From: Kalle Westerling Date: Thu, 25 May 2023 09:47:09 +0100 Subject: [PATCH 1/5] Adding action --- .github/workflows/docs.yml | 74 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 .github/workflows/docs.yml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..6994103 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,74 @@ +name: generate-docs + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + run: + runs-on: ubuntu-latest + + strategy: + matrix: + os: [ubuntu-latest] + python-version: [3.9] + fail-fast: false + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + + - name: Bootstrap poetry + shell: bash + run: | + python -m ensurepip + python -m pip install --upgrade pip + python -m pip install poetry + + - name: Configure poetry + shell: bash + run: | + python -m poetry config virtualenvs.in-project true + + # - name: Set up cache + # uses: actions/cache@v2 + # id: cache + # with: + # path: .venv + # key: venv-${{ runner.os }}-${{ steps.full-python-version.outputs.version }}-${{ hashFiles('**/poetry.lock') }} + + # - name: Ensure cache is healthy + # if: steps.cache.outputs.cache-hit == 'true' + # shell: bash + # run: timeout 10s python -m poetry run pip --version || rm -rf .venv + + # no need for `--extras docs` since we have Sphinx included in main here + - name: Install dependencies + shell: bash + run: | + python -m poetry install + + - name: Build documentation + run: | + mkdir gh-pages + touch gh-pages/.nojekyll + cd docs/ + poetry run sphinx-build -b html source _build + cp -r _build/* ../gh-pages/ + + - name: Deploy documentation + # if: ${{ github.event_name == 'push' }} + uses: 
JamesIves/github-pages-deploy-action@4.1.4 + with: + branch: gh-pages + folder: gh-pages From 94acaa63dbffa3892ac4ffd676f0ed654a0e9c82 Mon Sep 17 00:00:00 2001 From: Kalle Westerling Date: Thu, 25 May 2023 09:47:58 +0100 Subject: [PATCH 2/5] Change of `main` branch to `v0.1.0` --- .github/workflows/docs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 6994103..854fab1 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -3,10 +3,10 @@ name: generate-docs on: push: branches: - - main + - v0.1.0 pull_request: branches: - - main + - v0.1.0 jobs: run: From 5d904453b29552b790a51dc8075432f856c60bb0 Mon Sep 17 00:00:00 2001 From: Kalle Westerling Date: Thu, 25 May 2023 11:22:37 +0100 Subject: [PATCH 3/5] Updating docs to make them more manageable --- README.md | 2 +- docs/source/api/index.rst | 10 --- docs/source/api/project.rst | 6 -- docs/source/api/utils.rst | 5 -- docs/source/getting-started/index.rst | 2 +- ...roduction-to-zoonypers-data-structures.rst | 32 ++++--- .../tutorials/counting-annotations.rst | 13 ++- .../tutorials/handling-comments.rst | 45 +++++++--- .../getting-started/tutorials/index.rst | 2 +- .../tutorials/loading-a-project.rst | 81 +++++++++++------ .../setting-up-your-first-project.rst | 90 +++++++++++++------ .../working-with-workflows-and-tasks.rst | 24 +++-- docs/source/index.rst | 44 ++++++++- .../install.rst => installing.rst} | 45 ++++++---- docs/source/reference.rst | 18 ++++ 15 files changed, 285 insertions(+), 134 deletions(-) delete mode 100644 docs/source/api/index.rst delete mode 100644 docs/source/api/project.rst delete mode 100644 docs/source/api/utils.rst rename docs/source/{getting-started/install.rst => installing.rst} (71%) create mode 100644 docs/source/reference.rst diff --git a/README.md b/README.md index a56d3d5..754475f 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ $ pip install zoonyper ``` --> -Because 
this project is in **active development**, you need to install from the repository for the time being. In order to do so, follow [the installation instructions](docs/source/getting-started/install.rst). +Because this project is in **active development**, you need to install from the repository for the time being. In order to do so, follow [the installation instructions](docs/source/installing.rst). ## Documentation diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst deleted file mode 100644 index 73bdc51..0000000 --- a/docs/source/api/index.rst +++ /dev/null @@ -1,10 +0,0 @@ -API Documentation -################## - -.. rubric:: Content - -.. toctree:: - :maxdepth: 2 - - project - utils \ No newline at end of file diff --git a/docs/source/api/project.rst b/docs/source/api/project.rst deleted file mode 100644 index b5deded..0000000 --- a/docs/source/api/project.rst +++ /dev/null @@ -1,6 +0,0 @@ -Project -####### - -.. automodule:: zoonyper.project - :members: - :show-inheritance: diff --git a/docs/source/api/utils.rst b/docs/source/api/utils.rst deleted file mode 100644 index 298604f..0000000 --- a/docs/source/api/utils.rst +++ /dev/null @@ -1,5 +0,0 @@ -Utils -##### - -.. automodule:: zoonyper.utils - :members: diff --git a/docs/source/getting-started/index.rst b/docs/source/getting-started/index.rst index 58b0f8c..e0cbbcb 100644 --- a/docs/source/getting-started/index.rst +++ b/docs/source/getting-started/index.rst @@ -1,5 +1,5 @@ Getting Started -################## +=============== .. 
rubric:: Content diff --git a/docs/source/getting-started/introduction-to-zoonypers-data-structures.rst b/docs/source/getting-started/introduction-to-zoonypers-data-structures.rst index ff1eff7..37934a2 100644 --- a/docs/source/getting-started/introduction-to-zoonypers-data-structures.rst +++ b/docs/source/getting-started/introduction-to-zoonypers-data-structures.rst @@ -1,55 +1,65 @@ Introduction to Zoonyper's Data Structures -########################################## +========================================== DataFrames constructed from the CSV files -========================================= +----------------------------------------- -:attr:`Project.classifications ` is the classifications DataFrame, and has all the functionality of a regular :class:`pandas.DataFrame`: +:attr:`Project.classifications ` is +the classifications DataFrame, and has all the functionality of a regular +:class:`pandas.DataFrame`: .. code-block:: python project.classifications.head(2) -:attr:`Project.subjects ` is the subjects DataFrame, and has the same :class:`kind of functionality `: +:attr:`Project.subjects ` is the subjects +DataFrame, and has the same :class:`kind of functionality `: .. code-block:: python project.subjects.head(2) -:attr:`Project.workflows ` is the :class:`pandas.DataFrame` representing the project's workflows: +:attr:`Project.workflows ` is the +:class:`pandas.DataFrame` representing the project's workflows: .. code-block:: python project.workflows.head(2) Shortcuts to Column Summaries -============================= +----------------------------- -:attr:`Project.workflow_ids ` is a list all of the project's workflow IDs: +:attr:`Project.workflow_ids ` is a list +of all of the project's workflow IDs: .. code-block:: python project.workflow_ids -:attr:`Project.inactive_workflow_ids ` is a list of the project's inactive workflow's IDs: +:attr:`Project.inactive_workflow_ids ` +is a list of the project's inactive workflow's IDs: .. 
code-block:: python project.inactive_workflow_ids -By using :attr:`Project.workflow_ids ` and :attr:`zoonyper.project.Project.inactive_workflow_ids`, we can get the active workflows by using: +By using :attr:`Project.workflow_ids ` +and :attr:`zoonyper.project.Project.inactive_workflow_ids`, we can get the +active workflows by using: .. code-block:: python set(project.workflow_ids) - set(project.inactive_workflow_ids) -:attr:`Project.subject_sets ` is a list all of the project's subject sets and corresponding subject IDs: +:attr:`Project.subject_sets ` is a list +of all of the project's subject sets and corresponding subject IDs: .. code-block:: python project.subject_sets -:attr:`Project.subject_urls ` is a list of all of the project's subjects and their corresponding URLs: +:attr:`Project.subject_urls ` is a list +of all of the project's subjects and their corresponding URLs: .. code-block:: python diff --git a/docs/source/getting-started/tutorials/counting-annotations.rst b/docs/source/getting-started/tutorials/counting-annotations.rst index fa1f48c..720aae3 100644 --- a/docs/source/getting-started/tutorials/counting-annotations.rst +++ b/docs/source/getting-started/tutorials/counting-annotations.rst @@ -1,7 +1,10 @@ Counting annotations -#################### +==================== -:meth:`zoonyper.project.Project.classification_counts` is a useful method for retrieving the number of different classifications per subject ID for any given workflow. It takes two arguments, the workflow ID (passed as ``workflow_id``) and the task number (``task_number``) that you want to extract: +:meth:`zoonyper.project.Project.classification_counts` is a useful method for +retrieving the number of different classifications per subject ID for any given +workflow. It takes two arguments, the workflow ID (passed as ``workflow_id``) +and the task number (``task_number``) that you want to extract: ..
code-block:: python @@ -11,7 +14,8 @@ Counting annotations The method currently works best with text annotations. -Using ``classification_counts``, we can also easily check for "agreement", say when all annotators have agreed on *one* classification: +Using ``classification_counts``, we can also easily check for "agreement", say +when all annotators have agreed on *one* classification: .. code-block:: python :linenos: @@ -26,7 +30,8 @@ Using ``classification_counts``, we can also easily check for "agreement", say w print(agreement) -Similarly, we can construct a code block for whenever at least **four annotators** have agreed on one response for a subject: +Similarly, we can construct a code block for whenever at least **four +annotators** have agreed on one response for a subject: .. code-block:: python :linenos: diff --git a/docs/source/getting-started/tutorials/handling-comments.rst b/docs/source/getting-started/tutorials/handling-comments.rst index 449b58c..67b81df 100644 --- a/docs/source/getting-started/tutorials/handling-comments.rst +++ b/docs/source/getting-started/tutorials/handling-comments.rst @@ -1,28 +1,39 @@ Handling Comments -################# +================= -Comments are an important part of ``zoopyper``'s functionality. They allow access to discussions from Zooniverse's Talk functionality, where volunteers contribute comments on the crowdsourced citizen science projects where they can discuss the project's subjects. +Comments are an important part of ``zoopyper``'s functionality. They allow +access to discussions from Zooniverse's Talk functionality, where volunteers +contribute comments on the crowdsourced citizen science projects where they can +discuss the project's subjects. -This section will show you how to handle comments using the ``zoopyper`` package, including how to get access to all comments, how to get pre-filtered comments, and how to get comments for a specific subject. 
+This section will show you how to handle comments using the ``zoopyper`` +package, including how to get access to all comments, how to get pre-filtered +comments, and how to get comments for a specific subject. Getting Access to All Comments -============================== +------------------------------ -To get access to all comments in the project, you can use the ``Project.comments`` property: +To get access to all comments in the project, you can use the +``Project.comments`` property: .. code-block:: python # Get all comments for the project project.comments -This will return a :class:`pandas.DataFrame` containing all the comments for the project. +This will return a :class:`pandas.DataFrame` containing all the comments for +the project. Getting Pre-Filtered Comments -============================= +----------------------------- -To get a pre-filtered comments DataFrame, including only non-staff members, you can use the ``Project.get_comments()`` method with the ``include_staff=False`` setting. +To get a pre-filtered comments DataFrame, including only non-staff members, +you can use the ``Project.get_comments()`` method with the +``include_staff=False`` setting. -If you run it before informing the ``Project`` which users count as "staff", you will get a warning. In the example below, we start by letting the ``Project`` know who is a staff member: +If you run it before informing the ``Project`` which users count as "staff", +you will get a warning. In the example below, we start by letting the +``Project`` know who is a staff member: .. code-block:: python @@ -32,21 +43,27 @@ If you run it before informing the ``Project`` which users count as "staff", you # Get pre-filtered comments project.get_comments(include_staff=False) -This will return a :class:`pandas.DataFrame` containing only comments from non-staff members. +This will return a :class:`pandas.DataFrame` containing only comments from +non-staff members. 
Getting Comments for a Specific Subject -======================================= +--------------------------------------- -To get comments for a specific subject, you can use the ``Project.get_subject_comments()`` method with the subject ID as the argument: +To get comments for a specific subject, you can use the +``Project.get_subject_comments()`` method with the subject ID as the argument: .. code-block:: python # Get comments for a specific subject project.get_subject_comments(73334345) -This will return a :class:`pandas.DataFrame` containing all the comments for the specified subject. +This will return a :class:`pandas.DataFrame` containing all the comments for +the specified subject. -Note that, by default, the ``get_subject_comments`` method will always includes comments from contributors marked as "staff" in Zoonyper. You can disable this by informing the ``Project`` instance about which usernames count as staff and then passing the parameter ``include_staff=False`` to the method: +Note that, by default, the ``get_subject_comments`` method will include +comments from contributors marked as "staff" in Zoonyper. You can disable this +by informing the ``Project`` instance about which usernames count as staff and +then passing the parameter ``include_staff=False`` to the method: .. code-block:: python diff --git a/docs/source/getting-started/tutorials/index.rst b/docs/source/getting-started/tutorials/index.rst index cc6724d..4896672 100644 --- a/docs/source/getting-started/tutorials/index.rst +++ b/docs/source/getting-started/tutorials/index.rst @@ -1,5 +1,5 @@ Tutorials -######### +========= ..
rubric:: Content diff --git a/docs/source/getting-started/tutorials/loading-a-project.rst b/docs/source/getting-started/tutorials/loading-a-project.rst index 33944b5..2b27d35 100644 --- a/docs/source/getting-started/tutorials/loading-a-project.rst +++ b/docs/source/getting-started/tutorials/loading-a-project.rst @@ -1,13 +1,16 @@ Loading a Project -################# +================= Introduction -============ +------------ -When working with Zoonyper ``Project`` objects in Python, one of the first steps is to load the required files into a Project object. This section will show you how to load a project using the ``Project`` class in the ``zoopyper`` package. +When working with Zoonyper ``Project`` objects in Python, one of the first +steps is to load the required files into a Project object. This section will +show you how to load a project using the ``Project`` class in the ``zoopyper`` +package. Required Files -============== +-------------- To load a project, you will need to have the following files available: @@ -17,19 +20,23 @@ To load a project, you will need to have the following files available: * comments.json * tags.json -These files contain the data needed to define the project and its associated tasks. +These files contain the data needed to define the project and its associated +tasks. -You can learn more about how to set those files up in :ref:`setting up your first project`. +You can learn more about how to set those files up in +:ref:`setting up your first project`. Loading a Project's Files -========================= +------------------------- -There are two ways to load a project's files into ``zoonyper``: by specifying individual file paths or by specifying a directory with the required files. +There are two ways to load a project's files into ``zoonyper``: by specifying +individual file paths or by specifying a directory with the required files. 
Option 1: Specifying Individual File Paths ------------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Providing the path for each of the five required files has the benefit of you being able to specify exactly where the files are located, individually: +Providing the path for each of the five required files has the benefit of you +being able to specify exactly where the files are located, individually: .. code-block:: python :name: loading-project-option-1 @@ -56,18 +63,28 @@ Providing the path for each of the five required files has the benefit of you be Here's what each line of code does: -* ``from zoopyper import Project``: Imports the ``Project`` class from the ``zoopyper`` package. -* ``classifications_path = "/classifications.csv"``: Specifies the full path to the ``classifications.csv`` file. -* ``subjects_path = "/subjects.csv"``: Specifies the full path to the ``subjects.csv`` file. -* ``workflows_path = "/workflows.csv"``: Specifies the full path to the ``workflows.csv`` file. -* ``comments_path = "/comments.json"``: Specifies the full path to the ``comments.json`` file. -* ``tags_path = "/tags.json"``: Specifies the full path to the ``tags.json`` file. -* ``project = Project(...)``: Creates a ``Project`` object using the specified file paths. +* ``from zoopyper import Project``: Imports the ``Project`` class from the + ``zoopyper`` package. +* ``classifications_path = "/classifications.csv"``: Specifies + the full path to the ``classifications.csv`` file. +* ``subjects_path = "/subjects.csv"``: Specifies the full path + to the ``subjects.csv`` file. +* ``workflows_path = "/workflows.csv"``: Specifies the full path + to the ``workflows.csv`` file. +* ``comments_path = "/comments.json"``: Specifies the full path + to the ``comments.json`` file. +* ``tags_path = "/tags.json"``: Specifies the full path to the + ``tags.json`` file. +* ``project = Project(...)``: Creates a ``Project`` object using the specified + file paths. 
Option 2: Specifying directory with required files --------------------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In the example above, if all the required files (``classifications.csv``, ``subjects.csv``, ``workflows.csv``, ``comments.json`` and ``tags.json``) are located in the same path, you can just provide the path where all of them are located: +In the example above, if all the required files (``classifications.csv``, +``subjects.csv``, ``workflows.csv``, ``comments.json`` and ``tags.json``) are +located in the same path, you can just provide the path where all of them are +located: .. code-block:: python :name: loading-project-option-2 @@ -83,16 +100,24 @@ In the example above, if all the required files (``classifications.csv``, ``subj Here's what each line of code does: -* ``from zoopyper import Project``: Imports the ``Project`` class from the ``zoopyper`` package. -* ``path = ""``: Specifies the full path to the directory containing all the required files. -* ``project = Project(path=path)``: Creates a ``Project`` object using the specified path to the directory containing all the required files. +* ``from zoopyper import Project``: Imports the ``Project`` class from the + ``zoopyper`` package. +* ``path = ""``: Specifies the full path to the + directory containing all the required files. +* ``project = Project(path=path)``: Creates a ``Project`` object using the + specified path to the directory containing all the required files. Tips ---- -* Verify that the required files are in the correct format before loading them into the Project object. -* Use a consistent naming convention for the file paths to make it easier to manage and maintain your code. -* Ensure that the file paths are correct, as incorrect paths can lead to errors and prevent the project from loading correctly. 
-* If you are working with a large project, consider breaking up the data into smaller, more manageable files to make it easier to work with. - -By following these tips, you can ensure that your project is loaded correctly and that you can begin working with the data right away. +* Verify that the required files are in the correct format before loading them + into the Project object. +* Use a consistent naming convention for the file paths to make it easier to + manage and maintain your code. +* Ensure that the file paths are correct, as incorrect paths can lead to + errors and prevent the project from loading correctly. +* If you are working with a large project, consider breaking up the data into + smaller, more manageable files to make it easier to work with. + +By following these tips, you can ensure that your project is loaded correctly +and that you can begin working with the data right away. diff --git a/docs/source/getting-started/tutorials/setting-up-your-first-project.rst b/docs/source/getting-started/tutorials/setting-up-your-first-project.rst index 92512cc..bf8afe5 100644 --- a/docs/source/getting-started/tutorials/setting-up-your-first-project.rst +++ b/docs/source/getting-started/tutorials/setting-up-your-first-project.rst @@ -1,12 +1,14 @@ Setting up your first project -############################# +============================= Downloading and Organizing Files for Your Zoonyper Project -========================================================== +---------------------------------------------------------- -To set up your Zoonyper project, you'll need to download all the necessary files and store them in the same folder. Here's how to do it: +To set up your Zoonyper project, you'll need to download all the necessary +files and store them in the same folder. Here's how to do it: -#. Go to the Lab page of your Zooniverse project and navigate to the "Data Exports" section. +#. 
Go to the Lab page of your Zooniverse project and navigate to the "Data + Exports" section. #. Download the following files: @@ -20,9 +22,11 @@ To set up your Zoonyper project, you'll need to download all the necessary files * Tags (as a JSON file) - *Note: Be sure to select the correct files and formats, and avoid downloading the "workflow classifications" file.* + *Note: Be sure to select the correct files and formats, and avoid + downloading the "workflow classifications" file.* -#. Once the files are downloaded, move them to a new folder and give each file a specific name: +#. Once the files are downloaded, move them to a new folder and give each file + a specific name: * ``classifications.csv`` for the classifications file * ``subjects.csv`` for the subjects file @@ -32,12 +36,15 @@ To set up your Zoonyper project, you'll need to download all the necessary files *(Note: Remember that the last two files are JSON files and not CSV files.)* -By following these steps, you'll have all the necessary files organized and ready to use for your Zoonyper project. +By following these steps, you'll have all the necessary files organized and +ready to use for your Zoonyper project. .. admonition:: Unpacking comments and tags .tar files :class: dropdown - The comments and tags files are downloaded as .tar files, which need to be unpacked on your local machine. On macOS, you can use the built-in Archive Utility. + The comments and tags files are downloaded as .tar files, which need to be + unarchived on your local machine. On macOS, you can use the built-in Archive + Utility. 
If you'd rather use the terminal, you can run the following command: @@ -46,11 +53,14 @@ By following these steps, you'll have all the necessary files organized and read $ tar -xvzf Initializing a ``Project`` with the Downloaded Files -==================================================== +---------------------------------------------------- -Once you've downloaded and organized the necessary files for your Zoonyper project (in the previous step), you can initiate a new ``Project`` in Python. Here's how to do it: +Once you've downloaded and organized the necessary files for your Zoonyper +project (in the previous step), you can initiate a new ``Project`` in Python. +Here's how to do it: -#. Open a new Python script or Jupyter notebook and import the Zoonyper library: +#. Open a new Python script or Jupyter notebook and import the Zoonyper + library: .. code-block:: python @@ -62,20 +72,30 @@ Once you've downloaded and organized the necessary files for your Zoonyper proje project = Project("path/to/input-directory") - (Replace ``"path/to/input-directory"`` with the actual path to your directory.) + (Replace ``"path/to/input-directory"`` with the actual path to your + directory.) - This creates a new ``Project`` object that will contain all the necessary data from the downloaded files. + This creates a new ``Project`` object that will contain all the necessary + data from the downloaded files. -That's it! You've now successfully initialized a Zoonyper project with your downloaded files. +That's it! You've now successfully initialized a Zoonyper project with your +downloaded files. .. note:: - If you are interested in alternative ways to set up a project, check out the :ref:`loading a project` tutorial. (The method shown here is equivalent to :ref:`"Option 2: Specifying directory with required files" `) + If you are interested in alternative ways to set up a project, check out the + :ref:`loading a project` tutorial. 
(The method shown here is equivalent to + :ref:`"Option 2: Specifying directory with required files" `) Disambiguating subjects (Optional) -================================== +---------------------------------- -To avoid ambiguous classifications and consolidate all classifications per actual subject (rather than the subjects uploaded to Zooniverse), you can perform a process called *disambiguation* on the downloaded subjects. Disambiguation involves downloading each subject image and extracting a unique identifier for each one, which Zoonyper can use to group identical subjects together. +To avoid ambiguous classifications and consolidate all classifications per +actual subject (rather than the subjects uploaded to Zooniverse), you can +perform a process called *disambiguation* on the downloaded subjects. +Disambiguation involves downloading each subject image and extracting a unique +identifier for each one, which Zoonyper can use to group identical subjects +together. To disambiguate the subjects in your Zoonyper project, follow these steps: @@ -91,26 +111,40 @@ To disambiguate the subjects in your Zoonyper project, follow these steps: project.download_all_subjects(sleep=(0, 1), organize_by_workflow=False, organize_by_subject_id=False) - Note that this step will take some time as you will have to download every single subject processed in your project. Depending on how many subjects you have across all your workflows, it may take several hours. + Note that this step will take some time as you will have to download every + single subject processed in your project. Depending on how many subjects you + have across all your workflows, it may take several hours. - By setting the ``sleep=(0, 1)`` parameter, we allow the method to sleep a random number between 0 and 1 in-between each download. If you keep running into timeout errors, you can increase these numbers to see if it helps. 
+ By setting the ``sleep=(0, 1)`` parameter, we allow the method to wait a + random number of seconds (between 0 and 1 in the example) in-between each + download. If you keep running into timeout errors, you can increase these + numbers to see if it helps. - Setting ``organize_by_workflow=False`` and ``organize_by_subject_id=False`` will organize the downloaded files as a flat structure in the downloads folder. + Setting ``organize_by_workflow=False`` and ``organize_by_subject_id=False`` + will organize the downloaded files as a flat structure in the downloads + folder. -#. Next, call the ``.disambiguate_subjects()`` method on your ``Project`` object and pass in the download directory as its argument: +#. Next, call the ``.disambiguate_subjects()`` method on your ``Project`` + object and pass in the download directory as its argument: .. code-block:: python project.disambiguate_subjects() - This method will download each subject image and extract its unique identifier, which will be stored in the project's metadata. Note that this process may take some time depending on the number of subjects in your project. + This method will download each subject image and extract its unique + identifier, which will be stored in the project's metadata. Note that this + process may take some time depending on the number of subjects in your + project. -That's it! You've now successfully disambiguated the subjects in your Zoonyper project. +That's it! You've now successfully disambiguated the subjects in your Zoonyper +project. Finishing Up -============ +------------ -Congratulations, you've successfully set up and initialized a Zoonyper project with your downloaded files! Here are a couple of final tips to help you get started: +Congratulations, you've successfully set up and initialized a Zoonyper project +with your downloaded files!
Here are a couple of final tips to help you get +started: * Access the project's subjects and classifications as Pandas DataFrames: @@ -119,9 +153,11 @@ Congratulations, you've successfully set up and initialized a Zoonyper project w project.subjects project.classifications - These two DataFrames contain all the information you need to start analyzing and visualizing your project data. + These two DataFrames contain all the information you need to start analyzing + and visualizing your project data. -* Check out the Zoonyper documentation and examples for more ideas on how to use the library. Here are a few topics to get you started: +* Check out the Zoonyper documentation and examples for more ideas on how to + use the library. Here are a few topics to get you started: * Working with workflows and tasks * Filtering and grouping classifications diff --git a/docs/source/getting-started/tutorials/working-with-workflows-and-tasks.rst b/docs/source/getting-started/tutorials/working-with-workflows-and-tasks.rst index ebcdb63..35b1e0e 100644 --- a/docs/source/getting-started/tutorials/working-with-workflows-and-tasks.rst +++ b/docs/source/getting-started/tutorials/working-with-workflows-and-tasks.rst @@ -1,30 +1,38 @@ Working with Workflows and Tasks -################################ +================================ -Zoonyper projects can include one or more workflows, each of which consists of a series of tasks that volunteers complete to classify the project's subjects. Here's how you can work with workflows and tasks in Zoonyper: +Zoonyper projects can include one or more workflows, each of which consists of +a series of tasks that volunteers complete to classify the project's subjects. + +Here's how you can work with workflows and tasks in Zoonyper: #. Access the project's workflows as a pandas DataFrame: .. 
code-block:: python - project.workflows + project.workflows - This returns a DataFrame with the workflow ID as the index column and the other column keeping information like: + This returns a DataFrame with the workflow ID as the index column and the + other columns holding information like: * Display name * Version * Classification counts - See the API documentation for more detailed information about the contents of the workflows DataFrame. + See the API documentation for more detailed information about the contents + of the workflows DataFrame. #. Get subject IDs from a specific workflow's: .. code-block:: python - workflow_id = project.workflow_ids[0] - project.workflow_subjects(workflow_id=workflow_id) + workflow_id = project.workflow_ids[0] + project.workflow_subjects(workflow_id=workflow_id) - This returns a list of the subjects for the specified workflow ID. In this example, we pick the first of the workflow IDs from the list of all the workflow IDs, which can be accessed using the ``Project``'s ``workflow_ids`` property. + This returns a list of the subjects for the specified workflow ID. In this + example, we pick the first of the workflow IDs from the list of all the + workflow IDs, which can be accessed using the ``Project``'s + ``workflow_ids`` property. diff --git a/docs/source/index.rst b/docs/source/index.rst index 74a2324..204251c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,7 +1,44 @@ +================== About ``zoonyper`` -################## +================== + +**Zoonyper** is a Python library, designed to make it easy for users to import +and process Zooniverse annotations and their metadata in their own Python code. +It is especially designed for use in `Jupyter Notebooks <https://jupyter.org/>`_. + +Purpose +------- + +Zoonyper can process the output files from the Zooniverse citizen science +platform, and facilitate data wrangling, compression, and output into JSON and +CSV files.
The output files can then be more easily used in e.g. Observable +visualisations. + +Background +---------- + +The library was created as part of the +`Living with Machines project `_, a project +aimed to generate new historical perspectives on the effects of the +mechanisation of labour on the lives of ordinary people during the long +nineteenth century. As part of that work, we have used newspapers for +historical research at scale. In that work, it has been important for us to +use the newspapers also as source documents for crowdsourced activities. The +platform used for the crowdsourced activities is Zooniverse, created for +citizen science projects where volunteers contribute to scientific research +projects by annotating and categorizing images or other data. The annotations +created by volunteers are collected as "classifications" in the Zooniverse +system. -**Zoonyper** is a Python library that facilitates interpretation and wrangling for Zooniverse files in Jupyter (and Python more generally). +In the Living with Machines project, we used the Zooniverse platform to +annotate articles extracted from historical newspapers. We winnowed out +articles that were deemed unsuitable or irrelevant for the study, and then +asked volunteers to help us with more detailed classifications on the remaining +articles. This helps to ensure that the annotations are focused and accurate, +and that the results of the study are meaningful and relevant. The articles, +along with metadata, were included in Zooniverse manifests. The final goal for +the research overall was to use the annotations to study the content of these +historical newspapers and gain insights into the events and trends of the past. .. note:: @@ -13,8 +50,9 @@ Contents .. toctree:: :maxdepth: 3 + installing getting-started/index - api/index + reference .. 
import diff --git a/docs/source/getting-started/install.rst b/docs/source/installing.rst similarity index 71% rename from docs/source/getting-started/install.rst rename to docs/source/installing.rst index 5327463..7f50850 100644 --- a/docs/source/getting-started/install.rst +++ b/docs/source/installing.rst @@ -1,5 +1,5 @@ Installing Zoonyper -################### +=================== When in production, you can use PyPI to install zoonyper: @@ -9,13 +9,14 @@ When in production, you can use PyPI to install zoonyper: .. warning:: - This command will not work currently, as this package is not yet published on PyPI. + This command will not work currently, as this package is not yet published + on PyPI. -================================= In development -================================= +-------------- -Because this project is in active development, you will likely need to install from the repository for the time being. +Because this project is in active development, you will likely need to install +from the repository for the time being. In order to do so, you need to first ensure that you have installed Poetry: @@ -25,11 +26,15 @@ In order to do so, you need to first ensure that you have installed Poetry: .. warning:: - Make sure the command above, after the pipe ``|`` refers to the correctly linked Python, i.e. you may want it to refer to ``python`` or ``python3`` or whatever your symlinked binary is called. + Make sure the command above, after the pipe ``|``, refers to the correctly + linked Python, i.e. you may want it to refer to ``python`` or ``python3`` + or whatever your symlinked binary is called. .. note:: - Don't forget to add `export PATH="/home//.local/bin:$PATH"` to your shell configuration file in order to get access to the ``poetry`` tool on your command line. + Don't forget to add ``export PATH="/home/<username>/.local/bin:$PATH"`` to + your shell configuration file in order to get access to the ``poetry`` + tool on your command line.
Then, clone the repository: @@ -67,11 +72,17 @@ Then, install the dependencies: ["In /home//.cache/pypoetry/artifacts/38/be/e4/0afbe5654cdc0168ebfaf6864c20009c2eec3dd953961a7d44e0ed3fe9/furo-2022.12.7-py3-none-any.whl, hash / size of furo/__init__.py didn't match RECORD", "In /home//.cache/pypoetry/artifacts/38/be/e4/0afbe5654cdc0168ebfaf6864c20009c2eec3dd953961a7d44e0ed3fe9/furo-2022.12.7-py3-none-any.whl, hash / size of furo/_demo_module.py didn't match RECORD", ... [etc] - If this is the case, see the solution here: https://github.com/python-poetry/poetry/issues/7691#issue-1632193622 + If this is the case, see the solution in + `the python-poetry repository <https://github.com/python-poetry/poetry/issues/7691#issue-1632193622>`_. - The easiest solution is to exit poetry (by running ``exit`` and running a ``pip install poetry==1.4.0``). + The easiest solution is to exit poetry (by running ``exit``) and then run: - This is a problem with poetry 1.4.1 so it may be solved by the time you're reading this. + .. code-block:: bash + + $ pip install poetry==1.4.0 + + This is a problem with poetry 1.4.1, so it may be solved by the time you're + reading this. Following that, you can run a build and ``pip install`` from the local files: @@ -79,17 +90,21 @@ Following that, you can run a build and ``pip install`` from the local files: $ poetry build && pip install dist/zoonyper-0.1.0.tar.gz -Now you should be able to use ``zoonyper`` as a regular package on your local computer. +Now you should be able to use ``zoonyper`` as a regular package on your local +computer. .. warning:: - If you change the source code (located in the ``zoonyper`` directory in the repository), you will need to rerun the ``poetry build`` and the ``pip install dist/zoonyper-0.1.0.tar.gz`` commands again. + If you change the source code (located in the ``zoonyper`` directory in + the repository), you will need to rerun the ``poetry build`` and the + ``pip install dist/zoonyper-0.1.0.tar.gz`` commands again.
-================================= Quick Install in Jupyter Notebook -================================= +--------------------------------- -If you are in a hurry and want to install the developer version in whichever Python kernel you're currently running in a Notebook, here's a handy bash script that you can copy and paste into a Jupyter notebook: +If you are in a hurry and want to install the developer version in whichever +Python kernel you're currently running in a Notebook, here's a handy bash +script that you can copy and paste into a Jupyter notebook: .. code-block:: bash diff --git a/docs/source/reference.rst b/docs/source/reference.rst new file mode 100644 index 0000000..8e436f5 --- /dev/null +++ b/docs/source/reference.rst @@ -0,0 +1,18 @@ +Reference +========= + +Zoonyper consists of two classes: ``Project`` and ``Utils``. They are +documented below. + +Project +------- + +.. automodule:: zoonyper.project + :members: + :show-inheritance: + +Utils +----- + +.. automodule:: zoonyper.utils + :members: From 4802fecb7ce8bab0d358bff27f3fbf7397084a3c Mon Sep 17 00:00:00 2001 From: Kalle Westerling Date: Thu, 25 May 2023 11:22:49 +0100 Subject: [PATCH 4/5] Fix spelling error --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 754475f..c3592eb 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ **Zoonyper** is a Python library, designed to make it easy for users to import and process Zooniverse annotations and their metadata in your own Python code. It is especially designed for use in [Jupyter Notebooks](https://jupyter.org/). ## Purpose -Zoonpyter can process the output files from the Zooniverse citizen science platform, and facilitate data wrangling, compression, and output into JSON and CSV files. The output files can then be more easily used in e.g. Observable visualisations. 
+Zoonyper can process the output files from the Zooniverse citizen science platform, and facilitate data wrangling, compression, and output into JSON and CSV files. The output files can then be more easily used in e.g. Observable visualisations. ## Background From fcdcefaf9588d60762c44cfedcca128acdf247ea Mon Sep 17 00:00:00 2001 From: Kalle Westerling Date: Wed, 7 Jun 2023 12:02:14 +0100 Subject: [PATCH 5/5] Add GitHub action to generate public docs Fixes #24 --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c3592eb..259d074 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,9 @@ Because this project is in **active development**, you need to install from the ## Documentation -The full documentation is currently available with [`sphinx`](https://www.sphinx-doc.org/en/master/) in the [`docs`](docs) directory. +You can read the public documentation at <https://living-with-machines.github.io/zoonyper>. + +You can contribute to the documentation by using [`sphinx`](https://www.sphinx-doc.org/en/master/) to edit and render the [`docs`](docs) directory. ## Data model