Skip to content

Commit

Permalink
docs: add Google docstrings and module docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
Ingerid committed Mar 6, 2024
1 parent b222ed7 commit 43d7285
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 64 deletions.
8 changes: 8 additions & 0 deletions dhlab/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
"""The dhlab Python library provides functions and objects
to retrieve and analyze image and text data from the digital collection of the National Library of Norway.
You can build text corpora, retrieve their metadata, search for images, and do quantitative analyses.
The dhlab Python package calls the [DHLAB API](https://api.nb.no/dhlab/) under the hood to retrieve data.
"""

# api
from dhlab.api.dhlab_api import totals

Expand Down
9 changes: 6 additions & 3 deletions dhlab/metadata/metadata.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
from typing import List
"""Retrieve metadata from objects in the digital archive."""
from typing import List, Optional

import pandas as pd
import requests

from dhlab.constants import BASE_URL


def get_metadata(urns: List = None):
def get_metadata(urns: Optional[List] = None):
"""Fetch metadata from a list of urns.
:param list urns: uniform resource names, example: ``["URN:NBN:no-nb_digibok_2011051112001", ...]``
Args:
urns: uniform resource names, example:
`["URN:NBN:no-nb_digibok_2011051112001"]`
"""
params = locals()
r = requests.post(f"{BASE_URL}/get_metadata", json=params)
Expand Down
59 changes: 32 additions & 27 deletions dhlab/metadata/natbib.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,26 @@

import os
from functools import wraps
from typing import List, Optional

import requests

from dhlab.constants import BASE_URL

# TODO: Add support for more fields
import requests

from dhlab.constants import BASE_URL

def _api_call_deco(service):
"""Decorator for calling a service from DH-lab API
# TODO: Add support for more fields

:param service: Name of service
"""

def inner_decorator(func):
"""Inner decorator
def _api_call_deco(service: str):
"""Decorator for calling a service from DH-lab API"""

:param func: function to decorate. Must return params
def inner_decorator(func):
"""
Args:
func: function to decorate. Must return params
"""

@wraps(func)
Expand All @@ -33,33 +35,35 @@ def wrapper(*args, **kwargs):


@_api_call_deco("metadata_query")
def metadata_query(conditions, limit=5):
def metadata_query(conditions: List[list], limit: Optional[int] = 5) -> dict:
"""Query the Norwegian National Bibliography using MARC 21 fields and values.
Example:
conditions = [
["245", "a", "kongen"],
["008", "", "nno"]
]
:param conditions: Marc 21 fields and values to search for
:type conditions: list of lists
:param limit: number of records to return, defaults to 5
:type limit: int, optional
:return: list of urns
:rtype: json
Examples:
>>> conditions = [["245", "a", "kongen"],["008", "", "nno"]]
>>> metadata_query(conditions, limit=5)
Args:
conditions: MARC 21 fields and values to search for, as a list of lists.
limit: number of records to return.
Returns:
a dict of the input parameters
"""
params = {"conditions": conditions, "limit": limit}
return params


@_api_call_deco("metadata_from_urn")
def metadata_from_urn(urns, fields=None):
def metadata_from_urn(urns: list, fields: Optional[list] = None) -> dict:
"""Get MARC 21 JSON for a URN or a list of URNs.
:param urns: list of URNs
:param fields: list of marc 21 fields to return
:return: API call parameters
Args:
urns: list of URNs
fields: list of MARC 21 fields to return
Returns:
API call parameters
"""
params = {"urns": urns, "fields": fields}
return params
Expand All @@ -68,10 +72,11 @@ def metadata_from_urn(urns, fields=None):
## Utility


def pretty_print_marc21json(record):
def pretty_print_marc21json(record: dict):
"""Prints a record from the Norwegian National Bibliography in a readable format
:param record: Marc 21 record in json format
Args:
record: MARC 21 record in JSON format
"""

print("Record:")
Expand Down
42 changes: 21 additions & 21 deletions dhlab/ngram/nb_ngram.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pandas as pd
from pandas import DataFrame

from dhlab.api.nb_ngram_api import get_ngram

Expand All @@ -11,16 +12,13 @@ def nb_ngram(
mode: str = "relative",
lang: str = "nob",
):
"""Extract N-gram frequencies from given ``terms`` and ``years``.
"""Extract N-gram frequencies from given `terms` and `years`.
`lang` param is not supported for corpus=`avis` and will be set to None if `avis` is passed.
:param terms: comma
:param corpus:
:param smooth:
:param years:
:param mode:
:param lang:
:return: A sorted Pandas DataFrame index
The `lang` param is not supported for `corpus="avis"` and will be set to None if `avis` is passed.
Returns:
A sorted Pandas DataFrame indexed by year, with columns for each term.
:meta private:
"""
Expand All @@ -41,15 +39,9 @@ def nb_ngram(
## Reverting to the original; this one does not work (LGJ)
def ngram_conv_old(
ngrams, smooth: int = 1, years: tuple = (1810, 2013), mode: str = "relative"
):
) -> DataFrame:
"""Construct a dataframe with ngram mean frequencies per year over a given time period.
:param ngrams: TODO: Fill in appropriate type and description.
:param smooth: Smoothing factor for the graph visualisation.
:param years: Tuple with start and end years for the time period of interest
:param mode: Frequency measure. Defaults to 'relative'.
:return: pandas dataframe with mean values for each year
:meta private:
"""
ngc = {}
Expand All @@ -68,14 +60,22 @@ def ngram_conv_old(
return pd.DataFrame(ngc).rolling(window=smooth, win_type="triang").mean()


def ngram_conv(ngrams, smooth=1, years=(1810, 2013), mode="relative"):
def ngram_conv(
ngrams,
smooth: int = 1,
years: tuple = (1810, 2013),
mode: str = "relative",
) -> DataFrame:
"""Construct a dataframe with ngram mean frequencies per year over a given time period.
:param ngrams: TODO: Fill in appropriate type and description.
:param smooth: Smoothing factor for the graph visualisation.
:param years: Tuple with start and end years for the time period of interest
:param mode: Frequency measure. Defaults to 'relative'.
:return: pandas dataframe with mean values for each year
Args:
ngrams: To be filled in.
smooth: Smoothing factor for the graph visualisation.
years: Tuple with start and end years for the time period of interest
mode: Frequency measure.
Returns:
pandas dataframe with mean values for each year
:meta private:
"""
Expand Down
10 changes: 6 additions & 4 deletions dhlab/text/wildcard.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ class WildcardWordSearch(DhlabObj):
"""

def __init__(self, word, factor=2, freq_limit=10, limit=50):
""":param word: word from a mixture of * and characters
:factor int: the additional length of words to be returned
:freq_lim: the frequency of returned words lower limit
:limit int: number of words returned
"""
Args:
word: word pattern made up of characters and `*` wildcards
factor: the additional length of words to be returned
freq_limit: lower limit on the frequency of returned words
limit: number of words returned
"""
self.words = wildcard_search(
word, factor=factor, freq_limit=freq_limit, limit=limit
Expand Down
23 changes: 14 additions & 9 deletions dhlab/utils/files.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Fetch raw files from GitHub and write them to the working directory."""

import contextlib
import os
from pathlib import Path
Expand All @@ -22,13 +24,14 @@ def download_from_github(
):
"""Fetch a file from GitHub and write it to the working directory.
:param filename: Filename, including file extension (e.g. `.py` or `.txt`)
:param user: Github username of the repo owner.
:param repository: Github repository name.
:param branch: Name of the repo branch. Defaults to 'master'.
:param overwrite: Whether to overwrite existing files in working directory. Defaults to not
overwrite.
:param silent: Whether to output logging messages to stdout. Default is not silent.
Args:
filename: Filename, including file extension (e.g. `.py` or `.txt`)
user: Github username of the repo owner.
repository: Github repository name.
branch: Name of the repo branch. Defaults to 'master'.
overwrite: Whether to overwrite existing files in working
directory. Defaults to not overwrite.
silent: Whether to output logging messages to stdout.
"""

nba = requests.get(
Expand Down Expand Up @@ -60,8 +63,10 @@ def get_file_from_github(url, overwrite=False, silent=False):
it is enough with reference
it will look in raw user content for the file.
:param overwrite: defaults to no overwrite
:param silent: default is not silent"""
Args:
overwrite: Whether to overwrite an existing file. Defaults to False (no overwrite).
silent: Defaults to False (not silent).
"""

if url.startswith("https://github.com/") or url.startswith("github.com"):
trail = url.split("github.com")[-1].replace("blob/", "")
Expand Down

0 comments on commit 43d7285

Please sign in to comment.