Skip to content

Commit

Permalink
docs: simplify API documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
dovahcrow committed Apr 30, 2020
1 parent c363ac2 commit 744a394
Show file tree
Hide file tree
Showing 23 changed files with 380 additions and 459 deletions.
2 changes: 1 addition & 1 deletion Justfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
build-docs:
poetry run sphinx-build -M html docs/source docs/build
poetry run sphinx-build -b html docs/source docs/build

gen-apidocs:
poetry run sphinx-apidoc --ext-doctest --ext-autodoc --ext-mathjax -f -o docs/source dataprep
Expand Down
2 changes: 2 additions & 0 deletions dataprep/data_connector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
DataConnector
"""
from .connector import Connector

__all__ = ["Connector"]
13 changes: 1 addition & 12 deletions dataprep/data_connector/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@

def config_directory() -> Path:
"""
Returns
-------
Path
Returns the config directory path
Returns the config directory path
"""
tmp = gettempdir()
return Path(tmp) / "dataprep" / "data_connector"
Expand All @@ -32,10 +29,6 @@ def config_directory() -> Path:
def ensure_config(impdb: str) -> bool:
"""
Ensure the config for `impdb` is downloaded
Returns
-------
bool
"""
path = config_directory()
obsolete = is_obsolete(impdb)
Expand All @@ -51,10 +44,6 @@ def is_obsolete(impdb: str) -> bool:
"""
Test if the implicit db config files are obsolete
and need to be re-downloaded.
Returns
-------
bool
"""
path = config_directory()
if not (path / impdb).exists():
Expand Down
95 changes: 45 additions & 50 deletions dataprep/data_connector/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,39 +37,37 @@
class Connector:
"""
This is the main class of data_connector component.
A class should be initialized as the following.
Initialize the Connector class as shown in the example below.
Parameters
----------
config_path
The path to the config. It can be hosted, e.g. "yelp", or from
local filesystem, e.g. "./yelp"
auth_params
The parameter for authentication, e.g. OAuth2
kwargs
Additional parameters
Example
--------
-------
>>> from dataprep.data_connector import Connector
>>> dc = Connector("./DataConnectorConfigs/yelp", auth_params={"access_token":access_token})
"""

# impdb: ImplicitDatabase
# vars: Dict[str, Any]
# auth_params: Dict[str, Any]
# session: Session
# jenv: Environment
_impdb: ImplicitDatabase
_vars: Dict[str, Any]
_auth_params: Dict[str, Any]
_session: Session
_jenv: Environment

def __init__(
self,
config_path: str,
auth_params: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
"""
Initialize Connector class
Parameters
----------
config_path : str
The path to the config. It can be hosted, e.g. "yelp", or from
local filesystem, e.g. "./yelp"
**kwargs : Dict[str, Any]
Additional parameters
"""

self.session = Session()
self._session = Session()
if (
config_path.startswith(".")
or config_path.startswith("/")
Expand All @@ -81,11 +79,11 @@ def __init__(
ensure_config(config_path)
path = config_directory() / config_path

self.impdb = ImplicitDatabase(path)
self._impdb = ImplicitDatabase(path)

self.vars = kwargs
self.auth_params = auth_params or {}
self.jenv = Environment(undefined=StrictUndefined)
self._vars = kwargs
self._auth_params = auth_params or {}
self._jenv = Environment(undefined=StrictUndefined)

def _fetch(
self,
Expand All @@ -101,27 +99,27 @@ def _fetch(
"cookies": {},
}

merged_vars = {**self.vars, **kwargs}
merged_vars = {**self._vars, **kwargs}
if table.authorization is not None:
table.authorization.build(req_data, auth_params or self.auth_params)
table.authorization.build(req_data, auth_params or self._auth_params)

for key in ["headers", "params", "cookies"]:
if getattr(table, key) is not None:
instantiated_fields = getattr(table, key).populate(
self.jenv, merged_vars
self._jenv, merged_vars
)
req_data[key].update(**instantiated_fields)
if table.body is not None:
# TODO: do we support binary body?
instantiated_fields = table.body.populate(self.jenv, merged_vars)
instantiated_fields = table.body.populate(self._jenv, merged_vars)
if table.body_ctype == "application/x-www-form-urlencoded":
req_data["data"] = instantiated_fields
elif table.body_ctype == "application/json":
req_data["json"] = instantiated_fields
else:
raise UnreachableError

resp: Response = self.session.send( # type: ignore
resp: Response = self._session.send( # type: ignore
Request(
method=method,
url=url,
Expand All @@ -147,9 +145,8 @@ def table_names(self) -> List[str]:
----
We abstract each website as a database containing several tables.
For example, in Spotify we have the artist and album tables.
"""
return list(self.impdb.tables.keys())
return list(self._impdb.tables.keys())

@property
def info(self) -> None:
Expand All @@ -159,8 +156,8 @@ def info(self) -> None:

# get info
tbs: Dict[str, Any] = {}
for cur_table in self.impdb.tables:
table_config_content = self.impdb.tables[cur_table].config
for cur_table in self._impdb.tables:
table_config_content = self._impdb.tables[cur_table].config
params_required = []
params_optional = []
example_query_fields = []
Expand All @@ -180,7 +177,7 @@ def info(self) -> None:
# show table info
print(
INFO_TEMPLATE.render(
ntables=len(self.table_names), dbname=self.impdb.name, tbs=tbs
ntables=len(self.table_names), dbname=self._impdb.name, tbs=tbs
)
)

Expand All @@ -191,22 +188,21 @@ def show_schema(self, table_name: str) -> pd.DataFrame:
Parameters
----------
table_name : str
table_name
The table name.
Returns
-------
pd.DataFrame
The returned data's schema.
pd.DataFrame
The returned data's schema.
Note
----
The schema is defined in the configuration file.
The user can either use the default one or change it by editing the configuration file.
"""
print(f"table: {table_name}")
table_config_content = self.impdb.tables[table_name].config
table_config_content = self._impdb.tables[table_name].config
schema = table_config_content["response"]["schema"]
new_schema_dict: Dict[str, List[Any]] = {}
new_schema_dict["column_name"] = []
Expand All @@ -217,38 +213,37 @@ def show_schema(self, table_name: str) -> pd.DataFrame:
return pd.DataFrame.from_dict(new_schema_dict)

def query(
self,
table: str,
auth_params: Optional[Dict[str, Any]] = None,
**where: Dict[str, Any],
self, table: str, auth_params: Optional[Dict[str, Any]] = None, **where: Any,
) -> pd.DataFrame:
"""
Use this method to query the API and get the returned table.
Example
--------
-------
>>> df = dc.query('businesses', term="korean", location="vancouver")
Parameters
----------
table : str
table
The table name.
auth_params : Optional[Dict[str, Any]] = None
auth_params
The parameters for authentication. Usually the authentication parameters
should be defined when instantiating the Connector. In case some tables have different
authentication options, a different authentication parameter can be defined here.
This parameter will override the one from Connector if passed.
**where: Dict[str, Any]
where
The additional parameters required for the query.
Returns
-------
pd.DataFrame
A DataFrame that contains the data returned by the website API.
"""
assert table in self.impdb.tables, f"No such table {table} in {self.impdb.name}"
assert (
table in self._impdb.tables
), f"No such table {table} in {self._impdb.name}"

itable = self.impdb.tables[table]
itable = self._impdb.tables[table]

resp = self._fetch(itable, auth_params, where)

Expand Down
10 changes: 4 additions & 6 deletions dataprep/eda/basic/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
"""
plot(df)
========
This module implements the plot(df) function.
"""

Expand All @@ -10,12 +7,13 @@
import dask.dataframe as dd
import pandas as pd
from bokeh.io import show
from bokeh.plotting import Figure

from .compute import compute
from .render import render
from ..report import Report

__all__ = ["plot", "compute", "render"]


def plot(
df: Union[pd.DataFrame, dd.DataFrame],
Expand All @@ -34,7 +32,7 @@ def plot(
value_range: Optional[Tuple[float, float]] = None,
yscale: str = "linear",
tile_size: Optional[float] = None,
) -> Figure:
) -> Report:
"""Generates plots for exploratory data analysis.
If no columns are specified, the distribution of
Expand Down Expand Up @@ -109,7 +107,7 @@ def plot(
value_range: Optional[Tuple[float, float]], default None
The lower and upper bounds on the range of a numerical column.
Applies when column x is specified and column y is unspecified.
yscale: str, default "linear"
yscale
The scale to show on the y axis. Can be "linear" or "log".
tile_size: Optional[float], default None
Size of the tile for the hexbin plot. Measured from the middle
Expand Down
3 changes: 0 additions & 3 deletions dataprep/eda/basic/compute.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
"""
Compute ``plot(df)``
====================
This module implements the intermediates computation for plot(df) function.
"""
from sys import stderr
Expand Down
21 changes: 10 additions & 11 deletions dataprep/eda/basic/render.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
"""
Render ``plot(df)``
===================
This module implements the visualization for the plot(df) function.
""" # pylint: disable=too-many-lines
from math import pi
Expand Down Expand Up @@ -1286,21 +1283,23 @@ def render(
Parameters
----------
itmdt
The Intermediate containing results from the compute function
The Intermediate containing results from the compute function.
yscale: str, default "linear"
The scale to show on the y axis. Can be "linear" or "log".
tile_size: Optional[float], default None
Size of the tile for the hexbin plot; measured from the middle
tile_size
Size of the tile for the hexbin plot. Measured from the middle
of a hexagon to its left or right corner.
plot_width_sml: int, default 324,
plot_width_small: int, default 324
The width of the small plots
plot_height_sml: int, default 300,
plot_height_small: int, default 300
The height of the small plots
plot_width_lrg: int, default 450,
plot_width_large: int, default 450
The width of the large plots
plot_height_lrg: int, default 400,
plot_height_large: int, default 400
The height of the large plots
plot_width_lrg: int, default 972,
plot_width_large: int, default 972
The width of the large plots
plot_width_wide: int, default 972
The width of the wide plots
"""
# pylint: disable=too-many-arguments
Expand Down
Loading

0 comments on commit 744a394

Please sign in to comment.