bump package versions

xLaszlo · xLaszlo · commit dc4b12098db1 · 2021-07-14T13:14:05.000+01:00
diff --git a/README.md b/README.md
@@ -37,23 +37,42 @@ deactivate
 source .venv/bin/activate
 ```
 
-
-### How to list existing Datasets (in Jupyter)
+### How to load a dataset (in Jupyter)
 
 ```
 sys.path.insert(0, '<project_directory>/example')
 from hypergol import HypergolProject
-from data_models.example_datamodel_class import ExampleDatamodelClass
+from data_models.data_type import DataType
 project = HypergolProject(
     projectDirectory='<project_directory>/example',
-    dataDirectory='<data_directory>'
+    dataDirectory='<data_directory>',
+    force=True
 )
-ds = project.datasetFactory.get(dataType=ExampleDatamodelClass, name='sentences')
-# project.list_datasets(pattern='.*', asCode=True);
+
+dataTypeDataset = project.datasetFactory.get(dataType=DataType, name='data_types')
+with dataTypeDataset.open('r') as dsr:
+    dataTypes = [value.to_data() for value in islice(dsr, 10)]
+
+# Or convert straight into pandas
+import pandas as pd
+dataTypeDataframe = pd.DataFrame([value.to_data() for value in islice(dataTypeDataset.open('r'), 10)])
 ```
 
+`<project_directory>` is the repo's directory.
+`<data_directory>` is the *parent* data directory.
+
+If the project is called `my_project` and the code is located in `~/my_project` and the project data is in `~/data/my_project`, `<data_directory>` is `~/data`.
+Set the `branch` argument in `datasetFactory.get()` if you need a branch other than the current one.
+
+The `force` argument allows you to load the data even if your repo has uncommitted code. This is usually not a problem unless you plan to write into dataframes from Jupyter.
+
+### How to list existing Datasets
+
 This will list all existing datasets that match `pattern` as self-contained executable code.
 
+```
+project.list_datasets(pattern='.*', asCode=True);
+```
 
 ### How to start Tensorboard
 
@@ -65,7 +84,6 @@ source .venv/bin/activate
 tensorboard --logdir=<data_directory>/example/tensorboard/
 ```
 
-
 ### How to train your model
 
 After implementing all components and required functions:
@@ -90,7 +108,6 @@ then start serving with (port and host can be set in the shell script):
 ./serve_example.sh
 ```
 
-
 ### How to call your model from python with requests
 
 ```
diff --git a/make_venv.sh b/make_venv.sh
@@ -1,7 +1,7 @@
 python3 -m venv .venv
 source .venv/bin/activate
 pip3 install --upgrade pip
-pip3 install setuptools==47.1.1
+pip3 install setuptools==57.1.0
 pip3 install wheel
 pip3 install -r requirements.txt
 # setup here
diff --git a/pipelines/process_blogposts.py b/pipelines/process_blogposts.py
@@ -12,7 +12,7 @@
 from data_models.sentence import Sentence
 
 
-def process_blogposts(threads=1, force=False):
+def process_blogposts(threads=1, force=False, onlyTasks=None):
     project = HypergolProject(dataDirectory=f'{os.environ["BASE_DIR"]}/tempdata', force=force)
     SOURCE_PATTERN = f'{os.environ["BASE_DIR"]}/data/blogposts/pages_*.pkl'
     articles = project.datasetFactory.get(dataType=Article, name='articles')
@@ -49,7 +49,7 @@ def process_blogposts(threads=1, force=False):
             createSentencesTask,
         ]
     )
-    pipeline.run(threads=threads)
+    pipeline.run(threads=threads, onlyTasks=onlyTasks)
 
 
 if __name__ == '__main__':
diff --git a/requirements.txt b/requirements.txt
@@ -4,8 +4,8 @@ spacy
 GitPython==3.1.3
 nose2==0.9.2
 pylint==2.5.3
-hypergol
+hypergol==0.1.20
 tensorflow==2.5.0
 pydantic==1.6.2
-fastapi==0.61.0
+fastapi==0.65.2
 uvicorn==0.11.8

Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,7 @@`
`12`	`12`	`from data_models.sentence import Sentence`
`13`	`13`
`14`	`14`
`15`		`-def process_blogposts(threads=1, force=False):`
	`15`	`+def process_blogposts(threads=1, force=False, onlyTasks=None):`
`16`	`16`	`project = HypergolProject(dataDirectory=f'{os.environ["BASE_DIR"]}/tempdata', force=force)`
`17`	`17`	`SOURCE_PATTERN = f'{os.environ["BASE_DIR"]}/data/blogposts/pages_*.pkl'`
`18`	`18`	`articles = project.datasetFactory.get(dataType=Article, name='articles')`
`@@ -49,7 +49,7 @@ def process_blogposts(threads=1, force=False):`
`49`	`49`	`createSentencesTask,`
`50`	`50`	`]`
`51`	`51`	`)`
`52`		`- pipeline.run(threads=threads)`
	`52`	`+ pipeline.run(threads=threads, onlyTasks=onlyTasks)`
`53`	`53`
`54`	`54`
`55`	`55`	`if __name__ == '__main__':`