2
2
3
3
from omnipy import (Chain2 , Chain3 , convert_dataset , Dataset , HttpUrlDataset , LinearFlowTemplate ,
4
4
MatchItemsModel , Model , PandasDataset , SplitToItemsModel , SplitToLinesModel ,
5
- TableOfPydanticRecordsModel , TaskTemplate )
5
+ TableOfPydanticRecordsModel , TaskTemplate , StrDataset )
6
6
from omnipy_examples .util import get_github_repo_urls
7
7
from pydantic import BaseModel , conint , constr
8
8
@@ -36,7 +36,7 @@ class BedRecordModel(BaseModel):
36
36
strand : constr (regex = '[-+\.]' ) | None
37
37
thickStart : GenomeCoord | None
38
38
thickEnd : GenomeCoord | None
39
- itemRgb : SplitOnComma2RgbColorModel | Literal [0 ] | None
39
+ itemRgb : SplitOnComma2RgbColorModel | Literal ['0' ] | None
40
40
blockCount : conint (ge = 0 ) | None
41
41
blockSizes : SplitOnComma2ListOfIntsModel | None
42
42
blockStarts : SplitOnComma2ListOfIntsModel | None
@@ -56,8 +56,8 @@ class BedDataset(Dataset[BedModel]):
56
56
57
57
# Omnipy tasks
58
58
@TaskTemplate ()
59
- def fetch_bed_dataset (url_list : HttpUrlDataset ) -> BedDataset :
60
- bed_raw_dataset = BedDataset ()
59
+ def fetch_bed_dataset (url_list : HttpUrlDataset ) -> StrDataset :
60
+ bed_raw_dataset = StrDataset ()
61
61
bed_raw_dataset .load (url_list )
62
62
return bed_raw_dataset
63
63
@@ -66,15 +66,11 @@ def fetch_bed_dataset(url_list: HttpUrlDataset) -> BedDataset:
66
66
@LinearFlowTemplate (
67
67
get_github_repo_urls ,
68
68
fetch_bed_dataset ,
69
+ convert_dataset .refine (name = 'parse_bed' , fixed_params = {'dataset_cls' : BedDataset }),
69
70
convert_dataset .refine (
70
71
name = 'convert_to_dataframe' , fixed_params = {'dataset_cls' : PandasDataset }),
71
72
)
72
73
def import_bed_files_to_pandas (owner : str , repo : str , branch : str , path : str ,
73
74
file_suffix : str ) -> PandasDataset :
74
75
...
75
76
76
-
77
- # Running the flow
78
- if __name__ == '__main__' :
79
- import_bed_files_to_pandas .run (
80
- owner = 'arq5x' , repo = 'bedtools2' , branch = 'master' , path = 'data' , file_suffix = 'bed' )
0 commit comments