@@ -61,67 +61,71 @@ def parse_arguments(description='', features=''):
61
61
return parser .parse_args ().conf
62
62
63
63
64
def get_features(params, **kwargs):
    """Returns string in PolyAnalyst's json format.

    Each keyword argument becomes one column description
    ({'name': <kwarg name>, 'type': <kwarg value>}); *params* is passed
    through unchanged under the 'params' key.
    """
    columns = [{'name': name, 'type': type_} for name, type_ in kwargs.items()]
    payload = {
        'columns': columns,
        'params': params,
    }
    return json.dumps(payload)
72
81
73
82
74
def parse_ini(ini):
    """Returns keys from default section of ini file as a dict."""
    # Assigning str to optionxform keeps option names case-sensitive
    # (ConfigParser lowercases them by default).
    cfg = configparser.ConfigParser(allow_no_value=True)
    cfg.optionxform = str
    cfg.read_string(ini)

    defaults = cfg['DEFAULT']
    return {name: defaults[name] for name in defaults}
81
+
82
+
83
# Mapping from friendly type names to the Internet Source's supported
# PolyAnalyst column type identifiers.
DataTypes = dict(
    bool='$bool',
    str='$cat_string',
    num='$num',
    datetime='$num_datetime',
)
89
90
90
91
91
92
def write(path, url, content, title, **kwargs):
    """Writes json file with PolyAnalyst's result format.

    :param path: destination file path
    :param url: document url (stored as both 'url' and 'docurl')
    :param content: raw document bytes; stored base64-encoded
    :param title: document title
    :param kwargs: extra column name/value pairs
    """
    document = {
        'url': url,
        'docurl': url,
        'title': title,
        'mime': 'text/html',
        # the body is stored as base64-encoded ASCII text
        'content': base64.standard_b64encode(content).decode('ascii'),
        'columns': kwargs,
        'files': {},
    }

    with open(path, mode='w', encoding='utf_8') as out:
        json.dump({'docs': [document]}, out)
109
110
110
111
111
112
def main(data):
    """Writes an example result document into the task's output folder.

    :param data: parsed node configuration; must contain 'output_folder'
        and a 'params' dict with 'value_for_ExtraColumn'.
    """
    import os

    write(
        # os.path.join is portable; the original hard-coded a backslash
        # separator ('\example_result.json'), which only forms a valid
        # path on Windows and yields a filename containing a literal
        # backslash elsewhere.
        path=os.path.join(data['output_folder'], 'example_result.json'),
        url='http://example.com',
        content=b'Example text content',
        title='Example title',
        ExtraColumn=data['params']['value_for_ExtraColumn'],
    )
118
120
119
121
120
122
if __name__ == '__main__':
    description = 'web scraper template'
    # The newline must be a real '\n' so configparser sees two ini
    # lines; the previous '\\n' produced a literal backslash-n, so
    # 'value_for_ExtraColumn' was never parsed and main() would fail
    # with a KeyError when the user kept the default parameters.
    ini = '[DEFAULT]\nvalue_for_ExtraColumn=default value'
    features = get_features(ini, ExtraColumn=DataTypes['str'])
    file = parse_arguments(description, features)

    data = json.load(file)
    data['params'] = parse_ini(data['params'])

    main(data)
0 commit comments