filtering for specific products

HumanSignal · Jun 4, 2019 · c9383af · c9383af
1 parent ae6f902
commit c9383af
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 11 deletions.
diff --git a/README.md b/README.md
@@ -56,19 +56,23 @@ filter those occurrences first.
 To do that we will use another type of model which is called a tagger
 model. It learns when you tag relevant occurrences.
 
+```sh
+PRODUCTS="Apple,iOS,iPadOS,watchOS,macOS,MacPro,Pro Display"
+```
+
 ```sh
 # create a Heartex project to filter for news that is relevant only to your brand name
 
 # you will get back a link where you need to train a neural network a little so that it understands what is relevant to you
-python src/create_filter_project.py --token=$TOKEN --input=news.csv
+python src/create_filter_project.py --token=$TOKEN --input=news.csv --labels=$PRODUCTS
 
 # set project here
 export FILTER_PROJECT=""
 ```
 
 ```sh
 # get predictions
-python src/predict_and_filter.py --project=$FILTER_PROJECT --token=$TOKEN --output=filtered.csv
+python src/predict_and_filter.py --project=$FILTER_PROJECT --token=$TOKEN --output=filtered.csv --filter-labels=$PRODUCTS
 ```
 
 Now you have filtered.csv which you can use for further sentiment

diff --git a/src/create_filter_project.py b/src/create_filter_project.py
@@ -5,10 +5,9 @@
 
 CONFIG="""<View>
   <Text name="txt-1" value="$news"></Text>
-  <Choices name="chc-1" toName="txt-1">
-    <Choice value="Relevant"></Choice>
-    <Choice value="Not Relevant"></Choice>
-  </Choices>
+  <Labels name="chc-1" toName="txt-1">
+%s
+  </Labels>
 </View>"""
 
 
@@ -19,8 +18,14 @@
 
     parser.add_option('-t', '--token', action="store", dest="token", help="heartex token")
     parser.add_option('-i', '--input', action="store", dest="input", default="news.csv", help="input file name")
+    parser.add_option('-l', '--labels', type=str, dest="labels", action="store", help='A list of labels')
 
     options, args = parser.parse_args()
+
+    labels = options.labels.split(',')
+
+    labels_conf = "\n".join([ "    <Label value=\"%s\"></Label>" % (l,) for l in labels ])
+    CONFIG = CONFIG % (labels_conf, )
 
     project = heartex.new_project_setup(**vars(options), label_config=CONFIG, name="Brand Filter Project")
 

diff --git a/src/predict_and_filter.py b/src/predict_and_filter.py
@@ -14,24 +14,28 @@
     parser.add_option('-s', '--score', action="store", type=float, dest="score", default=0.90, help="score used to filter")
     parser.add_option('-i', '--input', action="store", dest="input", default="news.csv", help="input file name")
     parser.add_option('-o', '--output', action="store", dest="output", default="filtered.csv", help="csv output filename")
+    parser.add_option('-l', '--filter-labels', type=str, dest="filter_labels", action="store", help='A list of labels to filter on')
 
     options, args = parser.parse_args()
-
+    labels = options.filter_labels.split(',')
+
     data = []
     with open(options.input, newline='') as csvfile:
         reader = csv.DictReader(csvfile)
         for row in reader:
             data.append({ "news": row["news"] })
-
+    
     predictions = heartex.run_predict(**vars(options), data=data)
     filtered = []
 
     for idx, p in enumerate(predictions.json()):
         if p['score'] > options.score:
             for row in p['result']:
-                if 'Relevant' in row['value']['labels']:
-                    filtered.append(data[idx])
-
+                for label in labels:
+                    if label in row['value']['labels']:
+                        filtered.append(data[idx])
+                        break
+
     with open(options.output, 'w', newline='') as csvfile:
         writer = csv.DictWriter(csvfile, delimiter=',', fieldnames = ["news"])
         writer.writeheader()