Skip to content
This repository has been archived by the owner on Sep 26, 2023. It is now read-only.

Commit

Permalink
filtering for specific products
Browse files Browse the repository at this point in the history
  • Loading branch information
deppp committed Jun 4, 2019
1 parent ae6f902 commit c9383af
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 11 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,23 @@ filter those occurrences first.
To do that we will use another type of model which is called a tagger
model. It learns when you tag relevant occurrences.

```sh
PRODUCTS="Apple,iOS,iPadOS,watchOS,macOS,MacPro,Pro Display"
```

```sh
# create a Heartex project to keep only the news that is relevant to your brand name

# you will get back a link where you need to train a neural network a little so that it learns what is relevant to you
python src/create_filter_project.py --token=$TOKEN --input=news.csv
# $PRODUCTS must be quoted: it contains a space ("Pro Display"), and an unquoted expansion would be split into two arguments
python src/create_filter_project.py --token=$TOKEN --input=news.csv --labels="$PRODUCTS"

# set project here
export FILTER_PROJECT=""
```

```sh
# get predictions
python src/predict_and_filter.py --project=$FILTER_PROJECT --token=$TOKEN --output=filtered.csv
# $PRODUCTS must be quoted: it contains a space ("Pro Display"), and an unquoted expansion would be split into two arguments
python src/predict_and_filter.py --project=$FILTER_PROJECT --token=$TOKEN --output=filtered.csv --filter-labels="$PRODUCTS"
```

Now you have filtered.csv which you can use for further sentiment
Expand Down
13 changes: 9 additions & 4 deletions src/create_filter_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@

CONFIG="""<View>
<Text name="txt-1" value="$news"></Text>
<Choices name="chc-1" toName="txt-1">
<Choice value="Relevant"></Choice>
<Choice value="Not Relevant"></Choice>
</Choices>
<Labels name="chc-1" toName="txt-1">
%s
</Labels>
</View>"""


Expand All @@ -19,8 +18,14 @@

parser.add_option('-t', '--token', action="store", dest="token", help="heartex token")
parser.add_option('-i', '--input', action="store", dest="input", default="news.csv", help="input file name")
parser.add_option('-l', '--labels', type=str, dest="labels", action="store", help='A list of labels')

options, args = parser.parse_args()

# --labels has no default, so options.labels is None when the flag is omitted.
# Report a usage error through the parser instead of crashing with
# AttributeError on None.split below.
if not options.labels:
    parser.error("--labels is required (comma-separated list of labels)")

# One label name per comma-separated entry, exactly as typed on the command line.
labels = options.labels.split(',')

# Render one <Label> element per name and splice them into the %s placeholder
# of the CONFIG template.
labels_conf = "\n".join([ " <Label value=\"%s\"></Label>" % (l,) for l in labels ])
CONFIG = CONFIG % (labels_conf, )

project = heartex.new_project_setup(**vars(options), label_config=CONFIG, name="Brand Filter Project")

Expand Down
14 changes: 9 additions & 5 deletions src/predict_and_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,28 @@
parser.add_option('-s', '--score', action="store", type=float, dest="score", default=0.90, help="score used to filter")
parser.add_option('-i', '--input', action="store", dest="input", default="news.csv", help="input file name")
parser.add_option('-o', '--output', action="store", dest="output", default="filtered.csv", help="csv output filename")
parser.add_option('-l', '--filter-labels', type=str, dest="filter_labels", action="store", help='A list of labels to filter on')

options, args = parser.parse_args()

# --filter-labels has no default, so options.filter_labels is None when the
# flag is omitted. Report a usage error through the parser instead of crashing
# with AttributeError on None.split below.
if not options.filter_labels:
    parser.error("--filter-labels is required (comma-separated list of labels)")

# Label names to keep, one per comma-separated entry, exactly as typed.
labels = options.filter_labels.split(',')

data = []
with open(options.input, newline='') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
data.append({ "news": row["news"] })

predictions = heartex.run_predict(**vars(options), data=data)
filtered = []

for idx, p in enumerate(predictions.json()):
if p['score'] > options.score:
for row in p['result']:
if 'Relevant' in row['value']['labels']:
filtered.append(data[idx])

for label in labels:
if label in row['value']['labels']:
filtered.append(data[idx])
break

with open(options.output, 'w', newline='') as csvfile:
writer = csv.DictWriter(csvfile, delimiter=',', fieldnames = ["news"])
writer.writeheader()
Expand Down

0 comments on commit c9383af

Please sign in to comment.