Skip to content

Commit

Permalink
model: fastText (#57)
Browse files Browse the repository at this point in the history
  • Loading branch information
S-Kaisei authored Sep 17, 2020
1 parent b99ec95 commit e10318a
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 7 deletions.
3 changes: 2 additions & 1 deletion models/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.ACCESS_TOKEN
*.txt
training_data/
suggestions.json
training_data/
11 changes: 11 additions & 0 deletions models/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Tag of the Docker image used to work on the models.
IMAGE_NAME := peaceiris/actions-suggest-related-links-models:latest

# Build the image. The trailing "." is the build context that `docker build`
# requires; it assumes the Dockerfile sits next to this Makefile.
.PHONY: build
build:
	# docker-compose build --build-arg NODE_VERSION=$(NODE_VERSION)
	docker build -t ${IMAGE_NAME} .

# Start an interactive bash shell in a throwaway container, with the current
# directory mounted at /src.
.PHONY: run
run:
	# docker-compose run --rm dev bash
	docker run --rm -i -t -v ${PWD}:/src ${IMAGE_NAME} bash
14 changes: 14 additions & 0 deletions models/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)*

- [Models](#models)

<!-- END doctoc generated TOC please keep comment here to allow auto update -->

## Models

```sh
make build
make run
```
66 changes: 60 additions & 6 deletions models/fasttext/train.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,67 @@
import argparse
import fasttext
import json
import numpy as np

# Command-line options for the training script.
# NOTE(review): `-d` is registered twice below (`--document` and `--documents`).
# This listing appears to show both sides of a diff; argparse rejects a duplicate
# option string, so only one registration should remain. The rest of the script
# reads `args.documents`, `args.train_document` and `args.test_document`.
parser = argparse.ArgumentParser()
parser.add_argument("-d","--document", type=str, required=True, help="path to documents")
parser.add_argument("-model","--model_name", type=str, default="model.bin", help="outputted model name")
parser.add_argument("-d","--documents", type=str, default='../training_data/training-data.json', help="path to documents")
parser.add_argument("-train","--train_document", type=str, default='./train.txt', help="path to train document")
parser.add_argument("-test","--test_document", type=str, default='./test.txt', help="path to test document")
args = parser.parse_args()

with open(args.document, "r") as f:
terms = f.readlines()
def cos_sim(v1, v2):
    """Return the cosine similarity of vectors *v1* and *v2*.

    The result is ``dot(v1, v2) / (|v1| * |v2|)``. When either vector has
    zero norm the similarity is undefined; 0.0 is returned instead of
    dividing by zero, so callers never receive NaN from an all-zero vector
    (NaN would break sorting by score and is not valid JSON).
    """
    denom = np.linalg.norm(v1) * np.linalg.norm(v2)
    if denom == 0:
        return 0.0
    return np.dot(v1, v2) / denom

model = fasttext.train_supervised(input=terms)
model.save_model(args.model_name)
# load train data (a JSON list of issue objects; the file is closed on exit)
with open(args.documents, "r", encoding="utf-8") as f:
    issues_load = json.load(f)

# load test data
with open(args.test_document, encoding="utf-8") as f:
    test = f.readlines()

# add label
# fastText reads exactly one example per line, so every run of whitespace in
# an issue body (newlines included) is collapsed to a single space. A missing
# body (None) is treated as empty text rather than the literal word "None".
trains = []
issues_by_number = {}
for issue in issues_load:
    # Index every entry by number so suggestions can be resolved without
    # rescanning the whole list for each result.
    issues_by_number.setdefault(issue['number'], []).append(issue)
    if issue['html_url'].split('/')[-2] == 'issues':
        body = ' '.join((issue['body'] or '').split())
        trains.append(f"__label__{issue['number']} {body}\n")

with open(args.train_document, 'w', encoding="utf-8") as f:
    f.writelines(trains)

# train model
model = fasttext.train_supervised(input=args.train_document)

# test model
# The query vector is the mean of the word vectors of every token in the test
# document; each training example is scored by cosine similarity against it.
test_word_vector = np.mean([model[x] for line in test for x in line.split()], axis=0)
results = []
for train in trains:
    label, *words = train.split()
    if words:
        train_word_vector = np.mean([model[x] for x in words], axis=0)
        prob = cos_sim(train_word_vector, test_word_vector)
    else:
        # An empty body has no word vectors to average; score it as unrelated
        # instead of propagating NaN into the ranking and the JSON output.
        prob = 0.0
    results.append({'probability': prob, 'label': label})

# most similar first
results = sorted(results, key=lambda x: x['probability'], reverse=True)
suggestions = []

for result in results:
    number = int(result['label'].split('__')[-1])
    for issue in issues_by_number.get(number, []):
        suggestions.append({
            'html_url': issue['html_url'],
            'title': issue['title'],
            'number': int(issue['html_url'].split('/')[-1]),
            'probability': float(result['probability']),
        })

suggestions = json.dumps(suggestions, indent=4)
with open('suggestions.json', 'w', encoding="utf-8") as f:
    f.write(suggestions)

print(suggestions)

0 comments on commit e10318a

Please sign in to comment.