-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.py
53 lines (43 loc) · 1.56 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation,Dropout,Embedding
import pandas as pd
import keras.utils
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
from sklearn.preprocessing import LabelEncoder
import numpy as np
from keras.datasets import imdb
# Declare constants (training hyperparameters).
# max_words: passed as num_words to the Tokenizer and used as the input
# width of the first Dense layer.
max_words = 1000
# batch_size and epochs are forwarded unchanged to model.fit().
batch_size = 32
epochs = 3
# Read the ';'-delimited CSV and cast every column to str.
df = pd.read_csv('cleaned_dataset.csv', delimiter=';', encoding="utf-8")
df = df.astype(str)

# Raw inputs: query text ('запрос') and its class label ('класс').
X_raw, Y_raw = df['запрос'].values, df['класс'].values

# Number of distinct labels; sizes the one-hot targets and the output layer.
num_classes = df['класс'].nunique(dropna=False)
# Encode the class labels as integer ids (printed for inspection), then
# expand them into one-hot rows with num_classes columns.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y_raw)
print(encoded_Y)
y_train = keras.utils.to_categorical(encoded_Y, num_classes=num_classes)

# Fit the tokenizer on the query texts (vocabulary capped via
# num_words=max_words) and turn the same texts into the matrix that is fed
# to the network's max_words-wide input layer.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_raw)
x_train = tokenizer.texts_to_matrix(X_raw)
# Build the model: one 512-unit ReLU hidden layer with 50% dropout, followed
# by a softmax output with one unit per class.
model = Sequential([
    Dense(512, input_shape=(max_words,)),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the whole dataset; no validation data is passed to fit().
model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

# NOTE(review): only the model is written to disk in this script; the fitted
# tokenizer and label encoder are not persisted here. Confirm that inference
# code can rebuild them.
model.save('classifier.h5')