Skip to content

Commit

Permalink
Adding Log Reg functions
Browse files: browse the repository at this point in the history
  • Loading branch information
Zach McQuiston committed Feb 18, 2020
1 parent ce79370 commit 122d003
Showing 1 changed file with 47 additions and 0 deletions.
47 changes: 47 additions & 0 deletions CNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,14 @@
import re
from nltk.corpus import stopwords
from numpy import array

#sklearn
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression





# Snowflake Credentials from env variables #
Expand All @@ -30,6 +37,7 @@
# Connection settings are pulled from the process environment so that no
# credentials live in the source. Each name is None when its variable is unset.
# USER is passed as `user=` to the Snowflake connector below; ACCOUNT is
# presumably the Snowflake account identifier (confirm against the connect call).
USER = os.environ.get('SNOWUSER')
ACCOUNT = os.environ.get('SNOWACCT')


# Connection Manager for Snowflake Instance #
con = snowflake.connector.connect(
user=USER,
Expand All @@ -48,3 +56,42 @@

# Load the cursor's result set into a pandas DataFrame.
# NOTE(review): `cur` is created outside this view -- presumably a cursor
# opened on `con` with the query already executed; confirm.
# The code below reads the 'source', 'sentence' and 'label' columns of `df`.
df = cur.fetch_pandas_all()

# Start of Convolutional Models #

# Yelp Log Reg for baseline test #
# Bag-of-words + logistic regression on the rows whose 'source' is 'yelp'.
df_yelp = df[df['source'] == 'yelp']
sentences = df_yelp['sentence'].values
y = df_yelp['label'].values

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
sentences_train, sentences_test, y_train, y_test = train_test_split(
    sentences, y, test_size=0.25, random_state=1000)

# The vocabulary is learned from the training split only, so the test split
# is encoded with exactly the tokens seen during training.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(sentences_train)
X_test = vectorizer.transform(sentences_test)

# fit() returns the estimator itself, so it can be bound in one statement.
classifier = LogisticRegression().fit(X_train, y_train)
score = classifier.score(X_test, y_test)

print('Accuracy of Log Reg: ', score)

# Log Reg for each unique source in dataframe #
# Repeats the bag-of-words + logistic-regression baseline once per distinct
# value of df['source'] and prints the held-out accuracy for each.
# Note: the module-level names (sentences, y, vectorizer, X_train, X_test,
# classifier, score, ...) are rebound on every iteration, so after the loop
# they hold the values for the last source processed.
for source in df['source'].unique():
    df_source = df[df['source'] == source]
    sentences = df_source['sentence'].values
    y = df_source['label'].values

    # Same 75/25 split and seed for every source.
    sentences_train, sentences_test, y_train, y_test = train_test_split(
        sentences, y, test_size=0.25, random_state=1000)

    # A fresh vectorizer per source: the vocabulary is learned from that
    # source's training split only, then reused to encode its test split.
    vectorizer = CountVectorizer()
    X_train = vectorizer.fit_transform(sentences_train)
    X_test = vectorizer.transform(sentences_test)

    classifier = LogisticRegression()
    classifier.fit(X_train, y_train)
    score = classifier.score(X_test, y_test)
    print('Accuracy for {} data: {:.4f}'.format(source, score))

0 comments on commit 122d003

Please sign in to comment.