Skip to content

Commit 85fa833

Browse files
committed
Merge branch 'master' of github.com:cathywu/Sentiment-Analysis
2 parents d56c0b2 + d33c6b1 commit 85fa833

9 files changed

+1214
-102
lines changed

egpaper_final.aux

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
\relax
2-
\citation{Authors11}
32
\@writefile{toc}{\contentsline {section}{\numberline {1}\hskip -1em.\nobreakspace {}Introduction}{1}}
43
\@writefile{toc}{\contentsline {section}{\numberline {2}\hskip -1em.\nobreakspace {}Previous Work}{1}}
54
\@writefile{toc}{\contentsline {section}{\numberline {3}\hskip -1em.\nobreakspace {}The User Review Domain}{1}}
6-
\@writefile{toc}{\contentsline {section}{\numberline {4}\hskip -1em.\nobreakspace {}Machine Learning Methods}{2}}
7-
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}\hskip -1em.\nobreakspace {}The Naive Bayes Classifier}{2}}
5+
\@writefile{toc}{\contentsline {section}{\numberline {4}\hskip -1em.\nobreakspace {}Machine Learning Methods}{1}}
6+
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}\hskip -1em.\nobreakspace {}The Naive Bayes Classifier}{1}}
87
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}\hskip -1em.\nobreakspace {}The Maximum Entropy Classifier}{2}}
98
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}\hskip -1em.\nobreakspace {}The Support Vector Machine Classifier}{2}}
109
\@writefile{toc}{\contentsline {section}{\numberline {5}\hskip -1em.\nobreakspace {}Experimental Setup}{3}}
@@ -20,7 +19,8 @@
2019
\@writefile{toc}{\contentsline {subsection}{\numberline {6.9}\hskip -1em.\nobreakspace {}Majority Voting}{4}}
2120
\@writefile{toc}{\contentsline {subsection}{\numberline {6.10}\hskip -1em.\nobreakspace {}Neighboring Domain Data}{4}}
2221
\bibstyle{ieee}
23-
\bibdata{egbib}
22+
\bibdata{fpbib}
23+
\bibcite{Martin}{1}
2424
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces 3-fold cross validation results on movie dataset. Values represent positive, negative, or overall accuracy.}}{6}}
2525
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Test results on Yelp dataset with Naive Bayes classifier. Values represent percent of reviews classified as positive for a given star rating.}}{7}}
2626
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Test results on Yelp dataset with Maximum Entropy classifier. Values represent percent of reviews classified as positive for a given star rating.}}{7}}

egpaper_final.bbl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
\begin{thebibliography}{1}\itemsep=-1pt
2+
3+
\bibitem{Martin}
4+
D.~Martin, C.~Fowlkes, and J.~Malik.
5+
\newblock Learning to detect natural image boundaries using local brightness,
6+
color, and texture cues.
7+
\newblock {\em IEEE Transactions on Pattern Analysis and Machine Intelligence},
8+
26(5):530--549, 2004.
9+
10+
\end{thebibliography}

egpaper_final.blg

Lines changed: 33 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,45 @@
11
This is BibTeX, Version 0.99c (TeX Live 2009/Debian)
22
The top-level auxiliary file: egpaper_final.aux
3-
I couldn't open style file ieee.bst
4-
---line 22 of file egpaper_final.aux
5-
: \bibstyle{ieee
6-
: }
7-
I'm skipping whatever remains of this command
8-
I couldn't open database file egbib.bib
9-
---line 23 of file egpaper_final.aux
10-
: \bibdata{egbib
11-
: }
12-
I'm skipping whatever remains of this command
13-
I found no database files---while reading file egpaper_final.aux
14-
I found no style file---while reading file egpaper_final.aux
3+
The style file: ieee.bst
4+
Database file #1: fpbib.bib
155
You've used 1 entry,
16-
0 wiz_defined-function locations,
17-
87 strings with 527 characters,
18-
and the built_in function-call counts, 0 in all, are:
19-
= -- 0
20-
> -- 0
6+
2120 wiz_defined-function locations,
7+
507 strings with 3943 characters,
8+
and the built_in function-call counts, 318 in all, are:
9+
= -- 32
10+
> -- 7
2111
< -- 0
22-
+ -- 0
23-
- -- 0
24-
* -- 0
25-
:= -- 0
26-
add.period$ -- 0
27-
call.type$ -- 0
28-
change.case$ -- 0
12+
+ -- 3
13+
- -- 2
14+
* -- 25
15+
:= -- 58
16+
add.period$ -- 3
17+
call.type$ -- 1
18+
change.case$ -- 4
2919
chr.to.int$ -- 0
30-
cite$ -- 0
31-
duplicate$ -- 0
32-
empty$ -- 0
33-
format.name$ -- 0
34-
if$ -- 0
20+
cite$ -- 1
21+
duplicate$ -- 11
22+
empty$ -- 28
23+
format.name$ -- 2
24+
if$ -- 62
3525
int.to.chr$ -- 0
36-
int.to.str$ -- 0
37-
missing$ -- 0
38-
newline$ -- 0
39-
num.names$ -- 0
40-
pop$ -- 0
41-
preamble$ -- 0
42-
purify$ -- 0
26+
int.to.str$ -- 1
27+
missing$ -- 1
28+
newline$ -- 8
29+
num.names$ -- 2
30+
pop$ -- 2
31+
preamble$ -- 1
32+
purify$ -- 3
4333
quote$ -- 0
44-
skip$ -- 0
34+
skip$ -- 7
4535
stack$ -- 0
46-
substring$ -- 0
47-
swap$ -- 0
36+
substring$ -- 30
37+
swap$ -- 1
4838
text.length$ -- 0
4939
text.prefix$ -- 0
5040
top$ -- 0
51-
type$ -- 0
41+
type$ -- 4
5242
warning$ -- 0
53-
while$ -- 0
54-
width$ -- 0
55-
write$ -- 0
56-
(There were 4 error messages)
43+
while$ -- 4
44+
width$ -- 2
45+
write$ -- 13

egpaper_final.dvi

34.1 KB
Binary file not shown.

egpaper_final.log

Lines changed: 34 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
This is pdfTeX, Version 3.1415926-1.40.10 (TeX Live 2009/Debian) (format=pdflatex 2011.11.2) 5 FEB 2012 21:26
1+
This is pdfTeX, Version 3.1415926-1.40.10 (TeX Live 2009/Debian) (format=pdflatex 2011.11.2) 5 FEB 2012 21:39
22
entering extended mode
33
%&-line parsing enabled.
44
**egpaper_final.tex
@@ -191,29 +191,23 @@ Package color Info: Driver file: pdftex.def on input line 130.
191191
\everyMPtoPDFconversion=\toks21
192192
)
193193
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <14.4> not available
194-
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 45.
195-
LaTeX Font Info: Try loading font information for U+msa on input line 45.
194+
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 41.
195+
LaTeX Font Info: Try loading font information for U+msa on input line 41.
196196
(/usr/share/texmf-texlive/tex/latex/amsfonts/umsa.fd
197197
File: umsa.fd 2009/06/22 v3.00 AMS symbols A
198198
)
199-
LaTeX Font Info: Try loading font information for U+msb on input line 45.
199+
LaTeX Font Info: Try loading font information for U+msb on input line 41.
200200

201201
(/usr/share/texmf-texlive/tex/latex/amsfonts/umsb.fd
202202
File: umsb.fd 2009/06/22 v3.00 AMS symbols B
203203
)
204-
LaTeX Font Info: Try loading font information for OT1+pcr on input line 45.
204+
LaTeX Font Info: Try loading font information for OT1+pcr on input line 41.
205205

206206
(/usr/share/texmf-texlive/tex/latex/psnfss/ot1pcr.fd
207207
File: ot1pcr.fd 2001/06/04 font definitions for OT1/pcr.
208208
)
209209
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <12> not available
210-
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 49.
211-
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <10> not available
212-
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 50.
213-
214-
215-
LaTeX Warning: Citation `Authors11' on page 1 undefined on input line 50.
216-
210+
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 45.
217211
Missing character: There is no � in font ptmr7t!
218212
Missing character: There is no � in font ptmr7t!
219213
Missing character: There is no � in font ptmr7t!
@@ -227,17 +221,17 @@ Missing character: There is no
227221
Missing character: There is no � in font ptmr7t!
228222
Missing character: There is no � in font ptmr7t!
229223

230-
Underfull \hbox (badness 3088) in paragraph at lines 65--66
224+
Underfull \hbox (badness 3088) in paragraph at lines 61--62
231225
[]\OT1/ptm/m/n/10 For our ex-per-i-ments, we worked with movie re-
232226
[]
233227

234228

235-
Underfull \hbox (badness 1803) in paragraph at lines 65--66
229+
Underfull \hbox (badness 1803) in paragraph at lines 61--62
236230
\OT1/ptm/m/n/10 views. Our data source was Pangs re-leased dataset
237231
[]
238232

239233

240-
Underfull \hbox (badness 10000) in paragraph at lines 65--66
234+
Underfull \hbox (badness 10000) in paragraph at lines 61--62
241235
\OT1/ptm/m/n/10 (http://www.cs.cornell.edu/people/pabo/movie-review-
242236
[]
243237

@@ -247,14 +241,11 @@ Missing character: There is no
247241
Missing character: There is no � in font ptmr7t!
248242
Missing character: There is no � in font ptmr7t!
249243
Missing character: There is no � in font ptmr7t!
250-
251-
Underfull \vbox (badness 10000) has occurred while \output is active []
252-
253-
[1{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}
244+
[1{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}
254245

255246

256247
]
257-
Overfull \hbox (2.72952pt too wide) detected at line 84
248+
Overfull \hbox (2.72952pt too wide) detected at line 80
258249
\OT1/cmr/m/n/10 = \OML/cmm/m/it/10 P\OT1/cmr/m/n/10 (\OML/cmm/m/it/10 C\OT1/cmr
259250
/m/n/10 )\OML/cmm/m/it/10 P\OT1/cmr/m/n/10 (\OML/cmm/m/it/10 F[]\OMS/cmsy/m/n/1
260251
0 j\OML/cmm/m/it/10 C\OT1/cmr/m/n/10 )\OML/cmm/m/it/10 P\OT1/cmr/m/n/10 (\OML/c
@@ -264,12 +255,12 @@ m/m/it/10 P\OT1/cmr/m/n/10 (\OML/cmm/m/it/10 F[]; F[]; [] ; F[]\OMS/cmsy/m/n/10
264255
[]
265256

266257

267-
Underfull \hbox (badness 10000) in paragraph at lines 105--106
258+
Underfull \hbox (badness 10000) in paragraph at lines 101--102
268259
[]\OT1/ptm/m/n/10 [http://nlp.stanford.edu/IR-
269260
[]
270261

271262

272-
Underfull \hbox (badness 10000) in paragraph at lines 105--106
263+
Underfull \hbox (badness 10000) in paragraph at lines 101--102
273264
\OT1/ptm/m/n/10 book/html/htmledition/naive-bayes-text-classification-
274265
[]
275266

@@ -289,12 +280,12 @@ Missing character: There is no
289280
Missing character: There is no � in font ptmr7t!
290281
Missing character: There is no � in font ptmr7t!
291282
[2]
292-
Underfull \hbox (badness 1688) in paragraph at lines 145--146
283+
Underfull \hbox (badness 1688) in paragraph at lines 141--142
293284
\OT1/ptm/m/n/10 tions well, in part be-cause the pos-i-tive and neg-a-tive
294285
[]
295286

296287

297-
Overfull \hbox (58.7314pt too wide) in paragraph at lines 145--146
288+
Overfull \hbox (58.7314pt too wide) in paragraph at lines 141--142
298289
\OT1/ptm/m/n/10 [http://www.cs.unb.ca/profs/hzhang/publications/FLAIRS04ZhangH.
299290
pdf].
300291
[]
@@ -322,36 +313,33 @@ Missing character: There is no
322313
Missing character: There is no � in font ptmr7t!
323314
Missing character: There is no � in font ptmr7t!
324315
[4]
325-
Overfull \hbox (4.21208pt too wide) in paragraph at lines 199--244
316+
Overfull \hbox (4.21208pt too wide) in paragraph at lines 195--240
326317
[][]
327318
[]
328319

329320
(./egpaper_final.bbl) [5
330321

331-
] [6] [7] (./egpaper_final.aux)
332-
333-
LaTeX Warning: There were undefined references.
334-
335-
)
322+
] [6] [7] (./egpaper_final.aux) )
336323
Here is how much of TeX's memory you used:
337-
2139 strings out of 495061
338-
25791 string characters out of 1182621
339-
134725 words of memory out of 3000000
340-
5284 multiletter control sequences out of 15000+50000
341-
24844 words of font info for 61 fonts, out of 3000000 for 9000
324+
2138 strings out of 495061
325+
25774 string characters out of 1182621
326+
134728 words of memory out of 3000000
327+
5283 multiletter control sequences out of 15000+50000
328+
24902 words of font info for 61 fonts, out of 3000000 for 9000
342329
28 hyphenation exceptions out of 8191
343330
32i,9n,24p,794b,314s stack positions out of 5000i,500n,10000p,200000b,50000s
344-
{/usr/share/texmf-texlive/fonts/enc/dvips/base/8r.enc}</usr/share/texmf-texli
345-
ve/fonts/type1/public/amsfonts/cm/cmex10.pfb></usr/share/texmf-texlive/fonts/ty
346-
pe1/public/amsfonts/cm/cmmi10.pfb></usr/share/texmf-texlive/fonts/type1/public/
347-
amsfonts/cm/cmmi7.pfb></usr/share/texmf-texlive/fonts/type1/public/amsfonts/cm/
348-
cmr10.pfb></usr/share/texmf-texlive/fonts/type1/public/amsfonts/cm/cmr7.pfb></u
349-
sr/share/texmf-texlive/fonts/type1/public/amsfonts/cm/cmsy10.pfb></usr/share/te
350-
xmf-texlive/fonts/type1/public/amsfonts/cm/cmsy7.pfb></usr/share/texmf-texlive/
351-
fonts/type1/urw/courier/ucrr8a.pfb></usr/share/texmf-texlive/fonts/type1/urw/ti
352-
mes/utmb8a.pfb></usr/share/texmf-texlive/fonts/type1/urw/times/utmr8a.pfb></usr
353-
/share/texmf-texlive/fonts/type1/urw/times/utmri8a.pfb>
354-
Output written on egpaper_final.pdf (7 pages, 160339 bytes).
331+
{/usr/share/texmf-texlive/fonts/enc/dvips/base
332+
/8r.enc}</usr/share/texmf-texlive/fonts/type1/public/amsfonts/cm/cmex10.pfb></u
333+
sr/share/texmf-texlive/fonts/type1/public/amsfonts/cm/cmmi10.pfb></usr/share/te
334+
xmf-texlive/fonts/type1/public/amsfonts/cm/cmmi7.pfb></usr/share/texmf-texlive/
335+
fonts/type1/public/amsfonts/cm/cmr10.pfb></usr/share/texmf-texlive/fonts/type1/
336+
public/amsfonts/cm/cmr7.pfb></usr/share/texmf-texlive/fonts/type1/public/amsfon
337+
ts/cm/cmsy10.pfb></usr/share/texmf-texlive/fonts/type1/public/amsfonts/cm/cmsy7
338+
.pfb></usr/share/texmf-texlive/fonts/type1/urw/courier/ucrr8a.pfb></usr/share/t
339+
exmf-texlive/fonts/type1/urw/times/utmb8a.pfb></usr/share/texmf-texlive/fonts/t
340+
ype1/urw/times/utmr8a.pfb></usr/share/texmf-texlive/fonts/type1/urw/times/utmri
341+
8a.pfb>
342+
Output written on egpaper_final.pdf (7 pages, 160449 bytes).
355343
PDF statistics:
356344
71 PDF objects out of 1000 (max. 8388607)
357345
0 named destinations out of 1000 (max. 500000)

egpaper_final.pdf

110 Bytes
Binary file not shown.

egpaper_final.tex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ \subsection{Neighboring Domain Data}
320320

321321
{\small
322322
\bibliographystyle{ieee}
323-
\bibliography{egbib}
323+
\bibliography{fpbib}
324324
}
325325

326326
\end{document}

egpaper_final.tex~

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,7 @@
2626
%%%%%%%%% TITLE
2727
\title{Sentiment Classification using Machine Learning Techniques}
2828

29-
\author{Pranjal Vashaspati\\
30-
Institution1\\
31-
Institution1 address\\
29+
\author{Pranjal Vachaspati\\
3230
{\tt\small pranjal@mit.edu}
3331
% For a paper whose authors are all at the same institution,
3432
% omit the following lines up until the closing ``}''.
@@ -37,8 +35,6 @@ Institution1 address\\
3735
% To save space, use either the email address or home page, not both
3836
\and
3937
Cathy Wu\\
40-
Institution2\\
41-
First line of institution2 address\\
4238
{\tt\small cathywu@mit.edu}
4339
}
4440

@@ -47,7 +43,7 @@ First line of institution2 address\\
4743

4844
%%%%%%%%% ABSTRACT
4945
\begin{abstract}
50-
We implement a series of classifiers (Naive Bayes, Maximum Entropy, and SVM) to distinguish positive and negative sentiment in critic and user reviews. We apply various processing methods, including negation tagging, part-of-speech tagging, and position tagging to achieve maximum accuracy. We test our classifiers on an external dataset to see how well they generalize. Finally, we use a majority-voting technique to combine classifiers and achieve accuracy of close to 90\% in 3-fold cross-validation.
46+
We implement a series of classifiers (Naive Bayes, Maximum Entropy, and SVM) to distinguish positive and negative sentiment in critic and user reviews. We apply various processing methods, including negation tagging, part-of-speech tagging, and position tagging to achieve maximum accuracy. We test our classifiers on an external dataset to see how well they generalize. Finally, we use a majority-voting technique to combine classifiers and achieve accuracy of close to 90\% in 3-fold cross-validation~\cite{Martin}.
5147
\end{abstract}
5248

5349
%%%%%%%%% BODY TEXT
@@ -322,7 +318,7 @@ verbs & Unigrams & maximum & Presence & 0.45 & 0.45 & 0.42 & 0.38 & 0.3
322318

323319
{\small
324320
\bibliographystyle{ieee}
325-
\bibliography{egbib}
321+
\bibliography{fpbib}
326322
}
327323

328324
\end{document}

0 commit comments

Comments
 (0)