Merge branch 'master' of github.com:cathywu/Sentiment-Analysis

pranjalv123 · pranjalv123 · commit 85fa833c91fa · 2012-02-05T21:41:12.000-05:00
diff --git a/egpaper_final.aux b/egpaper_final.aux
@@ -1,10 +1,9 @@
 \relax 
-\citation{Authors11}
 \@writefile{toc}{\contentsline {section}{\numberline {1}\hskip -1em.\nobreakspace  {}Introduction}{1}}
 \@writefile{toc}{\contentsline {section}{\numberline {2}\hskip -1em.\nobreakspace  {}Previous Work}{1}}
 \@writefile{toc}{\contentsline {section}{\numberline {3}\hskip -1em.\nobreakspace  {}The User Review Domain}{1}}
-\@writefile{toc}{\contentsline {section}{\numberline {4}\hskip -1em.\nobreakspace  {}Machine Learning Methods}{2}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}\hskip -1em.\nobreakspace  {}The Naive Bayes Classifier}{2}}
+\@writefile{toc}{\contentsline {section}{\numberline {4}\hskip -1em.\nobreakspace  {}Machine Learning Methods}{1}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}\hskip -1em.\nobreakspace  {}The Naive Bayes Classifier}{1}}
 \@writefile{toc}{\contentsline {subsection}{\numberline {4.2}\hskip -1em.\nobreakspace  {}The Maximum Entropy Classifier}{2}}
 \@writefile{toc}{\contentsline {subsection}{\numberline {4.3}\hskip -1em.\nobreakspace  {}The Support Vector Machine Classifier}{2}}
 \@writefile{toc}{\contentsline {section}{\numberline {5}\hskip -1em.\nobreakspace  {}Experimental Setup}{3}}
@@ -20,7 +19,8 @@
 \@writefile{toc}{\contentsline {subsection}{\numberline {6.9}\hskip -1em.\nobreakspace  {}Majority Voting}{4}}
 \@writefile{toc}{\contentsline {subsection}{\numberline {6.10}\hskip -1em.\nobreakspace  {}Neighboring Domain Data}{4}}
 \bibstyle{ieee}
-\bibdata{egbib}
+\bibdata{fpbib}
+\bibcite{Martin}{1}
 \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces 3-fold cross validation results on movie dataset. Values represent positive, negative, or overall accuracy.}}{6}}
 \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Test results on Yelp dataset with Naive Bayes classifier. Values represent percent of reviews classified as positive for a given star rating.}}{7}}
 \@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Test results on Yelp dataset with Maximum Entropy classifier. Values represent percent of reviews classified as positive for a given star rating.}}{7}}
diff --git a/egpaper_final.bbl b/egpaper_final.bbl
@@ -0,0 +1,10 @@
+\begin{thebibliography}{1}\itemsep=-1pt
+
+\bibitem{Martin}
+D.~Martin, C.~Fowlkes, and J.~Malik.
+\newblock Learning to detect natural image boundaries using local brightness,
+  color, and texture cues.
+\newblock {\em IEEE Transactions on Pattern Analysis and Machine Intelligence},
+  26(5):530--549, 2004.
+
+\end{thebibliography}
diff --git a/egpaper_final.blg b/egpaper_final.blg
@@ -1,56 +1,45 @@
 This is BibTeX, Version 0.99c (TeX Live 2009/Debian)
 The top-level auxiliary file: egpaper_final.aux
-I couldn't open style file ieee.bst
----line 22 of file egpaper_final.aux
- : \bibstyle{ieee
- :               }
-I'm skipping whatever remains of this command
-I couldn't open database file egbib.bib
----line 23 of file egpaper_final.aux
- : \bibdata{egbib
- :               }
-I'm skipping whatever remains of this command
-I found no database files---while reading file egpaper_final.aux
-I found no style file---while reading file egpaper_final.aux
+The style file: ieee.bst
+Database file #1: fpbib.bib
 You've used 1 entry,
-            0 wiz_defined-function locations,
-            87 strings with 527 characters,
-and the built_in function-call counts, 0 in all, are:
-= -- 0
-> -- 0
+            2120 wiz_defined-function locations,
+            507 strings with 3943 characters,
+and the built_in function-call counts, 318 in all, are:
+= -- 32
+> -- 7
 < -- 0
-+ -- 0
-- -- 0
-* -- 0
-:= -- 0
-add.period$ -- 0
-call.type$ -- 0
-change.case$ -- 0
++ -- 3
+- -- 2
+* -- 25
+:= -- 58
+add.period$ -- 3
+call.type$ -- 1
+change.case$ -- 4
 chr.to.int$ -- 0
-cite$ -- 0
-duplicate$ -- 0
-empty$ -- 0
-format.name$ -- 0
-if$ -- 0
+cite$ -- 1
+duplicate$ -- 11
+empty$ -- 28
+format.name$ -- 2
+if$ -- 62
 int.to.chr$ -- 0
-int.to.str$ -- 0
-missing$ -- 0
-newline$ -- 0
-num.names$ -- 0
-pop$ -- 0
-preamble$ -- 0
-purify$ -- 0
+int.to.str$ -- 1
+missing$ -- 1
+newline$ -- 8
+num.names$ -- 2
+pop$ -- 2
+preamble$ -- 1
+purify$ -- 3
 quote$ -- 0
-skip$ -- 0
+skip$ -- 7
 stack$ -- 0
-substring$ -- 0
-swap$ -- 0
+substring$ -- 30
+swap$ -- 1
 text.length$ -- 0
 text.prefix$ -- 0
 top$ -- 0
-type$ -- 0
+type$ -- 4
 warning$ -- 0
-while$ -- 0
-width$ -- 0
-write$ -- 0
-(There were 4 error messages)
+while$ -- 4
+width$ -- 2
+write$ -- 13
diff --git a/egpaper_final.dvi b/egpaper_final.dvi
diff --git a/egpaper_final.log b/egpaper_final.log
@@ -1,4 +1,4 @@
-This is pdfTeX, Version 3.1415926-1.40.10 (TeX Live 2009/Debian) (format=pdflatex 2011.11.2)  5 FEB 2012 21:26
+This is pdfTeX, Version 3.1415926-1.40.10 (TeX Live 2009/Debian) (format=pdflatex 2011.11.2)  5 FEB 2012 21:39
 entering extended mode
  %&-line parsing enabled.
 **egpaper_final.tex
@@ -191,29 +191,23 @@ Package color Info: Driver file: pdftex.def on input line 130.
 \everyMPtoPDFconversion=\toks21
 )
 LaTeX Font Info:    Font shape `OT1/ptm/bx/n' in size <14.4> not available
-(Font)              Font shape `OT1/ptm/b/n' tried instead on input line 45.
-LaTeX Font Info:    Try loading font information for U+msa on input line 45.
+(Font)              Font shape `OT1/ptm/b/n' tried instead on input line 41.
+LaTeX Font Info:    Try loading font information for U+msa on input line 41.
  (/usr/share/texmf-texlive/tex/latex/amsfonts/umsa.fd
 File: umsa.fd 2009/06/22 v3.00 AMS symbols A
 )
-LaTeX Font Info:    Try loading font information for U+msb on input line 45.
+LaTeX Font Info:    Try loading font information for U+msb on input line 41.
 
 (/usr/share/texmf-texlive/tex/latex/amsfonts/umsb.fd
 File: umsb.fd 2009/06/22 v3.00 AMS symbols B
 )
-LaTeX Font Info:    Try loading font information for OT1+pcr on input line 45.
+LaTeX Font Info:    Try loading font information for OT1+pcr on input line 41.
 
 (/usr/share/texmf-texlive/tex/latex/psnfss/ot1pcr.fd
 File: ot1pcr.fd 2001/06/04 font definitions for OT1/pcr.
 )
 LaTeX Font Info:    Font shape `OT1/ptm/bx/n' in size <12> not available
-(Font)              Font shape `OT1/ptm/b/n' tried instead on input line 49.
-LaTeX Font Info:    Font shape `OT1/ptm/bx/n' in size <10> not available
-(Font)              Font shape `OT1/ptm/b/n' tried instead on input line 50.
-
-
-LaTeX Warning: Citation `Authors11' on page 1 undefined on input line 50.
-
+(Font)              Font shape `OT1/ptm/b/n' tried instead on input line 45.
 Missing character: There is no � in font ptmr7t!
 Missing character: There is no � in font ptmr7t!
 Missing character: There is no � in font ptmr7t!
@@ -227,17 +221,17 @@ Missing character: There is no 
 Missing character: There is no � in font ptmr7t!
 Missing character: There is no � in font ptmr7t!
 
-Underfull \hbox (badness 3088) in paragraph at lines 65--66
+Underfull \hbox (badness 3088) in paragraph at lines 61--62
 []\OT1/ptm/m/n/10 For our ex-per-i-ments, we worked with movie re-
  []
 
 
-Underfull \hbox (badness 1803) in paragraph at lines 65--66
+Underfull \hbox (badness 1803) in paragraph at lines 61--62
 \OT1/ptm/m/n/10 views. Our data source was Pangs re-leased dataset
  []
 
 
-Underfull \hbox (badness 10000) in paragraph at lines 65--66
+Underfull \hbox (badness 10000) in paragraph at lines 61--62
 \OT1/ptm/m/n/10 (http://www.cs.cornell.edu/people/pabo/movie-review-
  []
 
@@ -247,14 +241,11 @@ Missing character: There is no 
 Missing character: There is no � in font ptmr7t!
 Missing character: There is no � in font ptmr7t!
 Missing character: There is no � in font ptmr7t!
-
-Underfull \vbox (badness 10000) has occurred while \output is active []
-
- [1{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}
+[1{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}
 
 
 ]
-Overfull \hbox (2.72952pt too wide) detected at line 84
+Overfull \hbox (2.72952pt too wide) detected at line 80
 \OT1/cmr/m/n/10 = \OML/cmm/m/it/10 P\OT1/cmr/m/n/10 (\OML/cmm/m/it/10 C\OT1/cmr
 /m/n/10 )\OML/cmm/m/it/10 P\OT1/cmr/m/n/10 (\OML/cmm/m/it/10 F[]\OMS/cmsy/m/n/1
 0 j\OML/cmm/m/it/10 C\OT1/cmr/m/n/10 )\OML/cmm/m/it/10 P\OT1/cmr/m/n/10 (\OML/c
@@ -264,12 +255,12 @@ m/m/it/10 P\OT1/cmr/m/n/10 (\OML/cmm/m/it/10 F[]; F[]; [] ; F[]\OMS/cmsy/m/n/10
  []
 
 
-Underfull \hbox (badness 10000) in paragraph at lines 105--106
+Underfull \hbox (badness 10000) in paragraph at lines 101--102
 []\OT1/ptm/m/n/10 [http://nlp.stanford.edu/IR-
  []
 
 
-Underfull \hbox (badness 10000) in paragraph at lines 105--106
+Underfull \hbox (badness 10000) in paragraph at lines 101--102
 \OT1/ptm/m/n/10 book/html/htmledition/naive-bayes-text-classification-
  []
 
@@ -289,12 +280,12 @@ Missing character: There is no 
 Missing character: There is no � in font ptmr7t!
 Missing character: There is no � in font ptmr7t!
 [2]
-Underfull \hbox (badness 1688) in paragraph at lines 145--146
+Underfull \hbox (badness 1688) in paragraph at lines 141--142
 \OT1/ptm/m/n/10 tions well, in part be-cause the pos-i-tive and neg-a-tive
  []
 
 
-Overfull \hbox (58.7314pt too wide) in paragraph at lines 145--146
+Overfull \hbox (58.7314pt too wide) in paragraph at lines 141--142
 \OT1/ptm/m/n/10 [http://www.cs.unb.ca/profs/hzhang/publications/FLAIRS04ZhangH.
 pdf]. 
  []
@@ -322,36 +313,33 @@ Missing character: There is no 
 Missing character: There is no � in font ptmr7t!
 Missing character: There is no � in font ptmr7t!
  [4]
-Overfull \hbox (4.21208pt too wide) in paragraph at lines 199--244
+Overfull \hbox (4.21208pt too wide) in paragraph at lines 195--240
 [][] 
  []
 
 (./egpaper_final.bbl) [5
 
-] [6] [7] (./egpaper_final.aux)
-
-LaTeX Warning: There were undefined references.
-
- ) 
+] [6] [7] (./egpaper_final.aux) ) 
 Here is how much of TeX's memory you used:
- 2139 strings out of 495061
- 25791 string characters out of 1182621
- 134725 words of memory out of 3000000
- 5284 multiletter control sequences out of 15000+50000
- 24844 words of font info for 61 fonts, out of 3000000 for 9000
+ 2138 strings out of 495061
+ 25774 string characters out of 1182621
+ 134728 words of memory out of 3000000
+ 5283 multiletter control sequences out of 15000+50000
+ 24902 words of font info for 61 fonts, out of 3000000 for 9000
  28 hyphenation exceptions out of 8191
  32i,9n,24p,794b,314s stack positions out of 5000i,500n,10000p,200000b,50000s
-{/usr/share/texmf-texlive/fonts/enc/dvips/base/8r.enc}</usr/share/texmf-texli
-ve/fonts/type1/public/amsfonts/cm/cmex10.pfb></usr/share/texmf-texlive/fonts/ty
-pe1/public/amsfonts/cm/cmmi10.pfb></usr/share/texmf-texlive/fonts/type1/public/
-amsfonts/cm/cmmi7.pfb></usr/share/texmf-texlive/fonts/type1/public/amsfonts/cm/
-cmr10.pfb></usr/share/texmf-texlive/fonts/type1/public/amsfonts/cm/cmr7.pfb></u
-sr/share/texmf-texlive/fonts/type1/public/amsfonts/cm/cmsy10.pfb></usr/share/te
-xmf-texlive/fonts/type1/public/amsfonts/cm/cmsy7.pfb></usr/share/texmf-texlive/
-fonts/type1/urw/courier/ucrr8a.pfb></usr/share/texmf-texlive/fonts/type1/urw/ti
-mes/utmb8a.pfb></usr/share/texmf-texlive/fonts/type1/urw/times/utmr8a.pfb></usr
-/share/texmf-texlive/fonts/type1/urw/times/utmri8a.pfb>
-Output written on egpaper_final.pdf (7 pages, 160339 bytes).
+{/usr/share/texmf-texlive/fonts/enc/dvips/base
+/8r.enc}</usr/share/texmf-texlive/fonts/type1/public/amsfonts/cm/cmex10.pfb></u
+sr/share/texmf-texlive/fonts/type1/public/amsfonts/cm/cmmi10.pfb></usr/share/te
+xmf-texlive/fonts/type1/public/amsfonts/cm/cmmi7.pfb></usr/share/texmf-texlive/
+fonts/type1/public/amsfonts/cm/cmr10.pfb></usr/share/texmf-texlive/fonts/type1/
+public/amsfonts/cm/cmr7.pfb></usr/share/texmf-texlive/fonts/type1/public/amsfon
+ts/cm/cmsy10.pfb></usr/share/texmf-texlive/fonts/type1/public/amsfonts/cm/cmsy7
+.pfb></usr/share/texmf-texlive/fonts/type1/urw/courier/ucrr8a.pfb></usr/share/t
+exmf-texlive/fonts/type1/urw/times/utmb8a.pfb></usr/share/texmf-texlive/fonts/t
+ype1/urw/times/utmr8a.pfb></usr/share/texmf-texlive/fonts/type1/urw/times/utmri
+8a.pfb>
+Output written on egpaper_final.pdf (7 pages, 160449 bytes).
 PDF statistics:
  71 PDF objects out of 1000 (max. 8388607)
  0 named destinations out of 1000 (max. 500000)
diff --git a/egpaper_final.pdf b/egpaper_final.pdf
diff --git a/egpaper_final.tex b/egpaper_final.tex
@@ -320,7 +320,7 @@ \subsection{Neighboring Domain Data}
 
 {\small
 \bibliographystyle{ieee}
-\bibliography{egbib}
+\bibliography{fpbib}
 }
 
 \end{document}
diff --git a/egpaper_final.tex~ b/egpaper_final.tex~
@@ -26,9 +26,7 @@
 %%%%%%%%% TITLE
 \title{Sentiment Classification using Machine Learning Techniques}
 
-\author{Pranjal Vashaspati\\
-Institution1\\
-Institution1 address\\
+\author{Pranjal Vachaspati\\
 {\tt\small pranjal@mit.edu}
 % For a paper whose authors are all at the same institution,
 % omit the following lines up until the closing ``}''.
@@ -37,8 +35,6 @@ Institution1 address\\
 % To save space, use either the email address or home page, not both
 \and
 Cathy Wu\\
-Institution2\\
-First line of institution2 address\\
 {\tt\small cathywu@mit.edu}
 }
 
@@ -47,7 +43,7 @@ First line of institution2 address\\
 
 %%%%%%%%% ABSTRACT
 \begin{abstract}
-We implement a series of classifiers (Naive Bayes, Maximum Entropy, and SVM) to distinguish positive and negative sentiment in critic and user reviews. We apply various processing methods, including negation tagging, part-of-speech tagging, and position tagging to achieve maximum accuracy. We test our classifiers on an external dataset to see how well they generalize. Finally, we use a majority-voting technique to combine classifiers and achieve accuracy of close to 90\% in 3-fold cross-validation.
+We implement a series of classifiers (Naive Bayes, Maximum Entropy, and SVM) to distinguish positive and negative sentiment in critic and user reviews. We apply various processing methods, including negation tagging, part-of-speech tagging, and position tagging to achieve maximum accuracy. We test our classifiers on an external dataset to see how well they generalize. Finally, we use a majority-voting technique to combine classifiers and achieve accuracy of close to 90\% in 3-fold cross-validation~\cite{Martin}.
 \end{abstract}
 
 %%%%%%%%% BODY TEXT
@@ -322,7 +318,7 @@ verbs & Unigrams & maximum & Presence          & 0.45 & 0.45 & 0.42 & 0.38 & 0.3
 
 {\small
 \bibliographystyle{ieee}
-\bibliography{egbib}
+\bibliography{fpbib}
 }
 
 \end{document}
diff --git a/ieee.bst b/ieee.bst

Original file line number	Diff line number	Diff line change
`@@ -320,7 +320,7 @@ \subsection{Neighboring Domain Data}`
`320`	`320`
`321`	`321`	`{\small`
`322`	`322`	`\bibliographystyle{ieee}`
`323`		`-\bibliography{egbib}`
	`323`	`+\bibliography{fpbib}`
`324`	`324`	`}`
`325`	`325`
`326`	`326`	`\end{document}`