Skip to content

Commit

Permalink
Adding seqlearn library and hmm code
Browse files Browse the repository at this point in the history
  • Loading branch information
iankurgarg committed Apr 14, 2017
1 parent 9bfb645 commit 495c051
Show file tree
Hide file tree
Showing 142 changed files with 184,832 additions and 2 deletions.
28 changes: 28 additions & 0 deletions Python/rough_ankur.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pandas as pd
import numpy as np
import sys

sys.path.insert(0,'../lib/seqlearn/')

from seqlearn.hmm import MultinomialHMM
from hmmlearn.hmm import GaussianHMM


# Load the three PSD recordings for subject 1. The CSVs have no header row;
# below, the last column is used as the label and all earlier columns as
# features.
d1 = pd.read_csv('../data/train_subject1_psd01.csv', header=None)
d2 = pd.read_csv('../data/train_subject1_psd02.csv', header=None)
d3 = pd.read_csv('../data/train_subject1_psd03.csv', header=None)

# Stack the recordings into a single training table. `lengths` records how
# many rows each recording contributed and is passed to fit() alongside the
# stacked data.
input_data = pd.concat([d1, d2, d3], axis=0)
lengths = [len(d1), len(d2), len(d3)]

clf = MultinomialHMM()
clf.fit(input_data.iloc[:, :-1], input_data.iloc[:, -1], lengths)

# NOTE(review): d3 is also part of the training table above, so the figure
# printed below is a training-set accuracy, not a held-out estimate.
pred = clf.predict(d3.iloc[:, :-1])
actual = d3.iloc[:, -1]
accuracy = sum(pred == actual) / float(len(actual))

# The parenthesised form prints the same value on Python 2 and also runs on
# Python 3, where the bare `print accuracy` statement is a syntax error.
print(accuracy)


4 changes: 2 additions & 2 deletions Rscripts/eda.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
dataset.test1 = read.csv('train_subject1_psd01.csv')
dataset.test1 = read.csv('../data/training_data_complete.csv')
plot(density(dataset.test1[dataset.test1$X7.00E.00==7,1]))
lines(density(dataset.test1[dataset.test1$X7.00E.00==2,1]))
lines(density(dataset.test1[dataset.test1$X7.00E.00==3,1]))
Expand All @@ -17,7 +17,7 @@ correlationDataframe = data.frame(correlationMatrix)

for(i in seq(1,96,1)){
for(j in seq(i,96,1)){
if(abs(correlationMatrix[i,j])>0.6 && i!=j){
if(abs(correlationMatrix[i,j])>0.70 && i!=j){
print(paste(i,j,correlationMatrix[i,j]))
}
}
Expand Down
19 changes: 19 additions & 0 deletions lib/seqlearn/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Compiled Python files and binary build products.
# NOTE(review): *.c is presumably ignored because the C sources are generated
# by Cython at build time -- confirm before adding hand-written C files.
*.py[cdo]
*.c
*.so
# Editor backup, lock and swap files, and macOS Finder metadata.
*~
.#*
*.sw[op]
.DS_Store
# Build output directory.
build/

# pip's log file.
pip-log.txt

# Archives.
*.tar.gz
*.tgz
*.zip

# NOTE(review): presumably generated reports (annotated HTML, line-profiler
# output) -- confirm; this also hides any hand-written *.html file.
*.html
*.lprof

# NOTE(review): presumably the file list generated when building a source
# distribution; MANIFEST.in is the tracked input.
MANIFEST
32 changes: 32 additions & 0 deletions lib/seqlearn/.travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Travis CI configuration: build seqlearn and run its test suite on each
# listed Python version.
language: python
python:
  - "2.7"
  - "3.4"

sudo: required
dist: trusty  # We want a recent SciPy.

# System packages installed through apt before the build. `apt` and
# `packages` must be nested under `addons`; at the top level they are
# unrelated keys.
addons:
  apt:
    packages:
      - python-numpy
      - python-scipy
      - python3-numpy
      - python3-scipy
      - libatlas-dev
      - liblapack-dev
      - gfortran

# Must be nested under `virtualenv` to take effect.
virtualenv:
  system_site_packages: true

# Each entry is a separate command line. The build's initial virtualenv is
# replaced with one created with --system-site-packages, presumably so the
# apt-installed NumPy/SciPy above are importable -- confirm.
install:
  - deactivate
  - virtualenv --system-site-packages testenv
  - source testenv/bin/activate
  - pip install -U Cython nose scikit-learn
  - pip install .

# Tests run from /tmp, i.e. outside the checkout, against the package
# installed by `pip install .` above.
script:
  - cd /tmp
  - nosetests --exe seqlearn
19 changes: 19 additions & 0 deletions lib/seqlearn/COPYING
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Copyright 2013-2014 Lars Buitinck / University of Amsterdam and contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
4 changes: 4 additions & 0 deletions lib/seqlearn/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Top-level documents shipped in the source distribution. COPYING carries the
# licence notice, which must accompany every copy of the software.
include README.rst
include COPYING

# Documentation sources and example scripts/data.
recursive-include doc *.rst Makefile conf.py
recursive-include examples *.bio *.py
2 changes: 2 additions & 0 deletions lib/seqlearn/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Interpreter used for the build; override with e.g. `make PYTHON=python3`.
PYTHON ?= python

# `here` names an action, not a file: without this declaration a file or
# directory called `here` would make the target look up to date.
.PHONY: here

# Compile the extension modules in place so the package can be used from the
# source directory without installing it.
here:
	$(PYTHON) setup.py build_ext --inplace
64 changes: 64 additions & 0 deletions lib/seqlearn/README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
.. -*- mode: rst -*-

seqlearn
========

seqlearn is a sequence classification toolkit for Python. It is designed to
extend `scikit-learn <http://scikit-learn.org>`_ and offer as similar as
possible an API.


Compiling and installing
------------------------

Get NumPy >=1.6, SciPy >=0.11, Cython >=0.20.2 and a recent version of
scikit-learn. Then issue::

    python setup.py install

to install seqlearn.

If you want to use seqlearn from its source directory without installing,
you have to compile first::

    python setup.py build_ext --inplace


Getting started
---------------

The easiest way to start using seqlearn is to fetch a dataset in CoNLL 2000
format. Define a task-specific feature extraction function, e.g.::

    >>> def features(sequence, i):
    ...     yield "word=" + sequence[i].lower()
    ...     if sequence[i].isupper():
    ...         yield "Uppercase"
    ...

Load the training file, say ``train.txt``::

    >>> from seqlearn.datasets import load_conll
    >>> X_train, y_train, lengths_train = load_conll("train.txt", features)

Train a model::

    >>> from seqlearn.perceptron import StructuredPerceptron
    >>> clf = StructuredPerceptron()
    >>> clf.fit(X_train, y_train, lengths_train)

Check how well you did on a validation set, say ``validation.txt``::

    >>> X_test, y_test, lengths_test = load_conll("validation.txt", features)
    >>> from seqlearn.evaluation import bio_f_score
    >>> y_pred = clf.predict(X_test, lengths_test)
    >>> print(bio_f_score(y_test, y_pred))

For more information, see the `documentation
<http://larsmans.github.io/seqlearn>`_.


|Travis|_

.. |Travis| image:: https://api.travis-ci.org/larsmans/seqlearn.png?branch=master
.. _Travis: https://travis-ci.org/larsmans/seqlearn
1 change: 1 addition & 0 deletions lib/seqlearn/doc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
_build
136 changes: 136 additions & 0 deletions lib/seqlearn/doc/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
# SPHINXOPTS: extra options added to every sphinx-build invocation below.
SPHINXOPTS =
# SPHINXBUILD: the sphinx-build executable to run.
SPHINXBUILD = sphinx-build
# PAPER: selects PAPEROPT_a4 or PAPEROPT_letter below; empty selects neither.
PAPER =
# BUILDDIR: directory that receives the output of every builder.
BUILDDIR = _build

# User-friendly check for sphinx-build
# Evaluated while the Makefile is parsed: `which` runs in a shell and its exit
# status is echoed back; a status of exactly 1 aborts make with the message
# below, before any target is considered.
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
# $(PAPEROPT_$(PAPER)) is a computed variable name: it expands to one of the
# two PAPEROPT_* values when PAPER is a4 or letter, and to nothing otherwise.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

# Every target in this file names an action, not a file it creates, so each
# one is declared phony. Otherwise a file or directory in this folder with the
# same name (e.g. `xml` or `info`) would make the target look up to date.
.PHONY: help clean gh-pages html dirhtml singlehtml pickle json latex \
        latexpdf text texinfo info gettext changes xml pseudoxml linkcheck \
        doctest

# Print the list of available targets. Names are padded to a fixed width so
# the descriptions line up in a column.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  gh-pages   to make HTML files and commit them to gh-pages"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  text       to make text files"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
	@echo "  clean      to remove everything under $(BUILDDIR)"

# Remove everything inside the build directory (the directory itself stays).
# The first line refuses to continue when BUILDDIR is empty, e.g. after
# `make BUILDDIR= clean`, because the rm below would then expand to
# `rm -rf /*`. When BUILDDIR is set, that line expands to nothing.
clean:
	$(if $(strip $(BUILDDIR)),,$(error BUILDDIR is empty; refusing to run rm -rf on the filesystem root))
	rm -rf $(BUILDDIR)/*

# Build the HTML pages, then hand them to commit-to-gh-pages.sh.
# $< is the first (and only) prerequisite, `html`, which is also the name of
# the output sub-directory. The .nojekyll marker file is created inside it
# before the commit script runs.
gh-pages: html
	touch $(BUILDDIR)/$</.nojekyll
	./commit-to-gh-pages.sh $(BUILDDIR)/$<

# Standalone HTML files. The target name is both the Sphinx builder name and
# the output sub-directory, hence $@ in the command.
html:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

# HTML files named index.html in directories. $@ supplies both the builder
# name and the output sub-directory.
dirhtml:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

# A single large HTML file. $@ supplies both the builder name and the output
# sub-directory.
singlehtml:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

# Pickle files for further processing. $@ supplies both the builder name and
# the output sub-directory.
pickle:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; now you can process the pickle files."

# JSON files for further processing. $@ supplies both the builder name and
# the output sub-directory.
json:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; now you can process the JSON files."

# LaTeX sources only; nothing is compiled to PDF here. $@ supplies both the
# builder name and the output sub-directory.
latex:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

# Generate the LaTeX sources, then run the `all-pdf` target of the Makefile
# found in the LaTeX output directory. --directory is the long spelling of
# -C; $(MAKE) keeps the sub-make tied to this make invocation.
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) --directory=$(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

# Plain-text output. $@ supplies both the builder name and the output
# sub-directory.
text:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

# Texinfo sources only; makeinfo is not run here. $@ supplies both the
# builder name and the output sub-directory.
texinfo:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

# Generate the Texinfo sources, then run the `info` target of the Makefile
# found in the Texinfo output directory.
# The sub-make is invoked through $(MAKE), not a literal `make`, so that it
# is the same make program and inherits -j/jobserver and -n from this run.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

# PO message catalogs. Uses I18NSPHINXOPTS rather than ALLSPHINXOPTS, i.e. no
# -d doctree directory is passed. The builder is named after the target ($@),
# but the output goes to the `locale` sub-directory.
gettext:
	$(SPHINXBUILD) -b $@ $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

# Overview of all changed/added/deprecated items. $@ supplies both the
# builder name and the output sub-directory.
changes:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

# Check all external links for integrity. $@ supplies both the builder name
# and the output sub-directory.
linkcheck:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

# Run the doctests embedded in the documentation. $@ supplies both the
# builder name and the output sub-directory.
doctest:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."

# Docutils-native XML files. $@ supplies both the builder name and the
# output sub-directory.
xml:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

# Pseudo-XML files for display purposes. $@ supplies both the builder name
# and the output sub-directory.
pseudoxml:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
Empty file.
Empty file.
14 changes: 14 additions & 0 deletions lib/seqlearn/doc/commit-to-gh-pages.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#! /bin/sh

# Commit the generated HTML pages to the branch gh-pages.
# Will not push them to GitHub.

# -e: stop at the first failing command; -v: echo script lines as they are read.
set -e -v

# hash-tree.py is expected to print the hash of a git tree object for the
# given directory (default: _build/html); it is resolved relative to the
# current directory, so run this script from the directory that contains it.
treehash=$(./hash-tree.py "${1:-_build/html}")

# Current tip of the local gh-pages branch. The full ref name matches the one
# updated at the end of the script and cannot be confused with a tag or path
# called "gh-pages"; --verify fails with a clear error if the branch is missing.
parent=$(git rev-parse --verify refs/heads/gh-pages)

msg="Regenerated docs for $(git rev-parse HEAD)"
# The hashes are quoted so that an unexpectedly empty or multi-word value is
# passed as one argument and rejected by git, rather than silently changing
# the shape of the command line.
commithash=$(echo "$msg" | git commit-tree "$treehash" -p "$parent")
echo "Updating gh-pages to $commithash"
git update-ref refs/heads/gh-pages "$commithash"
Loading

0 comments on commit 495c051

Please sign in to comment.