
Commit 6755b5f

Committed by: Arun Tejasvi Chaganty

Initialized with protobuf and tests
1 parent 6417157 commit 6755b5f

12 files changed: 3523 additions, 0 deletions

.travis.yml

Lines changed: 16 additions & 0 deletions
# this file is *not* meant to cover or endorse the use of travis, but rather to
# help confirm pull requests to this project.

language: python

env:
  - TOXENV=py27
  - TOXENV=py33
  - TOXENV=py34

install: pip install tox

script: tox

notifications:
  email: false

MANIFEST.in

Lines changed: 5 additions & 0 deletions
# Include the license file
include LICENSE.txt

# Include the data files
recursive-include data *

README.rst

Lines changed: 30 additions & 0 deletions
Stanford CoreNLP Python Bindings
================================

This package contains Python bindings for `Stanford CoreNLP
<https://github.com/stanfordnlp/CoreNLP>`_'s protobuf specifications, as
generated by ``protoc``. These bindings can be used to parse binary data
produced by, e.g., the `Stanford CoreNLP server
<https://stanfordnlp.github.io/CoreNLP/corenlp-server.html>`_.

----

Usage::

    from corenlp_protobuf import Document

    # document.dat contains a serialized Document.
    with open('document.dat', 'rb') as f:
        buffer = f.read()
    doc = Document()
    doc.ParseFromString(buffer)

    # You can access the sentences from doc.sentence.
    sentence = doc.sentence[0]

    # You can access any property within a sentence.
    print(sentence.text)

    # Likewise for tokens.
    token = sentence.token[0]
    print(token.lemma)
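As a companion to the usage above, here is a minimal, hypothetical sketch of how a file like document.dat might be produced from a locally running Stanford CoreNLP server. The URL, annotator list, property names, serializer class, and the use of the requests library are assumptions, not part of this package; also note the server's response may be length-delimited (Java's writeDelimitedTo), in which case parseFromDelimitedString from this package (see corenlp_protobuf/__init__.py below) can be used instead of ParseFromString::

    # Hypothetical sketch: ask a locally running CoreNLP server (assumed at
    # http://localhost:9000) for a protobuf-serialized annotation and save
    # the raw response bytes to document.dat. All server-side parameter
    # names here are assumptions.
    import json
    import requests

    props = {
        'annotators': 'tokenize,ssplit,pos,lemma',
        'outputFormat': 'serialized',
        'serializer': 'edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer',
    }
    r = requests.post('http://localhost:9000/',
                      params={'properties': json.dumps(props)},
                      data='This is a simple sentence.'.encode('utf-8'))
    r.raise_for_status()

    with open('document.dat', 'wb') as f:
        f.write(r.content)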

corenlp_protobuf/CoreNLP_pb2.py

Lines changed: 2681 additions & 0 deletions
Generated by protoc from CoreNLP's protobuf specification; contents not shown here.

corenlp_protobuf/__init__.py

Lines changed: 27 additions & 0 deletions
from __future__ import absolute_import

from google.protobuf.internal.decoder import _DecodeVarint
from .CoreNLP_pb2 import *


def parseFromDelimitedString(obj, buf, offset=0):
    """
    Stanford CoreNLP uses the Java "writeDelimitedTo" function, which
    writes the size (and offset) of the buffer before writing the object.
    This function handles parsing this message starting from offset 0.

    @returns how many bytes of @buf were consumed.
    """
    size, pos = _DecodeVarint(buf, offset)
    obj.ParseFromString(buf[offset + pos:offset + pos + size])
    return pos + size


def to_text(sentence):
    """
    Helper routine that converts a Sentence protobuf to a string from its tokens.
    """
    text = ""
    for i, tok in enumerate(sentence.token):
        if i != 0:
            text += tok.before
        text += tok.word
    return text
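A minimal sketch of how these two helpers might be used together, assuming delimited.dat is a hypothetical file that holds a single Document written with Java's writeDelimitedTo::

    # Minimal sketch: read one length-delimited Document from a hypothetical
    # file 'delimited.dat' and print each sentence's text, reconstructed
    # from its tokens.
    from corenlp_protobuf import Document, parseFromDelimitedString, to_text

    with open('delimited.dat', 'rb') as f:
        buf = f.read()

    doc = Document()
    consumed = parseFromDelimitedString(doc, buf)  # parses from offset 0
    assert consumed == len(buf)  # holds if the file contains exactly one message

    for sentence in doc.sentence:
        print(to_text(sentence))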
