Skip to content

Commit

Permalink
Adding Pyate phrases, minor tweaks to js regex, stoplist
Browse files Browse the repository at this point in the history
  • Loading branch information
JasonKessler committed Oct 8, 2020
1 parent c26e2ad commit 1002070
Show file tree
Hide file tree
Showing 8 changed files with 24 additions and 8 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
[![Gitter Chat](https://img.shields.io/badge/GITTER-join%20chat-green.svg)](https://gitter.im/scattertext/Lobby)
[![Twitter Follow](https://img.shields.io/twitter/follow/espadrine.svg?style=social&label=Follow)](https://twitter.com/jasonkessler)

# Scattertext 0.0.2.68
# Scattertext 0.0.2.69

A tool for finding distinguishing terms in corpora, and presenting them in an
interactive, HTML scatter plot. Points corresponding to terms are selectively labeled
Expand Down
2 changes: 1 addition & 1 deletion scattertext/Common.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
'you', "dasn't", 'all', 'ought', 'noun', 'was', 'who', 'let', 'didn', "y'all'd've", 'must', "to've", "'re", "'m",
'gon', 'do', 'isn', 'may', 'weren', "had've", 'they', "'ve", "may've", 'finna', 'which', "I'm'a", "n't", 'should',
"e'er", 'when', 'ta', 'nal', 'haven', "y'all'd'n've", "those're", 'don', 'wasn', 'everybody', 'wouldn', "ne'er",
'something', 'that', 'everyone', 'methinks', '-'
'something', 'that', 'everyone', 'methinks', '-', '-pron-'
}

# Qualitative Colors From Tableau
Expand Down
4 changes: 3 additions & 1 deletion scattertext/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import print_function

version = [0, 0, 2, 68]
from scattertext.features.PyatePhrases import PyatePhrases

version = [0, 0, 2, 69]
__version__ = '.'.join([str(e) for e in version])
import re
import numpy as np
Expand Down
6 changes: 3 additions & 3 deletions scattertext/data/viz/scripts/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -1195,7 +1195,7 @@ buildViz = function (d3) {

// https://stackoverflow.com/questions/3446170/escape-string-for-use-in-javascript-regex
// Backslash-escapes regex metacharacters in `string` so the result can be embedded
// in a RegExp pattern as literal text.
// NOTE(review): this span is a diff rendering, so two versions of the return line
// appear back to back; read as plain code, only the first one would ever execute.
function escapeRegExp(string) {
return string.replace(/[#.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
// NOTE(review): in the variant below the character class closes at `]` before `'\%`,
// so the pattern only matches a listed metacharacter that is immediately followed
// by the two characters `'%`; a bare `.` or `+` is no longer escaped. If the goal was
// to also escape `'` and `%`, they belong inside the brackets, e.g.
// `[#.*+?^${}()|[\]\\'%]` -- confirm intent.
return string.replace(/[#.*+?^${}()|[\]\\]'\%/g, '\\$&'); // $& means the whole matched string
}

/*
Expand All @@ -1208,11 +1208,11 @@ buildViz = function (d3) {
termToRegex = escapeRegExp(termToRegex);
console.log("termToRegex")
console.log(termToRegex)

var regexp = new RegExp(boundary + '('
+ removeUnderScoreJoin(
termToRegex.replace(' ', wordSep, 'gim')
)
+ ')' + boundary, 'gim');
) + ')' + boundary, 'gim');
console.log(regexp);
try {
regexp.exec('X');
Expand Down
2 changes: 1 addition & 1 deletion scattertext/features/FeatsFromSpacyDoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def _get_unigram_feats(self, sent):
elif tok.tag_ in self._tag_types_to_censor:
unigrams.append(tok.tag_)
elif self._use_lemmas and tok.lemma_.strip():
unigrams.append(self._post_process_term(tok.lemma_.strip()))
unigrams.append(self._post_process_term(tok.lemma_.strip().lower()))
elif tok.lower_.strip():
unigrams.append(self._post_process_term(tok.lower_.strip()))
return unigrams
Expand Down
13 changes: 13 additions & 0 deletions scattertext/features/PyatePhrases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from collections import Counter
from scattertext.features.FeatsFromSpacyDoc import FeatsFromSpacyDoc


class PyatePhrases(FeatsFromSpacyDoc):
    """Feature extractor that delegates phrase extraction to a pyate-style callable.

    The extractor is called with the document's text and its result is
    converted to a ``Counter`` through ``.to_dict()``.
    """

    def __init__(self, extractor=None, **args):
        """Store the extractor and initialise the ``FeatsFromSpacyDoc`` base.

        Parameters
        ----------
        extractor : callable, optional
            Called with the document text; its return value must expose
            ``to_dict()``. Defaults to ``pyate.combo_basic``.
        **args
            Keyword arguments forwarded unchanged to
            ``FeatsFromSpacyDoc.__init__``.
        """
        # Imported at construction time, so pyate is only required once
        # this class is actually instantiated.
        import pyate

        if extractor is None:
            extractor = pyate.combo_basic
        self._extractor = extractor
        FeatsFromSpacyDoc.__init__(self, **args)

    def get_feats(self, doc):
        """Return a ``Counter`` built from the extractor's output for ``doc``.

        Parameters
        ----------
        doc : object
            Converted with ``str(doc)`` before being handed to the extractor
            (presumably a spaCy ``Doc`` -- confirm against callers).

        Returns
        -------
        collections.Counter
            Built from ``extractor(str(doc)).to_dict()``.
        """
        doc_text = str(doc)
        extracted = self._extractor(doc_text)
        return Counter(extracted.to_dict())

1 change: 1 addition & 0 deletions scattertext/semioticsquare/SemioticSquareFromAxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,5 +74,6 @@ def get_lexicons(self, num_terms=10):
]:
#scores = np.linalg.norm(np.array([self.axes['x'] - x_coord, self.axes['y'] - y_coord]), 2, axis=0)
scores = self._distance_measure.distances(x_coord, y_coord, self.axes['x'], self.axes['y'])

lexicons[label] = list(self.axes.index[np.argsort(scores)])[:num_terms]
return lexicons
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages

setup(name='scattertext',
version='0.0.2.68',
version='0.0.2.69',
description='An NLP package to visualize interesting terms in text.',
url='https://github.com/JasonKessler/scattertext',
author='Jason Kessler',
Expand Down

0 comments on commit 1002070

Please sign in to comment.