Adding in code to ensure that term statistics will show up even if no documents are present in the visualization.
JasonKessler · Apr 26, 2020 · 948d244 · 948d244
1 parent 83c5db8
commit 948d244
Show file tree

Hide file tree

Showing 6 changed files with 43 additions and 15 deletions.
diff --git a/README.md b/README.md
@@ -3,7 +3,7 @@
 [![Gitter Chat](https://img.shields.io/badge/GITTER-join%20chat-green.svg)](https://gitter.im/scattertext/Lobby)
 [![Twitter Follow](https://img.shields.io/twitter/follow/espadrine.svg?style=social&label=Follow)](https://twitter.com/jasonkessler)
 
-# Scattertext 0.0.2.63
+# Scattertext 0.0.2.64
 
 A tool for finding distinguishing terms in corpora, and presenting them in an 
 interactive, HTML scatter plot. Points corresponding to terms are selectively labeled

diff --git a/demo_compact_suppress_documents.py b/demo_compact_suppress_documents.py
@@ -0,0 +1,22 @@
+import scattertext as st
+
+df = st.SampleCorpora.ConventionData2012.get_data().assign(
+    parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences)
+)
+
+corpus = st.CorpusFromParsedDocuments(
+    df, category_col='party', parsed_col='parse'
+).build().get_unigram_corpus().compact(st.AssociationCompactor(2000))
+
+html = st.produce_scattertext_explorer(
+    corpus,
+    category='democrat',
+    category_name='Democratic',
+    not_category_name='Republican',
+    minimum_term_frequency=0, pmi_threshold_coefficient=0,
+    width_in_pixels=1000, metadata=corpus.get_df()['speaker'],
+    transform=st.Scalers.dense_rank,
+    max_docs_per_category=0
+)
+open('./demo_compact_suppress_documents.html', 'w').write(html)
+print('open ./demo_compact_suppress_documents.html in Chrome')
diff --git a/demo_names.py b/demo_names.py
@@ -22,7 +22,8 @@
     minimum_term_frequency=0, pmi_threshold_coefficient=0,
     width_in_pixels=1000, metadata=corpus.get_df()['speaker'],
     transform=st.Scalers.dense_rank,
-    max_overlapping=10
+    max_overlapping=10,
+    max_docs_per_category=0
 )
-open('./demo_names.html', 'w').write(html)
-print('open ./demo_name.html in Chrome')
+open('./demo_names2.html', 'w').write(html)
+print('open ./demo_names2.html in Chrome')
diff --git a/scattertext/__init__.py b/scattertext/__init__.py
@@ -2,7 +2,7 @@
 
 from scattertext.diachronic.TimeStructure import TimeStructure
 
-version = [0, 0, 2, 63]
+version = [0, 0, 2, 64]
 __version__ = '.'.join([str(e) for e in version])
 import re
 import numpy as np

diff --git a/scattertext/data/viz/scripts/main.js b/scattertext/data/viz/scripts/main.js
@@ -750,7 +750,7 @@ buildViz = function (d3) {
             var info = termInfo.info;
             var notmatches = termInfo.notmatches;
             if (contexts[0].length + contexts[1].length + contexts[2].length + contexts[3].length == 0) {
-                return null;
+                //return null;
             }
             //!!! Future feature: context words
             //var contextWords = getContextWordSFS(info.term);
@@ -977,7 +977,7 @@ buildViz = function (d3) {
                 })
                 .reduce(function (a, b) {
                     return a + b;
-                })
+                }, 0);
 
             var notCategoryNumList = fullData.docs.categories.map(function (x, i) {
                 if (fullData.info.not_category_internal_names.indexOf(x) > -1) {
@@ -996,16 +996,21 @@ buildViz = function (d3) {
                 })
                 .reduce(function (a, b) {
                     return a + b;
-                });
+                }, 0);
 
             function getFrequencyDescription(name, count25k, count, ndocs) {
-                var desc = name + ' frequency: <div class=text_subhead>' + count25k
-                    + ' per 25,000 terms</div><div class=text_subhead>' + Math.round(ndocs)
-                    + ' per 1,000 docs</div>';
+                var desc = name + ' frequency: <div class=text_subhead>' + count25k + ' per 25,000 terms</div>';
+                if (!isNaN(Math.round(ndocs))) {
+                    desc += '<div class=text_subhead>' + Math.round(ndocs) + ' per 1,000 docs</div>';
+                }
                 if (count == 0) {
                     desc += '<u>Not found in any ' + name + ' documents.</u>';
                 } else {
-                    desc += '<u>Some of the ' + count + ' mentions:</u>';
+                    if (!isNaN(Math.round(ndocs))) {
+                        desc += '<u>Some of the ' + count + ' mentions:</u>';
+                    } else {
+                        desc += count + ' mentions';
+                    }
                 }
                 /*
                 desc += '<br><b>Discriminative:</b> ';
@@ -1063,7 +1068,7 @@ buildViz = function (d3) {
                         })
                         .reduce(function (a, b) {
                             return a + b;
-                        });
+                        }, 0);
 
                     d3.select("#" + divName + "-neuthead")
                         .style('fill', color(0))
@@ -1092,7 +1097,7 @@ buildViz = function (d3) {
                             })
                             .reduce(function (a, b) {
                                 return a + b;
-                            });
+                            }, 0);
 
                         d3.select("#" + divName + "-extrahead")
                             .style('fill', color(0))

diff --git a/setup.py b/setup.py
@@ -1,7 +1,7 @@
 from setuptools import setup, find_packages
 
 setup(name='scattertext',
-      version='0.0.2.63',
+      version='0.0.2.64',
       description='An NLP package to visualize interesting terms in text.',
       url='https://github.com/JasonKessler/scattertext',
       author='Jason Kessler',