Skip to content

Commit

Permalink
added Jeopardy and set aspect ratio to 1 for MDSplot
Browse files Browse the repository at this point in the history
  • Loading branch information
kshirley committed Dec 4, 2014
1 parent e668d3d commit 39660fa
Show file tree
Hide file tree
Showing 12 changed files with 121 additions and 72 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by roxygen2 (4.0.2): do not edit by hand
# Generated by roxygen2 (4.0.1): do not edit by hand

export(createJSON)
export(jsPCA)
Expand Down
5 changes: 3 additions & 2 deletions R/createJSON.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
#' of the interactive viz. Default is 30. Recommended to be roughly
#' between 10 and 50.
#' @param lambda.step a value between 0 and 1.
#' Determines the grid of lambda values to iterate over when computing relevance.
#' Determines the interstep distance in the grid of lambda
#' values over which to iterate when computing relevance.
#' Default is 0.01. Recommended to be between 0.01 and 0.1.
#' @param mds.method a function that takes \code{phi} as an input and outputs
#' a K by 2 data.frame (or matrix). The output approximates the distance
Expand Down Expand Up @@ -152,7 +153,7 @@ createJSON <- function(phi = matrix(), theta = matrix(), doc.length = integer(),
names(mds.res) <- c("x", "y")
} else {
warning("Result of mds.method should be a matrix or data.frame.")
}
}
mds.df <- data.frame(mds.res, topics = seq_len(K), Freq = topic.proportion*100,
cluster = 1, stringsAsFactors = FALSE)
# note: cluster (should?) be deprecated soon.
Expand Down
21 changes: 17 additions & 4 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#' \item{phi}{phi, a matrix with the topic-term distributions}
#' \item{theta}{theta, a matrix with the document-topic distributions}
#' \item{doc.length}{doc.length, a numeric vector with token counts for each document}
#' \item{vocab}{vocab, a character vector containing the unique terms}
#' \item{vocab}{vocab, a character vector containing the terms}
#' \item{term.frequency}{term.frequency, a numeric vector of observed term frequencies}
#' }
#' @source \url{http://www.cs.princeton.edu/~blei/lda-c/index.html}
Expand All @@ -18,7 +18,7 @@
#' \item{phi}{phi, a matrix with the topic-term distributions}
#' \item{theta}{theta, a matrix with the document-topic distributions}
#' \item{doc.length}{doc.length, a numeric vector with token counts for each document}
#' \item{vocab}{vocab, a character vector containing the unique terms}
#' \item{vocab}{vocab, a character vector containing the terms}
#' \item{term.frequency}{term.frequency, a numeric vector of observed term frequencies}
#' }
#' @source \url{http://qwone.com/~jason/20Newsgroups/}
Expand All @@ -31,8 +31,21 @@
#' \item{phi}{phi, a matrix with the topic-term distributions}
#' \item{theta}{theta, a matrix with the document-topic distributions}
#' \item{doc.length}{doc.length, a numeric vector with token counts for each document}
#' \item{vocab}{vocab, a character vector containing the unique terms}
#' \item{vocab}{vocab, a character vector containing the terms}
#' \item{term.frequency}{term.frequency, a numeric vector of observed term frequencies}
#' }
#' @source \url{https://archive.ics.uci.edu/ml/datasets/Bag+of+Words}
"DailyKos"
"DailyKos"

#' Jeopardy Questions (including category name and answer)
#'
#' @format A list of elements extracted from a topic model fit to this data
#' \describe{
#' \item{phi}{phi, a matrix with the topic-term distributions}
#' \item{theta}{theta, a matrix with the document-topic distributions}
#' \item{doc.length}{doc.length, a numeric vector with token counts for each document}
#' \item{vocab}{vocab, a character vector containing the terms}
#' \item{term.frequency}{term.frequency, a numeric vector of observed term frequencies}
#' }
#' @source \url{http://www.reddit.com/r/datasets/comments/1uyd0t/200000_jeopardy_questions_in_a_json_file}
"Jeopardy"
115 changes: 63 additions & 52 deletions inst/htmljs/ldavis.js
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,6 @@ LDAvis = function(to_select, json_file) {
// http://bl.ocks.org/d3noob/10633704
init_forms(topicID, lambdaID, visID);

// http://jsfiddle.net/AmanVirdi/hbP3y/
// kenny look at link above to create custom 'spinner' for topic increment/decrement buttons

// When the value of lambda changes, update the visualization
d3.select(lambda_select)
.on("mouseup", function() {
Expand Down Expand Up @@ -207,19 +204,32 @@ LDAvis = function(to_select, json_file) {
return d.x;
}); //d3.extent returns min and max of an array
var xdiff = xrange[1] - xrange[0],
xpad = 0.10;
var xScale = d3.scale.linear()
.range([0, mdswidth])
.domain([xrange[0] - xpad * xdiff, xrange[1] + xpad * xdiff]);

xpad = 0.05;
var yrange = d3.extent(mdsData, function(d) {
return d.y;
});
var ydiff = yrange[1] - yrange[0],
ypad = 0.10;
var yScale = d3.scale.linear()
.range([mdsheight, 0])
.domain([yrange[0] - ypad * ydiff, yrange[1] + ypad * ydiff]);
ypad = 0.05;

if (xdiff > ydiff) {
var xScale = d3.scale.linear()
.range([0, mdswidth])
.domain([xrange[0] - xpad * xdiff, xrange[1] + xpad * xdiff]);

var yScale = d3.scale.linear()
.range([mdsheight, 0])
//.domain([yrange[0] - ypad * ydiff, yrange[1] + ypad * ydiff]);
.domain([yrange[0] - 0.5*(xdiff - ydiff) - ypad*xdiff, yrange[1] + 0.5*(xdiff - ydiff) + ypad*xdiff]);
} else {
var xScale = d3.scale.linear()
.range([0, mdswidth])
// .domain([xrange[0] - xpad * xdiff, xrange[1] + xpad * xdiff]);
.domain([xrange[0] - 0.5*(ydiff - xdiff) - xpad*ydiff, xrange[1] + 0.5*(ydiff - xdiff) + xpad*ydiff]);

var yScale = d3.scale.linear()
.range([mdsheight, 0])
.domain([yrange[0] - ypad * ydiff, yrange[1] + ypad * ydiff]);
}

// Create new svg element (that will contain everything):
var svg = d3.select(to_select).append("svg")
Expand Down Expand Up @@ -397,8 +407,8 @@ LDAvis = function(to_select, json_file) {
.text("Intertopic Distance Map (via multidimensional scaling)")
.attr("x", mdswidth/2 + margin.left)
.attr("y", 30)
.style("font-size", "16px")
.style("text-anchor", "middle");
.style("font-size", "16px")
.style("text-anchor", "middle");

// establish layout and vars for bar chart
var barDefault2 = lamData.filter(function(d) {
Expand Down Expand Up @@ -534,11 +544,11 @@ LDAvis = function(to_select, json_file) {
.style("text-anchor", "middle")
.style("font-size", "16px")
.text("Top-" + R + " Most Salient Terms");

title.append("tspan")
.attr("baseline-shift", "super")
.attr("font-size", "12px")
.text("(1)");
.attr("baseline-shift", "super")
.attr("font-size", "12px")
.text("(1)");

// barchart axis adapted from http://bl.ocks.org/mbostock/1166403
var xAxis = d3.svg.axis().scale(x)
Expand All @@ -550,21 +560,21 @@ LDAvis = function(to_select, json_file) {
chart.attr("class", "xaxis")
.call(xAxis);

// dynamically create the topic and lambda input forms at the top of the page:
// dynamically create the topic and lambda input forms at the top of the page:
function init_forms(topicID, lambdaID, visID) {

// create container div for topic and lambda input:
var inputDiv = document.createElement("div");
inputDiv.setAttribute("id", "top");
var inputDiv = document.createElement("div");
inputDiv.setAttribute("id", "top");

// insert the input container just before the vis:
var visDiv = document.getElementById(visID);
document.body.insertBefore(inputDiv, visDiv);

// topic input container:
// topic input container:
var topicDiv = document.createElement("div");
topicDiv.setAttribute("style", "padding: 5px; background-color: #e8e8e8; position: absolute; top: 10px; left: 38px; height: 40px; width: " + mdswidth + "px; display: inline-block");
inputDiv.appendChild(topicDiv);
topicDiv.setAttribute("style", "padding: 5px; background-color: #e8e8e8; position: absolute; top: 10px; left: 38px; height: 40px; width: " + mdswidth + "px; display: inline-block");
inputDiv.appendChild(topicDiv);

var topicLabel = document.createElement("label");
topicLabel.setAttribute("for", topicID);
Expand Down Expand Up @@ -593,43 +603,43 @@ LDAvis = function(to_select, json_file) {
next.setAttribute("style", "margin-left: 5px");
next.innerHTML = "Next Topic";
topicDiv.appendChild(next);
/*
var clear = document.createElement("button");
clear.setAttribute("id", topicClear);
clear.setAttribute("style", "margin-left: 5px");
clear.innerHTML = "Clear Topic";
topicDiv.appendChild(clear);
*/

// lambda inputs
/*
var clear = document.createElement("button");
clear.setAttribute("id", topicClear);
clear.setAttribute("style", "margin-left: 5px");
clear.innerHTML = "Clear Topic";
topicDiv.appendChild(clear);
*/

// lambda inputs
var lambdaDivLeft = 8 + mdswidth + margin.left + termwidth;
var lambdaDivWidth = barwidth;
var lambdaDiv = document.createElement("div");
lambdaDiv.setAttribute("id", "lambdaInput");
lambdaDiv.setAttribute("style", "padding: 5px; background-color: #e8e8e8; position: absolute; top: 10px; left: " + lambdaDivLeft + "px; height: 40px; width: " + lambdaDivWidth + "px");
lambdaDiv.setAttribute("style", "padding: 5px; background-color: #e8e8e8; position: absolute; top: 10px; left: " + lambdaDivLeft + "px; height: 50px; width: " + lambdaDivWidth + "px");
inputDiv.appendChild(lambdaDiv);

var lambdaZero = document.createElement("div");
lambdaZero.setAttribute("style", "padding: 5px; height: 20px; width: 220px; font-family: sans-serif; position: absolute; top: 0px; left: 0px;");
lambdaZero.setAttribute("id", "lambdaZero");
lambdaDiv.appendChild(lambdaZero);
var xx = d3.select("#lambdaZero")
.append("text")
.attr("x", 0)
.attr("y", 0)
.style("font-size", "14px")
.text("Slide to adjust relevance metric:");
.append("text")
.attr("x", 0)
.attr("y", 0)
.style("font-size", "14px")
.text("Slide to adjust relevance metric:");
var yy = d3.select("#lambdaZero")
.append("text")
.attr("x", 125)
.attr("y", -5)
.style("font-size", "10px")
.style("position", "absolute")
.text("(2)");
.append("text")
.attr("x", 125)
.attr("y", -5)
.style("font-size", "10px")
.style("position", "absolute")
.text("(2)");

var lambdaLabel = document.createElement("label");
lambdaLabel.setAttribute("for", lambdaID);
lambdaLabel.setAttribute("style", "height: 20px; width: 200px; position: absolute; top: 25px; left: 90px; font-family: sans-serif; font-size: 14px");
lambdaLabel.setAttribute("style", "height: 20px; width: 60px; position: absolute; top: 25px; left: 90px; font-family: sans-serif; font-size: 14px");
lambdaLabel.innerHTML = "&#955 = <span id='" + lambdaID + "-value'>1</span>";
lambdaDiv.appendChild(lambdaLabel);

Expand All @@ -639,7 +649,7 @@ LDAvis = function(to_select, json_file) {
lambdaDiv.appendChild(sliderDiv);

var lambdaInput = document.createElement("input");
lambdaInput.setAttribute("style", "width: 250px; margin: 0px");
lambdaInput.setAttribute("style", "width: 250px; margin-top: -20px; margin-left: 0px; margin-right: 0px");
lambdaInput.type = "range";
lambdaInput.min = 0;
lambdaInput.max = 1;
Expand All @@ -654,13 +664,13 @@ LDAvis = function(to_select, json_file) {
.attr("width", 250)
.attr("height", 25);

var sliderScale = d3.scale.linear()
var sliderScale = d3.scale.linear()
.domain([0, 1])
.range([7.5, 242.5]) // trimmed by 7.5px on each side to match the input type=range slider:
.nice();

// adapted from http://bl.ocks.org/mbostock/1166403
var sliderAxis = d3.svg.axis()
var sliderAxis = d3.svg.axis()
.scale(sliderScale)
.orient("bottom")
.tickSize(10)
Expand All @@ -670,6 +680,7 @@ LDAvis = function(to_select, json_file) {
// group to contain the elements of the slider axis:
var sliderAxisGroup = scaleContainer.append("g")
.attr("class", "slideraxis")
.attr("margin-top", "-10px")
.call(sliderAxis);

// Another strategy for tick marks on the slider; simpler, but without labels
Expand Down Expand Up @@ -985,7 +996,7 @@ LDAvis = function(to_select, json_file) {
.attr("class", "bubble-tool") // set class so we can remove it when highlight_off is called
.style("text-anchor", "middle")
.style("font-size", "16px")
//.text(Freq + "% of tokens come from topic " + topics);
//.text(Freq + "% of tokens come from topic " + topics);
.text("Top-" + R + " Most Relevant Terms for Topic " + topics);

// grab the bar-chart data for this topic only:
Expand Down Expand Up @@ -1088,8 +1099,8 @@ LDAvis = function(to_select, json_file) {
circle.style.fill = color1;

var title = d3.selectAll(".bubble-tool")
.text("Top-" + R + " Most Salient Terms");
title.append("tspan")
.text("Top-" + R + " Most Salient Terms");
title.append("tspan")
.attr("baseline-shift", "super")
.attr("font-size", 12)
.text(1);
Expand Down
4 changes: 2 additions & 2 deletions man/AP.Rd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
% Generated by roxygen2 (4.0.2): do not edit by hand
% Generated by roxygen2 (4.0.1): do not edit by hand
\docType{data}
\name{AP}
\alias{AP}
Expand All @@ -8,7 +8,7 @@
\item{phi}{phi, a matrix with the topic-term distributions}
\item{theta}{theta, a matrix with the document-topic distributions}
\item{doc.length}{doc.length, a numeric vector with token counts for each document}
\item{vocab}{vocab, a character vector containing the unique terms}
\item{vocab}{vocab, a character vector containing the terms}
\item{term.frequency}{term.frequency, a numeric vector of observed term frequencies}
}}
\source{
Expand Down
4 changes: 2 additions & 2 deletions man/DailyKos.Rd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
% Generated by roxygen2 (4.0.2): do not edit by hand
% Generated by roxygen2 (4.0.1): do not edit by hand
\docType{data}
\name{DailyKos}
\alias{DailyKos}
Expand All @@ -8,7 +8,7 @@
\item{phi}{phi, a matrix with the topic-term distributions}
\item{theta}{theta, a matrix with the document-topic distributions}
\item{doc.length}{doc.length, a numeric vector with token counts for each document}
\item{vocab}{vocab, a character vector containing the unique terms}
\item{vocab}{vocab, a character vector containing the terms}
\item{term.frequency}{term.frequency, a numeric vector of observed term frequencies}
}}
\source{
Expand Down
24 changes: 24 additions & 0 deletions man/Jeopardy.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
% Generated by roxygen2 (4.0.1): do not edit by hand
\docType{data}
\name{Jeopardy}
\alias{Jeopardy}
\title{Jeopardy Questions (including category name and answer)}
\format{A list of elements extracted from a topic model fit to this data
\describe{
\item{phi}{phi, a matrix with the topic-term distributions}
\item{theta}{theta, a matrix with the document-topic distributions}
\item{doc.length}{doc.length, a numeric vector with token counts for each document}
\item{vocab}{vocab, a character vector containing the terms}
\item{term.frequency}{term.frequency, a numeric vector of observed term frequencies}
}}
\source{
\url{http://www.reddit.com/r/datasets/comments/1uyd0t/200000_jeopardy_questions_in_a_json_file}
}
\usage{
Jeopardy
}
\description{
Jeopardy Questions (including category name and answer)
}
\keyword{datasets}

4 changes: 2 additions & 2 deletions man/TwentyNewsgroups.Rd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
% Generated by roxygen2 (4.0.2): do not edit by hand
% Generated by roxygen2 (4.0.1): do not edit by hand
\docType{data}
\name{TwentyNewsgroups}
\alias{TwentyNewsgroups}
Expand All @@ -8,7 +8,7 @@
\item{phi}{phi, a matrix with the topic-term distributions}
\item{theta}{theta, a matrix with the document-topic distributions}
\item{doc.length}{doc.length, a numeric vector with token counts for each document}
\item{vocab}{vocab, a character vector containing the unique terms}
\item{vocab}{vocab, a character vector containing the terms}
\item{term.frequency}{term.frequency, a numeric vector of observed term frequencies}
}}
\source{
Expand Down
8 changes: 4 additions & 4 deletions man/createJSON.Rd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
% Generated by roxygen2 (4.0.2): do not edit by hand
% Generated by roxygen2 (4.0.1): do not edit by hand
\name{createJSON}
\alias{createJSON}
\title{Create the JSON object to read into the javascript visualization}
Expand Down Expand Up @@ -34,9 +34,9 @@ createJSON(phi = matrix(), theta = matrix(), doc.length = integer(),
Recommended to be roughly between 10 and 50.}

\item{lambda.step}{a value between 0 and 1. Determines
the grid of lambda values to iterate over when computing
relevance. Default is 0.01. Recommended to be between
0.01 and 0.1.}
the interstep distance in the grid of lambda values over
which to iterate when computing relevance. Default is
0.01. Recommended to be between 0.01 and 0.1.}

\item{mds.method}{a function that takes \code{phi} as an
input and outputs a K by 2 data.frame (or matrix). The
Expand Down
2 changes: 1 addition & 1 deletion man/jsPCA.Rd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
% Generated by roxygen2 (4.0.2): do not edit by hand
% Generated by roxygen2 (4.0.1): do not edit by hand
\name{jsPCA}
\alias{jsPCA}
\title{Dimension reduction via Jensen-Shannon Divergence & Principal Components}
Expand Down
2 changes: 1 addition & 1 deletion man/runShiny.Rd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
% Generated by roxygen2 (4.0.2): do not edit by hand
% Generated by roxygen2 (4.0.1): do not edit by hand
\name{runShiny}
\alias{runShiny}
\title{Run shiny/D3 visualization}
Expand Down
Loading

0 comments on commit 39660fa

Please sign in to comment.