<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="description" content="Welcome to ENNUI - An elegant neural network user interface which allows you to easily design, train, and visualize neural networks.">
<title>ENNUI ~ Elegant Neural Network User Interface ~</title>
<!-- MathJax cdn to render latex -->
<script type="text/javascript" async
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-MML-AM_CHTML">
</script>
<!-- JSON-LD markup generated by Google Structured Data Markup Helper. -->
<script type="application/ld+json">
{
"@context" : "http://schema.org",
"@type" : "SoftwareApplication",
"name" : "ENNUI ~ an elegant neural network user interface ~",
"author" : [ {
"@type" : "Person",
"name" : "Jesse Michel"
}, {
"@type" : "Person",
"name" : "Zack Holbrook"
}, {
"@type" : "Person",
"name" : "Stefan Grosser"
}, {
"@type" : "Person",
"name" : "Hendrik Strobelt"
}, {
"@type" : "Person",
"name" : "Rikhav Shah"
} ]
}
</script>
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-133726432-1"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-133726432-1');
</script>
<link rel="icon" type="image/x-icon" sizes="16x16" href="favicon.ico">
<link rel='stylesheet' href='src/ui/style.css'>
<script src='dist/bundle.js'></script>
</head>
<body>
<h1 style="display:none">ENNUI ~ Elegant Neural Network User Interface ~</h1>
<p style="display:none">ENNUI helps people learn about deep learning by building, training, and visualizing deep neural networks in the browser. It has an easy-to-use drag-and-drop interface. When you're ready to start coding, you can export the network to produce code in Python or Julia!</p>
<h6 style="display:none">About ENNUI</h6>
<p style="display:none">
ENNUI provides several tools for all stages of deep learning development. The canvas gives space to design neural network architectures with a drag-and-drop interface. This design is easily sharable with friends and coworkers by exporting to a link. <br/>
Not only can you design neural networks, you can also train them on several datasets: MNIST, CIFAR-10, and more! During training, you can track your network's loss and accuracy in the Progress tab, as well as view a confusion matrix. <br/>
Once training is complete, ENNUI provides a suite of neural network visualization tools to better understand your architecture. <br/>
ENNUI is constantly updated with new features, so be sure to check back often!
</p>
<div id = 'main'>
<!-- The leftmost strip to select tabs -->
<div id = 'tabselector'>
<div id = 'blanktab' class='top_neighbor_tab-selected'> </div>
<div title = 'Network' class = 'tab-selected option tab-option' id = 'network' data-optionValue = 'network'>
<svg class = 'icon' xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"><path fill="none" d="M0 0h24v24H0V0z"/><path d="M11.99 18.54l-7.37-5.73L3 14.07l9 7 9-7-1.63-1.27zM12 16l7.36-5.73L21 9l-9-7-9 7 1.63 1.27L12 16zm0-11.47L17.74 9 12 13.47 6.26 9 12 4.53z"/></svg>
</div>
<div title = 'Progress' class = 'option tab-option bottom_neighbor_tab-selected' id = 'progress' data-optionValue = 'progress'>
<svg class = 'icon' xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"><path fill="none" d="M0 0h24v24H0V0z"/><path d="M13.5 13.48l-4-4L2 16.99l1.5 1.5 6-6.01 4 4L22 6.92l-1.41-1.41z"/></svg>
</div>
<div title = 'Visualization' class = 'option tab-option' id = 'visualization' data-optionValue = 'visualization'>
<svg class = 'icon' xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"><path fill="none" d="M0 0h24v24H0z"/><path d="M11 9h2v2h-2V9zm-2 2h2v2H9v-2zm4 0h2v2h-2v-2zm2-2h2v2h-2V9zM7 9h2v2H7V9zm12-6H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 18H7v-2h2v2zm4 0h-2v-2h2v2zm4 0h-2v-2h2v2zm2-7h-2v2h2v2h-2v-2h-2v2h-2v-2h-2v2H9v-2H7v2H5v-2h2v-2H5V5h14v6z"/></svg>
</div>
<div id = 'middleblanktab' > </div>
<div title = 'Education' class = 'option tab-option' id = 'education' data-optionValue = 'education'>
<svg class = 'icon' xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"><path fill="none" d="M0 0h24v24H0V0z"/><path d="M12 3L1 9l4 2.18v6L12 21l7-3.82v-6l2-1.09V17h2V9L12 3zm6.82 6L12 12.72 5.18 9 12 5.28 18.82 9zM17 15.99l-5 2.73-5-2.73v-3.72L12 15l5-2.73v3.72z"/></svg>
</div>
<div id = 'bottomblanktab' > </div>
</div>
<!-- The left panel (menu) -->
<div id = 'menu'>
<div id = 'networkMenu'>
<div id = 'layers' class = 'category'>
<div class = 'categoryTitle' data-expanded = 'true'>
<div class='expander'>
<svg height="24px" width="24px">
<path d="M16.59 8.59L12 13.17 7.41 8.59 6 10l6 6 6-6z" style="fill:#FFFFFF;"></path>
</svg>
</div>
<div class='categoryTitleText'>
Layers
</div>
</div>
<div class = 'option select-option' data-optionValue = 'dense'> Dense </div>
<div class = 'option select-option' data-optionValue = 'conv2D'> Convolution </div>
<div class = 'option select-option' data-optionValue = 'maxPooling2D'> Max Pooling </div>
<div class = 'option-dropdown'>
<div style="float:left">More</div>
<div style="float:right">〉</div>
<div class='dropdown-content left'>
<div title = 'Modify batches of data during training to make them more similar, resulting in faster convergence and better results.'
class = 'option select-option' data-optionValue = 'batchnorm'> Batch Normalization</div>
<div title = 'Ignore a different random portion of the weights each batch for better generalization and faster training.'
class = 'option select-option' data-optionValue = 'dropout'> Dropout</div>
<div title = 'Flatten a list of 2D images into a 1D feature vector.'
class = 'option select-option' data-optionValue = 'flatten'> Flatten</div>
<div title = 'Concatenate two or more inputs that are all 1D or all 2D.'
class = 'option select-option' data-optionValue = 'concatenate'> Concatenate</div>
<div title = 'Add two or more inputs together.'
class = 'option select-option last-dropdown' data-optionValue = 'add'> Add</div>
</div>
</div>
</div>
<div id = 'activations' class = 'category'>
<div class = 'categoryTitle' data-expanded = 'true'>
<div class='expander'>
<svg height="24px" width="24px">
<path d="M16.59 8.59L12 13.17 7.41 8.59 6 10l6 6 6-6z" style="fill:#FFFFFF;"></path>
</svg>
</div>
<div class='categoryTitleText'>
Activations
</div>
</div>
<div class = 'option select-option' data-optionValue = 'relu'> ReLU </div>
<div class = 'option select-option' data-optionValue = 'sigmoid'> Sigmoid </div>
<div class = 'option select-option' data-optionValue = 'tanh'> Tanh </div>
</div>
<div id = 'templates' class = 'bottomCategory'>
<div class = 'categoryTitle' data-expanded = 'true'>
<div class='expander'>
<svg height="24px" width="24px">
<path d="M16.59 8.59L12 13.17 7.41 8.59 6 10l6 6 6-6z" style="fill:#FFFFFF;"></path>
</svg>
</div>
<div class='categoryTitleText'>
Templates
</div>
</div>
<div class = 'option select-option' data-optionValue = 'blank'> Blank </div>
<div class = 'option select-option' data-optionValue = 'default'> Default </div>
<div class = 'option select-option' data-optionValue = 'resnet'> ResNet </div>
</div>
</div>
<div id = 'progressMenu' style="display: none">
<div id = 'optimizers' class = 'category'>
<div class = 'categoryTitle' data-expanded = 'true'>
<div class='expander'>
<svg height="24px" width="24px">
<path d="M16.59 8.59L12 13.17 7.41 8.59 6 10l6 6 6-6z" style="fill:#FFFFFF;"></path>
</svg>
</div>
<div class='categoryTitleText'>
Optimizers
</div>
</div>
<div id = "defaultOptimizer" class = 'option select-option selected' data-optionValue = 'sgd'> SGD </div>
<div id = 'rmsprop' class = 'option select-option' data-optionValue = 'rmsprop'> RMSprop </div>
<div id = 'adagrad' class = 'option select-option' data-optionValue = 'adagrad'> Adagrad </div>
<div id = 'adam' class = 'option select-option' data-optionValue = 'adam'> Adam </div>
</div>
<div id = 'losses' class = 'category'>
<div class = 'categoryTitle' data-expanded = 'true'>
<div class='expander'>
<svg height="24px" width="24px">
<path d="M16.59 8.59L12 13.17 7.41 8.59 6 10l6 6 6-6z" style="fill:#FFFFFF;"></path>
</svg>
</div>
<div class='categoryTitleText'>
Losses
</div>
</div>
<div id = 'defaultLoss' class = 'option select-option selected' data-optionValue = 'categoricalCrossentropy'>CrossEntropy</div>
<div id = 'hinge' class = 'option select-option' data-optionValue = 'hinge'> Hinge </div>
<div id = 'meanSquaredError' class = 'option select-option' data-optionValue = 'meanSquaredError'> MSE </div>
<div id = 'meanAbsoluteError' class = 'option select-option' data-optionValue = 'meanAbsoluteError'> MAE </div>
</div>
</div>
<div id = 'visualizationMenu' style="display: none">
<div id = 'classes' class = 'category'>
<div class = 'categoryTitle' data-expanded = 'true'>
<div class='expander'>
<svg height="24px" width="24px">
<path d="M16.59 8.59L12 13.17 7.41 8.59 6 10l6 6 6-6z" style="fill:#FFFFFF;"></path>
</svg>
</div>
<div class='categoryTitleText'>
Classes
</div>
</div>
<div class = 'option select-option selected' data-optionValue = 'all'> All </div>
<div class = 'option select-option' data-optionValue = '0'> 0 </div>
<div class = 'option select-option' data-optionValue = '1'> 1 </div>
<div class = 'option select-option' data-optionValue = '2'> 2 </div>
<div class = 'option select-option' data-optionValue = '3'> 3 </div>
<div class = 'option select-option' data-optionValue = '4'> 4 </div>
<div class = 'option select-option' data-optionValue = '5'> 5 </div>
<div class = 'option select-option' data-optionValue = '6'> 6 </div>
<div class = 'option select-option' data-optionValue = '7'> 7 </div>
<div class = 'option select-option' data-optionValue = '8'> 8 </div>
<div class = 'option select-option' data-optionValue = '9'> 9 </div>
</div>
</div>
<div id = 'educationMenu' style="display: none">
<div id = 'educationLayers' class = 'category'>
<div class = 'categoryTitle' data-expanded = 'true'>
<div class='expander'>
<svg height="24px" width="24px">
<path d="M16.59 8.59L12 13.17 7.41 8.59 6 10l6 6 6-6z" style="fill:#FFFFFF;"></path>
</svg>
</div>
<div class='categoryTitleText'>
Articles
</div>
</div>
<div class = 'option select-option education-option' data-optionValue = 'Overview'> Overview </div>
<div class = 'option select-option education-option' data-optionValue = 'Overfitting'> Overfitting</div>
<div class = 'option select-option education-option' data-optionValue = 'ResNets'> ResNets </div>
</div>
<div id = 'educationStory' class = 'category'>
<div class = 'categoryTitle' data-expanded = 'true'>
<div class='expander'>
<svg height="24px" width="24px">
<path d="M16.59 8.59L12 13.17 7.41 8.59 6 10l6 6 6-6z" style="fill:#FFFFFF;"></path>
</svg>
</div>
<div class='categoryTitleText'>
Layers
</div>
</div>
<div class = 'option select-option education-option' data-optionValue = 'Concatenate'> Concatenate</div>
<div class = 'option select-option education-option' data-optionValue = 'Convolution'> Convolution </div>
<div class = 'option select-option education-option' data-optionValue = 'Dropout'> Dropout</div>
<div class = 'option select-option education-option' data-optionValue = 'Flatten'> Flatten</div>
</div>
</div>
</div>
<!-- The middle canvas -->
<div id = 'middle'>
<div id = 'networkTab'>
<svg id = 'svg'> </svg>
</div>
<div id = 'progressTab' style="display: none">
<div id="loss-canvas"></div>
<div id="accuracy-canvas"></div>
<div id="confusion-matrix-canvas"></div>
</div>
<div id = 'visualizationTab' style="display: none">
<div id='visulaization'></div>
<div id='images'></div>
</div>
<div id = 'informationOverlay'>
<div id='information'>Welcome to ENNUI
<div id="informationBody">~ an elegant neural network user interface ~</div>
<div class="informationRow">
<div class="informationColumn">
Start Building <br>
<svg class = 'icon' xmlns="http://www.w3.org/2000/svg" width="30%" max-height="30%" viewBox="0 0 24 24"><path fill="none" d="M0 0h24v24H0V0z"/><path d="M11.99 18.54l-7.37-5.73L3 14.07l9 7 9-7-1.63-1.27zM12 16l7.36-5.73L21 9l-9-7-9 7 1.63 1.27L12 16zm0-11.47L17.74 9 12 13.47 6.26 9 12 4.53z"/></svg>
</div>
<div class="informationBlankColumn"></div>
<div id="informationEducation" class="informationColumn">
Explore Deep Learning <br>
<svg class = 'icon' xmlns="http://www.w3.org/2000/svg" width="30%" max-height="30%" viewBox="0 0 24 24"><path fill="none" d="M0 0h24v24H0V0z"/><path d="M12 3L1 9l4 2.18v6L12 21l7-3.82v-6l2-1.09V17h2V9L12 3zm6.82 6L12 12.72 5.18 9 12 5.28 18.82 9zM17 15.99l-5 2.73-5-2.73v-3.72L12 15l5-2.73v3.72z"/></svg>
</div>
</div>
<div id = 'acknowledgements'>
Developed by (<a class="overlayLinks" href="mailto:ennui-devs@mit.edu">ennui-devs@mit.edu</a>) <br>
Jesse Michel, Zack Holbrook, Stefan Grosser, Rikhav Shah <br>
with advising from <a class="overlayLinks" href="http://hendrik.strobelt.com/" target="_blank">Hendrik Strobelt</a> and <a class="overlayLinks" href="http://www-math.mit.edu/~gs/" target="_blank">Gilbert Strang</a>.<br>
First prototyped at <a class="overlayLinks" href="https://devpost.com/software/ennui" target="_blank">HackMIT</a>.
Open-sourced on <a class="overlayLinks" href="https://github.com/martinjm97/ENNUI" target="_blank">GitHub</a>.
</div>
</div>
</div>
<div id = 'educationTab' style="display: none">
<div id="educationOverview">
<div class="educationTitle" style="padding-top: 0px"> Learn About Deep Neural Networks </div>
<div class="educationSection"> Introduction to ENNUI </div>
<div class="educationContent">
If you are new to ENNUI, a good place to start is to watch
the quick tutorial below:
</div>
<!--<iframe class="educationVideo" src="https://www.youtube.com/embed/m0YnwAtPbb8" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> -->
<div style="text-align: center; font-size: 25px;"><a href="https://www.youtube.com/watch?v=m0YnwAtPbb8" target="_blank">ENNUI Tutorial</a></div>
<div class="educationSection"> Foundations of Deep Learning </div>
<div class="educationContent">
If you are new to machine learning, the lecture video below is an excellent introduction.
This lecture is from MIT's 18.065 class in Spring 2019. If you want to learn more, check out the <a href="http://math.mit.edu/~gs/learningfromdata/">Learning from Data textbook</a>.
</div>
<div style="text-align: center; font-size: 25px;"><a href="https://video.odl.mit.edu/videos/9101a72a7d994d53800d1398fd885b88/embed/?start=339" target="_blank">Gilbert Strang: Deep Learning</a></div>
<!-- <iframe class="educationVideo" src="https://video.odl.mit.edu/videos/9101a72a7d994d53800d1398fd885b88/embed/?start=339" scrolling="no" frameborder="0" allowfullscreen></iframe> -->
</div>
<div id="educationConvolution">
<div class="educationTitle"> Convolutional Nets </div>
<div class="educationAuthor">by <i>Gilbert Strang</i></div>
<div class="educationContent">
<p>The words <strong>weight sharing</strong> give the key idea of CNNs. The weight matrix <span class="math inline">\(A\)</span> that connects one layer to the next layer has only a small number of independent entries. So optimizing those weights is much faster than for a fully-connected (dense) architecture.</p>
<p>In a one-dimensional problem, suppose the inputs (layer zero) are given by a vector <span class="math inline">\(v = (v_1,...,v_n)\)</span>. A convolution will multiply <span class="math inline">\(v\)</span> by a weight matrix <span class="math inline">\(A\)</span> with constant diagonals. Then the same set of weights (say 3 weights) is repeated all along the layer:</p>
<p><span class="math display">\[A=
\begin{bmatrix}
a_{-1} & a_0 & a_1 & & \\
& a_{-1} & a_0 & a_1 & \\
& & a_{-1} & a_0 & a_1 \\
\end{bmatrix}\]</span> This <span class="math inline">\(A\)</span> has <span class="math inline">\(n = 5\)</span> inputs and <span class="math inline">\(m = 3\)</span> outputs. <span class="math inline">\(A\)</span> is <strong>shift-invariant</strong>: a convolution = filter = Toeplitz matrix. Convolutions are especially important for images with many pixels. The <span class="math inline">\(3\)</span> independent weights <span class="math inline">\(a_{-1}, a_0, a_1\)</span> might change to <span class="math inline">\(3 \times 3=9\)</span> weights in 2D. Those 9 numbers are <span class="math inline">\(a_{ij}\)</span> for <span class="math inline">\(i = -1,0,1\)</span> and <span class="math inline">\(j = -1,0,1\)</span>. An input and its <span class="math inline">\(8\)</span> neighbors (in a <span class="math inline">\(3 \times 3\)</span> square) are multiplied by the 9 weights – that sum of 9 terms gives one output in <span class="math inline">\(Av\)</span>. As usual, a bias vector <span class="math inline">\(b\)</span> is added and each component of <span class="math inline">\(Av + b\)</span> is activated (or not) by a function like ReLU: the new layer contains <span class="math inline">\(\textrm{ReLU}(Av + b)\)</span>.</p>
<p>This 2D matrix <span class="math inline">\(A\)</span> is not so easy to display. You should see that a <span class="math inline">\(3 \times 3\)</span> square around an input vector <span class="math inline">\(v\)</span> of size <span class="math inline">\(n \times n\)</span> will produce an output <span class="math inline">\(Av\)</span> of size <span class="math inline">\((n - 2) \times (n - 2)\)</span>, in the same way that 5 inputs gave 3 outputs in 1D. Notice that we only had <span class="math inline">\(3 \times 3 = 9\)</span> (or maybe <span class="math inline">\(5 \times 5 = 25\)</span>) independent weights in 2D, because the convolution not only <strong>shares weights</strong> but it is <strong>local</strong>.</p>
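The 1D example above can be sketched in Python with NumPy. The weights and input below are illustrative, not taken from any trained network:

```python
import numpy as np

# Toeplitz (convolution) matrix: 3 shared weights a_{-1}, a_0, a_1
# repeated along the diagonals, mapping n = 5 inputs to m = 3 outputs.
a = {-1: 2.0, 0: -1.0, 1: 0.5}   # illustrative weights
n = 5
A = np.zeros((n - 2, n))
for i in range(n - 2):
    for j in (-1, 0, 1):
        A[i, i + 1 + j] = a[j]

v = np.arange(1.0, n + 1)         # input vector v = (1, 2, 3, 4, 5)
b = np.zeros(n - 2)               # bias vector
layer = np.maximum(A @ v + b, 0)  # new layer: ReLU(Av + b)
```

Note that A has only 3 independent entries even though it holds 9 nonzero slots; that weight sharing is what makes convolutions so much cheaper to optimize than dense layers.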
</div>
</div>
<div id="educationResNets">
<div class="educationTitle"> Residual Networks (ResNets) </div>
<div class="educationAuthor">by <i>Zack Holbrook</i> and <i>Jesse Michel</i></div>
<div class="educationContent">
<p>In 2015, a research team at Microsoft won the <a href="http://www.image-net.org/challenges/LSVRC/">ImageNet Large Scale Visual Recognition Challenge</a> with record performance using a ResNet. Since 2015, variants of ResNets have dominated the competition, exceeding human performance in the given task. They have become a popular choice of architecture for a broad variety of image recognition tasks and are relatively easy to implement and train.</p>
<div class="educationSection">ResNet Architecture</div>
<p>ResNets are a type of convolutional neural network (CNN) that have <strong>identity shortcuts</strong>: paths that skip layers, creating shortcuts through the network. We provide a typical example of a ResNet below:
<img class="educationImage" src="dist/resnet.png" alt="Resnet image" width="50%">
<div class="modelLink">
<a class="modelLink" target="_newtab" href="http://math.mit.edu/ennui/#%7B%22graph%22:%5B%7B%22layer_name%22:%22Input%22,%22children_ids%22:%5B5,9%5D,%22parent_ids%22:%5B%5D,%22params%22:%7B%22dataset%22:%22mnist%22%7D,%22id%22:0,%22xPosition%22:100,%22yPosition%22:377%7D,%7B%22layer_name%22:%22Conv2D%22,%22children_ids%22:%5B6%5D,%22parent_ids%22:%5B0%5D,%22params%22:%7B%22filters%22:16,%22kernelSize%22:%5B3,3%5D,%22strides%22:%5B1,1%5D,%22activation%22:%22relu%22%7D,%22id%22:5,%22xPosition%22:169,%22yPosition%22:280%7D,%7B%22layer_name%22:%22Add%22,%22children_ids%22:%5B7,10%5D,%22parent_ids%22:%5B0,6%5D,%22params%22:%7B%22activation%22:%22relu%22%7D,%22id%22:9,%22xPosition%22:276,%22yPosition%22:411%7D,%7B%22layer_name%22:%22Conv2D%22,%22children_ids%22:%5B9%5D,%22parent_ids%22:%5B5%5D,%22params%22:%7B%22filters%22:16,%22kernelSize%22:%5B3,3%5D,%22strides%22:%5B1,1%5D%7D,%22id%22:6,%22xPosition%22:294,%22yPosition%22:280%7D,%7B%22layer_name%22:%22Conv2D%22,%22children_ids%22:%5B8%5D,%22parent_ids%22:%5B9%5D,%22params%22:%7B%22filters%22:16,%22kernelSize%22:%5B3,3%5D,%22strides%22:%5B1,1%5D,%22activation%22:%22relu%22%7D,%22id%22:7,%22xPosition%22:414,%22yPosition%22:280%7D,%7B%22layer_name%22:%22Add%22,%22children_ids%22:%5B11%5D,%22parent_ids%22:%5B9,8%5D,%22params%22:%7B%22activation%22:%22relu%22%7D,%22id%22:10,%22xPosition%22:521,%22yPosition%22:412%7D,%7B%22layer_name%22:%22Conv2D%22,%22children_ids%22:%5B10%5D,%22parent_ids%22:%5B7%5D,%22params%22:%7B%22filters%22:16,%22kernelSize%22:%5B3,3%5D,%22strides%22:%5B1,1%5D%7D,%22id%22:8,%22xPosition%22:541,%22yPosition%22:280%7D,%7B%22layer_name%22:%22Flatten%22,%22children_ids%22:%5B12%5D,%22parent_ids%22:%5B10%5D,%22params%22:%7B%7D,%22id%22:11,%22xPosition%22:708,%22yPosition%22:463%7D,%7B%22layer_name%22:%22Dense%22,%22children_ids%22:%5B13%5D,%22parent_ids%22:%5B11%5D,%22params%22:%7B%22units%22:32,%22activation%22:%22relu%22%7D,%22id%22:12,%22xPosition%22:702,%22yPosition%22:434%7D,%7B%22layer_name%22:%22Dr
opout%22,%22children_ids%22:%5B1%5D,%22parent_ids%22:%5B12%5D,%22params%22:%7B%22rate%22:0.5%7D,%22id%22:13,%22xPosition%22:778,%22yPosition%22:365%7D,%7B%22layer_name%22:%22Output%22,%22children_ids%22:%5B%5D,%22parent_ids%22:%5B13%5D,%22params%22:%7B%7D,%22id%22:1,%22xPosition%22:900,%22yPosition%22:377%7D%5D,%22hyperparameters%22:%7B%22learningRate%22:0.01,%22batchSize%22:64,%22optimizer_id%22:%22defaultOptimizer%22,%22epochs%22:6,%22loss_id%22:%22defaultLoss%22%7D%7D">
Model Link
</a>
</div>
Identity shortcuts mean that the learned parameters are residuals. Mathematically, let <span class="math inline">\(R(x)\)</span> be a sequence of convolutional layers with ReLUs, known as a <strong>residual block</strong>; for example, <span class="math display">\[R(x) = \textrm{Conv}(\textrm{ReLU}(\textrm{Conv}(x))).\]</span> Then the output of the residual block will be <span class="math inline">\(R(x) + x\)</span>, where <span class="math inline">\(x\)</span> is the identity pass-through. If the neural network is trying to approximate some function <span class="math inline">\(F(x)\)</span>, then a perfect residual block <span class="math inline">\(R^*(x)\)</span> satisfies <span class="math inline">\(R^*(x) = F(x) - x\)</span>, which is exactly a residual after subtracting the input.</p>
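A minimal sketch of the identity shortcut, with random dense matrices standing in for the trained convolutions (the shapes and weights are purely illustrative):

```python
import numpy as np

rng = np.random.default_rng(0)
W1 = rng.standard_normal((8, 8))
W2 = rng.standard_normal((8, 8))

def R(x):
    # Residual block Conv(ReLU(Conv(x))), with matrix multiplies
    # standing in for the convolutions for brevity.
    return W2 @ np.maximum(W1 @ x, 0)

x = rng.standard_normal(8)
out = R(x) + x   # identity shortcut: the block only has to learn the residual
```

If the block learns <code>R(x) = F(x) - x</code>, then <code>out</code> approximates the target function <code>F(x)</code>.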
<div class="educationSection">Advantages of ResNets</div>
<p>The amazing property of ResNets is their ability to scale well, leading to deeper neural networks that still train well. As networks get larger, a number of problems arise.</p>
<p>Large networks tend to train slowly, but the <strong>weight sharing</strong> of CNNs means that each residual block has relatively few parameters to train. Large networks also tend to have the problem of the <strong>vanishing gradient</strong> – where weight updates from gradient descent diminish to the point where the network does not improve even with more training time. The identity shortcuts in ResNets give a path for the gradients to flow, avoiding the problem of the vanishing gradient.</p>
</div>
</div>
<div id="educationFlatten">
<div class="educationTitle"> Flatten Layers </div>
<div class="educationAuthor">by <i>Zack Holbrook</i> and <i>Jesse Michel</i></div>
<div class="educationContent">
<p>The flatten layer takes a multidimensional input and produces a one-dimensional output. For example, each image in the CIFAR dataset is 3-dimensional: it is a 2-dimensional grid of 32x32 pixels with 3 color channels (red, green, blue). A Flatten layer takes such an image as input and produces a 1-dimensional vector of size 32*32*3 = 3072.</p>
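In Python this is a single reshape (a sketch using NumPy rather than any particular framework):

```python
import numpy as np

image = np.zeros((32, 32, 3))  # one CIFAR-sized image: 32x32 pixels, 3 channels
flat = image.reshape(-1)       # Flatten: a vector of 32 * 32 * 3 = 3072 entries
```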
</div>
</div>
<div id="educationConcatenate">
<div class="educationTitle"> Concatenate Layers </div>
<div class="educationAuthor">by <i>Zack Holbrook</i> and <i>Jesse Michel</i></div>
<div class="educationContent">
<p>A concatenate layer takes two or more layers and concatenates the outputs into a single output by stacking the inputs. For example, it would concatenate two vectors of size 10 into a vector of size 20 by stacking one on top of the other.</p>
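The vector example above, as a quick NumPy sketch:

```python
import numpy as np

a = np.ones(10)
b = np.zeros(10)
c = np.concatenate([a, b])  # stacks two size-10 vectors into a size-20 vector
```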
</div>
</div>
<div id="educationDropout">
<div class="educationTitle"> Dropout Layers </div>
<div class="educationAuthor">by <i>Stefan Grosser</i> and <i>Jesse Michel</i></div>
<div class="educationContent">
<p>Dropout layers ignore a random fraction of the incoming units during training time. For example, if the rate is 0.1, then on each forward pass the dropout layer will randomly select 10% of the units and set them to 0. Adding a dropout layer with a rate of 0 will make no change to the network, while a rate of 1 will make the dropout layer output all 0s.</p>
<p>Dropout is commonly used to prevent <strong>overfitting</strong> (for more information, see our write-up on the topic). One may think of dropout as having the network learn a collection of weak classifiers that come together during testing to create a more powerful classifier. For those who are familiar with this terminology, it resembles bagging with an ensemble of models. It also has the convenient property of speeding up training, since fewer units are active in each forward pass.</p>
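A minimal sketch of a dropout forward pass in NumPy. This uses the common "inverted dropout" convention, which also rescales the kept units so their expected sum is unchanged; the description above covers only the zeroing:

```python
import numpy as np

rng = np.random.default_rng(0)
rate = 0.1                               # fraction of units to drop
x = np.ones(1000)                        # incoming activations
mask = rng.random(x.shape) >= rate       # keep ~90% of units at random
y = np.where(mask, x / (1 - rate), 0.0)  # inverted dropout: rescale kept units
```

At test time no units are dropped and the layer is simply the identity.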
</div>
</div>
<div id="educationOverfitting">
<div class="educationTitle"> Overfitting </div>
<div class="educationAuthor">by <i>Stefan Grosser</i> and <i>Jesse Michel</i></div>
<div class="educationContent">
<p>A neural network sometimes learns too well. It identifies trends specific only to the training data and thus fails to <strong>generalize</strong>. This problem of fitting the training data too closely is called <strong>overfitting</strong>. The following figure shows the decision boundary – the curve that determines the prediction of the classifier – in the cases of underfitting, fitting well (normal), and overfitting.</p>
<img class="educationImage" src="dist/overfitti_ng.png" alt="Possible decision boundaries " />
<div class="modelLink">
<a class="modelLink" target="_newtab" href="http://mlwiki.org/index.php/Overfitting">
Source: ML Wiki
</a>
</div>
<p>When a classifier overfits, it performs far better on the training data than on the test data. Therefore, the training accuracy will be much higher than the validation accuracy and the training loss will be much lower than the validation loss. We have provided an example of this below. </p>
<img class="educationImage" style="float: left; max-width: 50%;" src="dist/loss_overfit.png" title="fig:" alt="Visualization of overfitting during training" />
<img class="educationImage" style="float: right; max-width: 50%;" src="dist/accuracy_overfit.png" title="fig:" alt="Visualization of overfitting during training" />
<div style="margin-top:10px;">
<!-- TODO: padding above -->
The architecture used for this example is shown below.
</div>
<div class="figure">
<img class="educationImage" style="max-width: 50%;" src="dist/overfitting_network.png" alt="Network architecture" >
<div class="modelLink">
<a class="modelLink" target="_newtab" href="https://math.mit.edu/ennui/#%7B%22graph%22:%5B%7B%22layer_name%22:%22Input%22,%22children_ids%22:%5B2%5D,%22parent_ids%22:%5B%5D,%22params%22:%7B%22dataset%22:%22cifar%22%7D,%22id%22:0,%22xPosition%22:100,%22yPosition%22:399%7D,%7B%22layer_name%22:%22Conv2D%22,%22children_ids%22:%5B3%5D,%22parent_ids%22:%5B0%5D,%22params%22:%7B%22filters%22:16,%22kernelSize%22:%5B3,3%5D,%22strides%22:%5B1,1%5D,%22kernelRegularizer%22:%22none%22,%22regScale%22:0.1,%22activation%22:%22relu%22%7D,%22id%22:2,%22xPosition%22:261,%22yPosition%22:453%7D,%7B%22layer_name%22:%22Flatten%22,%22children_ids%22:%5B1%5D,%22parent_ids%22:%5B2%5D,%22params%22:%7B%7D,%22id%22:3,%22xPosition%22:585,%22yPosition%22:484%7D,%7B%22layer_name%22:%22Output%22,%22children_ids%22:%5B%5D,%22parent_ids%22:%5B3%5D,%22params%22:%7B%7D,%22id%22:1,%22xPosition%22:900,%22yPosition%22:399%7D%5D,%22hyperparameters%22:%7B%22learningRate%22:0.1,%22batchSize%22:64,%22optimizer_id%22:%22defaultOptimizer%22,%22epochs%22:15,%22loss_id%22:%22defaultLoss%22%7D%7D">
Model Link
</a>
</div>
</div>
<br/><br/>
<p>Overfitting exemplifies why cross validation is so important; without a validation set, we would not be able to identify that the model will fail to generalize.</p>
<p>So, how can we combat overfitting and make sure that a model finds generalizable traits?</p>
<div class="educationSection">Regularization</div>
<p>One way to prevent overfitting is regularization, which takes the form of adding in a new term that guides the model towards a simpler solution. Recall that in classification problems we begin with pairs of inputs and their classifications <span class="math display">\[(x_1, y_1), (x_2, y_2), \dots, (x_n, y_n).\]</span> From this, we want to find a function <span class="math inline">\(f\)</span> that will accurately predict the classes of new samples of data. So if our original problem was <span class="math display">\[\min_f \sum_{i=1}^{n} C(f(x_i), y_i),\]</span> where <span class="math inline">\(C\)</span> calculates the cost of predicting <span class="math inline">\(f(x_i)\)</span> when the true value is <span class="math inline">\(y_i\)</span>, then the regularized loss will be <span class="math display">\[\min_f \sum_{i=1}^{n} C(f(x_i), y_i) + \lambda R(f),\]</span> where <span class="math inline">\(R(f)\)</span> is the regularization term that is defined to be larger when <span class="math inline">\(f\)</span> is more complex and <span class="math inline"> \(\lambda>0\) </span> is a tunable parameter controlling the amount of regularization. There are various definitions of how complex a layer is, but in our case we will say that a layer with a lower <span class="math inline">\(L2\)</span>-norm is less complex. Formally, we define <span class="math inline">\(L2\)</span>-norm for a matrix <span class="math inline">\(A\)</span> as <span class="math display">\[\text{norm}(A) = \sqrt{\sum_i \sum_j a_{ij}^2}.\]</span> For example, given matrix <span class="math display">\[A =
\begin{bmatrix}
1 & 2 \\
0 & -2
\end{bmatrix},\]</span> the L2-norm is <span class="math display">\[||A||_2 = \sqrt{1^2 + 2^2 + 0^2 + (-2)^2} = 3.\]</span></p>
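<p>As a quick check, the norm above is easy to compute by hand or in a few lines of code. The sketch below is purely illustrative (the helper name <code>l2_norm</code> is our own, not part of ENNUI):</p>

```python
import math

def l2_norm(matrix):
    """Square root of the sum of squared entries of a matrix
    (given here as a list of rows)."""
    return math.sqrt(sum(a * a for row in matrix for a in row))

A = [[1, 2],
     [0, -2]]
print(l2_norm(A))  # → 3.0
```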
<p>There are several reasons why penalizing an increased <span class="math inline">\(L2\)</span>-norm is reasonable. If we assume that a classifier is going to overfit, then adding the penalty <span class="math inline">\(\lambda R(f)\)</span> will guide the decision boundary away from that state. This can be seen as adding “wiggle room” to the classifier. Moreover, penalizing a high <span class="math inline">\(L2\)</span>-norm encourages the model to discard useless information: the penalty term drives layer weights toward zero, and the closer a weight gets to zero, the smaller its impact as a feature.</p>
<p>This notion of complexity makes the <span class="math inline">\(L1\)</span>- and <span class="math inline">\(L2\)</span>-norms natural regularization terms. In the case of <span class="math inline">\(L2\)</span>-regularization, we add <span class="math inline">\(\lambda ||W||_2\)</span> to our loss function for a given layer <span class="math inline">\(W.\)</span> There are other ways to regularize, but for now let us take a look at a different approach. </p>
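<p>To make the regularized objective concrete, here is a minimal sketch, assuming a squared-error cost for <span class="math inline">\(C\)</span> and a single weight matrix <span class="math inline">\(W\)</span>. The function name is hypothetical, not ENNUI code:</p>

```python
import math

def regularized_loss(predictions, targets, weights, lam=0.1):
    # Unregularized cost: the sum of per-sample costs C(f(x_i), y_i),
    # using squared error purely for illustration.
    cost = sum((p - y) ** 2 for p, y in zip(predictions, targets))
    # Penalty term lambda * ||W||_2: larger (more complex) weights
    # incur a larger penalty, nudging the model toward simplicity.
    penalty = lam * math.sqrt(sum(w * w for row in weights for w in row))
    return cost + penalty

# With lam = 0 this reduces to the original, unregularized loss.
print(regularized_loss([1.0, 2.0], [1.0, 2.0], [[3.0, 4.0]], lam=1.0))  # → 5.0
```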
<!-- TODO: Maybe explain L1-regularization in the future -->
<div class="educationSection">Dropout</div>
<p>Another way to fight overfitting is a technique known as dropout. Dropout layers ignore a random fraction of the input units during training time (see our explanation on dropout layers for more information). There are two intuitions for why dropout helps prevent overfitting. Dropout can be seen as a form of ensemble learning: taking a collection of weak (underfit) classifiers and combining their classifications in some way, e.g., by taking the majority class. For each batch, a different subnetwork is trained as a weak classifier. During validation, the whole network is used, effectively combining all of the classifiers to produce a single result. Another view is that over many runs, dropout forces all parts of the network architecture to be used. Therefore, no one feature of the training set becomes too influential, preventing the network from focusing on artifacts that are specific to the training set.</p>
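<p>The training-time behavior described above can be sketched as an "inverted dropout" mask. This is a hand-rolled illustration operating on a flat list of activations, not the layer ENNUI actually builds:</p>

```python
import random

def dropout(inputs, rate=0.5, training=True):
    """Zero each unit with probability `rate` during training.

    Surviving units are scaled by 1 / (1 - rate) so the expected
    activation matches evaluation time, when every unit is kept.
    """
    if not training:
        return list(inputs)  # validation/test: use the whole network
    keep = 1.0 - rate
    return [x / keep if random.random() < keep else 0.0 for x in inputs]
```

<p>With <code>training=False</code> the input passes through unchanged, which corresponds to combining all of the weak classifiers into a single prediction at validation time.</p>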
<div class="educationSection">Conclusion</div>
<p>Overfitting hinders the performance of classifiers on unseen data. Regularization and dropout are two widely used, easy-to-implement approaches for combating overfitting. Combining these methods with cross validation makes it far easier to build generalizable models.</p>
</div>
</div>
<div style="height:100px;"> </div>
</div>
<div id = 'loadingDataTab' style="display: none">
<div id='loadingMNIST'>
Loading <span id="datasetLoadingName">MNIST</span> dataset
</div>
</div>
<!-- Error popup -->
<div id = 'error' style="display: none">
<svg id = 'x' xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"><path fill="none" d="M0 0h24v24H0V0z"/><path d="M19 6.41L17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41z"/></svg>
<div id = 'errorMessage'> </div>
</div>
</div>
<!-- The right panel -->
<div id = 'paramshell'>
<div class = 'trainbox' id = 'trainbox'>
<div id = 'train' class = 'train' data-actionType = 'json'> Train </div>
</div>
<div class = 'category' id = 'kerasinfo'>
<div class = 'categoryTitle' data-expanded = 'true'>
<div class='expander'>
<svg height="24px" width="24px">
<path d="M16.59 8.59L12 13.17 7.41 8.59 6 10l6 6 6-6z" style="fill:#FFFFFF;"></path>
</svg>
</div>
<div class='categoryTitleText'>
Model Status
</div>
</div>
<div class = 'parambox'>
<div id = 'ti_training' class = 'paramline'>
<div class = 'paramname'>Training:</div>
<div class = 'paramvalue'>No</div>
</div>
<div id = 'ti_acc' class = 'paramline'>
<div class = 'paramname'>Accuracy:</div>
<div class = 'paramvalue'>N/A</div>
</div>
<div id = 'ti_loss' class = 'paramline'>
<div class = 'paramname'>Loss:</div>
<div class = 'paramvalue'>N/A</div>
</div>
<div id = 'ti_vacc' class = 'paramline'>
<div class = 'paramname'>Validation Acc:</div>
<div class = 'paramvalue'>N/A</div>
</div>
<div id = 'ti_vloss' class = 'paramline'>
<div class = 'paramname'>Validation Loss:</div>
<div class = 'paramvalue'>N/A</div>
</div>
</div>
</div>
<div class="category">
<div class = 'categoryTitle' data-expanded = 'true'>
<div class='expander'>
<svg height="24px" width="24px">
<path d="M16.59 8.59L12 13.17 7.41 8.59 6 10l6 6 6-6z" style="fill:#FFFFFF;"></path>
</svg>
</div>
<div class='categoryTitleText'>
Share
</div>
</div>
<div id="exportPython" class="select-option right-option">Export to Python</div>
<div id="exportJulia" class="select-option right-option">Export to Julia</div>
<div id="copyModel" class="select-option right-option">Copy model link</div>
</div>
<div id = 'networkParamshell'>
<div class = 'category' id='paramtruck'>
<div class = 'categoryTitle' data-expanded = 'true'>
<div class='expander'>
<svg height="24px" width="24px">
<path d="M16.59 8.59L12 13.17 7.41 8.59 6 10l6 6 6-6z" style="fill:#FFFFFF;"></path>
</svg>
</div>
<div class='categoryTitleText'>
Parameters
</div>
</div>
<div id='defaultparambox' class = 'parambox'>Click on a layer to view and change its parameters.</div>
</div>
</div>
<div id = 'progressParamshell' style="display: none">
<div class = 'category' id='paramtruck'>
<div class = 'categoryTitle' data-expanded = 'true'>
<div class='expander'>
<svg height="24px" width="24px">
<path d="M16.59 8.59L12 13.17 7.41 8.59 6 10l6 6 6-6z" style="fill:#FFFFFF;"></path>
</svg>
</div>
<div class='categoryTitleText'>
Hyperparams
</div>
</div>
<div class = 'parambox'>
<div class = 'paramline'>
<div class="paramname" data-name="lr">Learning rate: </div>
<input id="learningRate" class="paramvalue hyperparamvalue" value="0.01">
</div>
<div class = 'paramline'>
<div class="paramname" data-name="epochs">Epochs: </div>
<input id="epochs" class="paramvalue hyperparamvalue" value="6">
</div>
<div class = 'paramline'>
<div class="paramname" data-name="lr">Batch Size: </div>
<input id="batchSize" class="paramvalue hyperparamvalue" value="64">
</div>
</div>
</div>
</div>
<div id = 'visualizationParamshell' style="display: none">
</div>
<div id = 'educationParamshell' style="display: none">
</div>
</div>
</div>
<div id='footer'>
<a href='https://accessibility.mit.edu'>Accessibility</a>
</div>
</body>
</html>