spencermwoo
diff --git a/‎README.md‎
Lines changed: 4 additions & 1 deletion b/‎README.md‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎analysis/analysis.py‎
Lines changed: 2 additions & 1 deletion b/‎analysis/analysis.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎analysis/graph.py‎
Lines changed: 29 additions & 7 deletions b/‎analysis/graph.py‎
Lines changed: 29 additions & 7 deletions
diff --git a/‎analysis/stats.py‎
Lines changed: 18 additions & 6 deletions b/‎analysis/stats.py‎
Lines changed: 18 additions & 6 deletions
diff --git a/‎analysis/util.py‎
Lines changed: 8 additions & 3 deletions b/‎analysis/util.py‎
Lines changed: 8 additions & 3 deletions
@@ -1,9 +1,12 @@
 # randomness-in-programming-languages
-An example of random number generation in different programming [languages](/sources#future-languages)
+An example of random number generation in different programming [languages](/sources#completed-languages)
 
 # hacktoberfest
 A hactoberfest-friendly project
 
+# file structure
+Overview of folder layout
+
 ## Contribute
 Create an example of generating a million random numbers and calculating the percentage for each bucket.  
 
 
@@ -1,6 +1,6 @@
 import argparse
 
-from stats import analysis_all
+from stats import analysis_all, write_analysis
 from graph import plot_multis, plot_individuals, plot_analysis
 
 # Generate Graphs
@@ -11,6 +11,7 @@
 # Graph Analysis
 # ====
 analysisList = analysis_all()
+write_analysis(analysisList)
 plot_analysis(analysisList)
 
 # ===
 
@@ -1,6 +1,7 @@
 import matplotlib.pyplot as plt
 
 from util import read_output_files_and_perform, perform_probability_per_language, parse
+from stats import normalize_group, perc
 
 # this plots multiple result files
 def multiplot(languages, numbers, trials, *args):
@@ -10,7 +11,10 @@ def multiplot(languages, numbers, trials, *args):
 	for (language, filename, x, y) in perform_probability_per_language(languages, numbers, trials):
 		_plot(x, y, language)
 
-	_plot_graph('number', 'probability', f'multi_{numbers}_{trials}', True)
+	fewer = False
+	if numbers == 1000:
+		fewer = True
+	_plot_graph('number', 'probability', f'multi_{numbers}_{trials}', True, fewer)
 
 def singleplot_all(languages, numbers, trials, include_expected):
 	for language in languages:
@@ -27,10 +31,11 @@ def singleplot(language, numbers, trials, include_expected=False):
 	filename = f'{language}_{numbers}_{trials}'
 	with open(filename) as file:
 		for line in file:
-			n, probability = parse(line)
+			if ":" in line:
+				n, probability = parse(line)
 
-			x.append(n)
-			y.append(probability)
+				x.append(n)
+				y.append(probability)
 			# deviations.append(calculate_deviation(n, probability, expected))
 
 	_plot(x, y, language)
@@ -47,9 +52,17 @@ def _plot(x, y, label):
 def _bar(x, y):
 	plt.bar(x, y, width=0.4, label=y)
 
-def _plot_graph(x_axis, y_axis, title, save=False):
+def _plot_graph(x_axis, y_axis, title, save=False, fewer=False):
 	plt.legend(loc='best')
 
+	# plt.margins(0.1)
+	# plt.figure(figsize=(20,5))
+
+	# plt.figure(figsize=[12.8, 9.6])
+	# if fewer:
+	# 	plt.locator_params(axis='x', nbins=10)
+	# 	plt.locator_params(axis='y', nbins=10)
+
 	plt.xlabel(x_axis)
 	plt.ylabel(y_axis)
 
@@ -66,22 +79,31 @@ def plot_individuals(include_expected=False):
 	read_output_files_and_perform(singleplot_all, include_expected)
 
 def plot_multis():
+	plt.figure(figsize=(25.6, 19.2))  # figsize is (width, height) in inches
 	read_output_files_and_perform(multiplot)
 
 def plot_analysis(analysisList):
+	plt.figure(figsize=(25.6, 19.2))
 	numbers, trials = None, None
 	for trialList in analysisList:
 
 		# trialList = normalize_group(trialList)
+		# ymin = 0
+		# ymax = 0
 		for i, analysis in enumerate(trialList):
 			std, filename = analysis
 
 			language, numbers, trials = filename.split("_")
 
-			# if i==0: plt.axis(ymin=std-perc(std))
-			# elif i==len(trialList)-1: plt.axis(ymax=std+perc(std))
+			if i==0: plt.ylim(ymin=max(std, 0))  # lower y-limit: first entry's std, clamped at 0
+			elif i==len(trialList)-1: plt.ylim(ymax=std)  # upper y-limit: last entry's std
 
+			# TODO: each language should use the same color in all graphs
 			_bar(language, std)
+
+		# TODO: why y-axis has negative scale (???)
+		# plt.axis(xmin=0, ymin=ymin, ymax=ymax)
+		# print(plt.axis())
 		_plot_graph('language', 'std', f'analysis_{numbers}_{trials}', True)
 
 	return
@@ -1,8 +1,8 @@
 import statistics as st
 import heapq
-# from sklearn import preprocessing
+from sklearn import preprocessing
 
-from util import read_output_files_and_perform, perform_probability_per_language
+from util import read_output_files_and_perform, perform_probability_per_language, write_to_file
 
 def calculate_standard_deviation(language, data, sample_size):
 	# if language == 'expected': 
@@ -29,11 +29,23 @@ def analysis_one(languages, numbers, trials):
 	# split by trial, however variance is already split
 	return [heapq.heappop(resHeap) for i in range(len(resHeap))]
 
-# def normalize_group(analysisList):
-# 	return preprocessing.minmax_scale(analysisList, feature_range=(analysisList[0], analysisList[-1]))
+def write_analysis(analysisList):
+	for trialList in analysisList:
+		d = []  # one "language:value" line per entry of this trial group
+		numbers, trials = '', ''
+		for analysis in trialList:
 
-# def perc(num):
-# 	return 0
+			language, numbers, trials = analysis[1].split("_")  # analysis[1] is "<language>_<numbers>_<trials>"
+
+			d.append(f'{language}:{analysis[0]}')
+
+		write_to_file(f'graphs/multi_{numbers}_{trials}_data', '\n'.join(d))
+
+def normalize_group(analysisList):
+	return preprocessing.minmax_scale(analysisList, feature_range=(analysisList[0], analysisList[-1]))  # NOTE(review): feature_range should be a numeric (min, max) pair - confirm the elements of analysisList are scalars
+
+def perc(num):
+	return 0  # stub: always returns 0, num is ignored
 
 def analysis_all():
 	analysisList = read_output_files_and_perform(analysis_one)
 
@@ -15,6 +15,10 @@ def read_output_files_and_perform(func, *args):
 
 	return r
 
+def write_to_file(filename, data):
+	with open(filename, 'w') as f:  # mode 'w' creates the file or overwrites existing content
+		f.write(data)
+
 # def calculate_probability(filename):
 # 	with open(filename) as file:
 # 		x, y = [], []
@@ -33,10 +37,11 @@ def perform_probability_per_language(languages, numbers, trials):
 		with open(filename) as file:
 			x, y = [], []
 			for line in file:
-				n, probability = parse(line)
+				if ":" in line:
+					n, probability = parse(line)
 
-				x.append(n)
-				y.append(probability)
+					x.append(n)
+					y.append(probability)
 
 			yield (language, filename, x,y)