Skip to content

Commit

Permalink
General update, mostly sourcecode, Jul 2018
Browse files Browse the repository at this point in the history
  • Loading branch information
Derek-Jones committed Jul 1, 2018
1 parent e7a2d74 commit c96a988
Show file tree
Hide file tree
Showing 257 changed files with 3,492 additions and 653 deletions.
4 changes: 4 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
Change log

1 Jul 18

General update, mostly source code related, plus added TAGs

2 Apr 18

Reliability draft pdf release, plus more data and tweaks
Expand Down
50 changes: 50 additions & 0 deletions benchmark/1509-all64.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#
# 1509-all64.R, 28 May 18
# Data from:
# Array Layouts for Comparison-Based Searching
# Paul-Virak Khuong and Pat Morin
#
# Example from:
# Empirical Software Engineering using R
# Derek M. Jones

source("ESEUR_config.r")


# library("dplyr")


pal_col=rainbow(5)


# Add the runtime curve of one algorithm to the current plot.
#   df       data frame with items (x values) and seconds (y values) columns
#   col_str  colour used for the line
plot_run <- function(df, col_str = "black") {
  n_items <- df$items
  run_secs <- df$seconds
  lines(n_items, run_secs, col = col_str)
}


# Benchmark timings; the alg, items and seconds columns are used below.
all64 <- read.csv(paste0(ESEUR_dir, "benchmark/1509-all64.csv.xz"), as.is = TRUE)


# Empty log-log axes spanning the data.  The dummy point has to be positive:
# a value of 0 on a log-scaled axis produces a "value <= 0 omitted" warning.
plot(1, type = "n", log = "xy",
     xaxs = "i", yaxs = "i",
     xlim = range(all64$items), ylim = range(all64$seconds),
     xlab = "Array size", ylab = "Runtime (secs)\n")

# d_ply(all64, .(alg), plot_run)

# One curve per selected algorithm, colours taken in order from pal_col
shown_algs <- c("eytzinger_bf", "eytzinger_branchy", "sorted_bfp",
                "btree16_bf_a", "btree32_a")
for (alg_ind in seq_along(shown_algs)) {
  plot_run(subset(all64, alg == shown_algs[alg_ind]), pal_col[alg_ind])
}


# Sizes of L1, L2, and L3 cache
cache_size <- c(L1 = 2^13, L2 = 2^16, L3 = 2^21)
for (cache_name in names(cache_size)) {
  # Grey vertical marker with its label drawn at y=1
  lines(rep(cache_size[[cache_name]], 2), c(1e-3, 2), col = "grey")
  text(cache_size[[cache_name]], 1, cache_name)
}

Binary file added benchmark/1509-all64.csv.xz
Binary file not shown.
4 changes: 2 additions & 2 deletions communicating/github-lang-pairs.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# github-lang-pairs.R, 14 May 14
# github-lang-pairs.R, 31 May 18
#
# Data from:
# Popularity, interoperability, and impact of programming languages in 100,000 open source projects
Expand All @@ -15,7 +15,7 @@ library("igraph")
library("plyr")


git_lang=read.csv(paste0(ESEUR_dir, "src_measure/github-lang-use.csv.xz"), as.is=TRUE)
git_lang=read.csv(paste0(ESEUR_dir, "sourcecode/github-lang-use.csv.xz"), as.is=TRUE)

git_lang=subset(git_lang, language != "")

Expand Down
29 changes: 29 additions & 0 deletions developers/a013582.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#
# a013582.R, 15 May 18
# Data from:
# A MODEL OF HUMAN COGNITIVE BEHAVIOR IN WRITING CODE FOR COMPUTER PROGRAMS, VOL I
# Ruven Brooks
#
# Example from:
# Empirical Software Engineering using R
# Derek M. Jones
#
# TAG cognition debugging experiment LOC


source("ESEUR_config.r")


a013 <- read.csv(paste0(ESEUR_dir, "developers/a013582.csv.xz"), as.is = TRUE)
# Row index, used below to model a learning effect.
# NOTE(review): presumably rows are in the order the programs were
# written -- confirm against the data file.
# seq_len() rather than 1:nrow(), which yields c(1, 0) for an empty table.
a013$position <- seq_len(nrow(a013))

# Pairwise scatter plots of the log-transformed measurements
plot(~ log(Writing) + log(Debugging) + log(Lines), data = a013)

# d_mod=glm(log(Debugging) ~ log(Lines), data=a013)
# There is a learning effect
# Subtracting log(Lines) from the crossed terms leaves log(position) and the
# log(position):log(Lines) interaction as the explanatory terms.
d_mod <- glm(log(Debugging) ~ log(position) * log(Lines) - log(Lines), data = a013)
summary(d_mod)

w_mod <- glm(log(Lines) ~ log(Writing), data = a013)
summary(w_mod)

Binary file added developers/a013582.csv.xz
Binary file not shown.
106 changes: 106 additions & 0 deletions developers/adelson1981.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#
# adelson1981.R, 12 May 18
# Data from:
# Problem solving and the development of abstract categories in programming languages
# Beth Adelson
#
# Example from:
# Empirical Software Engineering using R
# Derek M. Jones
#
# TAG experiment cognition memory-recall


source("ESEUR_config.r")


plot_layout(2, 1)


# Matrix for holding the item pairwise distances.
# Square, zero filled, with one row and one column per entry of the
# global line_loc, which also supplies the row and column names.
mk_item_mat <- function() {
  n_items <- length(line_loc)

  matrix(0, nrow = n_items, ncol = n_items,
         dimnames = list(line_loc, line_loc))
}



# Calculate the distance between all pairs of items in a list.
#
#   items  vector of item names (values from the global line_loc), in list
#          order; NA entries are dropped before positions are counted.
#
# Returns a matrix created by mk_item_mat(), with rows/columns named by
# line_loc.  For an item X present in the list, entry [X, Y] is the absolute
# difference between the list positions of X and Y.  An item missing from
# the list is given the fixed distance item_na_len (a global) from every
# other item, in both its row and its column, and 0 from itself.
#
# NOTE(review): assumes each item occurs at most once in items -- a repeated
# item gives more than one position and the subtraction below recycles.
calc_dist <- function(items) {
  # Positions are counted within the non-NA entries only
  recalled <- items[!is.na(items)]
  recall_mat <- mk_item_mat()

  for (X in line_loc) {
    op_pos <- which(recalled == X)

    if (length(op_pos) == 0) {
      # Missing items are given fixed distance from all other items
      recall_mat[X, ] <- item_na_len
      recall_mat[, X] <- item_na_len
      recall_mat[X, X] <- 0
    } else {
      # Distance, in list positions, between X and every listed item
      recall_mat[X, recalled] <- abs(seq_along(recalled) - op_pos)
    }
  }

  recall_mat
}


adel <- read.csv(paste0(ESEUR_dir, "developers/adelson1981.csv.xz"), as.is = TRUE)

# Three programs and the lines they contain
line_loc <- c(paste0("1_", 0:4),
              paste0("2_", 0:4),
              paste0("3_", 0:5))
# Program statement kind. This entry has no NAs
line_kind <- adel$e7C[order(adel$e7p)]

# Seems as good a value as any other
item_na_len <- length(line_loc) / 2

# Sum the per-subject distance matrices, then cluster on their mean
teacher_cols <- c("e6p", "e7p", "e8p", "e9p", "e10p")
teacher_dist <- Reduce(`+`, lapply(adel[teacher_cols], calc_dist))

d_dist <- dist(teacher_dist / length(teacher_cols))
hc <- hclust(d_dist)

plot(hc, main = "Teachers", sub = "", col = point_col,
     xlab = "Items", ylab = "")


student_cols <- c("n1p", "n2p", "n3p", "n4p", "n5p")
student_dist <- Reduce(`+`, lapply(adel[student_cols], calc_dist))

# Label each item with its statement kind
rownames(student_dist) <- paste0(line_loc, "-", line_kind)

d_dist <- dist(student_dist / length(student_cols))
hc <- hclust(d_dist)

plot(hc, main = "Students", sub = "", col = point_col,
     xlab = "Items", ylab = "")


Binary file added developers/adelson1981.csv.xz
Binary file not shown.
6 changes: 3 additions & 3 deletions developers/api-struct/api-robinson.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# api-robinson.R, 10 Oct 16
# api-robinson.R, 18 May 18
#
# Data from:
# Developer characterization of data structure fields decisions
Expand All @@ -16,7 +16,7 @@ library("seriation")
library("grid") # Yes, seriation uses grid graphics


pal_col=diverge_hcl(10)
pal_col=heat_hcl(10)

fields=read.csv(paste0(ESEUR_dir, "developers/api-struct/similar_08.csv.xz"), as.is=TRUE)
rownames(fields)=colnames(fields)
Expand All @@ -26,5 +26,5 @@ fmat=as.matrix(fields)
fdist = as.dist(1 - fmat/max(fmat))
fser = seriate(fdist, method="BBURCG")

pimage(fdist, fser, col=pal_col, key=FALSE, gp=gpar(cex=0.8))
pimage(fdist, fser, col=rev(pal_col), key=FALSE, gp=gpar(cex=0.8))

23 changes: 13 additions & 10 deletions developers/like-n-dis.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# like-n-dis.R, 17 Apr 17
# like-n-dis.R, 23 Apr 18
# Data from:
# The sources and Consequences of the Fluent Processing of numbers
# The Sources and Consequences of the Fluent Processing of numbers
# Dan King and Chris Janiszewski
#
# Example from:
Expand All @@ -11,18 +11,21 @@
source("ESEUR_config.r")


plot_wide()

plot_layout(3, 1, default_width=14)
plot_layout(2, 1)


lnd=read.csv(paste0(ESEUR_dir, "developers/like-n-dis.csv.xz"), as.is=TRUE)

plot(lnd$Number, lnd$Like, type="l",
xlab="Like", ylab="Number")
plot(lnd$Number, lnd$Like, type="l", col=point_col,
xaxs="i",
xlab="Number", ylab="Like\n")
lines(loess.smooth(lnd$Number, lnd$Like, span=0.3), col=loess_col)


spectrum(lnd$Like, main="Spectrum density", sub="", col=point_col,
xlab="Frequency", ylab="Density\n")

# spectrum(lnd$Dislike)

spectrum(lnd$Like)
spectrum(lnd$Dislike)
spectrum(lnd$Neutral)
# spectrum(lnd$Neutral)

43 changes: 43 additions & 0 deletions ecosystems/CompWorld85.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#
# CompWorld85.R, 3 Jun 18
# Data from:
# Hardware Roundup
# Tom Henkel
# ComputerWorld, 19 Aug 1985, pages 23--34
#
# Example from:
# Empirical Software Engineering using R
# Derek M. Jones
#
# TAG hardware performance price 1985


source("ESEUR_config.r")


library("plyr")


# Add the rows of df to the current plot: memory size against purchase
# price, each point drawn in the colour held in its col_str entry.
plot_pts <- function(df) {
  mem_size <- df$Memory.Size
  price <- df$Purchase.Price

  points(mem_size, price, col = df$col_str)

  # points(as.numeric(df$Relative.Performance), df$Purchase.Price, col=df$col_str)
}


cw85 <- read.csv(paste0(ESEUR_dir, "ecosystems/CompWorld85.csv.xz"), as.is = TRUE)

# One rainbow colour per category, stored on every row for plot_pts
cat_str <- unique(cw85$Category)
pal_col <- rainbow(length(cat_str))
cw85$col_str <- mapvalues(cw85$Category, from = cat_str, to = pal_col)

# Empty log-log axes spanning the data
plot(1, type = "n", log = "xy",
     # xlim=c(5, 5000), ylim=range(cw85$Purchase.Price),
     xlim = range(cw85$Memory.Size), ylim = range(cw85$Purchase.Price),
     xlab = "Memory (MB)", ylab = "Purchase price ($)\n")

# Points are added one category at a time
d_ply(cw85, .(Category), plot_pts)

legend(x = "bottomright", legend = cat_str, bty = "n",
       fill = pal_col, cex = 1.2)

Binary file added ecosystems/CompWorld85.csv.xz
Binary file not shown.
30 changes: 30 additions & 0 deletions ecosystems/GovMachineHistComputer.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#
# GovMachineHistComputer.R, 19 May 18
# Data from:
# The Government Machine {A} Revolutionary History of the Computer
# Jon Agar
#
# Example from:
# Empirical Software Engineering using R
# Derek M. Jones
#
# TAG government hardware card office-equipment

source("ESEUR_config.r")


pal_col <- rainbow(4)


cards <- read.csv(paste0(ESEUR_dir, "ecosystems/GovMachineHistComputer.csv.xz"), as.is = TRUE)

# The first series creates the plot, on a log scaled y-axis
plot(cards$Financial_year, cards$Typewriters_duplicators,
     log = "y", col = pal_col[1],
     ylim = c(9e3, 1e6),
     xlab = "Year", ylab = "Expenditure (in #)\n")

# Remaining series, using the next colours of pal_col in order
later_cols <- c("Other", "Tabulators_rent", "Cards")
for (col_ind in seq_along(later_cols)) {
  points(cards$Financial_year, cards[[later_cols[col_ind]]],
         col = pal_col[col_ind + 1])
}

legend(x = "topleft",
       legend = c("Typewriters/duplicators", "Other", "Tabulator rental", "Cards"),
       bty = "n", fill = pal_col, cex = 1.2)

Binary file added ecosystems/GovMachineHistComputer.csv.xz
Binary file not shown.
41 changes: 41 additions & 0 deletions ecosystems/eindor1985.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#
# eindor1985.R, 1 Jun 18
# Data from:
# Grosch's Law Re-revisited: {CPU} Power and the Cost of Computation
# Phillip Ein-Dor
#
# Example from:
# Empirical Software Engineering using R
# Derek M. Jones
#
# TAG hardware performance MIPS cost memory


source("ESEUR_config.r")


library("plyr")


# Add the rows of df to the current plot: maximum memory against average
# cost, each point drawn in the colour held in its col_str entry.
plot_pts <- function(df) {
  max_mem <- df$Max_Memory
  avg_cost <- df$Average_cost

  points(max_mem, avg_cost, col = df$col_str)
  # points(df$MIPS, df$Average_cost, col=df$col_str)
}


ein <- read.csv(paste0(ESEUR_dir, "ecosystems/eindor1985.csv.xz"), as.is = TRUE)

# One rainbow colour per category, stored on every row for plot_pts
cat_str <- unique(ein$Category)
pal_col <- rainbow(length(cat_str))
ein$col_str <- mapvalues(ein$Category, from = cat_str, to = pal_col)

# Empty log-log axes; x spans the data, y is fixed at 1 to 800
plot(0.1, type = "n", log = "xy",
     yaxs = "i",
     xlim = range(ein$Max_Memory), ylim = c(1, 800),
     xlab = "Maximum memory (Kbytes)", ylab = "Average cost\n")

# Points are added one category at a time
d_ply(ein, .(Category), plot_pts)

legend(x = "bottomright", legend = cat_str, bty = "n",
       fill = pal_col, cex = 1.2)

Binary file added ecosystems/eindor1985.csv.xz
Binary file not shown.
Loading

0 comments on commit c96a988

Please sign in to comment.