# Build the combined mutual-information tables.
#
# Step 1: stack the BNC and Gutenberg results and prepend a `Type` column
# whose value is 'English' for every row; save that as the English corpus.
mi <- do.call(
  rbind,
  lapply(
    c('MutualInformationBNC.csv', 'MutualInformationGutenberg.csv'),
    read.csv
  )
)
Type <- rep('English', nrow(mi))
mi <- cbind(Type, mi)
write.csv(mi, 'MutualInformationEnglishCorpus.csv', row.names=FALSE)

# Step 2: append the BPIC data sets and save the complete table.
# NOTE(review): rbind() on data frames needs matching column names, so the
# BPIC file presumably already carries its own `Type` column -- confirm.
mi <- rbind(mi, read.csv('MutualInformationBPICDataSets.csv'))
write.csv(mi, 'MutualInformationComplete.csv', row.names=FALSE)

# Summarise the complete table per (Corpus, Dist) group: group size, standard
# deviation, mean, standard error of the mean and 95% confidence half-width
# for columns 5 and 6 of `mi`.
# NOTE(review): columns 5 and 6 are addressed by position; the names assigned
# below suggest they hold the raw and the normalised MI -- confirm against the
# CSV header.
mi <- read.csv('MutualInformationComplete.csv')
mi$Dist <- as.factor(mi$Dist)

s <- local({
  # One grouping definition shared by all three aggregations, so their rows
  # come back in the same order and can be bound side by side.
  by_group <- list(Corpus=mi$Corpus, Dist=mi$Dist)
  group_n <- aggregate(mi[,5], by_group, NROW)
  group_sd <- aggregate(mi[,5:6], by_group, sd)
  group_mean <- aggregate(mi[,5:6], by_group, mean)
  # Columns 1:2 of each aggregate are the grouping keys; keep them only once.
  cbind(group_n, group_sd[,3:4], group_mean[,3:4])
})
colnames(s) <- c('Corpus', 'Dist', 'n', 'MI.sd', 'MI.norm.sd', 'MI.mean', 'MI.norm.mean')

# Standard error of the mean: sd / sqrt(n).
root_n <- sqrt(s$n)
s$MI.sem <- s$MI.sd / root_n
s$MI.norm.sem <- s$MI.norm.sd / root_n

# Two-sided 95% confidence half-width: sem times the Student-t quantile with
# n - 1 degrees of freedom.
t_quantile <- qt(.95/2 + 0.5, s$n-1)
s$MI.ci <- s$MI.sem * t_quantile
s$MI.norm.ci <- s$MI.norm.sem * t_quantile

write.csv(s, 'MutualInformation.Summary.By.Corpus.csv', row.names=FALSE)

# Turn the Dist factor back into the numbers its labels spell out (going via
# the labels, not the internal integer codes) so it can be used as an axis.
s$Dist <- as.numeric(as.character(s$Dist))


# Plot the per-distance mean of the MI.norm.mean column for each corpus on a
# logarithmic distance axis, together with the reference curve exp(-x/5), and
# write the figure to MutualInformationPlot.pdf.

# Single source of truth for how each corpus is drawn. Both the curves and the
# legend are generated from this table, so they cannot get out of step.
series <- data.frame(
  corpus = c("BNC", "Gutenberg", "BPIC2012", "BPIC2013"),
  col = c("red", "magenta", "blue", "green"),
  pch = c(0, 3, 1, 2),
  stringsAsFactors = FALSE
)

# Reference curve drawn in black on top of the corpus curves.
f1 <- function(x) exp(-x/5)

pdf('MutualInformationPlot.pdf')
# Close the device even when a plotting call fails; otherwise the PDF device
# stays open and the file is left incomplete.
tryCatch({
  # Empty frame first, so that no single corpus is required to set up the
  # axes. The dummy point (1, 1) is never drawn (type = "n").
  plot(1, 1, type="n", ylab="Mutual Information I(X,Y) in bits", xlab="Distance D(X,Y)", xlim=c(1,10000), ylim=c(0,1), log="x")

  for (i in seq_len(nrow(series))) {
    # which() drops NA comparisons instead of selecting all-NA rows.
    rows <- s[which(s$Corpus == series$corpus[i]), ]
    lines(rows$Dist, rows$MI.norm.mean, type="b", pch=series$pch[i], col=series$col[i])
  }

  curve(f1(x), from=1, to=10000, add=TRUE, col="black")

  # One legend entry per drawn curve; pch = NA gives the reference curve a
  # plain line without a marker.
  legend(
    "topright",
    legend=c(series$corpus, "exp(-D/5)"),
    col=c(series$col, "black"),
    pch=c(series$pch, NA),
    lwd=1
  )
}, finally = dev.off())

