# ---- Assemble the combined input tables --------------------------------------
# Stack the two English-language corpus tables (BNC first, then Gutenberg).
english <- rbind(read.csv("ZipfBNC.csv"), read.csv("ZipfGutenberg.csv"))

# Tag every English row and put the tag in the first column.
Type <- rep("English", nrow(english))
english <- cbind(Type, english)
write.csv(english, file = "ZipfEnglishCorpus.csv", row.names = FALSE)

# Append the BPIC rows and save the complete table.
# NOTE(review): rbind() needs matching columns, so 'ZipfBPICDataSets.csv'
# presumably already carries its own 'Type' column -- confirm.
z <- rbind(english, read.csv("ZipfBPICDataSets.csv"))
write.csv(z, file = "ZipfComplete.csv", row.names = FALSE)

# ---- Plot 1: one rank/frequency curve per corpus, plus the Zipf reference ----
# Reload the combined table written to 'ZipfComplete.csv'.
z <- read.csv("ZipfComplete.csv")

# Mean of the 5th column of z for every (Type, Corpus, Rank) combination.
# NOTE(review): the value column is addressed by position and is labelled
# 'RelFreq' below -- confirm column 5 really is the relative frequency.
s <- aggregate(z[, 5], list(Type = z$Type, Corpus = z$Corpus, Rank = z$Rank), mean)
colnames(s) <- c("Type", "Corpus", "Rank", "RelFreq")
write.csv(s, "Zipf.Summary.By.Corpus.csv", row.names = FALSE)

# Single source of truth for how each corpus is drawn; the legend is built
# from the same table so curves and legend cannot drift apart.
curves <- data.frame(
  corpus = c("BNC", "Gutenberg", "BPIC2012", "BPIC2013"),
  col = c("red", "magenta", "blue", "darkgreen"),
  lty = c(2, 3, 4, 5),
  stringsAsFactors = FALSE
)

pdf("ZipfPlot1.pdf")

# Set up the axes only (type = "n"); the curves are added in the loop below.
plot(s$Rank, s$RelFreq, type = "n",
     xlab = "Rank", ylab = "Relative Frequency",
     xlim = c(1, 100), ylim = c(0, 1))

for (i in seq_len(nrow(curves))) {
  rows <- s$Corpus == curves$corpus[i]
  lines(s$Rank[rows], s$RelFreq[rows],
        lty = curves$lty[i], lwd = 2, col = curves$col[i])
}

# Reference curve 1/x. Drawn with lwd = 2 so that it matches the legend entry
# (previously it used the default width while the legend showed lwd = 2).
f1 <- function(x) x^(-1)
plot(f1, 1, 100, add = TRUE, col = "black", lwd = 2)

legend("topright",
       c(curves$corpus, "Zipf's Law"),
       col = c(curves$col, "black"),
       lty = c(curves$lty, 1),
       border = "black", lwd = 2)

dev.off()

# ---- Plot 2: mean rank/frequency curve per corpus type, plus Zipf reference --
# Reload the combined table written to 'ZipfComplete.csv'.
z <- read.csv("ZipfComplete.csv")

# Mean of the 5th column of z for every (Type, Rank) combination.
# NOTE(review): the value column is addressed by position and is labelled
# 'RelFreq' below -- confirm column 5 really is the relative frequency.
s <- aggregate(z[, 5], list(Type = z$Type, Rank = z$Rank), mean)
colnames(s) <- c("Type", "Rank", "RelFreq")
write.csv(s, "Zipf.Summary.By.Type.csv", row.names = FALSE)

pdf("ZipfPlot2.pdf")

# The values are plotted untransformed on linear axes, so the axis titles say
# so (they previously claimed log(Rank) / log(Relative Frequency)).
# NOTE(review): if a log-log figure was intended, add log = "xy" and give
# ylim a strictly positive lower bound instead of 0.
plot(s$RelFreq[s$Type == "English"] ~ s$Rank[s$Type == "English"],
     type = "l", lty = 2, lwd = 2, col = "red",
     xlab = "Rank", ylab = "Relative Frequency",
     xlim = c(1, 100), ylim = c(0, 1))

lines(s$RelFreq[s$Type == "Process"] ~ s$Rank[s$Type == "Process"],
      type = "l", lty = 3, lwd = 2, col = "blue")

# Reference curve 1/x.
f1 <- function(x) x^(-1)
plot(f1, 1, 100, add = TRUE, col = "black", lwd = 2)

legend("topright",
       c("English", "Process", "Zipf's Law"),
       col = c("red", "blue", "black"),
       border = "black", lty = c(2, 3, 1), lwd = 2)

dev.off()

