# Build a rank / relative-frequency (Zipf-style) table for each BPIC extract
# file and write all of the tables into a single CSV file.
from collections import Counter

inPath = "bpi_data"
outName = "ZipfBPICDataSets.csv"
eoc = '[EOC]'  # marker token that is excluded from the frequency counts

# Input files, read from inPath, in the order their rows are written.
inNames = [
    'BPIC_Challenge_2012.extract.txt',
    'BPIC_Challenge_2012.extract.complete.txt',
    'BPIC_Challenge_2012.extract.with.resource.txt',
    'BPIC_Challenge_2012.extract.complete.with.resource.txt',
    'BPI_Challenge_2013_closed_problems.extract.txt',
    'BPI_Challenge_2013_incidents.extract.txt',
    'BPI_Challenge_2013_closed_problems.extract.with.group.txt',
    'BPI_Challenge_2013_incidents.extract.with.group.txt',
]

# Maximum number of rows written per dataset. The original loop tested
# "rank > 100" on the zero-based index *after* writing the row, so it
# emitted indices 0..101, i.e. 102 rows; that count is preserved here.
maxRows = 102


def zipf_rows(tokens, marker=eoc, limit=maxRows):
    """Return (rank, relative frequency) pairs for the most frequent tokens.

    Args:
        tokens: list of whitespace-separated tokens of one dataset.
        marker: token that is ignored when counting occurrences.
        limit: maximum number of pairs returned.

    Returns:
        A list of ``(rank, rel_freq)`` tuples ordered by descending count.
        ``rank`` starts at 1 and ``rel_freq`` is the token's frequency
        divided by the frequency of the most common token, so the first
        entry is always 1.0.  The list is empty when ``tokens`` holds no
        token other than ``marker`` (previously this case crashed).
    """
    counts = Counter(tok for tok in tokens if tok != marker)
    ordered = sorted(counts.values(), reverse=True)
    if not ordered:
        # Nothing to rank: avoids IndexError / division by zero below.
        return []

    # The total deliberately counts marker tokens too, as the original did.
    # It cancels out mathematically (freq / total / top == freq / ordered[0])
    # but the same sequence of divisions is kept so the floating-point
    # output stays identical to what the script produced before.
    total = float(len(tokens))
    top = ordered[0] / total
    return [
        (rank, freq / total / top)
        for rank, freq in enumerate(ordered[:limit], 1)
    ]


def write_zipf_csv(in_dir, in_names, out_path):
    """Write the rank / relative-frequency rows of every dataset to a CSV.

    Args:
        in_dir: directory containing the input files.
        in_names: input file names; each name is also used as the value of
            the first CSV column.
        out_path: path of the CSV file to create (overwritten if present).
    """
    # Context managers close both files even if reading or writing fails;
    # the input files were previously never closed at all.
    with open(out_path, "w") as out_file:
        # Header text is kept byte-for-byte (including its spelling and the
        # trailing space) in case downstream consumers key on it.
        out_file.write("Dateset, Rank, RelFreq \n")
        for in_name in in_names:
            print(in_name)
            with open(in_dir + "/" + in_name, "r") as in_file:
                # split() with no argument already splits on newlines.
                tokens = in_file.read().split()
            for rank, rel_freq in zipf_rows(tokens):
                out_file.write(
                    in_name + ", {0}, {1}\n".format(rank, rel_freq))
            # Make each finished dataset visible on disk right away.
            out_file.flush()


if __name__ == "__main__":
    write_zipf_csv(inPath, inNames, outName)
