import os
from collections import Counter

# Root directory that is searched recursively for corpus text files.
inPath = "/home/joerg/PycharmProjects/TFStart/corpus/masc_500k_texts"
# Only files whose name ends with this suffix are processed.
ending = ".txt"
# CSV file (created in the current working directory) receiving all rows.
outName = "ZipfMASC.csv"
# Token that is excluded from the frequency counts.
eoc = '.'
# Maximum number of ranked rows written per input file.
# NOTE(review): 102 reproduces the original `if rank > 100: break` check,
# which ran *after* the write for 0-based ranks 0..101. If a top-100 table
# was intended, change this to 100.
maxRows = 102


def zipf_rows(tokens, skip=eoc, limit=maxRows):
    """Return ``(rank, value)`` pairs for the most frequent tokens.

    Args:
        tokens: List of whitespace-separated tokens of one document.
        skip: Token that is left out of the counts.
        limit: Maximum number of pairs returned.

    Returns:
        A list of ``(rank, value)`` tuples ordered by descending count,
        with ``rank`` starting at 1. ``value`` is the token's share of all
        tokens divided by the share of the most frequent token, so the
        first entry is always 1.0. The list is empty when ``tokens``
        contains nothing but ``skip`` tokens (or nothing at all); the
        original script crashed with an IndexError in that case.
    """
    counts = sorted(
        Counter(tok for tok in tokens if tok != skip).values(), reverse=True
    )
    if not counts:
        return []

    # The arithmetic is kept exactly as freq / total / top_share (rather than
    # the algebraically equal freq / counts[0]) so the emitted floats stay
    # identical to earlier runs. `total` includes the skipped tokens.
    total = float(len(tokens))
    top_share = counts[0] / total
    return [
        (rank, freq / total / top_share)
        for rank, freq in enumerate(counts[:limit], start=1)
    ]


# The context managers guarantee that the output file and every input file
# are closed, even when reading one of the corpus files fails.
with open(outName, "w") as outFile:
    outFile.write("Dateset, Rank, RelFreq \n")

    for root, dirs, fnames in os.walk(inPath):
        for fname in fnames:
            if not fname.endswith(ending):
                continue

            inName = os.path.join(root, fname)
            print(inName)

            with open(inName, "r") as inFile:
                # split() without arguments already splits on newlines.
                inText = inFile.read().split()

            for rank, value in zipf_rows(inText):
                outFile.write(inName + ", {0}, {1}\n".format(rank, value))
            # Flush per input file so partial results are on disk early.
            outFile.flush()

