"""Report name prefixes that occur many times in a wikidump folder.

Every entry of the folder given on the command line whose name contains a
':' is grouped by the text before its first ':'.  Prefixes shared by more
than 25 entries are written to ``report.txt`` and echoed to stdout.
"""

import codecs
import itertools
import os
import re
import sys

try:
    import cPickle
except ImportError:
    # Python 3 has no cPickle module; keep the name available via pickle.
    import pickle as cPickle


def writeReport(lastArt, count, r, threshold=25):
    """Write one report line for a prefix if it occurred often enough.

    Args:
        lastArt: The prefix (text before the first ':') being reported.
        count: Number of directory entries that carried this prefix.
        r: Writable text file object that receives the report line.
        threshold: The prefix is reported only when ``count`` exceeds this.

    Returns:
        The tuple ``(lastArt, 0)``: the same prefix with a reset counter.
    """
    if count > threshold:
        message = "%s occurred %s times" % (lastArt, count)
        r.write(message + "\n")
        # Format first, then print: `print("...") % (...)` only works as a
        # Python 2 print statement and raises TypeError on Python 3.
        print(message)
    return (lastArt, 0)


def _prefix(name):
    """Return the text before the first ':' in name, or None without a ':'."""
    head, separator, _ = name.partition(':')
    return head if separator else None


def _prefix_counts(names):
    """Yield (prefix, occurrences) for each run of equal prefixes in names.

    Names without a ':' carry no prefix; they end the current run and are
    not counted themselves.
    """
    for prefix, run in itertools.groupby(names, key=_prefix):
        if prefix is not None:
            yield prefix, sum(1 for _ in run)


def main():
    """List the folder named by the last CLI argument and report prefixes.

    Writes ``report.txt`` in the current working directory.  Exits with
    status 0 after printing a usage hint when no folder argument is given.
    """
    if len(sys.argv) < 2:
        print("Provide wikidump folder name")
        sys.exit(0)

    print("Listing")
    articles = os.listdir(sys.argv[-1])
    print("SORTING")
    # Sorting makes all names that share a prefix adjacent, which the
    # run-based grouping below relies on.
    articles.sort()
    print("FINISHED SORTING")

    # The context manager closes the report even if writing fails midway.
    with open('report.txt', 'w') as report:
        # Every run is reported once it is complete, including the last one,
        # and each count is the real number of entries in the run.
        for prefix, occurrences in _prefix_counts(articles):
            writeReport(prefix, occurrences, report)


if __name__ == '__main__':
    main()