| 注册
请输入搜索内容

热门搜索

Java Linux MySQL PHP JavaScript Hibernate jQuery Nginx
en9
10年前发布

Python 的 MapReduce 单词统计

#!/usr/bin/env python
"""Map/reduce style word count: test-data generator and reducer.

This listing combines two pieces:

1. a generator that fills ``word.txt`` with random lowercase words, and
2. the reducer (``wordcount_reducer.py``) that sums tab-separated
   ``word<TAB>count`` records read from standard input.

Usage:

    # create the input file
    ./wordcount_reducer.py generate

    # run the pipeline (the mapper script is not shown in this listing)
    cat word.txt | ./wordcount_mapper.py | ./wordcount_reducer.py
"""

import random
import sys
from operator import itemgetter

# 'abc..z'
alphaStr = "".join(map(chr, range(97, 123)))

# Number of words written to word.txt by default.
maxIter = 100000


def random_words(count):
    """Yield ``count`` random words of 1 to 5 letters taken from ``alphaStr``."""
    for _ in range(count):
        length = random.randint(1, 5)
        yield "".join(random.choice(alphaStr) for _ in range(length))


def generate_words(path="word.txt", count=maxIter):
    """Write ``count`` random words to ``path``, one word per line.

    Args:
        path: file to create or overwrite.
        count: number of words (lines) to write.
    """
    # ``with`` closes the file even if a write fails part-way through.
    with open(path, "w") as fp:
        # Each word is written once, after it has been fully built.
        for word in random_words(count):
            fp.write(word + "\n")


# word count reducer, Python
# filename: wordcount_reducer.py
def reduce_counts(lines):
    """Sum the counts of ``word<TAB>count`` records and sort them by word.

    Args:
        lines: iterable of text records; surrounding whitespace is stripped
            and each record is split on the first tab only.

    Returns:
        A list of ``(word, total)`` tuples ordered by word.

    Records that have no tab separator or whose count is not an integer are
    skipped rather than aborting the whole run.
    """
    wordcount = {}
    for line in lines:
        try:
            # Both the unpacking and int() raise ValueError on a bad record.
            word, count = line.strip().split("\t", 1)
            wordcount[word] = wordcount.get(word, 0) + int(count)
        except ValueError:
            continue
    return sorted(wordcount.items(), key=itemgetter(0))


def main(argv=None):
    """Run the generator or the reducer, depending on the first argument.

    With ``generate`` as the first argument, ``word.txt`` is written.
    Otherwise the records on standard input are reduced and printed as
    ``word<TAB>total`` lines.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else argv
    if args and args[0] == "generate":
        generate_words()
        return
    for word, count in reduce_counts(sys.stdin):
        # Format first, then print: valid on both Python 2 and Python 3.
        print("%s\t%s" % (word, count))


if __name__ == "__main__":
    main()