下面用三种不同的方式逐词(以空白分隔的单词为单位)遍历文件内容,并比较各自的耗时(以古腾堡计划的《绿野仙踪》文本为例):
from __future__ import with_statement

import re
import time

try:
    from cStringIO import StringIO  # Python 2: C-accelerated in-memory file
except ImportError:
    from io import StringIO  # Python 3: cStringIO was removed

# Raw string so that ``\w`` reaches the regex engine intact; the pattern
# 'w+' (no backslash) would only match runs of the literal letter "w".
# Compiled once at import time rather than on every call.
_WORD_RE = re.compile(r'\w+')


def word_iter_std(filename):
    """Yield whitespace-separated words from *filename*, line by line.

    The file is read lazily through normal file iteration. Once the
    generator is exhausted, the wall-clock time since the first ``next()``
    call is printed; that figure includes whatever time the consumer spent
    between items.
    """
    start = time.time()
    with open(filename) as f:
        for line in f:
            for word in line.split():
                yield word
    print('iter_std took %0.6f seconds' % (time.time() - start))


def word_iter_re(filename):
    """Yield words from *filename* using a regular expression.

    The whole file is read into memory and scanned with ``\\w+``. Each
    matched word is yielded as a string, like the other iterators in this
    script. Because ``\\w`` excludes punctuation, tokens can differ from the
    ``str.split()``-based iterators on text containing punctuation.
    Elapsed wall-clock time is printed once the generator is exhausted.
    """
    start = time.time()
    with open(filename) as f:
        txt = f.read()
    for match in _WORD_RE.finditer(txt):
        # Yield the matched text rather than the match object itself.
        yield match.group()
    print('iter_re took %0.6f seconds' % (time.time() - start))


def word_iter_stringio(filename):
    """Yield whitespace-separated words from *filename* via a StringIO buffer.

    The whole file is read into an in-memory buffer, which is then iterated
    line by line and split on whitespace. Elapsed wall-clock time is printed
    once the generator is exhausted.
    """
    start = time.time()
    with open(filename) as f:
        buf = StringIO(f.read())
    for line in buf:
        for word in line.split():
            yield word
    print('iter_io took %0.6f seconds' % (time.time() - start))


# Path of the sample text the timing run reads.
woo = '/tmp/woo.txt'

if __name__ == '__main__':
    # Exhaust each generator so that its timing line is printed.
    for word_iter in (word_iter_std, word_iter_re, word_iter_stringio):
        for word in word_iter(woo):
            pass
运行结果:
% python /tmp/junk.py
iter_std took 0.016321 seconds
iter_re took 0.028345 seconds
iter_io took 0.016230 seconds
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)