python中的大量字符串替换？_随笔

python中的大量字符串替换？

# ANSI colour escape sequences, keyed by the "&x" marker that selects each one.
mydict = {"&y": "\033[0;30m",
          "&c": "\033[0;31m",
          "&b": "\033[0;32m",
          "&Y": "\033[0;33m",
          "&u": "\033[0;34m"}
mystr = "The &yquick &cbrown &bfox &Yjumps over the &ulazy dog"

# Replace every marker in turn. items() and print() keep the snippet runnable
# on Python 3 (iteritems() and the print statement exist on Python 2 only).
for k, v in mydict.items():
    mystr = mystr.replace(k, v)

print(mystr)
# Output (the ESC character is rendered as ←):
# The ←[0;30mquick ←[0;31mbrown ←[0;32mfox ←[0;33mjumps over the ←[0;34mlazy dog

我比较了一些解决方案：

# Benchmark of several ways to replace many "&x" tokens inside one string.
# Written to run on both Python 2.6+ and Python 3: every print call takes a
# single pre-formatted argument, which prints identically on both.
from random import randint
from re import sub, compile, escape
from time import time

# One "&<letter>" key per ASCII letter; the value is the letter's code point
# written as decimal text.
mydict = dict([('&' + chr(i), str(i)) for i in list(range(65, 91)) + list(range(97, 123))])

# random inserts between keys
rawstr = ''.join(mydict.keys())
mystr = ''
for i in range(0, len(rawstr), 2):
    # randint() includes both endpoints, so 90 ('Z') is the last filler letter.
    mystr += chr(randint(65, 90)) * randint(0, 20)  # insert between 0 and 20 chars
    # Append the two-character key itself. Without this line mystr holds
    # filler only and none of the solutions ever performs a replacement.
    mystr += rawstr[i:i + 2]

# How many times to run each solution
rep = 10000


# Claudiu
def multiple_replace(dict, text):
    """Return text with every key of dict replaced by its value.

    A fresh alternation regex is built from the keys on each call, which is
    what the "variable dict" timing measures.
    """
    # Create a regular expression from the dictionary keys
    regex = compile("(%s)" % "|".join(map(escape, dict.keys())))
    # For each match, look-up corresponding value in dictionary
    return regex.sub(lambda mo: dict[mo.string[mo.start():mo.end()]], text)


# Andrew Y - variable dict
def mysubst(somestr, somedict):
    """Replace "&x" tokens by splitting on "&" and looking up each token.

    Every "&" in somestr must be followed by a character that forms a key of
    somedict; otherwise the lookup raises KeyError.
    """
    subs = somestr.split("&")
    return subs[0] + "".join(map(lambda arg: somedict["&" + arg[0:1]] + arg[1:], subs[1:]))


# Andrew Y - fixed
def repl(s):
    """Expand one split segment using the module-level mydict."""
    return mydict["&" + s[0:1]] + s[1:]


if __name__ == '__main__':
    # NOTE(review): mystr now contains the keys, so timings will differ from
    # figures measured on a filler-only string.
    print('Running %d times with string length %d and '
          'random inserts of lengths 0-20' % (rep, len(mystr)))

    # My solution
    t = time()
    for x in range(rep):
        for k, v in mydict.items():
            mystr.replace(k, v)
        #print(mystr)
    print('%-30s %s' % ('Tor fixed & variable dict', time() - t))

    # Peter Hansen
    t = time()
    for x in range(rep):
        # \1 turns each "&x" into "%(&x)s", which "% mydict" then fills in.
        sub(r'(&[a-zA-Z])', r'%(\1)s', mystr) % mydict
    print('%-30s %s' % ('Peter fixed & variable dict', time() - t))

    # Claudiu
    t = time()
    for x in range(rep):
        multiple_replace(mydict, mystr)
    print('%-30s %s' % ('Claudio variable dict', time() - t))

    # Claudiu - Precompiled
    regex = compile("(%s)" % "|".join(map(escape, mydict.keys())))
    t = time()
    for x in range(rep):
        regex.sub(lambda mo: mydict[mo.string[mo.start():mo.end()]], mystr)
    print('%-30s %s' % ('Claudio fixed dict', time() - t))

    # Andrew Y - variable dict
    t = time()
    for x in range(rep):
        mysubst(mystr, mydict)
    print('%-30s %s' % ('Andrew Y variable dict', time() - t))

    # Andrew Y - fixed
    t = time()
    for x in range(rep):
        subs = mystr.split("&")
        res = subs[0] + "".join(map(repl, subs[1:]))
    print('%-30s %s' % ('Andrew Y fixed dict', time() - t))

Python 2.6的结果

Running 10000 times with string length 490 and random inserts of lengths 0-20
Tor fixed & variable dict      1.04699993134
Peter fixed & variable dict    0.218999862671
Claudio variable dict          2.48400020599
Claudio fixed dict             0.0940001010895
Andrew Y variable dict         0.0309998989105
Andrew Y fixed dict            0.0310001373291

Claudiu 和 Andrew 的方案测得的耗时一直显示为 0，因此我不得不把运行次数增加到 10000 次。

我在 Python 3 中运行了它（因为需要 Unicode 支持），被替换字符的码位范围是 39 到 1024（38 是 & 符号，所以我没有把它包含进去）。字符串长度最大约为 10,000，其中包含约 980 个待替换的键，以及长度为 0-20 的可变随机插入。码位 39 到 1024 的 Unicode 字符编码后既有 1 字节的，也有 2 字节的，这可能会影响某些解决方案。

# Python 3 benchmark: replace "&x" tokens where x ranges over code points
# 39..1023 (38 is "&" itself and is left out on purpose).
from random import randint
from re import sub, compile, escape
from time import time

mydict = dict([('&' + chr(i), str(i)) for i in range(39, 1024)])

# random inserts between keys
rawstr = ''.join(mydict.keys())
mystr = ''
for i in range(0, len(rawstr), 2):
    # randint() includes both endpoints, so 90 ('Z') is the last filler letter.
    mystr += chr(randint(65, 90)) * randint(0, 20)  # insert between 0 and 20 chars
    # Append the two-character key itself. Without this line mystr holds
    # filler only and none of the solutions ever performs a replacement.
    mystr += rawstr[i:i + 2]

# How many times to run each solution
rep = 10000


# Peter 2
def dictsub(m):
    """Return the replacement for a regex match, looked up in mydict.

    Must be used while mydict still has its "&x" keys, i.e. before the
    benchmark rebinds mydict to the ampersand-free variant.
    """
    return mydict[m.group()]


# Andrew Y - variable dict
def mysubst(somestr, somedict):
    """Replace "&x" tokens by splitting on "&"; somedict is keyed by x alone."""
    subs = somestr.split("&")
    return subs[0] + "".join(map(lambda arg: somedict[arg[0:1]] + arg[1:], subs[1:]))


def mysubst2(somestr, somedict):
    """Variant of mysubst that assembles the result with a single join.

    The leading segment is passed to join as the first item. Calling
    subs[0].join(...) instead would use it as a separator between every
    replaced piece and produce a different string.
    """
    subs = somestr.split("&")
    return "".join([subs[0]] + list(map(lambda arg: somedict[arg[0:1]] + arg[1:], subs[1:])))


# Andrew Y - fixed
def repl(s):
    """Expand one split segment using the module-level mydict.

    Expects mydict to be keyed by the single character after "&", which is
    only true after the benchmark has rebound it.
    """
    return mydict[s[0:1]] + s[1:]


if __name__ == '__main__':
    # NOTE(review): mystr now contains the keys, so timings will differ from
    # figures measured on a filler-only string.
    print('Running %d times with string length %d and '
          'random inserts of lengths 0-20' % (rep, len(mystr)))

    # Tor Valamo - too long
    #t = time()
    #for x in range(rep):
    #    for k, v in mydict.items():
    #        mystr.replace(k, v)
    #print('%-30s' % 'Tor fixed & variable dict', time()-t)

    # Peter Hansen
    t = time()
    for x in range(rep):
        # \1 turns each "&x" into "%(&x)s", which "% mydict" then fills in.
        sub(r'(&[a-zA-Z])', r'%(\1)s', mystr) % mydict
    print('%-30s' % 'Peter fixed & variable dict', time()-t)

    # Peter 2
    t = time()
    for x in range(rep):
        sub(r'(&[a-zA-Z])', dictsub, mystr)
    print('%-30s' % 'Peter fixed dict', time()-t)

    # Claudiu - too long
    #def multiple_replace(dict, text):
    #    # Create a regular expression  from the dictionary keys
    #    regex = compile("(%s)" % "|".join(map(escape, dict.keys())))
    #
    #    # For each match, look-up corresponding value in dictionary
    #    return regex.sub(lambda mo: dict[mo.string[mo.start():mo.end()]], text)
    #
    #t = time()
    #for x in range(rep):
    #    multiple_replace(mydict, mystr)
    #print('%-30s' % 'Claudio variable dict', time()-t)

    # Claudiu - Precompiled
    regex = compile("(%s)" % "|".join(map(escape, mydict.keys())))
    t = time()
    for x in range(rep):
        regex.sub(lambda mo: mydict[mo.string[mo.start():mo.end()]], mystr)
    print('%-30s' % 'Claudio fixed dict', time()-t)

    # Separate setup for Andrew and gnibbler optimized dict
    # From here on mydict is keyed by the character after "&" only.
    mydict = dict((k[1], v) for k, v in mydict.items())

    # Andrew Y - variable dict
    t = time()
    for x in range(rep):
        mysubst(mystr, mydict)
    print('%-30s' % 'Andrew Y variable dict', time()-t)

    t = time()
    for x in range(rep):
        mysubst2(mystr, mydict)
    print('%-30s' % 'Andrew Y variable dict 2', time()-t)

    # Andrew Y - fixed
    t = time()
    for x in range(rep):
        subs = mystr.split("&")
        res = subs[0] + "".join(map(repl, subs[1:]))
    print('%-30s' % 'Andrew Y fixed dict', time()-t)

    # gnibbler
    t = time()
    for x in range(rep):
        myparts = mystr.split("&")
        myparts[1:] = [mydict[x[0]] + x[1:] for x in myparts[1:]]
        "".join(myparts)
    print('%-30s' % 'gnibbler fixed & variable dict', time()-t)

结果：

Running 10000 times with string length 9491 and random inserts of lengths 0-20
Tor fixed & variable dict      0.0 # disqualified 329 secs
Peter fixed & variable dict    2.07799983025
Peter fixed dict               1.53100013733
Claudio variable dict          0.0 # disqualified, 37 secs
Claudio fixed dict             1.5
Andrew Y variable dict         0.578000068665
Andrew Y variable dict 2       0.56299996376
Andrew Y fixed dict            0.56200003624
gnibbler fixed & variable dict 0.530999898911

（** 请注意：gnibbler 的代码使用了另一个字典，其键不包含 '&'。Andrew 的代码也使用了这个备用字典，但差别不大，大概只有 0.01 倍的加速。）

欢迎分享，转载请注明来源：内存溢出

原文地址: http://outofmemory.cn/zaji/5643892.html

python中的大量字符串替换？

发表评论

评论列表（0条）