目标:读取给定的IP地址文件,解析出每个IP对应的地域名称,并输出为CSV文件。我选用的是开源的ip2region。ip2region地址
下载好后直接用PyCharm打开。因为我用的是Python,所以其他语言的实现我就忽略了。这里我对代码进行了修改,从而实现自己的目的。
主要对benchmark.py进行了修改,代码如下:
import threading
import time, sys
from ip2Region import Ip2Region
class BenchmarkThread(threading.Thread):
    """Thread that resolves each IP of a text file to a region and appends it to a CSV.

    The input file is read line by line (one IP address per line). For every
    address the searcher's ``memorySearch`` is called; the ``"region"`` value
    of its result is decoded as UTF-8 and split on ``"|"``. Fields 2 and 3 are
    used as province and city, where the value ``"0"`` is treated as "missing"
    and replaced by an empty string. One ``ip,region`` row is printed and
    appended to the output file per address.
    """

    __searcher = None
    __lock = None

    def __init__(self, searcher, lock,
                 ip_path=r"D:\****\****.txt", out_path='*****.csv'):
        """Store the collaborators and file locations used by :meth:`run`.

        Args:
            searcher: Object exposing ``memorySearch(ip)`` that returns a
                mapping whose ``"region"`` entry is UTF-8 encoded bytes.
            lock: Lock object (context manager) guarding the searcher and
                the output file.
            ip_path: Text file with one IP address per line. Defaults to the
                placeholder path used by the original script.
            out_path: File the rows are appended to. Defaults to the
                placeholder name used by the original script.
        """
        threading.Thread.__init__(self)
        self.__searcher = searcher
        self.__lock = lock
        self.__ip_path = ip_path
        self.__out_path = out_path

    def run(self):
        """Look up every non-blank line of the input file and append the result."""
        # Both files are opened once and closed deterministically, instead of
        # leaking the input handle and re-opening the output for every row.
        with open(self.__ip_path) as ip_file, open(self.__out_path, 'a') as out:
            for line in ip_file:
                # Drop the trailing newline before the lookup and ignore
                # blank lines, which are not valid addresses.
                ip = line.strip()
                if not ip:
                    continue
                with self.__lock:
                    data = self.__searcher.memorySearch(ip)
                    regions = data["region"].decode('utf-8').split("|")
                    # "0" marks an unknown field in the region string.
                    province = "" if regions[2] == "0" else regions[2]
                    city = "" if regions[3] == "0" else regions[3]
                    print(ip + "," + regions[0] + province + city + regions[4])
                    result = ip + "," + regions[0] + province + city + " " + regions[4]
                    out.write(result + "\n")
                    # Flush while still holding the lock so that rows from
                    # several threads appending to the same file cannot
                    # interleave mid-line.
                    out.flush()
if __name__ == "__main__":
    # Default database location. A raw string keeps the backslashes literal
    # without relying on invalid escape sequences such as "\p" or "\d".
    dbFile = r"D:\pythonProject\hx_hdfs_local\ip2region-master\data\ip2region.db"
    # sys.argv[0] is the script itself, so a single user-supplied argument
    # already makes len(sys.argv) == 2; the previous "> 2" test ignored it.
    if len(sys.argv) > 1:
        dbFile = sys.argv[1]

    threads = []
    searcher = Ip2Region(dbFile)
    lock = threading.Lock()
    # A single worker is created; every worker shares the searcher and lock.
    for i in range(1):
        t = BenchmarkThread(searcher, lock)
        threads.append(t)

    # Wall-clock timestamps around the whole run (reporting is disabled below).
    sTime = time.time()
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    eTime = time.time()
    # print("Benchmark done: %5f" % (eTime - sTime))
结果对比:
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)