python按层级找出xml文件的差异

python按层级找出xml文件的差异,第1张

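使用Python可以按层级比较XML文件的差异:可以先用lxml(或标准库xml.etree.ElementTree)把两个XML文件解析成树,再逐层递归比较节点的标签、属性和文本;也可以使用第三方库xmldiff直接得到树形结构的差异。标准库difflib中的unified_diff()、ndiff()等函数只做按行的文本比对,并不理解XML的层级结构。下面的示例脚本演示的是更基础的做法:递归遍历两个目录,并通过MD5哈希找出其中内容不同的文件。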

import hashlib

import os

import sys

# Both directories must be given on the command line; otherwise print the
# usage text and stop.
if len(sys.argv) < 3:
    print("You need to specify two directories:")
    print(sys.argv[0], "<directory 1><directory 2>")
    sys.exit(1)  # non-zero status so a calling shell can detect the usage error

# The two directory trees to compare, in the order they were given.
directory1 = sys.argv[1]
directory2 = sys.argv[2]

print("Comparing:")
print(directory1)
print(directory2)

for directory in [directory1, directory2]:
    # A bare existence check would also accept regular files, which the
    # directory walk further down cannot handle; require a real directory.
    if not os.path.isdir(directory):
        print(directory, "is not a valid directory!")
        sys.exit(1)

def md5(file_path):
    """Return the MD5 hex digest of the file at *file_path*.

    Directories have no content to hash, so the fixed placeholder ``'1'``
    is returned for them; any two directories therefore compare as equal.

    The file is read as raw bytes, in chunks. Reading in text mode would
    fail on files that are not valid text in the default encoding, and
    newline translation would make files that differ only in line endings
    hash identically.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    if os.path.isdir(file_path):
        return '1'
    the_hash = hashlib.md5()
    # The context manager closes the handle even if a read fails part-way.
    with open(file_path, 'rb') as read_file:
        # iter() with a b'' sentinel stops at end of file.
        for chunk in iter(lambda: read_file.read(65536), b''):
            the_hash.update(chunk)
    return the_hash.hexdigest()

def directory_listing(directory_name):
    """Walk *directory_name* and collect every entry beneath it.

    Returns a ``(listing, root)`` tuple. ``listing`` is a dict whose keys
    are the paths of all files and sub-directories relative to the walk
    root, each mapped to ``True``. ``root`` is the first path yielded by
    ``os.walk``, or ``None`` when the walk yields nothing.

    The directory name and the detected root are printed once, when the
    first directory of the walk is seen.
    """
    listing = {}
    root = None
    prefix_len = 0
    for current, subdirs, filenames in os.walk(directory_name):
        if root is None:
            # The first path yielded is the walk root; remember how many
            # characters to strip from every later path.
            root, prefix_len = current, len(current)
            print("dir", directory_name)
            print("root is", root)
        relative = current[prefix_len:]
        if relative.startswith(os.path.sep):
            # Drop the separator left over after removing the root prefix.
            relative = relative[1:]
        listing.update(
            (os.path.join(relative, entry), True)
            for entry in filenames + subdirs
        )
    return (listing, root)

# Collect the relative paths found under each directory tree.
dir1_file_list, dir1_root = directory_listing(directory1)
dir2_file_list, dir2_root = directory_listing(directory2)

# Maps each relative path to a one-line verdict about it.
results = {}

# Classify everything in directory 2 against directory 1.
for file_path in dir2_file_list:
    if file_path in dir1_file_list:
        file1 = os.path.join(dir1_root, file_path)
        file2 = os.path.join(dir2_root, file_path)
        if md5(file1) == md5(file2):
            results[file_path] = "is the same in both"
        else:
            results[file_path] = "is different in directory 2"
    else:
        results[file_path] = "not found in directory 1"

# Anything in directory 1 without a verdict yet never appeared in directory 2.
for file_path in dir1_file_list:
    results.setdefault(file_path, "not found in directory 2")

# Report in path order.
for path in sorted(results):
    print(path, results[path])


欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/tougao/11998883.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2023-05-20
下一篇 2023-05-20

发表评论

登录后才能评论

评论列表(0条)

保存