先用sqlite做一个版本,sqlite是一个关系型的嵌入式数据库,可以支持复杂的数据库逻辑,但是,相对的效率也不是很nice,而这里要用到的数据逻辑并不复杂,因此可以使用KV数据库,改进版本是使用了RedIEs 这一Nosql数据库,但RedIEs需要在使用机上开启监听端口,因此也不是很适合,最后改用了LevelDB,效率较sqlite提高了20倍左右。
__author__ = 'glcsnz123'# -*- Coding: utf-8 -*-import time,os,datetimeimport thread,sys,signalimport sqlite3import logging,atexitimport upyunimport Queueclass EmptyLogger: def error(self,st): pass def deBUG(self,st): pass def info(self,st): pass def warning(self,st): pass#------------LOG CONfig-----------------LOGGER = EmptyLogger()__LOGLEVEL = logging.INFOLOGfile = "/tmp/UpYunSync.log"#--------------------------------------def InitLogger(): global LOGGER,LOGfile,__LOGLEVEL if __LOGLEVEL == logging.CRITICAL or not os.access(os.path.dirname(LOGfile),os.W_OK): LOGGER = EmptyLogger() return LOGGER = logging.getLogger() hdlr = logging.fileHandler(LOGfile) formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') hdlr.setFormatter(formatter) LOGGER.addHandler(hdlr) LOGGER.setLevel(__LOGLEVEL)def SetLoggerLevel(loglevel=""):#DEBUG < INFO < WARNING < ERROR < OFF global __LOGLEVEL if loglevel.upper() == "INFO": LOGLEVEL = logging.INFO elif loglevel.upper() == "WARNING": LOGLEVEL = logging.WARNING elif loglevel.upper() == "ERROR": LOGLEVEL = logging.ERROR elif loglevel.upper() == "DEBUG": LOGLEVEL = logging.DEBUG elif loglevel.upper() == "OFF": LOGLEVEL = logging.CRITICAL else: LOGLEVEL = logging.NOTSETdef getLastModifyTime(pathname):#获取最后一次修改时间 if os.path.isfile(pathname) or os.path.isdir(pathname): #如果存在该文件的话 return (datetime.datetime.fromtimestamp(os.path.getmtime(pathname))).strftime("%F %X") return (datetime.datetime.fromtimestamp(0)).strftime("%F %X")class UpYunUprSync: def __init__(self,BUCKETname='uprsyncx',USERname='admin',PASSWORD='adminadmin',SRC_DIR="/home/glcsnz123/Django-1.5.4",DST_DIR="/zlssasj",DBfilename=".UpYunsqlite.db"): if SRC_DIR.endswith(os.sep): SRC_DIR = SRC_DIR[0:-1:1] if DST_DIR.endswith(os.sep): DST_DIR = DST_DIR[0:-1:1] self.__BUCKETname = BUCKETname self.__USERname = USERname self.__PASSWORD = PASSWORD self.__SRC_DIR = SRC_DIR self.__DST_DIR = DST_DIR self.__headerS = {"x-gmkerl-rotate": "180"} self.__DBfilename = DBfilename self.__WORKER_liMIT = 10 self.__ErrorfileList = [] self.__mkdirList = [] # 初始化sqlites self.__sqlites = UpYunsqlite(self.__SRC_DIR,self.__DBfilename) # daemon 配置信息 self.pIDfile = "/tmp/uprsync.pID" self.stdout = "/dev/null" self.stderr = "/dev/null" def setThreadNumlimit(self,T_limit): self.__WORKER_liMIT = max(1,T_limit) def __getfileList(self,fpath): start = datetime.datetime.Now() if fpath.endswith(os.sep): pathStack = [fpath[0:-1:1]] else: pathStack = [fpath] self.__DFS_FINISHED = False self.__JobfileList = Queue.Queue(100000) #用来存放需要上传的文件的路径 self.__JobPathList = Queue.Queue() #用来存放需要创建的目录 dirList = [] #初始化目录 try: tmpList = os.Listdir(fpath) except OSError,e: print "[ERROR]Permission DenIEd!\n " + fpath,"\n" LOGGER.error("[ERROR]Permission DenIEd!\n " + fpath + "\n\n") self.__DFS_FINISHED = True return if self.__DST_DIR != "": self.__CilentUp.mkdir(self.__DST_DIR) currentDir = os.sep.join(pathStack) sqlPathList = self.__sqlites.getPathFromsql(currentDir) sqlfileList = self.__sqlites.getfileFromsql(currentDir) for filename in tmpList: fullname = os.sep.join(pathStack) + os.sep + filename if os.path.isdir(fullname): if sqlPathList.has_key(filename) == False: self.__JobPathList.put(fullname) dirList.append(filename) elif os.path.islink(fullname): print "[WARNING]file:",fullname,"is a symbol link file\n" LOGGER.warning(fullname.join(["[WARNING]file: "," is a symbol link file\n\n"])) elif filename == self.__DBfilename:#数据库文件,不做任何处理! continue else: if sqlfileList.has_key(filename) == False: res = 1 elif sqlfileList[filename] == getLastModifyTime(fullname): print "[DEBUG]file:"," is not modifIEd\n" LOGGER.deBUG(fullname.join(["[DEBUG]file: "," is not modifIEd\n\n"])) continue else: res = 0 self.__JobfileList.put((fullname,res),block=True) while dirList.__len__() > 0: if dirList[-1] == "": pathStack.pop() dirList.pop() continue try: tmpList = os.Listdir(os.sep.join(pathStack) + os.sep + dirList[-1]) except: print "[ERROR]Permission DenIEd!\n" + os.sep.join(pathStack) + os.sep + dirList[-1] + "\n" LOGGER.error("[ERROR]Permission DenIEd!\n" + os.sep.join(pathStack) + os.sep + dirList[-1] + "\n\n") continue pathStack.append(dirList[-1]) dirList.pop() dirList.append("") currentDir = os.sep.join(pathStack) sqlPathList = self.__sqlites.getPathFromsql(currentDir) sqlfileList = self.__sqlites.getfileFromsql(currentDir) for filename in tmpList: fullname = os.sep.join([currentDir,filename]) if os.path.isdir(fullname): if sqlPathList.has_key(filename) == False: self.__JobPathList.put(fullname) dirList.append(filename) elif os.path.islink(fullname): print "[WARNING]file:"," is a symbol link file!\n" LOGGER.warning(fullname.join(["[WARNING]file: "," is a symbol link file\n\n"])) else: if sqlfileList.has_key(filename) == False: res = 1 elif sqlfileList[filename] == getLastModifyTime(fullname): print "[DEBUG]file:",block=True) #此处代表已经完成了对目录的遍历工作,标记 self.__DFS_FINISHED = True print "[INFO] Finish the dfs after",(datetime.datetime.Now() - start).seconds,"s\n" LOGGER.info("[INFO] Finish the dfs after " + (datetime.datetime.Now() - start).seconds.__str__() + " s\n\n") def __fileSync(self,fpath): try: self.__CilentUp.put("".join([self.__DST_DIR,fpath[self.__SRC_DIR.__len__():]]),open(fpath).read()) print "".join([self.__DST_DIR,fpath[self.__SRC_DIR.__len__():]]) + " oked\n" LOGGER.deBUG("".join([self.__DST_DIR,fpath[self.__SRC_DIR.__len__():]]) + " oked\n\n") return True except upyun.UpYunClIEntException as ce: self.__ErrorfileList.append(fpath) print "".join([self.__DST_DIR,fpath[self.__SRC_DIR.__len__():]]) + " Failed!\n" print "Except an UpYunClIEntException ..." print "Error Message: " + ce.msg + "\n" LOGGER.error("\n".join(["".join([self.__DST_DIR,fpath[self.__SRC_DIR.__len__():]]) + " Failed!\n","Except an UpYunClIEntException ...","Error Message: " + ce.msg + "\n\n"])) return False except upyun.UpYunServiceException as se: self.__ErrorfileList.append(fpath) print "".join([self.__DST_DIR,fpath[self.__SRC_DIR.__len__():]]) + " Failed\n" print "Except an UpYunServiceException ..." print "http Status Code: " + str(se.status) print "Error Message: " + se.msg + "\n" LOGGER.error("\n".join(["".join([self.__DST_DIR,fpath[self.__SRC_DIR.__len__():]]) + " Failed\n","Except an UpYunServiceException ...\nhttp Status Code: " + str(se.status),"Error Message: " + se.msg + "\n\n"])) if se.err: print se.err LOGGER.error(se.err + "\n\n") return False def __Worker(self,pID): try: waiting = 1 while True: try: if self.__DFS_FINISHED == False: self.__mkdirList[pID] = self.__JobPathList.get(block=True,timeout=5) else: self.__mkdirList[pID] = self.__JobPathList.get(block=False) fpath = self.__mkdirList[pID] while True: flag = 0 for i in range(self.__WORKER_liMIT): if i != pID and self.__mkdirList[i] != "" and fpath.startswith(self.__mkdirList[i]): flag = 1 break if flag == 0: break else: time.sleep(2) self.__CilentUp.mkdir("".join([self.__DST_DIR,fpath[self.__SRC_DIR.__len__():]])) self.__mkdirList[pID] = "" self.__sqlites.insertPathTosql(fpath) continue except Queue.Empty,efr: pass except Exception,ex: pass try: fpath = self.__JobfileList.get(block=True,timeout=waiting) except Exception,e: if self.__DFS_FINISHED: return else: waiting = min(waiting * 2,30) continue waiting = max(waiting/2,1) if os.access(fpath[0],os.R_OK) == False: self.__ErrorfileList.append(fpath[0]) print fpath[0].join(["[ERROR] "," Permission DenIEd!Need Read access\n\n"]) LOGGER.error(fpath[0].join(["[ERROR] "," Permission DenIEd!Need Read access\n\n"])) continue res = self.__fileSync(fpath[0]) #if sync success,update the datebase. if res == True: if fpath[1] == 1: self.__sqlites.insertfileTosql(fpath[0]) elif fpath[1] == 0: self.__sqlites.updatefileTosql(fpath[0]) else: print "==============ERROR===================" except Exception,e: print "[WARNING] a thread dIEd.",e,"\n" LOGGER.warning("[WARNING] a thread dIEd." + e.__str__() + "\n\n") finally: self.__WORKER_Now -= 1 self.__mkdirList[pID]="" def __InitLogIn(self): self.__CilentUp = upyun.UpYun(self.__BUCKETname,self.__USERname,self.__PASSWORD,timeout=30,endpoint=upyun.ED_auto) def runMultiThreadSync(self): start = datetime.datetime.Now() self.__WORKER_Now = self.__WORKER_liMIT self.__mkdirList = [""] * self.__WORKER_liMIT self.__InitLogIn() thread.start_new_thread(self.__getfileList,(self.__SRC_DIR,)) time.sleep(3) for i in range(self.__WORKER_liMIT): thread.start_new_thread(self.__Worker,(i,)) while self.__WORKER_Now > 0: while self.__WORKER_Now < self.__WORKER_liMIT and (self.__JobPathList.qsize()>0 or self.__JobfileList.qsize()>0): thread.start_new_thread(self.__Worker,()) self.__WORKER_Now += 1 self.__mkdirList.append("") print "[INFO] Create a new Thread! \n" LOGGER.info("[INFO] Create a new Thread! \n\n") # time.sleep(20) for i in range(4): print self.__JobfileList.qsize(),self.__JobPathList.qsize() LOGGER.deBUG("[INFO]" + str(self.__JobfileList.qsize()) + " files are found and waiting for sync.") time.sleep(5) self.RollBack() print "[INFO]Finish uprsync after " + (datetime.datetime.Now() - start).seconds.__str__() + " s\n" LOGGER.info("[INFO]Finish uprsync after " + (datetime.datetime.Now() - start).seconds.__str__() + " s\n\n") def RollBack(self): for fullname in self.__ErrorfileList: print fullname.join(["[WARNING] "," is rolling back!"]) LOGGER.warning(fullname.join(["[WARNING] "," is rolling back!"])) pathname = os.path.dirname(fullname)[self.__SRC_DIR.__len__():] while os.path.basename(pathname) != "": self.__sqlites.rollBackPathTosql(pathname) pathname = os.path.dirname(pathname) #Daemon ------------------------------------------ def __daemonize(self): try: pID = os.fork() if pID > 0: sys.exit(0) except OSError,ose: sys.stderr.write("[Daemon ERROR]fork #1 Failed: %d (%s)\n" % (ose.errno,ose.strerror)) LOGGER.error("[Daemon ERROR]fork #1 Failed: %d (%s)\n\n" % (ose.errno,ose.strerror)) sys.exit(1) #脱离终端 os.setsID() #修改当前工作目录 os.chdir("/") #重设文件创建权限 os.umask(0) #第二次fork,禁止进程重新打开控制终端 try: pID = os.fork() if pID > 0: sys.exit(0) except OSError,e: sys.stderr.write("[Daemon ERROR]fork #2 Failed: %d (%s)\n" % (e.errno,e.strerror)) LOGGER.error("[Daemon ERROR]fork #2 Failed: %d (%s)\n\n" % (e.errno,e.strerror)) sys.exit(1) sys.stdout.flush() sys.stderr.flush() so = file(self.stdout,'a+') se = file(self.stderr,'a+',0) #重定向标准输出/错误 os.dup2(so.fileno(),sys.stdout.fileno()) os.dup2(se.fileno(),sys.stderr.fileno()) #注册程序退出时的函数,即删掉pID文件 atexit.register(self.__delpID) pID = str(os.getpID()) file(self.pIDfile,'w+').write("%s\n" % pID) def __delpID(self): os.remove(self.pIDfile) def start(self): """ Start the daemon """ print "[Deamon DEBUG]start in Daemon\n" LOGGER.deBUG("[Daemon DEBUG]start in Daemon\n\n") # Check for a pIDfile to see if the daemon already runs try: pf = file(self.pIDfile,'r') pID = int(pf.read().strip()) pf.close() except IOError: pID = None if pID: message = "pIDfile %s already exist. Daemon already running?\n\n" sys.stderr.write(message % self.pIDfile) LOGGER.error("[Daemon ERROR]pIDfile %s already exist. Daemon already running?\n\n" % self.pIDfile) sys.exit(1) # Start the daemon self.__daemonize() self.__run() def stop(self): """ Stop the daemon """ # Get the pID from the pIDfile print "[Deamon DEBUG]stop in Daemon\n" LOGGER.deBUG("[Daemon DEBUG]stop in Daemon\n\n") try: pf = file(self.pIDfile,'r') pID = int(pf.read().strip()) pf.close() except IOError: pID = None if not pID: message = "pIDfile %s does not exist. Daemon not running?\n" sys.stderr.write(message % self.pIDfile) LOGGER.error("[Daemon ERROR]pIDfile %s does not exist. Daemon not running?\n\n" % self.pIDfile) return # not an error in a restart # Try killing the daemon process try: while True: os.kill(pID,signal.SIGTERM) time.sleep(0.1) except OSError,err: err = str(err) if err.find("No such process") > 0: if os.path.exists(self.pIDfile): os.remove(self.pIDfile) else: print str(err) sys.exit(1) def restart(self): """ Restart the daemon """ self.stop() self.start() def __run(self): self.runMultiThreadSync()#---------------------------- DB ----------------------------------------class UpYunsqlite: """ 元数据的 *** 作 """ def __init__(self,SRC_DIR="/home/glcsnz123",DBfilename="example.db"): self.SRC_DIR = SRC_DIR self.DBfilename = DBfilename self.__InitConnect() self.locksql = thread.allocate_lock() def __InitDBfile(self): if not os.access(self.SRC_DIR,os.W_OK): print "[ERROR]No write access in current directory" LOGGER.error("[ERROR]No write access in current directory") sys.exit("403") conn = sqlite3.connect(os.sep.join([self.SRC_DIR,self.DBfilename])) cur = conn.cursor() cur.execute("CREATE table fileModify(ID INTEGER PRIMARY KEY autoINCREMENT,filename VARCHAR(256),\ pathname VARCHAR(256),last_modify DATE)") cur.execute("CREATE table PathModify(ID INTEGER PRIMARY KEY autoINCREMENT,pathname VARCHAR(256),\ fatherpath VARCHAR(256),last_modify DATE)") cur.close() conn.close() def __InitConnect(self): if os.path.isfile(os.sep.join([self.SRC_DIR,self.DBfilename])) == False: self.__InitDBfile() self.CONN = sqlite3.connect(os.sep.join([self.SRC_DIR,self.DBfilename]),check_same_thread=False) self.CUR = self.CONN.cursor() def getPathFromsql(self,fapath): query = "select pathname,last_modify from PathModify where fatherpath='%s'" % ( self.__rpQuota(fapath[self.SRC_DIR.__len__():])) try: self.locksql.acquire() self.CUR.execute(query) res = self.CUR.fetchall() finally: self.locksql.release() resObj = {} for i in range(res.__len__()): resObj[res[i][0]] = res[i][1] return resObj def getfileFromsql(self,fapath): query = "select filename,last_modify from fileModify where pathname = '%s'" % ( self.__rpQuota(fapath[self.SRC_DIR.__len__():])) try: self.locksql.acquire() self.CUR.execute(query) res = self.CUR.fetchall() finally: self.locksql.release() resObj = {} for i in range(res.__len__()): resObj[res[i][0]] = res[i][1] return resObj def updatefileTosql(self,fpath): query = r"update fileModify SET last_modify = '%s' where filename = '%s' AND pathname = '%s'" % ( getLastModifyTime(fpath),self.__rpQuota(os.path.basename(fpath)),self.__rpQuota(os.path.dirname(fpath)[self.SRC_DIR.__len__():])) try: self.locksql.acquire() self.CUR.execute(query) self.CONN.commit() except sqlite3.OperationalError,soe: print "[Error] Syntax error!\nError query: " + query,"\nError Path: " + fpath + '\n' LOGGER.error("[Error] Syntax error!\nError query: " + query + "\nError Path: " + fpath + '\n\n') finally: self.locksql.release() def insertfileTosql(self,fpath): query = r"insert into fileModify(filename,pathname,last_modify) values('%s','%s','%s')" % ( self.__rpQuota(os.path.basename(fpath)),self.__rpQuota(os.path.dirname(fpath)[self.SRC_DIR.__len__():]),getLastModifyTime(fpath)) try: self.locksql.acquire() self.CUR.execute(query) self.CONN.commit() except sqlite3.OperationalError,"\nError Path: " + fpath + '\n' LOGGER.error("[Error] Syntax error!\nError query: " + query + "\nError Path: " + fpath + '\n\n') finally: self.locksql.release() def updatePathTosql(self,pathname): query = r"update PathModify SET last_modify = '%s' where pathname = '%s' and fatherpath='%s'" % ( getLastModifyTime(pathname),self.__rpQuota(os.path.basename(pathname)),self.__rpQuota(os.path.dirname(pathname)[self.SRC_DIR.__len__():])) try: self.locksql.acquire() self.CUR.execute(query) self.CONN.commit() except sqlite3.OperationalError,soe: print "[Error] Syntax error!\n Error query: " + query,"\nError Path: " + pathname + "\n" LOGGER.error("[Error] Syntax error!\n Error query: " + query + "\nError Path: " + pathname + "\n\n") finally: self.locksql.release() def rollBackPathTosql(self,pathname): query = r"update PathModify SET last_modify = '%s' where pathname = '%s' and fatherpath='%s'" % ( getLastModifyTime(""),self.__rpQuota(os.path.dirname(pathname))) try: self.locksql.acquire() self.CUR.execute(query) self.CONN.commit() except sqlite3.OperationalError,"\nError Path: " + pathname + "\n" LOGGER.error("[Error] Syntax error!\n Error query: " + query + "\nError Path: " + pathname + "\n\n") finally: self.locksql.release() def insertPathTosql(self,pathname): query = r"insert into PathModify(pathname,fatherpath,'%s')" % ( self.__rpQuota(os.path.basename(pathname)),self.__rpQuota(os.path.dirname(pathname)[self.SRC_DIR.__len__():]),getLastModifyTime(pathname)) try: self.locksql.acquire() self.CUR.execute(query) self.CONN.commit() except sqlite3.OperationalError,"\nError Path: " + pathname + "\n" LOGGER.error("[Error] Syntax error!\n Error query: " + query + "\nError Path: " + pathname + "\n\n") finally: self.locksql.release() def __rpQuota(self,st): return st.replace("'",r"//")if __name__ == "__main__": InitLogger() ups = UpYunUprSync() ups.setThreadNumlimit(20) ups.runMultiThreadSync()################################ 问题主要出在了数据库的询问上,所以必须减少数据库的访问# getfileList函数主要的花费是在mkdir和数据库 *** 作上。,数据库 *** 作占了50%的花费# mkdir大概花了25%的时间代价#
ReadMe.txt基本函数接口初始化日志: InitLogger() 默认日志等级为INFO 设置日志等级 SetLoggerLevel(log_level) 参数log_level 为日志的等级 可选日志等级有(不区分大小写): "DEBUG" "INFO" "WARNING" "ERROR" "OFF" 其中OFF表示关闭日志功能,另外,如果未初始化日志,日志功能默认是关闭的。 日志的存放地址默认为/tmp/UpYunSync.log。同时,也可通过以下方法来设定日志文件地址: UpYunrSync.LOGfile = LOG_file 其中参数LOG_file 为日志文件的地址初始化UpYunrSync import UpYunrSync ups = UpYunrSync.UpYunUprSync(BUCKETname,USERname,PASSWORD,SRC_DIR,DST_DIR,DBfilename) 其中参数 bucket 为空间名称,username 和 password 分别为授权 *** 作员帐号和密码,SRC_DIR和DST_DIR分别为需要同步的本地目录和服务器目录,必选。 参数DBfilename为存储本地文件元数据的sqlite文件,默认值为.UpYunsqlite.db设置线程开启个数 ups.setThreadNumlimit(Thread_Num) 参数Thread_Num为上传文件的线程个数,线程个数并不是越多越好,应当根据所要上传的目录中文件个数以及大小来确定。同步目录 终端直接同步文件形式 ups.runMultiThreadSync() 执行文件同步的 *** 作 后台守护进程同步文件形式 ups.start() ups.stop() ups.restart() 三个方法分别是启动进程、停止进程和重启进程。 后台进程模式下,分别设置程序标准输出、错误输出和PID文件位置的方法如下: ups.stdout = Stdout_file ups.stderr = Stderr_file ups.pIDfile = PID_file 其中,参数Stdout_file 和 Stderr_file 分别是标准输出和错误输出的文件地址,默认值为/dev/null。PID_file为进程的PID文件位置,默认值为/tmp/uprsync.pID总结
以上是内存溢出为你收集整理的实习第一周——UpYunUprSync——Sqlite版本全部内容,希望文章能够帮你解决实习第一周——UpYunUprSync——Sqlite版本所遇到的程序开发问题。
如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)