def _read_first_table(fnamein: str, page: int) -> "pd.DataFrame":
    """Return the first table camelot finds on ``page`` as a DataFrame."""
    # ``[0]`` takes only the first detected table of the page; presumably this
    # raises IndexError when camelot detects no table there -- TODO confirm.
    tables = camelot.read_pdf(fnamein, pages=str(page), flavor='stream')
    return pd.DataFrame(tables[0].data)


def readFromPdfAndStroe(
    fnamein: str, pagestart: int, pagesend: int, fnameout: str
) -> None:
    """Stack the first table of each PDF page and save it as JSON/XLSX/CSV.

    Page ``pagestart`` is always read; after that every page in
    ``range(pagestart + 1, pagesend)`` is read in ``stream`` mode. The
    per-page tables are stacked vertically with a fresh 0..n-1 row index and
    written to ``fnameout + '.json'``, ``fnameout + '.xlsx'`` and
    ``fnameout + '.csv'``.

    Args:
        fnamein: PDF file handed to ``camelot.read_pdf`` (the original author
            noted ``adndt-99-69.pdf`` as an example input).
        pagestart: First page to read.
        pagesend: Stop page. It is exclusive, exactly like ``range``: the
            last page read is ``pagesend - 1``.
            NOTE(review): confirm callers really expect an exclusive bound.
        fnameout: Output file name without extension.
    """
    frames = [_read_first_table(fnamein, pagestart)]
    for x in range(pagestart + 1, pagesend):
        print(x)  # progress output: page currently being parsed
        frames.append(_read_first_table(fnamein, x))

    # ``DataFrame.append`` was removed in pandas 2.0. A single concat also
    # avoids re-copying the accumulated frame once per page, and
    # ``ignore_index=True`` replaces the repeated ``reset_index(drop=True)``.
    df = pd.concat(frames, ignore_index=True)

    df.to_json(fnameout + '.json')
    df.to_excel(fnameout + '.xlsx', index=False)
    df.to_csv(fnameout + '.csv')  # unlike the xlsx, the csv keeps the row index
# Source note: sharing is welcome; when reposting, please credit the source: 内存溢出.
# Comments (0)