[sql] view plain copy print?
-- Schema for the Excel import demo: one database containing one table of
-- student rows (gid, stu_no, name, age) keyed by an auto-increment id.
-- Every statement is terminated with ";" so the listing runs as a script,
-- and both CREATE statements are idempotent so it can be re-run safely.
CREATE DATABASE IF NOT EXISTS php_excel DEFAULT CHARACTER SET utf8;
USE php_excel;

CREATE TABLE IF NOT EXISTS php_excel (
    id     INT(20)     NOT NULL AUTO_INCREMENT PRIMARY KEY,
    gid    VARCHAR(20) NOT NULL,
    stu_no VARCHAR(20) NOT NULL,
    name   VARCHAR(45) NOT NULL,
    age    INT(4)      NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
第二步:前台index.php文件。
[html] view plain copy print?
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<!-- The media type and its charset parameter must be separated by ";",
     otherwise the browser does not see a charset declaration at all. -->
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>phpexcel导入excel数据到MYSQL数据库</title>
</head>
<body>
<!-- Upload form: posts the chosen spreadsheet to insertdb.php.
     multipart/form-data is required for the file field "filename". -->
<form name="frm1" action="insertdb.php" method="post" enctype="multipart/form-data">
<input name="filename" type="file" />
<input name="submit" type="submit" value="import" />
</form>
</body>
</html>
第三步:向数据库插入数据的insertdb.php文件。
[php] view plain copy print?
<?php
/**
 * insertdb.php - import the rows of an uploaded .xls file into the
 * php_excel table.
 *
 * Expects a multipart POST with the spreadsheet in the "filename" field.
 * Row 1 of the first sheet is treated as a header; from row 2 on, columns
 * A-D are inserted as gid, stu_no, name and age. Prints each statement and
 * a final success / failure count.
 *
 * NOTE(review): this script keeps the legacy mysql_* API because the
 * connection is opened by conn.php with mysql_connect(). That extension was
 * removed in PHP 7; moving to mysqli/PDO prepared statements requires
 * changing conn.php and this file together.
 */
session_start();
header("Content-Type: text/html; charset=utf-8");

// Running totals reported at the end of the import.
$succ_result  = 0;
$error_result = 0;

$max_size = 2000000; // upload size limit, in bytes

// Stop early unless this is a POST that really carries an uploaded file;
// otherwise the Excel reader below would be handed an empty path.
if ($_SERVER['REQUEST_METHOD'] != 'POST'
    || !isset($_FILES['filename'])
    || !is_uploaded_file($_FILES['filename']['tmp_name'])) {
    echo "The file is empty!";
    exit;
}

$file       = $_FILES['filename'];
$fname      = $file['name'];
$uploadfile = $file['tmp_name'];
$ftype      = strtolower(pathinfo($fname, PATHINFO_EXTENSION)); // file extension

if ($file['size'] > $max_size) {
    echo "Import file is too large";
    exit;
}
if ($ftype != 'xls') {
    echo "Import file type is error";
    exit;
}

require("./conn.php"); // opens the MySQL connection

// Load the PHPExcel library. Forward slashes work on every platform,
// backslashes only on Windows.
require_once 'PHPExcel.php';
require_once 'PHPExcel/IOFactory.php';
require_once 'PHPExcel/Reader/Excel5.php';

$objReader   = PHPExcel_IOFactory::createReader('Excel5'); // use Excel2007 for the 2007 format
$objPHPExcel = $objReader->load($uploadfile);
$sheet       = $objPHPExcel->getSheet(0);
$highestRow  = $sheet->getHighestRow(); // index of the last used row

// The connection charset only needs to be set once, not once per row.
mysql_query("set names utf8");

$column_count = 4; // gid, stu_no, name, age live in columns A-D

for ($j = 2; $j <= $highestRow; $j++) {
    // Read the cells straight into an array. Joining them with "," and
    // splitting again corrupts any value that itself contains a comma.
    $strs = array();
    for ($k = 0; $k < $column_count; $k++) {
        // Column index is 0-based in PHPExcel's getCellByColumnAndRow().
        $strs[] = $sheet->getCellByColumnAndRow($k, $j)->getValue();
    }

    // Cell contents are untrusted: escape the strings and force age to int.
    $sql = sprintf(
        "insert into php_excel(gid,stu_no,name,age) values ('%s','%s','%s',%d)",
        mysql_real_escape_string((string) $strs[0]),
        mysql_real_escape_string((string) $strs[1]),
        mysql_real_escape_string((string) $strs[2]),
        (int) $strs[3]
    );
    echo htmlspecialchars($sql, ENT_QUOTES, 'UTF-8')."<br/>";

    // Count a failed statement instead of aborting the whole import, so
    // the failure total printed below is meaningful.
    $result = mysql_query($sql);
    if ($result && mysql_affected_rows() > 0) {
        $succ_result += 1;
    } else {
        $error_result += 1;
    }
}

echo "插入成功".$succ_result."条数据!!!<br>";
echo "插入失败".$error_result."条数据!!!";
其中conn.php代码如下:
[php] view plain copy print?
<?php
// conn.php - opens the MySQL connection used by the import script and
// selects the php_excel database with a UTF-8 connection charset.
// NOTE(review): the mysql_* extension was removed in PHP 7; this file only
// runs on PHP 5.x unless it is migrated to mysqli or PDO.
$mysql = mysql_connect("localhost", "root", "") or die("数据库连接失败!");
// Fail loudly if the database cannot be selected rather than letting every
// later query fail.
mysql_select_db("php_excel", $mysql) or die("数据库连接失败!");
mysql_query("set names utf8");
我的导入效果如下:
至此,从Excel文件读取数据批量导入到Mysql数据库完成。
先贴原来的导入数据代码:48304ba5e6f9fe08f3fa1abda7d326ab.png
#coding:utf-8
# Import the rows of 11.xlsx into the D072Qf model, skipping rows whose
# (acct_month, serv_id) pair already exists in the database.
# Written for Python 2 (see the interpreter banner in the sample output).
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "www.settings")
import django
# From Django 1.7 on, django.setup() must run before models are imported,
# otherwise django.core.exceptions.AppRegistryNotReady
# ("Models aren't loaded yet.") is raised.
if django.VERSION >= (1, 7):
    django.setup()
from arrears.models import D072Qf
import xlrd  # Excel reader
from datetime import datetime
from xlrd import xldate_as_tuple
import time
import random

# Model field for each spreadsheet column, in column order (0..33).
FIELD_NAMES = (
    'acct_month', 'serv_id', 'acc_nbr', 'user_name', 'acct_code',
    'acct_name', 'product_name', 'current_charge', 'one_charge',
    'two_charge', 'three_charge', 'four_charge', 'five_charge',
    'six_charge', 'seven_charge', 'eight_charge', 'nine_charge',
    'ten_charge', 'eleven_charge', 'twelve_charge', 'oneyear_charge',
    'threeyear_charge', 'upthreeyear_charge', 'all_qf', 'morethree_qf',
    'aging', 'serv_state_name', 'mkt_chnl_name', 'mkt_chnl_id',
    'mkt_region_name', 'mkt_region_id', 'mkt_grid_name', 'mkt_grid_id',
    'prod_addr',
)
# Columns whose numeric cells xlrd returns as float and that are converted
# back to int before being stored.
INT_COLUMNS = (0, 1, 2, 4, 28, 30, 32)
# Number of pending objects that triggers a bulk insert.
BATCH_SIZE = 9999

time1 = time.time()
with xlrd.open_workbook('11.xlsx') as data:
    print(u"读取文件结束,开始导入!")
    time2 = time.time()
    table = data.sheet_by_index(0)  # first worksheet
    time3 = time.time()
    x = y = z = 0  # duplicate rows / imported rows / blank rows
    WorkList = []
    for line in range(1, table.nrows):  # row 0 is skipped as the header
        row = table.row_values(line)
        # row_values() always returns one entry per column, so "if row" can
        # never detect a blank row; check the cell contents instead.
        if not any(cell != '' for cell in row):
            z = z + 1
            continue
        if len(row) < len(FIELD_NAMES):
            raise ValueError('row %d has %d columns, expected at least %d'
                             % (line, len(row), len(FIELD_NAMES)))
        for i in INT_COLUMNS:
            if isinstance(row[i], float):
                row[i] = int(row[i])
        # One query per row; this dominates the run time on a large table.
        if D072Qf.objects.filter(acct_month=row[0], serv_id=row[1]).exists():
            x = x + 1
            continue
        WorkList.append(D072Qf(**dict(zip(FIELD_NAMES, row))))
        y = y + 1
        if len(WorkList) >= BATCH_SIZE:
            D072Qf.objects.bulk_create(WorkList)
            WorkList = []
            time.sleep(random.random())  # pause for a random 0 <= t < 1.0 s
            print("导入成功一次!")
    # Write the final partial batch. Without this, every row collected after
    # the last full batch is silently dropped.
    if WorkList:
        D072Qf.objects.bulk_create(WorkList)
        WorkList = []
        print("导入成功一次!")
    print('数据导入成功,导入'+str(y)+'条,重复'+str(x)+'条,有'+str(z)+'行为空!')
    time4 = time.time()
    print("读取文件耗时"+str(time2-time1)+"秒,导入数据耗时"+str(time4-time3)+"秒!")
48304ba5e6f9fe08f3fa1abda7d326ab.png
这段代码目前还没能把十几万行数据全部导入数据库:花了1个小时才导入5万行,之后越来越慢,主要是因为读到excel表7万行左右之后,读取数据本身就变得很慢。总体来说,影响导入速度的原因有以下几个:
1、一直以来采用xlrd导入xls格式文件,如果文件有十几万行,只是读取文件就会花200秒,若换成csv则几乎不花时间
2、代码中这行语句也会影响速度,特别当数据库中数据很大时:if D072Qf.objects.filter(acct_month = row[0],serv_id=row[1]).exists():#判断该行值是否在数据库中重复
3、若一次性向列表中添加十几万行数据,就windows的cpu而言是承受不住的!所以建议每导入1万条数据后,清空一次列表
改善后的代码:
优化部分:采用csv格式;取消检查重复数据的语句;每5万条导入一次数据
48304ba5e6f9fe08f3fa1abda7d326ab.png
#coding:utf-8
# Import the rows of 11.csv into the D072Qf model in batches, without the
# per-row duplicate check. Written for Python 2 (the csv module there
# expects the file to be opened in binary mode).
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "www.settings")
import django
# From Django 1.7 on, django.setup() must run before models are imported,
# otherwise django.core.exceptions.AppRegistryNotReady
# ("Models aren't loaded yet.") is raised.
if django.VERSION >= (1, 7):
    django.setup()
from arrears.models import D072Qf
import csv
import time
import random

# Model field for each CSV column, in column order (0..33).
FIELD_NAMES = (
    'acct_month', 'serv_id', 'acc_nbr', 'user_name', 'acct_code',
    'acct_name', 'product_name', 'current_charge', 'one_charge',
    'two_charge', 'three_charge', 'four_charge', 'five_charge',
    'six_charge', 'seven_charge', 'eight_charge', 'nine_charge',
    'ten_charge', 'eleven_charge', 'twelve_charge', 'oneyear_charge',
    'threeyear_charge', 'upthreeyear_charge', 'all_qf', 'morethree_qf',
    'aging', 'serv_state_name', 'mkt_chnl_name', 'mkt_chnl_id',
    'mkt_region_name', 'mkt_region_id', 'mkt_grid_name', 'mkt_grid_id',
    'prod_addr',
)
# A batch is written whenever the line counter reaches a multiple of this.
BATCH_SIZE = 50000

time1 = time.time()
# "with" closes the file even if an insert raises.
with open('11.csv', 'rb') as f:
    print(u"读取文件结束,开始导入!")
    time2 = time.time()
    # csv.reader handles quoted fields (including commas inside quotes) and
    # strips the line terminator, which manual replace()/split() does not.
    reader = csv.reader(f)
    next(reader, None)  # skip the header line
    WorkList = []
    y = 0  # number of data rows queued for import
    n = 1  # line counter; starts at 1 because the header was consumed
    for row in reader:
        if not row:
            continue  # ignore completely empty lines
        if len(row) < len(FIELD_NAMES):
            raise ValueError('line %d has %d columns, expected at least %d'
                             % (n + 1, len(row), len(FIELD_NAMES)))
        y = y + 1
        WorkList.append(D072Qf(**dict(zip(FIELD_NAMES, row))))
        n = n + 1
        if n % BATCH_SIZE == 0:
            print(n)
            D072Qf.objects.bulk_create(WorkList)
            WorkList = []
            time3 = time.time()
            print("读取文件耗时"+str(time2-time1)+"秒,导入数据耗时"+str(time3-time2)+"秒!")
    # Write the final partial batch, then take the timestamp so the reported
    # duration includes this last insert.
    print(n)
    D072Qf.objects.bulk_create(WorkList)
    time3 = time.time()
    print("读取文件耗时"+str(time2-time1)+"秒,导入数据耗时"+str(time3-time2)+"秒!")
    WorkList = []
    print("成功导入数据"+str(y)+"条")
48304ba5e6f9fe08f3fa1abda7d326ab.png
结果让人大吃一惊!!!,只耗时73秒
48304ba5e6f9fe08f3fa1abda7d326ab.png
Python 2.7.10 (default, May 23 2015, 09:40:32) [MSC v.1500 32 bit (Intel)] on win32
Type "copyright", "credits" or "license()" for more information.
>>>================================ RESTART ================================
>>>
读取文件结束,开始导入!
50000
读取文件耗时0.0秒,导入数据耗时34.3279998302秒!
100000
读取文件耗时0.0秒,导入数据耗时67.3599998951秒!
138400
读取文件耗时0.0秒,导入数据耗时73.4379999638秒!
成功导入数据138399条
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)