抓取各大网站的数据并插入数据库，这样就不用为没有数据而烦恼了。
获取百度的歌曲名、歌手和链接：
package webTools
import java io BufferedReader
import java io IOException
import java io InputStreamReader
import java io UnsupportedEncodingException
import MalformedURLException
import URL
import java util ArrayList
import java util HashMap
import java util List
import java util regex Matcher
import java util regex Pattern
import dbTools DBTools
/**
 * Downloads an HTML song-listing page, cuts the listing out with regular
 * expressions and stores one row per song (name, singer, link) through
 * DBTools.
 *
 * NOTE(review): this listing has lost most punctuation (dots, semicolons,
 * commas, quotes, digits) and does not compile as shown. The regex and string
 * literals below are incomplete, so their exact original form cannot be
 * confirmed from this text.
 */
public class IOTOWeb {
/**
 * Fetches the page at the given address and returns its text.
 *
 * The response is read line by line and the lines are appended to one buffer
 * with no separator between them. Any MalformedURLException,
 * UnsupportedEncodingException or IOException is caught and its stack trace
 * printed; the method then returns whatever had been read up to that point
 * (possibly an empty string).
 *
 * NOTE(review): the reader is closed only when reading completes normally.
 * NOTE(review): the charset argument is garbled ("gb"); presumably a GB-family
 * encoding such as gb2312 - confirm.
 *
 * @param URL address of the page to download
 * @return the accumulated page text
 */
public String getHtmlContent(String URL) {
URL url = null
String rowContent =
StringBuffer Content = new StringBuffer()
try {
url = new URL(URL)
BufferedReader in = new BufferedReader(new InputStreamReader(url
openStream() gb ))
while ((rowContent = in readLine()) != null) {
Content append(rowContent)
}
in close()
} catch (MalformedURLException e) {
// TODO Auto generated catch block
e printStackTrace()
} catch (UnsupportedEncodingException e) {
// TODO Auto generated catch block
e printStackTrace()
} catch (IOException e) {
// TODO Auto generated catch block
e printStackTrace()
}
return Content toString()
}
/**
 * Collects every table cell that contains a link.
 *
 * Each element of the returned list is the full text of one regex match: a
 * td element wrapping an anchor with an href attribute. The pattern is
 * compiled with DOTALL.
 *
 * @param Content HTML to search
 * @return list of matched td fragments, in order of appearance
 */
public List getLink(String Content) {
ArrayList listLink = new ArrayList()
String regex = <td[^>]*>[\\(]*<a[^>]*href=(\ ([^\ ]*)\ |\ ([^\ ]*)\ |([^\\s>]*))[^>]*>( *?)[\\)]*[\\s]*</td>
Pattern pattern = pile(regex Pattern DOTALL)
Matcher matcher = pattern matcher(Content)
while (matcher find()) {
listLink add(matcher group())
}
return listLink
}
/**
 * Extracts href attribute values from the given HTML.
 *
 * Every match of the href pattern is post-processed with replaceFirst and
 * replace before being added to the result.
 *
 * NOTE(review): the replacement arguments are garbled; presumably they strip
 * the leading href= prefix and the surrounding quotes - confirm.
 *
 * @param Content HTML to search
 * @return list of extracted link targets
 */
public List<String>getHref(String Content) {
String regex
List listtHref = new ArrayList()
regex = href=(\ ([^\ ]*)\ |\ ([^\ ]*)\ |([^\\s>]*))\
Pattern pa = pile(regex Pattern DOTALL)
Matcher ma = pa matcher(Content)
while (ma find()) {
listtHref add(ma group() replaceFirst( href=\ ) replace( \
))
}
return listtHref
}
/**
 * Extracts the singer fragment(s) from one table cell.
 *
 * The pattern looks for one anchor followed by a second anchor inside
 * parentheses; each match goes through the same replaceFirst/replace
 * post-processing as in getHref.
 *
 * NOTE(review): the start of the pattern is missing in this listing, so the
 * exact match cannot be confirmed.
 *
 * @param Content HTML of one table cell
 * @return list of matched fragments; empty when nothing matches
 */
public List<String>getPerson(String Content) {
String regex
List list = new ArrayList()
regex = ]*href=(\"([^\"]*)\"|\ ([^\ ]*)\ |([^\\s>]*))[^>]*>( *?)\\ >\\(<a[^>]*href=(\ ([^\ ]*)\ |\ ([^\ ]*)\ |([^\\s>]*))[^>]*>( *?)\\)
Pattern pa = pile(regex Pattern DOTALL)
Matcher ma = pa matcher(Content)
while (ma find()) {
list add(ma group() replaceFirst( href=\ ) replace( \ ))
}
return list
}
/**
 * Collects every anchor element that is followed by a whitespace character.
 *
 * The full match (including the anchor tags) is stored, not just the link
 * text.
 *
 * @param Content HTML to search
 * @return list of matched anchor fragments
 */
public List<String>getSongName(String Content) {
String regex
List listPerson = new ArrayList()
regex = <a[^>]*href=(\ ([^\ ]*)\ |\ ([^\ ]*)\ |([^\\s>]*))[^>]*>( *?)</a>\\s
Pattern pa = pile(regex Pattern DOTALL)
Matcher ma = pa matcher(Content)
while (ma find()) {
listPerson add(ma group())
}
return listPerson
}
/**
 * Cuts the listing table(s) out of a full page.
 *
 * All matches of the table pattern (a table with class "list") are
 * concatenated into one string.
 *
 * NOTE(review): the numeric attribute values in the pattern are missing in
 * this listing.
 *
 * @param Content full page HTML
 * @return the concatenated table fragments; empty when nothing matches
 */
public String getMainContent(String Content) {
String regex = <table width=\ %\ align=\ center\ cellpadding=\ \ cellspacing=\ \ class=\ list\ >( *?)</table>
StringBuffer mainContent = new StringBuffer()
Pattern pattern = pile(regex Pattern DOTALL)
Matcher matcher = pattern matcher(Content)
while (matcher find()) {
mainContent append(matcher group())
}
return mainContent toString()
}
/**
 * Applies a replaceAll to the given string.
 *
 * NOTE(review): the arguments are garbled; presumably this removes markup
 * tags so that only the text remains - confirm.
 *
 * @param s text to clean
 * @return the result of the replacement
 */
public String outTag(final String s) {
return s replaceAll( <*?>)
}
// Database helper used by getFromBaiduMap; created once per IOTOWeb instance.
DBTools dbTools = new DBTools()
/**
 * Downloads the page, walks through its listing cells and inserts one row
 * per cell into the song table (songName, songPerson, songHref).
 *
 * @param URL address of the listing page
 * @throws Throwable anything raised by the helpers or by the database update
 */
public void getFromBaiduMap (String URL) throws Throwable {
// NOTE(review): ContentMap is never used after this line.
HashMap ContentMap = new HashMap()
String Content = getHtmlContent(URL)
String mainContent = getMainContent(Content)
List listLink = getLink(mainContent)
for (int j = j <listLink size()j++) {
String tdTag = listLink get(j) toString()
List songNameList = getSongName(tdTag)
// NOTE(review): songNameList is indexed without an emptiness check.
String songName = outTag(songNameList get( ) toString())
List personList = getPerson(tdTag)
String songPerson =
if (personList size() != ) {
// Each iteration overwrites songPerson, so only the last match is kept.
for (int n = n <personList size()n++) {
// System out println(personList get(n) toString())
songPerson = outTag(personList get(n) toString())
}
} else {
// Placeholder stored when no singer was found.
songPerson = 无
}
// System out print(songNameList get( ) toString())
List hrefList = getHref(songNameList get( ) toString())
String songHref = hrefList get( ) toString()
System out println()
String sql = insert into song(songName songPerson songHref) values(? ? ?)
// Values are bound in the same order as the placeholders in sql.
ArrayList list_values = new ArrayList()
list_values add(songName)
list_values add(songPerson)
list_values add(songHref)
dbTools update(sql list_values)
}
}
}
DBTools 数据库连接类
package dbTools
import java util ArrayList
import java sql *
/**
 * Small JDBC helper that opens one MySQL connection and runs parameterized
 * statements on it.
 *
 * <p>The connection is opened in the constructor. If the driver cannot be
 * loaded or the connection cannot be opened, the stack trace is printed and the
 * instance is left without a connection; {@link #query} and {@link #update}
 * then fail with a {@link SQLException}.
 *
 * <p>Statements and result sets are local to each call and are always closed,
 * so one instance no longer leaks JDBC resources across calls.
 */
public class DBTools {
    /** Connection opened once in the constructor; {@code null} if opening failed. */
    private Connection connection;

    /** Loads the MySQL driver and opens the connection to the TestURL schema. */
    public DBTools() {
        try {
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        try {
            // NOTE(review): the port was lost from the published listing; 3306 is
            // the MySQL default - confirm against the real deployment.
            connection = DriverManager.getConnection(
                    "jdbc:mysql://localhost:3306/TestURL", "root", "zhuyi");
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs a parameterized SELECT and returns every row as a {@code String[]}.
     *
     * @param sql SQL text with {@code ?} placeholders
     * @param list_values values bound to the placeholders, in order; may be empty
     * @return list of {@code String[]} rows, one array element per result column
     * @throws Throwable any JDBC failure, or a {@link SQLException} when no
     *     connection is available
     */
    public ArrayList query(String sql, ArrayList list_values) throws Throwable {
        ArrayList<String[]> listRows = new ArrayList<String[]>();
        PreparedStatement statement = prepare(sql, list_values);
        try {
            ResultSet rows = statement.executeQuery();
            try {
                // The column count is the same for every row, so read it once.
                int columnCount = rows.getMetaData().getColumnCount();
                while (rows.next()) {
                    String[] rowinfo = new String[columnCount];
                    for (int i = 0; i < columnCount; i++) {
                        rowinfo[i] = rows.getString(i + 1); // JDBC columns are 1-based
                    }
                    listRows.add(rowinfo);
                }
            } finally {
                rows.close();
            }
        } finally {
            statement.close();
        }
        return listRows;
    }

    /**
     * Runs a parameterized INSERT/UPDATE/DELETE.
     *
     * @param sql SQL text with {@code ?} placeholders
     * @param list_values values bound to the placeholders, in order; may be empty
     * @throws Throwable any JDBC failure, or a {@link SQLException} when no
     *     connection is available
     */
    public void update(String sql, ArrayList list_values) throws Throwable {
        PreparedStatement statement = prepare(sql, list_values);
        try {
            statement.executeUpdate();
        } finally {
            statement.close();
        }
    }

    /** Creates a statement for {@code sql} and binds {@code list_values} to its placeholders. */
    private PreparedStatement prepare(String sql, ArrayList list_values) throws SQLException {
        if (connection == null) {
            throw new SQLException("No database connection available");
        }
        PreparedStatement statement = connection.prepareStatement(sql);
        boolean bound = false;
        try {
            int count = (list_values == null) ? 0 : list_values.size();
            for (int i = 0; i < count; i++) {
                statement.setObject(i + 1, list_values.get(i)); // parameters are 1-based
            }
            bound = true;
        } finally {
            if (!bound) {
                statement.close(); // do not leak the statement when binding fails
            }
        }
        return statement;
    }
}
Servlet调用
package controller
import java io IOException
import java io PrintWriter
import java util List
import javax servlet ServletException
import javax servlet HttpServlet
import javax servlet HttpServletRequest
import javax servlet HttpServletResponse
import webTools IOTOWeb
/**
 * Servlet that triggers the song import: a GET request runs
 * IOTOWeb.getFromBaiduMap, a POST request only writes a small HTML page.
 *
 * NOTE(review): this listing has lost most punctuation and some literals (the
 * target URL, the content type), so it does not compile as shown.
 */
public class TestURL extends HttpServlet {
/**
 * Constructor of the object
 */
public TestURL() {
super()
}
/**
 * Destruction of the servlet <br>
 */
public void destroy() {
super destroy()// Just puts destroy string in log
// Put your code here
}
/**
 * The doGet method of the servlet <br>
 *
 * Creates a new IOTOWeb and runs the import once. Any Throwable raised by the
 * import is caught and its stack trace printed; nothing is written to the
 * response.
 *
 * @param request
 * the request send by the client to the server
 * @param response
 * the response send by the server to the client
 * @throws ServletException
 * if an error occurred
 * @throws IOException
 * if an error occurred
 */
public void doGet(HttpServletRequest request HttpServletResponse response)
throws ServletException IOException {
try {
IOTOWeb iotoWeb = new IOTOWeb()
// NOTE(review): the page address is garbled in this listing.
iotoWeb getFromBaiduMap ( ?id= ?top )
} catch (Throwable e) {
// TODO Auto generated catch block
e printStackTrace()
}
}
/**
 * The doPost method of the servlet <br>
 *
 * Writes a fixed HTML page that names this class and the POST method, then
 * flushes and closes the writer. It does not start an import.
 *
 * @param request
 * the request send by the client to the server
 * @param response
 * the response send by the server to the client
 * @throws ServletException
 * if an error occurred
 * @throws IOException
 * if an error occurred
 */
public void doPost(HttpServletRequest request HttpServletResponse response)
throws ServletException IOException {
response setContentType( text/ )
PrintWriter out = response getWriter()
out
println( <!DOCTYPE HTML PUBLIC \ //W C//DTD HTML Transitional//EN\ >)
out println( <HTML>)
out println( <HEAD><TITLE>A Servlet</TITLE></HEAD>)
out println( <BODY>)
out print( This is )
out print(this getClass())
out println( using the POST method )
out println( </BODY>)
out println( </HTML>)
out flush()
out close()
}
/**
 * Initialization of the servlet <br>
 *
 * @throws ServletException
 * if an error occurs
 */
public void init() throws ServletException {
// Put your code here
}
}
获取金书网的图书名
代码
package webTools
import java io BufferedReader
import java io InputStreamReader
import URL
import java util ArrayList
import java util List
import java util regex Matcher
import java util regex Pattern
import dbTools DBTools
/**
 * Downloads a book detail page, extracts the book title with a regular
 * expression and inserts it into the bookinfo table through DBTools.
 *
 * NOTE(review): this listing has lost most punctuation and some literals, so
 * it does not compile as shown.
 */
public class GetBook {
/**
 * Fetches the page at the given address and returns its text.
 *
 * Lines are appended to one buffer with no separator between them. Unlike
 * IOTOWeb.getHtmlContent, nothing is caught here: every failure propagates
 * to the caller.
 *
 * NOTE(review): the charset argument is garbled ("gb"); presumably a GB-family
 * encoding - confirm.
 *
 * @param URL address of the page to download
 * @return the page text
 * @throws Throwable any failure while opening or reading the URL
 */
public String getHtmlContent(String URL) throws Throwable {
URL url = null
String rowContent =
StringBuffer Content = new StringBuffer()
url = new URL(URL)
BufferedReader in = new BufferedReader(new InputStreamReader(url
openStream() gb ))
while ((rowContent = in readLine()) != null) {
Content append(rowContent)
}
in close()
return Content toString()
}
/**
 * Returns the first span element matching the title pattern, tags included.
 *
 * Only the first match is used. When nothing matches, the initial value of
 * bookName is returned.
 *
 * NOTE(review): the class name in the pattern has lost characters in this
 * listing.
 *
 * @param Content page HTML to search
 * @return the matched span fragment, or the initial value when there is none
 */
public String getBookName(String Content) {
String bookName =
String regex = <span class=\ style \ >[^>]*</span>
Pattern pattern = pile(regex Pattern DOTALL)
Matcher matcher = pattern matcher(Content)
if (matcher find()) {
bookName = matcher group()
}
return bookName
}
/**
 * Applies a replaceAll to the given string.
 *
 * NOTE(review): the arguments are garbled; presumably this removes markup
 * tags so that only the text remains - confirm.
 *
 * @param s text to clean
 * @return the result of the replacement
 */
public String outTag(final String s) {
return s replaceAll( <*?>)
}
// Database helper used by getFromJINSHU; created once per GetBook instance.
DBTools dbtools = new DBTools()
/**
 * Downloads one book page and stores its title.
 *
 * The title is printed and inserted into bookinfo(bookName) only when the
 * guard below passes; otherwise the page is silently skipped.
 *
 * @param URL address of the book page
 * @throws Throwable any failure from the download or the database update
 */
public void getFromJINSHU(String URL) throws Throwable {
String Content = getHtmlContent(URL)
String bookName = outTag(getBookName(Content))
// NOTE(review): garbled guard; presumably "not null and not empty" - confirm.
if (bookName != null &&! equals(bookName)) {
System out println(bookName)
String sql = insert into bookinfo(bookName) values(?)
ArrayList list_values = new ArrayList()
list_values add(bookName)
dbtools update(sql list_values)
}
}
}
调用Servlet
代码
package controller
import java io IOException
import java io PrintWriter
import javax servlet ServletException
import javax servlet HttpServlet
import javax servlet HttpServletRequest
import javax servlet HttpServletResponse
import webTools GetBook
/**
 * Servlet that walks through a range of book pages and imports each one with
 * GetBook.getFromJINSHU.
 *
 * NOTE(review): this listing has lost most punctuation and some literals (loop
 * bounds, page address), so it does not compile as shown.
 */
public class TestBook extends HttpServlet {
/**
 * Constructor of the object
 */
public TestBook() {
super()
}
/**
 * Destruction of the servlet <br>
 */
public void destroy() {
super destroy()// Just puts destroy string in log
// Put your code here
}
// Page counter used by both doGet and doPost. It is an instance field, so its
// value is kept between calls on the same servlet object.
// NOTE(review): the initial value is missing in this listing.
int i =
/**
 * The doGet method of the servlet <br>
 *
 * Builds a page address from the counter i for each iteration and imports
 * that page. When an import throws, the counter is advanced and processing
 * is handed over to doPost, which runs the same loop.
 *
 * NOTE(review): doGet and doPost call each other from their catch blocks, so
 * every failed page adds another level of nested calls.
 *
 * @param request
 * the request send by the client to the server
 * @param response
 * the response send by the server to the client
 * @throws ServletException
 * if an error occurred
 * @throws IOException
 * if an error occurred
 */
public void doGet(HttpServletRequest request HttpServletResponse response)
throws ServletException IOException {
GetBook bookinfo = new GetBook()
for (i <i++) {
String bookURL = /booksinfo/ / + i
+ l
try {
bookinfo getFromJINSHU(bookURL)
} catch (Throwable e) {
// Skip the failing page, then continue in doPost.
i++
doPost(request response)
}
}
}
/**
 * The doPost method of the servlet <br>
 *
 * Same loop as doGet: imports the page for each value of the counter i. When
 * an import throws, the counter is advanced and processing is handed back to
 * doGet.
 *
 * @param request
 * the request send by the client to the server
 * @param response
 * the response send by the server to the client
 * @throws ServletException
 * if an error occurred
 * @throws IOException
 * if an error occurred
 */
public void doPost(HttpServletRequest request HttpServletResponse response)
throws ServletException IOException {
GetBook bookinfo = new GetBook()
for (i <i++) {
String bookURL = /booksinfo/ / + i
+ l
try {
bookinfo getFromJINSHU(bookURL)
} catch (Throwable e) {
// Skip the failing page, then continue in doGet.
i++
doGet(request response)
}
}
}
/**
 * Initialization of the servlet <br>
 *
 * @throws ServletException
 * if an error occurs
 */
public void init() throws ServletException {
// Put your code here
}
}
lishixinzhi/Article/program/Java/hx/201311/25707
BufferedReader input
// Reads f:\123.txt line by line, splits each line on single spaces and prints
// the first three fields. `input` is the BufferedReader declared just above.
try {
    // Assigned before the inner try so the finally block can always close it.
    input = new BufferedReader(new FileReader("f:\\123.txt"));
    try {
        String s;
        while ((s = input.readLine()) != null) { // null means end of file
            String[] info = s.split(" ");
            if (info.length < 3) {
                // A short or blank line would otherwise raise
                // ArrayIndexOutOfBoundsException and abort the whole read.
                continue;
            }
            System.out.println(info[0] + " " + info[1] + " " + info[2]);
        }
    } finally {
        // Close the file even when a read fails part-way through.
        input.close();
    }
} catch (FileNotFoundException e) {
    e.printStackTrace();
} catch (IOException e) {
    e.printStackTrace();
}
把 info[0]、info[1]、info[2] 这三个值放进 insert 语句里就行了（已经过测试）。
不知道你要处理什么样的文本，文本中的内容是否有固定格式。
这里提供一些思路，供参考：
1. 文本文件基本上是字符格式的，可以用 Reader（字符 IO 流）读取。
2. 如果是格式化的文本，可以按数据的长度读取，例如 readInt、readByte 等。
3. 保存到数据库当然用 JDBC；如果读取出来后封装成了 POJO，也可以选择 ORM 框架。
import java.io.BufferedReader
import java.io.FileInputStream
import java.io.IOException
import java.io.InputStreamReader
/**
 * Reads a text file into memory and hands the content to a database writer.
 *
 * @author 樊云升
 */
public class FilesReader {

    public FilesReader() {
    }

    /**
     * Reads the whole file as text.
     *
     * <p>Every line is terminated with {@code "\r\n"} in the result, whatever
     * line ending the file itself uses. The file is decoded with the platform
     * default charset. If the file cannot be opened or a read fails, the stack
     * trace is printed and the text read so far (possibly empty) is returned.
     *
     * @param FILE path of the file to read
     * @return the file content with CRLF after each line
     */
    public String re_content(String FILE) {
        StringBuilder content = new StringBuilder();
        try {
            BufferedReader bufRead =
                    new BufferedReader(new InputStreamReader(new FileInputStream(FILE)));
            try {
                String str;
                while ((str = bufRead.readLine()) != null) {
                    content.append(str).append("\r\n");
                }
            } finally {
                // Release the file handle even when a read fails.
                bufRead.close();
            }
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
        return content.toString();
    }

    /**
     * Writes the given text to the database.
     *
     * <p>The database code is still a placeholder; until it is filled in, this
     * method does nothing and reports success.
     *
     * @param content text to store
     * @return {@code true} on success, {@code false} if writing failed
     */
    public boolean writeFile(String content) {
        try {
            // TODO: database write goes here.
        } catch (Exception e) {
            return false;
        }
        return true;
    }

    public static void main(String[] args) {
        FilesReader reader = new FilesReader();
        String content = reader.re_content("D:\\AJAX.htm");
        reader.writeFile(content);
    }
}
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)