静态页面如何做全文搜索？_随笔

JS 全文搜索

实例:

<html>

<head>

<title>搜索结果－www.51windows.Net</title>

<style>

<!--

body, td, input, select { font-family: Verdana; font-size: 10pt }

-->

</style>

</head>

<body>

<font color=red>请另存为html文件再搜。。</font>

<form name="database">

<input type="hidden" name="list"

value="http://www.njcatv.net~南京有线电视台网站|njcatv 南京有线电视台网页特效影音空间新闻 javascript java applet 健康之桥^为广大南京市民提供全方位的信息和帮助*http://www.sina.com.cn~最全的中文新闻发部站点|sina 新浪新闻 super 王志东聊天室^国内最大的新闻网站!*http://www.163.net~最大的综合中文网站|163 netease 网易丁磊新闻 yeah 126 娱乐 freemail 聊天室^最大的综合中文网站,为你提供全方位的各种信息，是你上网的好去处!*http://www.sohu.com~最大的中文引擎搜索站点|sohu search 搜索引擎张朝阳聊天室^最大的中文引擎搜索站点，是你上网查找资料的最好去处!*http://www.sohu.com~最大的中文引擎搜索站点|sohu search 搜索引擎张朝阳聊天室^最大的中文引擎搜索站点，是你上网查找资料的最好去处!*http://www.sohu.com~最大的中文引擎搜索站点|sohu search 搜索引擎张朝阳聊天室^最大的中文引擎搜索站点，是你上网查找资料的最好去处!*http://www.sohu.com~最大的中文引擎搜索站点|sohu search 搜索引擎张朝阳聊天室^最大的中文引擎搜索站点，是你上网查找资料的最好去处!*http://www.sohu.com~最大的中文引擎搜索站点|sohu search 搜索引擎张朝阳聊天室^最大的中文引擎搜索站点，是你上网查找资料的最好去处!*http://www.51windows.com~无忧视窗-windows学习站|51windows 海娃无忧视窗^学习windows的好地方">

</form>

<script language="JavaScript">

<!--

/**
 * Record constructor for one searchable page.
 *
 * Leading space characters are removed from `url` and `title`;
 * `keywords` and `description` are stored exactly as given.
 *
 * @param url         address of the page
 * @param title       text shown as the link caption
 * @param keywords    keyword list that is searched
 * @param description summary text that is searched and displayed
 */
function Page(url,title,keywords,description) {
  var skip;

  // Count the leading blanks of the url, then cut them off in one step.
  for (skip = 0; skip < url.length && url.charAt(skip) == " "; skip++) {
  }
  this.url = url.substring(skip, url.length);

  // Same treatment for the title.
  for (skip = 0; skip < title.length && title.charAt(skip) == " "; skip++) {
  }
  this.title = title.substring(skip, title.length);

  this.keywords = keywords;
  this.description = description;

  return this;
}

/**
 * Constructor: parses the global string `amorphous` into Page records.
 *
 * Record layout inside `amorphous`:
 *     url ~ title | keywords ^ description *
 * Each well-formed record is stored as this[database_length] and the
 * global counter `database_length` is incremented. A malformed record is
 * skipped up to the next "*"; an alert is shown for it when at least 30
 * characters remain from the current position.
 *
 * Reads globals: amorphous, database_length, Page.
 * Writes global: database_length.
 */
function Database() {
  // Scratch positions are local so they do not leak into the global scope.
  var pos = 0, pos1, pos2, pos3, pos4, next;

  while ((pos1 = amorphous.indexOf("~", pos)) != -1) {
    pos2 = amorphous.indexOf("|", pos1 + 1);
    pos3 = amorphous.indexOf("^", pos2 + 1);
    pos4 = amorphous.indexOf("*", pos3 + 1);

    // The separators must appear in order, and the record terminator must
    // be the first "*" after the record start (no "*" inside the record).
    if ((pos2 != -1)
        && (pos2 < pos3) && (pos3 < pos4)
        && (pos4 <= amorphous.indexOf("*", pos))) {
      this[database_length++] = new Page(amorphous.substring(pos, pos1),
                                         amorphous.substring(pos1 + 1, pos2),
                                         amorphous.substring(pos2 + 1, pos3),
                                         amorphous.substring(pos3 + 1, pos4));
      pos = pos4 + 1;
    } else { // error reading amorphous database
      if (pos + 30 <= amorphous.length)
        alert('Error reading in amorphous database around "'
              + amorphous.substring(pos, pos + 30) + '"');

      // Resume after the next record terminator. If there is none, stop:
      // indexOf would return -1 and restart the scan from 0 forever.
      next = amorphous.indexOf("*", pos);
      if (next == -1)
        break;
      pos = next + 1;
    }
  }

  return this;
}

/**
 * Runs a keyword search over the global `database` and leaves the hits in
 * the global `menu` / `menu_length`.
 *
 * The query is split on spaces into the global `words` / `words_length`
 * (also read later by redWord), unless the mode is "exact", in which case
 * the whole query is treated as a single term. Matching is a
 * case-insensitive substring test against "title description keywords".
 *
 * Modes (global `and_search`):
 *   "and"   - every word must match; each further word narrows `menu`.
 *   "or"    - any word may match; new hits are appended without duplicates.
 *   "exact" - the unsplit query must match.
 *
 * Reads globals:  and_search, database, database_length, menu.
 * Writes globals: menu (entries), menu_length, words, words_length.
 *
 * @param str the raw query text
 */
function search(str) {
  // Scratch state is local; only the result/word globals are shared.
  var temp = new Object();
  var temp_length = 0;
  var pos, q, n, i, j, h, len, combo, added, duplicate, narrowing, source;

  menu_length = 0;
  words_length = 0;
  words = new Object();

  // Split the query on single spaces, dropping empty pieces.
  while ((pos = str.indexOf(" ")) != -1
         && and_search != "exact") {
    words[words_length] = str.substring(0, pos);
    if (words[words_length].length > 0)
      words_length++;
    if (str.length == 1)
      str = "";
    else
      str = str.substring(pos + 1, str.length);
  }
  if (str.length > 0)
    words[words_length++] = str;

  for (q = 0; q < words_length; q++) {
    temp_length = 0;
    str = words[q].toLowerCase();

    // In AND mode every word after the first only re-filters earlier hits.
    narrowing = (and_search == "and" && q > 0);
    len = (narrowing ? menu_length : database_length);

    for (n = 0; n < len; n++) {
      source = (narrowing ? menu[n] : database[n]);
      combo = (source.title + " " + source.description
               + " " + source.keywords).toLowerCase();
      if (combo.indexOf(str) != -1) // found
        temp[temp_length++] = source;
    }

    if (and_search != "and" && q > 0) {
      // OR mode, later words: append hits that are not already listed.
      added = 0;
      for (i = 0; i < temp_length; i++) {
        duplicate = false;
        for (j = 0; j < menu_length && !duplicate; j++) {
          if (menu[j] == temp[i])
            duplicate = true;
        }
        if (!duplicate)
          menu[menu_length + (added++)] = temp[i];
      }
      menu_length += added;
    } else {
      // First word, or AND narrowing: the hits replace the current menu.
      for (h = 0; h < temp_length; h++)
        menu[h] = temp[h];
      menu_length = temp_length;
    }
  }
}

/**
 * Submit handler of the search form (document.entryform).
 *
 * Rejects an empty or single-space keyword with an alert. Otherwise it
 * stores the chosen mode in the global `and_search` and reloads the page
 * with the escaped keyword, plus "&and" or "&exact" when the mode is not
 * the default "or", as the query string.
 *
 * @return always false, so the browser does not submit the form itself
 */
function entry() {
  var form = document.entryform;
  var typed = form.keyword.value;

  if (typed.length == 0 || typed == " ") {
    alert("你必须填写关键字!");
    return false;
  }

  // Option order in the select: 0 = AND, 1 = OR, 2 = exact.
  var picked = form.and_or.selectedIndex;
  if (picked == 2) {
    and_search = "exact";
  } else if (picked == 0) {
    and_search = "and";
  } else {
    and_search = "or";
  }

  // "or" is the default mode and is therefore not added to the URL.
  var suffix = "";
  if (and_search != "or") {
    suffix = "&" + and_search;
  }

  location.href = location.pathname + "?" + escape(typed) + suffix;
  return false;
}

/**
 * Wraps every occurrence of each search word in <font color=red> tags.
 *
 * Works in two phases so that the inserted tags are never matched by a
 * later search word: first each hit is bracketed with the one-character
 * markers "*" and "|", then all markers are expanded into the opening and
 * closing tags. Matching is case-insensitive; the original casing of the
 * text is kept.
 *
 * Reads globals: words, words_length (filled by search()).
 *
 * @param str text to highlight
 * @return the text with the highlight markup inserted
 */
function redWord(str) {
  var r, pos, word, val;

  // Phase 1: bracket every hit with "*" ... "|".
  for (r = 0; r < words_length; r++) {
    // Each pass inserts two characters, so the next scan resumes 3 past
    // the previous hit; starting at -3 makes the first scan begin at 0.
    pos = -3;
    word = words[r].toLowerCase();
    while ((pos = str.toLowerCase().indexOf(word, pos + 3)) != -1) {
      val = pos + word.length;
      str = str.substring(0, pos) + "*"
          + str.substring(pos, val) + "|"
          + str.substring(val, str.length);
    }
  }

  // Phase 2a: "*" becomes the opening tag (16 characters long).
  pos = -16;
  while ((pos = str.toLowerCase().indexOf("*", pos + 16)) != -1)
    str = str.substring(0, pos) + "<font color=red>"
        + str.substring(pos + 1, str.length);

  // Phase 2b: "|" becomes the closing tag (7 characters long).
  pos = -7;
  while ((pos = str.toLowerCase().indexOf("|", pos + 7)) != -1)
    str = str.substring(0, pos) + "</font>"
        + str.substring(pos + 1, str.length);

  return str;
}

// ---- Page start-up: load the database, run the query, render the page ----

// Raw record list, taken from the hidden "list" field of the "database" form.
var amorphous = document.database.list.value;

// The parser needs a terminating "*" followed by at least one character.
temp_str = amorphous.substring(amorphous.length - 2, amorphous.length);
if (temp_str.indexOf("*") == -1)
  amorphous += "* ";
else
  amorphous += " "; // amorphous database must have characters after last asterisk

database_length = 0; // Netscape 2 fix
var database = new Database(); // read in from amorphous database

menu_length = 0; // Netscape 2 fix
var menu = new Object();

string = "";
and_search = "or";

// Escapes text taken from the URL before it is written into the page, so a
// crafted query string cannot close the value attribute or inject markup.
function htmlEscapeQuery(text) {
  return text.replace(/&/g, "&amp;")
             .replace(/</g, "&lt;")
             .replace(/>/g, "&gt;")
             .replace(/"/g, "&quot;");
}

if (location.search.length > 1) {
  string = unescape(location.search.substring(1, location.search.length));

  // A trailing "&and" / "&exact" / "&or" selects the search mode.
  if (string.substring(string.length - 4, string.length) == "&and") {
    string = string.substring(0, string.length - 4);
    and_search = "and";
  } else if (string.substring(string.length - 6, string.length) == "&exact") {
    string = string.substring(0, string.length - 6);
    and_search = "exact";
  } else if (string.substring(string.length - 3, string.length) == "&or") {
    string = string.substring(0, string.length - 3);
    and_search = "or";
  }

  // Search with the text exactly as typed; escaping is for display only.
  search(string);
}

var shown_query = htmlEscapeQuery(string);

document.write('<form name="entryform" onSubmit="return entry()">'
  + 'Search for:<input type="text" size=22 '
  + 'name="keyword" value="' + shown_query + '">'
  + '<input type="button" value="Search" onClick="entry()"><br><select name="and_or" '
  + 'size=1><option' + (and_search == "and" ? " selected" : "") + '>Search All word '
  + '(AND)<option' + (and_search == "or" ? " selected" : "") + '>Search any word '
  + '(OR)<option' + (and_search == "exact" ? " selected" : "") + '>Exact '
  + 'word</select></form><br>');

if (location.search.length > 1)
  document.write('查询结果:<br><br>\n');

// One entry per hit: linked title, highlighted description and keywords.
for (n = 0; n < menu_length; n++)
  document.write('<a href="' + menu[n].url + '">' + menu[n].title
    + '</a><br>' + redWord(menu[n].description) + '<br>Keywords: '
    + redWord(menu[n].keywords) + '<br><br>\n');

if ((menu_length == 0) && (location.search.length > 1))
  document.write('对不起:你查询的关键字"' + shown_query + '"没有发现!\n');

// -->

</script>

</body>

</html>

目前行业网站的全文检索的方式主要有两种

方式一:通过数据库自带的全文索引

方式二:通过程序来自建全文索引系统

以sql server 2005为例

2005本身就自带全文索引功能,你可以先对数据库表

建立索引,具体如何建索引可以在网上搜索一下。建立完索引之后,你就可以用sql来实现检索功能,例如:select * from ytbxw where

contains(字段,'中国')。多个查询值之间可以用and 或

or来实现,在单表以及单表视图上建全文索引对2005来说根本不是问题,但在多表视图建全文索引2005目前还无法实现这个功能,拿

www.ytbxw.com为例,其每个栏目的信息都是分开存放的,所以在检索上就无法用该方法来解决这个问题.

下面重点说一下如何用程序来实现检索功能

如果你想自己开发一个全文检索系统,我想这是相当复杂的事情,要想实现也不是那么容易,所以在这里我推荐一套开源程序,那就是dotlucene。我想大家可能都听过这个东东吧,下面我就讲讲如何来实现多表情况下的全文检索.

1、新建winform项目，把lucene.net.dll添加到该项目中来

2、创建一个类，类名可以自己取

// Builds a Lucene.Net full-text index from article records.
// NOTE(review): the framework identifiers (IndexWriter, StandardAnalyzer,
// Field.Text, ...) were restored to the casing used by the dotlucene /
// Lucene.Net 1.x API; confirm against the Lucene.Net.dll version referenced
// by the project.
public class indexer
{
    private IndexWriter writer;

    // Creates the index files under the given directory
    // (the third constructor argument `true` is passed as in the article).
    public indexer(string directory)
    {
        writer = new IndexWriter(directory, new StandardAnalyzer(), true);
        writer.SetUseCompoundFile(true);
    }

    // Adds one document to the index.
    //   Field.Text:      indexed and retrievable
    //   Field.UnIndexed: not indexed
    public void addhtmldocument(string path, string title, string content)
    {
        Document doc = new Document();
        doc.Add(Field.Text("text", content));
        doc.Add(Field.UnIndexed("path", path));
        doc.Add(Field.Text("title", title));
        writer.AddDocument(doc);
    }

    // Strips HTML tags from the markup and turns non-breaking spaces into
    // ordinary spaces.
    private string parsehtml(string html)
    {
        string temp = Regex.Replace(html, "<[^>]*>", "");
        // Handle both the entity and the already-decoded character.
        return temp.Replace("&nbsp;", " ").Replace("\u00A0", " ");
    }

    // Extracts the article title from the page markup.
    private string gettitle(string html)
    {
        Match m = Regex.Match(html, "<title>(.*)</title>");
        // Groups.Count is 2 for this pattern even when nothing matched,
        // so test Success to make the fallback below reachable.
        if (m.Success)
            return m.Groups[1].Value;
        return "(unknown)";
    }

    // Adds news articles to the index.
    public void addnews()
    {
        // Fetching the records from the database is omitted in the article;
        // dr, pagesize and rootid presumably come from that omitted code.
        for (int i = 1; i <= pagesize; i++)
        {
            rootid = int.Parse(dr["classid"].ToString().Substring(0, 2));

            // Write the record to the index.
            addhtmldocument("http://www.ytbxw.com/info/" + dr["id"].ToString() + ".html",
                dr["title"].ToString(), parsehtml(dr["content"].ToString()));
        }
    }

    // Optimizes and closes the index.
    public void close()
    {
        writer.Optimize();
        writer.Close();
    }
}

欢迎分享，转载请注明来源：内存溢出

原文地址: http://outofmemory.cn/zaji/7239088.html

静态页面如何做全文搜索？

发表评论

评论列表（0条）