C#读取htm文件_教程

你说的这个一般叫做网页采集，几句话说不清的，不过百度下C# 采集　可以找到很多现成的代码，不过不能直接用，因为它的原理就是读取html的源代码，然后从中按你的需要去分析截取你需要的内容。

#include <windows.h>

#include <stdio.h>

int main()

{

WinExec("iexplore C:\index.html", SW_SHOW)

return 0

}

#include <stdio.h>

#include <math.h>

void fetch_str(char *str_in, char *str_out)

int main(){

char test[] = "<a>This is the <...>string</a>"

char result[256]

fetch_str(test, result)

printf("\ntest\t=%s\n", test)

printf("\nresult\t=%s\n",result)

return 1

}

void fetch_str(char *str_in, char *str_out)

{

char begin_str[] = "<a>"

char end_str[] = "</a>"

int index_end =0

int index_begin=0

int flag_begin =0

int flag_end =0

int str_index=0

int i

// to find the max index of str_in

while(str_in[str_index]!='\0')

{

str_index++

}

str_index--

//printf("%s %s", begin_str, end_str)

int count=0

while(str_in[count]!='\0')

{

// to find the begin index of the target string

if( flag_begin==0 &&count<=(str_index-2) )

{

if( str_in[count]==begin_str[0] &&str_in[count+1]==begin_str[1] &&str_in[count+2]==begin_str[2] )

{

flag_begin=1

index_begin=count+3

}

// to find the end index of the target string

//if( flag_end==0 &&count<=(str_index-3) )

if(count<=(str_index-3) )

{

if( str_in[count]==end_str[0] &&str_in[count+1]==end_str[1] &&str_in[count+2]==end_str[2] &&str_in[count+3]==end_str[3])

{

flag_end=1

index_end=count-1

}

count++

}

//printf("\nbegin_index=%d, end_index=%d\n", index_begin, index_end)

// to copy the target string to str_out

count=0

for(i=index_begini<=index_endi++)

{

str_out[count]=str_in[i]

count++

}

str_out[count]='\0'

return

}

欢迎分享，转载请注明来源：内存溢出

原文地址: http://outofmemory.cn/tougao/8131438.html

C#读取htm文件

发表评论

评论列表（0条）