c语言提取html标签内容

c语言提取html标签内容,第1张

#include <math.h>
#include <stdio.h>
#include <string.h>

/*
 * Forward declaration so main() can call fetch_str() before its definition.
 * Copies the text found between "<a>" and "</a>" in str_in into str_out.
 */
void fetch_str(char *str_in, char *str_out);

/*
 * Demo driver: extracts the content of the <a>...</a> element from a fixed
 * sample string and prints both the input and the extracted text.
 */
int main(void)
{
    char test[] = "<a>This is the <...>string</a>";
    /* Larger than the sample input, so the extracted text always fits. */
    char result[256];

    fetch_str(test, result);

    printf("\ntest\t=%s\n", test);
    printf("\nresult\t=%s\n", result);

    /* 0 reports success to the caller; a non-zero status signals failure. */
    return 0;
}

/*
 * Extract the text enclosed by an <a>...</a> element.
 *
 * The result spans from just after the FIRST "<a>" in str_in up to (but not
 * including) the LAST "</a>" that follows it, so nested or repeated tags in
 * between are copied verbatim.
 *
 * str_in   NUL-terminated input text. It is only read.
 * str_out  Destination buffer. The interface carries no size, so the caller
 *          must provide at least strlen(str_in) + 1 bytes. It always receives
 *          a NUL-terminated string; it is set to "" when str_in is NULL or
 *          when no well-formed "<a>" ... "</a>" pair is present.
 *
 * Nothing is written when str_out itself is NULL.
 */
void fetch_str(char *str_in, char *str_out)
{
    static const char begin_str[] = "<a>";
    static const char end_str[] = "</a>";
    const char *start;
    const char *last_end = NULL;
    const char *p;
    size_t len;

    if (str_out == NULL) {
        return;
    }
    if (str_in == NULL) {
        str_out[0] = '\0';
        return;
    }

    start = strstr(str_in, begin_str);
    if (start == NULL) {
        /* No opening tag: report "nothing found" rather than copying garbage. */
        str_out[0] = '\0';
        return;
    }
    start += sizeof begin_str - 1;

    /* Keep scanning so that the last closing tag wins. */
    for (p = strstr(start, end_str); p != NULL; p = strstr(p + 1, end_str)) {
        last_end = p;
    }
    if (last_end == NULL) {
        /* Opening tag without a closing tag after it. */
        str_out[0] = '\0';
        return;
    }

    len = (size_t)(last_end - start);
    /* memmove keeps the copy well defined even if str_out aliases str_in. */
    memmove(str_out, start, len);
    str_out[len] = '\0';
}

参考下面代码:

#include <stdio.h>

#include <streamhtmlparser/htmlparser.h>

/*
 * Streams stdin through the streamhtmlparser HTML parser and echoes it to
 * stdout. Every '$' in the input is replaced by a "[[ tag=... attr=... ]]"
 * marker describing the parser's current tag and attribute name.
 *
 * Returns 0 on success, 1 if the parser could not be created.
 */
int main(void)
{
    /* getchar() returns int so that EOF stays distinguishable from any byte. */
    int getchar_ret;
    htmlparser_ctx *parser = htmlparser_new();

    if (parser == NULL) {
        fprintf(stderr, "htmlparser_new failed\n");
        return 1;
    }

    while ((getchar_ret = getchar()) != EOF) {
        char c = (char)getchar_ret;

        if (c == '$') {
            /* If we received a '$' character, we output the current tag and
             * attribute name to stdout. */
            const char *tag = htmlparser_tag(parser);
            const char *attr = htmlparser_attr(parser);

            printf("[[ ");
            if (tag) {
                printf("tag=%s ", tag);
            }
            if (attr) {
                printf("attr=%s ", attr);
            }
            printf("]]");
        } else {
            /* If we read any other character, we pass it to the parser and
             * echo it to stdout. */
            htmlparser_parse_chr(parser, c);
            putchar(c);
        }
    }

    /* Release the parser context allocated by htmlparser_new(). */
    htmlparser_delete(parser);
    return 0;
}

1. "COM组建"应为"COM组件"。

2.建议还是不要手动解析HTML,那个似乎太费劲了。

3. 在实际运用中,我也用过一些其它的 XML 解析工具,比如 libxml,但感觉在 Windows 上还是 MSXML 最方便。


欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/zaji/6274119.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2023-03-19
下一篇 2023-03-19

发表评论

登录后才能评论

评论列表(0条)

保存