码:
# Return the class attribute of a token produced by HTML::TokeParser::get_tag,
# or '' when the token is undef, is an end tag (whose second element is a
# plain string, not an attribute hash), or has no class attribute.
sub _tag_class {
    my ($tag) = @_;
    return '' unless defined $tag && ref $tag->[1] eq 'HASH';
    my $class = $tag->[1]{class};
    return defined $class ? $class : '';
}

# Read $count tags from $stream and return the last one read.
# Returns undef if the document ends before $count tags were seen.
sub _nth_tag {
    my ( $stream, $count ) = @_;
    my $tag;
    for ( 1 .. $count ) {
        $tag = $stream->get_tag;
        return unless defined $tag;
    }
    return $tag;
}

# Fetch $mechlink, walk its table rows (each row starts with a
# <td class="dateStamp"> cell) and send one Outlook message per row until a
# row whose info cell equals $comID is reached.
#
# Parameters:
#   $comID    - identifier of the last row already handled; scanning stops
#               when a row with this identifier is found.
#   $mechlink - URL of the page to fetch.
#
# Returns:
#   the identifier of the first row seen on the page when at least one new
#   row was processed; otherwise $comID (also returned when the page cannot
#   be fetched or a row does not have the expected "name" cell).
#
# Dies if the fetched content cannot be handed to HTML::TokeParser.
#
# All per-call state is lexical. The original kept $tcount, $retval, $link
# and friends in package globals, so values from an earlier call leaked into
# later ones.
sub func {
    my ( $comID, $mechlink ) = @_;

    my $mechanize = WWW::Mechanize->new(
        noproxy     => 0,
        stack_depth => 5,
        autocheck   => 1,
    );
    $mechanize->proxy( https => undef );

    # The trailing 1 makes the eval result a reliable success flag.
    my $fetched = eval {
        my $response = $mechanize->get($mechlink);
        $response->is_success or die $response->status_line;
        1;
    };
    return $comID unless $fetched;

    # HTML::TokeParser treats a plain scalar as a file name; the document
    # itself has to be passed by reference.
    my $content = $mechanize->content;
    my $stream  = HTML::TokeParser->new( \$content )
        or die "Unable to parse content of $mechlink";

    my $newest_id;    # identifier of the first (newest) row on the page

    while ( my $tag = $stream->get_tag('td') ) {
        next unless _tag_class($tag) eq 'dateStamp';

        # The date text itself is not used; it is read only to move the
        # stream past the cell contents.
        $stream->get_trimmed_text('/td');

        # Skip the closing </td>; the next tag must be the "name" cell.
        $tag = _nth_tag( $stream, 2 );
        return $comID unless _tag_class($tag) eq 'name';
        my $name = $stream->get_trimmed_text('/td');

        # Four tags further on is the cell holding the row identifier.
        _nth_tag( $stream, 4 );
        my $info = $stream->get_trimmed_text('/td');
        print "$name?\n";

        # Reached the row that was already handled: nothing newer remains.
        last if $info eq $comID;

        print "You've Got Mail! $info $comID\n";
        $newest_id = $info unless defined $newest_id;

        # Three tags further on an <a> element is expected; its href is
        # appended to the site root.
        $tag = _nth_tag( $stream, 3 );
        my $link;
        if (   defined $tag
            && $tag->[0] eq 'a'
            && ref $tag->[1] eq 'HASH'
            && defined $tag->[1]{href} )
        {
            $link = "http://www.abc.com" . $tag->[1]{href};
        }

        my $outlook = Mail::Outlook->new();
        my $message = $outlook->create();
        $message->To('abc@def.com');
        $message->Cc('abc@def.com;abc@def.com');
        $message->Subject("$name - $info");
        $message->Body(" ");
        # NOTE(review): $link is a URL, not a local file path - confirm that
        # Mail::Outlook's Attach accepts it.
        $message->Attach($link) if defined $link;
        $message->send;
    }

    return defined $newest_id ? $newest_id : $comID;
}

解决方法
对于这种任务,我更喜欢使用 HTML::TableExtract.它非常容易使用:
use HTML::TableExtract;

# Extract the tables that have the listed column headers from the HTML
# document held in $HTML, then visit every row of every matching table.
my $te = HTML::TableExtract->new( headers => [qw(header1 header2)] );
$te->parse($HTML);

foreach my $ts ( $te->tables ) {
    foreach my $row ( $ts->rows ) {
        # Each row is an array reference with one element per cell.
        my ( $field1, $field2 ) = @$row;

        # Your code here
    }
}

总结
以上是内存溢出为你收集整理的Perl:网站抓取时出现意外行为全部内容,希望文章能够帮你解决Perl:网站抓取时出现意外行为所遇到的程序开发问题。
如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)