demo:
#!/usr/bin/perl -w
# Fetch a web page and print the text of its <title> element.
#
# Usage: perl fetch_title.pl [URL]
#
# When no URL is given on the command line, the script fetches
# http://www.taobao.com, the address the demo originally had hard-coded.

# Restrict unsafe constructs and report questionable code.
use strict;
use warnings;

# HTTP client used to download the page.
use LWP::UserAgent;

# main(@args)
#
# Downloads one page and prints its title to STDOUT.
#
#   $args[0]  Optional URL to fetch. Falls back to http://www.taobao.com
#             when it is missing or empty, so calling main() without
#             arguments behaves like the original demo.
#
# Prints "find page Title : <title>" when a <title> element is found,
# otherwise "no page Title for url : <url>".
# Dies with the HTTP status line when the request does not succeed.
sub main {
    # Inside a subroutine, @_ holds the parameters passed by the caller.
    my ($url) = @_;
    $url = 'http://www.taobao.com' unless defined $url && length $url;

    # decoded_content() yields characters, not bytes; encode them on output
    # so non-ASCII titles do not trigger "Wide character in print".
    binmode STDOUT, ':encoding(UTF-8)'
        or die "cannot set UTF-8 output on STDOUT: $!\n";

    # Create the user agent, give it a 20 second timeout, and present the
    # request as coming from MSIE 8 (token case restored: "Windows",
    # "Trident").
    my $ua = LWP::UserAgent->new;
    $ua->timeout(20);
    $ua->agent(
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; SV1; .NET CLR 2.0.50727)"
    );

    # Send a GET request and keep the response object.
    my $resp = $ua->get($url);

    # Report the status information and stop when the request failed.
    die $resp->status_line unless $resp->is_success;

    # Response body (HTML source). decoded_content() can return undef when
    # the body cannot be decoded; treat that the same as "no title found".
    my $content = $resp->decoded_content;
    $content = '' unless defined $content;

    # The parentheses form capture group 1, available as $1 right after a
    # successful match. The quantifier is non-greedy so the match stops at
    # the FIRST closing </title>; /s lets "." cross newlines and /i ignores
    # tag case. [^>]* tolerates attributes on the opening tag.
    if ( $content =~ m{<title\b[^>]*>(.*?)</title>}si ) {
        my $head = $1;
        print "find page Title : $head\n";
    }
    else {
        print "no page Title for url : $url\n";
    }

    return;
}

# @ARGV contains the command-line arguments intended for the script;
# pass them on to main().
main(@ARGV);

# Everything after __END__ is article text, not part of the program.
__END__
总结
以上是内存溢出为你收集整理的Perl抓取网页信息全部内容,希望文章能够帮你解决Perl抓取网页信息所遇到的程序开发问题。
如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)