The following simple Perl script automatically searches a web page for some text, such as HTML links:
#!/usr/bin/perl -w
use strict;
# this module provides uniq(), used to remove duplicate entries
use List::MoreUtils qw(uniq);
# this is the module used to interact with web pages
use LWP::Simple;
# Fetch a web page, save a copy to a temp file, then print every distinct
# server name found in its http/https links, one per line.

#variables declaration
my @match;
my @match_uniq;
# Placeholders: replace with a writable temp-file path and the page URL.
my $file = '<your temp file>';
my $url  = 'http://<your url>';

# Fetch the page with the "get" function of the LWP::Simple module.
# get() returns undef when the request fails, so stop here in that case.
my $webpage = get($url);
die "Could not fetch $url\n" unless defined $webpage;

# Write the web page into the file (three-argument open, lexical handle).
open my $out, '>', $file or die "Cannot open $file for writing: $!";
print {$out} $webpage or die "Cannot write to $file: $!";
# Buffered write errors may only surface at close, so check it as well.
close $out or die "Cannot close $file: $!";

# Read the saved copy back line by line and push server names into @match.
open my $in, '<', $file or die "Cannot open $file for reading: $!";
while (my $line = <$in>) {
    # The /g loop collects every URL on the line, not just the first one.
    # The pattern captures the first three dot-separated \w+ labels that
    # follow "http://" or "https://"; hosts with fewer labels are skipped.
    while ($line =~ m{https?://(\w+\.\w+\.\w+)}g) {
        push @match, $1;
    }
}
close $in;

# Strip out a leading "www." only. This happens before de-duplication so
# that names which become identical after stripping are merged.
s/\Awww\.// for @match;

# Remove duplicate entries.
@match_uniq = uniq(@match);

print "$_\n" for @match_uniq;
Perl - How to Parse a web page
Posted on giovedì 23 dicembre 2010
by Ivano Binetti
Iscriviti a:
Commenti sul post (Atom)
0 Responses to "Perl - How to Parse a web page":
Posta un commento