Platinum Partner

Extract The Body Of An HTML Document

For example, print out just the body of Google's home page:


use strict;
use warnings;

use LWP::UserAgent;
use HTML::TreeBuilder;

# Fetch a page and print the inner HTML of its <body> element (that is,
# every child of <body>, without the <body> tags themselves) to STDOUT.
my $url = 'http://www.google.com/';

my $ua  = LWP::UserAgent->new;
my $res = $ua->get($url);

# Fail loudly instead of silently printing nothing on a bad response.
die "GET $url failed: ", $res->status_line, "\n"
  unless $res->is_success;

# decoded_content undoes any Content-Encoding and charset encoding so the
# parser sees characters rather than raw bytes; it returns undef when
# decoding fails, in which case fall back to the raw payload.
my $html = $res->decoded_content;
$html = $res->content unless defined $html;

# The parsed text may contain non-ASCII characters; encode them on output.
binmode STDOUT, ':encoding(UTF-8)';

my $tree = HTML::TreeBuilder->new_from_content($html);
$tree->elementify();

my $body = $tree->find('body');
if ($body) {
  for my $node ($body->content_list()) {
    if (ref $node) {
      # Child element: serialise it together with its subtree.
      print $node->as_HTML();
    }
    else {
      # content_list returns text segments as plain (entity-decoded)
      # strings, not objects, so calling as_HTML on them would die.
      # Re-escape the text so the output is still valid HTML.
      # HTML::Entities is loaded by HTML::Element itself.
      print HTML::Entities::encode_entities($node);
    }
  }
}

# Release the tree explicitly, as the HTML::Tree documentation advises
# (older releases hold circular parent/child references).
$tree->delete;

{{ tag }}, {{tag}},

{{ parent.title || parent.header.title}}

{{ parent.tldr }}

{{ parent.urlSource.name }}
{{ parent.authors[0].realName || parent.author}}

{{ parent.authors[0].tagline || parent.tagline }}

{{ parent.views }} ViewsClicks
Tweet

{{parent.nComments}}