Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Tomcat42
perl-HTML-TagParser
parse.pl
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File parse.pl of Package perl-HTML-TagParser
#! /usr/bin/perl -w -t # # a small test script, that # takes your favourite web pages and turns it into # a parsed perl data structure. use strict; use HTML::TagParser; use Data::Dumper; $Data::Dumper::Sortkeys = 1; my $text = eval { local $/; open my $in, "<", shift; <$in>}; my $dom = HTML::TagParser->new($text); my $r = {}; for my $e ($dom->getElementsByTagName('form')) { my $s = $e->subTree; my $sr = {}; for my $se ($s->getElementsByTagName('input')) { store_element($sr, $se, 'input', 0); } for my $se ($s->getElementsByTagName('textarea')) { store_element($sr, $se, 'textarea', 1); } store_element($r, $e, 'form', $sr); } for my $e ($dom->getElementsByTagName('a')) { my $attr = $e->attributes; next unless defined $attr->{href}; store_element($r, $e, 'href', 0); } die Dumper $r; exit 0; ############################################## sub store_element { my ($r, $e, $tn, $container) = @_; my $attr = $e->attributes; my $inp = { tagname => $e->tagName, text => $e->innerText()||'', }; delete $inp->{text} unless length $inp->{text}; delete $attr->{'/'}; if ($container) { $inp->{attr} = $attr; if (ref $container) { for my $c (keys %$container) { $inp->{$c} = $container->{$c} } } } else { ## inline attr. for my $a (keys %$attr) { $inp->{$a} = $attr->{$a} } } push @{$r->{$tn}}, $inp; for my $a qw(id name type) { push @{$r->{$tn.'_by_'.$a}{$attr->{$a}}}, $inp if defined $attr->{$a}; } }
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor