my $x = $_[0];
foreach my $c ($x->content_list) {
next if (ref($c) && $c->tag() eq "~comment");
- print "$c\n" unless ref($c);
+ next if (ref($c) && $c->tag() eq "script");
+ next if (ref($c) && $c->tag() eq "style");
+ if (!ref($c)) {
+ print "$c\n";
+ }
if (ref($c) && $c->attr('alt')) {
print $c->attr('alt')."\n";
}
+ if (ref($c) && $c->attr('title')) {
+ print $c->attr('title')."\n";
+ }
+ if (ref($c) && $c->tag() eq "input" && $c->attr('value')) {
+ print $c->attr('value')."\n";
+ }
if (ref($c) && $c->tag() eq 'meta') {
print $c->attr('content')."\n ";
}
if( !defined(%{$strhash}->{$str}) )
{
# the line is not already in the list so add it
- %{$strhash}->{$str} = 1;
+ %{$strhash}->{$str}=1;
}
}
}
while( my $line = <$fh> )
{
chomp $line;
-
+
# extracts the two fields
- my ($original, $translated) = split(/$split_char/, $line, 2);
+ my ($original, $translated,$nb) = split(/$split_char/, $line, 3);
if($translated ne "")
{
# the key exist but has no translation.
%{$strhash}->{$original} = 1;
}
-
+
}
-
+
close($fh);
-
+
return %{$strhash};
}