*** empty log message ***
author tipaul <tipaul>
Wed, 8 Dec 2004 10:37:19 +0000 (10:37 +0000)
committer tipaul <tipaul>
Wed, 8 Dec 2004 10:37:19 +0000 (10:37 +0000)
misc/translator/text-extract.pl [new file with mode: 0755]

diff --git a/misc/translator/text-extract.pl b/misc/translator/text-extract.pl
new file mode 100755 (executable)
index 0000000..f876e78
--- /dev/null
@@ -0,0 +1,33 @@
+#!/usr/bin/perl
+# Text extractor: parses the HTML file named by the -f switch and prints
+# the text found in it (see give_id below) on STDOUT, one item per line.
+#
+# Usage: text-extract.pl -f <html-file>
+use strict;
+use warnings;
+use HTML::Tree;
+use Getopt::Std;
+
+# Getopt::Std stores the value of -f in the package variable $opt_f; under
+# "use strict" it has to be declared with "our".
+our $opt_f;
+
+# The "f:" spec (switch f takes an argument) is getopts() syntax.  getopt()
+# would read ":" as one more argument-taking switch and never reports
+# unknown options.
+getopts("f:")
+       or die "Usage: $0 -f <html-file>\n";
+defined $opt_f
+       or die "Usage: $0 -f <html-file>\n";
+
+       my $tree = HTML::TreeBuilder->new; # empty tree
+
+       # parse_file yields undef (with the reason in $!) when the file
+       # cannot be opened; stop here rather than silently printing nothing.
+       defined $tree->parse_file($opt_f)
+               or die "Cannot parse '$opt_f': $!\n";
+       # give_id($element)
+       #
+       # Walks the children of $element depth-first and prints the text
+       # found on the way to STDOUT, one item per line, in document order:
+       #   - every text node;
+       #   - the "alt" and "title" attributes of any element;
+       #   - the "value" attribute of <input> elements;
+       #   - the "content" attribute of <meta> elements.
+       # Comments, <script> and <style> elements are skipped together with
+       # everything below them.
+       #
+       # $element must offer content_list(), tag() and attr(), as the nodes
+       # of the HTML::TreeBuilder tree built by this script do.
+       # Returns nothing useful; the function is called for its output.
+       sub give_id {
+               my ($node) = @_;
+               foreach my $child ($node->content_list) {
+                       # Text nodes come back as plain strings, elements
+                       # as objects.
+                       if (!ref $child) {
+                               print "$child\n";
+                               next;
+                       }
+
+                       my $tag = $child->tag();
+                       next if $tag eq "~comment"
+                               || $tag eq "script"
+                               || $tag eq "style";
+
+                       # Test for "defined and non-empty" instead of plain
+                       # truth so that a value such as "0" is not dropped.
+                       foreach my $name ('alt', 'title') {
+                               my $text = $child->attr($name);
+                               print "$text\n"
+                                       if defined $text && length $text;
+                       }
+                       if ($tag eq "input") {
+                               my $value = $child->attr('value');
+                               print "$value\n"
+                                       if defined $value && length $value;
+                       }
+                       if ($tag eq 'meta') {
+                               # A <meta> without content used to print an
+                               # undef value, and the line ended in "\n "
+                               # whose stray blank prefixed the next item.
+                               my $content = $child->attr('content');
+                               print "$content\n"
+                                       if defined $content && length $content;
+                       }
+
+                       give_id($child);
+               }
+       }
+       # Print the text of the whole parsed document, starting from the
+       # root of the tree.
+       give_id($tree);
+
+       # Destroy the tree explicitly, then clear our handle on it.  delete()
+       # hands back undef in scalar context, which is the value the
+       # one-line "$tree = $tree->delete" form stores.
+       $tree->delete;
+       undef $tree;