#!/usr/bin/perl -w ##################################################################### # genBlog - my RSS feed generator # # 2006 (C) Horms # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 2 of the # License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA # 02111-1307 USA # ##################################################################### use lib $ENV{HOME} . "/public_html/pleb_blossom/local/share/perl"; use strict; use Time::Local; use HTML::Chunks::Super; my $RSS_GENERATOR="Horms' genBlog v0.1"; my $RSS_DOCS="http://blogs.law.harvard.edu/tech/rss"; my $RSS_TTL="60"; my $RSS_SKIP_HOURS="15,16,17,18,19,20,21,22,23,0,1,13,14"; #N.B GMT my $RSS_SKIP_DAYS=""; #N.B GMT my $RSS_isPermaLink="false"; my $RSS_ENCODING = "UTF-8"; my $RSS_TITLE ="Horms"; my $RSS_LINK = "http://www.vergenet.net/~horms"; my $RSS_DESCRIPTION = "Themable Horms!"; my $RSS_LANGUAGE = "en"; my $SITE_DOMAIN = "http://www.vergenet.net"; my $BLOG_ARCHIVE_DIR = "/~horms/pleb_blossom/archives"; sub cat_file { my ($outfh, $infn) = (@_); my $infh; if (not defined ($infn)) { return; } open($infh, "<$infn") or die "Could not open \"$infn\"\n"; while (<$infh>) { print $outfh $_; } close($infh); } sub subs_filename { my ($template, $no) = (@_); $template =~ s/XXXXXXX/$no/g; return $template; } sub open_output { my ($data) = (@_); my ($fn, $pagenr); if (not defined $data->{outfn}) { return; } if ($data->{count} == 0 or not defined $data->{max_per_page}) { $pagenr = 0; } else { $pagenr = $data->{count} / $data->{max_per_page}; } $fn = subs_filename($data->{outfn}, $pagenr); if (defined $data->{openfn} and $data->{openfn} eq $fn) { return; } $data->{openfn} = $fn; open($data->{outfh}, ">$fn") || die "Could not open $fn for writing"; } sub close_output { my ($data) = (@_); if (not defined $data->{outfh}) { return; } undef $data->{openfn}; close($data->{outfh}); } sub __foreach_txt_flatten_dir { my $data = shift(@_); my $fn = shift(@_); my $done = shift(@_); my $fh; my $fn2; my @todo; opendir $fh, "$fn" || die "Could not open \"$fn\""; while($fn2 = readdir($fh)) { if ($fn2 eq "." or $fn2 eq "..") { next; } push @todo, "$fn/$fn2"; } closedir($fh); __foreach_txt_flatten($data, \@todo, $done); } sub __foreach_txt_flatten { my ($data, $todo, $done) = (@_); my $fn; for $fn (@$todo) { if (defined $data->{max_per_page} and $data->{count} ne 0 and $data->{count} % $data->{max_per_page} == 0 and defined $data->{max_pages} and $data->{count}/$data->{max_per_page} == $data->{max_pages}) { return; } if (-d $fn) { __foreach_txt_flatten_dir($data, $fn, $done); } elsif ($fn =~ m/.txt$/i) { push @$done, $fn; $data->{count}++; } } } sub foreach_txt_flatten { my ($data) = shift @_; my @list; if (defined $data->{max_per_page} and ($data->{max_per_page} eq 0 or (defined $data->{max_pages} and $data->{max_pages} eq 0))) { return @list; } $data->{count} = 0; __foreach_txt_flatten($data, \@_, \@list); $data->{count} = 0; return @list; } sub foreach_txt { my $func = shift @_; my $data = shift @_; my @list; my @todo; if (defined($data->{max_per_page}) and $data->{max_per_page} <= 0) { return; } if (defined($data->{max_pages}) and $data->{max_pages} <= 0) { return; } @list = foreach_txt_flatten($data, @_); if (not defined $data->{max_per_page}) { $data->{max_per_page} = $#list + 1; } # List is already trimmed according to # $data->{max_per_page} and $data->{max_pages} as neccessary while (@todo = splice(@list, 0, $data->{max_per_page})) { open_output($data); $data->{subtotal} += $#todo + 1; &$func($data, @todo); $data->{count} += $#todo + 1; close_output($data); } } sub __msg_parse { my ($fh, $line) = (@_); my $key; my @value; my $in_body; while (1) { if (not defined($$line)) { $$line = <$fh>; } if (not defined($$line)) { last; } chomp($$line); if ($$line eq "") { if (defined($key)) { last; } elsif (not defined($in_body)) { undef($$line); $in_body = 1; next; } } elsif (not defined($in_body) and $$line =~ /^([^:]*):(\s+(.*))?/) { if (scalar(@value) != 0) { last; } $key = $1; if (defined($3)) { push @value, $3; } } else { push @value, $$line; } undef($$line); } if (scalar(@value) == 0 and not defined($key)) { return; } return { "key" => $key, "value" => \@value }; } sub msg_parse { my ($fn) = (@_); my $fh; my $line; my @msg; my $msg_item; open($fh, "<$fn") or die "unable to open \"$fn\"\n"; while ($msg_item = __msg_parse($fh, \$line)) { push @msg, $msg_item; } #close($fh); return @msg; } sub msg_to_hash_getkey { my ($elem) = (@_); if (defined($elem->{key})) { return lc $elem->{key}; } return "BODY"; } sub msg_to_hash_getvalue { my ($elem) = (@_); return $elem->{value}; } sub msg_to_hash { return map { msg_to_hash_getkey($_) => msg_to_hash_getvalue($_) } @_; } sub template_handler_cb { my ($engine, $element, $data) = (@_); my @msg; my %msg_hash; my $basename; foreach my $entry (@{$data->{"priv.fn"}}) { @msg = msg_parse($entry); if (scalar(@msg) == 0) { return 1; } $basename = $entry; $basename =~ s/.*\///; %msg_hash = msg_to_hash(@msg); $engine->output('entry', { count => $data->{"priv.count"}++, total => $data->{subtotal}, anchor => $basename, archive => archive_link($entry), title => msg_flatten_value($msg_hash{title}, "\n", $data->{xhtml}), BODY => msg_flatten_value($msg_hash{BODY}, "\n", $data->{xhtml}), date => msg_flatten_value($msg_hash{date}, "\n", $data->{xhtml})}, ) or die; } } sub template_handler { my $data = shift @_; my $oldfh; if (not defined $data->{outfn} or not defined $data->{templatefn}) { return; } $data->{"priv.count"} = $data->{count}; # Test to see if the templatefn is accessable, # as Chunks just dies horribly if it isn't open TMP, "<$data->{templatefn}" or die "Could not open \"" . $data->{templatefn} . "\""; close TMP; my $engine = new HTML::Chunks::Super($data->{templatefn}) or die; $data->{"priv.fn"} = \@_; $oldfh = select($data->{outfh}); $engine->output('blogPage', { entries => \&template_handler_cb }, $data) or die; select ($oldfh); delete $data->{"priv.fn"}; delete $data->{"priv.count"}; } sub gen_rss_msg_elem { my ($fh, $header) = (@_); my $key; my $v; my @value; if (defined($header->{key})) { $key = $header->{key}; $key =~ tr/A-Z/a-z/; } else { $key = "description"; } @value = @{$header->{value}}; if ($key eq "date") { $key = "pubDate"; } elsif ($key eq "description") { unshift @value, ""; } print $fh "\t\t\t<". $key . ">"; if (scalar @value == 1) { print $fh $value[0]; } else { print $fh "\n"; for $v (@value) { print $fh "\t\t\t\t" . $v . "\n"; } print $fh "\t\t\t"; } print {$fh} "\n"; } sub archive_link { my ($fn) = (@_); $fn =~ s/.*\///; return $BLOG_ARCHIVE_DIR . "#" . $fn; } sub archive_link_fq { return $SITE_DOMAIN . archive_link(@_); } sub gen_rss_handler { my $data = shift; my @msg; my $msg_elem; my $fn; gen_rss_header ($data, @_); foreach $fn (@_) { @msg = msg_parse($fn); if (scalar(@msg) == 0) { die; } print {$data->{outfh}} "\t\t\n"; print {$data->{outfh}} "\t\t\t" . archive_link_fq($fn) . "\n"; print {$data->{outfh}} "\t\t\t" . archive_link_fq($fn) . "\n"; for $msg_elem (@msg) { gen_rss_msg_elem($data->{outfh}, $msg_elem); } print {$data->{outfh}} "\t\t\n"; } gen_rss_footer ($data, @_); } sub gen_rss_show_list { my $data = shift; my $list = shift; my $tag = shift; my $sub_tag = shift; my $prefix = shift; if (defined $list) { my @e = split /,/, $list; if ($#e > 0) { print {$data->{outfh}} "$prefix<$tag>\n"; for (@e) { print {$data->{outfh}} "$prefix\t<$sub_tag>$_\n"; } print {$data->{outfh}} "$prefix\n"; } } } sub gen_rss_header { my $data = shift; my $date; print {$data->{outfh}} "\n" . "\n" . "\t\n" . "\t\t$RSS_TITLE\n" . "\t\t$RSS_LINK\n" . "\t\t$RSS_DESCRIPTION\n" . "\t\t$RSS_LANGUAGE\n"; for (@_) { $date = parse_date($_); last; } if (defined $date) { print {$data->{outfh}} "\t\t$date\n"; } if (defined $RSS_GENERATOR) { print {$data->{outfh}} "\t\t$RSS_GENERATOR\n"; } if (defined $RSS_DOCS) { print {$data->{outfh}} "\t\t$RSS_DOCS\n"; } if (defined $RSS_TTL) { print {$data->{outfh}} "\t\t$RSS_TTL\n"; } if (defined $RSS_SKIP_HOURS) { gen_rss_show_list($data, $RSS_SKIP_HOURS, "skipHours", "hour", "\t\t"); } if (defined $RSS_SKIP_DAYS) { gen_rss_show_list($data, $RSS_SKIP_DAYS, "skipDays", "day", "\t\t"); } } sub gen_rss_footer { my $data = shift; my $date; print {$data->{outfh}} "\t\n" . "\n"; } sub gen_rss { my $control = { "max_per_page" => 4, "max_pages" => 1, "outfn" => "rss.xml", }; foreach_txt(\&gen_rss_handler, $control, @_); } sub gen_html { my $templatefn = shift @_; my $control = { "max_per_page" => 5, "max_pages" => 1, "outfn" => "index.html", "templatefn" => $templatefn, }; foreach_txt(\&template_handler, $control, @_); } sub gen_html_archive { my $templatefn = shift @_; my $control = { "outfn" => "archives/index.shtml", "templatefn" => $templatefn, }; foreach_txt(\&template_handler, $control, @_); } sub gen_xhtml { my $templatefn = shift @_; my $control = { "max_per_page" => 5, "max_pages" => 1, "outfn" => "index.html", "templatefn" => $templatefn, "xhtml" => 1, }; foreach_txt(\&template_handler, $control, @_); } sub gen_xhtml_archive { my $templatefn = shift @_; my $control = { "outfn" => "archives/index.shtml", "templatefn" => $templatefn, "xhtml" => 1, }; foreach_txt(\&template_handler, $control, @_); } # Mon, 20 Mar 2006 12:12:53 +0900 # N.B: Only handles numeric timezones # Returned epoch is GMT, offset is also in seconds sub rfc2822_to_epoch_and_offset { my ($date) = (@_); my $mday; my $mon; my $year; my $hour; my $min; my $sec; my $tz_sign; my $tz_hour; my $tz_min; my $tz_offset; my $epoch; my $months = { "Jan" => 0, "Feb" => 1, "Mar" => 2, "Apr" => 3, "May" => 4, "Jun" => 5, "Jul" => 6, "Aug" => 7, "Sep" => 8, "Oct" => 9, "Nov" => 10, "Dec" => 11, }; unless ($date =~ m/^([A-Z][a-z][a-z]), ([0123][0-9]) ([A-Z][a-z][a-z]) ([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) ([+-])([0-9][0-9])([0-9][0-9])$/) { print STDERR "Invalid date (regex missmatch): \"$date\"\n"; return 0; } $mday = $2; $mon = $3; $year = $4; $hour = $5; $min = $6; $sec = $7; $tz_sign = $8; $tz_hour = $9; $tz_min = $10; unless (defined($months->{$mon})) { print STDERR "Invalid date (unknown month): \"$date\"\n"; } $epoch = timegm($sec, $min, $hour, $mday, $months->{$mon}, $year); $tz_offset = ($tz_hour * 60 * 60) + ($tz_min * 60); if ($tz_sign eq "-") { $tz_offset = -$tz_offset; } $epoch += $tz_offset; return ($epoch, $tz_offset); } sub rfc2822_to_epoch { my ($epoch) = rfc2822_to_epoch_and_offset(@_); return $epoch; } sub msg_flatten_value { my ($value, $delim, $xhtml) = (@_); my $value_str; my $value_sub_str; for $value_sub_str (@{$value}) { if (not defined($value_str)) { $value_str = $value_sub_str; } else { $value_str .= $delim . $value_sub_str; } } # A quick hack to convert html tags to xhtml #
->
#
->
# -> if (defined $xhtml) { $value_str =~ s/<(br|hr|img[^>]*)>/<$1 \/>/im; } return $value_str; } sub parse_date { my ($fn) = (@_); my $msg_elem; my $date; for $msg_elem (msg_parse($fn)) { if (not defined($msg_elem->{key}) or scalar($msg_elem->{value}) == 0 or $msg_elem->{key} !~ m/date/i) { next; } $date = msg_flatten_value($msg_elem->{value}, " "); return $date; } return undef; } sub get_date_handler { my $data = shift @_; my $date; my $fn; foreach $fn (@_) { $date = parse_date($fn); if (not defined $date) { print STDERR "warning: no date: $fn, skipping"; } else { $date = rfc2822_to_epoch($date); push @{$data->{priv}}, { "fn" => $fn, "date" => $date }; } } return 1; } sub get_date { my @date; my $d; my @fn = (); my $control = { "priv" => \@date, }; foreach_txt(\&get_date_handler, $control, @_); for $d (sort { $b->{date} <=> $a->{date} } @date) { push @fn, $d->{fn}; } return @fn; } # Main { my @fn; @fn = get_date("data/"); gen_rss(@fn); gen_html("metadata/html.template", @fn); gen_html_archive("metadata/archive.template", @fn); }