#!/path/to/yer/perl -T

use strict;
use warnings;

=pod

=head1 NAME

rss2rss

=head1 SUMMARY

 # Stick this in your crontab
 0 0-23 1-31 1-12 * /yer/path/to/rss2rss

=head1 DESCRIPTION

This is a quick re-hashing of an earlier script I wrote named
rss2blogger. It operates on the same principle of slurping in (n)
number of RSS files and creating a new thingy from them. Rather than
posting the RSS items to a Blogger server, rss2rss creates a new RSS
file.

Fun for the family!

=cut

# CPAN modules

use Carp;
use HTTP::Request;
use LWP::UserAgent;
use Storable;
use XML::RSS;

# Things you want to define
#

use constant DEBUG   => 0;
use constant VERBOSE => 0;

# The path to a file where Storable.pm
# can save the $store_hash hash. This
# data is used to prevent duplicate
# postings for an RSS feed.
# Leave empty to disable loading/saving.

use constant STORE => "";

# In order to prevent the STORE file
# from getting too big, we periodically
# delete data whose created time is
# older than MAX_DAYS

use constant MAX_DAYS => 60;

# Put yer unquoted URLs here
# URLs should be separated by a space
# or a newline

use constant FEEDS => qw();

# An optional file for the script to write RSS data to.
# If empty, the script prints the resultant RSS thingy
# to STDOUT

use constant NEWRSS_FILE => "";

# If you don't already understand why you'd want another
# version of RSS, you probably don't want to...

use constant NEWRSS_VERSION => "1.0";

# See docs for XML::RSS

use constant NEWRSS_CHANNEL   => ();
use constant NEWRSS_IMAGE     => ();
use constant NEWRSS_TEXTINPUT => ();

#
# Okay, stop defining

use constant NOW_IN_SECONDS => time;
use constant SECONDS_IN_DAY => 60 * 60 * 24;

# Seen-items data, keyed as $store_hash->{feed title}{item title}.
my $store_hash = {};

# NOTE(review): $publish and $pid are not referenced anywhere in the
# visible part of this file; kept in case later code relies on them.
my $publish = 0;
my $pid     = 0;

# HTML Latin-1 entity names, in code point order starting at 160
# (nbsp). Named entities are not defined in plain XML, so encode()
# rewrites each "&name;" as the equivalent numeric character
# reference. Building the table from the ordered name list keeps the
# source pure ASCII.
my @latin1_names = qw(
    nbsp   iexcl  cent   pound  curren yen    brvbar sect
    uml    copy   ordf   laquo  not    shy    reg    macr
    deg    plusmn sup2   sup3   acute  micro  para   middot
    cedil  sup1   ordm   raquo  frac14 frac12 frac34 iquest
    Agrave Aacute Acirc  Atilde Auml   Aring  AElig  Ccedil
    Egrave Eacute Ecirc  Euml   Igrave Iacute Icirc  Iuml
    ETH    Ntilde Ograve Oacute Ocirc  Otilde Ouml   times
    Oslash Ugrave Uacute Ucirc  Uuml   Yacute THORN  szlig
    agrave aacute acirc  atilde auml   aring  aelig  ccedil
    egrave eacute ecirc  euml   igrave iacute icirc  iuml
    eth    ntilde ograve oacute ocirc  otilde ouml   divide
    oslash ugrave uacute ucirc  uuml   yacute thorn  yuml
);

my %entity;
@entity{@latin1_names} = map { "&#$_;" } 160 .. (159 + @latin1_names);

# Alternation of every known entity name, interpolated into the
# substitution pattern used by encode().
my $entities = join('|', keys %entity);

{
    main();
    exit;
}

# main()
#
# Fetches every URL in FEEDS, parses each as RSS and merges all items
# into one new RSS document. The document is saved to NEWRSS_FILE, or
# printed to STDOUT when NEWRSS_FILE is empty. When STORE is set, the
# seen-items hash is loaded from it, pruned of entries older than
# MAX_DAYS and written back.
#
# Takes no arguments. Returns 1 on success. Croaks when there are no
# feeds, or when the store or the output cannot be read or written.
# A feed that cannot be fetched or parsed is skipped (reported via
# carp when VERBOSE is on).
sub main {

    # Some basic sanity checking
    my @feeds = (FEEDS);
    croak "No feeds. Nothing to do." unless @feeds;

    # Copy the path constants into lexicals. A bareword directly after
    # a filetest operator (-f STORE) is parsed as a filehandle name,
    # not as a call to the constant, so the test must use a variable.
    my $store_path = STORE;
    my $rss_path   = NEWRSS_FILE;
    my $use_store  = (defined $store_path && $store_path ne "");

    if ($use_store && -f $store_path) {
        $store_hash = retrieve($store_path)
            or croak "Unable to retrieve \$store_hash. \n$!";
    }

    # Prune the store_hash. Entries without a usable "created" stamp
    # count as infinitely old and are removed as well.
    my $prune_time = NOW_IN_SECONDS - (SECONDS_IN_DAY * MAX_DAYS);

    foreach my $f (keys %$store_hash) {
        next unless ref($store_hash->{$f}) eq 'HASH';

        foreach my $t (keys %{ $store_hash->{$f} }) {
            my $entry   = $store_hash->{$f}{$t};
            my $created = 0;

            if (ref($entry) eq 'HASH' && defined $entry->{'created'}) {
                $created = $entry->{'created'};
            }

            next if ($created >= $prune_time);

            # STDERR, because STDOUT may be carrying the RSS document.
            print STDERR "Deleting from \$store_hash : $t\n" if (VERBOSE);
            delete $store_hash->{$f}{$t};
        }
    }

    my $newrss = XML::RSS->new(version => NEWRSS_VERSION)
        or croak "Failed to create new rss object : $!";

    $newrss->channel(NEWRSS_CHANNEL);
    $newrss->image(NEWRSS_IMAGE);
    $newrss->textinput(NEWRSS_TEXTINPUT);

    my $ua = LWP::UserAgent->new()
        or croak "Failed to create LWP object. $!";

  FEED:
    foreach my $url (@feeds) {

        my $req = HTTP::Request->new(GET => $url);

        unless ($req) {
            carp "Failed to create HTTP object. $!";
            next FEED;
        }

        my $rss = XML::RSS->new();

        unless ($rss) {
            carp "Failed to create RSS object. $!";
            next FEED;
        }

        my $response = $ua->request($req);

        unless ($response->is_success()) {
            carp "Failed to slurp $url." if (VERBOSE);
            next FEED;
        }

        # XML::RSS dies on malformed input; trap it so that one bad
        # feed does not abort the whole run.
        eval { $rss->parse($response->content()); };

        if ($@) {
            carp "There was an error parsing $url\n$@" if (VERBOSE);
            next FEED;
        }

        my $items = $rss->{'items'};

        unless (ref($items) eq 'ARRAY' && @$items) {
            carp "$url returned no items." if (VERBOSE);
            next FEED;
        }

        my $feed = $rss->{'channel'}{'title'};

        if (! $feed) {
            carp "Unable to determine title for $url\n" if (VERBOSE);
            next FEED;
        }

        my $enc_feed = $feed;
        encode(\$enc_feed);

        foreach my $item (@$items) {

            my $title = defined $item->{'title'} ? $item->{'title'} : "";
            my $link  = $item->{'link'};
            my $desc  = $item->{'description'};

            # We have already posted this item
            # Move along. These are not the posts
            # you're looking for.
            #
            # NOTE(review): nothing in this sub ever adds an entry to
            # $store_hash, so this check only fires for data already
            # present in an existing STORE file. Recording items here
            # would change which items appear in the output feed;
            # confirm the intended behaviour before adding it.
            next if (exists $store_hash->{$feed}
                     && $store_hash->{$feed}{$title});

            # encode() edits the referenced scalar in place; fields the
            # feed did not supply are left undefined.
            foreach my $field (\$title, \$desc, \$link) {
                encode($field) if (defined $$field);
            }

            $newrss->add_item(
                title       => "$enc_feed, $title",
                description => $desc,
                link        => $link,
            );
        }
    }

    # Low-precedence "or" so the check applies to store() itself and
    # not to its last argument.
    if ($use_store) {
        store($store_hash, $store_path)
            or croak "Failed to store \$store_hash. \n$!";
    }

    # Write to a file
    # or write to STDOUT
    if (defined $rss_path && $rss_path ne "") {
        $newrss->save($rss_path) or croak $!;
    }
    else {
        print $newrss->as_string();
    }

    return 1;
}

# NOTE(review): encode() continues past the end of the reviewed chunk;
# its visible head is kept exactly as found.
sub encode { my $text = shift; return unless (ref($text) eq "SCALAR"); $$text =~ s/&(?!(#[0-9]+|#x[0-9a-fA-F]+|\w+);)/&/g; $$text =~ s/&($entities);/$entity{$1}/g; $$text =~ s/