#!/usr/bin/perl

# usage() -- abort the script with the command-line help text.
#
# Takes no arguments and never returns: it always die()s.  The message
# ends in a newline, so perl does not append "at FILE line N." to it.
sub usage {
  my @help_lines = (
    '',
    '',
    'rss-to-site.pl -- a handy perl script to convert My Netscape-style RSS',
    '(Rich Site Summary) files to a .site file, or at least nearly.',
    '',
    'usage: rss-to-site.pl http://url.rss > whatever.site',
    '',
  );

  die join('', map { "$_\n" } @help_lines);
}

# Program name and version; interpolated into the footer of the generated
# .site file.
our $VERSION = 'rss-to-site.pl 1.1';

use LWP::UserAgent;
use URI::URL;
use HTTP::Request::Common;

# ---------------------------------------------------------------------------
# Main script: fetch the RSS file named on the command line, pull out the
# channel title/description and the item links with regular expressions,
# and print a skeleton sitescooper .site file on stdout.
#
# Dies with the usage text if no URL is given, and with the HTTP status
# line if the fetch fails.
# ---------------------------------------------------------------------------

use strict;
use warnings;

# HTTP client, kept in a package global as before.  env_proxy makes it
# honour the *_proxy environment variables.
$main::useragent = LWP::UserAgent->new;
$main::useragent->env_proxy;

# The single command-line argument is the URL of the RSS file.
my $url = $ARGV[0];
usage() unless defined $url && length $url;

my $req = HTTP::Request->new('GET', $url);
$req->header("Accept-Language" => "en",
      "Accept-Charset" => "iso-8859-1,*,utf-8");
my $resp = $main::useragent->request($req);

if (!$resp->is_success) {
  die "HTTP GET failed: ".$resp->status_line." ($url)\n";
}

# Collapse every run of whitespace (newlines included) to one space and
# strip whitespace next to tags.  After this the document is a single
# line, which is why the patterns below can use "." without /s.
my $rss = $resp->content;
$rss =~ s/\s+/ /g;
$rss =~ s/>\s+/>/g;
$rss =~ s/\s+</</g;

# Fall back to the URL / a placeholder when the channel has no
# <title> / <description>.
my $name = $url;
my $desc = '(no description)';

if ($rss =~ m{<channel>.*?<title>(.*?)</title>.*?</channel>}i) {
  $name = $1;
}

if ($rss =~ m{<channel>.*?<description>(.*?)</description>.*?</channel>}i) {
  $desc = $1;
}

# One entry per <item>: the body of its first <link> element.
my @links = $rss =~ m{<item>.*?<link>(.*?)</link>.*?</item>}ig;

# TODO -- use heuristics to work out a good StoryURL for this site

# This heredoc interpolates, so tabs are spelled \t and the backslash in
# the StoryURL pattern is doubled; a single "\." would be emitted as a
# bare "." and silently loosen the pattern.
print <<"__ENDOFMAIN";
URL:\t\t$url
Name:\t\t$name
Description:\t$desc
ContentsFormat:\trss

StoryURL:\t/.*\\.s?html?

# TODO -- edit the StoryURL line above and make a good story URL for
# this site. Here's some sample URLs taken from the RSS file, for
# guidance:
#
__ENDOFMAIN

for my $raw_link (@links) {
  my $link = $raw_link;

  # If the <link> body contains a nested <url> element, keep only the
  # contents of that element.
  $link =~ s{^.*<url>(.*)</url>.*$}{$1} if $link =~ /<url>/;

  # List absolute web links only (http and https).
  next unless $link =~ /^https?:/i;
  print "#    $link\n";
}

print <<"__ENDOFNOTES";

# You may also want to add a StoryStart and StoryEnd line to
# clean up the stories. Here's sample lines (you need to edit them):
#
# StoryStart: [some distinctive text before the start of the story text]
# StoryEnd: [some distinctive text after the end of the story text]

# (This is a sitescooper site file. see http://sitescooper.cx/
# It was generated from the site's RSS by $main::VERSION.)
__ENDOFNOTES



=head1 NAME

rss-to-site - convert a "My Netscape" RSS file to a Sitescooper .site file.

=head1 SYNOPSIS

rss-to-site http://url.rss > whatever.site

=head1 DESCRIPTION

This script will try to convert a My Netscape-style RSS (Rich Site Summary)
file to a .site file suitable for use with B<sitescooper>.

Provide the URL of the RSS file as the command-line argument, and it'll try to
work out a decent site file for that site.  Currently the site file will still
require a little bit of hand-editing afterwards.

=head1 SEE ALSO

C<sitescooper>(1), C<subs-to-site>(1)

=head1 AUTHOR

Justin Mason E<lt>jm /at/ jmason.orgE<gt>

=head1 COPYRIGHT

Copyright (C) 1999-2000 Justin Mason

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., 59
Temple Place - Suite 330, Boston, MA  02111-1307, USA, or read it on
the web at http://www.gnu.org/copyleft/gpl.html .

=cut
