#!/usr/bin/perl
# -*- Perl -*-          # Fri 30 Jun 11:35:28 CDT 2006
###############################################################################
# Written by Tim Skirvin.  Copyright 2004-2006, Tim Skirvin.
# Redistribution terms are below.
###############################################################################
our $VERSION = "0.10";

###############################################################################
### User Configuration ########################################################
###############################################################################

## All of these are package globals (not lexicals) on purpose: the local
## configuration file is loaded with 'do' later on and overrides them by
## assigning to the package variables of the same name.
our ( $LOCALCONF, $KIBOZEDIR, $DB_TYPE, $ARCHIVEGROUP, $DEBUG, $QUIET,
      $VERBOSE, $URL, $PREFIX, $DOMAIN, $FROM );

## Things used to actually get the sentnews file and parse it.  I'll further
## document these later.
##   $URL    - where the sentnews file is downloaded from
##   $PREFIX - prefix handed to add_message_id() for articles lacking an ID
##   $DOMAIN - domain handed to add_message_id() for articles lacking an ID
##   $FROM   - replacement From: header for articles sent as plain 'tskirvin'
$URL    = "http://www.stanford.edu/~tskirvin/sentnews";
$PREFIX = "invalid.notrealid";
$DOMAIN = "news.killfile.org";
$FROM   = 'tskirvin@unknown.site.invalid (Tim Skirvin)';

## Rather than having everything in this shared configuration, load this
## file to get additional configuration.  This file contains additional
## perl.  If $HOME is not set there is no sensible default, so leave it
## empty (which skips loading) rather than pointing at "/.kibozerc".
$LOCALCONF = defined $ENV{'HOME'} ? "$ENV{'HOME'}/.kibozerc" : "";

## Where should we store all of our files?  This needs to be set or
## nothing will run; also, the directory must already exist.
$KIBOZEDIR = "";

## What kind of database should we use to store history information?
$DB_TYPE = "DB_File";

## If we're storing everything to an archive group, you can set it here.
$ARCHIVEGROUP = "";

## Do we want to print debugging information?  Use verbose status messages?
## Be entirely silent?  Set these things here.
$DEBUG   = 0;
$VERBOSE = 0;
$QUIET   = 0;

## If the modules are set up in a non-standard place, edit this line
## as appropriate.
BEGIN { use lib '/home/tskirvin/dev/news-archive'; }

###############################################################################
### main() ####################################################################
###############################################################################

use strict;

use News::Archive;
use News::Article;
use News::Article::Mbox;
use LWP::Simple;
use Date::Parse;
use Getopt::Std;

# Error Codes: symbolic name => process exit status, used by Exit().
our %ERROR = ( 'SUCCESS' => 0, 'CONFIG' => 1, 'SERVER' => 2 );

# Command-line configuration
our %OPTS;
getopts('hvVQdc:a:', \%OPTS);

# Load local configuration from local configuration file
$LOCALCONF = $OPTS{'c'} if $OPTS{'c'};
if ( $LOCALCONF && -r $LOCALCONF ) {
    # 'do FILE' searches @INC for bare relative names, and '.' is no longer
    # in @INC as of perl 5.26; anchor relative paths so '-c my.rc' works.
    my $conf = $LOCALCONF =~ m{^\.{0,2}/} ? $LOCALCONF : "./$LOCALCONF";
    do $conf;

    # A configuration file that does not compile used to be ignored
    # silently, leaving the script running with the built-in defaults.
    if ( my $error = $@ ) {
        chomp $error;
        Exit('CONFIG', "Couldn't parse $LOCALCONF: $error");
    }
}

Usage()   if $OPTS{'h'};    # Print usage information and exit
Version() if $OPTS{'v'};    # Print version information and exit

# Other command-line parsing; overrides localconf
$DEBUG   = $OPTS{'d'} if defined $OPTS{'d'};
$VERBOSE = $OPTS{'V'} || $DEBUG || $VERBOSE || 0;   # Give verbose messages?
$QUIET   = $OPTS{'Q'} || $QUIET || 0;
# The option is '-a' (see the getopts spec above and the documentation); the
# old code looked at $OPTS{'A'}, which getopts never sets.
$ARCHIVEGROUP = $OPTS{'a'} if defined $OPTS{'a'};

# Create the options hash to start a News::Archive object
our %OPTHASH;
$OPTHASH{ 'basedir' } = $KIBOZEDIR if $KIBOZEDIR;
$OPTHASH{ 'db_type' } = $DB_TYPE   if $DB_TYPE;
$OPTHASH{ 'debug'   } = $DEBUG     if $DEBUG;

## Download the most recent version of the sentnews file from $URL, and get
## the articles from it.
my $content = get($URL);
# LWP::Simple::get() just returns undef on failure; it does not report
# through $!, so don't print an unrelated errno string here.
die "Couldn't get $URL\n" unless defined $content;

# Re-order the downloaded text: every 'From: ' line is held back and then
# emitted (in reverse order of appearance) immediately before the next blank
# line; all other lines pass through untouched.
# NOTE(review): presumably this normalizes the header layout of nn's sentnews
# file for the mbox reader -- confirm against a real sentnews file.
my (@lines, @from);
foreach my $line ( split(/\n/, $content) ) {
    if ($line =~ /^From: .*/) {
        push @from, $line;
    }
    elsif ($line =~ /^\s*$/) {
        push @lines, reverse @from if @from;
        @from = ();
        push @lines, $line;
    }
    else {
        push @lines, $line;
    }
}
# split() drops trailing empty fields, so the text may not end on a blank
# line; flush anything still held back instead of silently losing it.
push @lines, reverse @from if @from;

$_ .= "\n" for @lines;      # Put the newlines back in
my @articles = News::Article::Mbox->read_mbox(\@lines);

## Parse the articles into something more reasonable, before we start pushing
## them into the archives.
foreach my $article (@articles) {
    next unless $article;       # same guard as the archiving loop below

    # Strip the leading 'From <word>' off of the stored TIMESTAMP value;
    # whatever is left is used as a fallback Date: header further down.
    my $stamp = $$article{TIMESTAMP} || "";
    $stamp =~ s/^From \S+\s*//;

    $article->add_message_id($PREFIX, $DOMAIN);

    # Articles sent with a bare 'tskirvin' sender get the configured $FROM.
    my $from = $article->header('from');
    if (defined $from && $from eq 'tskirvin') {
        $article->set_headers('from', $FROM);
    }

    # Mask one particular name wherever it appears in the body text.
    my @body = $article->body;
    s/([Tt])umati/$1\*mati/g for @body;

    unless ($article->header('date')) {
        $article->set_headers('date', $stamp);
    }
    $article->set_body(@body);
}

# Create the News::Archive object after we have the articles
my $archive = News::Archive->new( %OPTHASH )
    or Exit('SERVER', "Couldn't create/load archive item: ",
                      News::Archive->error);
$archive->lock or Exit('SERVER', "Couldn't lock archive: $!");

# If we've got $ARCHIVEGROUP set, then make sure we're subscribed to it.
$archive->subscribe($ARCHIVEGROUP) if ($ARCHIVEGROUP);

# Check for each article.
my $count = 0;
foreach my $article (@articles) {
    next unless $article;
    my $messageid = $article->header('message-id');

    # If this article has already been processed, skip it
    next unless $messageid;
    if ( $archive->article( $messageid ) ) {
        warn "Already processed '$messageid'\n" if $VERBOSE;
        next;
    }

    # We need at least this many more headers to do anything with it
    next unless $article->header('from');
    next unless $article->header('newsgroups');
    next unless $article->header('subject');
    next unless $article->header('date');

    # Debugging hook; don't worry about this in general.
    # $article->write(\*STDOUT);

    # Get the list of groups we're supposed to be saving the article into
    my @groups = split('\s*,\s*', $article->header('newsgroups') );
    s/\s+//g for @groups;

    # Make sure we're subscribed to all these groups
    $archive->subscribe($_) for @groups;
    push @groups, $ARCHIVEGROUP if $ARCHIVEGROUP;   # We're subscribed already

    # Actually save the article.
    my $ret = $archive->save_article(
        [ @{$article->rawheaders}, '', @{$article->body} ], @groups );
    $count++ if $ret;
    if ($VERBOSE) {
        warn $ret ? "Accepted article $messageid\n"
                  : "Couldn't save article $messageid\n";
    }
}

$archive->close;
Exit('SUCCESS', $count ? "$count articles archived" : "");

###############################################################################
### Subroutines ###############################################################
###############################################################################

## Exit ( CODE, REASON [, DETAILS] )
# Exits the program cleanly with a proper error message.  CODE is a key of
# %ERROR and selects the exit status.  REASON and any DETAILS are joined
# with newlines and sent to STDERR unless $QUIET is set; with an empty
# REASON nothing is printed at all.  Never returns.
sub Exit {
    my ($code, $reason, @details) = @_;
    exit $ERROR{$code} unless $reason;
    # map { $_ = " - $_" } @details;
    $reason = join("\n", $reason, @details, '');
    warn $reason if (!$QUIET && $reason);
    exit $ERROR{$code};
}

## Usage ()
# Prints a short summary of the command-line options on STDOUT and exits
# successfully.  Called for '-h'; never returns.
sub Usage {
    my $prog = $0;
    $prog =~ s%.*/%%g;          # Clean up the program name
    print <<"EOM";
$prog v$VERSION
Downloads the sentnews file from $URL
and saves the articles in it into the news archive.

Usage: $prog [-hvVQd] [-c configfile] [-a groupname]

    -h              Print this usage information and exit.
    -v              Print version information and exit.
    -d              Debug; print debugging information (implies -V).
    -c configfile   Load this configuration file instead of the default
                    (~/.kibozerc).
    -a groupname    Add this group to all articles we download.
    -V              Verbose; print information on every article.
    -Q              Quiet mode; only print that which is absolutely necessary.
EOM
    exit $ERROR{'SUCCESS'};
}

## Version ()
# Prints the program name and version number on STDOUT and exits
# successfully.  Called for '-v'; never returns.
sub Version {
    print version(), "\n";
    exit $ERROR{'SUCCESS'};
}

## version ( [VERSION] )
# Returns the program name and version number as "PROGRAM vVERSION".
# VERSION defaults to $VERSION, or "unknown" if that is not set either.
sub version {
    my $version = shift || $VERSION || "unknown";
    my $prog = $0;
    $prog =~ s%.*/%%g;          # Clean up the program name
    return "$prog v$version";
}

###############################################################################
### Documentation #############################################################
###############################################################################

=head1 NAME

getposts - get posts for News::Archive from an mbox file

=head1 SYNOPSIS

mbox2news.pl [-hvVQd] [-c configfile] [-a groupname] < ARTICLE

=head1 DESCRIPTION (OLD)

mbox2news.pl is a part of the News::Archive package, which is used to
archive news articles in a reasonably efficient manner.  This particular
script is meant for use for personal archives, loading articles (or
mbox-style archives) on STDIN and loading them into $KIBOZEDIR.

Without any options, mbox2news.pl will look for input from STDIN.
Configuration information is stored in the configuration file
(B<~/.kibozerc>).  Command-line options are as follows:

=over 4

=item -h

Print this usage information and exit.

=item -v

Print version information and exit.

=item -d

Debug; print debugging information (implies -V)

=item -c configfile

Load this configuration file instead of default (~/.kibozerc).

=item -a groupname

Add this group to all articles we download, so that they can be accessed
uniformly.

=item -V

Verbose; print information on every article.

=item -Q

Quiet mode; only print that which is absolutely necessary (and even then
think about it).

=back

=head1 NOTES

This is essentially a counterpart to kiboze.pl; while that program goes to
news servers and downloads new messages, mbox2news.pl uses existing
messages.  It is primarily meant to convert from old kiboze archives, or
anything similar.

This script was meant to work with mbox-style archives; this is compatible
with some single articles, however, so you can feed them in directly as
well.

You may well need a helper script to make some programs give you
mbox-compatible archives.  This distribution comes with 'nnparse', which is
used to fix the 'sentnews' items from the nn news reader; you can use it as
an example if you wish.

=head1 REQUIREMENTS

B<News::Archive>, B<News::Article::Mbox>, [...]

=head1 SEE ALSO

B<News::Archive>, B<kiboze.pl>, B<mbox2news.pl>

=head1 AUTHOR

Tim Skirvin

=head1 LICENSE

This code may be redistributed under the same terms as Perl itself.

=head1 HOMEPAGE

http://www.killfile.org/~tskirvin/software/news-archive/

=head1 COPYRIGHT

Copyright 2003-2006, Tim Skirvin

=cut

###############################################################################
### Version History ###########################################################
###############################################################################
# 0.10          Fri 30 Jun 11:40:44 CDT 2006    tskirvin
### Initial version, based off of mbox2news.pl and some other code.  Not
### really documented worth anything.  Oh well.