#!/usr/bin/env perl -w
use strict;
use LWP::UserAgent;
use XML::RSS;
use File::Spec;
use HTTP::Cookies::Netscape;
use encoding 'Big5';

# Root URL of the forum; the listing request and every item link are
# built on top of it.
my $BASE_URL = "http://www.pcdvd.com.tw";

# Update the path below so that it points at your own Firefox cookies.txt.
# On Windows, write the path with forward slashes (/) instead of
# backslashes (\).
my $cookie_file = "/home/ada/.mozilla/firefox/cvstr9dz.default/cookies.txt";
my $cookie_jar  = HTTP::Cookies::Netscape->new( file => $cookie_file );

# User agent used for the forum request, with the cookie jar above
# attached to it.
my $browser = LWP::UserAgent->new( cookie_jar => $cookie_jar );


# Command-line handling.
#
# Positional arguments:
#   $ARGV[0]  forum number (sent as the f= URL parameter)
#   $ARGV[1]  thread count (sent as the pp= URL parameter)
#   $ARGV[2]  directory the RSS file is written to
#   $ARGV[3]  optional output file name; defaults to pcdvd-<forumNo>.xml
#
# With fewer than three arguments the usage text goes to STDERR and the
# script stops via "exit -1".
if ( @ARGV < 3 ) {
    print STDERR "Usages: \n",
        '    perl pcdvd-rss.pl $forumNo $threadCount $outputPath [$OUTPUT_FILE]',
        "\n\n",
        '    * $OUTPUT_FILE is defaulted by pcdvd-$forumNo.xml',
        "\n";
    exit -1;
}

my ( $FORUM_NUM, $THREAD_SIZE, $OUTPUT_PATH ) = @ARGV[ 0 .. 2 ];

# Test the optional file name with defined/length rather than plain
# truthiness, so that a file literally named "0" is honoured instead of
# being silently replaced by the default name.
my $OUTPUT_FILE
    = ( defined $ARGV[3] && length $ARGV[3] )
    ? $ARGV[3]
    : "pcdvd-" . $FORUM_NUM . ".xml";

# Full path of the RSS file that will be written at the end of the run.
my $RSS = File::Spec->catfile( $OUTPUT_PATH, $OUTPUT_FILE );

# Echo the effective settings, tab-separated, on STDOUT.
print join( "\t",
    $FORUM_NUM, $THREAD_SIZE, $OUTPUT_PATH, $OUTPUT_FILE, $RSS )
    . "\n";

# Forum listing URL: page 1 of forum $FORUM_NUM with pp=$THREAD_SIZE,
# sorted by last post in descending order, daysprune=2.
my $URL = join '',
    $BASE_URL,
    "/forumdisplay.php?s=&f=", $FORUM_NUM,
    "&page=1&pp=",             $THREAD_SIZE,
    "&sort=lastpost&order=desc&daysprune=2";

my $content = $browser->get($URL);

# Stop here when the request failed.  Carrying on would scrape an error
# response and overwrite the existing feed file with an empty feed.
die "Failed to fetch $URL: " . $content->status_line . "\n"
    unless $content->is_success;

# NOTE: as_string() is the complete HTTP response (status line and
# headers followed by the body), not just the HTML.  The patterns applied
# to $_ from here on only look for HTML markup.
$_ = $content->as_string();

# Match in list context so that each variable reflects its own pattern.
# Reading $1 after an unchecked match returns whatever the previous
# successful match captured, which silently put the title into the
# description whenever the <meta> tag was missing.
my ($TITLE) = m#<title>(.*?)</title>#s;
$TITLE = '' unless defined $TITLE;

# Fall back to the title when the page has no description <meta> tag, so
# the channel description is always defined.
my ($DESCRIPTION) = m#<meta name="description" content="(.*?)" />#s;
$DESCRIPTION = $TITLE unless defined $DESCRIPTION;

# Reduce the page held in $_ to plain text lines, one line per closed
# <table>, and store them in @threads.  Every statement below edits $_ in
# place and relies on the effect of the statements before it, so the
# order matters.

# Keep only the markup between the "show threads" marker comments.  The
# first pattern is greedy, so it removes everything up to and including
# the last start marker; the second removes the end marker and all text
# after it.
s#.*<!-- show threads -->##s;
s#<!-- end show threads -->.*##s;
# Remove the remaining HTML comments.
s#<!--.*?-->##gs;
# Flatten the text onto a single line: tabs and &nbsp; entities become
# spaces, line feeds and carriage returns are removed.
s#\t# #g;
s#\n##g;
s#\r##g;
s#&nbsp;# #g;

# Disabled: collapse runs of spaces (the same substitution is applied
# further down).
#s#  *# #g;
# Remove <br /> tags.
s#<br />##g;
# Remove the "sticky thread" label text.
s#置頂:##g;
# Remove opening <table> tags and <img> tags, and strip the attributes
# from <td> tags.
s#<table.*?>##g;
s#<img.*?>##g;
s#<td.*?>#<td>#g;

# Disabled: strip the attributes from <span> tags.
#s#<span.*?>#<span>#gi;
# Remove opening <div> tags (case-insensitive).
s#<div.*?>##gi;
# Reduce the opening tag of links whose onclick handler calls who(N) to a
# bare <a>.
s|<a href="#" onclick="who\([0-9]*\); return false;">|<a>|g;
# Remove spans that contain only spaces, spans whose text is wrapped in
# parentheses, and spans that start with a style attribute (tag and
# content).
s#<span[^>]*> *</span>##g;
s#<span[^>]*>\(.*?\)</span>##g;
s#<span style=.*?</span>##g;
# Remove empty cells.
s#<td> *</td>##g;
# Remove bare <a> elements that contain only digits and commas.
# NOTE(review): presumably the reply counters left by the who(N) rewrite
# above -- confirm against the live markup.
s#<a>[0-9,]*</a>##g;
# Remove the "posted by <member link>" text.
s#由<a href="member.*?</a>發表##g;
# Remove target="..." attributes.
# NOTE(review): this presumably leaves `href="..." >` with a space before
# the `>`, which the item pattern in the feed loop expects -- confirm.
s#target=".*?"##g;
# Remove closing </div> tags.
s#</div>##g;
# Collapse every run of spaces to a single space.
s#  *# #g;
# Remove row endings, cells that are (now) empty, row starts, opening
# <span class=...> tags and closing </span> tags.
s#</td> *</tr>##g;
s#<td> *</td>##g;
s# <tr> *<td>##g;
s#<span class=.*?>##g;
s#</span>##g;

# Turn every cell boundary into space-tab-space, then delete (not
# collapse) each run of two or more spaces.  A single space next to the
# tab survives; a space that was already adjacent to the boundary forms a
# longer run with the inserted one and is removed together with it.
s#</td> *<td># \t #g;
s#   *##g;
# Each closing </table> becomes a newline, the separator used by the
# split at the end.
s#</table>#\n#g;
# Trim a newline at the very end of the text and spaces at the very
# start.  Without /m, ^ and $ anchor to the whole string, not to each
# line.  The second ^ substitution repeats the first and finds nothing
# left to remove.
s#\n$##gs;
s#^ *##g;
s#^ *##g;
# Rewrite "?s=<anything>&amp;" to "?", dropping the s= parameter from
# query strings.
s#\?s=.*?&amp;#\?#g;

# Remove empty goto=newpost links together with the space after them.
s#<a href="showthread.php\?goto=newpost&amp;t=[0-9]*"></a> ##g;

# Split the cleaned-up text into one element per line.
my @threads = split "\n";

# Create the RSS 2.0 document and fill in its channel header from the
# title and description scraped from the forum page.
#
# The constructor is called as Class->new(...) rather than with the
# indirect-object form "new Class(...)", which perl resolves heuristically
# and can misparse when a sub named "new" is in scope.
#
# NOTE(review): RSS language codes are normally written with a hyphen
# (e.g. zh-tw); 'zh_TW' is kept unchanged here -- confirm what feed
# readers expect before altering it.
my $rss = XML::RSS->new( version => '2.0' );
$rss->channel(
    title          => $TITLE,
    link           => $BASE_URL,
    language       => 'zh_TW',
    description    => $DESCRIPTION,
    copyright      => 'PCDVD數位科技討論區',
    managingEditor => 'PCDVD數位科技討論區',
    webMaster      => 'PCDVD數位科技討論區'
);

# Turn every scraped line that looks like a thread row into a feed item,
# then write the finished feed to the file named by $RSS.
for my $thread (@threads) {

    # Debug output: the raw line being examined.
    print $thread . "\n\n";

    # Captured groups, in order: the href of the first link, its link
    # text, the tab-delimited column after it, and the href of the next
    # link (captured but unused).
    my @fields = $thread
        =~ m#<a href="(.*?)" >(.*?)</a>\t(.*?)\t *<a href="(.*?)">#i;
    next unless @fields;

    my ( $href, $subject, $column ) = @fields;

    # Debug output: the relative link of the thread.
    print $href . "\n\n";

    # Title and description carry the same text.
    my $headline = "$subject $column";
    $rss->add_item(
        title       => $headline,
        link        => $BASE_URL . "/" . $href,
        description => $headline
    );
}

$rss->save($RSS);

