use LWP::Simple;
use Unicode::Japanese;
require 'cgi-lib.pl';

# Shared Unicode::Japanese object; the output sections further down feed
# their text through it ($s->set(..., 'sjis') followed by $s->getu).
$s = Unicode::Japanese->new();

# Parse the CGI request (cgi-lib.pl); the parameters are read from %in below.
&ReadParse(*in);

# Site-specific settings.
#   $cgidir  - taken from the CGIDIR environment variable; used to build the
#              feed URL and the rss2html.cgi link.
#   $baseurl - site that is scraped and that item links are resolved against.
#   $imgurl  - channel image path, relative to $baseurl.
$cgidir = $ENV{'CGIDIR'};
$baseurl = "http://www.janjan.jp";
$imgurl = "/img/header_01.gif";

# 'category' comes straight from the request and ends up in a file name
# ($outfile), in the URL that is fetched and in the HTML that is printed.
# Accept only a plain word so it cannot carry path separators, markup or
# shell/open() metacharacters; anything else falls back to the default.
if($in{'category'} && $in{'category'} =~ /\A[A-Za-z0-9_-]+\z/){
  $category = $in{'category'};
}else{
  $category = "culture";
}
$outfile = "janjan_${category}.xml";

# Display label for each category key. The label is interpolated into the
# feed <title> and into the heading of the HTML page printed at the end.
# NOTE(review): the labels look like legacy-encoded (Shift_JIS) text -- the
# header they are interpolated into is handed to $s->set(..., 'sjis') -- so
# keep this file's byte encoding intact when editing them. "culture" and
# "government" are empty strings as written; confirm that is intended.
%categories = (
  "culture" => "",
  "living" => "炵",
  "area" => "n",
  "media" => "fBA",
  "government" => "",
  "business" => "rWlX",
  "world" => "E",
);

# Listing page that is scraped for the selected category.
$geturl = join("/", $baseurl, $category, "list.php");

# Public address of the generated feed. It is served from this host's CGI
# directory; the earlier variant pointed at the scraped site instead:
#   $rss = "$baseurl/$outfile";
$rss = "http://localhost" . $cgidir . "/" . $outfile;

# Today's date (local time) as YYYY-MM-DD for the channel's <dc:date>.
# localtime yields a 0-based month and a year counted from 1900.
($mday, $mon, $year) = (localtime(time))[3, 4, 5];
$mon++;
$year = $year + 1900;
$cdate = sprintf("%4d-%1.2d-%1.2d", $year, $mon, $mday);

# Fetch the category listing page. LWP::Simple's get() returns undef when the
# request fails; carrying on in that case would overwrite the existing feed
# file with an empty feed, so report the failure to the client and stop.
$page = get($geturl);
unless(defined $page){
  print "Status: 502 Bad Gateway\n";
  print "Content-type: text/plain\n\n";
  print "Could not retrieve the article list from $baseurl\n";
  exit;
}
@lines = split(/\n/, $page);

# Walk the listing page and collect one %items entry per article, keyed by
# the article link. The value is a tab-joined record:
#   date, subject, title, creator, description
# State carried between lines:
#   $insw  - 1 once the "repeat article" marker has been seen
#   $msw   - 1 while the text of an <a>/<font> element is being accumulated
#   $mline - the text accumulated so far for that element
for $line (@lines){
  # The article list sits between two marker comments; the end marker stops
  # the scan wherever it appears.
  if($line =~ /^\s+<!--repeat article-->$/i){
    $insw = 1;
    next;
  }
  last if $line =~ /^\s+<!--repeat article end-->$/i;
  next unless $insw == 1;

  # alt="..." texts on the line form the subject, once entries matching /NA/
  # have been stripped from both ends.
  @subcategories = ($line =~ /alt="([^"]+)"/gi);
  if(@subcategories){
    pop(@subcategories) while $subcategories[$#subcategories] =~ /NA/;
    shift(@subcategories) while $subcategories[0] =~ /NA/;
    $subject = join(":", @subcategories);
  }

  # An element of interest may span several lines: keep appending lines until
  # one of the two patterns below matches the accumulated text.
  $msw = 1 if $line =~ /(<a href=|<font size=)/i;
  next unless $msw == 1;

  $mline .= $line;
  if($mline =~ /<a href="([^"]+)">([^<]+)<\/a>/i){
    # Link element: remember link and title until the matching <font> shows up.
    ($url, $title) = ($1, $2);
  }elsif($mline =~ /<font size="[^"]+">([^\(]+)\(([^\)]+)\)(\d{4}\/\d{2}\/\d{2})<\/font>/i){
    # "description(creator)YYYY/MM/DD" completes the record for the last link.
    ($description, $creator, $date) = ($1, $2, $3);
    $items{$url} = join("\t", $date, $subject, $title, $creator, $description);
  }else{
    # Element not complete yet; continue accumulating with the next line.
    next;
  }
  $msw = 0;
  $mline = "";
}

# Create/truncate the feed file. The three-argument form keeps $outfile from
# being parsed for mode characters, and a failure is reported instead of
# silently printing to an unopened handle for the rest of the script.
open(OUT, ">", $outfile) or die "Cannot open $outfile for writing: $!";
# First part of the RSS 1.0 (RDF) document: XML declaration, <rdf:RDF> root
# and the <channel> element up to the opening <rdf:Seq>, which the loop that
# follows fills with one <rdf:li> per article.
# The heredoc interpolates $rss, $categories{$category}, $cdate, $baseurl
# and $imgurl.
# NOTE(review): title/description look like legacy-encoded (Shift_JIS) text;
# keep the file's byte encoding intact when editing them.
$header = <<HERE1;
<?xml version=\"1.0\" encoding=\"utf-8\"?>

<rdf:RDF 
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns="http://purl.org/rss/1.0/"
> 

  <channel rdf:about=\"$rss\">
    <title>JANJAN NPO^C^[lbgV: $categories{$category}</title>
    <link>$rss</link>
    <description>wJANJANx͎sL҂ɂȂăj[X𑗂NPO^C^[lbgVłB</description>
    <dc:publisher>JANJAN</dc:publisher>
    <dc:rights>Copyright c 2003 JAN JAN</dc:rights>
    <dc:date>$cdate</dc:date>

    <image rdf:resource=\"$baseurl$imgurl\" />

    <items>
      <rdf:Seq>
HERE1

# Hand the header to the converter declared as 'sjis' and write what getu
# returns (presumably UTF-8, matching the XML declaration -- confirm against
# the Unicode::Japanese documentation).
$s->set($header,'sjis');
$uheader = $s->getu;
print OUT $uheader;

# Table of contents: one <rdf:li> per collected article, in sorted link
# order, each passed through the same sjis -> getu conversion as the rest of
# the feed.
for $link (sort keys %items){
   $str = qq{      <rdf:li resource="$baseurl$link" />\n};
   $s->set($str, 'sjis');
   $ustr = $s->getu;
   print OUT $ustr;
}

# Close the <rdf:Seq>/<items>/<channel> elements opened by the header and
# emit the channel's <image> element ($baseurl and $imgurl are interpolated).
# NOTE(review): the <title> text looks like legacy-encoded (Shift_JIS) text;
# keep the file's byte encoding intact when editing it.
$mheader = <<HERE2;
      </rdf:Seq>
    </items>

  </channel>

  <image rdf:about=\"$baseurl$imgurl\">
    <title>JANJAN NPO^C^[lbgV</title>
    <url>$baseurl$imgurl</url>
    <link>$baseurl</link>
  </image>
HERE2
# Same conversion step as for the other sections: set as 'sjis', write getu.
$s->set($mheader,'sjis');
$umheader = $s->getu;
print OUT $umheader;

# One <item> element per collected article, in the same sorted link order as
# the <rdf:li> index. Each %items value is the tab-joined record
# date/subject/title/creator/description built by the scanner.
for $link (sort keys %items){
    ($date, $subject, $title, $creator, $description) = split(/\t/, $items{$link});

    # The listing shows dates as YYYY/MM/DD; the feed uses YYYY-MM-DD.
    $date =~ s{/}{-}g;

    $item = <<"ENTRY";

  <item rdf:about="$baseurl$link">
    <title>$title</title>
    <link>$baseurl$link</link>
    <dc:description>$description</dc:description>
    <dc:subject>$subject</dc:subject>
    <dc:creator>$creator</dc:creator>
    <dc:date>$date</dc:date>
  </item>

ENTRY

    # Same conversion step as for the other sections: set as 'sjis', write getu.
    $s->set($item, 'sjis');
    $uitem = $s->getu;
    print OUT $uitem;
}

# Close the RDF root element and finish the feed file. Buffered write errors
# (full disk, handle that never opened) only surface at close(), so check it
# rather than advertising a feed that was not written completely.
print OUT "</rdf:RDF>\n";
close(OUT) or die "Cannot finish writing $outfile: $!";

# CGI response: a small HTML page whose link hands the generated feed to
# rss2html.cgi. $rss_encoding is the feed URL escaped by juri_encode() so it
# can be used as the value of the rss= query parameter; the link text shows
# the plain $rss URL.
# NOTE(review): the page declares charset=Shift_JIS and its title/heading
# look like legacy-encoded text; keep the file's byte encoding intact when
# editing them.
$rss_encoding = &juri_encode($rss);
print <<NEWSLINK;
Content-type: text/html

<HTML>
<HEAD>
  <TITLE>JANJANj[X</TITLE>
  <META HTTP-EQUIV="Content-Type" content="text/html; charset=Shift_JIS">
<LINK REL="STYLESHEET" TYPE="text/css" HREF="/mystyle.css">
</HEAD>
<BODY>
<div class=\"emph\">JANJANj[X: $categories{$category}</div>
<p><a href=\"$cgidir/rss2html.cgi?rss=$rss_encoding\" target="submain">$rss</a></p>
</BODY>
</HTML>
NEWSLINK

# juri_encode($str)
#
# Escapes a string for use as a query-string value.
#   - ASCII letters, digits and  - _ . ! * ' ( ) ~  are kept as they are.
#   - A space becomes "+".
#   - Every other character becomes %XX (upper-case hex). A character above
#     0xFF is first turned into its UTF-8 bytes so that each byte gets its
#     own two-digit escape instead of one over-long "%3042"-style escape.
#
# Returns the escaped copy; the caller's string is not modified.
sub juri_encode{
  my($str) = @_;
  $str =~ s{([^a-z0-9\-_.!*'\(\)~ ])}{
    my $ch = $1;
    # Only wide characters need splitting into bytes; anything up to 0xFF is
    # escaped from its own code, exactly as before.
    utf8::encode($ch) if ord($ch) > 0xFF;
    join("", map { sprintf("%%%02X", ord($_)) } split(//, $ch));
  }egi;
  $str =~ tr/ /+/;
  return $str;
}
