http://dx.doi.org/10.1371/journal.pbio.0030024 // // param: $toctxt - a string containing the RSS XML source // returns: an array of string paper IDs (e.g. "0030024", "0030025") // function tocToIDs($toctxt) { $ret = array(); $lines = split("\n", $toctxt); for ($i=0; $i $lastissue) { // not out yet. } else if ($vol==1 && $issue > $firstissues) { // doesn't exist... the first volume of PLoS Biology only had 3 issues. } else { // Fetch the table of contents in RSS format. // URL of format: http://biology.plosjournals.org/perlserv/?request=get-toc-rss&issn=1545-7885&volume=1&issue=1 $u = $tocurlprefix."volume=".$vol."&issue=".$issue; $toctxt = file_get_contents($u); file_put_contents("tocs/toc-".$vol."-".$issue.".xml", $toctxt); print "=== Writing $vol $issue table of contents\n"; // make a dir for the contents... $dir = "vol-".$vol.".issue-".$issue; @mkdir($dir); // fetch the paper list... $ids = tocToIDs($toctxt); for ($i=0; $i