([^<]*)/', $record, $stuff);
		$record = $stuff[1];
		return($record);
	}

	/**
	 * Convert one split MARC line into a map of subfield values.
	 *
	 * Element 0 of the input (the tag) is discarded. Every remaining element is
	 * read as a one-character subfield code followed by its value; the key is
	 * the code plus a running occurrence number, so two "a" subfields become
	 * "a1" and "a2".
	 *
	 * @param array|string $lineray Elements of one line; a pipe-delimited string
	 *                              is also accepted and split on "|".
	 * @return array Trimmed subfield values keyed by code + occurrence number.
	 */
	private function parse_marc_row($lineray){
		// unset() on a string offset is a fatal error, so normalise to an array first.
		if(!is_array($lineray))
			$lineray = explode('|', (string) $lineray);

		$marcrow = array();
		$count = array();

		unset($lineray[0]);
		foreach($lineray as $element){
			$element = (string) $element;
			if('' === $element)
				continue; // nothing to use as a subfield code

			$code = $element[0];
			$count[$code] = isset($count[$code]) ? $count[$code] + 1 : 1;

			// NOTE(review): as written this replaces a single space with a single
			// space (a no-op); the search string may have lost whitespace - TODO confirm.
			$marcrow[$code . $count[$code]] = trim(str_replace(' ', ' ', substr($element, 1)));
		}

		return($marcrow);
	}

	/**
	 * Parse a raw MARC display into the importer's record array.
	 *
	 * Each line is rewritten as "tag|xx|a<data>" (characters 0-2, 4-5 and 7
	 * onward of the trimmed line) and appended to 'rawrecord'. The first line
	 * tagged 240-245 that carries an "h" subfield decides 'format'. Defaults
	 * are then applied for format and catdate, 'pubdate' is derived from
	 * pubyear and catdate, and 'tags' is filled with scrib:* entries.
	 *
	 * NOTE(review): nothing visible in this function fills title, isbn, pubyear,
	 * catdate, alttitle or subjkey, although the code below reads them. The
	 * source appears to have lost the section that extracted those fields, along
	 * with the step that split each line on "|" (restored here, because the
	 * format detection indexes $lineray as an array). An unmatched closing brace
	 * that ended the loop before the format detection was dropped for the same
	 * reason. Confirm against an intact copy of this file.
	 *
	 * @param string $marcrecord Raw record text, one field per line.
	 * @return array Record array including 'rawrecordtype', 'rawrecord',
	 *               'format', 'catdate' and 'tags'.
	 */
	public function parse_marc($marcrecord){
		$atomic = array(
			'rawrecordtype' => 'iiimarc',
			'rawrecord' => '',
		);
		$tags = array();

		// Lower-cased prefix of the "h" subfield => array(format, tag label).
		$formatmap = array(
			'book' => array('Book', 'Books'),
			'micr' => array('Microform', 'Microform'),
			'electr' => array('Website', 'Websites'),
			'vid' => array('Video', 'Video'),
			'motion' => array('Video', 'Video'),
			'audi' => array('Audio', 'Audio'),
			'cass' => array('Audio', 'Audio'),
			'phono' => array('Audio', 'Audio'),
			'record' => array('Audio', 'Audio'),
			'sound' => array('Audio', 'Audio'),
			'carto' => array('Map', 'Maps'),
			'map' => array('Map', 'Maps'),
			'globe' => array('Map', 'Maps'),
		);

		// Fold continuation lines (newline followed by a space) into the previous line.
		$marcrecord = str_replace("\n ", ' ', $marcrecord);
		$details = explode("\n", $marcrecord);

		// Drop the first and last lines, then give the leader a numeric tag.
		array_pop($details);
		array_shift($details);
		$details[0] = str_replace('LEADER ', '000 ', isset($details[0]) ? $details[0] : '');

		foreach($details as $line){
			$line = trim($line);

			$rawline = substr($line, 0, 3) . '|' . substr($line, 4, 2) . '|a' . substr($line, 7);
			$atomic['rawrecord'] .= $rawline . "\n";

			// Element 0 is the tag, element 1 the two characters after it,
			// the rest are subfields prefixed by their one-letter code.
			$lineray = explode('|', $rawline);
			$tag = is_numeric($lineray[0]) ? (int) $lineray[0] : -1;

			//Format
			if(empty($atomic['format']) && ($tag > 239) && ($tag < 246)){
				$marc = self::parse_marc_row($lineray);
				$temp = str_replace(array('[', ']'), '', isset($marc['h1']) ? $marc['h1'] : '');

				$format = '';
				$formats = '';
				foreach($formatmap as $prefix => $names){
					if(0 === stripos($temp, $prefix)){
						list($format, $formats) = $names;
						break;
					}
				}
				// Any other non-empty value is treated as classroom material.
				if(!$format && $temp)
					$format = $formats = 'Classroom Material';

				if($format){
					$atomic['format'][] = $format;
					$tags[] = 'scrib:format='. trim($formats);
				}
			}
		}

		if(empty($atomic['format'][0])){
			$atomic['format'][0] = 'Book';
			$tags[] = 'scrib:format=Books';
		}

		if(empty($atomic['catdate'][0]))
			$atomic['catdate'][0] = '1984-01-01';

		// A publication year more than five years ahead is replaced by the catalogue year.
		if(isset($atomic['pubyear'][0]) && $atomic['pubyear'][0] > ((int) date('Y') + 5))
			$atomic['pubyear'][0] = substr($atomic['catdate'][0], 0, 4);

		if(!empty($atomic['pubyear'][0])){
			// pubdate = publication year + everything after the year in catdate.
			$atomic['pubdate'] = $atomic['pubyear'][0] . substr($atomic['catdate'][0], 4);
			$tags[] = 'scrib:pubyear='. $atomic['pubyear'][0];
		}

		if(!empty($atomic['isbn'])){
			foreach((array) $atomic['isbn'] as $temp){
				$tags[] = 'scrib:isbn='. trim($temp);
			}
		}

		if(!empty($atomic['alttitle'])){
			$titles = isset($atomic['title']) ? (array) $atomic['title'] : array();
			$atomic['title'] = array_unique(array_merge($titles, (array) $atomic['alttitle']));
		}

		if(!empty($atomic['subjkey'])){
			foreach((array) $atomic['subjkey'] as $temp){
				$tags[] = 'scrib:subject='. trim($temp);
			}
		}

		$atomic['tags'] = $tags;

		return($atomic);
	}

	/**
	 * Fetch, parse and harvest a single bib record.
	 *
	 * Builds the record URL with marcurl(), fetches it with fetchmarc(), parses
	 * it with parse_marc() and, when the result has a title, fills in the
	 * the_* fields and the sourceid tag, renders the excerpt and content through
	 * $scrib_templates and passes the record to $scrib_importer->insertharvest().
	 * Records without a title are ignored.
	 *
	 * Reads $_REQUEST['sourceid'] and the globals $scrib, $scrib_importer and
	 * $scrib_templates.
	 *
	 * @param string $host Catalogue host, passed through to marcurl().
	 * @param mixed  $bibn Bib record number.
	 * @return void
	 */
	public function scrapeit($host, $bibn){
		global $scrib, $scrib_importer, $scrib_templates;

		$iiirecord = $this->parse_marc($this->fetchmarc($this->marcurl($host, $bibn)));
		if(empty($iiirecord['title']))
			return;

		if($scrib->options['amazonapi'] && $enrichinfo = $scrib_importer->enrich(isset($iiirecord['isbn']) ? $iiirecord['isbn'] : null)){
			// The enrichment result lives in $enrichinfo; the previous code read an
			// undefined $amazoninfo here, so these fields were always null.
			$iiirecord['asin'] = $enrichinfo['asin'];
			$iiirecord['img'] = $enrichinfo['img'];
			$iiirecord['amzn'] = $enrichinfo;
			if($iiirecord['amzn']['review'][0]['content'])
				$iiirecord['shortdescription'] = $scrib_importer->summarize($iiirecord['amzn']['review'][0]['content']);
		}

		// Source id = first two characters of the sanitised sourceid + bib number.
		$sourceid = isset($_REQUEST['sourceid']) ? strtolower($_REQUEST['sourceid']) : '';
		$sourceid = substr(preg_replace('/[^a-z|0-9]/', '', $sourceid), 0, 2) . $bibn;

		// parse_marc() stores pubdate as a plain string; indexing it with [0]
		// would keep only its first character, so accept both shapes.
		$pubdate = isset($iiirecord['pubdate']) ? $iiirecord['pubdate'] : null;
		if(is_array($pubdate))
			$pubdate = isset($pubdate[0]) ? $pubdate[0] : null;

		$iiirecord['the_title'] = $iiirecord['title'][0];
		$iiirecord['the_pubdate'] = $pubdate;
		$iiirecord['the_catdate'] = $iiirecord['catdate'][0];
		$iiirecord['the_sourceid'] = $sourceid;
		$iiirecord['tags'][] = 'scrib:sourceid='. $iiirecord['the_sourceid'];

		$iiirecord['the_excerpt'] = $scrib_templates->summary($iiirecord);
		$iiirecord['the_content'] = $scrib_templates->full($iiirecord);
		flush();

		$scrib_importer->insertharvest($iiirecord);
		flush();
	}
}

// Run a scrape when sourceid, host and bibn are all supplied with the request.
if($_REQUEST['sourceid'] && $_REQUEST['host'] && $_REQUEST['bibn']){
	$scrib_III_scraper = new Scrib_III_scraper;
	$scrib_importer = new Scrib_importer;
	$scrib_templates = new Scrib_default_templates;

	if($_REQUEST['to'] && $_REQUEST['bibn'] <= $_REQUEST['to']){
		// Batch mode: scrape 25 consecutive bib numbers starting at bibn.
		for ($i = $_REQUEST['bibn']; $i < $_REQUEST['bibn'] + 25; $i++) {
			$scrib_III_scraper->scrapeit($_REQUEST['host'], $i);
		}
		// NOTE(review): $nextbibn is not read by any visible code and the echo
		// below emits only a single space; its original content may be missing.
		$nextbibn = $i;
		echo ' ';
	}else{
		// Single-record mode, with SCRIB_DEBUG defined.
		define('SCRIB_DEBUG', true);
		$scrib_III_scraper->scrapeit($_REQUEST['host'], $_REQUEST['bibn']);
	}

	// Store the parameters of this run under the scrib_IIIimporter option.
	$prefs['lasthost'] = $_REQUEST['host'];
	$prefs['lastbibn'] = $_REQUEST['bibn'];
	$prefs['lastsourceid'] = $_REQUEST['sourceid'];
	update_option('scrib_IIIimporter', $prefs);
}else{
	$prefs = get_option('scrib_IIIimporter'); ?>