Documentation here.';

	/**
	 * Wizard controller for the importer screen.
	 *
	 * Reads the `step` query argument (missing or empty means 0), prints the
	 * shared header, runs the handler for that step, then prints the footer:
	 *   0 - greet()            greeting and upload form
	 *   1 - marc_accept_file() after the 'import-upload' nonce check
	 *   2 - marc_parse_file()
	 *   3 - harest_import()
	 *   4 - ktnxbye()
	 * Any other step value renders only the header and footer.
	 */
	function dispatch() {
		$step = empty($_GET['step']) ? 0 : (int) $_GET['step'];

		// load the header
		$this->header();

		if (0 === $step) {
			$this->greet();
		} elseif (1 === $step) {
			// the upload form posts here; verify its nonce before touching the file
			check_admin_referer('import-upload');
			$this->marc_accept_file();
		} elseif (2 === $step) {
			$this->marc_parse_file();
		} elseif (3 === $step) {
			$this->harest_import();
		} elseif (4 === $step) {
			$this->ktnxbye();
		}

		// load the footer
		$this->footer();
	}

	function header() { echo '
'; echo '

'.__('Scriblio Catalog Importer').'

'; } function footer() { echo '
'; } function greet() { echo '
'; echo '

'.__('Howdy! Start here to import MaRC records into Scriblio.').'

'; echo '

'.__('This has not been tested much. Mileage may vary.').'

'; echo '

'; wp_import_upload_form("admin.php?import=$this->importer_code&step=1"); echo '

'; echo '
'; echo '

or jump immediately to

'; echo '
'; echo '
'; } function ktnxbye() { echo '
'; echo '

'.__('All done.').'

'; echo '
'; }

	/**
	 * Reset the stored importer state for a newly uploaded file, then show
	 * the file options screen.
	 *
	 * The source prefix and source field from the previous run are carried
	 * over; warnings, errors, the record offset and the harvested counter
	 * are cleared. Any other keys in the stored option are dropped. The
	 * result is written back to the 'scrib_marcimporter' option before
	 * marc_options() is called.
	 */
	function marc_accept_file(){
		$prefs = get_option('scrib_marcimporter');

		// get_option() yields FALSE when the option has never been saved
		// (first run); treat that as "no previous settings" instead of
		// indexing into a non-array.
		if(!is_array($prefs))
			$prefs = array();

		$temp = array(
			'scrib_marc-sourceprefix' => isset($prefs['scrib_marc-sourceprefix']) ? $prefs['scrib_marc-sourceprefix'] : '',
			'scrib_marc-sourcefield' => isset($prefs['scrib_marc-sourcefield']) ? $prefs['scrib_marc-sourcefield'] : '',
			'scrib_marc-warnings' => array(),
			'scrib_marc-errors' => array(),
			'scrib_marc-record_start' => 0,
			'scrib_marc-records_harvested' => 0,
		);

		update_option('scrib_marcimporter', $temp);

		$this->marc_options();
	}

	function marc_options(){ global $file; if(empty($this->id)){ $file = wp_import_handle_upload(); if ( isset($file['error']) ) { echo '

'.__('Sorry, there has been an error.').'

'; echo '

' . $file['error'] . '

'; return; } $this->file = $file['file']; $this->id = (int) $file['id']; } $prefs = get_option('scrib_marcimporter'); echo '
'; echo '

'.__('MaRC file options.').'

'; echo '

'.__('All Scriblio records have a 'sourceid,' a unique alphanumeric string that's used to avoid creating duplicate records and, in some installations, link back to the source system for current availability information.').'

'; echo '

'.__('The sourceid is made up of two parts: the prefix that you assign, and a ID info from the source record. Many systems assign unique numbers to each record, the challenge is figuring out which field to use.').'

'; echo '
'; ?>



'; echo '
'; echo '
'; } function marc_parse_file(){ $interval = 2500; if( empty( $_REQUEST[ 'scrib_marc-record_start' ] )) $n = 0; else $n = (int) $_REQUEST[ 'scrib_marc-record_start' ]; ini_set('memory_limit', '1024M'); set_time_limit(0); ignore_user_abort(TRUE); $this->id = (int) $_GET['id']; $this->file = get_attached_file($this->id); if(empty($_POST['scrib_marc-sourceprefix']) || empty($_POST['scrib_marc-sourcefield']) || empty($this->file)){ echo '

'.__('Sorry, there has been an error.').'

'; echo '

Please complete all fields

'; return; } // save these settings so we can try them again later $prefs = get_option('scrib_marcimporter'); $prefs['scrib_marc-sourceprefix'] = stripslashes($_POST['scrib_marc-sourceprefix']); $prefs['scrib_marc-sourcefield'] = stripslashes($_POST['scrib_marc-sourcefield']); update_option('scrib_marcimporter', $prefs); error_reporting(E_ERROR); // initialize the marc library require_once(ABSPATH . PLUGINDIR .'/'. plugin_basename(dirname(__FILE__)) .'/includes/php-marc.php'); $file = new File($this->file); $prefs['scrib_marc-records_count'] = count($file->raw); update_option('scrib_marcimporter', $prefs); if($n > 0 || count($file->raw) > $interval) $file->raw = array_slice($file->raw, $n, $interval); if(!empty($_POST['scrib_marc-debug'])){ $record = $file->next(); echo '

The MaRC Record:

';			
			print_r($record->fields());
			echo '

The Tags and Display Record:

';
			$test_pancake = $this->marc_parse_record($record->fields());
			print_r($test_pancake);
			echo '
'; echo '

The Raw Excerpt:

'. $test_pancake['the_excerpt'] .'

'; echo '

The Raw Content:

'. $test_pancake['the_content'] .'

'; echo '

The SourceID: '. $test_pancake['the_sourceid'] .'

'; // bring back that form echo '

'.__('File Options').'

'; echo '

File has '. $prefs['scrib_marc-records_count'] .' records.

'; $this->marc_options(); }else{ // import with status $count = 0; echo "

Reading the file and parsing ". $file->num_records() ." records. Please be patient.

"; echo '
    '; while($file->pointer < count($file->raw)){ if($record = $file->next()){ $bibr = &$this->marc_parse_record($record->fields()); echo "
  1. {$bibr['the_title']} {$bibr['the_sourceid']}
  2. "; $count++; } } echo '
'; $prefs['scrib_marc-warnings'] = array_merge($prefs['scrib_marc-warnings'], $file->warn); $prefs['scrib_marc-errors'] = array_merge($prefs['scrib_marc-errors'], $file->error); $prefs['scrib_marc-records_harvested'] = $prefs['scrib_marc-records_harvested'] + $count; update_option('scrib_marcimporter', $prefs); if(count($file->raw) >= $interval){ $prefs['scrib_marc-record_start'] = $n + $interval; update_option('scrib_marcimporter', $prefs); $this->marc_options(); ?>

marc_done(); ?> tagno == 100) || ($field->tagno == 110)){ $temp = ereg_replace(',$', '', $field->subfields['a'] .' '. $field->subfields['d']); $atomic['author'][] = mb_convert_encoding($temp, 'UTF-8'); }else if($field->tagno == 110){ $temp = $field->subfields['a']; $atomic['author'][] = mb_convert_encoding($temp, 'UTF-8'); }else if(($field->tagno > 699) && ($field->tagno < 721)){ $temp = ereg_replace(',$', '', $field->subfields['a'] .' '. $field->subfields['d']); $atomic['author'][] = mb_convert_encoding($temp, 'UTF-8'); //Standard Numbers }else if($field->tagno == 10){ $temp = explode(' ', trim($field->subfields['a'])); $atomic['lccn'][] = ereg_replace('[^0-9]', '', $temp[0]); }else if($field->tagno == 20){ $temp = trim($field->subfields['a']) . ' '; $temp = ereg_replace('[^0-9|x|X]', '', strtolower(substr($temp, 0, strpos($temp, ' ')))); $atomic['isbn'][] = $temp; }else if($field->tagno == 22){ $temp = trim($field->subfields['a']) . ' '; $temp = ereg_replace('[^0-9|x|X|\-]', '', strtolower(substr($temp, 0, strpos($temp, ' ')))); $atomic['issn'][] = $temp; //Call Numbers }else if($field->tagno == 852){ // callnums from InfoCenter $temp = trim($field->subfields['h']); $atomic['callnumber'][] = $temp; $temp = trim($field->subfields['b']); $atomic['location'][] = $temp; $atomic['acqdate'][] = $field->subfields[x]{14}.$field->subfields[x]{15}.$field->subfields[x]{16}.$field->subfields[x]{17} .'-'. $field->subfields[x]{18}.$field->subfields[x]{19} .'-'. $field->subfields[x]{20}.$field->subfields[x]{21}; //Titles }else if($field->tagno == 245){ $temp = ucwords(trim(trim(trim(ereg_replace('/$', '', $field->subfields['a']) .' '. trim(ereg_replace('/$', '', $field->subfields['b']))), ',.;:/'))); $atomic['title'][] = $temp; $atomic['attribution'][] = trim(trim(trim($field->subfields['c']), ',.;:/')); }else if($field->tagno == 240){ $temp = ucwords(trim(trim(trim(ereg_replace('/$', '', $field->subfields['a']) .' '. 
trim(ereg_replace('/$', '', $field->subfields['b']))), ',.;:/'))); $atomic['alttitle'][] = $temp; }else if(($field->tagno > 719) && ($field->tagno < 741)){ $temp = $field->subfields['a']; $atomic['alttitle'][] = $field->subfields['a']; //Dates }else if($field->tagno == 260){ $temp = str_pad(substr(ereg_replace('[^0-9]', '', $field->subfields['c']), 0, 4), 4 , '5'); $atomic['pubyear'][] = $temp; }else if($field->tagno == 005){ $atomic['catdate'][] = $field->data{0}.$field->data{1}.$field->data{2}.$field->data{3} .'-'. $field->data{4}.$field->data{5} .'-'. $field->data{6}.$field->data{7}; }else if($field->tagno == 008){ $atomic['pubyear'][] = substr($field->data, 14, 4); //Subjects }else if(($field->tagno > 599) && ($field->tagno < 700)){ $atomic['subject'][] = trim(trim(implode(' -- ', $field->subfields)), '.'); if($atomic['subjkey']){ $atomic['subjkey'] = array_unique(array_merge($atomic['subjkey'], array_map($trimmer, array_values($field->subfields)))); }else{ $atomic['subjkey'] = array_map($trimmer, array_values($field->subfields)); } //URLs }else if($field->tagno == 856){ unset($temp); $temp['href'] = $temp['title'] = str_replace(' ', '', $field->subfields['u']); $temp['title'] = trim(parse_url( $temp['href'] , PHP_URL_HOST), 'www.'); if($field->subfields['3']) $temp['title'] = $field->subfields['3']; if($field->subfields['z']) $temp['title'] = $field->subfields['z']; $atomic['url'][] = ''. 
$temp['title'] .''; //Notes }else if(($field->tagno > 299) && ($field->tagno < 400)){ $atomic['physdesc'][] = implode(' ', array_values($field->subfields)); }else if(($field->tagno > 399) && ($field->tagno < 500)){ $atomic['title'][] = implode("\n", array_values($field->subfields)); }else if(($field->tagno > 799) && ($field->tagno < 841)){ $atomic['series'][] = implode("\n", array_values($field->subfields)); }else if(($field->tagno > 499) && ($field->tagno < 600)){ $line = implode("\n", array_values($field->subfields)); if($field->tagno == 504) continue; if($field->tagno == 505){ $atomic['contents'][] = str_replace(array('> ','> ','> '), '>', '
  • '. str_replace('--', "
  • \n
  • ", trim(str_replace(array(' ', ' ', ' '), ' ', $line))) .'
  • '); continue; } $atomic['notes'][] = str_replace(' ', ' ', $line); } //Format if((!$atomic['format']) && ($field->tagno > 239) && ($field->tagno < 246)){ $temp = ucwords(strtolower(str_replace('[', '', str_replace(']', '', $field->subfields['h'])))); if(eregi('^book', $temp)){ $format = 'Book'; $formats = 'Books'; }else if(eregi('^micr', $temp)){ $format = 'Microform'; }else if(eregi('^electr', $temp)){ $format = 'Website'; $formats = 'Websites'; }else if(eregi('^vid', $temp)){ $format = 'Video'; }else if(eregi('^motion', $temp)){ $format = 'Video'; }else if(eregi('^audi', $temp)){ $format = 'Audio'; }else if(eregi('^cass', $temp)){ $format = 'Audio'; }else if(eregi('^phono', $temp)){ $format = 'Audio'; }else if(eregi('^record', $temp)){ $format = 'Audio'; }else if(eregi('^sound', $temp)){ $format = 'Audio'; }else if(eregi('^carto', $temp)){ $format = 'Map'; $formats = 'Maps'; }else if(eregi('^map', $temp)){ $format = 'Map'; $formats = 'Maps'; }else if(eregi('^globe', $temp)){ $format = 'Map'; $formats = 'Maps'; }else if($temp){ $format = 'Classroom Material'; //$format = $temp; } if(!$formats) $formats = $format; if($format){ $atomic['format'][] = $format; $atomic['formats'][] = $formats; } } } } if(!$atomic['format'][0]){ $atomic['format'][0] = 'Book'; } if(!$atomic['acqdate']) $atomic['acqdate'] = $atomic['catdate']; if(!$atomic['catdate'][0]) $atomic['catdate'][0] = '1984-01-01'; if($atomic['pubyear'][0] > (date(Y) + 5)) $atomic['pubyear'][0] = substr($atomic['catdate'][0],0,4); if($atomic['pubyear'][0]){ $atomic['pubdate'][] = $atomic['pubyear'][0].substr($atomic['catdate'][0],4); } if($atomic['alttitle']) $atomic['title'] = array_unique(array_merge($atomic['title'], $atomic['alttitle'])); $atomic['the_sourceid'] = substr(ereg_replace('[^a-z|0-9]', '', strtolower($_POST['scrib_marc-sourceprefix'])), 0, 2) . trim(eval('return($marcrecord'. 
str_replace(array('(',')','$'), '', stripslashes($_POST['scrib_marc-sourcefield'])) .');')); if(!empty($atomic['title']) && !empty($atomic['the_sourceid'])){ $atomic['tags']['subj'] = $atomic['subjkey']; $atomic['tags']['auth'] = $atomic['author']; $atomic['tags']['isbn'] = $atomic['isbn']; $atomic['tags']['title'] = $atomic['title']; $atomic['tags']['format'] = $atomic['formats']; if($sweets = $this->get_sweets($atomic['isbn'])){ if(!empty($sweets['img'])); $atomic['img'] = $sweets['img']; if(!empty($sweets['summary'])){ $atomic['shortdescription'] = html_entity_decode(html_entity_decode($sweets['summary'])); } } $atomic['the_title'] = $atomic['title'][0]; $atomic['the_pubdate'] = $atomic['pubdate'][0]; $atomic['the_acqdate'] = $atomic['acqdate'][0]; $atomic['the_excerpt'] = $this->the_excerpt($atomic); $atomic['the_content'] = $this->the_content($atomic); $this->insert_harvest($atomic); return($atomic); }else{ return(FALSE); } } function marc_done(){ $prefs = get_option('scrib_marcimporter'); // click next echo '
    '; if(count($prefs['scrib_marc-warnings'])){ echo '

    Warnings

    '; echo 'bottom · errors'; echo '
    1. '; echo implode($prefs['scrib_marc-warnings'], '
    2. '); echo '
    '; } if(count($prefs['scrib_marc-errors'])){ echo '

    Errors

    '; echo 'bottom · warnings'; echo '
    1. '; echo implode($prefs['scrib_marc-errors'], '
    2. '); echo '
    '; } echo '

    '.__('Processing complete.').'

    '; echo '

    '. $prefs['scrib_marc-records_harvested'] .' of '. $prefs['scrib_marc-records_count'] .' '.__('records harvested.').' with '. count($prefs['scrib_marc-warnings']) .' warnings and '. count($prefs['scrib_marc-errors']) .' errors.

    '; echo '

    '.__('Continue to the next step to publish those harvested catalog entries.').'

    '; echo '
    '; echo '

    '; echo '
    '; echo '
    '; } function harest_import() { global $wpdb; $interval = 50; if( isset( $_GET[ 'n' ] ) == false ) { $n = 0; } else { $n = intval( $_GET[ 'n' ] ); } $posts = $wpdb->get_results('SELECT SQL_CALC_FOUND_ROWS * FROM '. $this->harvest_table .' WHERE imported = 0 LIMIT 0,'. $interval, ARRAY_A); if( is_array( $posts ) ) { $count = $wpdb->get_var('SELECT FOUND_ROWS()'); echo "

    Fetching records in batches of $interval...importing them...making coffee. Please be patient.

    "; echo '
      '; foreach( $posts as $post ) { $post_id = $this->insert_post(unserialize($post['content'])); if($post_id){ $wpdb->get_var('UPDATE '. $this->harvest_table .' SET imported = 1 WHERE source_id = "'. $post['source_id'] .'"'); echo '
    1. '. get_the_title($post_id) .'
    2. '; }else{ $wpdb->get_var('UPDATE '. $this->harvest_table .' SET imported = -1 WHERE source_id = "'. $post['source_id'] .'"'); } } echo '
    '; ?>

    '. ($count - $interval) .' records remain to be imported.

    '; } else { echo '

    That's all folks. kthnxbye.

    '; } } function insert_harvest($bibr) { global $wpdb; $wpdb->get_results("REPLACE INTO $this->harvest_table (source_id, harvest_date, imported, content) VALUES ('". $wpdb->escape($bibr['the_sourceid']) ."', NOW(), 0, '". $wpdb->escape(serialize($bibr)) ."')"); } function post_exists($sourceid) { if($post_id = get_objects_in_term( is_term($sourceid), 'sourceid' )) return($post_id[0]); // return(FALSE); } function insert_post($bibr){ // return(1); global $wpdb, $bsuite, $scrib; if($this->post_exists($bibr['the_sourceid'])) $postdata['ID'] = $this->post_exists($bibr['the_sourceid']); $postdata['post_title'] = $wpdb->escape(str_replace('\"', '"', $bibr['the_title'])); $postdata['post_date'] = $bibr['the_pubdate']; $postdata['post_date_gmt'] = $bibr['the_acqdate']; $postdata['comment_status'] = get_option('default_comment_status'); $postdata['ping_status'] = get_option('default_pingback_flag'); $postdata['post_status'] = 'publish'; $postdata['post_type'] = 'post'; $postdata['post_content'] = $wpdb->escape(str_replace('\"', '"', $bibr['the_content'])); $postdata['post_excerpt'] = $wpdb->escape(str_replace('\"', '"', $bibr['the_excerpt'])); $postdata['post_author'] = $scrib->options['catalog_author_id']; $post_id = wp_insert_post($postdata); // insert the post if($post_id){ $bsuite->searchsmart_upindex($post_id, $bibr['the_content'], $bibr['post_title']); // update the full text index wp_set_object_terms($post_id, $bibr['the_sourceid'], 'sourceid', TRUE); //insert the tags if(is_array($bibr['tags'])){ $bibr['tags'] = array_filter($bibr['tags']); foreach(array_keys($bibr['tags']) as $taxonomy){ $bibr['tags'][$taxonomy] = array_filter($bibr['tags'][$taxonomy]); register_taxonomy( $taxonomy, 'post' ); wp_set_object_terms($post_id, $bibr['tags'][$taxonomy], $taxonomy, TRUE); } } return($post_id); } return(FALSE); } function get_altisbn($isbn) { $result = array(); // OCLC's xISBN // http://www.oclc.org/research/projects/xisbn/ if($xml = 
file_get_contents('http://labs.oclc.org/xisbn/' . $isbn)){ foreach ($xml->xpath('/idlist/isbn') as $temp) $isbn[] = (string) $temp; } // the first element of the array is always the same as the query ISBN, delete it array_shift($result); /* Also note LibraryThing's thingISBN http://www.librarything.com/thingology/2006/06/introducing-thingisbn_14.php 'http://www.librarything.com/api/thingISBN/' . $isbn; */ return($result); } function get_sweets($isbn_list){ foreach($isbn_list as $isbn){ //echo $isbn . "
    \n"; // there's an ugly hack/work around below. For some reason the API server is returning crunk at the head of the serialized result. I'm removing it with substring, but i should fix it at the source. $record = unserialize(substr(file_get_contents('http://api.scriblio.net/v01a/enrich/?isbn='. $isbn),3)); if($record['status']){ return($record); } } } function get_summarized($text){ // api: http://api.scriblio.net/v01a/summarize/?text=... // The POST URL and parameters $request = 'http://api.scriblio.net/v01a/summarize/'; $postargs = 'text='.urlencode($text); // Get the curl session object $session = curl_init($request); // Set the POST options. curl_setopt ($session, CURLOPT_POST, true); curl_setopt ($session, CURLOPT_POSTFIELDS, $postargs); curl_setopt($session, CURLOPT_HEADER, FALSE); curl_setopt($session, CURLOPT_RETURNTRANSFER, true); // Do the POST and then close the session $response = curl_exec($session); curl_close($session); if(!empty($response)) return($response); else return(FALSE); } function the_excerpt($bibr){ $result = ''; // echo($result); return($result); } function the_content($bibr){ $result = ''; return($result); } // Default constructor function Scrib_import() { global $wpdb; $this->harvest_table = $wpdb->prefix . 'scrib_harvest'; register_taxonomy( 'sourceid', 'post' ); } } // Instantiate and register the importer include_once(ABSPATH . 
'wp-admin/includes/import.php'); if(function_exists('register_importer')) { $scrib_import = new Scrib_import(); register_importer($scrib_import->importer_code, $scrib_import->importer_name, $scrib_import->importer_desc, array (&$scrib_import, 'dispatch')); } add_action('activate_'.plugin_basename(__FILE__), 'scrib_importer_activate'); function scrib_importer_activate() { global $wp_db_version, $scrib_import; // Deactivate on pre 2.3 blogs if($wp_db_version<6075) { $current = get_settings('active_plugins'); array_splice($current, array_search( plugin_basename(__FILE__), $current), 1 ); update_option('active_plugins', $current); do_action('deactivate_'.plugin_basename(__FILE__)); return(FALSE); } global $wpdb; $charset_collate = ''; if ( version_compare(mysql_get_server_info(), '4.1.0', '>=') ) { if ( ! empty($wpdb->charset) ) $charset_collate = "DEFAULT CHARACTER SET $wpdb->charset"; if ( ! empty($wpdb->collate) ) $charset_collate .= " COLLATE $wpdb->collate"; } require_once(ABSPATH . 'wp-admin/includes/upgrade.php'); dbDelta(" CREATE TABLE $scrib_import->harvest_table ( source_id varchar(50) NOT NULL default '', harvest_date timestamp NOT NULL default '0000-00-00 00:00:00', imported tinyint(1) default '0', content longtext NOT NULL, PRIMARY KEY (source_id), KEY imported (imported) ) $charset_collate"); } ?>