Java tutorial
/** * Copyright (c) 2009 University of Rochester * * This program is free software; you can redistribute it and/or modify it under the terms of the MIT/X11 license. The text of the * license can be found at http://www.opensource.org/licenses/mit-license.php and copy of the license can be found on the project * website http://www.extensiblecatalog.org/. * */ package info.extensiblecatalog.OAIToolkit.importer; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.log4j.Logger; import info.extensiblecatalog.OAIToolkit.api.Importer; import info.extensiblecatalog.OAIToolkit.utils.Logging; /** * Command line arguments * <dl> * <dt>-convert</dt> * <dd>Flag to convert file(s) with raw MARC records into MARCXML</dd> * <dt>-load</dt> * <dd>Flag to load file(s) into the OAI repository</dd> * <dt>-source <dir></dt> * <dd>The directory where the toolkit looks for files to process</dd> * <dt>-destination</dt> * <dd>The directory that the toolkit moves the source files into as it successfully * completes the processing of each file</dd> * <dt>-destination_xml</dt> * <dd>The directory that the toolkit places MARCXML versions of the source data.</dd> * <dt>-error</dt> * <dd>The directory that the toolkit moves files into when there is a processing * error for that file.</dd> * <dt>-error_xml</dt> * <dd>The directory that the toolkit places MARCXML versions of the source data, * if that MARCXML file was unable to be loaded into the OAI repository due * to an error condition.</dd> * <dt>-log</dt> * <dd>Path to log file for warnings and errors</dd> * <dt>-log_detail</dt> * <dd>Flag to offer more detailed processing log information</dd> * <dt>-marc_schema</dt> * <dd>XML Schema file for MARCXML validation</dd> * <dt>-marc_encoding</dt> * <dd>The encoding of the MARC file</dd> * <dt>-char_conversion</dt> * <dd>The character conversion method. Possible values: MARC8 (Ansel), * ISO5426, ISO6937, none</dd> * <dt>-split_size</dt> * <dd>How many records can an XML file contain?</dd> * <dt>-lucene_index</dt> * <dd>The Lucene index directory to create</dd> * <dt>-storage_type</dt> * <dd>The storage type of records: MySQL, mixed, Lucene</dd> * </dl> */ public class CLIProcessor { private static String programmer_log = "programmer"; private static final Logger prglog = Logging.getLogger(programmer_log); //private static final Logger logger = Logging.getLogger(); /** * Setup the command line options object * @return the command line options object */ public static Options getCommandLineOptions() { Option convert = new Option("convert", "Flag to convert file(s) with raw MARC records into MARCXML"); OptionBuilder.withArgName("modify"); OptionBuilder.hasArg(); OptionBuilder.withDescription("Flag to modify MARCXML file(s) with XSLT transromation with the" + "given XSLT files before loading into the OAI repository"); Option modify = OptionBuilder.create("modify"); Option load = new Option("load", "Flag to load file(s) into the OAI repository"); Option production = new Option("production", "Flag to switch production mode. It means, that the" + "toolkit won't create any temporary files (except error" + " records) so it read MARC records, process it, and put " + "directly to the database or the target file format [DEPRECATED: OPTION IS NO LONGER AVAILABLE]"); OptionBuilder.withArgName("source"); OptionBuilder.hasArg(); OptionBuilder.withDescription("The directory where the toolkit looks " + "for files to process"); Option source = OptionBuilder.create("source"); OptionBuilder.withArgName("destination"); OptionBuilder.hasArg(); OptionBuilder.withDescription("The directory that the toolkit moves the source files into " + "as it successfully completes the processing of each file."); Option destination = OptionBuilder.create("destination"); OptionBuilder.withArgName("destination_xml"); OptionBuilder.hasArg(); OptionBuilder .withDescription("The directory that the toolkit places " + "MARCXML versions of the source data."); Option destination_xml = OptionBuilder.create("destination_xml"); OptionBuilder.withArgName("destination_modifiedxml"); OptionBuilder.hasArg(); OptionBuilder.withDescription( "The directory that the toolkit places " + "modified MARCXML versions of the source data."); Option destination_modifiedxml = OptionBuilder.create("destination_modifiedxml"); OptionBuilder.withArgName("error"); OptionBuilder.hasArg(); OptionBuilder.withDescription("The directory that the toolkit moves files into when " + "there is a processing error for that file."); Option error = OptionBuilder.create("error"); OptionBuilder.hasArg(); OptionBuilder.withArgName("error_xml"); OptionBuilder.withDescription("The directory that the toolkit places MARCXML versions of " + "the source data, if that MARCXML file was unable to be " + "loaded into the OAI repository due to an error condition."); Option error_xml = OptionBuilder.create("error_xml"); OptionBuilder.hasArg(); OptionBuilder.withArgName("error_modifiedxml"); OptionBuilder.withDescription("The directory that the toolkit places MARCXML versions of " + "the source data, if that MARCXML file was unable to be " + "loaded into the OAI repository due to an error condition."); Option error_modifiedxml = OptionBuilder.create("error_modifiedxml"); OptionBuilder.withArgName("log"); OptionBuilder.hasArg(); OptionBuilder.withDescription("The directory of log files for " + "warnings and errors"); Option log = OptionBuilder.create("log"); Option log_detail = new Option("log_detail", "Flag to offer more detailed processing log information"); OptionBuilder.withArgName("marc_schema"); OptionBuilder.hasArg(); OptionBuilder.withDescription("XML Schema file for MARCXML validation"); Option marc_schema = OptionBuilder.create("marc_schema"); OptionBuilder.withArgName("marc_encoding"); OptionBuilder.hasArg(); OptionBuilder.withDescription("The encoding of the MARC file"); Option marc_encoding = OptionBuilder.create("marc_encoding"); OptionBuilder.withArgName("char_conversion"); OptionBuilder.hasArg(); OptionBuilder.withDescription( "The character conversion method. " + "Possible values: MARC8 (Ansel), ISO5426, ISO6937, none"); Option char_conversion = OptionBuilder.create("char_conversion"); OptionBuilder.withArgName("split_size"); OptionBuilder.hasArg(); OptionBuilder.withDescription("How many records can an XML file" + " contain?"); Option split_size = OptionBuilder.create("split_size"); OptionBuilder.withArgName("lucene_index"); OptionBuilder.hasArg(); OptionBuilder.withDescription("Lucene index directory."); Option lucene_index = OptionBuilder.create("lucene_index"); //OptionBuilder.withArgName("storage_type"); //OptionBuilder.hasArg(); //OptionBuilder.withDescription("The storage type of records: MySQL," + //" mixed, Lucene."); //Option storage_type = OptionBuilder.create("storage_type"); Option indent_xml = new Option("indent_xml", "Flag to indent XML"); Option xml_version_11 = new Option("xml_version_11", "Flag to create XML 1.1 instead of 1.0"); Option translate_leader_bad_chars_to_zero = new Option("translate_leader_bad_chars_to_zero", "Change the Bad characters in the leader to zeros"); Option translate_nonleader_bad_chars_to_spaces = new Option("translate_nonleader_bad_chars_to_spaces", "Change the Bad characters in the control and the data fields to spaces"); Option modify_validation = new Option("modify_validation", "Perform validation check during modify step"); OptionBuilder.withArgName("replace_repository_code"); OptionBuilder.hasArg(); OptionBuilder.withDescription("Replace repository code."); Option replace_repository_code = OptionBuilder.create("replace_repository_code"); OptionBuilder.withArgName("convert_dir"); OptionBuilder.hasArg(); OptionBuilder.withDescription("The directory of marc files after " + "process."); Option convert_dir = OptionBuilder.create("convert_dir"); Option fileof_deleted_records = new Option("fileof_deleted_records", "The source marc file should be considered as deleted"); Option lucene_statistics = new Option("lucene_statistics", "Statistics for Lucene Database"); Option lucene_dump_ids = new Option("lucene_dump_ids", "List all ids (001s) to standard output"); OptionBuilder.withArgName("load_dir"); OptionBuilder.hasArg(); OptionBuilder.withDescription("The directory of marcxml files after" + " process."); Option load_dir = OptionBuilder.create("load_dir"); OptionBuilder.withArgName("modify_dir"); OptionBuilder.hasArg(); OptionBuilder.withDescription("The directory of modified marcxml " + "files after process."); Option modify_dir = OptionBuilder.create("modify_dir"); OptionBuilder.withArgName("error_suffix"); OptionBuilder.hasArg(); OptionBuilder.withDescription("The suffix of directory name of error" + " files after process."); Option error_suffix = OptionBuilder.create("error_suffix"); OptionBuilder.withArgName("destination_suffix"); OptionBuilder.hasArg(); OptionBuilder.withDescription("The suffix of directory name of files" + " after process."); Option destination_suffix = OptionBuilder.create("destination_suffix"); OptionBuilder.withArgName("default_repository_code"); OptionBuilder.hasArg(); OptionBuilder.withDescription("The default value of " + "repository code (field 003)."); Option default_repository_code = OptionBuilder.create("default_repository_code"); Option delete = new Option("delete", "Delete temporary files?"); Option ignore_repository_code = new Option("ignore_repository_code", "Do not merge 003 and 001 together."); Option help = new Option("help", "list available options"); Options options = new Options(); options.addOption(production); options.addOption(convert); options.addOption(modify); options.addOption(load); options.addOption(source); options.addOption(destination); options.addOption(destination_xml); options.addOption(error); options.addOption(error_xml); options.addOption(log); options.addOption(log_detail); options.addOption(marc_schema); options.addOption(marc_encoding); options.addOption(char_conversion); options.addOption(split_size); options.addOption(lucene_index); //options.addOption(storage_type); options.addOption(indent_xml); options.addOption(xml_version_11); options.addOption(translate_leader_bad_chars_to_zero); options.addOption(modify_validation); options.addOption(translate_nonleader_bad_chars_to_spaces); options.addOption(fileof_deleted_records); options.addOption(lucene_statistics); options.addOption(lucene_dump_ids); options.addOption(replace_repository_code); options.addOption(convert_dir); options.addOption(load_dir); options.addOption(modify_dir); options.addOption(error_suffix); options.addOption(destination_suffix); options.addOption(delete); options.addOption(destination_modifiedxml); options.addOption(error_modifiedxml); options.addOption(default_repository_code); options.addOption(ignore_repository_code); options.addOption(help); return options; } public static void process(String[] args, Importer importer) { CommandLineParser parser = new GnuParser(); Options options = CLIProcessor.getCommandLineOptions(); try { // parse the command line arguments CommandLine line = parser.parse(options, args); // production if (line.hasOption("production")) { /**** * WE NO LONGER HONOR THIS OPTION: * * There appears to be a lot of duplicated code for this in the Converter class, * and it hasn't been kept up-to-date (or is just plain wrong). * * Most importantly, this code doesn't track the OAI ID whatsoever. VERY IMPORTANT! */ //importer.configuration.setProductionMode(true); } // convert if (line.hasOption("convert")) { importer.configuration.setNeedConvert(true); } // modify if (line.hasOption("modify")) { importer.configuration.setNeedModify(true); importer.configuration.setXsltString(line.getOptionValue("modify")); } // load if (line.hasOption("load")) { importer.configuration.setNeedLoad(true); } // log_detail if (line.hasOption("log_detail")) { importer.configuration.setNeedLogDetail(true); } // source if (line.hasOption("source")) { importer.configuration.setSourceDir(line.getOptionValue("source")); } // destination if (line.hasOption("destination")) { importer.configuration.setDestinationDir(line.getOptionValue("destination")); } // destination_xml if (line.hasOption("destination_xml")) { importer.configuration.setDestinationXmlDir(line.getOptionValue("destination_xml")); } // destination_modifiedxml if (line.hasOption("destination_modifiedxml")) { importer.configuration.setDestinationModifiedXmlDir(line.getOptionValue("destination_modifiedxml")); } // error if (line.hasOption("error")) { importer.configuration.setErrorDir(line.getOptionValue("error")); } // error_xml if (line.hasOption("error_xml")) { importer.configuration.setErrorXmlDir(line.getOptionValue("error_xml")); } // error_modifiedxml if (line.hasOption("error_modifiedxml")) { importer.configuration.setErrorXmlDir(line.getOptionValue("error_modifiedxml")); } // marc_schema if (line.hasOption("marc_schema")) { importer.configuration.setMarcSchema(line.getOptionValue("marc_schema")); } // log if (line.hasOption("log")) { importer.configuration.setLogDir(line.getOptionValue("log")); } // marc_encoding if (line.hasOption("marc_encoding")) { importer.configuration.setMarcEncoding(line.getOptionValue("marc_encoding")); } // char_conversion if (line.hasOption("char_conversion")) { importer.configuration.setCharConversion(line.getOptionValue("char_conversion")); } // split_size if (line.hasOption("split_size")) { importer.configuration.setSplitSize(line.getOptionValue("split_size")); } // lucene_index if (line.hasOption("lucene_index")) { importer.configuration.setLuceneIndex(line.getOptionValue("lucene_index")); } // storage_type // if (line.hasOption("storage_type")) { // importer.configuration.setStorageType(line.getOptionValue( // "storage_type")); // } // indent_xml if (line.hasOption("indent_xml")) { importer.configuration.setDoIndentXml(true); } // xml_version_11 if (line.hasOption("xml_version_11")) { importer.configuration.setCreateXml11(true); } // modify_validation if (line.hasOption("modify_validation")) { importer.configuration.setModifyValidation(true); } // translate_leader_bad_chars_to_zero if (line.hasOption("translate_leader_bad_chars_to_zero")) { importer.configuration.setTranslateLeaderBadCharsToZero(true); } // translate_nonleader_bad_chars_to_spaces if (line.hasOption("translate_nonleader_bad_chars_to_spaces")) { importer.configuration.setTranslateNonleaderBadCharsToSpaces(true); } // convert_dir if (line.hasOption("convert_dir")) { importer.configuration.setConvertDir(line.getOptionValue("convert_dir")); } // modify_dir if (line.hasOption("modify_dir")) { importer.configuration.setModifyDir(line.getOptionValue("modify_dir")); } // load_dir if (line.hasOption("load_dir")) { importer.configuration.setLoadDir(line.getOptionValue("load_dir")); } if (line.hasOption("lucene_statistics")) { importer.configuration.setLuceneStatistics(true); } if (line.hasOption("lucene_dump_ids")) { importer.configuration.setLuceneDumpIds(true); } // error_suffix if (line.hasOption("error_suffix")) { importer.configuration.setErrorSuffix(line.getOptionValue("error_suffix")); } // destination_suffix if (line.hasOption("destination_suffix")) { importer.configuration.setDestinationSuffix(line.getOptionValue("destination_suffix")); } // delete if (line.hasOption("delete")) { importer.configuration.setDoDeleteTemporaryFiles(true); } // default_repository_code if (line.hasOption("default_repository_code")) { importer.configuration.setDefaultRepositoryCode(line.getOptionValue("default_repository_code")); } // ignore_repository_code if (line.hasOption("ignore_repository_code")) { importer.configuration.setIgnoreRepositoryCode(true); } if (line.hasOption("fileof_deleted_records")) { importer.configuration.setFileOfDeletedRecords(true); } } catch (ParseException e) { prglog.error("[PRG] Parsing of command line arguments failed. " + "Reason: " + e.getMessage()); } catch (Exception e) { //e.printStackTrace(); prglog.error("[PRG] " + e); } } /** * Print help message to the standard output */ public static void help() { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("Importer", getCommandLineOptions()); } }