Java tutorial
package de.fhg.iais.asc.workflow; /****************************************************************************** * Copyright 2011 (c) Fraunhofer IAIS Netmedia http://www.iais.fraunhofer.de * * ************************************************************************** * * Licensed under the Apache License, Version 2.0 (the "License"); you may * * not use this file except in compliance with the License. * * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * * software distributed under the License is distributed on an "AS IS" BASIS, * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * See the License for the specific language governing permissions and * * limitations under the License. * ******************************************************************************/ import java.io.File; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.TimeZone; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.time.DateFormatUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import ch.qos.logback.classic.LoggerContext; import ch.qos.logback.classic.spi.ILoggingEvent; import ch.qos.logback.core.Appender; import ch.qos.logback.core.FileAppender; import de.fhg.iais.asc.commons.AscConfigEval; import de.fhg.iais.asc.commons.AscConfiguration; import de.fhg.iais.asc.commons.AscContext; import de.fhg.iais.asc.commons.LogMessageBuilder; import de.fhg.iais.asc.commons.logging.AscConcordanceAnalysis; import de.fhg.iais.asc.commons.logging.AscDublicateAnalysis; import de.fhg.iais.asc.commons.logging.AscSchematronAnalysis; import de.fhg.iais.asc.commons.logging.AscTimeparserAnalysis; import de.fhg.iais.asc.commons.logging.AscTransformErrorAnalysis; import de.fhg.iais.asc.commons.logging.AscTransformStatistic; import de.fhg.iais.asc.commons.state.ASCProgress; import de.fhg.iais.asc.commons.state.ASCState; import de.fhg.iais.asc.contexts.AscProvider; import de.fhg.iais.asc.contexts.AscProviderIngest; import de.fhg.iais.asc.directories.AscDirectory; import de.fhg.iais.asc.logging.ILogVocabulary; import de.fhg.iais.asc.oai.localwriter.RepositoryWriter; import de.fhg.iais.asc.oai.strategy.AbstractHarvester; import de.fhg.iais.asc.oai.strategy.HarvesterFactory; import de.fhg.iais.asc.sipmaker.SipMakerFactory; import de.fhg.iais.asc.submitter.Submitter; import de.fhg.iais.asc.util.ProviderInfoRequester; import de.fhg.iais.commons.configuration.IResourceLocator; import de.fhg.iais.commons.time.StopWatch; import de.fhg.iais.cortex.report.common.sip.ErrorSip; /** * This class manages the workflow of the Augmented SIP Creator (ASC) * * @author hjh, cto, kst */ public class ASCWorkflow { private static final Logger LOG = LoggerFactory.getLogger(ASCWorkflow.class); private AscConfiguration config = null; private final SipMakerFactory makerFactory; /** * Constructor initializes the ASC * * @param homeDir */ public ASCWorkflow(IResourceLocator transformationsDir, IResourceLocator homeDir) { this.makerFactory = new SipMakerFactory(transformationsDir); } /** * Start of the ASC workflow * * @param configs A list of configurations. Each of them is is processed as a single ASC-run. More than * one configuration is used within the batch process. */ public void run(List<AscConfiguration> configs) { for (AscConfiguration config : configs) { try { this.config = config; run(); this.config = null; ASCState ascState = config.getASCState(); if (ascState.isCancelRequested()) { ascState.finishCancel(); return; } } catch (Exception e) { // Log the exception and report it to the UI. LOG.error("Single ASC run failed", e); ASCState ascState = config.getASCState(); ascState.reportProblem(e.getLocalizedMessage()); } finally { this.config = null; } } } private boolean run() { // TODO: Create new AscContext with information from this.config somewhere in the beginning of this method. this.config.resetASCStateObject(); boolean doHarvest = AscConfigEval.isHarvestEnabled(this.config); boolean doTransform = AscConfigEval.isTransformEnabled(this.config); boolean doSubmit = AscConfigEval.isSubmitEnabled(this.config); boolean testTransform = AscConfigEval.isTestTransformEnabled(this.config); StopWatch watch = StopWatch.start(); // Set the ASCState Object ASCState ascState = this.config.getASCState(); ascState.setRunning(); ASCProgress myProgress = ascState.getProgressInstance(); myProgress.setMaxFilesEach(this.config.get(AscConfiguration.MAX_FILES_EACH, 0)); myProgress.setHarvesting(doHarvest); myProgress.setTransforming(doTransform); myProgress.setSending(doSubmit); // Manages the ASC's directories. AscProvider provider = new AscProvider(this.config); // Contains the provider ingests to process. Iterable<AscProviderIngest> toProcess = null; String providerFile = provider.checkedGetDirectory(AscDirectory.BOOKKEEPING) + File.separator + "global.log"; setAscRootLogger(providerFile); LOG.info("Source: " + this.config.get(AscConfiguration.SOURCE_FOLDER, "?") + ", Format: " + this.config.get(AscConfiguration.FORMAT, "?") + " event: " + this.config.get(AscConfiguration.INGEST_EVENT, "?")); // Interrupt if needed. if (interruptIfNeeded(ascState)) { return false; } // Harvesting. if (doHarvest) { // Fill in the provider infos. ProviderInfoRequester.addTo(this.config, true); final AscProviderIngest ingestEvent = this.harvest(provider); toProcess = Collections.singleton(ingestEvent); } if (interruptIfNeeded(ascState)) { return false; } // Transformation. if (doTransform || testTransform) { AscContext ascContext = new AscContext(ILogVocabulary.CONTEXT_TRANSFORMATION + "()"); if (toProcess == null) { // If harvesting did not occur, the provider infos // have not been completed yet -- do it now. ProviderInfoRequester.addTo(this.config, true); // If harvesting did not occur, the ingest events to process have not been set yet. // If so, then expand the configured ingest event id filter based on the // ingest event ids in the inbox. toProcess = expandIngests(provider, AscDirectory.INBOX); } this.transform(provider, toProcess, ascContext); } if (interruptIfNeeded(ascState)) { return false; } // Submission. if (doSubmit) { if (toProcess == null) { // If harvesting and transformation did not occur, the ingest events // have not been set yet. If so, then expand the configured ingest event // id filter based on the ingest event ids in the outbox. toProcess = expandIngests(provider, AscDirectory.OUTBOX); // If harvesting and transformation did not occur, the provider infos // have not been completed yet -- do it now if the provider ID is not set. if (this.config.isEmpty(AscConfiguration.PROVIDER_ID)) { ProviderInfoRequester.addTo(this.config, true); } } this.submit(toProcess); } LOG.info("Processing time: " + watch.stopTimeFormat("DDD'D:'HH'h:'mm'm:'ss's'", TimeZone.getTimeZone("GMT0"))); ascState.displayMessage("Processing time: " + watch.stopTimeFormat("DDD'D:'HH'h:'mm'm:'ss's'", TimeZone.getTimeZone("GMT0"))); return !ascState.isCancelRequested(); } /** * Gets a list of ingest events ids in a certain directory based on the ingest event id filter configured * in {@link AscConfiguration}. * * @see AscProvider#expandIngests(String, AscDirectory) * @param ascDirectories The directory that the ASC uses, non-{@code null}. * @param ascRootDirectory The root directory in which to expand the ingest event id filter, non-{@code null}. * @return A series of concrete ingest event ids. */ private Iterable<AscProviderIngest> expandIngests(AscProvider provider, AscDirectory dirSelector) { final AscConfiguration config = provider.getConfiguration(); final String ingestFilter = config.get(AscConfiguration.INGEST_EVENT, ""); final Collection<AscProviderIngest> ingests = provider.expandIngests(ingestFilter, dirSelector); if (ingests.isEmpty()) { LOG.warn(LogMessageBuilder.getMessage( "Did not find any matching ingest event directories in " + provider.getDirectory(dirSelector), config)); } return ingests; } private boolean interruptIfNeeded(ASCState ascState) { if (ascState.isCancelRequested()) { return true; } return false; } /** * Performs the harvesting. * * @param provider The current {@link AscProvider}. * @return An {@link AscProviderIngest} for the new ingest event, non-{@code null}. */ private AscProviderIngest harvest(AscProvider provider) { // Create an ingest event. String eventId = this.config.get(AscConfiguration.INGEST_EVENT, ""); if (StringUtils.isEmpty(eventId) || "*".equals(eventId)) { eventId = String.valueOf(System.currentTimeMillis() / 1000); } final AscProviderIngest ingestEvent = provider.createIngest(eventId); LOG.info("Cleaning up errors from previous runs ..."); this.config.getASCState().displayMessage("Cleaning up errors from previous runs ..."); ingestEvent.removeOldErrors(ErrorSip.SECTION_HARVEST); LOG.info("Harvesting data ..."); LOG.info("Start Harvesting"); this.config.getASCState().displayMessage("Harvesting data ..."); // Read out proxy information if given. String proxyhost = this.config.get(AscConfiguration.OAIPMH_PROXYHOST, ""); Integer proxyport = null; try { proxyport = Integer.parseInt(this.config.get(AscConfiguration.OAIPMH_PROXYPORT, "invalid")); } catch (NumberFormatException ex) { proxyhost = null; proxyport = null; } // Create a harvester. final String oaiSource = this.config.get(AscConfiguration.OAI_SOURCE, ""); final String harvestingformat = this.config.get(AscConfiguration.META_DATA_FORMAT, this.config.get(AscConfiguration.FORMAT, "")); AbstractHarvester harvester = HarvesterFactory.getInstance().getHarvester( this.config.get(AscConfiguration.STRATEGY, ""), oaiSource, harvestingformat, proxyhost, proxyport, this.config.get(AscConfiguration.HARVESTING_FROM_DATE, ""), this.config.get(AscConfiguration.HARVESTING_UNTIL_DATE, ""), this.config, this.config.getASCState(), ingestEvent); if (!oaiSource.isEmpty()) { // If no OAI source is specified, it does not make sense to harvest (and the harvester will throw NullPointerExceptions). StopWatch s = StopWatch.start(); int n = 0; // Create a FileRepositoryWriter. File targetDirectory = ingestEvent.getDirectory(AscDirectory.INBOX); RepositoryWriter writer = new RepositoryWriter(targetDirectory, this.config.get(AscConfiguration.MAX_FILES_EACH, 0), oaiSource, eventId); // Harvest. final String oaiSets = this.config.get(AscConfiguration.OAI_SET, ""); if (StringUtils.isEmpty(oaiSets)) { n = harvester.retrieveAll(writer); } else { String[] sets = oaiSets.split(","); for (int i = 0; i < sets.length; i++) { sets[i] = sets[i].trim(); } for (String set : sets) { n = harvester.retrieveSet(set, writer); } } this.config.getASCState().displayMessage(n + " elements in " + DateFormatUtils.formatUTC(s.stop(), DateFormatUtils.ISO_TIME_NO_T_FORMAT.getPattern())); LOG.info(n + " elements in " + DateFormatUtils.ISO_TIME_NO_T_FORMAT.getPattern()); s.stop("Time used for harvesting"); } else { LOG.info("Skipping harvesting, because oai_source is empty for the current provider"); } return ingestEvent; } /** * Performs the transformation. * * @param provider The current {@link AscProvider}. * @param ingestEvents The ingest events that need to be processed, non-{@code null}. * @param parentAscContext The ASC context in which this method is executed, non-{@code null}. */ private void transform(AscProvider provider, Iterable<AscProviderIngest> ingestEvents, AscContext parentAscContext) { ASCTransform ascTransform = new ASCTransform(provider.getConfiguration(), this.makerFactory); String convertPath = (String) this.config.get(AscConfiguration.CONVERT_LOCATION); String scaleMode = (String) this.config.get(AscConfiguration.SCALE_IMAGES_MODE); File converter = new File(convertPath); if (!scaleMode.equals("none") && !converter.exists()) { LOG.info("The image scaler could not be found. Maybe the path is not set correct: " + convertPath); return; } for (AscProviderIngest ingestEvent : ingestEvents) { registerLoggers(provider, ingestEvent); ingestEvent.logStartTransformduration(); ascTransform.run(ingestEvent, parentAscContext); ingestEvent.logStopTransformduration(); ingestEvent.logTransformAnalysis(); ingestEvent.createFileWithTransformedIds(); // binaries statistics; the config of an ingestEvent will be send to the IM (see bookkeeping: zProvider file) ingestEvent.addBinaryStatisticToConfig(); ingestEvent.logAscConfig(); } } private void registerLoggers(AscProvider provider, AscProviderIngest ingestEvent) { final String bookkeepingDir = provider.checkedGetDirectory(AscDirectory.BOOKKEEPING) + File.separator + ingestEvent.getIngestEventId(); setAscRootLogger(bookkeepingDir + File.separator + "asc.log"); String ascTransformAnalysisLogFile = bookkeepingDir + File.separator + "analysis.log"; setAscLogger(ascTransformAnalysisLogFile, "ASC_TRANSFORM_ANALYSIS", AscTransformStatistic.class); String timeparserLogFile = bookkeepingDir + File.separator + "timeparser.log"; setAscLogger(timeparserLogFile, "TIMEPARSER", AscTimeparserAnalysis.class); String dublicateLogFileName = bookkeepingDir + File.separator + "dublicate.log"; setAscLogger(dublicateLogFileName, "DUBLICATE", AscDublicateAnalysis.class); String concordanceLogFileName = bookkeepingDir + File.separator + "concordance.log"; setAscLogger(concordanceLogFileName, "CONCORDANCE", AscConcordanceAnalysis.class); String schematronLogFileName = bookkeepingDir + File.separator + "schematronErrors.log"; setAscLogger(schematronLogFileName, "SCHEMATRON", AscSchematronAnalysis.class); String errorLogFileName = bookkeepingDir + File.separator + "transformationErrors.log"; setAscLogger(errorLogFileName, "TRANSFORMATION_ERROR", AscTransformErrorAnalysis.class); } /** * Performs the submission. * * @param ingestEvents The ingest events that need to be processed, non-{@code null}. */ private void submit(Iterable<AscProviderIngest> ingestEvents) { StopWatch s = StopWatch.start(); ASCState ascState = this.config.getASCState(); Submitter submitter = new Submitter(this.config); for (AscProviderIngest ingestEvent : ingestEvents) { LOG.info("Cleaning up errors from previous submissions ..."); ascState.displayMessage("Cleaning up errors from previous submissions ..."); ingestEvent.removeOldErrors(ErrorSip.SECTION_SEND); LOG.info("Submitting ..."); ascState.displayMessage("Submitting ..."); ingestEvent.addBinaryStatisticToConfig(); try { submitter.process(ingestEvent); } catch (Exception e) { String message = LogMessageBuilder.getMessage("FATAL: Submission of SIPs failed", this.config.get(AscConfiguration.PROVIDER_ID, null), ingestEvent.getIngestEventId()); LOG.error(message, e); this.config.getASCState().reportProblem(message); } } s.stop("Time used for submitting"); } public String getIngestId() { if (this.config != null) { return this.config.get(AscConfiguration.INGEST_EVENT, ""); } return ""; } public AscConfiguration getCurrentConfiguration() { return this.config; } @SuppressWarnings("rawtypes") private void setAscRootLogger(String fileName) { LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory(); ch.qos.logback.classic.Logger rootLogger = loggerContext.getLogger("root"); Appender<ILoggingEvent> rootAppender = rootLogger.getAppender("ASC_LOGGER"); FileAppender fileAppender = null; if (rootAppender instanceof FileAppender) { fileAppender = (FileAppender) rootAppender; } if (fileAppender == null) { rootLogger.error( "FileAppender \"ASC_LOGGER\" not found in logback.xml - no provider specific logging possible"); } else { fileAppender.setFile(fileName); fileAppender.start(); } } @SuppressWarnings("rawtypes") private void setAscLogger(String fileName, String appenderName, Class loggerClass) { LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory(); ch.qos.logback.classic.Logger logger = loggerContext.getLogger(loggerClass); Appender<ILoggingEvent> rootAppender = logger.getAppender(appenderName); FileAppender fileAppender = null; if (rootAppender instanceof FileAppender) { fileAppender = (FileAppender) rootAppender; } if (fileAppender == null) { logger.error("FileAppender \"" + appenderName + "\" not found in logback.xml - no provider specific logging statistic possible"); } else { fileAppender.setFile(fileName); fileAppender.start(); } } }