de.fhg.iais.asc.workflow.ASCWorkflow.java Source code

Java tutorial

Introduction

Here is the source code for de.fhg.iais.asc.workflow.ASCWorkflow.java

Source

package de.fhg.iais.asc.workflow;

/******************************************************************************
 * Copyright 2011 (c) Fraunhofer IAIS Netmedia  http://www.iais.fraunhofer.de *
 * ************************************************************************** *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may    *
 * not use this file except in compliance with the License.                   *
 * You may obtain a copy of the License at                                    *
 * http://www.apache.org/licenses/LICENSE-2.0                                 *
 * Unless required by applicable law or agreed to in writing,                 *
 * software distributed under the License is distributed on an "AS IS" BASIS, *
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   *
 * See the License for the specific language governing permissions and        *
 * limitations under the License.                                             *
 ******************************************************************************/

import java.io.File;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.TimeZone;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.DateFormatUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ch.qos.logback.classic.LoggerContext;
import ch.qos.logback.classic.spi.ILoggingEvent;
import ch.qos.logback.core.Appender;
import ch.qos.logback.core.FileAppender;
import de.fhg.iais.asc.commons.AscConfigEval;
import de.fhg.iais.asc.commons.AscConfiguration;
import de.fhg.iais.asc.commons.AscContext;
import de.fhg.iais.asc.commons.LogMessageBuilder;
import de.fhg.iais.asc.commons.logging.AscConcordanceAnalysis;
import de.fhg.iais.asc.commons.logging.AscDublicateAnalysis;
import de.fhg.iais.asc.commons.logging.AscSchematronAnalysis;
import de.fhg.iais.asc.commons.logging.AscTimeparserAnalysis;
import de.fhg.iais.asc.commons.logging.AscTransformErrorAnalysis;
import de.fhg.iais.asc.commons.logging.AscTransformStatistic;
import de.fhg.iais.asc.commons.state.ASCProgress;
import de.fhg.iais.asc.commons.state.ASCState;
import de.fhg.iais.asc.contexts.AscProvider;
import de.fhg.iais.asc.contexts.AscProviderIngest;
import de.fhg.iais.asc.directories.AscDirectory;
import de.fhg.iais.asc.logging.ILogVocabulary;
import de.fhg.iais.asc.oai.localwriter.RepositoryWriter;
import de.fhg.iais.asc.oai.strategy.AbstractHarvester;
import de.fhg.iais.asc.oai.strategy.HarvesterFactory;
import de.fhg.iais.asc.sipmaker.SipMakerFactory;
import de.fhg.iais.asc.submitter.Submitter;
import de.fhg.iais.asc.util.ProviderInfoRequester;
import de.fhg.iais.commons.configuration.IResourceLocator;
import de.fhg.iais.commons.time.StopWatch;
import de.fhg.iais.cortex.report.common.sip.ErrorSip;

/**
 * This class manages the workflow of the Augmented SIP Creator (ASC)
 *
 * @author hjh, cto, kst
 */
public class ASCWorkflow {
    private static final Logger LOG = LoggerFactory.getLogger(ASCWorkflow.class);

    private AscConfiguration config = null;
    private final SipMakerFactory makerFactory;

    /**
     * Creates a workflow and the {@link SipMakerFactory} that is later handed to the transformation step.
     *
     * @param transformationsDir Resource locator passed to the {@link SipMakerFactory} constructor.
     * @param homeDir Not used by this constructor.
     */
    public ASCWorkflow(IResourceLocator transformationsDir, IResourceLocator homeDir) {
        this.makerFactory = new SipMakerFactory(transformationsDir);
    }

    /**
     * Starts the ASC workflow.
     * <p>
     * Every configuration is processed as one independent ASC run. A failure of one run is logged and
     * reported to that configuration's {@link ASCState}; the remaining configurations are still processed.
     * If a cancel was requested during a run, the cancel is acknowledged and no further configuration is
     * processed.
     *
     * @param configs A list of configurations. Each of them is processed as a single ASC-run. More than
     *        one configuration is used within the batch process.
     */
    public void run(List<AscConfiguration> configs) {
        for (AscConfiguration config : configs) {
            try {
                this.config = config;
                run();
                // Cleared before the cancel handshake; the finally block covers the failure path.
                this.config = null;

                ASCState ascState = config.getASCState();
                if (ascState.isCancelRequested()) {
                    ascState.finishCancel();
                    return;
                }
            } catch (Exception e) {
                // Log the exception and report it to the UI.
                LOG.error("Single ASC run failed", e);

                // Many exceptions (e.g. a plain NullPointerException) carry no message at all;
                // fall back to the exception's string form so that the report is never empty.
                String problem = e.getLocalizedMessage();
                if (problem == null) {
                    problem = e.toString();
                }
                ASCState ascState = config.getASCState();
                ascState.reportProblem(problem);
            } finally {
                this.config = null;
            }

        }
    }

    /**
     * Executes one ASC run for the configuration currently stored in {@code this.config}: optional
     * harvesting, optional (test) transformation and optional submission, in that order. A requested
     * cancel is checked before every phase.
     *
     * @return {@code false} if a cancel was requested, {@code true} otherwise.
     */
    private boolean run() {
        // TODO: Create new AscContext with information from this.config somewhere in the beginning of this method.
        this.config.resetASCStateObject();

        final boolean harvestRequested = AscConfigEval.isHarvestEnabled(this.config);
        final boolean transformRequested = AscConfigEval.isTransformEnabled(this.config);
        final boolean submitRequested = AscConfigEval.isSubmitEnabled(this.config);
        final boolean testTransformRequested = AscConfigEval.isTestTransformEnabled(this.config);

        final StopWatch totalTime = StopWatch.start();

        // Mark the state object as running and publish which phases are going to be executed.
        final ASCState state = this.config.getASCState();
        state.setRunning();

        final ASCProgress progress = state.getProgressInstance();
        progress.setMaxFilesEach(this.config.get(AscConfiguration.MAX_FILES_EACH, 0));
        progress.setHarvesting(harvestRequested);
        progress.setTransforming(transformRequested);
        progress.setSending(submitRequested);

        // The provider gives access to the ASC's directories.
        final AscProvider provider = new AscProvider(this.config);

        // Point the root logger to the provider-wide log file.
        final String globalLogFile = provider.checkedGetDirectory(AscDirectory.BOOKKEEPING) + File.separator
                + "global.log";
        setAscRootLogger(globalLogFile);

        final String sourceFolder = this.config.get(AscConfiguration.SOURCE_FOLDER, "?");
        final String format = this.config.get(AscConfiguration.FORMAT, "?");
        final String ingestEventFilter = this.config.get(AscConfiguration.INGEST_EVENT, "?");
        LOG.info("Source: " + sourceFolder + ", Format: " + format + " event: " + ingestEventFilter);

        if (interruptIfNeeded(state)) {
            return false;
        }

        // The provider ingests to process; stays null until one of the phases determines them.
        Iterable<AscProviderIngest> ingests = null;

        // Phase 1: harvesting.
        if (harvestRequested) {
            // Fill in the provider infos.
            ProviderInfoRequester.addTo(this.config, true);
            ingests = Collections.singleton(this.harvest(provider));
        }

        if (interruptIfNeeded(state)) {
            return false;
        }

        // Phase 2: transformation (regular or test).
        if (transformRequested || testTransformRequested) {
            final AscContext transformContext = new AscContext(ILogVocabulary.CONTEXT_TRANSFORMATION + "()");

            if (ingests == null) {
                // No harvesting happened: the provider infos are still incomplete and the ingests
                // have to be derived from the ingest event filter and the content of the inbox.
                ProviderInfoRequester.addTo(this.config, true);
                ingests = expandIngests(provider, AscDirectory.INBOX);
            }

            this.transform(provider, ingests, transformContext);
        }

        if (interruptIfNeeded(state)) {
            return false;
        }

        // Phase 3: submission.
        if (submitRequested) {
            if (ingests == null) {
                // Neither harvesting nor transformation happened: derive the ingests from the
                // ingest event filter and the content of the outbox ...
                ingests = expandIngests(provider, AscDirectory.OUTBOX);

                // ... and complete the provider infos, but only if the provider ID is not set.
                if (this.config.isEmpty(AscConfiguration.PROVIDER_ID)) {
                    ProviderInfoRequester.addTo(this.config, true);
                }
            }

            this.submit(ingests);
        }

        final String durationPattern = "DDD'D:'HH'h:'mm'm:'ss's'";
        LOG.info("Processing time: " + totalTime.stopTimeFormat(durationPattern, TimeZone.getTimeZone("GMT0")));
        state.displayMessage(
                "Processing time: " + totalTime.stopTimeFormat(durationPattern, TimeZone.getTimeZone("GMT0")));

        return !state.isCancelRequested();
    }

    /**
     * Resolves the ingest event filter configured under {@link AscConfiguration#INGEST_EVENT} (empty
     * string if unset) into the provider ingests found for the selected directory. A warning is logged
     * if nothing matches.
     *
     * @see AscProvider#expandIngests(String, AscDirectory)
     * @param provider The provider whose configuration and directories are used, non-{@code null}.
     * @param dirSelector Selects the directory in which the filter is expanded, non-{@code null}.
     * @return The matching provider ingests, possibly empty.
     */
    private Iterable<AscProviderIngest> expandIngests(AscProvider provider, AscDirectory dirSelector) {
        final AscConfiguration providerConfig = provider.getConfiguration();
        final String filter = providerConfig.get(AscConfiguration.INGEST_EVENT, "");
        final Collection<AscProviderIngest> matches = provider.expandIngests(filter, dirSelector);

        if (!matches.isEmpty()) {
            return matches;
        }

        final String nothingFound = "Did not find any matching ingest event directories in "
                + provider.getDirectory(dirSelector);
        LOG.warn(LogMessageBuilder.getMessage(nothingFound, providerConfig));
        return matches;
    }

    /**
     * Tells whether the current run has to stop.
     *
     * @param ascState The state object of the current run.
     * @return {@code true} if a cancel has been requested on the given state.
     */
    private boolean interruptIfNeeded(ASCState ascState) {
        return ascState.isCancelRequested();
    }

    /**
     * Performs the harvesting.
     * <p>
     * Creates an ingest for the configured ingest event id (or for the current time in seconds if the id
     * is empty or {@code "*"}), removes the harvest errors of previous runs and, if an OAI source is
     * configured, retrieves either everything or the configured comma-separated OAI sets into the inbox
     * directory of the ingest.
     *
     * @param provider The current {@link AscProvider}.
     * @return An {@link AscProviderIngest} for the new ingest event, non-{@code null}.
     */
    private AscProviderIngest harvest(AscProvider provider) {
        // Create an ingest event.
        String eventId = this.config.get(AscConfiguration.INGEST_EVENT, "");
        if (StringUtils.isEmpty(eventId) || "*".equals(eventId)) {
            eventId = String.valueOf(System.currentTimeMillis() / 1000);
        }
        final AscProviderIngest ingestEvent = provider.createIngest(eventId);

        LOG.info("Cleaning up errors from previous runs ...");
        this.config.getASCState().displayMessage("Cleaning up errors from previous runs ...");
        ingestEvent.removeOldErrors(ErrorSip.SECTION_HARVEST);

        LOG.info("Harvesting data ...");
        LOG.info("Start Harvesting");

        this.config.getASCState().displayMessage("Harvesting data ...");

        // Read out proxy information if given. Without a parseable port the proxy is
        // disabled altogether (host and port are both passed as null).
        String proxyhost = this.config.get(AscConfiguration.OAIPMH_PROXYHOST, "");
        Integer proxyport = null;
        try {
            proxyport = Integer.parseInt(this.config.get(AscConfiguration.OAIPMH_PROXYPORT, "invalid"));
        } catch (NumberFormatException ex) {
            proxyhost = null;
            proxyport = null;
        }

        // Create a harvester.
        final String oaiSource = this.config.get(AscConfiguration.OAI_SOURCE, "");
        final String harvestingformat = this.config.get(AscConfiguration.META_DATA_FORMAT,
                this.config.get(AscConfiguration.FORMAT, ""));
        AbstractHarvester harvester = HarvesterFactory.getInstance().getHarvester(
                this.config.get(AscConfiguration.STRATEGY, ""), oaiSource, harvestingformat, proxyhost, proxyport,
                this.config.get(AscConfiguration.HARVESTING_FROM_DATE, ""),
                this.config.get(AscConfiguration.HARVESTING_UNTIL_DATE, ""), this.config, this.config.getASCState(),
                ingestEvent);

        if (oaiSource.isEmpty()) {
            // If no OAI source is specified, it does not make sense to harvest
            // (and the harvester will throw NullPointerExceptions).
            LOG.info("Skipping harvesting, because oai_source is empty for the current provider");
            return ingestEvent;
        }

        StopWatch s = StopWatch.start();
        int n = 0;

        // Create a FileRepositoryWriter.
        File targetDirectory = ingestEvent.getDirectory(AscDirectory.INBOX);
        RepositoryWriter writer = new RepositoryWriter(targetDirectory,
                this.config.get(AscConfiguration.MAX_FILES_EACH, 0), oaiSource, eventId);

        // Harvest.
        final String oaiSets = this.config.get(AscConfiguration.OAI_SET, "");
        if (StringUtils.isEmpty(oaiSets)) {
            n = harvester.retrieveAll(writer);
        } else {
            for (String set : oaiSets.split(",")) {
                // NOTE(review): n is overwritten for every set, so only the value returned for the
                // last set is reported below. Confirm whether retrieveSet returns a running total
                // or a per-set count; in the latter case this should accumulate.
                n = harvester.retrieveSet(set.trim(), writer);
            }
        }

        // Format the elapsed time once and use the same text for the UI and the log. Previously
        // the log line contained the format pattern itself instead of the formatted time.
        final String elapsed = DateFormatUtils.formatUTC(s.stop(),
                DateFormatUtils.ISO_TIME_NO_T_FORMAT.getPattern());
        final String summary = n + " elements in " + elapsed;
        this.config.getASCState().displayMessage(summary);
        LOG.info(summary);

        s.stop("Time used for harvesting");

        return ingestEvent;
    }

    /**
     * Performs the transformation.
     * <p>
     * Unless the configured image scale mode is {@code "none"}, the configured converter executable has
     * to exist; otherwise nothing is transformed and the method returns after logging the problem. An
     * unset scale mode is treated like any mode other than {@code "none"}, i.e. the converter is required.
     *
     * @param provider The current {@link AscProvider}.
     * @param ingestEvents The ingest events that need to be processed, non-{@code null}.
     * @param parentAscContext The ASC context in which this method is executed, non-{@code null}.
     */
    private void transform(AscProvider provider, Iterable<AscProviderIngest> ingestEvents,
            AscContext parentAscContext) {
        ASCTransform ascTransform = new ASCTransform(provider.getConfiguration(), this.makerFactory);
        String convertPath = (String) this.config.get(AscConfiguration.CONVERT_LOCATION);
        String scaleMode = (String) this.config.get(AscConfiguration.SCALE_IMAGES_MODE);

        // Null-safe checks: an unset converter location must not break runs that do not scale
        // images at all, and an unset scale mode must not end in a NullPointerException.
        boolean scalingRequested = !"none".equals(scaleMode);
        if (scalingRequested && (convertPath == null || !new File(convertPath).exists())) {
            LOG.info("The image scaler could not be found. Maybe the path is not set correct: " + convertPath);
            return;
        }

        for (AscProviderIngest ingestEvent : ingestEvents) {
            // Redirect all file appenders to the bookkeeping directory of this ingest event.
            registerLoggers(provider, ingestEvent);
            ingestEvent.logStartTransformduration();
            ascTransform.run(ingestEvent, parentAscContext);
            ingestEvent.logStopTransformduration();
            ingestEvent.logTransformAnalysis();
            ingestEvent.createFileWithTransformedIds();

            // binaries statistics; the config of an ingestEvent will be send to the IM (see bookkeeping: zProvider file)
            ingestEvent.addBinaryStatisticToConfig();
            ingestEvent.logAscConfig();
        }
    }

    /**
     * Redirects the root log file and all analysis log files to the bookkeeping sub-directory named
     * after the id of the given ingest event.
     *
     * @param provider The provider that supplies the bookkeeping directory.
     * @param ingestEvent The ingest event whose id names the sub-directory.
     */
    private void registerLoggers(AscProvider provider, AscProviderIngest ingestEvent) {
        // Common prefix "<bookkeeping>/<ingest event id>/" of every log file set below.
        final String logDir = provider.checkedGetDirectory(AscDirectory.BOOKKEEPING) + File.separator
                + ingestEvent.getIngestEventId() + File.separator;

        setAscRootLogger(logDir + "asc.log");

        setAscLogger(logDir + "analysis.log", "ASC_TRANSFORM_ANALYSIS", AscTransformStatistic.class);
        setAscLogger(logDir + "timeparser.log", "TIMEPARSER", AscTimeparserAnalysis.class);
        setAscLogger(logDir + "dublicate.log", "DUBLICATE", AscDublicateAnalysis.class);
        setAscLogger(logDir + "concordance.log", "CONCORDANCE", AscConcordanceAnalysis.class);
        setAscLogger(logDir + "schematronErrors.log", "SCHEMATRON", AscSchematronAnalysis.class);
        setAscLogger(logDir + "transformationErrors.log", "TRANSFORMATION_ERROR", AscTransformErrorAnalysis.class);
    }

    /**
     * Performs the submission. For every ingest event the send errors of previous submissions are
     * removed before the {@link Submitter} processes it. A failing submission is logged and reported
     * to the {@link ASCState}; the remaining ingest events are still processed.
     *
     * @param ingestEvents The ingest events that need to be processed, non-{@code null}.
     */
    private void submit(Iterable<AscProviderIngest> ingestEvents) {
        final StopWatch submitTimer = StopWatch.start();
        final ASCState state = this.config.getASCState();
        final Submitter sipSubmitter = new Submitter(this.config);

        final String cleanupNotice = "Cleaning up errors from previous submissions ...";
        final String submitNotice = "Submitting ...";

        for (final AscProviderIngest ingest : ingestEvents) {
            LOG.info(cleanupNotice);
            state.displayMessage(cleanupNotice);
            ingest.removeOldErrors(ErrorSip.SECTION_SEND);

            LOG.info(submitNotice);
            state.displayMessage(submitNotice);
            ingest.addBinaryStatisticToConfig();

            try {
                sipSubmitter.process(ingest);
            } catch (Exception failure) {
                final String problem = LogMessageBuilder.getMessage("FATAL: Submission of SIPs failed",
                        this.config.get(AscConfiguration.PROVIDER_ID, null), ingest.getIngestEventId());
                LOG.error(problem, failure);
                this.config.getASCState().reportProblem(problem);
            }
        }

        submitTimer.stop("Time used for submitting");
    }

    /**
     * Gets the ingest event setting of the configuration that is currently being processed.
     *
     * @return The value configured under {@link AscConfiguration#INGEST_EVENT}, or an empty string if
     *         that value is not set or no configuration is currently being processed.
     */
    public String getIngestId() {
        if (this.config == null) {
            return "";
        }
        return this.config.get(AscConfiguration.INGEST_EVENT, "");
    }

    /**
     * Gets the configuration that is currently being processed.
     *
     * @return The configuration of the run in progress, or {@code null} when no run is in progress
     *         (the field is only set for the duration of a single run).
     */
    public AscConfiguration getCurrentConfiguration() {
        return this.config;
    }

    /**
     * Redirects the file appender {@code "ASC_LOGGER"} of the root logger to the given file.
     *
     * @param fileName The path of the file the appender writes to from now on.
     */
    private void setAscRootLogger(String fileName) {
        LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
        redirectFileAppender(loggerContext.getLogger("root"), "ASC_LOGGER", fileName,
                "FileAppender \"ASC_LOGGER\" not found in logback.xml - no provider specific logging possible");
    }

    /**
     * Redirects a named file appender of the logger belonging to the given class to the given file.
     *
     * @param fileName The path of the file the appender writes to from now on.
     * @param appenderName The name of the appender as attached to the logger.
     * @param loggerClass The class whose logger carries the appender.
     */
    private void setAscLogger(String fileName, String appenderName, Class<?> loggerClass) {
        LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
        redirectFileAppender(loggerContext.getLogger(loggerClass), appenderName, fileName,
                "FileAppender \"" + appenderName
                        + "\" not found in logback.xml - no provider specific logging statistic possible");
    }

    /**
     * Looks up the named appender on the given logger and, if it is a {@link FileAppender}, sets its
     * file and starts it. Otherwise the given message is logged as an error on that logger.
     *
     * @param logger The logger the appender is attached to.
     * @param appenderName The name of the appender to look up.
     * @param fileName The path of the file the appender writes to from now on.
     * @param notFoundMessage The error message to log if no such file appender is attached.
     */
    private static void redirectFileAppender(ch.qos.logback.classic.Logger logger, String appenderName,
            String fileName, String notFoundMessage) {
        Appender<ILoggingEvent> appender = logger.getAppender(appenderName);
        if (!(appender instanceof FileAppender)) {
            // Covers both a missing appender (null) and an appender of another type.
            logger.error(notFoundMessage);
            return;
        }

        FileAppender<ILoggingEvent> fileAppender = (FileAppender<ILoggingEvent>) appender;
        fileAppender.setFile(fileName);
        fileAppender.start();
    }

}