de.tudarmstadt.ukp.dkpro.argumentation.io.writer.json.JsonStreamDumpWriter.java Source code

Java tutorial

Introduction

Here is the source code for de.tudarmstadt.ukp.dkpro.argumentation.io.writer.json.JsonStreamDumpWriter.java

Source

/*
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.tudarmstadt.ukp.dkpro.argumentation.io.writer.json;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.Callable;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;

import de.tudarmstadt.ukp.dkpro.argumentation.io.annotations.AnnotatedDocument;
import de.tudarmstadt.ukp.dkpro.argumentation.io.annotations.SpanAnnotationGraph;
import de.tudarmstadt.ukp.dkpro.argumentation.io.annotations.SpanTextLabel;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;

/**
 * A UIMA annotator which collects an {@link AnnotatedDocument} for every processed {@link JCas}
 * and serializes all of them as indented JSON once the collection has been processed completely.
 * <p>
 * The output target depends on the optional {@link #PARAM_OUTPUT_FILE} parameter as evaluated
 * during {@link #initialize(UimaContext)}:
 * <ul>
 * <li>not set: one JSON object mapping document IDs to documents is printed to
 * {@link System#out};</li>
 * <li>an existing directory: one {@code <documentId>.json} file per document is written into
 * it;</li>
 * <li>anything else: one JSON object mapping document IDs to documents is written to that
 * file, replacing any previous content.</li>
 * </ul>
 * All documents are held in memory until {@link #collectionProcessComplete()} is called.
 * <p>
 * TODO: Implement usage of {@link JsonStreamDumpWriter#PARAM_OUTPUT_FILE} param in calling classes
 *
 * @author Todd Shore
 * @since Apr 29, 2016
 *
 */
public final class JsonStreamDumpWriter extends JCasAnnotator_ImplBase {

    /**
     * Writes every collected document into its own {@code <documentId>.json} file inside the
     * directory denoted by {@link JsonStreamDumpWriter#outputPath}.
     */
    private class DirectoryDumper implements Callable<Void> {

        /**
         * Serializes each collected document to a separate file.
         *
         * @return always {@code null}
         * @throws IOException
         *             if a file cannot be created or written, or a document cannot be
         *             serialized
         */
        @Override
        public Void call() throws IOException {
            for (final Entry<String, AnnotatedDocument<SpanTextLabel>> documentAnnotation : documentAnnotations
                    .entrySet()) {
                // NOTE(review): the document ID is used verbatim as the file name; IDs containing
                // path separators would resolve outside/below the output directory -- confirm
                // against the readers producing the IDs.
                final File docFile = new File(outputPath, documentAnnotation.getKey() + ".json");
                // Let Jackson open the file itself: it encodes as UTF-8 and propagates I/O
                // failures, whereas a PrintWriter around a FileWriter uses the platform charset
                // and silently swallows write errors.
                OBJECT_MAPPER.writeValue(docFile, documentAnnotation.getValue());
            }
            return null;
        }

    }

    /**
     * Writes all collected documents as a single JSON object, either to the file denoted by
     * {@link JsonStreamDumpWriter#outputPath} or, if no path was configured, to
     * {@link System#out}.
     */
    private class FileDumper implements Callable<Void> {

        /**
         * Serializes the whole document map in one go.
         *
         * @return always {@code null}
         * @throws IOException
         *             if the output file cannot be created or written, or the documents cannot
         *             be serialized
         */
        @Override
        public Void call() throws IOException {
            if (outputPath == null) {
                // Default to System.out. Serialize to a string first so that neither Jackson nor
                // a try-with-resources block closes the process-wide standard output stream.
                System.out.print(OBJECT_MAPPER.writeValueAsString(documentAnnotations));
                System.out.flush();
            } else {
                // Jackson truncates the file, encodes as UTF-8 and reports I/O failures.
                OBJECT_MAPPER.writeValue(outputPath, documentAnnotations);
            }
            return null;
        }

    }

    /**
     * The parameter referring to the path to write the processed results to.
     */
    public static final String PARAM_OUTPUT_FILE = "outputPath";

    private static final Log LOG = LogFactory.getLog(JsonStreamDumpWriter.class);

    /**
     * The shared mapper used for all serialization done by this class.
     */
    private static final ObjectMapper OBJECT_MAPPER = createObjectMapper();

    /**
     * Creates the mapper used for serialization, configured to produce indented output.
     * <p>
     * TODO: Make this configurable via parameters passed to annotators of this class
     *
     * @return a new, pretty-printing {@link ObjectMapper}
     */
    private static ObjectMapper createObjectMapper() {
        final ObjectMapper result = new ObjectMapper();
        result.enable(SerializationFeature.INDENT_OUTPUT);
        return result;
    }

    /**
     * The documents collected so far, keyed by document ID.
     */
    private Map<String, AnnotatedDocument<SpanTextLabel>> documentAnnotations;

    @ConfigurationParameter(name = PARAM_OUTPUT_FILE, mandatory = false, description = "The path to write the processed results to.")
    private File outputPath;

    /**
     * The output strategy chosen in {@link #initialize(UimaContext)}.
     */
    private Callable<Void> pathWriter;

    /**
     * Writes all documents collected so far to the configured target and then discards them.
     *
     * @throws AnalysisEngineProcessException
     *             wrapping any exception raised while writing
     */
    @Override
    public void collectionProcessComplete() throws AnalysisEngineProcessException {
        try {
            pathWriter.call();
        } catch (final Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
        // Release the written documents but keep the annotator usable: replacing the map with
        // null would make any later process() call on this instance fail with a
        // NullPointerException.
        documentAnnotations = new HashMap<>();
    }

    /**
     * Chooses the output strategy from the configured output path and prepares the document
     * buffer.
     *
     * @param context
     *            the UIMA context providing the configuration parameters
     * @throws ResourceInitializationException
     *             if the superclass initialization fails
     */
    @Override
    public void initialize(final UimaContext context) throws ResourceInitializationException {
        super.initialize(context);
        // Only an already existing directory selects per-document output; a missing path or a
        // plain file (existing or not) selects the single-target dump.
        if (outputPath != null && outputPath.isDirectory()) {
            pathWriter = new DirectoryDumper();
        } else {
            pathWriter = new FileDumper();
        }

        // TODO: Make initial capacity configurable
        documentAnnotations = new HashMap<>();
    }

    /**
     * Converts the annotations of the given CAS into an {@link AnnotatedDocument} and buffers it
     * under the document's ID until {@link #collectionProcessComplete()} is called. A document
     * with an ID that was already seen replaces the earlier one.
     *
     * @param jCas
     *            the CAS holding the document to collect
     * @throws AnalysisEngineProcessException
     *             declared by the overridden method; not thrown directly by this implementation
     */
    @Override
    public void process(final JCas jCas) throws AnalysisEngineProcessException {
        final String documentId = DocumentMetaData.get(jCas).getDocumentId();
        LOG.info(String.format("Processing document \"%s\".", documentId));
        final JCasTextSpanAnnotationGraphFactory converter = new JCasTextSpanAnnotationGraphFactory();
        final SpanAnnotationGraph<SpanTextLabel> spanAnnotations = converter.apply(jCas);
        final AnnotatedDocument<SpanTextLabel> doc = new AnnotatedDocument<>(jCas.getDocumentText(),
                spanAnnotations);

        documentAnnotations.put(documentId, doc);

    }
}