annis.gui.exporter.GeneralTextExporter.java Source code

Java tutorial

Introduction

Here is the source code for annis.gui.exporter.GeneralTextExporter.java

Source

/*
 * Copyright 2009-2011 Collaborative Research Centre SFB 632 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package annis.gui.exporter;

import annis.exceptions.AnnisCorpusAccessException;
import annis.exceptions.AnnisQLSemanticsException;
import annis.exceptions.AnnisQLSyntaxException;
import annis.libgui.Helper;
import annis.model.AnnisNode;
import annis.model.Annotation;
import annis.service.ifaces.AnnisResult;
import annis.service.ifaces.AnnisResultSet;
import annis.service.objects.AnnisAttribute;
import annis.service.objects.Match;
import annis.service.objects.MatchGroup;
import annis.service.objects.SubgraphFilter;
import annis.utils.LegacyGraphConverter;
import com.google.common.base.Splitter;
import com.google.common.base.Stopwatch;
import com.google.common.escape.Escaper;
import com.google.common.eventbus.EventBus;
import com.google.common.net.UrlEscapers;
import com.sun.jersey.api.client.GenericType;
import com.sun.jersey.api.client.WebResource;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.SaltProject;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.io.Writer;
import java.rmi.RemoteException;
import java.util.*;
import java.util.concurrent.TimeUnit;
import javax.ws.rs.core.MediaType;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.LoggerFactory;

/**
 * Base class for exporters that write query results as plain text.
 *
 * <p>The export first requests the list of matches for a query and then
 * fetches the sub-graphs of these matches in batches. Each batch is converted
 * to the legacy result set model and written by
 * {@link #convertText(AnnisResultSet, List, Map, Writer, int)}, which
 * subclasses may override to produce a different text format.</p>
 */
public abstract class GeneralTextExporter implements Exporter, Serializable {

    private static final org.slf4j.Logger log = LoggerFactory.getLogger(GeneralTextExporter.class);

    private final static Escaper urlPathEscape = UrlEscapers.urlPathSegmentEscaper();

    /** Number of matches that are fetched per sub-graph request at the start of an export. */
    private static final int INITIAL_STEP_SIZE = 10;

    /** Amount by which the batch size grows after a fast sub-graph request. */
    private static final int STEP_SIZE_INCREMENT = 10;

    /** The batch size is only increased as long as it is below this value. */
    private static final int MAX_STEP_SIZE = 50;

    /** A sub-graph request that took less than this many milliseconds counts as fast. */
    private static final long FAST_REQUEST_MILLIS = 500;

    /**
     * Executes the query and writes all matches as text to the given writer.
     *
     * <p>The matches are numbered consecutively across all batches; with the
     * offset arithmetic used here the first match gets the number 0 when the
     * default {@link #convertText(AnnisResultSet, List, Map, Writer, int)}
     * implementation is used.</p>
     *
     * @param queryAnnisQL the query to export
     * @param contextLeft left context, passed as "left" parameter to the sub-graph request
     * @param contextRight right context, passed as "right" parameter to the sub-graph request
     * @param corpora names of the corpora to search in
     * @param keys annotation keys handed to the converter; if {@code null} or
     *        empty, "tok" plus the annotation names of all corpora are used
     * @param argsAsString additional arguments as "key=value" pairs separated
     *        by "&amp;" or ";", may be {@code null}
     * @param annisResource web resource used for all service requests
     * @param out the writer that receives the exported text
     * @param eventBus if not {@code null}, the number of processed matches is
     *        posted here after each batch
     * @return {@code true} if the export finished, {@code false} if it was
     *         interrupted or one of the caught exceptions occurred
     */
    @Override
    public boolean convertText(String queryAnnisQL, int contextLeft, int contextRight, Set<String> corpora,
            List<String> keys, String argsAsString, WebResource annisResource, Writer out, EventBus eventBus) {
        try {
            List<String> effectiveKeys = keys;
            if (effectiveKeys == null || effectiveKeys.isEmpty()) {
                effectiveKeys = fetchDefaultKeys(corpora, annisResource);
            }

            Map<String, String> args = parseArgs(argsAsString);

            int stepSize = INITIAL_STEP_SIZE;

            // 1. Get all the matches as Salt ID
            InputStream matchStream = annisResource.path("search/find/")
                    .queryParam("q", Helper.encodeJersey(queryAnnisQL))
                    .queryParam("corpora", StringUtils.join(corpora, ",")).accept(MediaType.TEXT_PLAIN_TYPE)
                    .get(InputStream.class);

            try (BufferedReader inReader = new BufferedReader(new InputStreamReader(matchStream, "UTF-8"))) {
                WebResource subgraphRes = annisResource.path("search/subgraph");
                MatchGroup currentMatches = new MatchGroup();
                String currentLine;
                // zero-based index of the match line that is currently processed
                int offset = 0;
                // 2. iterate over all matches and get the sub-graph for a group of matches
                while (!Thread.currentThread().isInterrupted() && (currentLine = inReader.readLine()) != null) {
                    currentMatches.getMatches().add(Match.parseFromString(currentLine));

                    if (currentMatches.getMatches().size() >= stepSize) {
                        Stopwatch stopwatch = Stopwatch.createStarted();
                        SaltProject p = fetchSubgraph(subgraphRes, contextLeft, contextRight, args,
                                currentMatches);
                        stopwatch.stop();

                        // dynamically adjust the number of items to fetch if single subgraph
                        // export was fast enough
                        if (stopwatch.elapsed(TimeUnit.MILLISECONDS) < FAST_REQUEST_MILLIS
                                && stepSize < MAX_STEP_SIZE) {
                            stepSize += STEP_SIZE_INCREMENT;
                        }

                        // "offset" still points to the last match of this batch here
                        convertText(LegacyGraphConverter.convertToResultSet(p), effectiveKeys, args, out,
                                offset - currentMatches.getMatches().size());

                        currentMatches.getMatches().clear();

                        if (eventBus != null) {
                            eventBus.post(offset + 1);
                        }
                    }
                    offset++;
                } // end for each line

                // note: Thread.interrupted() also clears the interrupt flag of the thread
                if (Thread.interrupted()) {
                    // return from loop and abort export
                    log.info("Exporter job was interrupted");
                    return false;
                }

                // query the left over matches
                if (!currentMatches.getMatches().isEmpty()) {
                    SaltProject p = fetchSubgraph(subgraphRes, contextLeft, contextRight, args, currentMatches);
                    // "offset" is one past the last match here, thus the additional -1
                    convertText(LegacyGraphConverter.convertToResultSet(p), effectiveKeys, args, out,
                            offset - currentMatches.getMatches().size() - 1);
                }
            }

            out.append("\n");
            out.append("\n");
            out.append("finished");

            return true;

        } catch (AnnisQLSemanticsException | AnnisQLSyntaxException | AnnisCorpusAccessException
                | RemoteException ex) {
            log.error("Text export failed", ex);
        } catch (IOException ex) {
            log.error("Text export failed because of an I/O error", ex);
        }
        return false;
    }

    /**
     * Collects the default annotation keys: "tok" followed by the names
     * (without namespace) of all annotations of the given corpora.
     */
    private static List<String> fetchDefaultKeys(Set<String> corpora, WebResource annisResource) {
        List<AnnisAttribute> attributes = new ArrayList<>();
        for (String corpus : corpora) {
            attributes.addAll(annisResource.path("corpora").path(urlPathEscape.escape(corpus))
                    .path("annotations").queryParam("fetchvalues", "false")
                    .queryParam("onlymostfrequentvalues", "false").get(new AnnisAttributeListType()));
        }

        List<String> result = new ArrayList<>();
        result.add("tok");
        for (AnnisAttribute a : attributes) {
            if (a.getName() != null) {
                // strip the namespace ("namespace:name") if there is one
                String[] namespaceAndName = a.getName().split(":", 2);
                result.add(namespaceAndName[namespaceAndName.length - 1]);
            }
        }
        return result;
    }

    /**
     * Parses "key=value" pairs that are separated by "&amp;" or ";". A pair
     * without "=" is mapped to the empty string; {@code null} input yields an
     * empty map.
     */
    private static Map<String, String> parseArgs(String argsAsString) {
        Map<String, String> args = new HashMap<>();
        if (argsAsString == null) {
            return args;
        }
        for (String pair : argsAsString.split("&|;")) {
            String[] keyAndValue = pair.split("=", 2);
            args.put(keyAndValue[0], keyAndValue.length > 1 ? keyAndValue[1] : "");
        }
        return args;
    }

    /**
     * Posts the given matches to the sub-graph resource, adding the context,
     * the optional "segmentation" argument and the optional sub-graph filter
     * as query parameters.
     */
    private SaltProject fetchSubgraph(WebResource subgraphRes, int contextLeft, int contextRight,
            Map<String, String> args, MatchGroup matches) {
        WebResource res = subgraphRes.queryParam("left", "" + contextLeft).queryParam("right",
                "" + contextRight);

        if (args.containsKey("segmentation")) {
            res = res.queryParam("segmentation", args.get("segmentation"));
        }

        SubgraphFilter filter = getSubgraphFilter();
        if (filter != null) {
            res = res.queryParam("filter", filter.name());
        }

        return res.post(SaltProject.class, matches);
    }

    /**
     * Writes one batch of results as text.
     *
     * <p>Each result becomes one line consisting of its number, followed by
     * the spanned text of all tokens separated by spaces; matched tokens are
     * enclosed in square brackets. If the "metakeys" argument is set, the
     * requested meta data is appended after the token line.</p>
     *
     * @param queryResult the results of this batch
     * @param keys annotation keys; not used by this implementation
     * @param args exporter arguments; "metakeys" is a comma separated list of
     *        meta data keys to output
     * @param out the writer that receives the text
     * @param offset value added to the one-based position inside the batch to
     *        get the number written in front of each result
     * @throws IOException if writing to {@code out} fails
     */
    public void convertText(AnnisResultSet queryResult, List<String> keys, Map<String, String> args, Writer out,
            int offset) throws IOException {
        // the cache only lives as long as this batch is processed
        Map<String, Map<String, Annotation>> metadataCache = new HashMap<>();

        List<String> metaKeys = new ArrayList<>();
        if (args.containsKey("metakeys")) {
            for (String metaKey : Splitter.on(",").trimResults().split(args.get("metakeys"))) {
                metaKeys.add(metaKey);
            }
        }

        int counter = 0;

        for (AnnisResult annisResult : queryResult) {
            Set<Long> matchedNodeIds = annisResult.getGraph().getMatchedNodeIds();

            counter++;
            out.append((counter + offset) + ". ");

            for (AnnisNode token : annisResult.getGraph().getTokens()) {
                Long tokID = token.getId();
                if (matchedNodeIds.contains(tokID)) {
                    out.append("[");
                    out.append(token.getSpannedText());
                    out.append("]");
                } else {
                    out.append(token.getSpannedText());
                }
                out.append(" ");
            }
            out.append("\n");

            if (!metaKeys.isEmpty()) {
                String[] path = annisResult.getPath();
                // the last path element is used as name of the toplevel corpus
                appendMetaData(out, metaKeys, path[path.length - 1], annisResult.getDocumentName(), metadataCache);
            }
            out.append("\n");
        }

    }

    /**
     * Appends the requested meta data of a document, one line per key in the
     * form {@code "\tmeta::<key>\t<value>\n"}. Keys without a matching
     * annotation are skipped.
     *
     * @param out the writer that receives the text
     * @param metaKeys the meta data keys to output; an annotation can be
     *        addressed by its qualified name or by its plain name
     * @param toplevelCorpus name of the toplevel corpus of the document
     * @param documentName name of the document
     * @param metadataCache cache of already fetched meta data, the entries are
     *        stored under the key {@code toplevelCorpus + ":" + documentName}
     * @throws IOException if writing to {@code out} fails
     */
    public void appendMetaData(Writer out, List<String> metaKeys, String toplevelCorpus, String documentName,
            Map<String, Map<String, Annotation>> metadataCache) throws IOException {
        String cacheKey = toplevelCorpus + ":" + documentName;

        Map<String, Annotation> metaData = metadataCache.get(cacheKey);
        if (metaData == null) {
            metaData = new HashMap<>();
            for (Annotation anno : Helper.getMetaData(toplevelCorpus, documentName)) {
                metaData.put(anno.getQualifiedName(), anno);
                metaData.put(anno.getName(), anno);
            }
            metadataCache.put(cacheKey, metaData);
        }

        for (String key : metaKeys) {
            Annotation anno = metaData.get(key);
            if (anno != null) {
                out.append("\tmeta::" + key + "\t" + anno.getValue()).append("\n");
            }
        }
    }

    /**
     * This exporter checks the interrupt flag of its thread while reading the
     * matches and therefore reports itself as cancelable.
     */
    @Override
    public boolean isCancelable() {
        return true;
    }

    /**
     * Returns the filter that is sent as "filter" parameter with each
     * sub-graph request.
     *
     * @return the filter or {@code null} if no filter parameter should be sent
     */
    public abstract SubgraphFilter getSubgraphFilter();

    /** Type token needed to request a {@code List<AnnisAttribute>} from the web resource. */
    private static class AnnisAttributeListType extends GenericType<List<AnnisAttribute>> {

        public AnnisAttributeListType() {
        }
    }
}