eu.project.ttc.models.index.JsonTermIndexIO.java Source code

Introduction

Here is the source code for eu.project.ttc.models.index.JsonTermIndexIO.java
Source

/*******************************************************************************
 * Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *******************************************************************************/
package eu.project.ttc.models.index;

import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

import eu.project.ttc.api.JsonOptions;
import eu.project.ttc.engines.desc.Lang;
import eu.project.ttc.models.Component;
import eu.project.ttc.models.CompoundType;
import eu.project.ttc.models.ContextVector;
import eu.project.ttc.models.Document;
import eu.project.ttc.models.OccurrenceStore;
import eu.project.ttc.models.Term;
import eu.project.ttc.models.TermBuilder;
import eu.project.ttc.models.TermIndex;
import eu.project.ttc.models.TermOccurrence;
import eu.project.ttc.models.TermVariation;
import eu.project.ttc.models.TermWord;
import eu.project.ttc.models.VariationType;
import eu.project.ttc.models.Word;
import eu.project.ttc.models.WordBuilder;
import eu.project.ttc.models.occstore.MemoryOccurrenceStore;
import eu.project.ttc.models.occstore.MongoDBOccurrenceStore;

public class JsonTermIndexIO {

    private static final Logger LOGGER = LoggerFactory.getLogger(JsonTermIndexIO.class);

    /*
     * Error messages for parsing
     */
    private static final String MSG_EXPECT_PROP_FOR_VAR = "Expecting %s property for term variation";
    private static final String MSG_EXPECT_PROP_FOR_OCCURRENCE = "Expecting %s property for occurrence";
    private static final String MSG_EXPECT_PROP_FOR_TERM_WORD = "Expecting %s property for term word";

    /*
     * Occurrence storing options
     */
    private static final String OCCURRENCE_STORAGE_EMBEDDED = "embedded";
    private static final String OCCURRENCE_STORAGE_MONGODB = "MongoDB";

    /*
     * Json properties
     */
    private static final String WORDS = "words";
    private static final String METADATA = "metadata";
    private static final String LANG = "lang";
    private static final String NAME = "name";
    private static final String CORPUS_ID = "corpus-id";
    private static final String LEMMA = "lemma";
    private static final String STEM = "stem";
    private static final String COMPOUND_TYPE = "compound_type";
    private static final String COMPONENTS = "components";
    private static final String BEGIN = "begin";
    private static final String END = "end";
    private static final String TERMS = "terms";
    private static final String ID = "id";
    private static final String GROUPING_KEY = "key";
    private static final String SYN = "syn";
    private static final String FREQUENCY = "freq";
    private static final String SPOTTING_RULE = "rule";
    private static final String TERM_VARIATIONS = "variations";
    private static final String VARIANT_TYPE = "type";
    private static final String INFO = "info";
    private static final String BASE = "base";
    private static final String VARIANT = "variant";
    private static final String VARIANT_SCORE = "vscore";
    private static final String RANK = "rank";
    private static final String SPECIFICITY = "spec";

    //   private static final String RULE = "rule";
    private static final String FILE = "file";
    private static final String OCCURRENCES = "occurrences";
    private static final String TEXT = "text";
    private static final String INPUT_SOURCES = "input_sources";
    private static final String CONTEXT = "context";
    private static final String CO_OCCURRENCES = "cooccs";
    private static final String NB_COCCS = "cnt";
    private static final String ASSOC_RATE = "assoc_rate";
    private static final String CO_TERM = "co_term";
    private static final String TOTAL_COOCCURRENCES = "total_cooccs";
    private static final String OCCURRENCE_STORAGE = "occurrence_storage";
    private static final String OCCURRENCE_MONGODB_STORE_URI = "occurrence_store_mongodb_uri";

    private static final String FREQ_NORM = "f_norm";
    private static final String GENERAL_FREQ_NORM = "gf_norm";
    private static final String NB_WORD_ANNOTATIONS = "wordsNum";
    private static final String NB_SPOTTED_TERMS = "spottedTermsNum";

    /**
     * Loads the json-serialized term index into the param {@link TermIndex} object.
     * 
     * @param reader
     * @param options
     *          The deserialization {@link IOOptions}.
     * @return
     * @throws JsonParseException
     * @throws IOException
     */
    public static TermIndex load(Reader reader, JsonOptions options) throws JsonParseException, IOException {
        TermIndex termIndex = null;
        JsonFactory jsonFactory = new JsonFactory();
        JsonParser jp = jsonFactory.createParser(reader); // or Stream, Reader
        jp.enable(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES);
        jp.enable(JsonParser.Feature.STRICT_DUPLICATE_DETECTION);
        String fieldname;
        String compLemma = null;
        int fileSource = -1;
        String wordLemma = null;
        String syntacticLabel = null;
        int begin = -1;
        int end = -1;
        int nbWordAnnos = -1;
        int nbSpottedTerms = -1;
        Term b;
        Term v;
        String text;
        String base;
        String variant;
        //      String rule;
        String infoToken;
        String variantType;
        double variantScore;

        Map<Integer, String> inputSources = Maps.newTreeMap();

        Map<Integer, List<TempVecEntry>> contextVectors = Maps.newHashMap();

        OccurrenceStore occurrenceStore = null;

        // useful var for debug
        JsonToken tok;

        while ((tok = jp.nextToken()) != JsonToken.END_OBJECT) {

            fieldname = jp.getCurrentName();
            if (METADATA.equals(fieldname)) {
                jp.nextToken();
                String termIndexName = null;
                Lang lang = null;
                String corpusID = null;
                String occurrenceStorage = null;
                String occurrenceStoreURI = null;

                while ((tok = jp.nextToken()) != JsonToken.END_OBJECT) {
                    fieldname = jp.getCurrentName();
                    if (LANG.equals(fieldname)) {
                        lang = Lang.forName(jp.nextTextValue());
                    } else if (NAME.equals(fieldname)) {
                        termIndexName = jp.nextTextValue();
                    } else if (NB_WORD_ANNOTATIONS.equals(fieldname)) {
                        nbWordAnnos = jp.nextIntValue(-1);
                    } else if (NB_SPOTTED_TERMS.equals(fieldname)) {
                        nbSpottedTerms = jp.nextIntValue(-1);
                    } else if (CORPUS_ID.equals(fieldname)) {
                        corpusID = jp.nextTextValue();
                    } else if (OCCURRENCE_STORAGE.equals(fieldname)) {
                        occurrenceStorage = jp.nextTextValue();
                    } else if (OCCURRENCE_MONGODB_STORE_URI.equals(fieldname)) {
                        occurrenceStoreURI = jp.nextTextValue();
                    }
                }
                Preconditions.checkState(lang != null, "The property meta.lang must be defined");
                Preconditions.checkState(termIndexName != null, "The property meta.name must be defined");

                if (occurrenceStorage != null && occurrenceStorage.equals(OCCURRENCE_STORAGE_MONGODB)) {
                    Preconditions.checkNotNull(occurrenceStoreURI,
                            "Missing attribute " + OCCURRENCE_MONGODB_STORE_URI);
                    occurrenceStore = new MongoDBOccurrenceStore(occurrenceStoreURI, OccurrenceStore.State.INDEXED);
                } else
                    occurrenceStore = new MemoryOccurrenceStore();

                termIndex = new MemoryTermIndex(termIndexName, lang, occurrenceStore);
                if (corpusID != null)
                    termIndex.setCorpusId(corpusID);
                if (nbWordAnnos != -1)
                    termIndex.setWordAnnotationsNum(nbWordAnnos);
                if (nbSpottedTerms != -1)
                    termIndex.setSpottedTermsNum(nbSpottedTerms);

                if (options.isMetadataOnly())
                    return termIndex;

            } else if (WORDS.equals(fieldname)) {
                jp.nextToken();
                while ((tok = jp.nextToken()) != JsonToken.END_ARRAY) {
                    WordBuilder wordBuilder = WordBuilder.start();
                    while ((tok = jp.nextToken()) != JsonToken.END_OBJECT) {
                        fieldname = jp.getCurrentName();
                        if (LEMMA.equals(fieldname))
                            wordBuilder.setLemma(jp.nextTextValue());
                        else if (COMPOUND_TYPE.equals(fieldname))
                            wordBuilder.setCompoundType(CompoundType.fromName(jp.nextTextValue()));
                        else if (STEM.equals(fieldname))
                            wordBuilder.setStem(jp.nextTextValue());
                        else if (COMPONENTS.equals(fieldname)) {
                            while ((tok = jp.nextToken()) != JsonToken.END_ARRAY) {
                                while ((tok = jp.nextToken()) != JsonToken.END_OBJECT) {
                                    fieldname = jp.getCurrentName();
                                    if (LEMMA.equals(fieldname))
                                        compLemma = jp.nextTextValue();
                                    else if (BEGIN.equals(fieldname))
                                        begin = jp.nextIntValue(-2);
                                    else if (END.equals(fieldname))
                                        end = jp.nextIntValue(-2);
                                }
                                wordBuilder.addComponent(begin, end, compLemma);
                            }
                        }
                    }
                    try {
                        termIndex.addWord(wordBuilder.create());
                    } catch (Exception e) {
                        LOGGER.error("Could not add word " + wordBuilder.getLemma() + " to term index", e);
                        LOGGER.warn("Error ignored, trying ton continue the loading of TermIndex");
                    }
                }
            } else if (TERMS.equals(fieldname)) {
                jp.nextToken();
                while ((tok = jp.nextToken()) != JsonToken.END_ARRAY) {
                    TermBuilder builder = TermBuilder.start(termIndex);
                    List<TempVecEntry> currentContextVector = Lists.newArrayList();
                    int currentTermId = -1;
                    while ((tok = jp.nextToken()) != JsonToken.END_OBJECT) {
                        fieldname = jp.getCurrentName();
                        if (GROUPING_KEY.equals(fieldname))
                            builder.setGroupingKey(jp.nextTextValue());
                        else if (SPOTTING_RULE.equals(fieldname))
                            builder.setSpottingRule(jp.nextTextValue());
                        else if (ID.equals(fieldname)) {
                            currentTermId = jp.nextIntValue(-2);
                            builder.setId(currentTermId);
                        } else if (RANK.equals(fieldname)) {
                            builder.setRank(jp.nextIntValue(-1));
                        } else if (FREQUENCY.equals(fieldname)) {
                            builder.setFrequency(jp.nextIntValue(-1));
                        } else {
                            if (FREQ_NORM.equals(fieldname)) {
                                jp.nextToken();
                                builder.setFrequencyNorm((double) jp.getFloatValue());
                            } else if (SPECIFICITY.equals(fieldname)) {
                                jp.nextToken();
                                builder.setSpecificity((double) jp.getDoubleValue());
                            } else if (GENERAL_FREQ_NORM.equals(fieldname)) {
                                jp.nextToken();
                                builder.setGeneralFrequencyNorm((double) jp.getFloatValue());
                            } else if (WORDS.equals(fieldname)) {
                                while ((tok = jp.nextToken()) != JsonToken.END_ARRAY) {
                                    wordLemma = null;
                                    syntacticLabel = null;
                                    while ((tok = jp.nextToken()) != JsonToken.END_OBJECT) {
                                        fieldname = jp.getCurrentName();
                                        if (LEMMA.equals(fieldname))
                                            wordLemma = jp.nextTextValue();
                                        else if (SYN.equals(fieldname))
                                            syntacticLabel = jp.nextTextValue();
                                    }
                                    Preconditions.checkArgument(wordLemma != null, MSG_EXPECT_PROP_FOR_TERM_WORD,
                                            LEMMA);
                                    Preconditions.checkArgument(syntacticLabel != null,
                                            MSG_EXPECT_PROP_FOR_TERM_WORD, SYN);
                                    builder.addWord(termIndex.getWord(wordLemma), syntacticLabel);
                                } // end words

                            } else if (OCCURRENCES.equals(fieldname)) {
                                tok = jp.nextToken();
                                if (tok == JsonToken.START_ARRAY) {

                                    while ((tok = jp.nextToken()) != JsonToken.END_ARRAY) {
                                        begin = -1;
                                        end = -1;
                                        fileSource = -1;
                                        text = null;
                                        while ((tok = jp.nextToken()) != JsonToken.END_OBJECT) {
                                            fieldname = jp.getCurrentName();
                                            if (BEGIN.equals(fieldname))
                                                begin = jp.nextIntValue(-1);
                                            else if (TEXT.equals(fieldname))
                                                text = jp.nextTextValue();
                                            else if (END.equals(fieldname))
                                                end = jp.nextIntValue(-1);
                                            else if (FILE.equals(fieldname)) {
                                                fileSource = jp.nextIntValue(-1);
                                            }
                                        }

                                        Preconditions.checkArgument(begin != -1, MSG_EXPECT_PROP_FOR_OCCURRENCE,
                                                BEGIN);
                                        Preconditions.checkArgument(end != -1, MSG_EXPECT_PROP_FOR_OCCURRENCE, END);
                                        Preconditions.checkArgument(fileSource != -1,
                                                MSG_EXPECT_PROP_FOR_OCCURRENCE, FILE);
                                        Preconditions.checkNotNull(inputSources.get(fileSource),
                                                "No file source with id: %s", fileSource);
                                        Preconditions.checkNotNull(text, MSG_EXPECT_PROP_FOR_OCCURRENCE, TEXT);
                                        if (occurrenceStore.getStoreType() == OccurrenceStore.Type.MEMORY)
                                            builder.addOccurrence(begin, end,
                                                    termIndex.getDocument(inputSources.get(fileSource)), text);
                                    }
                                }
                                // end occurrences
                            } else if (CONTEXT.equals(fieldname)) {
                                @SuppressWarnings("unused")
                                int totalCooccs = 0;
                                while ((tok = jp.nextToken()) != JsonToken.END_OBJECT) {
                                    fieldname = jp.getCurrentName();
                                    if (TOTAL_COOCCURRENCES.equals(fieldname))
                                        /*
                                         * value never used since the total will 
                                         * be reincremented in the contextVector
                                         */
                                        totalCooccs = jp.nextIntValue(-1);
                                    else if (CO_OCCURRENCES.equals(fieldname)) {
                                        jp.nextToken();
                                        while ((tok = jp.nextToken()) != JsonToken.END_ARRAY) {
                                            TempVecEntry entry = new TempVecEntry();
                                            while ((tok = jp.nextToken()) != JsonToken.END_OBJECT) {
                                                fieldname = jp.getCurrentName();
                                                if (NB_COCCS.equals(fieldname))
                                                    entry.setNbCooccs(jp.nextIntValue(-1));
                                                else if (ASSOC_RATE.equals(fieldname)) {
                                                    jp.nextToken();
                                                    entry.setAssocRate(jp.getFloatValue());
                                                } else if (CO_TERM.equals(fieldname))
                                                    entry.setTermGroupingKey(jp.nextTextValue());
                                                else if (FILE.equals(fieldname)) {
                                                    fileSource = jp.nextIntValue(-1);
                                                }
                                            }
                                            currentContextVector.add(entry);
                                        }
                                    }
                                }
                            }
                        } //end if fieldname

                    } // end term object
                    try {
                        builder.createAndAddToIndex();
                    } catch (Exception e) {
                        LOGGER.error("Could not add term " + builder.getGroupingKey() + " to term index", e);
                        LOGGER.warn("Error ignored, trying ton continue the loading of TermIndex");
                    }

                    if (options.isWithContexts())
                        contextVectors.put(currentTermId, currentContextVector);

                } // end array of terms

            } else if (INPUT_SOURCES.equals(fieldname)) {
                jp.nextToken();
                while ((tok = jp.nextToken()) != JsonToken.END_OBJECT) {
                    String id = jp.getCurrentName();
                    try {
                        inputSources.put(Integer.parseInt(id), jp.nextTextValue());
                    } catch (NumberFormatException e) {
                        IOUtils.closeQuietly(jp);
                        throw new IllegalArgumentException("Bad format for input source key: " + id);
                    }
                }
            } else if (TERM_VARIATIONS.equals(fieldname)) {
                jp.nextToken();
                while ((tok = jp.nextToken()) != JsonToken.END_ARRAY) {
                    base = null;
                    variant = null;
                    infoToken = null;
                    variantType = null;
                    variantScore = 0;
                    while ((tok = jp.nextToken()) != JsonToken.END_OBJECT) {
                        fieldname = jp.getCurrentName();
                        if (BASE.equals(fieldname))
                            base = jp.nextTextValue();
                        else if (VARIANT.equals(fieldname))
                            variant = jp.nextTextValue();
                        else if (VARIANT_TYPE.equals(fieldname))
                            variantType = jp.nextTextValue();
                        else if (VARIANT_SCORE.equals(fieldname)) {
                            jp.nextToken();
                            variantScore = jp.getDoubleValue();
                        } else if (INFO.equals(fieldname))
                            infoToken = jp.nextTextValue();
                    } // end syntactic variant object
                    Preconditions.checkNotNull(base, MSG_EXPECT_PROP_FOR_VAR, BASE);
                    Preconditions.checkNotNull(variant, MSG_EXPECT_PROP_FOR_VAR, VARIANT);
                    Preconditions.checkNotNull(infoToken, MSG_EXPECT_PROP_FOR_VAR, INFO);
                    b = termIndex.getTermByGroupingKey(base);
                    v = termIndex.getTermByGroupingKey(variant);
                    if (b != null && v != null) {

                        VariationType vType = VariationType.fromShortName(variantType);

                        TermVariation tv = new TermVariation(vType, b, v,
                                vType == VariationType.GRAPHICAL ? Double.parseDouble(infoToken) : infoToken);
                        tv.setScore(variantScore);
                        b.addTermVariation(tv);
                    } else {
                        if (b == null)
                            LOGGER.warn("Could not build variant because term \"{}\" was not found.", base);
                        if (v == null)
                            LOGGER.warn("Could not build variant because term \"{}\" was not found.", variant);
                    }

                    //               Preconditions.checkNotNull(b, MSG_TERM_DOES_NOT_EXIST, base);
                    //               Preconditions.checkNotNull(v, MSG_TERM_DOES_NOT_EXIST, variant);

                } // end syntactic variations array
            }
        }
        jp.close();

        if (options.isWithContexts()) {
            /*
             *  map term ids with terms in context vectors and
             *  set context vectors
             */
            List<TempVecEntry> currentTempVecList;
            Term term = null;
            Term coTerm = null;
            ContextVector contextVector;
            for (int termId : contextVectors.keySet()) {
                currentTempVecList = contextVectors.get(termId);
                term = termIndex.getTermById(termId);
                contextVector = new ContextVector(term);
                for (TempVecEntry tempVecEntry : currentTempVecList) {
                    coTerm = termIndex.getTermByGroupingKey(tempVecEntry.getTermGroupingKey());
                    contextVector.addEntry(coTerm, tempVecEntry.getNbCooccs(), tempVecEntry.getAssocRate());
                }
                if (!contextVector.getEntries().isEmpty())
                    term.setContextVector(contextVector);
            }
        }

        return termIndex;
    }

    public static void save(Writer writer, TermIndex termIndex, JsonOptions options) throws IOException {
        JsonFactory jsonFactory = new JsonFactory(); // or, for data binding, org.codehaus.jackson.mapper.MappingJsonFactory 
        //      jsonFactory.configure(f, state)
        JsonGenerator jg = jsonFactory.createGenerator(writer); // or Stream, Reader
        jg.useDefaultPrettyPrinter();

        jg.writeStartObject();

        jg.writeFieldName(METADATA);
        jg.writeStartObject();
        jg.writeFieldName(NAME);
        jg.writeString(termIndex.getName());
        jg.writeFieldName(LANG);
        jg.writeString(termIndex.getLang().getCode());
        if (termIndex.getCorpusId() != null) {
            jg.writeFieldName(CORPUS_ID);
            jg.writeString(termIndex.getCorpusId());
        }

        jg.writeFieldName(OCCURRENCE_STORAGE);
        if (options.isMongoDBOccStore()) {
            jg.writeString(OCCURRENCE_STORAGE_MONGODB);
            jg.writeFieldName(OCCURRENCE_MONGODB_STORE_URI);
            jg.writeString(options.getMongoDBOccStore());
        } else if (options.isEmbeddedOccurrences())
            jg.writeString(OCCURRENCE_STORAGE_EMBEDDED);
        else
            throw new IllegalStateException("Unknown storage mode");

        jg.writeFieldName(NB_WORD_ANNOTATIONS);
        jg.writeNumber(termIndex.getWordAnnotationsNum());
        jg.writeFieldName(NB_SPOTTED_TERMS);
        jg.writeNumber(termIndex.getSpottedTermsNum());

        jg.writeEndObject();

        jg.writeFieldName(INPUT_SOURCES);
        int idCnt = 0;
        Map<String, Integer> inputSources = Maps.newTreeMap();
        for (Document d : termIndex.getDocuments())
            if (!inputSources.containsKey(d.getUrl()))
                inputSources.put(d.getUrl(), ++idCnt);
        jg.writeStartObject();
        for (String uri : inputSources.keySet()) {
            jg.writeFieldName(inputSources.get(uri).toString());
            jg.writeString(uri);
        }
        jg.writeEndObject();

        jg.writeFieldName(WORDS);
        jg.writeStartArray();
        for (Word w : termIndex.getWords()) {
            jg.writeStartObject();
            jg.writeFieldName(LEMMA);
            jg.writeString(w.getLemma());
            jg.writeFieldName(STEM);
            jg.writeString(w.getStem());
            if (w.isCompound()) {
                jg.writeFieldName(COMPOUND_TYPE);
                jg.writeString(w.getCompoundType().name());
                jg.writeFieldName(COMPONENTS);
                jg.writeStartArray();
                for (Component c : w.getComponents()) {
                    jg.writeStartObject();
                    jg.writeFieldName(LEMMA);
                    jg.writeString(c.getLemma());
                    jg.writeFieldName(BEGIN);
                    jg.writeNumber(c.getBegin());
                    jg.writeFieldName(END);
                    jg.writeNumber(c.getEnd());
                    jg.writeEndObject();
                }
                jg.writeEndArray();
            }

            jg.writeEndObject();
        }
        jg.writeEndArray();

        Set<TermVariation> termVariations = Sets.newHashSet();

        jg.writeFieldName(TERMS);
        jg.writeStartArray();
        for (Term t : termIndex.getTerms()) {
            termVariations.addAll(t.getVariations());

            jg.writeStartObject();
            jg.writeFieldName(ID);
            jg.writeNumber(t.getId());
            jg.writeFieldName(RANK);
            jg.writeNumber(t.getRank());
            jg.writeFieldName(GROUPING_KEY);
            jg.writeString(t.getGroupingKey());
            jg.writeFieldName(WORDS);
            jg.writeStartArray();
            for (TermWord tw : t.getWords()) {
                jg.writeStartObject();
                jg.writeFieldName(SYN);
                jg.writeString(tw.getSyntacticLabel());
                jg.writeFieldName(LEMMA);
                jg.writeString(tw.getWord().getLemma());
                jg.writeEndObject();
            }
            jg.writeEndArray();

            jg.writeFieldName(FREQUENCY);
            jg.writeNumber(t.getFrequency());
            jg.writeFieldName(FREQ_NORM);
            jg.writeNumber(t.getFrequencyNorm());
            jg.writeFieldName(GENERAL_FREQ_NORM);
            jg.writeNumber(t.getGeneralFrequencyNorm());
            jg.writeFieldName(SPECIFICITY);
            jg.writeNumber(t.getSpecificity());
            jg.writeFieldName(SPOTTING_RULE);
            jg.writeString(t.getSpottingRule());

            if (options.withOccurrences() && options.isEmbeddedOccurrences()) {
                jg.writeFieldName(OCCURRENCES);
                jg.writeStartArray();
                for (TermOccurrence termOcc : t.getOccurrences()) {
                    jg.writeStartObject();
                    jg.writeFieldName(BEGIN);
                    jg.writeNumber(termOcc.getBegin());
                    jg.writeFieldName(END);
                    jg.writeNumber(termOcc.getEnd());
                    jg.writeFieldName(TEXT);
                    jg.writeString(termOcc.getCoveredText());
                    jg.writeFieldName(FILE);
                    jg.writeNumber(inputSources.get(termOcc.getSourceDocument().getUrl()));
                    jg.writeEndObject();
                }
                jg.writeEndArray();
            }

            if (options.isWithContexts() && t.isContextVectorComputed()) {
                jg.writeFieldName(CONTEXT);
                jg.writeStartObject();

                jg.writeFieldName(TOTAL_COOCCURRENCES);
                jg.writeNumber(t.getContextVector().getTotalCoccurrences());
                jg.writeFieldName(CO_OCCURRENCES);
                jg.writeStartArray();
                if (t.isContextVectorComputed()) {
                    for (ContextVector.Entry contextEntry : t.getContextVector().getEntries()) {
                        jg.writeStartObject();
                        jg.writeFieldName(CO_TERM);
                        jg.writeString(contextEntry.getCoTerm().getGroupingKey());
                        jg.writeFieldName(NB_COCCS);
                        jg.writeNumber(contextEntry.getNbCooccs());
                        jg.writeFieldName(ASSOC_RATE);
                        jg.writeNumber(contextEntry.getAssocRate());
                        jg.writeEndObject();
                    }
                }
                jg.writeEndArray();
                jg.writeEndObject();
            }

            jg.writeEndObject();
        }
        jg.writeEndArray();

        /* Variants */
        jg.writeFieldName(TERM_VARIATIONS);
        jg.writeStartArray();
        for (TermVariation v : termVariations) {
            jg.writeStartObject();
            jg.writeFieldName(BASE);
            jg.writeString(v.getBase().getGroupingKey());
            jg.writeFieldName(VARIANT);
            jg.writeString(v.getVariant().getGroupingKey());
            jg.writeFieldName(VARIANT_TYPE);
            jg.writeString(v.getVariationType().getShortName());
            jg.writeFieldName(INFO);
            jg.writeString(v.getInfo().toString());
            jg.writeFieldName(VARIANT_SCORE);
            jg.writeNumber(v.getScore());
            jg.writeEndObject();
        }
        jg.writeEndArray();

        jg.writeEndObject();
        jg.close();
    }

    private static class TempVecEntry {
        String termGroupingKey;
        double assocRate;
        int nbCooccs;

        public String getTermGroupingKey() {
            return termGroupingKey;
        }

        public void setTermGroupingKey(String termGroupingKey) {
            this.termGroupingKey = termGroupingKey;
        }

        public double getAssocRate() {
            return assocRate;
        }

        public void setAssocRate(double assocRate) {
            this.assocRate = assocRate;
        }

        public int getNbCooccs() {
            return nbCooccs;
        }

        public void setNbCooccs(int nbCooccs) {
            this.nbCooccs = nbCooccs;
        }
    }
}