org.elasticsearch.vectorize.VectorizeService.java Source code

Java tutorial

Introduction

Here is the source code for org.elasticsearch.vectorize.VectorizeService.java

Source

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.vectorize;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.TermStatistics;
import org.elasticsearch.action.termvectors.TermVectorsResponse;
import org.elasticsearch.action.vectorize.VectorizeRequest;
import org.elasticsearch.action.vectorize.VectorizeResponse;
import org.elasticsearch.index.get.GetField;
import org.elasticsearch.index.get.GetResult;
import org.elasticsearch.index.shard.AbstractIndexShardComponent;
import org.elasticsearch.index.shard.IndexShard;

import java.io.IOException;
import java.util.Map;

/**
 */
public class VectorizeService extends AbstractIndexShardComponent {

    private IndexShard indexShard;

    // Unfortunately it does not seem possible to bind shard services in a plugin
    public VectorizeService(IndexShard indexShard) {
        super(indexShard.shardId(), indexShard.indexSettings());
        this.indexShard = indexShard;
    }

    public VectorizeResponse getVector(VectorizeRequest request) {
        final VectorizeResponse response = new VectorizeResponse(request.index(), request.type(), request.id());
        final Vectorizer vectorizer = request.vectorizer();

        // first fetch the term vectors
        TermVectorsResponse termVectorsResponse = null;
        if (request.fields() != null) {
            termVectorsResponse = getTermVectors(request);
        }
        if (termVectorsResponse != null && termVectorsResponse.isExists()) {
            try {
                processTermVectorsFields(vectorizer, termVectorsResponse.getFields());
            } catch (IOException e) {
                return response; // we failed return an empty response for now
            }
        }

        // now take care of the numerical fields
        GetResult getResult = null;
        if (request.numericalFields() != null) {
            getResult = getGetResult(request);
        }
        if (getResult != null && getResult.isExists()) {
            processGetResult(vectorizer, getResult.getFields());
        }

        // now write the obtained vector
        try {
            response.setVector(vectorizer.writeVector());
        } catch (IOException e) {
            return response;
        }

        // finally return the response and set the format
        response.setExists(exists(termVectorsResponse, getResult));
        response.setFormat(request.format());
        return response;
    }

    private boolean exists(TermVectorsResponse termVectorsResponse, GetResult getResult) {
        return ((termVectorsResponse != null && termVectorsResponse.isExists())
                || (getResult != null && getResult.isExists()));
    }

    private TermVectorsResponse getTermVectors(VectorizeRequest request) {
        // TODO: there is no need to actually to embed a term vector request
        return indexShard.termVectorsService().getTermVectors(request.getTermVectorsRequest(),
                indexShard.shardId().getIndex());
    }

    private GetResult getGetResult(VectorizeRequest request) {
        return indexShard.getService().get(request.type(), request.id(), request.numericalFields(),
                request.realtime(), request.version(), request.versionType(), null, true);
    }

    private void processTermVectorsFields(Vectorizer vectorizer, Fields termVectorsFields) throws IOException {
        for (String fieldName : termVectorsFields) {
            TermsEnum termsEnum = termVectorsFields.terms(fieldName).iterator();
            while (termsEnum.next() != null) {
                Term term = new Term(fieldName, termsEnum.term());
                TermStatistics termStatistics = new TermStatistics(termsEnum.term(), termsEnum.docFreq(),
                        termsEnum.totalTermFreq());
                int freq = termsEnum.postings(null, null, PostingsEnum.ALL).freq();
                vectorizer.add(term, termStatistics, freq);
            }
        }
    }

    private void processGetResult(Vectorizer vectorizer, Map<String, GetField> getResult) {
        for (GetField getField : getResult.values()) {
            vectorizer.add(getField.getName(), getField.getValues());
        }
    }
}