com.ibm.watson.developer_cloud.professor_languo.ingestion.indexing.LuceneDocumentMapper.java Source code

Java tutorial

Introduction

Here is the source code for com.ibm.watson.developer_cloud.professor_languo.ingestion.indexing.LuceneDocumentMapper.java

Source

/*
 * Copyright IBM Corp. 2015
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.ibm.watson.developer_cloud.professor_languo.ingestion.indexing;

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;

import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;

import com.ibm.watson.developer_cloud.professor_languo.exception.IngestionException;
import com.ibm.watson.developer_cloud.professor_languo.model.stack_exchange.StackExchangeThread;
import com.ibm.watson.developer_cloud.professor_languo.model.stack_exchange.StackExchangeConstants.IndexDocumentFieldName;

/**
 * {@link DocumentMapper} that turns a {@link StackExchangeThread} into a Lucene {@link Document}.
 *
 * <p>Each document carries the fields named by {@link #getFieldNames()}: the thread id, the
 * question title, body and tags, three answer-text fields, and the serialized thread itself.
 */
public class LuceneDocumentMapper implements DocumentMapper {

    /**
     * No-op: this mapper reads nothing from the supplied properties.
     *
     * @param properties configuration properties (ignored)
     * @throws IngestionException never thrown by this implementation
     */
    @Override
    public void initialize(Properties properties) throws IngestionException {

    }

    /**
     * Builds the Lucene document for a single thread.
     *
     * <p>Every text field is stored ({@code Field.Store.YES}). A {@code null} text value is
     * indexed as the empty string, because Lucene's {@code Field} constructor rejects a
     * {@code null} value with an {@link IllegalArgumentException}.
     *
     * @param question the thread to map
     * @return a new document populated from {@code question}
     * @throws IngestionException not thrown directly here; presumably propagated from thread
     *         serialization (confirm against {@code StackExchangeThreadSerializer})
     */
    @Override
    public Document createDocument(StackExchangeThread question) throws IngestionException {
        Document doc = new Document();
        doc.add(new IntField(IndexDocumentFieldName.THREAD_POST_ID.toString(), question.getId(), Field.Store.YES));

        doc.add(new TextField(IndexDocumentFieldName.THREAD_TITLE.toString(),
                nullToEmpty(question.getQuestion().getUnformattedTitle()), Field.Store.YES));

        doc.add(new TextField(IndexDocumentFieldName.THREAD_TEXT.toString(),
                nullToEmpty(question.getQuestion().getUnformattedBody()), Field.Store.YES));

        // Tags are indexed using the list's toString() form, e.g. "[tag1, tag2]".
        List<String> tags = question.getQuestion().getTags();
        doc.add(new TextField(IndexDocumentFieldName.THREAD_TAGS.toString(), (tags == null) ? "" : tags.toString(),
                Field.Store.YES));

        // The answer texts get the same null-to-empty treatment as the question fields above,
        // so a thread with a missing answer text cannot abort indexing inside the TextField
        // constructor.
        doc.add(new TextField(IndexDocumentFieldName.ACCEPTED_ANSWER_TEXT.toString(),
                nullToEmpty(question.getAcceptedAnswerText()), Field.Store.YES));

        doc.add(new TextField(IndexDocumentFieldName.TOP_VOTED_ANSWER_TEXT.toString(),
                nullToEmpty(question.getTopVotedAnswerText()), Field.Store.YES));

        doc.add(new TextField(IndexDocumentFieldName.CONCATENATED_ANSWERS_TEXT.toString(),
                nullToEmpty(question.getConcatenatedAnswersText()), Field.Store.YES));

        // The whole thread is kept as a stored binary field alongside the text fields.
        byte[] serializedThread = StackExchangeThreadSerializer.serializeObjToBinArr(question);
        doc.add(new StoredField(IndexDocumentFieldName.SERIALIZED_THREAD.toString(), serializedThread));

        return doc;
    }

    /**
     * Returns the field types advertised by this mapper, as a fixed-size list of eight entries.
     *
     * <p>NOTE(review): this list does not exactly mirror {@link #createDocument}. The seventh
     * entry is {@code TextField.TYPE_NOT_STORED} although every {@code TextField} is added with
     * {@code Field.Store.YES}, and the last entry is {@code BinaryDocValuesField.TYPE} although
     * the serialized thread is added as a {@code StoredField}. The values are left unchanged
     * because the callers are not visible from this file; confirm which side is intended.
     *
     * @return the list of field types
     */
    @Override
    public List<FieldType> getFieldTypes() {
        return Arrays.asList(IntField.TYPE_STORED, TextField.TYPE_STORED, TextField.TYPE_STORED,
                TextField.TYPE_STORED, TextField.TYPE_STORED, TextField.TYPE_STORED, TextField.TYPE_NOT_STORED,
                BinaryDocValuesField.TYPE);
    }

    /**
     * Returns the names of all fields that {@link #createDocument} adds to a document.
     *
     * @return a new mutable set of field names
     */
    @Override
    public Set<String> getFieldNames() {
        return new HashSet<String>(Arrays.asList(IndexDocumentFieldName.THREAD_POST_ID.toString(),
                IndexDocumentFieldName.THREAD_TITLE.toString(), IndexDocumentFieldName.THREAD_TEXT.toString(),
                IndexDocumentFieldName.THREAD_TAGS.toString(),
                IndexDocumentFieldName.ACCEPTED_ANSWER_TEXT.toString(),
                IndexDocumentFieldName.TOP_VOTED_ANSWER_TEXT.toString(),
                IndexDocumentFieldName.CONCATENATED_ANSWERS_TEXT.toString(),
                IndexDocumentFieldName.SERIALIZED_THREAD.toString()));
    }

    /** Returns {@code text}, or the empty string when {@code text} is {@code null}. */
    private static String nullToEmpty(String text) {
        return (text == null) ? "" : text;
    }
}