Example usage for org.apache.lucene.document Document getBinaryValue

List of usage examples for org.apache.lucene.document Document getBinaryValue

Introduction

In this page you can find the example usage for org.apache.lucene.document Document getBinaryValue.

Prototype

public final BytesRef getBinaryValue(String name) 

Source Link

Document

Returns an array of bytes for the first (or only) field that has the name specified as the method parameter.

Usage

From source file:cn.hbu.cs.esearch.core.EsearchSegmentReader.java

License:Apache License

/**
 * Loads the stored binary payload of a document.
 *
 * @param docid the Lucene document id to load
 * @return the bytes stored under the document-store field, or {@code null}
 *         when the document could not be loaded
 * @throws IOException if reading from the underlying reader fails
 */
public BytesRef getStoredValue(int docid) throws IOException {
    Document doc = in.document(docid);
    if (doc == null) {
        return null;
    }
    return doc.getBinaryValue(AbstractEsearchIndexable.DOCUMENT_STORE_FIELD);
}

From source file:cn.hbu.cs.esearch.store.LuceneStore.java

License:Apache License

@Override
protected BytesRef getFromStore(long uid) throws IOException {
    // Map the external uid to an internal Lucene doc id; negative means "not found".
    int docid = mapDocId(uid);
    if (docid < 0) {
        return null;
    }
    // NOTE: the original re-checked docid >= 0 below even though the guard
    // above already guarantees it; the redundant condition has been removed.
    IndexReader reader = null;
    if (currentReaderData != null) {
        reader = currentReaderData.reader;
    }
    if (reader == null) {
        return null;
    }
    Document doc = reader.document(docid);
    if (doc != null) {
        return doc.getBinaryValue(field);
    }
    return null;
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Builds a {@link VcfIndexEntry} from a Lucene document produced by the feature index.
 * Fields stored as binary values are decoded from UTF-8; string-stored fields are read
 * directly. Requested VCF INFO fields are copied into the entry's info map.
 *
 * @param d             the Lucene document holding the indexed variation
 * @param vcfInfoFields INFO field names to copy into the entry, may be {@code null}
 * @return the populated index entry
 */
private VcfIndexEntry createVcfIndexEntry(Document d, List<String> vcfInfoFields) {
    VcfIndexEntry vcfIndexEntry = new VcfIndexEntry();
    vcfIndexEntry.setGene(d.get(FeatureIndexFields.GENE_ID.getFieldName()));

    BytesRef bytes = d.getBinaryValue(FeatureIndexFields.GENE_IDS.getFieldName());
    if (bytes != null) {
        vcfIndexEntry.setGeneIds(bytes.utf8ToString());
    }

    vcfIndexEntry.setGeneName(d.get(FeatureIndexFields.GENE_NAME.getFieldName()));

    bytes = d.getBinaryValue(FeatureIndexFields.GENE_NAMES.getFieldName());
    if (bytes != null) {
        vcfIndexEntry.setGeneNames(bytes.utf8ToString());
    }

    vcfIndexEntry.setInfo(new HashMap<>());

    // IS_EXON may be stored either as a string (legacy) or as a binary value.
    String isExonStr = d.get(FeatureIndexFields.IS_EXON.getFieldName()); //TODO: remove, in future only binary
                                                                         // value will remain
    if (isExonStr == null) {
        bytes = d.getBinaryValue(FeatureIndexFields.IS_EXON.getFieldName());
        if (bytes != null) {
            isExonStr = bytes.utf8ToString();
        }
    }
    boolean isExon = isExonStr != null && Boolean.parseBoolean(isExonStr);
    vcfIndexEntry.setExon(isExon);
    vcfIndexEntry.getInfo().put(FeatureIndexFields.IS_EXON.getFieldName(), isExon);

    BytesRef featureIdBytes = d.getBinaryValue(FeatureIndexFields.VARIATION_TYPE.getFieldName());
    if (featureIdBytes != null) {
        vcfIndexEntry.setVariationType(VariationType.valueOf(featureIdBytes.utf8ToString().toUpperCase()));
    }
    vcfIndexEntry.setFailedFilter(d.get(FeatureIndexFields.FAILED_FILTER.getFieldName()));

    IndexableField qualityField = d.getField(FeatureIndexFields.QUALITY.getFieldName());
    if (qualityField != null) {
        vcfIndexEntry.setQuality(qualityField.numericValue().doubleValue());
    }

    if (vcfInfoFields != null) {
        for (String infoField : vcfInfoFields) {
            // Hoisted: the original recomputed toLowerCase() and getBinaryValue()
            // twice per iteration.
            String storedName = infoField.toLowerCase();
            BytesRef infoBytes = d.getBinaryValue(storedName);
            if (infoBytes != null) {
                vcfIndexEntry.getInfo().put(infoField, infoBytes.utf8ToString());
            } else {
                vcfIndexEntry.getInfo().put(infoField, d.get(storedName));
            }
        }
    }

    return vcfIndexEntry;
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Converts a search hit into a typed {@link FeatureIndexEntry}. Variations get a full
 * VCF entry; bookmarks are additionally collected into {@code foundBookmarkEntries}.
 *
 * @param foundBookmarkEntries map collecting bookmark entries keyed by file id
 * @param vcfInfoFields        INFO fields to copy for variation entries, may be {@code null}
 * @throws IOException if the document cannot be loaded from the searcher
 */
private FeatureIndexEntry createIndexEntry(ScoreDoc hit, Map<Long, BookmarkIndexEntry> foundBookmarkEntries,
        IndexSearcher searcher, List<String> vcfInfoFields) throws IOException {
    int docId = hit.doc;
    Document d = searcher.doc(docId);
    FeatureType featureType = FeatureType.forValue(d.get(FeatureIndexFields.FEATURE_TYPE.getFieldName()));
    FeatureIndexEntry entry;
    switch (featureType) {
    case VARIATION:
        entry = createVcfIndexEntry(d, vcfInfoFields);
        break;
    case BOOKMARK:
        BookmarkIndexEntry bookmarkEntry = new BookmarkIndexEntry();
        foundBookmarkEntries.put(Long.parseLong(d.get(FeatureIndexFields.FILE_ID.getFieldName())),
                bookmarkEntry);
        entry = bookmarkEntry;
        break;
    default:
        entry = new FeatureIndexEntry();
    }

    entry.setFeatureType(featureType);
    BytesRef featureIdBytes = d.getBinaryValue(FeatureIndexFields.FEATURE_ID.getFieldName());
    if (featureIdBytes != null) {
        entry.setFeatureId(featureIdBytes.utf8ToString());
    }

    entry.setStartIndex(d.getField(FeatureIndexFields.START_INDEX.getFieldName()).numericValue().intValue());
    entry.setEndIndex(d.getField(FeatureIndexFields.END_INDEX.getFieldName()).numericValue().intValue());
    entry.setFeatureFileId(Long.parseLong(d.get(FeatureIndexFields.FILE_ID.getFieldName())));
    entry.setFeatureName(d.get(FeatureIndexFields.FEATURE_NAME.getFieldName()));

    // getBinaryValue() returns null for absent fields; the original chained
    // .utf8ToString() unguarded and would NPE on documents missing the
    // chromosome fields, unlike every other binary read in this class.
    BytesRef chromosomeIdBytes = d.getBinaryValue(FeatureIndexFields.CHROMOSOME_ID.getFieldName());
    if (chromosomeIdBytes != null) {
        String chromosomeId = chromosomeIdBytes.utf8ToString();
        if (!chromosomeId.isEmpty()) {
            entry.setChromosome(new Chromosome(Long.parseLong(chromosomeId)));
            BytesRef chromosomeNameBytes = d.getBinaryValue(FeatureIndexFields.CHROMOSOME_NAME.getFieldName());
            if (chromosomeNameBytes != null) {
                entry.getChromosome().setName(chromosomeNameBytes.utf8ToString());
            }
        }
    }

    return entry;
}

From source file:com.github.hotware.lucene.extension.bean.test.BeanConverterTest.java

License:BEER-WARE LICENSE

/**
 * Round-trip test for bean/document conversion. Every public field of
 * {@link TestBean} is populated with a deterministic value chosen by its
 * type, the bean is converted to a Lucene Document, and each field's stored
 * representation is asserted. Finally the document is converted back to a
 * bean and compared with the original.
 *
 * @throws IllegalArgumentException if a reflective set uses a wrong value type
 * @throws IllegalAccessException   if a field is not accessible via reflection
 */
public void testBeanDocumentConversionViceVersa() throws IllegalArgumentException, IllegalAccessException {
    BeanConverter converter = new BeanConverterImpl(new BeanInformationCacheImpl());
    Field[] fields = TestBean.class.getFields();
    TestBean testBean = new TestBean();
    for (Field field : fields) {
        String fieldName = field.getName();
        Class<?> type = field.getType();
        // Populate the field with a fixed value selected by its declared type.
        if (type.equals(int.class)) {
            field.setInt(testBean, Integer.MAX_VALUE);
        } else if (type.equals(long.class)) {
            field.setLong(testBean, Long.MAX_VALUE);
        } else if (type.equals(double.class)) {
            field.setDouble(testBean, Double.MAX_VALUE);
        } else if (type.equals(float.class)) {
            field.setFloat(testBean, Float.MAX_VALUE);
        } else if (type.equals(boolean.class)) {
            field.setBoolean(testBean, true);
        } else if (type.equals(Integer.class)) {
            field.set(testBean, Integer.MAX_VALUE);
        } else if (type.equals(Long.class)) {
            field.set(testBean, Long.MAX_VALUE);
        } else if (type.equals(Double.class)) {
            field.set(testBean, Double.MAX_VALUE);
        } else if (type.equals(Float.class)) {
            field.set(testBean, Float.MAX_VALUE);
        } else if (type.equals(Boolean.class)) {
            field.set(testBean, true);
        } else if (type.equals(String.class)) {
            field.set(testBean, "Test");
        } else if (fieldName.equals("emptySetTest")) {
            // Checked by name BEFORE the generic Set branch so it stays empty.
            field.set(testBean, new HashSet<String>());
        } else if (type.equals(Set.class)) {
            Set<String> set = new HashSet<String>();
            set.add("1");
            set.add("2");
            set.add("3");
            field.set(testBean, set);
        } else if (type.equals(List.class)) {
            List<String> list = new ArrayList<String>();
            list.add("1");
            list.add("2");
            list.add("3");
            field.set(testBean, list);
        } else if (type.equals(Object.class)) {
            field.set(testBean, new Date());
        } else {
            // Fail loudly so new TestBean field types can't be silently skipped.
            fail("type is not handled in the Unit-Test, please add " + type);
        }
        Document document = converter.beanToDocument(testBean);
        // check if all values are stored the same way they were entered
        if (fieldName.equals("serializeTest")) {
            // Serialized fields are stored as raw bytes; compare byte arrays.
            System.out.println("doing serialize equality test.");
            assertTrue(Arrays.equals(toSerializedLuceneValue(field.get(testBean)),
                    document.getBinaryValue(fieldName).bytes));
        } else if (fieldName.equals("customNameTest")) {
            // This field is stored under an annotation-supplied name, not its own.
            System.out.println("doing custom name equality test.");
            String originalValue = (String) field.get(testBean);
            String documentValue = document.get("customName");
            assertEquals(originalValue, documentValue);
        } else if (fieldName.equals("notAnnotatedTest")) {
            // Un-annotated fields must not appear in the document at all.
            System.out.println("doing not annotated test.");
            assertEquals(null, document.get(fieldName));
        } else if (fieldName.equals("listTest")) {
            // Lists map to multiple document fields with the same name, in order.
            System.out.println("doing listTest");
            @SuppressWarnings("unchecked")
            List<String> originalList = (List<String>) field.get(testBean);
            IndexableField[] documentFields = document.getFields(fieldName);
            for (int i = 0; i < originalList.size(); ++i) {
                assertEquals(originalList.get(i), documentFields[i].stringValue());
            }
        } else if (fieldName.equals("setTest")) {
            // Sets map to multiple fields too; compare as sets (order-insensitive).
            System.out.println("doing listTest");
            @SuppressWarnings("unchecked")
            Set<String> originalSet = (Set<String>) field.get(testBean);
            Set<String> docSet = new HashSet<String>();
            for (IndexableField documentField : document.getFields(fieldName)) {
                docSet.add(documentField.stringValue());
            }
            assertEquals(originalSet, docSet);
        } else if (fieldName.equals("emptySetTest")) {
            // An empty collection must produce no stored field.
            System.out.println("doing emptySetTest");
            assertEquals(null, document.get(fieldName));
        } else if (fieldName.equals("multiTest")) {
            System.out.println("doing multiTest");
            assertEquals("multiTest", document.get(fieldName));
        } else if (fieldName.equals("multiMultiTest")) {
            System.out.println("doint multiMultiTest");
            assertEquals("multiMultiTest", document.get(fieldName));
        } else {
            // normally a check is needed, but in the test-case we
            // can do this without checking for a present annotation
            BeanField bf = field.getAnnotation(BeanField.class);
            System.out.println("doing " + bf.type() + " tests on \"" + fieldName + "\".");
            assertEquals(field.get(testBean).toString(), document.get(fieldName));
            IndexableField indexField = document.getField(fieldName);
            IndexableFieldType indexFieldType = indexField.fieldType();
            // The stored field type must mirror the @BeanField annotation settings.
            assertEquals(bf.store(), indexFieldType.stored());
            assertEquals(bf.index(), indexFieldType.indexed());
            assertEquals(bf.tokenized(), indexFieldType.tokenized());
            // TODO: test if fieldType is correct?
        }
    }

    // now that all the conversion works we can safely generate
    // a document with that and work backwards :)
    System.out.println("doing reverse conversion (document to bean) test.");
    Document document = converter.beanToDocument(testBean);
    TestBean reverseBean = converter.documentToBean(TestBean.class, document);

    // setting the stuff that can not be in the document and therefore not
    // in the reverseBean
    reverseBean.notAnnotatedTest = testBean.notAnnotatedTest;
    reverseBean.notStoredButIndexedTest = testBean.notStoredButIndexedTest;
    assertTrue(testBean.equals(reverseBean));

    System.out.println("Result: conversion test successfull.");
}

From source file:com.google.gerrit.lucene.LuceneChangeIndex.java

License:Apache License

/**
 * Reconstructs a {@link ChangeData} from a Lucene result document. When the
 * document carries the serialized change proto it is decoded directly;
 * otherwise only the numeric id is read and the change data is created to be
 * loaded from the database on demand.
 *
 * @param doc the Lucene document returned by an index search
 * @return the change data assembled from the stored fields
 */
private ChangeData toChangeData(Document doc) {
    BytesRef cb = doc.getBinaryValue(CHANGE_FIELD);
    if (cb == null) {
        // No stored proto (older index schema): fall back to loading by id.
        int id = doc.getField(ID_FIELD).numericValue().intValue();
        return changeDataFactory.create(db.get(), new Change.Id(id));
    }

    // Change proto.
    Change change = ChangeProtoField.CODEC.decode(cb.bytes, cb.offset, cb.length);
    ChangeData cd = changeDataFactory.create(db.get(), change);

    // Patch sets.
    List<PatchSet> patchSets = decodeProtos(doc, PATCH_SET_FIELD, PatchSetProtoField.CODEC);
    if (!patchSets.isEmpty()) {
        // Will be an empty list for schemas prior to when this field was stored;
        // this cannot be valid since a change needs at least one patch set.
        cd.setPatchSets(patchSets);
    }

    // Approvals.
    cd.setCurrentApprovals(decodeProtos(doc, APPROVAL_FIELD, PatchSetApprovalProtoField.CODEC));

    // Changed lines. Both counts must be present to set the pair.
    IndexableField added = doc.getField(ADDED_FIELD);
    IndexableField deleted = doc.getField(DELETED_FIELD);
    if (added != null && deleted != null) {
        cd.setChangedLines(added.numericValue().intValue(), deleted.numericValue().intValue());
    }

    // Mergeable. Stored as "1"/"0"; any other value leaves mergeable unset.
    String mergeable = doc.get(MERGEABLE_FIELD);
    if ("1".equals(mergeable)) {
        cd.setMergeable(true);
    } else if ("0".equals(mergeable)) {
        cd.setMergeable(false);
    }

    // Reviewed-by. A single NOT_REVIEWED sentinel means "nobody reviewed",
    // which yields an empty (but non-null) account set.
    IndexableField[] reviewedBy = doc.getFields(REVIEWEDBY_FIELD);
    if (reviewedBy.length > 0) {
        Set<Account.Id> accounts = Sets.newHashSetWithExpectedSize(reviewedBy.length);
        for (IndexableField r : reviewedBy) {
            int id = r.numericValue().intValue();
            if (reviewedBy.length == 1 && id == ChangeField.NOT_REVIEWED) {
                break;
            }
            accounts.add(new Account.Id(id));
        }
        cd.setReviewedBy(accounts);
    }

    return cd;
}

From source file:com.ibm.watson.developer_cloud.professor_languo.ingestion.indexing.RetrieveAndRankIndexer.java

License:Open Source License

/**
 * Given a built corpus (a set of StackExchangeThreads without duplicates) and a document mapper,
 * creates Solr documents from the threads, uploads them to the Solr cluster index, and records
 * statistics for the indexing run.
 *
 * @param uniqueThreadDirPath - the path of the folder which stores the unique threads
 * @param docMapper - document mapper which maps the StackExchange instance to the document unit
 * @return the statistics during the indexing period.
 * @throws IngestionException
 * @throws SolrServerException
 * @throws IOException
 * @throws RemoteSolrException
 */
private IndexingStats indexCorpus(String uniqueThreadDirPath, DocumentMapper docMapper)
        throws IngestionException, SolrServerException, IOException, RemoteSolrException {

    final List<SolrInputDocument> batch = Lists.newArrayList();
    List<Integer> indexThreadIds = new ArrayList<Integer>();
    int indexDocNum = 0;
    StackExchangeThread thread = null;

    long startTime = System.currentTimeMillis();

    // restore the unique StackExchangeThreads from the .ser Files and index
    // them
    for (File serFile : new File(uniqueThreadDirPath).listFiles()) {
        thread = StackExchangeThreadSerializer.deserializeThreadFromBinFile(serFile.getPath());

        final SolrInputDocument solrDoc = new SolrInputDocument();

        // Copy data over from thread to the SolrInputDocument
        final Document threadDoc = docMapper.createDocument(thread);
        for (IndexableField field : threadDoc.getFields()) {
            // getBinaryValue() is null for non-binary fields, so this doubles
            // as the binary-vs-string dispatch.
            BytesRef bin = threadDoc.getBinaryValue(field.name());
            if (bin != null) {
                // Add field value as bytes if field is binary
                solrDoc.addField(field.name(), bin.bytes);
            } else {
                String value = threadDoc.get(field.name());
                if (field.name().equals(IndexDocumentFieldName.THREAD_TITLE.toString())) {
                    // boost the title
                    final float boostValue = 2.0f;
                    solrDoc.addField(field.name(), value, boostValue);
                } else {
                    solrDoc.addField(field.name(), value);
                }
            }
        }

        batch.add(solrDoc);
        indexThreadIds.add(thread.getId());

        indexDocNum++;
        // batch things to make it more efficient to index
        if (batch.size() >= 100) {
            // Progress message reports the start index of the batch being flushed.
            logger.info(MessageFormat.format(Messages.getString("RetrieveAndRank.INDEXING_AMOUNT"), //$NON-NLS-1$
                    (indexDocNum - 100), collection_name));
            final UpdateResponse addResponse = solrClient.add(collection_name, batch);
            logger.debug(addResponse);
            batch.clear();
        }
    }

    // Include any left documents in the batch
    if (!batch.isEmpty()) {
        final UpdateResponse addResponse = solrClient.add(collection_name, batch);
        logger.debug(addResponse);
    }

    logger.info(MessageFormat.format(Messages.getString("RetrieveAndRank.INDEXING_COMMITTING"), indexDocNum)); //$NON-NLS-1$

    // Commit the documents to the index so that it will be available for
    // searching.
    final UpdateResponse commitResponse = solrClient.commit(collection_name);
    logger.debug(commitResponse);
    logger.info(Messages.getString("RetrieveAndRank.INDEXING_COMMITTED")); //$NON-NLS-1$

    long endTime = System.currentTimeMillis();

    // create the indexing stats
    return createIndexingStats(indexDocNum, indexThreadIds, endTime - startTime);
}

From source file:com.ibm.watson.developer_cloud.professor_languo.ingestion.indexing.RetrieveAndRankIndexerTest.java

License:Open Source License

/**
 * Verifies that the indexed records match the deduplicated corpus: the record
 * count must equal the corpus size, and for a sample thread the indexed field
 * values (title, post id, and the serialized binary payload) must round-trip
 * intact.
 *
 * @throws IngestionException if a corpus thread cannot be deserialized
 */
private void compare_indexed_records_to_corpus() throws IngestionException {
    indexdCorpus = corpusBuilder.getUniqueThreadSetFromBinFiles();

    // The number of indexed documents must equal the corpus size.
    assertTrue("Wrong number of documents indexed", indexedRecords.size() == indexdCorpus.size());

    // Pick one serialized thread off disk and rebuild its Lucene document.
    File sampleFile = new File(corpusBuilder.getUniqueThreadDirPath()).listFiles()[0];
    StackExchangeThread sampleThread =
            StackExchangeThreadSerializer.deserializeThreadFromBinFile(sampleFile.getPath());

    final Document mappedDoc = new LuceneDocumentMapper().createDocument(sampleThread);
    SolrInputDocument indexedDoc = indexedRecords.get(0);

    for (IndexableField docField : mappedDoc.getFields()) {
        String name = docField.name();
        BytesRef binaryValue = mappedDoc.getBinaryValue(name);

        // Title and post id are stored as strings; compare their text values.
        if (name.equals(IndexDocumentFieldName.THREAD_TITLE.toString())
                || name.equals(IndexDocumentFieldName.THREAD_POST_ID.toString())) {

            String expected = mappedDoc.get(name);
            assertEquals(expected, indexedDoc.getFieldValue(name).toString());
        }

        // The serialized thread field is stored as raw bytes; compare byte-for-byte.
        if (binaryValue != null) {
            BytesRef indexedBytes = new BytesRef((byte[]) indexedDoc.getFieldValue(name));
            assertEquals(binaryValue, indexedBytes);
        }

    }
}

From source file:com.ibm.watson.developer_cloud.professor_languo.pipeline.primary_search.LuceneSearcher.java

License:Open Source License

/**
 * Runs the query against the Lucene index and converts every hit back into a
 * candidate answer, attaching the search score and 1-based rank as features.
 *
 * @param query the Lucene query to execute
 * @return the candidate answers recovered from the top-scoring documents
 * @throws SearchException if searching or thread deserialization fails
 */
@Override
public Set<CandidateAnswer> performSearch(Query query) throws SearchException {
    TopScoreDocCollector collector = TopScoreDocCollector.create(candidateAnswerNum);
    try {
        searcher.search(query, collector);
        Set<CandidateAnswer> answers = new HashSet<CandidateAnswer>();
        int rank = 1;
        for (ScoreDoc hit : collector.topDocs().scoreDocs) {
            Document hitDoc = searcher.doc(hit.doc);
            // The whole thread is stored as one serialized binary field.
            byte[] serializedThread =
                    hitDoc.getBinaryValue(IndexDocumentFieldName.SERIALIZED_THREAD.toString()).bytes;
            CandidateAnswer answer =
                    StackExchangeThreadSerializer.deserializeThreadFromBinArr(serializedThread);
            answer.setFeatureValue(PipelineConstants.FEATURE_SEARCH_SCORE, (double) hit.score);
            answer.setFeatureValue(PipelineConstants.FEATURE_SEARCH_RANK, (double) rank++);
            answers.add(answer);
        }
        return answers;

    } catch (IOException | IngestionException e) {
        throw new SearchException(e);
    }
}

From source file:com.ibm.watson.developer_cloud.professor_languo.pipeline.primary_search.RetrieveAndRankSearcherTest.java

License:Open Source License

/**
 * Builds a single-document Solr response from the first serialized thread on
 * disk and installs it as the mocked search response.
 *
 * @throws IngestionException if the thread cannot be deserialized
 */
private void set_non_empty_repsonse() throws IngestionException {
    String uniqThreadDirPath = get_unique_thread_path();
    File serializedFile = new File(uniqThreadDirPath).listFiles()[0];
    StackExchangeThread sampleThread =
            StackExchangeThreadSerializer.deserializeThreadFromBinFile(serializedFile.getPath());

    // Map the thread to a Lucene document and extract its serialized bytes.
    final Document mappedDoc = new LuceneDocumentMapper().createDocument(sampleThread);
    BytesRef serializedBytes = mappedDoc.getBinaryValue(IndexDocumentFieldName.SERIALIZED_THREAD.toString());

    SolrDocument responseDoc = new SolrDocument();
    responseDoc.addField(IndexDocumentFieldName.SERIALIZED_THREAD.toString(), serializedBytes.bytes);
    responseDoc.addField("score", Integer.MAX_VALUE);
    responseDoc.addField("featureVector", Double.MAX_VALUE);

    SolrDocumentList responseList = new SolrDocumentList();
    responseList.add(responseDoc);
    set_response(responseList);
}