List of usage examples for org.apache.lucene.document.Document#getBinaryValue(String)
public final BytesRef getBinaryValue(String name)
From source file:cn.hbu.cs.esearch.core.EsearchSegmentReader.java
License:Apache License
/**
 * Returns the raw stored payload of the document at {@code docid}, read from the
 * dedicated store field ({@code AbstractEsearchIndexable.DOCUMENT_STORE_FIELD}).
 *
 * @param docid Lucene-internal document id to load via the wrapped reader {@code in}
 * @return the stored binary value, or {@code null} when the reader yields no document
 *         (a missing store field also yields {@code null} from getBinaryValue)
 * @throws IOException if the underlying reader fails while loading the document
 */
public BytesRef getStoredValue(int docid) throws IOException {
    Document doc = in.document(docid);
    if (doc != null) {
        return doc.getBinaryValue(AbstractEsearchIndexable.DOCUMENT_STORE_FIELD);
    }
    return null;
}
From source file:cn.hbu.cs.esearch.store.LuceneStore.java
License:Apache License
@Override protected BytesRef getFromStore(long uid) throws IOException { int docid = mapDocId(uid); if (docid < 0) { return null; }//from w ww . jav a 2 s . c o m IndexReader reader = null; if (currentReaderData != null) { reader = currentReaderData.reader; } if (docid >= 0 && reader != null) { Document doc = reader.document(docid); if (doc != null) { return doc.getBinaryValue(field); } } return null; }
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/**
 * Builds a {@link VcfIndexEntry} from a Lucene document produced by the VCF feature index.
 *
 * Several fields are stored in two forms (plain stored string vs. binary/DocValues);
 * this method prefers the binary form where present and falls back to the string form.
 *
 * @param d             the Lucene document for one variation
 * @param vcfInfoFields optional list of INFO field names to copy into the entry's info map;
 *                      may be {@code null} (then no INFO fields are copied)
 * @return the populated index entry (never {@code null})
 */
private VcfIndexEntry createVcfIndexEntry(Document d, List<String> vcfInfoFields) {
    VcfIndexEntry vcfIndexEntry = new VcfIndexEntry();
    vcfIndexEntry.setGene(d.get(FeatureIndexFields.GENE_ID.getFieldName()));

    // GENE_IDS / GENE_NAMES are stored as binary; decode to UTF-8 strings when present.
    BytesRef bytes = d.getBinaryValue(FeatureIndexFields.GENE_IDS.getFieldName());
    if (bytes != null) {
        vcfIndexEntry.setGeneIds(bytes.utf8ToString());
    }
    vcfIndexEntry.setGeneName(d.get(FeatureIndexFields.GENE_NAME.getFieldName()));
    bytes = d.getBinaryValue(FeatureIndexFields.GENE_NAMES.getFieldName());
    if (bytes != null) {
        vcfIndexEntry.setGeneNames(bytes.utf8ToString());
    }
    vcfIndexEntry.setInfo(new HashMap<>());

    // IS_EXON exists both as a stored string (legacy) and as a binary value.
    String isExonStr = d.get(FeatureIndexFields.IS_EXON.getFieldName()); //TODO: remove, in future only binary
    // value will remain
    if (isExonStr == null) {
        bytes = d.getBinaryValue(FeatureIndexFields.IS_EXON.getFieldName());
        if (bytes != null) {
            isExonStr = bytes.utf8ToString();
        }
    }
    boolean isExon = isExonStr != null && Boolean.parseBoolean(isExonStr);
    vcfIndexEntry.setExon(isExon);
    vcfIndexEntry.getInfo().put(FeatureIndexFields.IS_EXON.getFieldName(), isExon);

    // Variation type is stored lower-case; upper-case it to match the enum constants.
    BytesRef featureIdBytes = d.getBinaryValue(FeatureIndexFields.VARIATION_TYPE.getFieldName());
    if (featureIdBytes != null) {
        vcfIndexEntry.setVariationType(VariationType.valueOf(featureIdBytes.utf8ToString().toUpperCase()));
    }
    vcfIndexEntry.setFailedFilter(d.get(FeatureIndexFields.FAILED_FILTER.getFieldName()));

    // QUALITY is a numeric field; only set when actually stored on this document.
    IndexableField qualityField = d.getField(FeatureIndexFields.QUALITY.getFieldName());
    if (qualityField != null) {
        vcfIndexEntry.setQuality(qualityField.numericValue().doubleValue());
    }

    // Copy requested INFO fields, preferring the binary representation over the string one.
    // Note: field names are lower-cased for lookup but stored under their original casing.
    if (vcfInfoFields != null) {
        for (String infoField : vcfInfoFields) {
            if (d.getBinaryValue(infoField.toLowerCase()) != null) {
                vcfIndexEntry.getInfo().put(infoField, d.getBinaryValue(infoField.toLowerCase()).utf8ToString());
            } else {
                vcfIndexEntry.getInfo().put(infoField, d.get(infoField.toLowerCase()));
            }
        }
    }
    return vcfIndexEntry;
}
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/**
 * Materializes a {@link FeatureIndexEntry} (or a type-specific subclass) from a search hit.
 *
 * @param foundBookmarkEntries out-parameter: bookmark hits are additionally registered
 *                             here keyed by their file id
 * @param vcfInfoFields        INFO fields to populate for VARIATION hits; may be {@code null}
 * @throws IOException if the searcher fails to load the hit's document
 */
private FeatureIndexEntry createIndexEntry(ScoreDoc hit, Map<Long, BookmarkIndexEntry> foundBookmarkEntries,
        IndexSearcher searcher, List<String> vcfInfoFields) throws IOException {
    int docId = hit.doc;
    Document d = searcher.doc(docId);
    FeatureType featureType = FeatureType.forValue(d.get(FeatureIndexFields.FEATURE_TYPE.getFieldName()));
    FeatureIndexEntry entry;
    // Dispatch on the feature type to build the right entry subclass.
    switch (featureType) {
    case VARIATION:
        entry = createVcfIndexEntry(d, vcfInfoFields);
        break;
    case BOOKMARK:
        BookmarkIndexEntry bookmarkEntry = new BookmarkIndexEntry();
        foundBookmarkEntries.put(Long.parseLong(d.get(FeatureIndexFields.FILE_ID.getFieldName())), bookmarkEntry);
        entry = bookmarkEntry;
        break;
    default:
        entry = new FeatureIndexEntry();
    }
    entry.setFeatureType(featureType);
    BytesRef featureIdBytes = d.getBinaryValue(FeatureIndexFields.FEATURE_ID.getFieldName());
    if (featureIdBytes != null) {
        entry.setFeatureId(featureIdBytes.utf8ToString());
    }
    entry.setStartIndex(d.getField(FeatureIndexFields.START_INDEX.getFieldName()).numericValue().intValue());
    entry.setEndIndex(d.getField(FeatureIndexFields.END_INDEX.getFieldName()).numericValue().intValue());
    entry.setFeatureFileId(Long.parseLong(d.get(FeatureIndexFields.FILE_ID.getFieldName())));
    entry.setFeatureName(d.get(FeatureIndexFields.FEATURE_NAME.getFieldName()));
    // NOTE(review): unlike FEATURE_ID above, this dereference is not null-guarded —
    // a document missing CHROMOSOME_ID would throw NPE here; confirm the field is
    // always stored, or add a null check.
    String chromosomeId = d.getBinaryValue(FeatureIndexFields.CHROMOSOME_ID.getFieldName()).utf8ToString();
    if (!chromosomeId.isEmpty()) {
        entry.setChromosome(new Chromosome(Long.parseLong(chromosomeId)));
        entry.getChromosome()
                .setName(d.getBinaryValue(FeatureIndexFields.CHROMOSOME_NAME.getFieldName()).utf8ToString());
    }
    return entry;
}
From source file:com.github.hotware.lucene.extension.bean.test.BeanConverterTest.java
License:BEER-WARE LICENSE
/**
 * Round-trip test: fills every public field of {@code TestBean} reflectively, converts
 * the bean to a Lucene {@link Document}, verifies each field landed in the document as
 * expected (per-field special cases below), then converts the document back to a bean
 * and checks equality with the original.
 *
 * The per-field verification is driven by field NAME for special cases
 * (serializeTest, customNameTest, notAnnotatedTest, listTest, setTest, emptySetTest,
 * multiTest, multiMultiTest) and falls back to a generic stored-string + field-type
 * check using the {@code BeanField} annotation.
 */
public void testBeanDocumentConversionViceVersa() throws IllegalArgumentException, IllegalAccessException {
    BeanConverter converter = new BeanConverterImpl(new BeanInformationCacheImpl());
    Field[] fields = TestBean.class.getFields();
    TestBean testBean = new TestBean();
    for (Field field : fields) {
        String fieldName = field.getName();
        Class<?> type = field.getType();
        // Populate the field with a deterministic extreme/sample value per type.
        // Order matters: emptySetTest must be matched by NAME before the generic
        // Set.class branch, so it appears before type.equals(Set.class).
        if (type.equals(int.class)) {
            field.setInt(testBean, Integer.MAX_VALUE);
        } else if (type.equals(long.class)) {
            field.setLong(testBean, Long.MAX_VALUE);
        } else if (type.equals(double.class)) {
            field.setDouble(testBean, Double.MAX_VALUE);
        } else if (type.equals(float.class)) {
            field.setFloat(testBean, Float.MAX_VALUE);
        } else if (type.equals(boolean.class)) {
            field.setBoolean(testBean, true);
        } else if (type.equals(Integer.class)) {
            field.set(testBean, Integer.MAX_VALUE);
        } else if (type.equals(Long.class)) {
            field.set(testBean, Long.MAX_VALUE);
        } else if (type.equals(Double.class)) {
            field.set(testBean, Double.MAX_VALUE);
        } else if (type.equals(Float.class)) {
            field.set(testBean, Float.MAX_VALUE);
        } else if (type.equals(Boolean.class)) {
            field.set(testBean, true);
        } else if (type.equals(String.class)) {
            field.set(testBean, "Test");
        } else if (fieldName.equals("emptySetTest")) {
            field.set(testBean, new HashSet<String>());
        } else if (type.equals(Set.class)) {
            Set<String> set = new HashSet<String>();
            set.add("1");
            set.add("2");
            set.add("3");
            field.set(testBean, set);
        } else if (type.equals(List.class)) {
            List<String> list = new ArrayList<String>();
            list.add("1");
            list.add("2");
            list.add("3");
            field.set(testBean, list);
        } else if (type.equals(Object.class)) {
            field.set(testBean, new Date());
        } else {
            // Guard against TestBean growing a field type this test doesn't cover.
            fail("type is not handled in the Unit-Test, please add " + type);
        }
        // Convert after each field assignment and verify this field's representation.
        Document document = converter.beanToDocument(testBean);
        // check if all values are stored the same way they were entered
        if (fieldName.equals("serializeTest")) {
            System.out.println("doing serialize equality test.");
            // Serialized fields are stored as binary; compare raw bytes.
            assertTrue(Arrays.equals(toSerializedLuceneValue(field.get(testBean)),
                    document.getBinaryValue(fieldName).bytes));
        } else if (fieldName.equals("customNameTest")) {
            System.out.println("doing custom name equality test.");
            // Stored under the annotation-supplied name, not the Java field name.
            String originalValue = (String) field.get(testBean);
            String documentValue = document.get("customName");
            assertEquals(originalValue, documentValue);
        } else if (fieldName.equals("notAnnotatedTest")) {
            System.out.println("doing not annotated test.");
            // Unannotated fields must not appear in the document at all.
            assertEquals(null, document.get(fieldName));
        } else if (fieldName.equals("listTest")) {
            System.out.println("doing listTest");
            // Lists map to repeated fields; order must be preserved.
            @SuppressWarnings("unchecked")
            List<String> originalList = (List<String>) field.get(testBean);
            IndexableField[] documentFields = document.getFields(fieldName);
            for (int i = 0; i < originalList.size(); ++i) {
                assertEquals(originalList.get(i), documentFields[i].stringValue());
            }
        } else if (fieldName.equals("setTest")) {
            System.out.println("doing listTest");
            // Sets map to repeated fields; compare as sets (order-insensitive).
            @SuppressWarnings("unchecked")
            Set<String> originalSet = (Set<String>) field.get(testBean);
            Set<String> docSet = new HashSet<String>();
            for (IndexableField documentField : document.getFields(fieldName)) {
                docSet.add(documentField.stringValue());
            }
            assertEquals(originalSet, docSet);
        } else if (fieldName.equals("emptySetTest")) {
            System.out.println("doing emptySetTest");
            // An empty collection yields no stored fields.
            assertEquals(null, document.get(fieldName));
        } else if (fieldName.equals("multiTest")) {
            System.out.println("doing multiTest");
            assertEquals("multiTest", document.get(fieldName));
        } else if (fieldName.equals("multiMultiTest")) {
            System.out.println("doint multiMultiTest");
            assertEquals("multiMultiTest", document.get(fieldName));
        } else {
            // normally a check is needed, but in the test-case we
            // can do this without checking for a present annotation
            BeanField bf = field.getAnnotation(BeanField.class);
            System.out.println("doing " + bf.type() + " tests on \"" + fieldName + "\".");
            assertEquals(field.get(testBean).toString(), document.get(fieldName));
            // Cross-check the Lucene field type flags against the annotation.
            IndexableField indexField = document.getField(fieldName);
            IndexableFieldType indexFieldType = indexField.fieldType();
            assertEquals(bf.store(), indexFieldType.stored());
            assertEquals(bf.index(), indexFieldType.indexed());
            assertEquals(bf.tokenized(), indexFieldType.tokenized());
            // TODO: test if fieldType is correct?
        }
    }
    // now that all the conversion works we can safely generate
    // a document with that and work backwards :)
    System.out.println("doing reverse conversion (document to bean) test.");
    Document document = converter.beanToDocument(testBean);
    TestBean reverseBean = converter.documentToBean(TestBean.class, document);
    // setting the stuff that can not be in the document and therefore not
    // in the reverseBean
    reverseBean.notAnnotatedTest = testBean.notAnnotatedTest;
    reverseBean.notStoredButIndexedTest = testBean.notStoredButIndexedTest;
    assertTrue(testBean.equals(reverseBean));
    System.out.println("Result: conversion test successfull.");
}
From source file:com.google.gerrit.lucene.LuceneChangeIndex.java
License:Apache License
/**
 * Reconstructs a {@link ChangeData} from an index document.
 *
 * Fast path: when the full change proto is stored ({@code CHANGE_FIELD}), everything is
 * decoded from the document. Fallback: older schema documents carry only the numeric id,
 * so the change is loaded from the database instead.
 *
 * @param doc the Lucene document for one change
 * @return a populated ChangeData; partially populated for legacy-schema documents
 */
private ChangeData toChangeData(Document doc) {
    BytesRef cb = doc.getBinaryValue(CHANGE_FIELD);
    if (cb == null) {
        // Legacy schema: no stored proto — fall back to a DB-backed ChangeData.
        int id = doc.getField(ID_FIELD).numericValue().intValue();
        return changeDataFactory.create(db.get(), new Change.Id(id));
    }
    // Change proto.
    Change change = ChangeProtoField.CODEC.decode(cb.bytes, cb.offset, cb.length);
    ChangeData cd = changeDataFactory.create(db.get(), change);

    // Patch sets.
    List<PatchSet> patchSets = decodeProtos(doc, PATCH_SET_FIELD, PatchSetProtoField.CODEC);
    if (!patchSets.isEmpty()) {
        // Will be an empty list for schemas prior to when this field was stored;
        // this cannot be valid since a change needs at least one patch set.
        cd.setPatchSets(patchSets);
    }

    // Approvals.
    cd.setCurrentApprovals(decodeProtos(doc, APPROVAL_FIELD, PatchSetApprovalProtoField.CODEC));

    // Changed lines.
    IndexableField added = doc.getField(ADDED_FIELD);
    IndexableField deleted = doc.getField(DELETED_FIELD);
    if (added != null && deleted != null) {
        cd.setChangedLines(added.numericValue().intValue(), deleted.numericValue().intValue());
    }

    // Mergeable: stored as "1"/"0"; any other value leaves the flag unset.
    String mergeable = doc.get(MERGEABLE_FIELD);
    if ("1".equals(mergeable)) {
        cd.setMergeable(true);
    } else if ("0".equals(mergeable)) {
        cd.setMergeable(false);
    }

    // Reviewed-by.
    IndexableField[] reviewedBy = doc.getFields(REVIEWEDBY_FIELD);
    if (reviewedBy.length > 0) {
        Set<Account.Id> accounts = Sets.newHashSetWithExpectedSize(reviewedBy.length);
        for (IndexableField r : reviewedBy) {
            int id = r.numericValue().intValue();
            // A single NOT_REVIEWED sentinel means "nobody reviewed" — yield an empty set.
            if (reviewedBy.length == 1 && id == ChangeField.NOT_REVIEWED) {
                break;
            }
            accounts.add(new Account.Id(id));
        }
        cd.setReviewedBy(accounts);
    }
    return cd;
}
From source file:com.ibm.watson.developer_cloud.professor_languo.ingestion.indexing.RetrieveAndRankIndexer.java
License:Open Source License
/**
 * Given a built corpus(a set of StackExchangeThreads without duplicates) and a document mapper,
 * create solr documents from the threads and upload them to the solr cluster index and record the
 * statistics during the indexing period.
 *
 * @param uniqueThreadDirPath - the path of the folder which stores the unique threads
 * @param docMapper - document mapper which maps the StackExchange instance to the document unit
 * @return the statistics during the indexing period.
 * @throws IngestionException
 * @throws SolrServerException
 * @throws IOException
 * @throws RemoteSolrException
 */
private IndexingStats indexCorpus(String uniqueThreadDirPath, DocumentMapper docMapper)
        throws IngestionException, SolrServerException, IOException, RemoteSolrException {
    final List<SolrInputDocument> batch = Lists.newArrayList();
    List<Integer> indexThreadIds = new ArrayList<Integer>();
    int indexDocNum = 0;
    StackExchangeThread thread = null;
    long startTime = System.currentTimeMillis();
    // restore the unique StackExchangeThreads from the .ser Files and index
    // them
    for (File serFile : new File(uniqueThreadDirPath).listFiles()) {
        thread = StackExchangeThreadSerializer.deserializeThreadFromBinFile(serFile.getPath());
        final SolrInputDocument solrDoc = new SolrInputDocument();
        // Copy data over from thread to the SolrInputDocument
        final Document threadDoc = docMapper.createDocument(thread);
        for (IndexableField field : threadDoc.getFields()) {
            BytesRef bin = threadDoc.getBinaryValue(field.name());
            if (bin != null) {
                // Add field value as bytes if field is binary
                solrDoc.addField(field.name(), bin.bytes);
            } else {
                String value = threadDoc.get(field.name());
                if (field.name().equals(IndexDocumentFieldName.THREAD_TITLE.toString())) {
                    // boost the title
                    final float boostValue = 2.0f;
                    solrDoc.addField(field.name(), value, boostValue);
                } else {
                    solrDoc.addField(field.name(), value);
                }
            }
        }
        batch.add(solrDoc);
        indexThreadIds.add(thread.getId());
        indexDocNum++;
        // batch things to make it more efficient to index
        if (batch.size() >= 100) {
            logger.info(MessageFormat.format(Messages.getString("RetrieveAndRank.INDEXING_AMOUNT"), //$NON-NLS-1$
                    (indexDocNum - 100), collection_name)); // );
            final UpdateResponse addResponse = solrClient.add(collection_name, batch);
            logger.debug(addResponse);
            batch.clear();
        }
    }
    // Include any left documents in the batch
    if (!batch.isEmpty()) {
        final UpdateResponse addResponse = solrClient.add(collection_name, batch);
        logger.debug(addResponse);
    }
    logger.info(MessageFormat.format(Messages.getString("RetrieveAndRank.INDEXING_COMMITTING"), indexDocNum)); //$NON-NLS-1$
    // Commit the documents to the index so that it will be available for
    // searching.
    final UpdateResponse commitResponse = solrClient.commit(collection_name);
    logger.debug(commitResponse);
    logger.info(Messages.getString("RetrieveAndRank.INDEXING_COMMITTED")); //$NON-NLS-1$
    long endTime = System.currentTimeMillis();
    // create the indexing stats
    return createIndexingStats(indexDocNum, indexThreadIds, endTime - startTime);
}
From source file:com.ibm.watson.developer_cloud.professor_languo.ingestion.indexing.RetrieveAndRankIndexerTest.java
License:Open Source License
/**
 * check if the deserialized candidate answers are exactly the same as the candidate answers we
 * expected. Since the query is "question Title: what is right", the expected candidate answers
 * should be the entire corpus.
 *
 * @throws IngestionException
 */
private void compare_indexed_records_to_corpus() throws IngestionException {
    indexdCorpus = corpusBuilder.getUniqueThreadSetFromBinFiles();
    // Check that the size of the corpus is the same as the size of the
    // indexed documents
    assertTrue("Wrong number of documents indexed", indexedRecords.size() == indexdCorpus.size());
    // Check that the indexed document in the corpus is in the index.
    // Only the FIRST serialized thread / first record are compared here.
    File serFile = new File(corpusBuilder.getUniqueThreadDirPath()).listFiles()[0];
    StackExchangeThread thread = StackExchangeThreadSerializer.deserializeThreadFromBinFile(serFile.getPath());
    final Document luceneDoc = new LuceneDocumentMapper().createDocument(thread);
    SolrInputDocument recordDoc = indexedRecords.get(0);
    for (IndexableField field : luceneDoc.getFields()) {
        BytesRef bin = luceneDoc.getBinaryValue(field.name());
        // Check that indexed fields (title and id) are indexed correctly
        if (field.name().equals(IndexDocumentFieldName.THREAD_TITLE.toString())
                || field.name().equals(IndexDocumentFieldName.THREAD_POST_ID.toString())) {
            String value = luceneDoc.get(field.name());
            assertEquals(value, recordDoc.getFieldValue(field.name()).toString());
        }
        // Check that indexed serialized field is indexed correctly
        if (bin != null) {
            // Solr hands the binary field back as byte[]; wrap it to compare as BytesRef.
            BytesRef recordbin = new BytesRef((byte[]) recordDoc.getFieldValue(field.name()));
            assertEquals(bin, recordbin);
        }
    }
}
From source file:com.ibm.watson.developer_cloud.professor_languo.pipeline.primary_search.LuceneSearcher.java
License:Open Source License
/**
 * Runs the query against the Lucene index and deserializes each hit's stored
 * SERIALIZED_THREAD payload back into a {@link CandidateAnswer}, annotating each
 * answer with its search score and 1-based rank.
 *
 * @param query the Lucene query to execute; at most {@code candidateAnswerNum} hits are collected
 * @return the set of deserialized candidate answers (unordered; rank is carried as a feature)
 * @throws SearchException wrapping any IO or deserialization failure
 */
@Override
public Set<CandidateAnswer> performSearch(Query query) throws SearchException {
    TopScoreDocCollector collector = TopScoreDocCollector.create(candidateAnswerNum);
    try {
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        Set<CandidateAnswer> candidateAnswers = new HashSet<CandidateAnswer>();
        int rank = 1;
        for (ScoreDoc scoreDoc : hits) {
            Document doc = searcher.doc(scoreDoc.doc);
            // NOTE(review): getBinaryValue() result is dereferenced without a null check —
            // a hit lacking the SERIALIZED_THREAD field would NPE here; presumably every
            // indexed document carries it. Confirm against the indexer.
            byte[] binCode = doc.getBinaryValue(IndexDocumentFieldName.SERIALIZED_THREAD.toString()).bytes;
            CandidateAnswer candidateAnswer = StackExchangeThreadSerializer
                    .deserializeThreadFromBinArr(binCode);
            candidateAnswer.setFeatureValue(PipelineConstants.FEATURE_SEARCH_SCORE, (double) scoreDoc.score);
            candidateAnswer.setFeatureValue(PipelineConstants.FEATURE_SEARCH_RANK, (double) rank++);
            candidateAnswers.add(candidateAnswer);
        }
        return candidateAnswers;
    } catch (IOException | IngestionException e) {
        throw new SearchException(e);
    }
}
From source file:com.ibm.watson.developer_cloud.professor_languo.pipeline.primary_search.RetrieveAndRankSearcherTest.java
License:Open Source License
/**
 * Test fixture helper: builds a single-document Solr response from the first serialized
 * thread on disk and installs it via {@code set_response}, so searcher tests get a
 * non-empty result with known score/featureVector sentinels.
 *
 * NOTE(review): method name has a typo ("repsonse"); left as-is since callers
 * elsewhere in the file may reference it.
 *
 * @throws IngestionException if thread deserialization fails
 */
private void set_non_empty_repsonse() throws IngestionException {
    String uniqThreadDirPath = get_unique_thread_path();
    SolrDocumentList doclist = new SolrDocumentList();
    SolrDocument doc = new SolrDocument();
    // Take the first serialized thread and map it to a Lucene document.
    File serFile = new File(uniqThreadDirPath).listFiles()[0];
    StackExchangeThread thread = StackExchangeThreadSerializer.deserializeThreadFromBinFile(serFile.getPath());
    final Document luceneDoc = new LuceneDocumentMapper().createDocument(thread);
    // Copy the serialized-thread bytes plus sentinel score/feature values into the response doc.
    BytesRef bin = luceneDoc.getBinaryValue(IndexDocumentFieldName.SERIALIZED_THREAD.toString());
    doc.addField(IndexDocumentFieldName.SERIALIZED_THREAD.toString(), bin.bytes);
    doc.addField("score", Integer.MAX_VALUE);
    doc.addField("featureVector", Double.MAX_VALUE);
    doclist.add(doc);
    set_response(doclist);
}