Usage examples for org.apache.lucene.document.DocumentStoredFieldVisitor.getDocument()
public Document getDocument()
From source file:com.browseengine.bobo.api.BoboSegmentReader.java
License:Open Source License
/**
 * Loads the stored values of one field for one document.
 * <p>
 * Only {@code fieldname} is requested from the stored-fields visitor, so other stored
 * fields of the document are not materialized.
 *
 * @param docid     document number passed to {@code super.document}
 * @param fieldname name of the stored field to load
 * @return the result of {@code Document.getValues(fieldname)} on the loaded document
 * @throws IOException if reading the stored fields fails
 */
public String[] getStoredFieldValue(int docid, final String fieldname) throws IOException {
    final DocumentStoredFieldVisitor singleFieldVisitor = new DocumentStoredFieldVisitor(fieldname);
    super.document(docid, singleFieldVisitor);
    return singleFieldVisitor.getDocument().getValues(fieldname);
}
From source file:com.core.nlp.index.IndexReader.java
License:Apache License
/** * Returns the stored fields of the <code>n</code><sup>th</sup> * <code>Document</code> in this index. This is just * sugar for using {@link DocumentStoredFieldVisitor}. * <p>/*www .ja v a 2s .co m*/ * <b>NOTE:</b> for performance reasons, this method does not check if the * requested document is deleted, and therefore asking for a deleted document * may yield unspecified results. Usually this is not required, however you * can test if the doc is deleted by checking the {@link * Bits} returned from {@link MultiFields#getLiveDocs}. * * <b>NOTE:</b> only the content of a field is returned, * if that field was stored during indexing. Metadata * like boost, omitNorm, IndexOptions, tokenized, etc., * are not preserved. * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ // TODO: we need a separate StoredField, so that the // Document returned here contains that class not // IndexableField public final Document document(int docID) throws IOException { final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(); document(docID, visitor); return visitor.getDocument(); }
From source file:com.core.nlp.index.IndexReader.java
License:Apache License
/**
 * Variant of {@link #document(int)} that loads only the named stored fields.
 * <p>
 * Convenience wrapper around
 * {@link DocumentStoredFieldVisitor#DocumentStoredFieldVisitor(Set)}.
 *
 * @param docID        number of the document to load
 * @param fieldsToLoad names of the stored fields handed to the visitor
 * @return the document collected by the visitor
 * @throws IOException if reading the stored fields fails
 */
public final Document document(int docID, Set<String> fieldsToLoad) throws IOException {
    final DocumentStoredFieldVisitor selectiveVisitor = new DocumentStoredFieldVisitor(fieldsToLoad);
    document(docID, selectiveVisitor);
    final Document loaded = selectiveVisitor.getDocument();
    return loaded;
}
From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java
License:Apache License
DirectoryReader buildParallelReader(DirectoryReader main, SolrIndexSearcher source, boolean rebuild) { try {//from w ww. jav a2 s .co m if (source == null) { throw new Exception("Source collection is missing."); } // create as a sibling path of the main index Directory d = main.directory(); File primaryDir = null; if (d instanceof FSDirectory) { String path = ((FSDirectory) d).getDirectory().getPath(); primaryDir = new File(path); sidecarIndex = new File(primaryDir.getParentFile(), sidecarIndexLocation); } else { String secondaryPath = System.getProperty("java.io.tmpdir") + File.separator + sidecarIndexLocation + "-" + System.currentTimeMillis(); sidecarIndex = new File(secondaryPath); } // create a new tmp dir for the secondary indexes File secondaryIndex = new File(sidecarIndex, System.currentTimeMillis() + "-index"); if (rebuild) { safeDelete(sidecarIndex); } parallelFields.addAll(source.getFieldNames()); parallelFields.remove("id"); LOG.debug("building a new index"); Directory dir = FSDirectory.open(secondaryIndex); if (IndexWriter.isLocked(dir)) { // try forcing unlock try { IndexWriter.unlock(dir); } catch (Exception e) { LOG.warn("Failed to unlock " + secondaryIndex); } } int[] mergeTargets; AtomicReader[] subReaders = SidecarIndexReader.getSequentialSubReaders(main); if (subReaders == null || subReaders.length == 0) { mergeTargets = new int[] { main.maxDoc() }; } else { mergeTargets = new int[subReaders.length]; for (int i = 0; i < subReaders.length; i++) { mergeTargets[i] = subReaders[i].maxDoc(); } } Version ver = currentCore.getLatestSchema().getDefaultLuceneMatchVersion(); IndexWriterConfig cfg = new IndexWriterConfig(ver, currentCore.getLatestSchema().getAnalyzer()); //cfg.setInfoStream(System.err); cfg.setMergeScheduler(new SerialMergeScheduler()); cfg.setMergePolicy(new SidecarMergePolicy(mergeTargets, false)); IndexWriter iw = new IndexWriter(dir, cfg); LOG.info("processing " + main.maxDoc() + " docs / " + main.numDeletedDocs() + " dels in main 
index"); int boostedDocs = 0; Bits live = MultiFields.getLiveDocs(main); int targetPos = 0; int nextTarget = mergeTargets[targetPos]; BytesRef idRef = new BytesRef(); for (int i = 0; i < main.maxDoc(); i++) { if (i == nextTarget) { iw.commit(); nextTarget = nextTarget + mergeTargets[++targetPos]; } if (live != null && !live.get(i)) { addDummy(iw); // this is required to preserve doc numbers. continue; } else { DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(docIdField); main.document(i, visitor); Document doc = visitor.getDocument(); // get docId String id = doc.get(docIdField); if (id == null) { LOG.debug("missing id, docNo=" + i); addDummy(iw); continue; } else { // find the data, if any doc = lookup(source, id, idRef, parallelFields); if (doc == null) { LOG.debug("missing boost data, docId=" + id); addDummy(iw); continue; } else { LOG.debug("adding boost data, docId=" + id + ", b=" + doc); iw.addDocument(doc); boostedDocs++; } } } } iw.close(); DirectoryReader other = DirectoryReader.open(dir); LOG.info("SidecarIndexReader with " + boostedDocs + " boosted documents."); SidecarIndexReader pr = createSidecarIndexReader(main, other, sourceCollection, secondaryIndex); return pr; } catch (Exception e) { LOG.warn("Unable to build parallel index: " + e.toString(), e); LOG.warn("Proceeding with single main index."); try { return new SidecarIndexReader(this, main, null, SidecarIndexReader.getSequentialSubReaders(main), sourceCollection, null); } catch (Exception e1) { LOG.warn("Unexpected exception, returning single main index", e1); return main; } } }
From source file:demo.jaxrs.search.server.Catalog.java
License:Apache License
/**
 * Lists the indexed books as a JSON array.
 * <p>
 * Runs a match-all query limited to 1000 hits and, for each hit, adds the string value of
 * the stored {@code LuceneDocumentMetadata.SOURCE_FIELD} field to the array. The index
 * reader opened for the request is closed before returning.
 *
 * @return JSON array with one string entry per hit
 * @throws IOException if the index cannot be opened or read
 */
@GET
@Produces(MediaType.APPLICATION_JSON)
public JsonArray getBooks() throws IOException {
    final IndexReader reader = DirectoryReader.open(directory);
    final IndexSearcher searcher = new IndexSearcher(reader);
    final JsonArrayBuilder builder = Json.createArrayBuilder();

    try {
        final ScoreDoc[] hits = searcher.search(new MatchAllDocsQuery(), 1000).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            final DocumentStoredFieldVisitor sourceOnly = new DocumentStoredFieldVisitor(
                    LuceneDocumentMetadata.SOURCE_FIELD);
            reader.document(hits[i].doc, sourceOnly);
            final String source = sourceOnly.getDocument()
                    .getField(LuceneDocumentMetadata.SOURCE_FIELD)
                    .stringValue();
            builder.add(source);
        }
        return builder.build();
    } finally {
        reader.close();
    }
}
From source file:fi.semantum.strategia.Lucene.java
License:Open Source License
/**
 * Runs a query-parser search against the index of the given database and collects the
 * {@code "uuid"} stored field of every hit.
 * <p>
 * The query is parsed against the {@code "text"} field with leading wildcards allowed.
 * The reader is always closed before the method returns.
 *
 * @param databaseId identifies the index directory via {@code getDirectory}
 * @param search     query string in query-parser syntax
 * @return the {@code "uuid"} value of each hit, in hit order
 * @throws IOException on index access failure; a {@code ParseException} from the query
 *                     parser and a {@code CorruptIndexException} raised while loading a
 *                     hit are rethrown wrapped in an {@code IOException}
 */
public static synchronized List<String> search(String databaseId, String search) throws IOException {
    List<String> uuids = new ArrayList<String>();
    IndexReader reader = null;
    try {
        reader = DirectoryReader.open(getDirectory(databaseId));
        IndexSearcher searcher = new IndexSearcher(reader);

        QueryParser parser = new QueryParser(Version.LUCENE_4_9, "text", getAnalyzer());
        parser.setAllowLeadingWildcard(true);

        ScoreDoc[] hits = searcher.search(parser.parse(search), Integer.MAX_VALUE).scoreDocs;
        try {
            for (int i = 0; i < hits.length; i++) {
                DocumentStoredFieldVisitor storedFields = new DocumentStoredFieldVisitor();
                reader.document(hits[i].doc, storedFields);
                uuids.add(storedFields.getDocument().get("uuid"));
            }
        } catch (CorruptIndexException e) {
            throw new IOException(e);
        }
    } catch (ParseException e) {
        throw new IOException(e);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
    return uuids;
}
From source file:gov.nist.basekb.FreebaseSearcher.java
License:LGPL
public String getSubjectPredicateValue(String subjectURI, String predName) throws IOException { // Return the value of predicate `predName' on `subjectURI'. If there are muliple values, // return the first one indexed, if there are none, return null. // This is specialized to only retrieve the `predName' field of the subject document. // If the full document has already been retrieved, use the Document accessor instead. int subjectId = getSubjectDocID(subjectURI); if (subjectId < 0) return null; else {//from www . ja v a 2s. co m DocumentStoredFieldVisitor fieldVisitor = new DocumentStoredFieldVisitor(predName); getIndexReader().document(subjectId, fieldVisitor); Document subject = fieldVisitor.getDocument(); return getSubjectPredicateValue(subject, predName); } }
From source file:gov.nist.basekb.FreebaseSearcher.java
License:LGPL
public String[] getSubjectPredicateValues(String subjectURI, String predName) throws IOException { // Return the values of predicate `predName' on `subjectURI'. // If there are none, return an empty array. // This is specialized to only retrieve the `predName' field of the subject document. // If the full document has already been retrieved, use the Document accessor instead. int subjectId = getSubjectDocID(subjectURI); if (subjectId < 0) return emptyValues; else {//from ww w . j a v a 2 s . c o m DocumentStoredFieldVisitor fieldVisitor = new DocumentStoredFieldVisitor(predName); getIndexReader().document(subjectId, fieldVisitor); Document subject = fieldVisitor.getDocument(); return getSubjectPredicateValues(subject, predName); } }
From source file:lux.CachingDocReader.java
License:Mozilla Public License
/**
 * Returns the node for {@code docID}, using the cache when possible.
 * <p>
 * On a cache hit the hit counter is incremented and the cached node is returned.
 * Otherwise all stored fields of {@code luceneDocID} are read from {@code reader} and
 * handed to {@code getXdmNode}.
 *
 * @param docID       key used for the cache lookup and passed to {@code getXdmNode}
 * @param luceneDocID document number to read from {@code reader} on a cache miss
 * @param reader      index reader the stored fields are loaded from
 * @return the cached node, or the result of {@code getXdmNode}
 * @throws IOException if reading the stored fields fails
 */
private XdmNode get(int docID, int luceneDocID, IndexReader reader) throws IOException {
    final XdmNode cached = cache.get(docID);
    if (cached == null) {
        DocumentStoredFieldVisitor storedFields = new DocumentStoredFieldVisitor();
        reader.document(luceneDocID, storedFields);
        return getXdmNode(docID, storedFields.getDocument());
    }
    ++cacheHits;
    return cached;
}
From source file:org.apache.blur.mapreduce.lib.GenericRecordReader.java
License:Apache License
private void fetchBlurRecord() throws IOException { DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(); _fieldsReader.visitDocument(_docId, visitor); BlurRecord blurRecord = new BlurRecord(); String rowId = RowDocumentUtil.readRecord(visitor.getDocument(), blurRecord); blurRecord.setRowId(rowId);// ww w.ja va2s . c o m _rowId = new Text(rowId); _tableBlurRecord = new TableBlurRecord(_table, blurRecord); }