List of usage examples for org.apache.lucene.document.DocumentStoredFieldVisitor
Constructor: public DocumentStoredFieldVisitor(String... fields) — creates a visitor that loads only the named stored fields.
From source file:com.browseengine.bobo.api.BoboSegmentReader.java
License:Open Source License
/**
 * Returns all stored values of {@code fieldname} for the given document.
 * Only the requested field is loaded from the stored-fields file, which is
 * cheaper than materialising the whole document.
 *
 * @param docid     segment-local document number
 * @param fieldname stored field to load
 * @return the stored values of the field (empty array if none)
 * @throws IOException on index read failure
 */
public String[] getStoredFieldValue(int docid, final String fieldname) throws IOException {
    final DocumentStoredFieldVisitor fieldVisitor = new DocumentStoredFieldVisitor(fieldname);
    super.document(docid, fieldVisitor);
    final Document stored = fieldVisitor.getDocument();
    return stored.getValues(fieldname);
}
From source file:com.core.nlp.index.IndexReader.java
License:Apache License
/** * Like {@link #document(int)} but only loads the specified * fields. Note that this is simply sugar for {@link * DocumentStoredFieldVisitor#DocumentStoredFieldVisitor(Set)}. *///from w w w .j a va2 s . com public final Document document(int docID, Set<String> fieldsToLoad) throws IOException { final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(fieldsToLoad); document(docID, visitor); return visitor.getDocument(); }
From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java
License:Apache License
/**
 * Builds a "sidecar" parallel index next to the main index and returns a
 * reader that joins the two. On any failure this method deliberately falls
 * back to the main index alone (best-effort semantics) rather than throwing.
 *
 * @param main    the primary index reader
 * @param source  searcher over the collection that supplies the sidecar data
 * @param rebuild if true, delete any previous sidecar index first
 * @return a SidecarIndexReader combining main + sidecar, or {@code main} on failure
 */
DirectoryReader buildParallelReader(DirectoryReader main, SolrIndexSearcher source, boolean rebuild) {
    try {
        if (source == null) {
            throw new Exception("Source collection is missing.");
        }
        // Create the sidecar as a sibling path of the main index when the main
        // index lives on the filesystem; otherwise fall back to a tmp dir.
        Directory d = main.directory();
        File primaryDir = null;
        if (d instanceof FSDirectory) {
            String path = ((FSDirectory) d).getDirectory().getPath();
            primaryDir = new File(path);
            sidecarIndex = new File(primaryDir.getParentFile(), sidecarIndexLocation);
        } else {
            String secondaryPath = System.getProperty("java.io.tmpdir") + File.separator
                    + sidecarIndexLocation + "-" + System.currentTimeMillis();
            sidecarIndex = new File(secondaryPath);
        }
        // Create a new timestamped tmp dir for the secondary index.
        File secondaryIndex = new File(sidecarIndex, System.currentTimeMillis() + "-index");
        if (rebuild) {
            safeDelete(sidecarIndex);
        }
        // The sidecar carries every field of the source except the join key.
        parallelFields.addAll(source.getFieldNames());
        parallelFields.remove("id");
        LOG.debug("building a new index");
        Directory dir = FSDirectory.open(secondaryIndex);
        if (IndexWriter.isLocked(dir)) {
            // Try forcing unlock; a stale lock is expected after a crash.
            try {
                IndexWriter.unlock(dir);
            } catch (Exception e) {
                LOG.warn("Failed to unlock " + secondaryIndex);
            }
        }
        // Mirror the per-segment doc counts of the main index so that the
        // sidecar's segment boundaries line up with the main index's.
        int[] mergeTargets;
        AtomicReader[] subReaders = SidecarIndexReader.getSequentialSubReaders(main);
        if (subReaders == null || subReaders.length == 0) {
            mergeTargets = new int[] { main.maxDoc() };
        } else {
            mergeTargets = new int[subReaders.length];
            for (int i = 0; i < subReaders.length; i++) {
                mergeTargets[i] = subReaders[i].maxDoc();
            }
        }
        Version ver = currentCore.getLatestSchema().getDefaultLuceneMatchVersion();
        IndexWriterConfig cfg = new IndexWriterConfig(ver, currentCore.getLatestSchema().getAnalyzer());
        //cfg.setInfoStream(System.err);
        // Serial merging + the custom policy keep segment alignment intact.
        cfg.setMergeScheduler(new SerialMergeScheduler());
        cfg.setMergePolicy(new SidecarMergePolicy(mergeTargets, false));
        IndexWriter iw = new IndexWriter(dir, cfg);
        LOG.info("processing " + main.maxDoc() + " docs / " + main.numDeletedDocs() + " dels in main index");
        int boostedDocs = 0;
        Bits live = MultiFields.getLiveDocs(main);
        int targetPos = 0;
        int nextTarget = mergeTargets[targetPos];
        BytesRef idRef = new BytesRef();
        // Walk every doc slot of the main index, emitting exactly one sidecar
        // doc per slot so doc numbers stay in lock-step.
        for (int i = 0; i < main.maxDoc(); i++) {
            if (i == nextTarget) {
                // Segment boundary reached: commit so the sidecar gets a
                // matching segment break.
                iw.commit();
                nextTarget = nextTarget + mergeTargets[++targetPos];
            }
            if (live != null && !live.get(i)) {
                addDummy(iw); // this is required to preserve doc numbers.
                continue;
            } else {
                // Load only the id field of the main doc.
                DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(docIdField);
                main.document(i, visitor);
                Document doc = visitor.getDocument();
                // get docId
                String id = doc.get(docIdField);
                if (id == null) {
                    LOG.debug("missing id, docNo=" + i);
                    addDummy(iw);
                    continue;
                } else {
                    // find the data, if any
                    doc = lookup(source, id, idRef, parallelFields);
                    if (doc == null) {
                        LOG.debug("missing boost data, docId=" + id);
                        addDummy(iw);
                        continue;
                    } else {
                        LOG.debug("adding boost data, docId=" + id + ", b=" + doc);
                        iw.addDocument(doc);
                        boostedDocs++;
                    }
                }
            }
        }
        iw.close();
        DirectoryReader other = DirectoryReader.open(dir);
        LOG.info("SidecarIndexReader with " + boostedDocs + " boosted documents.");
        SidecarIndexReader pr = createSidecarIndexReader(main, other, sourceCollection, secondaryIndex);
        return pr;
    } catch (Exception e) {
        // Best effort: never fail the search side because the sidecar build
        // broke — degrade to the plain main index instead.
        LOG.warn("Unable to build parallel index: " + e.toString(), e);
        LOG.warn("Proceeding with single main index.");
        try {
            return new SidecarIndexReader(this, main, null,
                    SidecarIndexReader.getSequentialSubReaders(main), sourceCollection, null);
        } catch (Exception e1) {
            LOG.warn("Unexpected exception, returning single main index", e1);
            return main;
        }
    }
}
From source file:com.o19s.solr.swan.highlight.TermVectorFun.java
License:Apache License
/**
 * Exploratory test: indexes a few documents with term vectors
 * (positions + offsets), runs a SpanTermQuery for "fleece" and dumps the
 * matching spans and the term vector of each matching doc to stdout.
 * NOTE(review): the scraped source appears to have lost at least one closing
 * brace near the end; the brace placement below is a best-effort
 * reconstruction — confirm against the original file.
 */
@Test
public void testBlah() throws IOException {
    RAMDirectory ramDir = new RAMDirectory();
    // Index some made up content
    IndexWriterConfig iwf = new IndexWriterConfig(Version.LUCENE_47, new StandardAnalyzer(Version.LUCENE_47));
    IndexWriter writer = new IndexWriter(ramDir, iwf);
    // Field type storing full term-vector information (vectors, positions,
    // offsets) so spans can be correlated with vector terms below.
    FieldType ft = new FieldType();
    ft.setIndexed(true);
    ft.setTokenized(true);
    ft.setStored(true);
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.freeze();
    for (int i = 0; i < DOCS.length; i++) {
        Document doc = new Document();
        StringField id = new StringField("id", "doc_" + i, StringField.Store.YES);
        doc.add(id);
        // Store both position and offset information
        Field text = new Field("content", DOCS[i], ft);
        // Field.Index.ANALYZED,
        // Field.TermVector.WITH_POSITIONS_OFFSETS);
        doc.add(text);
        writer.addDocument(doc);
    }
    //writer.close();
    // Get a searcher; opening the reader from the live writer (NRT) so the
    // writer does not need to be closed first.
    AtomicReader dr = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(writer, true));
    IndexSearcher searcher = new IndexSearcher(dr);
    // Do a search using SpanQuery
    SpanTermQuery fleeceQ = new SpanTermQuery(new Term("content", "fleece"));
    TopDocs results = searcher.search(fleeceQ, 10);
    for (int i = 0; i < results.scoreDocs.length; i++) {
        ScoreDoc scoreDoc = results.scoreDocs[i];
        System.out.println("Score Doc: " + scoreDoc);
    }
    IndexReader reader = searcher.getIndexReader();
    Bits acceptDocs = null;
    Map<Term, TermContext> termContexts = new HashMap<Term, TermContext>();
    Spans spans = fleeceQ.getSpans(dr.getContext(), acceptDocs, termContexts);
    while (spans.next()) {
        System.out.println("Doc: " + spans.doc() + " Start: " + spans.start() + " End: " + spans.end());
        // Load only the "content" stored field of the matching doc.
        DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("content");
        reader.document(spans.doc(), visitor);
        Terms terms = reader.getTermVector(spans.doc(), "content");
        TermsEnum tenum = terms.iterator(null);
        // AttributeSource as = tenum.attributes();
        while (tenum.next() != null) {
            System.out.println(tenum.term().utf8ToString());
        }
        for (long pos = 0L; pos < spans.end(); pos++) {
            // tenum.next();
            // if (tenum.ord()<pos) continue;
            // System.out.println(tenum.term());
            // }
            reader.document(spans.doc(), visitor);
        }
        // String[] values = visitor.getDocument().getValues("content");
        // List<String> a = new ArrayList<String>();
        // // build up the window
        // tvm.start = spans.start() - window;
        // tvm.end = spans.end() + window;
        // reader.getTermFreqVector(spans.doc(), "content", tvm);
        // for (WindowEntry entry : tvm.entries.values()) {
        //     System.out.println("Entry: " + entry);
        // }
        // // clear out the entries for the next round
        // tvm.entries.clear();
    }
}
From source file:demo.jaxrs.search.server.Catalog.java
License:Apache License
/**
 * Returns the stored source field of every indexed book as a JSON array.
 *
 * @return JSON array with one source-field value per document (capped at 1000)
 * @throws IOException if the index cannot be opened or read
 */
@GET
@Produces(MediaType.APPLICATION_JSON)
public JsonArray getBooks() throws IOException {
    // try-with-resources replaces the original manual try/finally and
    // guarantees the reader is closed on every path.
    try (final IndexReader reader = DirectoryReader.open(directory)) {
        final IndexSearcher searcher = new IndexSearcher(reader);
        final JsonArrayBuilder builder = Json.createArrayBuilder();
        // MatchAllDocsQuery enumerates every document in the index.
        final Query query = new MatchAllDocsQuery();
        for (final ScoreDoc scoreDoc : searcher.search(query, 1000).scoreDocs) {
            // Load only the source field rather than the whole stored document.
            final DocumentStoredFieldVisitor fieldVisitor =
                    new DocumentStoredFieldVisitor(LuceneDocumentMetadata.SOURCE_FIELD);
            reader.document(scoreDoc.doc, fieldVisitor);
            builder.add(fieldVisitor.getDocument()
                    .getField(LuceneDocumentMetadata.SOURCE_FIELD).stringValue());
        }
        return builder.build();
    }
}
From source file:gov.nist.basekb.FreebaseSearcher.java
License:LGPL
public String getSubjectPredicateValue(String subjectURI, String predName) throws IOException { // Return the value of predicate `predName' on `subjectURI'. If there are muliple values, // return the first one indexed, if there are none, return null. // This is specialized to only retrieve the `predName' field of the subject document. // If the full document has already been retrieved, use the Document accessor instead. int subjectId = getSubjectDocID(subjectURI); if (subjectId < 0) return null; else {//from w ww . ja v a 2 s.c o m DocumentStoredFieldVisitor fieldVisitor = new DocumentStoredFieldVisitor(predName); getIndexReader().document(subjectId, fieldVisitor); Document subject = fieldVisitor.getDocument(); return getSubjectPredicateValue(subject, predName); } }
From source file:gov.nist.basekb.FreebaseSearcher.java
License:LGPL
public String[] getSubjectPredicateValues(String subjectURI, String predName) throws IOException { // Return the values of predicate `predName' on `subjectURI'. // If there are none, return an empty array. // This is specialized to only retrieve the `predName' field of the subject document. // If the full document has already been retrieved, use the Document accessor instead. int subjectId = getSubjectDocID(subjectURI); if (subjectId < 0) return emptyValues; else {/* w ww. j a v a 2 s.c o m*/ DocumentStoredFieldVisitor fieldVisitor = new DocumentStoredFieldVisitor(predName); getIndexReader().document(subjectId, fieldVisitor); Document subject = fieldVisitor.getDocument(); return getSubjectPredicateValues(subject, predName); } }
From source file:org.apache.mahout.text.LuceneStorageConfiguration.java
License:Apache License
public DocumentStoredFieldVisitor getStoredFieldVisitor() { Set<String> fieldSet = Sets.newHashSet(idField); fieldSet.addAll(fields);//from w ww. ja va2s . c o m return new DocumentStoredFieldVisitor(fieldSet); }
From source file:org.apache.solr.search.grouping.distributed.shardresultserializer.TopGroupsResultTransformer.java
License:Apache License
/**
 * Fetches a document from the searcher, loading only its unique-key field.
 *
 * @param uniqueField schema field holding the unique key
 * @param doc         document number to fetch
 * @return a document containing only the unique-key field
 * @throws IOException on index read failure
 */
private Document retrieveDocument(final SchemaField uniqueField, int doc) throws IOException {
    final DocumentStoredFieldVisitor fieldVisitor = new DocumentStoredFieldVisitor(uniqueField.getName());
    rb.req.getSearcher().doc(doc, fieldVisitor);
    return fieldVisitor.getDocument();
}
From source file:org.fao.geonet.component.csw.GetDomain.java
License:Open Source License
public static List<Element> handlePropertyName(CatalogConfiguration catalogConfig, String[] propertyNames, ServiceContext context, boolean freq, int maxRecords, String cswServiceSpecificConstraint, LuceneConfig luceneConfig) throws Exception { List<Element> domainValuesList = new ArrayList<Element>(); if (Log.isDebugEnabled(Geonet.CSW)) Log.debug(Geonet.CSW, "Handling property names '" + Arrays.toString(propertyNames) + "' with max records of " + maxRecords); for (int i = 0; i < propertyNames.length; i++) { // Initialize list of values element. Element listOfValues = null; // Generate DomainValues element Element domainValues = new Element("DomainValues", Csw.NAMESPACE_CSW); // FIXME what should be the type ??? domainValues.setAttribute("type", "csw:Record"); String property = propertyNames[i].trim(); // Set propertyName in any case. Element pn = new Element("PropertyName", Csw.NAMESPACE_CSW); domainValues.addContent(pn.setText(property)); GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME); SearchManager sm = gc.getBean(SearchManager.class); IndexAndTaxonomy indexAndTaxonomy = sm.getNewIndexReader(null); try {/*from w w w. j a va2s . 
c om*/ GeonetworkMultiReader reader = indexAndTaxonomy.indexReader; BooleanQuery groupsQuery = (BooleanQuery) CatalogSearcher.getGroupsQuery(context); BooleanQuery query = null; // Apply CSW service specific constraint if (StringUtils.isNotEmpty(cswServiceSpecificConstraint)) { Query constraintQuery = parseLuceneQuery(cswServiceSpecificConstraint, luceneConfig); query = new BooleanQuery(); BooleanClause.Occur occur = LuceneUtils.convertRequiredAndProhibitedToOccur(true, false); query.add(groupsQuery, occur); query.add(constraintQuery, occur); } else { query = groupsQuery; } List<Pair<String, Boolean>> sortFields = Collections .singletonList(Pair.read(Geonet.SearchResult.SortBy.RELEVANCE, true)); Sort sort = LuceneSearcher.makeSort(sortFields, context.getLanguage(), false); CachingWrapperFilter filter = null; Pair<TopDocs, Element> searchResults = LuceneSearcher.doSearchAndMakeSummary(maxRecords, 0, maxRecords, context.getLanguage(), null, luceneConfig, reader, query, filter, sort, null, false); TopDocs hits = searchResults.one(); try { // Get mapped lucene field in CSW configuration String indexField = catalogConfig.getFieldMapping().get(property.toLowerCase()); if (indexField != null) property = indexField; // check if params asked is in the index using getFieldNames ? 
@SuppressWarnings("resource") FieldInfos fi = SlowCompositeReaderWrapper.wrap(reader).getFieldInfos(); if (fi.fieldInfo(property) == null) continue; boolean isRange = false; if (catalogConfig.getGetRecordsRangeFields().contains(property)) isRange = true; if (isRange) listOfValues = new Element("RangeOfValues", Csw.NAMESPACE_CSW); else listOfValues = new Element("ListOfValues", Csw.NAMESPACE_CSW); Set<String> fields = new HashSet<String>(); fields.add(property); fields.add("_isTemplate"); // parse each document in the index String[] fieldValues; Collator stringCollator = Collator.getInstance(); stringCollator.setStrength(Collator.PRIMARY); SortedSet<String> sortedValues = new TreeSet<String>(stringCollator); ObjectKeyIntOpenHashMap duplicateValues = new ObjectKeyIntOpenHashMap(); for (int j = 0; j < hits.scoreDocs.length; j++) { DocumentStoredFieldVisitor selector = new DocumentStoredFieldVisitor(fields); reader.document(hits.scoreDocs[j].doc, selector); Document doc = selector.getDocument(); // Skip templates and subTemplates String[] isTemplate = doc.getValues("_isTemplate"); if (isTemplate[0] != null && !isTemplate[0].equals("n")) continue; // Get doc values for specified property fieldValues = doc.getValues(property); if (fieldValues == null) continue; addtoSortedSet(sortedValues, fieldValues, duplicateValues); } SummaryComparator valuesComparator = new SummaryComparator(SortOption.FREQUENCY, Type.STRING, context.getLanguage(), null); TreeSet<SummaryComparator.SummaryElement> sortedValuesFrequency = new TreeSet<SummaryComparator.SummaryElement>( valuesComparator); ObjectKeyIntMapIterator entries = duplicateValues.entries(); while (entries.hasNext()) { entries.next(); sortedValuesFrequency.add(new SummaryComparator.SummaryElement(entries)); } if (freq) return createValuesByFrequency(sortedValuesFrequency); else listOfValues.addContent(createValuesElement(sortedValues, isRange)); } finally { // any children means that the catalog was unable to determine // 
anything about the specified parameter if (listOfValues != null && listOfValues.getChildren().size() != 0) domainValues.addContent(listOfValues); // Add current DomainValues to the list domainValuesList.add(domainValues); } } finally { sm.releaseIndexReader(indexAndTaxonomy); } } return domainValuesList; }