List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the n-th Document in this index.
From source file:org.alfresco.repo.search.impl.lucene.query.LeafScorer.java
License:Open Source License
private String[] getParents(IndexReader reader, int n) throws IOException { if (reader instanceof CachingIndexReader) { CachingIndexReader cachingIndexReader = (CachingIndexReader) reader; return cachingIndexReader.getParents(n); } else {//from w w w. java 2s. c o m Document document = reader.document(n); Field[] fields = document.getFields("PARENT"); if (fields != null) { String[] answer = new String[fields.length]; int i = 0; for (Field field : fields) { answer[i++] = (field == null) ? null : field.stringValue(); } return answer; } else { return null; } } }
From source file:org.alfresco.repo.search.impl.lucene.query.LeafScorer.java
License:Open Source License
private String[] getlinkAspects(IndexReader reader, int n) throws IOException { if (reader instanceof CachingIndexReader) { CachingIndexReader cachingIndexReader = (CachingIndexReader) reader; return cachingIndexReader.getLinkAspects(n); } else {//from w ww . j a v a 2 s .c o m Document document = reader.document(n); Field[] fields = document.getFields("LINKASPECT"); if (fields != null) { String[] answer = new String[fields.length]; int i = 0; for (Field field : fields) { answer[i++] = (field == null) ? null : field.stringValue(); } return answer; } else { return null; } } }
From source file:org.alfresco.repo.search.impl.lucene.query.PathScorer.java
License:Open Source License
public static PathScorer createPathScorer(Similarity similarity, PathQuery pathQuery, IndexReader reader, Weight weight, DictionaryService dictionarySertvice, boolean repeat) throws IOException { Scorer selfScorer = null;// ww w . j a va 2 s. c o m HashMap<String, Counter> selfIds = null; boolean followParentInLevel0 = true; StructuredFieldPosition last = null; if (pathQuery.getQNameStructuredFieldPositions().size() > 0) { last = pathQuery.getQNameStructuredFieldPositions() .get(pathQuery.getQNameStructuredFieldPositions().size() - 1); } if ((last != null) && last.linkSelf()) { PathQuery selfQuery = new PathQuery(dictionarySertvice); selfQuery.setQuery(pathQuery.getPathStructuredFieldPositions(), pathQuery.getQNameStructuredFieldPositions()); selfQuery.removeDescendantAndSelf(); if (!selfQuery.isEmpty()) { selfIds = new HashMap<String, Counter>(); selfScorer = PathScorer.createPathScorer(similarity, selfQuery, reader, weight, dictionarySertvice, repeat); selfIds.clear(); while (selfScorer.next()) { int doc = selfScorer.doc(); Document document = reader.document(doc); Field id = document.getField("ID"); Counter counter = selfIds.get(id.stringValue()); if (counter == null) { counter = new Counter(); selfIds.put(id.stringValue(), counter); } counter.count++; } } } if ((pathQuery.getPathStructuredFieldPositions().size() + pathQuery.getQNameStructuredFieldPositions().size()) == 0) { ArrayList<StructuredFieldPosition> answer = new ArrayList<StructuredFieldPosition>(2); answer.add(new SelfAxisStructuredFieldPosition()); answer.add(new SelfAxisStructuredFieldPosition()); pathQuery.appendQuery(answer); } for (StructuredFieldPosition sfp : pathQuery.getPathStructuredFieldPositions()) { if (sfp.getTermText() != null) { TermPositions p = reader.termPositions(new Term(pathQuery.getPathField(), sfp.getTermText())); if (p == null) return null; CachingTermPositions ctp = new CachingTermPositions(p); sfp.setCachingTermPositions(ctp); } } for (StructuredFieldPosition sfp : 
pathQuery.getQNameStructuredFieldPositions()) { if (sfp.getTermText() != null) { TermPositions p = reader.termPositions(new Term(pathQuery.getQnameField(), sfp.getTermText())); if (p == null) return null; CachingTermPositions ctp = new CachingTermPositions(p); sfp.setCachingTermPositions(ctp); } } TermPositions rootContainerPositions = null; if (pathQuery.getPathRootTerm() != null) { rootContainerPositions = reader.termPositions(pathQuery.getPathRootTerm()); } TermPositions rootLeafPositions = null; if (pathQuery.getQNameRootTerm() != null) { rootLeafPositions = reader.termPositions(pathQuery.getQNameRootTerm()); } ContainerScorer cs = null; TermPositions level0 = null; // StructuredFieldPosition[] test = // (StructuredFieldPosition[])structuredFieldPositions.toArray(new // StructuredFieldPosition[]{}); if (pathQuery.getPathStructuredFieldPositions().size() > 0) { TermPositions containerPositions = reader.termPositions(new Term("ISCONTAINER", "T")); cs = new ContainerScorer(weight, rootContainerPositions, (StructuredFieldPosition[]) pathQuery.getPathStructuredFieldPositions() .toArray(new StructuredFieldPosition[] {}), containerPositions, similarity, reader.norms(pathQuery.getPathField())); } else { level0 = reader.termPositions(new Term("ISROOT", "T")); } if ((cs == null) && (pathQuery.getQNameStructuredFieldPositions() .get(pathQuery.getQNameStructuredFieldPositions().size() - 1)).linkSelf()) { followParentInLevel0 = false; } LeafScorer ls = new LeafScorer(weight, rootLeafPositions, level0, cs, (StructuredFieldPosition[]) pathQuery.getQNameStructuredFieldPositions() .toArray(new StructuredFieldPosition[] {}), followParentInLevel0, selfIds, reader, similarity, reader.norms(pathQuery.getQnameField()), dictionarySertvice, repeat); return new PathScorer(similarity, ls); }
From source file:org.apache.jackrabbit.core.query.lucene.AbstractExcerpt.java
License:Apache License
/** * {@inheritDoc}/* w w w. ja va 2 s.c om*/ */ public String getExcerpt(NodeId id, int maxFragments, int maxFragmentSize) throws IOException { IndexReader reader = index.getIndexReader(); try { checkRewritten(reader); Term idTerm = new Term(FieldNames.UUID, id.toString()); TermDocs tDocs = reader.termDocs(idTerm); int docNumber; Document doc; try { if (tDocs.next()) { docNumber = tDocs.doc(); doc = reader.document(docNumber); } else { // node not found in index return null; } } finally { tDocs.close(); } Fieldable[] fields = doc.getFieldables(FieldNames.FULLTEXT); if (fields == null) { log.debug("Fulltext field not stored, using {}", SimpleExcerptProvider.class.getName()); SimpleExcerptProvider exProvider = new SimpleExcerptProvider(); exProvider.init(query, index); return exProvider.getExcerpt(id, maxFragments, maxFragmentSize); } StringBuffer text = new StringBuffer(); String separator = ""; for (int i = 0; i < fields.length; i++) { if (fields[i].stringValue().length() == 0) { continue; } text.append(separator); text.append(fields[i].stringValue()); // this is a hack! in general multiple fields with the same // name are handled properly, that is, offset and position is // calculated correctly. there is one case however where // the offset gets wrong: // if a term text ends with characters that are considered noise // then the offset of the next field will be off by the number // of noise characters. // therefore we delete noise characters at the end of the text. 
// this process is required for all but the last field if (i < fields.length - 1) { for (int j = text.length() - 1; j >= 0; j--) { if (Character.isLetterOrDigit(text.charAt(j))) { break; } else { text.deleteCharAt(j); } } } separator = " "; } TermFreqVector tfv = reader.getTermFreqVector(docNumber, FieldNames.FULLTEXT); if (tfv instanceof TermPositionVector) { return createExcerpt((TermPositionVector) tfv, text.toString(), maxFragments, maxFragmentSize); } else { log.debug("No TermPositionVector on Fulltext field."); return null; } } finally { Util.closeOrRelease(reader); } }
From source file:org.apache.jackrabbit.core.query.lucene.SearchIndex.java
License:Apache License
/**
 * Retrieves the root of the indexing aggregate for <code>removedNodeIds</code>
 * and puts it into <code>map</code>. Does nothing when there is no indexing
 * configuration or it has no aggregate rules. Any exception raised while
 * reading the index is logged as a warning and not propagated.
 *
 * @param removedNodeIds the ids of removed nodes.
 * @param map            aggregate roots are collected in this map.
 *                       Key=NodeId, value=NodeState.
 */
protected void retrieveAggregateRoot(Set removedNodeIds, Map map) {
    if (indexingConfig == null) {
        return;
    }
    AggregateRule[] aggregateRules = indexingConfig.getAggregateRules();
    if (aggregateRules == null) {
        return;
    }

    int found = 0;
    long start = System.currentTimeMillis();
    try {
        IndexReader reader = index.getIndexReader();
        try {
            Term aggregateUUIDs = new Term(FieldNames.AGGREGATED_NODE_UUID, "");
            TermDocs tDocs = reader.termDocs();
            try {
                ItemStateManager ism = getContext().getItemStateManager();
                Iterator it = removedNodeIds.iterator();
                while (it.hasNext()) {
                    NodeId id = (NodeId) it.next();
                    // re-target the term at the removed node's UUID and look up
                    // every document that aggregates it
                    aggregateUUIDs = aggregateUUIDs.createTerm(id.getUUID().toString());
                    tDocs.seek(aggregateUUIDs);
                    while (tDocs.next()) {
                        Document doc = reader.document(tDocs.doc());
                        NodeId nId = new NodeId(UUID.fromString(doc.get(FieldNames.UUID)));
                        map.put(nId, ism.getItemState(nId));
                        found++;
                    }
                }
            } finally {
                tDocs.close();
            }
        } finally {
            reader.close();
        }
    } catch (Exception e) {
        log.warn("Exception while retrieving aggregate roots", e);
    }
    long elapsed = System.currentTimeMillis() - start;
    log.debug("Retrieved {} aggregate roots in {} ms.", new Integer(found), new Long(elapsed));
}
From source file:org.apache.jackrabbit.core.query.lucene.AbstractExcerpt.java
License:Apache License
/** * {@inheritDoc}/*from w w w.j a v a2s .co m*/ */ public String getExcerpt(NodeId id, int maxFragments, int maxFragmentSize) throws IOException { IndexReader reader = index.getIndexReader(); try { checkRewritten(reader); Term idTerm = TermFactory.createUUIDTerm(id.toString()); TermDocs tDocs = reader.termDocs(idTerm); int docNumber; Document doc; try { if (tDocs.next()) { docNumber = tDocs.doc(); doc = reader.document(docNumber); } else { // node not found in index return null; } } finally { tDocs.close(); } Fieldable[] fields = doc.getFieldables(FieldNames.FULLTEXT); if (fields.length == 0) { log.debug("Fulltext field not stored, using {}", SimpleExcerptProvider.class.getName()); SimpleExcerptProvider exProvider = new SimpleExcerptProvider(); exProvider.init(query, index); return exProvider.getExcerpt(id, maxFragments, maxFragmentSize); } StringBuffer text = new StringBuffer(); String separator = ""; for (int i = 0; i < fields.length; i++) { if (fields[i].stringValue().length() == 0) { continue; } text.append(separator); text.append(fields[i].stringValue()); separator = " "; } TermFreqVector tfv = reader.getTermFreqVector(docNumber, FieldNames.FULLTEXT); if (tfv instanceof TermPositionVector) { return createExcerpt((TermPositionVector) tfv, text.toString(), maxFragments, maxFragmentSize); } else { log.debug("No TermPositionVector on Fulltext field."); return null; } } finally { Util.closeOrRelease(reader); } }
From source file:org.apache.jackrabbit.core.query.lucene.sort.AbstractFieldComparator.java
License:Apache License
/**
 * Returns the value of the UUID field of the given document.
 *
 * <p>The reader is selected via {@code readerIndex(doc)}, and the document is
 * loaded from it using {@code doc} minus the matching entry of {@code starts}.
 *
 * @param doc the document number
 * @return the value stored under {@link FieldNames#UUID} for that document
 * @throws IOException if the document cannot be read
 */
protected String getUUIDForIndex(int doc) throws IOException {
    final int slot = readerIndex(doc);
    return readers.get(slot)
            .document(doc - starts[slot])
            .get(FieldNames.UUID);
}
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.util.MoreLikeThisHelper.java
License:Apache License
public static Query getMoreLikeThis(IndexReader reader, Analyzer analyzer, String mltQueryString) { Query moreLikeThisQuery = null; MoreLikeThis mlt = new MoreLikeThis(reader); mlt.setAnalyzer(analyzer);//from w w w . j av a 2 s . c o m try { String text = null; String[] fields = {}; for (String param : mltQueryString.split("&")) { String[] keyValuePair = param.split("="); if (keyValuePair.length != 2 || keyValuePair[0] == null || keyValuePair[1] == null) { throw new RuntimeException("Unparsable native Lucene MLT query: " + mltQueryString); } else { if ("stream.body".equals(keyValuePair[0])) { text = keyValuePair[1]; } else if ("mlt.fl".equals(keyValuePair[0])) { fields = keyValuePair[1].split(","); } else if ("mlt.mindf".equals(keyValuePair[0])) { mlt.setMinDocFreq(Integer.parseInt(keyValuePair[1])); } else if ("mlt.mintf".equals(keyValuePair[0])) { mlt.setMinTermFreq(Integer.parseInt(keyValuePair[1])); } else if ("mlt.boost".equals(keyValuePair[0])) { mlt.setBoost(Boolean.parseBoolean(keyValuePair[1])); } else if ("mlt.qf".equals(keyValuePair[0])) { mlt.setBoostFactor(Float.parseFloat(keyValuePair[1])); } else if ("mlt.maxdf".equals(keyValuePair[0])) { mlt.setMaxDocFreq(Integer.parseInt(keyValuePair[1])); } else if ("mlt.maxdfp".equals(keyValuePair[0])) { mlt.setMaxDocFreqPct(Integer.parseInt(keyValuePair[1])); } else if ("mlt.maxntp".equals(keyValuePair[0])) { mlt.setMaxNumTokensParsed(Integer.parseInt(keyValuePair[1])); } else if ("mlt.maxqt".equals(keyValuePair[0])) { mlt.setMaxQueryTerms(Integer.parseInt(keyValuePair[1])); } else if ("mlt.maxwl".equals(keyValuePair[0])) { mlt.setMaxWordLen(Integer.parseInt(keyValuePair[1])); } else if ("mlt.minwl".equals(keyValuePair[0])) { mlt.setMinWordLen(Integer.parseInt(keyValuePair[1])); } } } if (text != null) { if (FieldNames.PATH.equals(fields[0])) { IndexSearcher searcher = new IndexSearcher(reader); TermQuery q = new TermQuery(new Term(FieldNames.PATH, text)); TopDocs top = searcher.search(q, 1); if (top.totalHits 
== 0) { mlt.setFieldNames(fields); moreLikeThisQuery = mlt.like(new StringReader(text), mlt.getFieldNames()[0]); } else { ScoreDoc d = top.scoreDocs[0]; Document doc = reader.document(d.doc); List<String> fieldNames = new ArrayList<String>(); for (IndexableField f : doc.getFields()) { if (!FieldNames.PATH.equals(f.name())) { fieldNames.add(f.name()); } } String[] docFields = fieldNames.toArray(new String[fieldNames.size()]); mlt.setFieldNames(docFields); moreLikeThisQuery = mlt.like(d.doc); } } else { mlt.setFieldNames(fields); moreLikeThisQuery = mlt.like(new StringReader(text), mlt.getFieldNames()[0]); } } return moreLikeThisQuery; } catch (Exception e) { throw new RuntimeException("could not handle MLT query " + mltQueryString); } }
From source file:org.apache.mahout.text.LuceneIndexHelper.java
License:Apache License
/**
 * Verifies that {@code fieldName} is present and stored on the first
 * document (document number 0) of the index.
 *
 * @param reader the index reader to inspect
 * @param fieldName the name of the field expected to be stored
 * @throws IOException if the first document cannot be read
 * @throws IllegalArgumentException if the first document has no such field
 *         or the field's type is not stored
 */
public static void fieldShouldExistInIndex(IndexReader reader, String fieldName) throws IOException {
    IndexableField candidate = reader.document(0).getField(fieldName);
    boolean stored = candidate != null && candidate.fieldType().stored();
    if (!stored) {
        throw new IllegalArgumentException("Field '" + fieldName
                + "' is possibly not stored since first document in index does not contain this field.");
    }
}
From source file:org.apache.maven.index.AbstractRepoNexusIndexerTest.java
License:Apache License
public void testPackaging() throws Exception { IndexReader reader = context.acquireIndexSearcher().getIndexReader(); Bits liveDocs = MultiFields.getLiveDocs(reader); for (int i = 0; i < reader.maxDoc(); i++) { if (liveDocs == null || liveDocs.get(i)) { Document document = reader.document(i); String uinfo = document.get(ArtifactInfo.UINFO); if (uinfo != null) { String info = document.get(ArtifactInfo.INFO); assertFalse("Bad:" + info, info.startsWith("null")); }/* ww w . j a v a 2 s .c o m*/ } } // { // Query query = new TermQuery( new Term( MAVEN.PACKAGING, "jar" ) ); // FlatSearchResponse response = nexusIndexer.searchFlat(new FlatSearchRequest(query)); // assertEquals(response.getResults().toString(), 22, response.getTotalHits()); // } { Query query = nexusIndexer.constructQuery(MAVEN.PACKAGING, "tar.gz", SearchType.EXACT); FlatSearchResponse response = nexusIndexer.searchFlat(new FlatSearchRequest(query)); assertEquals(response.getResults().toString(), 1, response.getTotalHits()); ArtifactInfo ai = response.getResults().iterator().next(); assertEquals("tar.gz", ai.getPackaging()); assertEquals("tar.gz", ai.getFileExtension()); } { Query query = nexusIndexer.constructQuery(MAVEN.PACKAGING, "zip", SearchType.EXACT); FlatSearchResponse response = nexusIndexer.searchFlat(new FlatSearchRequest(query)); assertEquals(response.getResults().toString(), 1, response.getTotalHits()); ArtifactInfo ai = response.getResults().iterator().next(); assertEquals("zip", ai.getPackaging()); assertEquals("zip", ai.getFileExtension()); } }