Example usage for org.apache.lucene.index IndexReader document

List of usage examples for org.apache.lucene.index IndexReader document

Introduction

On this page you can find example usages of the org.apache.lucene.index.IndexReader.document(int) method.

Prototype




public final Document document(int docID) throws IOException 

Source Link

Document

Returns the stored fields of the nth Document in this index.

Usage

From source file:org.alfresco.repo.search.impl.lucene.query.LeafScorer.java

License:Open Source License

/**
 * Collects the string values of every "PARENT" field stored on document {@code n}.
 *
 * <p>A {@code CachingIndexReader} is asked directly through its own
 * {@code getParents(n)}; for any other reader the stored document is loaded
 * and its "PARENT" fields are read.
 *
 * @param reader the index reader to consult
 * @param n      the document number
 * @return one entry per "PARENT" field (an entry is {@code null} when the field
 *         itself is {@code null}), or {@code null} when the document reports
 *         {@code null} for the field lookup
 * @throws IOException if reading from the index fails
 */
private String[] getParents(IndexReader reader, int n) throws IOException {
    if (reader instanceof CachingIndexReader) {
        return ((CachingIndexReader) reader).getParents(n);
    }

    Field[] parentFields = reader.document(n).getFields("PARENT");
    if (parentFields == null) {
        return null;
    }

    String[] parents = new String[parentFields.length];
    for (int idx = 0; idx < parentFields.length; idx++) {
        Field parentField = parentFields[idx];
        parents[idx] = (parentField == null) ? null : parentField.stringValue();
    }
    return parents;
}

From source file:org.alfresco.repo.search.impl.lucene.query.LeafScorer.java

License:Open Source License

/**
 * Collects the string values of every "LINKASPECT" field stored on document {@code n}.
 *
 * <p>A {@code CachingIndexReader} is asked directly through its own
 * {@code getLinkAspects(n)}; for any other reader the stored document is loaded
 * and its "LINKASPECT" fields are read.
 *
 * @param reader the index reader to consult
 * @param n      the document number
 * @return one entry per "LINKASPECT" field (an entry is {@code null} when the
 *         field itself is {@code null}), or {@code null} when the document
 *         reports {@code null} for the field lookup
 * @throws IOException if reading from the index fails
 */
private String[] getlinkAspects(IndexReader reader, int n) throws IOException {
    if (reader instanceof CachingIndexReader) {
        return ((CachingIndexReader) reader).getLinkAspects(n);
    }

    Field[] aspectFields = reader.document(n).getFields("LINKASPECT");
    if (aspectFields == null) {
        return null;
    }

    String[] aspects = new String[aspectFields.length];
    for (int idx = 0; idx < aspectFields.length; idx++) {
        Field aspectField = aspectFields[idx];
        aspects[idx] = (aspectField == null) ? null : aspectField.stringValue();
    }
    return aspects;
}

From source file:org.alfresco.repo.search.impl.lucene.query.PathScorer.java

License:Open Source License

/**
 * Builds a {@code PathScorer} for {@code pathQuery} against {@code reader}.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>If the last QName position reports {@code linkSelf()}, a copy of the query with
 * "descendant and self" removed is scored recursively and the "ID" field value of every
 * hit is counted into {@code selfIds}.</li>
 * <li>If the query has no path and no QName positions at all, two self-axis positions
 * are appended to it.</li>
 * <li>Term positions are opened for every path/QName position that has term text, for the
 * two optional root terms, and for either the "ISCONTAINER" or the "ISROOT" marker term.</li>
 * <li>A {@code ContainerScorer} (only when path positions exist) and a {@code LeafScorer}
 * are assembled and wrapped in the returned {@code PathScorer}.</li>
 * </ol>
 *
 * @param similarity         passed through to the container, leaf and path scorers
 * @param pathQuery          the query to score; may be modified via {@code appendQuery}
 * @param reader             index reader used for term positions, norms and stored documents
 * @param weight             passed through to the container and leaf scorers
 * @param dictionarySertvice passed to the derived self query and to the leaf scorer
 * @param repeat             passed through to the recursive call and to the leaf scorer
 * @return the scorer, or {@code null} if {@code reader.termPositions} yields {@code null}
 *         for any path or QName position that has term text
 * @throws IOException if an index access fails
 */
public static PathScorer createPathScorer(Similarity similarity, PathQuery pathQuery, IndexReader reader,
        Weight weight, DictionaryService dictionarySertvice, boolean repeat) throws IOException {
    Scorer selfScorer = null;
    // Occurrence count per "ID" field value among the self-query hits; stays null when
    // no self query is run.
    HashMap<String, Counter> selfIds = null;
    boolean followParentInLevel0 = true;

    // Last QName position, if there is one.
    StructuredFieldPosition last = null;
    if (pathQuery.getQNameStructuredFieldPositions().size() > 0) {
        last = pathQuery.getQNameStructuredFieldPositions()
                .get(pathQuery.getQNameStructuredFieldPositions().size() - 1);
    }
    if ((last != null) && last.linkSelf()) {
        // Score the same positions with "descendant and self" removed and remember the
        // IDs of the matching documents.
        PathQuery selfQuery = new PathQuery(dictionarySertvice);
        selfQuery.setQuery(pathQuery.getPathStructuredFieldPositions(),
                pathQuery.getQNameStructuredFieldPositions());
        selfQuery.removeDescendantAndSelf();
        if (!selfQuery.isEmpty()) {
            selfIds = new HashMap<String, Counter>();
            // NOTE(review): the recursive call can return null (see the early returns
            // below); selfScorer.next() would then throw a NullPointerException.
            selfScorer = PathScorer.createPathScorer(similarity, selfQuery, reader, weight, dictionarySertvice,
                    repeat);
            // No-op: the map was created two statements above and is still empty.
            selfIds.clear();
            while (selfScorer.next()) {
                int doc = selfScorer.doc();
                Document document = reader.document(doc);
                // NOTE(review): id is dereferenced without a null check; presumably every
                // indexed document stores an "ID" field - confirm.
                Field id = document.getField("ID");
                Counter counter = selfIds.get(id.stringValue());
                if (counter == null) {
                    counter = new Counter();
                    selfIds.put(id.stringValue(), counter);
                }
                counter.count++;
            }
        }
    }

    // A completely empty query gets two self-axis positions appended.
    if ((pathQuery.getPathStructuredFieldPositions().size()
            + pathQuery.getQNameStructuredFieldPositions().size()) == 0) {
        ArrayList<StructuredFieldPosition> answer = new ArrayList<StructuredFieldPosition>(2);
        answer.add(new SelfAxisStructuredFieldPosition());
        answer.add(new SelfAxisStructuredFieldPosition());

        pathQuery.appendQuery(answer);
    }

    // Attach cached term positions to every path position that has term text.
    for (StructuredFieldPosition sfp : pathQuery.getPathStructuredFieldPositions()) {
        if (sfp.getTermText() != null) {
            TermPositions p = reader.termPositions(new Term(pathQuery.getPathField(), sfp.getTermText()));
            // NOTE(review): returning here does not close TermPositions obtained in
            // earlier iterations - confirm whether that matters for this reader.
            if (p == null)
                return null;
            CachingTermPositions ctp = new CachingTermPositions(p);
            sfp.setCachingTermPositions(ctp);
        }
    }

    // Same for the QName positions, against the QName field.
    for (StructuredFieldPosition sfp : pathQuery.getQNameStructuredFieldPositions()) {
        if (sfp.getTermText() != null) {
            TermPositions p = reader.termPositions(new Term(pathQuery.getQnameField(), sfp.getTermText()));
            if (p == null)
                return null;
            CachingTermPositions ctp = new CachingTermPositions(p);
            sfp.setCachingTermPositions(ctp);
        }
    }

    // Optional root terms; the positions stay null when the query defines no root term.
    TermPositions rootContainerPositions = null;
    if (pathQuery.getPathRootTerm() != null) {
        rootContainerPositions = reader.termPositions(pathQuery.getPathRootTerm());
    }

    TermPositions rootLeafPositions = null;
    if (pathQuery.getQNameRootTerm() != null) {
        rootLeafPositions = reader.termPositions(pathQuery.getQNameRootTerm());
    }

    ContainerScorer cs = null;

    TermPositions level0 = null;

    // StructuredFieldPosition[] test =
    // (StructuredFieldPosition[])structuredFieldPositions.toArray(new
    // StructuredFieldPosition[]{});
    // Exactly one of cs / level0 is set: a container scorer when path positions exist,
    // otherwise the positions of the ISROOT=T term.
    if (pathQuery.getPathStructuredFieldPositions().size() > 0) {
        TermPositions containerPositions = reader.termPositions(new Term("ISCONTAINER", "T"));
        cs = new ContainerScorer(weight, rootContainerPositions,
                (StructuredFieldPosition[]) pathQuery.getPathStructuredFieldPositions()
                        .toArray(new StructuredFieldPosition[] {}),
                containerPositions, similarity, reader.norms(pathQuery.getPathField()));
    } else {
        level0 = reader.termPositions(new Term("ISROOT", "T"));
    }

    // NOTE(review): unlike the guarded lookup of "last" above, this get(size - 1) is not
    // protected against an empty QName list; presumably appendQuery above guarantees at
    // least one QName position when there is no container scorer - confirm.
    if ((cs == null) && (pathQuery.getQNameStructuredFieldPositions()
            .get(pathQuery.getQNameStructuredFieldPositions().size() - 1)).linkSelf()) {
        followParentInLevel0 = false;
    }

    LeafScorer ls = new LeafScorer(weight, rootLeafPositions, level0, cs,
            (StructuredFieldPosition[]) pathQuery.getQNameStructuredFieldPositions()
                    .toArray(new StructuredFieldPosition[] {}),
            followParentInLevel0, selfIds, reader, similarity, reader.norms(pathQuery.getQnameField()),
            dictionarySertvice, repeat);

    return new PathScorer(similarity, ls);
}

From source file:org.apache.jackrabbit.core.query.lucene.AbstractExcerpt.java

License:Apache License

/**
 * {@inheritDoc}/*  w w w. ja  va 2  s.c  om*/
 */
public String getExcerpt(NodeId id, int maxFragments, int maxFragmentSize) throws IOException {
    IndexReader reader = index.getIndexReader();
    try {
        checkRewritten(reader);
        Term idTerm = new Term(FieldNames.UUID, id.toString());
        TermDocs tDocs = reader.termDocs(idTerm);
        int docNumber;
        Document doc;
        try {
            if (tDocs.next()) {
                docNumber = tDocs.doc();
                doc = reader.document(docNumber);
            } else {
                // node not found in index
                return null;
            }
        } finally {
            tDocs.close();
        }
        Fieldable[] fields = doc.getFieldables(FieldNames.FULLTEXT);
        if (fields == null) {
            log.debug("Fulltext field not stored, using {}", SimpleExcerptProvider.class.getName());
            SimpleExcerptProvider exProvider = new SimpleExcerptProvider();
            exProvider.init(query, index);
            return exProvider.getExcerpt(id, maxFragments, maxFragmentSize);
        }
        StringBuffer text = new StringBuffer();
        String separator = "";
        for (int i = 0; i < fields.length; i++) {
            if (fields[i].stringValue().length() == 0) {
                continue;
            }
            text.append(separator);
            text.append(fields[i].stringValue());
            // this is a hack! in general multiple fields with the same
            // name are handled properly, that is, offset and position is
            // calculated correctly. there is one case however where
            // the offset gets wrong:
            // if a term text ends with characters that are considered noise
            // then the offset of the next field will be off by the number
            // of noise characters.
            // therefore we delete noise characters at the end of the text.
            // this process is required for all but the last field
            if (i < fields.length - 1) {
                for (int j = text.length() - 1; j >= 0; j--) {
                    if (Character.isLetterOrDigit(text.charAt(j))) {
                        break;
                    } else {
                        text.deleteCharAt(j);
                    }
                }
            }
            separator = " ";
        }
        TermFreqVector tfv = reader.getTermFreqVector(docNumber, FieldNames.FULLTEXT);
        if (tfv instanceof TermPositionVector) {
            return createExcerpt((TermPositionVector) tfv, text.toString(), maxFragments, maxFragmentSize);
        } else {
            log.debug("No TermPositionVector on Fulltext field.");
            return null;
        }
    } finally {
        Util.closeOrRelease(reader);
    }
}

From source file:org.apache.jackrabbit.core.query.lucene.SearchIndex.java

License:Apache License

/**
 * Retrieves the root of the indexing aggregate for <code>removedNodeIds</code>
 * and puts it into <code>map</code>.
 * <p>
 * Does nothing when there is no indexing configuration or when the
 * configuration has no aggregate rules. Any exception raised during the
 * lookup is logged as a warning and not propagated; entries added to
 * <code>map</code> before the failure are kept.
 *
 * @param removedNodeIds the ids of removed nodes.
 * @param map            aggregate roots are collected in this map.
 *                       Key=NodeId, value=NodeState.
 */
protected void retrieveAggregateRoot(Set removedNodeIds, Map map) {
    if (indexingConfig == null || indexingConfig.getAggregateRules() == null) {
        return;
    }
    int found = 0;
    long time = System.currentTimeMillis();
    try {
        IndexReader reader = index.getIndexReader();
        try {
            // template term; only its text is replaced for each removed node
            Term aggregateUUIDs = new Term(FieldNames.AGGREGATED_NODE_UUID, "");
            TermDocs tDocs = reader.termDocs();
            try {
                ItemStateManager ism = getContext().getItemStateManager();
                for (Iterator it = removedNodeIds.iterator(); it.hasNext();) {
                    NodeId id = (NodeId) it.next();
                    aggregateUUIDs = aggregateUUIDs.createTerm(id.getUUID().toString());
                    tDocs.seek(aggregateUUIDs);
                    while (tDocs.next()) {
                        Document doc = reader.document(tDocs.doc());
                        String uuid = doc.get(FieldNames.UUID);
                        NodeId nId = new NodeId(UUID.fromString(uuid));
                        map.put(nId, ism.getItemState(nId));
                        found++;
                    }
                }
            } finally {
                tDocs.close();
            }
        } finally {
            reader.close();
        }
    } catch (Exception e) {
        // best effort: a failed lookup must not break the caller
        log.warn("Exception while retrieving aggregate roots", e);
    }
    time = System.currentTimeMillis() - time;
    // valueOf instead of the deprecated boxing constructors
    log.debug("Retrieved {} aggregate roots in {} ms.", Integer.valueOf(found), Long.valueOf(time));
}

From source file:org.apache.jackrabbit.core.query.lucene.AbstractExcerpt.java

License:Apache License

/**
 * {@inheritDoc}/*from   w w w.j  a  v a2s .co  m*/
 */
public String getExcerpt(NodeId id, int maxFragments, int maxFragmentSize) throws IOException {
    IndexReader reader = index.getIndexReader();
    try {
        checkRewritten(reader);
        Term idTerm = TermFactory.createUUIDTerm(id.toString());
        TermDocs tDocs = reader.termDocs(idTerm);
        int docNumber;
        Document doc;
        try {
            if (tDocs.next()) {
                docNumber = tDocs.doc();
                doc = reader.document(docNumber);
            } else {
                // node not found in index
                return null;
            }
        } finally {
            tDocs.close();
        }
        Fieldable[] fields = doc.getFieldables(FieldNames.FULLTEXT);
        if (fields.length == 0) {
            log.debug("Fulltext field not stored, using {}", SimpleExcerptProvider.class.getName());
            SimpleExcerptProvider exProvider = new SimpleExcerptProvider();
            exProvider.init(query, index);
            return exProvider.getExcerpt(id, maxFragments, maxFragmentSize);
        }
        StringBuffer text = new StringBuffer();
        String separator = "";
        for (int i = 0; i < fields.length; i++) {
            if (fields[i].stringValue().length() == 0) {
                continue;
            }
            text.append(separator);
            text.append(fields[i].stringValue());
            separator = " ";
        }
        TermFreqVector tfv = reader.getTermFreqVector(docNumber, FieldNames.FULLTEXT);
        if (tfv instanceof TermPositionVector) {
            return createExcerpt((TermPositionVector) tfv, text.toString(), maxFragments, maxFragmentSize);
        } else {
            log.debug("No TermPositionVector on Fulltext field.");
            return null;
        }
    } finally {
        Util.closeOrRelease(reader);
    }
}

From source file:org.apache.jackrabbit.core.query.lucene.sort.AbstractFieldComparator.java

License:Apache License

/**
 * Returns the value of the UUID field stored on the given document.
 *
 * <p>The reader is selected via {@code readerIndex(doc)}; the document number
 * handed to it is {@code doc} minus that reader's entry in {@code starts}
 * (presumably the reader's first document number - confirm against the
 * code that fills {@code starts}).
 *
 * @param doc the document number to resolve
 * @return the stored UUID field value as returned by {@code Document.get}
 * @throws IOException if the stored document cannot be read
 */
protected String getUUIDForIndex(int doc) throws IOException {
    final int slot = readerIndex(doc);
    return readers.get(slot).document(doc - starts[slot]).get(FieldNames.UUID);
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.util.MoreLikeThisHelper.java

License:Apache License

/**
 * Builds a "more like this" query from a native MLT query string.
 *
 * <p>The string is split on {@code &} into {@code key=value} pairs.
 * {@code stream.body} supplies the text, {@code mlt.fl} the comma separated
 * field names, and the remaining recognized {@code mlt.*} keys configure the
 * {@link MoreLikeThis} instance. Unrecognized keys are ignored.
 *
 * <p>When the first field name equals {@code FieldNames.PATH}, the text is
 * treated as a path: if a document with that path exists, the query is
 * built from that document using all of its fields except the path field;
 * otherwise, and for any other first field, the query is built from the
 * text itself against the first configured field.
 *
 * @param reader         the index reader to search and to build the query against
 * @param analyzer       the analyzer handed to {@link MoreLikeThis}
 * @param mltQueryString the native query string, e.g. {@code stream.body=...&mlt.fl=...}
 * @return the query, or {@code null} if the string contains no {@code stream.body}
 * @throws RuntimeException if the string cannot be parsed or the query cannot
 *                          be built; the original failure is attached as the cause
 */
public static Query getMoreLikeThis(IndexReader reader, Analyzer analyzer, String mltQueryString) {
    Query moreLikeThisQuery = null;
    MoreLikeThis mlt = new MoreLikeThis(reader);
    mlt.setAnalyzer(analyzer);
    try {
        String text = null;
        String[] fields = {};
        for (String param : mltQueryString.split("&")) {
            String[] keyValuePair = param.split("=");
            // String.split never yields null elements, so the length check suffices
            if (keyValuePair.length != 2) {
                throw new RuntimeException("Unparsable native Lucene MLT query: " + mltQueryString);
            }
            String key = keyValuePair[0];
            String value = keyValuePair[1];
            if ("stream.body".equals(key)) {
                text = value;
            } else if ("mlt.fl".equals(key)) {
                fields = value.split(",");
            } else if ("mlt.mindf".equals(key)) {
                mlt.setMinDocFreq(Integer.parseInt(value));
            } else if ("mlt.mintf".equals(key)) {
                mlt.setMinTermFreq(Integer.parseInt(value));
            } else if ("mlt.boost".equals(key)) {
                mlt.setBoost(Boolean.parseBoolean(value));
            } else if ("mlt.qf".equals(key)) {
                mlt.setBoostFactor(Float.parseFloat(value));
            } else if ("mlt.maxdf".equals(key)) {
                mlt.setMaxDocFreq(Integer.parseInt(value));
            } else if ("mlt.maxdfp".equals(key)) {
                mlt.setMaxDocFreqPct(Integer.parseInt(value));
            } else if ("mlt.maxntp".equals(key)) {
                mlt.setMaxNumTokensParsed(Integer.parseInt(value));
            } else if ("mlt.maxqt".equals(key)) {
                mlt.setMaxQueryTerms(Integer.parseInt(value));
            } else if ("mlt.maxwl".equals(key)) {
                mlt.setMaxWordLen(Integer.parseInt(value));
            } else if ("mlt.minwl".equals(key)) {
                mlt.setMinWordLen(Integer.parseInt(value));
            }
        }
        if (text != null) {
            if (FieldNames.PATH.equals(fields[0])) {
                // the text is a path: try to find the document it points to
                IndexSearcher searcher = new IndexSearcher(reader);
                TermQuery q = new TermQuery(new Term(FieldNames.PATH, text));
                TopDocs top = searcher.search(q, 1);
                if (top.totalHits == 0) {
                    mlt.setFieldNames(fields);
                    moreLikeThisQuery = mlt.like(new StringReader(text), mlt.getFieldNames()[0]);
                } else {
                    ScoreDoc d = top.scoreDocs[0];
                    Document doc = reader.document(d.doc);
                    // compare on every stored field of the hit except the path itself
                    List<String> fieldNames = new ArrayList<String>();
                    for (IndexableField f : doc.getFields()) {
                        if (!FieldNames.PATH.equals(f.name())) {
                            fieldNames.add(f.name());
                        }
                    }
                    String[] docFields = fieldNames.toArray(new String[fieldNames.size()]);
                    mlt.setFieldNames(docFields);
                    moreLikeThisQuery = mlt.like(d.doc);
                }
            } else {
                mlt.setFieldNames(fields);
                moreLikeThisQuery = mlt.like(new StringReader(text), mlt.getFieldNames()[0]);
            }
        }
        return moreLikeThisQuery;
    } catch (Exception e) {
        // keep the original failure (including the "Unparsable ..." case above) as the cause
        throw new RuntimeException("could not handle MLT query " + mltQueryString, e);
    }
}

From source file:org.apache.mahout.text.LuceneIndexHelper.java

License:Apache License

/**
 * Verifies that document 0 of the index carries a stored field named
 * {@code fieldName}.
 *
 * <p>Only document 0 is inspected, so the check is a heuristic for the
 * whole index.
 *
 * @param reader    the index reader to inspect
 * @param fieldName the name of the field that is expected to be stored
 * @throws IllegalArgumentException if document 0 has no such field or the
 *                                  field's type is not stored
 * @throws IOException              if document 0 cannot be read
 */
public static void fieldShouldExistInIndex(IndexReader reader, String fieldName) throws IOException {
    IndexableField candidate = reader.document(0).getField(fieldName);
    boolean storedInFirstDoc = candidate != null && candidate.fieldType().stored();
    if (storedInFirstDoc) {
        return;
    }
    throw new IllegalArgumentException("Field '" + fieldName
            + "' is possibly not stored since first document in index does not contain this field.");
}

From source file:org.apache.maven.index.AbstractRepoNexusIndexerTest.java

License:Apache License

/**
 * Checks the packaging data in the index.
 *
 * <p>First, for every live document that has a UINFO value, the INFO value
 * must not start with the text "null". Then an exact packaging search for
 * "tar.gz" and for "zip" must each return exactly one artifact whose
 * packaging and file extension equal the searched value.
 */
public void testPackaging() throws Exception {
    // NOTE(review): the acquired searcher is never handed back here - confirm
    // whether the context expects a matching release call.
    IndexReader reader = context.acquireIndexSearcher().getIndexReader();

    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        // a null bit set means there is nothing to skip
        if (liveDocs != null && !liveDocs.get(docId)) {
            continue;
        }
        Document document = reader.document(docId);
        if (document.get(ArtifactInfo.UINFO) == null) {
            continue;
        }
        String info = document.get(ArtifactInfo.INFO);
        assertFalse("Bad:" + info, info.startsWith("null"));
    }

    // {
    // Query query = new TermQuery( new Term( MAVEN.PACKAGING, "jar" ) );
    // FlatSearchResponse response = nexusIndexer.searchFlat(new FlatSearchRequest(query));
    // assertEquals(response.getResults().toString(), 22, response.getTotalHits());
    // }
    for (String packaging : new String[] { "tar.gz", "zip" }) {
        Query query = nexusIndexer.constructQuery(MAVEN.PACKAGING, packaging, SearchType.EXACT);
        FlatSearchResponse response = nexusIndexer.searchFlat(new FlatSearchRequest(query));
        assertEquals(response.getResults().toString(), 1, response.getTotalHits());

        ArtifactInfo ai = response.getResults().iterator().next();
        assertEquals(packaging, ai.getPackaging());
        assertEquals(packaging, ai.getFileExtension());
    }
}