List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the nth Document in this index.
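A minimal usage sketch of this method (the index path and the "title" field below are hypothetical; the reader API shown matches the 3.x-era examples on this page):

import java.io.File;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

public class DocumentExample {
    public static void main(String[] args) throws IOException {
        // Open a reader over an existing index directory (path is a placeholder).
        IndexReader reader = IndexReader.open(FSDirectory.open(new File("/path/to/index")));
        try {
            // Valid docIDs run from 0 (inclusive) to maxDoc() (exclusive),
            // and that range may include deleted documents.
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (reader.isDeleted(i)) {
                    continue;
                }
                Document doc = reader.document(i);
                // "title" is a hypothetical stored field.
                System.out.println(doc.get("title"));
            }
        } finally {
            reader.close();
        }
    }
}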
From source file:com.downtree.tourbus.search.LocationFilter.java
@Override
public BitSet bits(IndexReader reader) throws IOException {
    BitSet bits = new BitSet(reader.maxDoc());
    TermDocs docs = reader.termDocs(new Term("ferret_class", m_type));
    while (docs.next()) {
        Document doc = reader.document(docs.doc());
        String value = doc.get("latitude");
        if (value == null)
            continue;
        try {
            double latitude = Double.parseDouble(value) * DEG2RAD;
            double longitude = Double.parseDouble(doc.get("longitude")) * DEG2RAD;
            double x = (Math.sin(latitude) * Math.sin(m_centerLat))
                    + (Math.cos(latitude) * Math.cos(m_centerLat) * Math.cos(longitude - m_centerLong));
            double distance = 0;
            if (x > -1 && x < 1) {
                distance = Math.acos(x) * EARTH_RADIUS;
            }
            if (distance <= m_radius) {
                bits.set(docs.doc());
            }
        } catch (Exception e) {
            SolrException.logOnce(SolrCore.log, "Error in location filter", e);
            continue;
        }
    }
    return bits;
}
From source file:com.esri.gpt.catalog.lucene.LuceneQueryAdapter.java
License:Apache License
/**
 * Executes a query against a Lucene index.
 * @param discoveryQuery the query to execute
 */
protected void executeQuery(DiscoveryQuery discoveryQuery)
        throws DiscoveryException, ParseException, CorruptIndexException, IOException {
    IndexSearcher searcher = null;
    try {

        // initialize
        searcher = getIndexAdapter().newSearcher();
        this.maxDoc = searcher.maxDoc();
        boolean bExecuteQuery = true;
        boolean bProcessHits = true;
        RequestContext reqContext = this.getIndexAdapter().getRequestContext();
        BooleanQuery rootQuery = new BooleanQuery();
        DiscoveryFilter discoveryFilter = discoveryQuery.getFilter();
        DiscoveryResult discoveryResult = discoveryQuery.getResult();
        Discoverables returnables = discoveryQuery.getReturnables();
        if ((returnables == null) || (returnables.size() == 0) || (discoveryFilter.getMaxRecords() <= 0)) {
            bProcessHits = false;
        }

        // CSW query provider options
        boolean isDublinCoreResponse = true;
        boolean isBriefResponse = false;
        boolean isSummaryResponse = false;
        QueryOptions cswQueryOptions = (QueryOptions) reqContext.getObjectMap()
                .get("com.esri.gpt.server.csw.provider.components.QueryOptions");

        // build the query (if no query was supplied, we'll query everything)
        LogicalClauseAdapter logicalAdapter = new LogicalClauseAdapter(this);
        LogicalClause rootClause = discoveryFilter.getRootClause();
        if ((rootClause == null) || (rootClause.getClauses().size() == 0)) {
            if (discoveryFilter.getMaxRecords() <= QUERYALL_THRESHOLD) {
                LOGGER.finer("No filter was supplied, querying all...");
                logicalAdapter.appendSelectAll(rootQuery);
            } else {
                LOGGER.finer("No filter was supplied, query will not be executed.");
                bExecuteQuery = false;
            }
        } else {
            logicalAdapter.adaptLogicalClause(rootQuery, rootClause);
            if ((rootQuery.clauses() == null) || (rootQuery.clauses().size() == 0)) {
                bExecuteQuery = false;
            }
        }
        if (!bExecuteQuery)
            return;

        // execute the query and process the hits if required

        // set the sort option
        Sort sortOption = null;
        if (bProcessHits && (searcher.maxDoc() > 0)) {
            sortOption = makeSortOption(discoveryQuery);
        }

        // filters
        Filter filter = null;

        // make the access control filter
        MetadataAcl acl = new MetadataAcl(reqContext);
        AuthenticationStatus auth = reqContext.getUser().getAuthenticationStatus();
        boolean bAdmin = auth.getAuthenticatedRoles().hasRole("gptAdministrator");
        if (!bAdmin && !acl.isPolicyUnrestricted()) {
            String[] aclValues = acl.makeUserAcl();
            filter = new AclFilter(Storeables.FIELD_ACL, aclValues);
        }

        // isPartOf filter
        filter = IsPartOfFilter.make(reqContext, filter);

        // make the schema filter
        if (cswQueryOptions != null) {
            String schemaName = Val.chkStr(cswQueryOptions.getSchemaFilter());
            if (schemaName.length() > 0) {
                filter = new SchemaFilter(schemaName, filter);
                isDublinCoreResponse = cswQueryOptions.isDublinCoreResponse();
                if (!isDublinCoreResponse) {
                    String elementSetType = Val.chkStr(cswQueryOptions.getElementSetType());
                    if (elementSetType.equalsIgnoreCase("brief")) {
                        isBriefResponse = true;
                    } else if (elementSetType.equalsIgnoreCase("summary")) {
                        isSummaryResponse = true;
                    }
                }
            }
        }

        // determine the start/end positions
        int startRecord = discoveryFilter.getStartRecord() - 1;
        int maxRecords = discoveryFilter.getMaxRecords();
        if (startRecord < 0)
            startRecord = 0;
        int recordsPerPage = maxRecords;
        if (recordsPerPage <= 0)
            recordsPerPage = 1;
        int hitsToReturn = startRecord + recordsPerPage;
        int nextRecord = 0;
        int numDocs = 0;

        // execute the query
        LOGGER.finer("Executing Lucene Query:\n" + rootQuery);
        TopDocs topDocs = null;
        if (filter != null) {
            if (sortOption != null) {
                topDocs = searcher.search(rootQuery, filter, hitsToReturn, sortOption);
            } else {
                topDocs = searcher.search(rootQuery, filter, hitsToReturn);
            }
        } else {
            if (sortOption != null) {
                topDocs = searcher.search(rootQuery, filter, hitsToReturn, sortOption);
            } else {
                topDocs = searcher.search(rootQuery, hitsToReturn);
            }
        }

        // determine the hit count
        int totalHits = topDocs.totalHits;
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if ((scoreDocs != null) && (scoreDocs.length > 0)) {
            numDocs = scoreDocs.length;
            if (totalHits > numDocs) {
                nextRecord = numDocs + 1;
            }
        }
        discoveryResult.setNumberOfHits(totalHits);
        LOGGER.finer("Total query hits: " + totalHits);
        if (startRecord > (totalHits - 1))
            bProcessHits = false;
        if (maxRecords <= 0)
            bProcessHits = false;
        int nTotal = numDocs - startRecord;
        if (!bProcessHits)
            return;

        // warn if many records were requested
        if (nTotal >= TOOMANY_WARNING_THRESHOLD) {
            LOGGER.warning("A request to process " + nTotal
                    + " discovery records was received and will be executed.\n" + discoveryQuery.toString());
        }

        // process the hits, build the results
        LOGGER.finer("Processing " + nTotal + " records from: " + (startRecord + 1) + " to: " + numDocs);
        Storeable storeable;
        DiscoveredRecords records = discoveryResult.getRecords();
        IndexReader reader = searcher.getIndexReader();
        for (int i = startRecord; i < numDocs; i++) {
            ScoreDoc scoreDoc = scoreDocs[i];
            Document document = reader.document(scoreDoc.doc);
            DiscoveredRecord record = new DiscoveredRecord();

            // Dublin Core based responses
            if (isDublinCoreResponse) {
                for (Discoverable target : returnables) {
                    ArrayList<Object> values = new ArrayList<Object>();
                    storeable = (Storeable) target.getStorable();
                    if (storeable instanceof AnyTextProperty) {
                        values = null;
                    } else if (storeable instanceof GeometryProperty) {
                        GeometryProperty geom = (GeometryProperty) storeable;
                        values.add(geom.readEnvelope(document));
                    } else if (target.getMeaning().getMeaningType().equals(PropertyMeaningType.XMLURL)) {
                        String uuid = document.get(Storeables.FIELD_UUID);
                        uuid = URLEncoder.encode(uuid, "UTF-8");
                        values.add("?getxml=" + uuid);
                    } else {
                        DatastoreField retrievalField = storeable.getRetrievalField();
                        Field[] fields = document.getFields(retrievalField.getName());
                        if (fields != null) {
                            for (Field f : fields) {
                                Object value = retrievalField.makeValueToReturn(f.stringValue());
                                values.add(value);
                            }
                        }
                    }
                    if (values != null) {
                        Object[] oValues = null;
                        if (values.size() >= 0)
                            oValues = values.toArray();
                        record.addField(target, oValues);
                    }
                }

            // non Dublin Core based responses
            } else {
                String responseXml = null;
                if (isBriefResponse && (responseXml == null)) {
                    Field field = document.getField(Storeables.FIELD_XML_BRIEF);
                    if (field != null) {
                        responseXml = field.stringValue();
                    }
                } else if (isSummaryResponse && (responseXml == null)) {
                    Field field = document.getField(Storeables.FIELD_XML_SUMMARY);
                    if (field != null) {
                        responseXml = field.stringValue();
                    }
                } else if (responseXml == null) {
                    Field field = document.getField(Storeables.FIELD_XML);
                    if (field != null) {
                        responseXml = field.stringValue();
                    }
                }
                record.setResponseXml(responseXml);
            }

            records.add(record);
        }
        int nPopulated = records.size();
        LOGGER.finer("Populated " + nPopulated + " records.");

    } finally {
        getIndexAdapter().closeSearcher(searcher);
    }
}
From source file:com.esri.gpt.server.assertion.handler.AsnCommentHandler.java
License:Apache License
/**
 * Queries comments.
 * @param context the assertion operation context
 * @throws Exception if an exception occurs
 */
private void query(AsnContext context) throws Exception {

    // initialize
    AsnOperation operation = context.getOperation();
    AsnAssertionSet asnSet = operation.getAssertionSet();
    AsnValueType vType = asnSet.getValueType();
    String subject = operation.getSubject().getURN();
    String predicate = vType.getRdfPredicate();

    // build a query to match all occurrences of the subject/predicate pair
    BooleanQuery query = new BooleanQuery();
    Query qSubject = new TermQuery(new Term(AsnConstants.FIELD_RDF_SUBJECT, subject));
    Query qPredicate = new TermQuery(new Term(AsnConstants.FIELD_RDF_PREDICATE, predicate));
    query.add(qSubject, BooleanClause.Occur.MUST);
    query.add(qPredicate, BooleanClause.Occur.MUST);

    // sort on descending timestamp
    String tsField = AsnConstants.FIELD_SYS_TIMESTAMP;
    Sort sortOption = new Sort(new SortField(tsField, SortField.STRING, true));

    // determine the start and end positions
    int startRecord = context.getRequestOptions().getStartRecord() - 1;
    int maxRecords = context.getRequestOptions().getMaxRecords();
    if (startRecord < 0)
        startRecord = 0;
    int recordsPerPage = maxRecords;
    if (recordsPerPage <= 0)
        recordsPerPage = 1;
    int hitsToReturn = startRecord + recordsPerPage;
    int nextRecord = 0;
    int numDocs = 0;

    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {

        // make the reader and searcher, execute the search
        reader = this.getIndexAdapter().makeIndexReader();
        searcher = new IndexSearcher(reader);
        TopDocs topDocs = searcher.search(query, null, hitsToReturn, sortOption);
        ScoreDoc[] scoreDocs = null;
        int totalHits = topDocs.totalHits;
        if (maxRecords > 0) {
            scoreDocs = topDocs.scoreDocs;
            if ((scoreDocs != null) && (scoreDocs.length > 0)) {
                numDocs = scoreDocs.length;
                if (totalHits > numDocs) {
                    nextRecord = numDocs + 1;
                }
            }
        }

        // root property for the response
        String rootSubject = subject;
        String rootPredicate = operation.getPredicate().getURN() + "response";
        AsnProperty rootProp = new AsnProperty(rootSubject, rootPredicate, null);

        // hit count and next record
        String queryPfx = asnSet.getURNPrefix() + ":query";
        rootProp.getChildren().add(new AsnProperty(null, queryPfx + ":hits", "" + totalHits));
        if (nextRecord > 0) {
            rootProp.getChildren().add(new AsnProperty(null, queryPfx + ":nextRecord", "" + nextRecord));
        }

        // canCreate capability for the active user
        String canCreatePred = asnSet.getURNPrefix() + ":activeUser:canCreate";
        String canCreateVal = "" + context.getAuthorizer().canCreate(context, asnSet.getAuthPolicy());
        rootProp.getChildren().add(new AsnProperty(null, canCreatePred, canCreateVal));

        // process the documents, generate the response
        AsnAssertionRenderer renderer = new AsnAssertionRenderer();
        for (int i = startRecord; i < numDocs; i++) {
            Document document = reader.document(scoreDocs[i].doc);
            Assertion assertion = asnSet.newAssertion(context, false);
            assertion.load(document);
            rootProp.getChildren().add(renderer.makeProperty(context, assertion));
        }
        context.getOperationResponse().generateResponse(context, rootProp.getChildren());

    } finally {
        this.getIndexAdapter().closeReader(reader);
        this.getIndexAdapter().closeSearcher(searcher);
    }
}
From source file:com.esri.gpt.server.assertion.index.AsnIndexAdapter.java
License:Apache License
/**
 * Loads an assertion based upon the subject id of the active operation.
 * @param context the assertion operation context
 * @param mustExist <code>true</code> if the assertion must exist
 * @return the assertion (null if not found)
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if an I/O exception occurs
 * @throws AsnInvalidOperationException if mustExist and the assertion was not found
 */
public Assertion loadAssertionById(AsnContext context, boolean mustExist)
        throws CorruptIndexException, IOException, AsnInvalidOperationException {
    TermDocs termDocs = null;
    IndexReader reader = null;
    AsnOperation operation = context.getOperation();
    try {
        String assertionId = Val.chkStr(operation.getSubject().getValuePart());
        if (assertionId.length() > 0) {
            reader = this.makeIndexReader();
            termDocs = reader.termDocs();
            termDocs.seek(new Term(AsnConstants.FIELD_SYS_ASSERTIONID, assertionId));
            if (termDocs.next()) {
                Document document = reader.document(termDocs.doc());
                Assertion assertion = operation.getAssertionSet().newAssertion(context, false);
                assertion.load(document);
                return assertion;
            }
        }
    } finally {
        try {
            if (termDocs != null)
                termDocs.close();
        } catch (Exception ef) {
            // ignore close failures
        }
        this.closeReader(reader);
    }
    if (mustExist) {
        String msg = "This assertion id was not found - " + operation.getSubject().getURN();
        throw new AsnInvalidOperationException(msg);
    }
    return null;
}
From source file:com.flaptor.hounder.indexer.IndexManager.java
License:Apache License
/**
 * Searches the whole index to find the largest AddId.
 * @return the largest AddId found in the index.
 */
private long findLargestAddId() {
    long max = 1;
    IndexReader reader = null;
    try {
        reader = workIndex.getReader();
        int num = reader.maxDoc();
        for (int i = 0; i < num; i++) {
            if (!reader.isDeleted(i)) {
                String val = reader.document(i).get("AddId");
                if (null != val) {
                    long n = Long.parseLong(val);
                    if (max < n) {
                        max = n;
                    }
                }
            }
        }
    } catch (IOException e) {
        logger.fatal("Could not read from the index to get the last AddId." + e);
        throw new RuntimeException("Error reading the index when looking for initial AddId.", e);
    } finally {
        Execute.close(reader, logger);
    }
    logger.debug("Largest AddId found: " + max);
    return max;
}
From source file:com.flaptor.hounder.indexer.LuceneUnicodeTest.java
License:Apache License
/**
 * The only test case.
 * Generates a bunch of interesting test strings, indexes them and then
 * gets them from the index to compare with the original.
 */
@TestInfo(testType = TestInfo.TestType.UNIT)
public void testStoredContent() {
    try {
        String testString = getTestString();
        logger.debug("Using test string: " + testString);
        Document doc = new Document();
        doc.add(new Field("field1", testString, Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);
        writer.optimize();
        writer.close();
        IndexReader reader = IndexReader.open(dir);
        Document doc2 = reader.document(0);
        String recoveredString = doc2.get("field1");
        logger.debug("Recovered String: " + recoveredString);
        assertTrue("Strings do not match", testString.equals(recoveredString));
    } catch (Exception e) {
        logger.error("Exception caught:" + e);
        assertTrue("exception", false);
    }
}
From source file:com.flaptor.hounder.util.Idx.java
License:Apache License
public static void main(String arg[]) throws Exception {
    check(arg.length > 1, null);
    String cmd = arg[0];
    File idx = new File(arg[1]);
    if ("list".equals(cmd)) {
        int num = (arg.length > 2) ? Integer.parseInt(arg[2]) : -1;
        check(idx.exists(), "Index dir not found");
        IndexReader reader = IndexReader.open(idx);
        int docs = reader.numDocs();
        int max = reader.maxDoc();
        System.err.println("Index contains " + docs + " documents plus " + (max - docs) + " deleted.");
        if (num > -1) {
            if (num == 0)
                num = docs;
            for (int i = 0; i < max && i < num; i++) {
                System.out.println("----------------------------------------");
                if (!reader.isDeleted(i)) {
                    Document doc = reader.document(i);
                    List flds = doc.getFields();
                    Iterator iter = flds.iterator();
                    while (iter.hasNext()) {
                        Field fld = (Field) iter.next();
                        String attr = (fld.isIndexed() ? ",i" : "") + (fld.isStored() ? ",s" : "")
                                + (fld.isTokenized() ? ",t" : "");
                        System.out.println(fld.name() + attr + ": " + fld.stringValue());
                    }
                }
            }
            reader.close();
            System.out.println();
        }
    } else if ("search".equals(cmd)) {
        check(idx.exists(), "Index dir not found");
        check(arg.length > 3, "Not enough arguments");
        String field = arg[2];
        String value = arg[3];
        IndexSearcher searcher = new IndexSearcher(IndexReader.open(idx));
        ScorelessHitCollector collector = new HashSetScorelessHitCollector();
        searcher.search(new TermQuery(new Term(field, value)), collector);
        Set<Integer> docIds = collector.getMatchingDocuments();
        System.out.println("\nNumber of hits: " + docIds.size() + "\n");
        for (Integer docId : docIds) {
            Document doc = searcher.doc(docId);
            List flds = doc.getFields();
            Iterator iter = flds.iterator();
            while (iter.hasNext()) {
                Field fld = (Field) iter.next();
                System.out.println(fld.name() + ": " + fld.stringValue());
            }
        }
        searcher.close();
        System.out.println();
    } else if ("delete".equals(cmd)) {
        check(idx.exists(), "Index dir not found");
        check(arg.length > 3, "Not enough arguments");
        String field = arg[2];
        String value = arg[3];
        IndexReader reader = IndexReader.open(idx);
        reader.deleteDocuments(new Term(field, value));
        reader.close();
    } else if ("optimize".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.optimize();
        writer.close();
    } else if ("merge".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        File idx2 = new File(arg[2]);
        check(idx.exists(), "Index dir 1 not found");
        check(idx2.exists(), "Index dir 2 not found");
        IndexReader reader = IndexReader.open(idx2);
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.addIndexes(new IndexReader[] { reader });
        writer.close();
        reader.close();
    } else if ("term-count".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        check(idx.exists(), "Index dir not found");
        IndexReader reader = IndexReader.open(idx);
        String field = arg[2];
        int count = 0;
        TermEnum terms = reader.terms();
        while (terms.next()) {
            Term term = terms.term();
            if (term.field().equals(field))
                count++;
        }
        terms.close();
        reader.close();
        System.out.println("Found " + count + " different values for field " + field);
    } else if ("hit-count".equals(cmd)) {
        check(arg.length > 3, "Not enough arguments");
        check(idx.exists(), "Index dir not found");
        String field = arg[2];
        String value = arg[3];
        IndexSearcher searcher = new IndexSearcher(IndexReader.open(idx));
        CountingHitCollector collector = new CountingHitCollector();
        searcher.search(new TermQuery(new Term(field, value)), collector);
        System.out.println("\nNumber of hits: " + collector.getDocCount() + "\n");
        searcher.close();
    } else if ("uncompound".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.setUseCompoundFile(false);
        writer.optimize();
        writer.close();
    } else if ("compound".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.setUseCompoundFile(true);
        writer.optimize();
        writer.close();
    } else if ("terms".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        check(idx.exists(), "Index dir not found");
        String field = arg[2];
        IndexReader reader = IndexReader.open(idx);
        TermEnum terms = reader.terms();
        while (terms.next()) {
            Term t = terms.term();
            if (t.field().equals(field)) {
                System.out.println(t.text());
            }
        }
    }
}
From source file:com.github.flaxsearch.resources.DocumentResource.java
License:Apache License
@GET
public DocumentData getDocument(@QueryParam("segment") Integer segment, @PathParam("docId") int doc)
        throws IOException {
    IndexReader reader = segment == null ? readerManager.getIndexReader()
            : readerManager.getLeafReader(segment);
    // valid docIDs run from 0 (inclusive) to maxDoc() (exclusive)
    if (doc < 0 || doc >= reader.maxDoc()) {
        throw new WebApplicationException("Unknown document " + doc, Response.Status.NOT_FOUND);
    }
    Document document = reader.document(doc);
    return new DocumentData(document);
}
From source file:com.github.rnewson.couchdb.lucene.Progress.java
License:Apache License
public void load(final IndexReader reader) throws IOException {
    progress = newDocument();
    final TermDocs termDocs = reader.termDocs(PROGRESS_TERM);
    try {
        while (termDocs.next()) {
            final int doc = termDocs.doc();
            if (!reader.isDeleted(doc)) {
                progress = reader.document(doc);
            }
        }
    } finally {
        termDocs.close();
    }
}
From source file:com.github.tteofili.looseen.MinHashClassifier.java
License:Apache License
public MinHashClassifier(IndexReader reader, String textField, String categoryField, int min, int hashCount,
        int hashSize) {
    this.min = min;
    this.hashCount = hashCount;
    this.hashSize = hashSize;
    try {
        Analyzer analyzer = createMinHashAnalyzer(min, hashCount, hashSize);
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        directory = new RAMDirectory();
        IndexWriter writer = new IndexWriter(directory, config);
        for (int i = 0; i < reader.maxDoc(); i++) {
            Document document = new Document();
            Document d = reader.document(i);
            String textValue = d.getField(textField).stringValue();
            String categoryValue = d.getField(categoryField).stringValue();
            document.add(new TextField(TEXT_FIELD, textValue, Field.Store.NO));
            document.add(new StringField(CLASS_FIELD, categoryValue, Field.Store.YES));
            writer.addDocument(document);
        }
        writer.commit();
        writer.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
}