Example usage for org.apache.lucene.search IndexSearcher search

List of usage examples for org.apache.lucene.search IndexSearcher search

Introduction

On this page you can find example usages of the search method of org.apache.lucene.search.IndexSearcher.

Prototype

public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager)
        throws IOException 

Source Link

Document

Lower-level search API.

Usage

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Runs a facet search over the indexes of the given files and returns the child count of
 * the resulting UID facet.
 *
 * @param files feature files whose indexes are searched
 * @param query a query to filter the indexed documents
 * @return the {@code childCount} of the facet result, or 0 when {@code files} is empty, the
 *         combined index holds no documents, or no facet result is produced
 * @throws IOException if reading the indexes fails
 */
public int getTotalVariationsCountFacet(List<? extends FeatureFile> files, Query query) throws IOException {
    if (CollectionUtils.isEmpty(files)) {
        return 0;
    }

    SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(files);
    try (MultiReader reader = openMultiReader(indexes)) {
        final int docCount = reader.numDocs();
        if (docCount == 0) {
            return 0;
        }

        // Collect every matching document so the facet counts can be computed afterwards.
        FacetsCollector collector = new FacetsCollector();
        new IndexSearcher(reader).search(query, collector);

        DefaultSortedSetDocValuesReaderState readerState =
                new DefaultSortedSetDocValuesReaderState(reader, FeatureIndexFields.FACET_UID.fieldName);
        Facets uidFacets = new SortedSetDocValuesFacetCounts(readerState, collector);

        // The document count is used as the limit so that no facet child is cut off.
        FacetResult uidResult = uidFacets.getTopChildren(docCount, FeatureIndexFields.F_UID.getFieldName());
        return uidResult == null ? 0 : uidResult.childCount;
    } finally {
        // The directories are opened outside the try-with-resources, so close them explicitly.
        for (SimpleFSDirectory directory : indexes) {
            IOUtils.closeQuietly(directory);
        }
    }
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Groups variations from specified {@link List} of {@link VcfFile}s by specified field.
 *
 * @param files a {@link List} of {@link VcfFile}s, which indexes to search
 * @param query a query to search in index
 * @param groupBy a field to perform grouping
 * @return a {@link List} of {@link Group}s, mapping field value to number of variations, having this value;
 *         an empty list when {@code files} is empty or the combined index holds no documents
 * @throws IOException if something goes wrong with the file system
 */
public List<Group> groupVariations(List<VcfFile> files, Query query, String groupBy) throws IOException {
    if (CollectionUtils.isEmpty(files)) {
        return Collections.emptyList();
    }

    SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(files);
    List<Group> groups = new ArrayList<>();

    try (MultiReader reader = openMultiReader(indexes)) {
        final int docCount = reader.numDocs();
        if (docCount == 0) {
            return Collections.emptyList();
        }

        IndexSearcher searcher = new IndexSearcher(reader);
        String groupField = getGroupByField(files, groupBy);
        AbstractGroupFacetCollector collector = TermGroupFacetCollector.createTermGroupFacetCollector(
                FeatureIndexFields.UID.fieldName, groupField, false, null, GROUP_INITIAL_SIZE);

        // Computing the grouped facet counts
        searcher.search(query, collector);

        // The document count is used as the size/limit so that no facet entry is dropped.
        TermGroupFacetCollector.GroupedFacetResult merged = collector.mergeSegmentResults(docCount, 1, false);
        List<AbstractGroupFacetCollector.FacetEntry> entries = merged.getFacetEntries(0, docCount);
        entries.forEach(entry -> groups.add(new Group(entry.getValue().utf8ToString(), entry.getCount())));
    } finally {
        // The directories are opened outside the try-with-resources, so close them explicitly.
        for (SimpleFSDirectory directory : indexes) {
            IOUtils.closeQuietly(directory);
        }
    }

    return groups;
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Executes a query, optionally sorted, with a bounded number of results.
 *
 * @param searcher the searcher to run the query with
 * @param query the query to execute
 * @param reader the reader whose document count is the result limit when no explicit limit is given
 * @param maxResultsCount maximum number of hits to return, or {@code null} to use {@code reader.numDocs()}
 * @param sort the sort order to apply, or {@code null} for the unsorted search overload
 * @return the top documents found
 * @throws IOException if the search fails
 */
private TopDocs performSearch(IndexSearcher searcher, Query query, IndexReader reader, Integer maxResultsCount,
        Sort sort) throws IOException {
    final int limit = maxResultsCount != null ? maxResultsCount : reader.numDocs();
    if (sort == null) {
        return searcher.search(query, limit);
    }
    return searcher.search(query, limit, sort);
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Returns a {@code List} of chromosome IDs for a project, specified by ID, where variations exist and satisfy a
 * specified query/*from  w  w w.  j  a v a  2s . c  om*/
 *
 * @param projectId an ID of a project, which index to query
 * @param query     a query to filter variations
 * @return a {@code List} of chromosome IDs
 * @throws IOException
 */
public List<Long> getChromosomeIdsWhereVariationsPresentFacet(long projectId, Query query) throws IOException {
    List<Long> chromosomeIds = new ArrayList<>();

    try (Directory index = fileManager.getIndexForProject(projectId);
            IndexReader reader = DirectoryReader.open(index)) {
        if (reader.numDocs() == 0) {
            return Collections.emptyList();
        }

        FacetsCollector facetsCollector = new FacetsCollector();
        IndexSearcher searcher = new IndexSearcher(reader);
        searcher.search(query, facetsCollector);

        Facets facets = new SortedSetDocValuesFacetCounts(new DefaultSortedSetDocValuesReaderState(reader,
                FeatureIndexFields.FACET_CHR_ID.getFieldName()), facetsCollector);
        FacetResult res = facets.getTopChildren(FACET_LIMIT, FeatureIndexFields.CHR_ID.getFieldName());
        if (res == null) {
            return Collections.emptyList();
        }

        for (LabelAndValue labelAndValue : res.labelValues) {
            chromosomeIds.add(Long.parseLong(labelAndValue.label));
        }
    }

    return chromosomeIds;
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Returns a {@code List} of chromosome IDs from specified files, where variations exist and satisfy a
 * specified query/*from ww  w  . j av  a  2s  .  c o m*/
 *
 * @param files a list of {@link FeatureFile}s to search chromosomes
 * @param query     a query to filter variations
 * @return a {@code List} of chromosome IDs
 * @throws IOException
 */
public List<Long> getChromosomeIdsWhereVariationsPresentFacet(List<? extends FeatureFile> files, Query query)
        throws IOException {
    if (CollectionUtils.isEmpty(files)) {
        return Collections.emptyList();
    }

    List<Long> chromosomeIds = new ArrayList<>();

    SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(files);

    try (MultiReader reader = openMultiReader(indexes)) {
        if (reader.numDocs() == 0) {
            return Collections.emptyList();
        }

        FacetsCollector facetsCollector = new FacetsCollector();
        IndexSearcher searcher = new IndexSearcher(reader);
        searcher.search(query, facetsCollector);

        Facets facets = new SortedSetDocValuesFacetCounts(new DefaultSortedSetDocValuesReaderState(reader,
                FeatureIndexFields.FACET_CHR_ID.getFieldName()), facetsCollector);
        FacetResult res = facets.getTopChildren(FACET_LIMIT, FeatureIndexFields.CHR_ID.getFieldName());
        if (res == null) {
            return Collections.emptyList();
        }

        for (LabelAndValue labelAndValue : res.labelValues) {
            chromosomeIds.add(Long.parseLong(labelAndValue.label));
        }
    } finally {
        closeIndexes(indexes);
    }

    return chromosomeIds;
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Searches gene IDs, affected by variations in specified VCF files in a specified project
 *
 * @param projectId an ID of a project to search genes
 * @param gene a prefix of a gene ID to search
 * @param vcfFileIds a {@code List} of IDs of VCF files in project to search for gene IDs
 * @return a {@code Set} of gene IDs, that are affected by some variations in specified VCf files
 * @throws IOException/*  ww  w.j  av a 2 s. c  om*/
 */
public Set<String> searchGenesInVcfFilesInProject(long projectId, String gene, List<Long> vcfFileIds)
        throws IOException {
    if (vcfFileIds == null || vcfFileIds.isEmpty()) {
        return Collections.emptySet();
    }

    BooleanQuery.Builder builder = new BooleanQuery.Builder();

    PrefixQuery geneIdPrefixQuery = new PrefixQuery(
            new Term(FeatureIndexFields.GENE_ID.getFieldName(), gene.toLowerCase()));
    PrefixQuery geneNamePrefixQuery = new PrefixQuery(
            new Term(FeatureIndexFields.GENE_NAME.getFieldName(), gene.toLowerCase()));
    BooleanQuery.Builder geneIdOrNameQuery = new BooleanQuery.Builder();
    geneIdOrNameQuery.add(geneIdPrefixQuery, BooleanClause.Occur.SHOULD);
    geneIdOrNameQuery.add(geneNamePrefixQuery, BooleanClause.Occur.SHOULD);

    builder.add(geneIdOrNameQuery.build(), BooleanClause.Occur.MUST);

    List<Term> terms = vcfFileIds.stream()
            .map(vcfFileId -> new Term(FeatureIndexFields.FILE_ID.getFieldName(), vcfFileId.toString()))
            .collect(Collectors.toList());
    TermsQuery termsQuery = new TermsQuery(terms);
    builder.add(termsQuery, BooleanClause.Occur.MUST);
    BooleanQuery query = builder.build();

    Set<String> geneIds;

    try (Directory index = fileManager.getIndexForProject(projectId);
            IndexReader reader = DirectoryReader.open(index)) {
        if (reader.numDocs() == 0) {
            return Collections.emptySet();
        }

        IndexSearcher searcher = new IndexSearcher(reader);
        final TopDocs docs = searcher.search(query, reader.numDocs());
        final ScoreDoc[] hits = docs.scoreDocs;

        geneIds = fetchGeneIds(hits, searcher);
    } catch (IOException e) {
        LOGGER.error(MessageHelper.getMessage(MessagesConstants.ERROR_FEATURE_INDEX_SEARCH_FAILED), e);
        return Collections.emptySet();
    }

    return geneIds;
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Searches gene IDs in the indexes of the specified VCF files by a gene ID or gene name prefix.
 *
 * @param gene a prefix of a gene ID or gene name to search; must not be {@code null}
 * @param vcfFiles the VCF files whose indexes are searched
 * @return a {@code Set} of gene IDs found; an empty set when {@code vcfFiles} is empty, the combined
 *         index holds no documents, or the index search fails with an {@link IOException} (which is logged)
 * @throws IOException if the index directories for the files cannot be obtained; I/O failures raised
 *         inside the search block below are logged and reported as an empty result instead
 */
public Set<String> searchGenesInVcfFiles(String gene, List<VcfFile> vcfFiles) throws IOException {
    if (CollectionUtils.isEmpty(vcfFiles)) {
        return Collections.emptySet();
    }

    // Lower-case once with a fixed locale: the default-locale variant changes its result with the
    // JVM locale (e.g. Turkish dotless i), which would make the prefix miss indexed terms.
    // NOTE(review): assumes indexed gene IDs/names are lower-cased locale-independently - confirm
    // against the indexing code.
    final String genePrefix = gene.toLowerCase(java.util.Locale.ROOT);

    // The gene prefix may match either the gene ID or the gene name.
    BooleanQuery.Builder geneIdOrNameQuery = new BooleanQuery.Builder();
    geneIdOrNameQuery.add(new PrefixQuery(new Term(FeatureIndexFields.GENE_ID.getFieldName(), genePrefix)),
            BooleanClause.Occur.SHOULD);
    geneIdOrNameQuery.add(new PrefixQuery(new Term(FeatureIndexFields.GENE_NAME.getFieldName(), genePrefix)),
            BooleanClause.Occur.SHOULD);

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(geneIdOrNameQuery.build(), BooleanClause.Occur.MUST);
    BooleanQuery query = builder.build();

    SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(vcfFiles);

    try (MultiReader reader = openMultiReader(indexes)) {
        if (reader.numDocs() == 0) {
            return Collections.emptySet();
        }

        IndexSearcher searcher = new IndexSearcher(reader);
        // The document count is used as the limit so that every match is returned.
        final TopDocs docs = searcher.search(query, reader.numDocs());
        return fetchGeneIds(docs.scoreDocs, searcher);
    } catch (IOException e) {
        LOGGER.error(MessageHelper.getMessage(MessagesConstants.ERROR_FEATURE_INDEX_SEARCH_FAILED), e);
        return Collections.emptySet();
    } finally {
        // The directories are opened outside the try-with-resources and were previously never
        // closed; release them on every exit path, as the sibling search methods do.
        for (SimpleFSDirectory index : indexes) {
            IOUtils.closeQuietly(index);
        }
    }
}

From source file:com.epimorphics.server.indexers.LuceneIndex.java

License:Apache License

/**
 * Search the index for entities which match a lucene query. Use field "label" for
 * searching on labels (e.g. PhraseQuery or TermQuery).
 *
 * @param query the lucene query to execute
 * @param offset number of leading hits to skip; must not be negative
 * @param maxResults maximum number of results to return after skipping {@code offset} hits
 * @return the hits from position {@code offset} onward (at most {@code maxResults}); an empty array
 *         when {@code maxResults} is not positive or fewer than {@code offset + 1} hits exist
 * @throws IllegalArgumentException if {@code offset} is negative
 * @throws EpiException if the underlying index access fails with an {@link IOException}
 */
public LuceneResult[] search(Query query, int offset, int maxResults) {
    if (offset < 0) {
        // A negative offset previously surfaced as an array index error deep in the copy loop.
        throw new IllegalArgumentException("offset must not be negative: " + offset);
    }
    if (maxResults <= 0) {
        // Nothing was asked for; avoid handing Lucene a non-positive hit count.
        return new LuceneResult[0];
    }
    // Sum in long arithmetic so a large offset + maxResults cannot wrap to a negative limit.
    final int searchLimit = (int) Math.min((long) offset + maxResults, Integer.MAX_VALUE);

    try {
        IndexSearcher searcher = searchManager.acquire();
        try {
            ScoreDoc[] hits = searcher.search(query, searchLimit).scoreDocs;
            if (hits.length <= offset) {
                return new LuceneResult[0];
            }
            LuceneResult[] results = new LuceneResult[hits.length - offset];
            for (int i = offset; i < hits.length; i++) {
                ScoreDoc hit = hits[i];
                results[i - offset] = new LuceneResult(searcher.getIndexReader().document(hit.doc), hit.score);
            }
            return results;
        } finally {
            // Always hand the searcher back to the manager, even when the search fails.
            searchManager.release(searcher);
        }
    } catch (IOException e) {
        throw new EpiException(e);
    }
}

From source file:com.esri.gpt.server.assertion.index.AsnIndexAdapter.java

License:Apache License

/**
 * Returns a hit count for a subject, predicate and value combination.
 * <br/>This will only work correctly for values that have not been analyzed
 * prior to indexing.
 * @param context the assertion operation context (not referenced by this method)
 * @param searcher the index searcher
 * @param valueField the value field name
 * @param subject the subject
 * @param predicate the predicate
 * @param value the value
 * @return the cardinality
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if an I/O exception occurs
 */
public long count(AsnContext context, IndexSearcher searcher, String valueField, String subject,
        String predicate, String value) throws CorruptIndexException, IOException {

    // every clause is mandatory: a document must match subject, predicate and value
    BooleanQuery tripleQuery = new BooleanQuery();
    tripleQuery.add(new TermQuery(new Term(AsnConstants.FIELD_RDF_SUBJECT, subject)),
            BooleanClause.Occur.MUST);
    tripleQuery.add(new TermQuery(new Term(AsnConstants.FIELD_RDF_PREDICATE, predicate)),
            BooleanClause.Occur.MUST);
    tripleQuery.add(new TermQuery(new Term(valueField, value)), BooleanClause.Occur.MUST);

    // only the total is needed, so a single top document is requested
    return searcher.search(tripleQuery, 1).totalHits;
}

From source file:com.esri.gpt.server.assertion.index.AsnIndexAdapter.java

License:Apache License

/**
 * Loads the assertion previously cast for the active subject, predicate and user.
 * @param context the assertion operation context
 * @param searcher the index searcher
 * @return the previously cast assertion, or null when the user is anonymous, has an empty
 *         user key, or no matching document is found
 * @throws Exception if an exception occurs
 */
public Assertion loadPreviousUserAssertion(AsnContext context, IndexSearcher searcher) throws Exception {
    AsnOperation operation = context.getOperation();
    String userKey = Val.chkStr(operation.getUserPart().getKey());
    String username = Val.chkStr(operation.getUserPart().getName());

    // anonymous users and users without a key have no previous assertion to look up
    if (username.equalsIgnoreCase(AsnConstants.ANONYMOUS_USERNAME) || userKey.length() == 0) {
        return null;
    }

    AsnAssertionSet asnSet = operation.getAssertionSet();
    AsnValueType vType = asnSet.getValueType();
    String subject = operation.getSubject().getURN();
    String predicate = vType.getRdfPredicate();

    // build a query to match the subject/predicate/user triple
    BooleanQuery tripleQuery = new BooleanQuery();
    tripleQuery.add(new TermQuery(new Term(AsnConstants.FIELD_RDF_SUBJECT, subject)),
            BooleanClause.Occur.MUST);
    tripleQuery.add(new TermQuery(new Term(AsnConstants.FIELD_RDF_PREDICATE, predicate)),
            BooleanClause.Occur.MUST);
    tripleQuery.add(new TermQuery(new Term(AsnConstants.FIELD_USER_KEY, userKey)),
            BooleanClause.Occur.MUST);

    // execute the search; at most one document is requested
    ScoreDoc[] matches = searcher.search(tripleQuery, 1).scoreDocs;
    if (matches == null || matches.length == 0) {
        return null;
    }

    // rebuild the assertion from the stored document of the first hit
    Document document = searcher.getIndexReader().document(matches[0].doc);
    Assertion previous = asnSet.newAssertion(context, false);
    previous.load(document);
    return previous;
}