List of usage examples for org.apache.lucene.search IndexSearcher search
public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager) throws IOException
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/**
 * Runs the query against the indexes of the given files, collecting facets, and returns the
 * child count of the facet result requested for the {@code F_UID} dimension.
 *
 * @param files the feature files whose indexes are searched; a {@code null} or empty list yields 0
 * @param query the query used to select documents
 * @return the {@code childCount} of the facet result, or 0 when there are no files, the combined
 *         reader holds no documents, or no facet result is produced
 * @throws IOException if reading the indexes fails
 */
public int getTotalVariationsCountFacet(List<? extends FeatureFile> files, Query query) throws IOException {
    if (CollectionUtils.isEmpty(files)) {
        return 0;
    }

    SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(files);
    try (MultiReader reader = openMultiReader(indexes)) {
        final int docCount = reader.numDocs();
        if (docCount == 0) {
            return 0;
        }

        FacetsCollector collector = new FacetsCollector();
        new IndexSearcher(reader).search(query, collector);

        // The reader state is built only after the search has populated the collector.
        DefaultSortedSetDocValuesReaderState readerState =
                new DefaultSortedSetDocValuesReaderState(reader, FeatureIndexFields.FACET_UID.fieldName);
        Facets facets = new SortedSetDocValuesFacetCounts(readerState, collector);

        // Ask for as many children as there are documents so that no child is cut off.
        FacetResult uidFacet = facets.getTopChildren(docCount, FeatureIndexFields.F_UID.getFieldName());
        return uidFacet == null ? 0 : uidFacet.childCount;
    } finally {
        // The directories are opened by fileManager and are not owned by the reader.
        for (SimpleFSDirectory directory : indexes) {
            IOUtils.closeQuietly(directory);
        }
    }
}
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/** * Groups variations from specified {@link List} of {@link VcfFile}s by specified field * @param files a {@link List} of {@link FeatureFile}, which indexes to search * @param query a query to search in index * @param groupBy a field to perform grouping * @return a {@link List} of {@link Group}s, mapping field value to number of variations, having this value * @throws IOException if something goes wrong with the file system *//*from w w w .j a v a 2 s . com*/ public List<Group> groupVariations(List<VcfFile> files, Query query, String groupBy) throws IOException { List<Group> res = new ArrayList<>(); if (CollectionUtils.isEmpty(files)) { return Collections.emptyList(); } SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(files); try (MultiReader reader = openMultiReader(indexes)) { if (reader.numDocs() == 0) { return Collections.emptyList(); } IndexSearcher searcher = new IndexSearcher(reader); AbstractGroupFacetCollector groupedFacetCollector = TermGroupFacetCollector .createTermGroupFacetCollector(FeatureIndexFields.UID.fieldName, getGroupByField(files, groupBy), false, null, GROUP_INITIAL_SIZE); searcher.search(query, groupedFacetCollector); // Computing the grouped facet counts TermGroupFacetCollector.GroupedFacetResult groupedResult = groupedFacetCollector .mergeSegmentResults(reader.numDocs(), 1, false); List<AbstractGroupFacetCollector.FacetEntry> facetEntries = groupedResult.getFacetEntries(0, reader.numDocs()); for (AbstractGroupFacetCollector.FacetEntry facetEntry : facetEntries) { res.add(new Group(facetEntry.getValue().utf8ToString(), facetEntry.getCount())); } } finally { for (SimpleFSDirectory index : indexes) { IOUtils.closeQuietly(index); } } return res; }
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
private TopDocs performSearch(IndexSearcher searcher, Query query, IndexReader reader, Integer maxResultsCount, Sort sort) throws IOException { final TopDocs docs; int resultsCount = maxResultsCount == null ? reader.numDocs() : maxResultsCount; if (sort == null) { docs = searcher.search(query, resultsCount); } else {// w w w . j a va 2 s . co m docs = searcher.search(query, resultsCount, sort); } return docs; }
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/** * Returns a {@code List} of chromosome IDs for a project, specified by ID, where variations exist and satisfy a * specified query/*from w w w. j a v a 2s . c om*/ * * @param projectId an ID of a project, which index to query * @param query a query to filter variations * @return a {@code List} of chromosome IDs * @throws IOException */ public List<Long> getChromosomeIdsWhereVariationsPresentFacet(long projectId, Query query) throws IOException { List<Long> chromosomeIds = new ArrayList<>(); try (Directory index = fileManager.getIndexForProject(projectId); IndexReader reader = DirectoryReader.open(index)) { if (reader.numDocs() == 0) { return Collections.emptyList(); } FacetsCollector facetsCollector = new FacetsCollector(); IndexSearcher searcher = new IndexSearcher(reader); searcher.search(query, facetsCollector); Facets facets = new SortedSetDocValuesFacetCounts(new DefaultSortedSetDocValuesReaderState(reader, FeatureIndexFields.FACET_CHR_ID.getFieldName()), facetsCollector); FacetResult res = facets.getTopChildren(FACET_LIMIT, FeatureIndexFields.CHR_ID.getFieldName()); if (res == null) { return Collections.emptyList(); } for (LabelAndValue labelAndValue : res.labelValues) { chromosomeIds.add(Long.parseLong(labelAndValue.label)); } } return chromosomeIds; }
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/** * Returns a {@code List} of chromosome IDs from specified files, where variations exist and satisfy a * specified query/*from ww w . j av a 2s . c o m*/ * * @param files a list of {@link FeatureFile}s to search chromosomes * @param query a query to filter variations * @return a {@code List} of chromosome IDs * @throws IOException */ public List<Long> getChromosomeIdsWhereVariationsPresentFacet(List<? extends FeatureFile> files, Query query) throws IOException { if (CollectionUtils.isEmpty(files)) { return Collections.emptyList(); } List<Long> chromosomeIds = new ArrayList<>(); SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(files); try (MultiReader reader = openMultiReader(indexes)) { if (reader.numDocs() == 0) { return Collections.emptyList(); } FacetsCollector facetsCollector = new FacetsCollector(); IndexSearcher searcher = new IndexSearcher(reader); searcher.search(query, facetsCollector); Facets facets = new SortedSetDocValuesFacetCounts(new DefaultSortedSetDocValuesReaderState(reader, FeatureIndexFields.FACET_CHR_ID.getFieldName()), facetsCollector); FacetResult res = facets.getTopChildren(FACET_LIMIT, FeatureIndexFields.CHR_ID.getFieldName()); if (res == null) { return Collections.emptyList(); } for (LabelAndValue labelAndValue : res.labelValues) { chromosomeIds.add(Long.parseLong(labelAndValue.label)); } } finally { closeIndexes(indexes); } return chromosomeIds; }
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/** * Searches gene IDs, affected by variations in specified VCF files in a specified project * * @param projectId an ID of a project to search genes * @param gene a prefix of a gene ID to search * @param vcfFileIds a {@code List} of IDs of VCF files in project to search for gene IDs * @return a {@code Set} of gene IDs, that are affected by some variations in specified VCf files * @throws IOException/* ww w.j av a 2 s. c om*/ */ public Set<String> searchGenesInVcfFilesInProject(long projectId, String gene, List<Long> vcfFileIds) throws IOException { if (vcfFileIds == null || vcfFileIds.isEmpty()) { return Collections.emptySet(); } BooleanQuery.Builder builder = new BooleanQuery.Builder(); PrefixQuery geneIdPrefixQuery = new PrefixQuery( new Term(FeatureIndexFields.GENE_ID.getFieldName(), gene.toLowerCase())); PrefixQuery geneNamePrefixQuery = new PrefixQuery( new Term(FeatureIndexFields.GENE_NAME.getFieldName(), gene.toLowerCase())); BooleanQuery.Builder geneIdOrNameQuery = new BooleanQuery.Builder(); geneIdOrNameQuery.add(geneIdPrefixQuery, BooleanClause.Occur.SHOULD); geneIdOrNameQuery.add(geneNamePrefixQuery, BooleanClause.Occur.SHOULD); builder.add(geneIdOrNameQuery.build(), BooleanClause.Occur.MUST); List<Term> terms = vcfFileIds.stream() .map(vcfFileId -> new Term(FeatureIndexFields.FILE_ID.getFieldName(), vcfFileId.toString())) .collect(Collectors.toList()); TermsQuery termsQuery = new TermsQuery(terms); builder.add(termsQuery, BooleanClause.Occur.MUST); BooleanQuery query = builder.build(); Set<String> geneIds; try (Directory index = fileManager.getIndexForProject(projectId); IndexReader reader = DirectoryReader.open(index)) { if (reader.numDocs() == 0) { return Collections.emptySet(); } IndexSearcher searcher = new IndexSearcher(reader); final TopDocs docs = searcher.search(query, reader.numDocs()); final ScoreDoc[] hits = docs.scoreDocs; geneIds = fetchGeneIds(hits, searcher); } catch (IOException e) { 
LOGGER.error(MessageHelper.getMessage(MessagesConstants.ERROR_FEATURE_INDEX_SEARCH_FAILED), e); return Collections.emptySet(); } return geneIds; }
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
public Set<String> searchGenesInVcfFiles(String gene, List<VcfFile> vcfFiles) throws IOException { if (CollectionUtils.isEmpty(vcfFiles)) { return Collections.emptySet(); }//w ww .j av a 2 s . c o m BooleanQuery.Builder builder = new BooleanQuery.Builder(); PrefixQuery geneIdPrefixQuery = new PrefixQuery( new Term(FeatureIndexFields.GENE_ID.getFieldName(), gene.toLowerCase())); PrefixQuery geneNamePrefixQuery = new PrefixQuery( new Term(FeatureIndexFields.GENE_NAME.getFieldName(), gene.toLowerCase())); BooleanQuery.Builder geneIdOrNameQuery = new BooleanQuery.Builder(); geneIdOrNameQuery.add(geneIdPrefixQuery, BooleanClause.Occur.SHOULD); geneIdOrNameQuery.add(geneNamePrefixQuery, BooleanClause.Occur.SHOULD); builder.add(geneIdOrNameQuery.build(), BooleanClause.Occur.MUST); BooleanQuery query = builder.build(); Set<String> geneIds; SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(vcfFiles); try (MultiReader reader = openMultiReader(indexes)) { if (reader.numDocs() == 0) { return Collections.emptySet(); } IndexSearcher searcher = new IndexSearcher(reader); final TopDocs docs = searcher.search(query, reader.numDocs()); final ScoreDoc[] hits = docs.scoreDocs; geneIds = fetchGeneIds(hits, searcher); } catch (IOException e) { LOGGER.error(MessageHelper.getMessage(MessagesConstants.ERROR_FEATURE_INDEX_SEARCH_FAILED), e); return Collections.emptySet(); } return geneIds; }
From source file:com.epimorphics.server.indexers.LuceneIndex.java
License:Apache License
/** * Search the index for entities which match a lucene query. Use field "label" for * searching on lables (e.g. PhraseQuery or TermQuery). */// w w w .j a v a2s . c o m public LuceneResult[] search(Query query, int offset, int maxResults) { try { IndexSearcher searcher = searchManager.acquire(); try { int searchLimit = offset + maxResults; TopDocs matches = searcher.search(query, searchLimit); ScoreDoc[] hits = matches.scoreDocs; if (hits.length < offset) { return new LuceneResult[0]; } LuceneResult[] results = new LuceneResult[hits.length - offset]; for (int i = offset; i < hits.length; i++) { ScoreDoc hit = hits[i]; results[i - offset] = new LuceneResult(searcher.getIndexReader().document(hit.doc), hit.score); } return results; } finally { searchManager.release(searcher); } } catch (IOException e) { throw new EpiException(e); } }
From source file:com.esri.gpt.server.assertion.index.AsnIndexAdapter.java
License:Apache License
/** * Returns a hit count for a subject, predicate and value combination. * <br/>This will only work correctly for values that have not been analyzed * prior to indexing.//from w w w .ja v a 2 s. c o m * @param context the assertion operation context * @param searcher the index searcher * @param valueField the value field name * @param subject the subject * @param predicate the predicate * @param value the value * @return the cardinality * @throws CorruptIndexException if the index is corrupt * @throws IOException if an I/O exception occurs */ public long count(AsnContext context, IndexSearcher searcher, String valueField, String subject, String predicate, String value) throws CorruptIndexException, IOException { // build a query to match the subject/predicate/value triple BooleanQuery query = new BooleanQuery(); Query qSubject = new TermQuery(new Term(AsnConstants.FIELD_RDF_SUBJECT, subject)); Query qPredicate = new TermQuery(new Term(AsnConstants.FIELD_RDF_PREDICATE, predicate)); Query qValue = new TermQuery(new Term(valueField, value)); query.add(qSubject, BooleanClause.Occur.MUST); query.add(qPredicate, BooleanClause.Occur.MUST); query.add(qValue, BooleanClause.Occur.MUST); // execute the search, return the hits TopDocs topDocs = searcher.search(query, 1); return topDocs.totalHits; }
From source file:com.esri.gpt.server.assertion.index.AsnIndexAdapter.java
License:Apache License
/** * Loads the assertion previously cast for the active subject, predicate and user. * @param context the assertion operation context * @param searcher the index searcher//from w w w . j a v a2s . c o m * @return the previously cast assertion (can be null) * @throws Exception if an exception occurs */ public Assertion loadPreviousUserAssertion(AsnContext context, IndexSearcher searcher) throws Exception { AsnOperation operation = context.getOperation(); String userKey = Val.chkStr(operation.getUserPart().getKey()); String username = Val.chkStr(operation.getUserPart().getName()); boolean isAnonymous = username.equalsIgnoreCase(AsnConstants.ANONYMOUS_USERNAME); if (!isAnonymous && (userKey.length() > 0)) { AsnAssertionSet asnSet = operation.getAssertionSet(); AsnValueType vType = asnSet.getValueType(); String subject = operation.getSubject().getURN(); String predicate = vType.getRdfPredicate(); // build a query to match the subject/predicate/user triple BooleanQuery query = new BooleanQuery(); Query qSubject = new TermQuery(new Term(AsnConstants.FIELD_RDF_SUBJECT, subject)); Query qPredicate = new TermQuery(new Term(AsnConstants.FIELD_RDF_PREDICATE, predicate)); Query qUserKey = new TermQuery(new Term(AsnConstants.FIELD_USER_KEY, userKey)); query.add(qSubject, BooleanClause.Occur.MUST); query.add(qPredicate, BooleanClause.Occur.MUST); query.add(qUserKey, BooleanClause.Occur.MUST); // make the reader and searcher, execute the search, return the previous assertion TopDocs topDocs = searcher.search(query, 1); ScoreDoc[] scoreDocs = topDocs.scoreDocs; if ((scoreDocs != null) && (scoreDocs.length) > 0) { Document document = searcher.getIndexReader().document(scoreDocs[0].doc); Assertion assertion = asnSet.newAssertion(context, false); assertion.load(document); return assertion; } } return null; }