List of usage examples for org.apache.lucene.search.BooleanQuery#setMaxClauseCount
public static void setMaxClauseCount(int maxClauseCount)
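Before the project-specific examples below, here is a minimal sketch of the call itself. It assumes a Lucene 4.x-style mutable BooleanQuery, an already-open IndexSearcher, and a hypothetical field name "id"; on Lucene 5.x and later the clauses would be assembled with BooleanQuery.Builder, but setMaxClauseCount is invoked the same way.

import java.io.IOException;
import java.util.List;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

public class MaxClauseExample {
    // Sketch only: "id" is a hypothetical field, and the Lucene 4.x mutable BooleanQuery API is assumed.
    public static TopDocs searchManyIds(IndexSearcher searcher, List<String> ids) throws IOException {
        // setMaxClauseCount is static: it raises the JVM-wide limit for every BooleanQuery,
        // not just this one. The default is 1024; exceeding it throws BooleanQuery.TooManyClauses.
        BooleanQuery.setMaxClauseCount(Math.max(BooleanQuery.getMaxClauseCount(), ids.size()));

        BooleanQuery query = new BooleanQuery();
        for (String id : ids) {
            query.add(new TermQuery(new Term("id", id)), Occur.SHOULD);
        }
        return searcher.search(query, 10);
    }
}

Because the limit is a single static field shared by the whole JVM, most of the examples below set it once during initialization, often to Integer.MAX_VALUE, rather than per query.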
From source file:biospectra.classify.Classifier.java
License:Apache License
private void initialize(File indexPath, int kmerSize, int kmerSkips, boolean minStrandKmer,
        double minShouldMatch, QueryGenerationAlgorithm queryGenerationAlgorithm,
        Similarity similarity) throws Exception {
    if (!indexPath.exists() || !indexPath.isDirectory()) {
        throw new IllegalArgumentException("indexPath is not a directory or does not exist");
    }

    this.indexPath = indexPath;
    this.kmerSize = kmerSize;
    this.kmerSkips = kmerSkips;
    this.minStrandKmer = minStrandKmer;
    this.queryAnalyzer = new KmerQueryAnalyzer(this.kmerSize, this.kmerSkips, this.minStrandKmer);

    Directory dir = new MMapDirectory(this.indexPath.toPath());
    this.indexReader = DirectoryReader.open(dir);
    this.indexSearcher = new IndexSearcher(this.indexReader);
    if (similarity != null) {
        this.indexSearcher.setSimilarity(similarity);
    }

    this.minShouldMatch = minShouldMatch;
    this.queryGenerationAlgorithm = queryGenerationAlgorithm;

    BooleanQuery.setMaxClauseCount(10000);
}
From source file:ch.systemsx.cisd.openbis.generic.server.dataaccess.db.search.LuceneQueryBuilder.java
License:Apache License
private static Query parseQuery(final String searchPattern, String wholeQuery, final QueryParser parser) {
    parser.setAllowLeadingWildcard(true);
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    try {
        return parser.parse(wholeQuery);
    } catch (ParseException ex) {
        throw new UserFailureException(String.format("Search pattern '%s' is invalid.", searchPattern), ex);
    }
}
From source file:cn.edu.thss.iise.beehivez.server.index.petrinetindex.tarluceneindex.TARLuceneIndex.java
License:Open Source License
@Override
public TreeSet<ProcessQueryResult> getProcessModels(Object o, float similarity) {
    TreeSet<ProcessQueryResult> ret = new TreeSet<ProcessQueryResult>();
    try {
        if (o instanceof PetriNet) {
            PetriNet pn = (PetriNet) o;
            IndexReader reader = IndexReader.open(FSDirectory.open(INDEX_DIR), true);
            Searcher searcher = new IndexSearcher(reader);
            BooleanQuery bq = new BooleanQuery();
            // setMaxClauseCount is static, so this call raises the global clause limit
            bq.setMaxClauseCount(Integer.MAX_VALUE);

            // make sure that every query term is unique
            HashSet<String> expandedTars = new HashSet<String>();
            // expand the query tars with their similar ones
            HashSet<HashSet<String>> exQueryTars = new HashSet<HashSet<String>>();

            // calculate the tars
            Iterator<TransitionLabelPair> itTAR = PetriNetUtil.getTARSFromPetriNetByCFP(pn).iterator();
            if (GlobalParameter.isEnableSimilarLabel()) {
                // label similarity is enabled
                while (itTAR.hasNext()) {
                    TransitionLabelPair tlp = itTAR.next();
                    String tarString = tlp.getFirst().trim() + PetriNetTARsDocument.TARCONNECTOR
                            + tlp.getSecond().trim();
                    HashSet<String> similarTars = new HashSet<String>();
                    TreeSet<SimilarLabelQueryResult> pres = labelIndex.getSimilarLabels(tlp.getFirst().trim(),
                            GlobalParameter.getLabelSemanticSimilarity());
                    TreeSet<SimilarLabelQueryResult> sucs = labelIndex.getSimilarLabels(tlp.getSecond().trim(),
                            GlobalParameter.getLabelSemanticSimilarity());
                    Iterator<SimilarLabelQueryResult> itPre = pres.iterator();
                    while (itPre.hasNext()) {
                        String pre = itPre.next().getLabel();
                        Iterator<SimilarLabelQueryResult> itSuc = sucs.iterator();
                        while (itSuc.hasNext()) {
                            String suc = itSuc.next().getLabel();
                            String tar = pre + PetriNetTARsDocument.TARCONNECTOR + suc;
                            if (similarTars.add(tar)) {
                                if (expandedTars.add(tar)) {
                                    Term term = new Term(PetriNetTARsDocument.FIELDTARS, tar);
                                    TermQuery termQuery = new TermQuery(term);
                                    bq.add(termQuery, Occur.SHOULD);
                                }
                            }
                        }
                    }
                    if (similarTars.size() == 0) {
                        similarTars.add(tarString);
                    }
                    exQueryTars.add(similarTars);
                }
            } else {
                // label similarity is not enabled
                while (itTAR.hasNext()) {
                    TransitionLabelPair tlp = itTAR.next();
                    String tarString = tlp.getFirst().trim() + PetriNetTARsDocument.TARCONNECTOR
                            + tlp.getSecond().trim();
                    HashSet<String> similarTars = new HashSet<String>();
                    similarTars.add(tarString);
                    if (expandedTars.add(tarString)) {
                        Term term = new Term(PetriNetTARsDocument.FIELDTARS, tarString);
                        TermQuery termQuery = new TermQuery(term);
                        bq.add(termQuery, Occur.SHOULD);
                    }
                    exQueryTars.add(similarTars);
                }
            }

            TARsQueryResultCollector collector = new TARsQueryResultCollector(reader, exQueryTars, similarity);
            searcher.search(bq, collector);
            ret = collector.getQueryResult();
            searcher.close();
            reader.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
From source file:cn.edu.thss.iise.beehivez.server.index.petrinetindex.taskedgeindex.TaskEdgeLuceneIndex.java
License:Open Source License
@Override
public TreeSet<ProcessQueryResult> getProcessModels(Object o, float similarity) {
    TreeSet<ProcessQueryResult> ret = new TreeSet<ProcessQueryResult>();
    try {
        if (o instanceof PetriNet) {
            PetriNet pn = (PetriNet) o;
            IndexReader reader = IndexReader.open(FSDirectory.open(INDEX_DIR), true);
            Searcher searcher = new IndexSearcher(reader);
            BooleanQuery bq = new BooleanQuery();
            // setMaxClauseCount is static, so this call raises the global clause limit
            bq.setMaxClauseCount(Integer.MAX_VALUE);

            // make sure that every query term is unique
            HashSet<String> expandedTaskEdges = new HashSet<String>();
            // expand the query task edges with their similar ones
            HashSet<HashSet<String>> exQueryTaskEdges = new HashSet<HashSet<String>>();

            // calculate the task edges of the query Petri net
            ArrayList<TaskLine4PetriNet> tls = TaskLine4PetriNet.getAllTaskLinesOfPetriNet(pn);
            if (GlobalParameter.isEnableSimilarLabel()) {
                // label similarity is enabled
                for (TaskLine4PetriNet tl : tls) {
                    String taskEdgeString = tl.getSrcTransition().getIdentifier().trim()
                            + PetriNetTaskEdgesDocument.TASKEDGECONNECTOR
                            + tl.getDestTransition().getIdentifier().trim();
                    HashSet<String> similarTaskEdges = new HashSet<String>();
                    TreeSet<SimilarLabelQueryResult> pres = labelIndex.getSimilarLabels(
                            tl.getSrcTransition().getIdentifier().trim(),
                            GlobalParameter.getLabelSemanticSimilarity());
                    TreeSet<SimilarLabelQueryResult> sucs = labelIndex.getSimilarLabels(
                            tl.getDestTransition().getIdentifier().trim(),
                            GlobalParameter.getLabelSemanticSimilarity());
                    Iterator<SimilarLabelQueryResult> itPre = pres.iterator();
                    while (itPre.hasNext()) {
                        String pre = itPre.next().getLabel();
                        Iterator<SimilarLabelQueryResult> itSuc = sucs.iterator();
                        while (itSuc.hasNext()) {
                            String suc = itSuc.next().getLabel();
                            String taskEdge = pre + PetriNetTaskEdgesDocument.TASKEDGECONNECTOR + suc;
                            if (similarTaskEdges.add(taskEdge)) {
                                if (expandedTaskEdges.add(taskEdge)) {
                                    Term term = new Term(PetriNetTaskEdgesDocument.FIELDTASKEDGES, taskEdge);
                                    TermQuery termQuery = new TermQuery(term);
                                    bq.add(termQuery, Occur.SHOULD);
                                }
                            }
                        }
                    }
                    if (similarTaskEdges.size() == 0) {
                        similarTaskEdges.add(taskEdgeString);
                    }
                    exQueryTaskEdges.add(similarTaskEdges);
                }
            } else {
                // label similarity is not enabled
                for (TaskLine4PetriNet tl : tls) {
                    String taskEdgeString = tl.getSrcTransition().getIdentifier().trim()
                            + PetriNetTaskEdgesDocument.TASKEDGECONNECTOR
                            + tl.getDestTransition().getIdentifier().trim();
                    HashSet<String> similarTaskEdges = new HashSet<String>();
                    similarTaskEdges.add(taskEdgeString);
                    if (expandedTaskEdges.add(taskEdgeString)) {
                        Term term = new Term(PetriNetTaskEdgesDocument.FIELDTASKEDGES, taskEdgeString);
                        TermQuery termQuery = new TermQuery(term);
                        bq.add(termQuery, Occur.SHOULD);
                    }
                    exQueryTaskEdges.add(similarTaskEdges);
                }
            }

            TaskEdgesQueryResultCollector collector = new TaskEdgesQueryResultCollector(reader,
                    exQueryTaskEdges, similarity);
            searcher.search(bq, collector);
            TreeSet<ProcessQueryResult> temp = collector.getQueryResult();
            searcher.close();
            reader.close();

            // verify each candidate model
            Iterator<ProcessQueryResult> it = temp.iterator();
            while (it.hasNext()) {
                ProcessQueryResult pqr = it.next();
                long id = pqr.getProcess_id();
                DataManager dm = DataManager.getInstance();
                PetriNet c = dm.getProcessPetriNet(id);
                float mcesSimilarity = PetriNetUtil.mcesSimilarity(c, pn);
                if (mcesSimilarity >= similarity) {
                    ret.add(new ProcessQueryResult(id, mcesSimilarity));
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
From source file:cn.edu.thss.iise.beehivez.server.index.yawlindex.yawltasksluceneindex.YAWLTasksLuceneIndex.java
License:Open Source License
@Override
public TreeSet<ProcessQueryResult> getProcessModels(Object o, float similarity) {
    TreeSet<ProcessQueryResult> ret = new TreeSet<ProcessQueryResult>();
    try {
        if (o instanceof YNet) {
            YNet query = (YNet) o;
            IndexReader reader = IndexReader.open(FSDirectory.open(INDEX_DIR), true);
            Searcher searcher = new IndexSearcher(reader);
            BooleanQuery bq = new BooleanQuery();
            BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);

            // expand the tasks with their similar ones
            HashSet<String> expandedTasks = new HashSet<String>();
            if (GlobalParameter.isEnableSimilarLabel()) {
                // label similarity is enabled
                for (YTask task : query.getNetTasks()) {
                    String taskName = task.getName().trim();
                    if (expandedTasks.add(taskName)) {
                        BooleanQuery subq = new BooleanQuery();
                        TreeSet<SimilarLabelQueryResult> similarTasks = labelIndex.getSimilarLabels(taskName,
                                GlobalParameter.getLabelSemanticSimilarity());
                        Iterator<SimilarLabelQueryResult> it = similarTasks.iterator();
                        while (it.hasNext()) {
                            SimilarLabelQueryResult sl = it.next();
                            String similarTaskName = sl.getLabel();
                            Term term = new Term(YAWLTasksDocument.FIELDTASKS, similarTaskName);
                            TermQuery termQuery = new TermQuery(term);
                            subq.add(termQuery, Occur.SHOULD);
                        }
                        if (subq.getClauses().length > 0) {
                            bq.add(subq, Occur.MUST);
                        } else {
                            return ret;
                        }
                    }
                }
            } else {
                // label similarity is not enabled
                for (YTask task : query.getNetTasks()) {
                    String taskName = task.getName().trim();
                    if (expandedTasks.add(taskName)) {
                        Term term = new Term(YAWLTasksDocument.FIELDTASKS, taskName);
                        TermQuery termQuery = new TermQuery(term);
                        bq.add(termQuery, Occur.MUST);
                    }
                }
            }

            YAWLTasksQueryResultCollector collector = new YAWLTasksQueryResultCollector(reader);
            searcher.search(bq, collector);
            ret = collector.getQueryResult();
            searcher.close();
            reader.close();

            // subgraph isomorphism check using Ullman's algorithm to match candidates accurately
            Iterator<ProcessQueryResult> it = ret.iterator();
            while (it.hasNext()) {
                ProcessQueryResult pqr = it.next();
                long process_id = pqr.getProcess_id();
                DataManager dm = DataManager.getInstance();
                YNet model = YAWLUtil.getYNetFromDefinition(dm.getProcessDefinitionBytes(process_id));
                if (!Ullman4YAWL.subGraphIsomorphism(query, model)) {
                    it.remove();
                }
                // if (!VF24YAWL.subGraphIsomorphism(query, model)) { it.remove(); }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
From source file:com.atlassian.jira.upgrade.ConsistencyCheckImpl.java
/**
 * Looks for files that could be Lucene locks left after an unclean shutdown. Registers a Johnson Event in this
 * scenario.
 */
private void checkAndInitLucene(final ServletContext context) {
    final ApplicationProperties ap = ComponentAccessor.getApplicationProperties();

    // Get a path for each index directory
    final IndexLifecycleManager indexManager = getIndexManager();

    // A collection to which we will add all found lock files (if any)
    final Collection<String> existingLockFilepaths = getLuceneDirectoryUtils()
            .getStaleLockPaths(indexManager.getAllIndexPaths());

    // If there were any lock files found, then place an event into the context. Otherwise we are OK and
    // can proceed.
    if ((existingLockFilepaths != null) && !existingLockFilepaths.isEmpty()) {
        final StringBuilder sb = new StringBuilder();
        for (final String filePath : existingLockFilepaths) {
            if (filePath != null) {
                sb.append(filePath).append(' ');
            }
        }
        if (sb.length() > 1) {
            // Delete the trailing space
            sb.deleteCharAt(sb.length() - 1);
        }

        // Log error message
        final Collection<String> messages = CollectionBuilder.newBuilder(
                "Index lock file(s) found. This occurs either because JIRA was not cleanly shutdown",
                "or because there is another instance of this JIRA installation currently running.",
                "Please ensure that no other instance of this JIRA installation is running",
                "and then remove the following lock file(s) and restart JIRA:", "", sb.toString(), "",
                "Once restarted you will need to reindex your data to ensure that indexes are up to date.", "",
                "Do NOT delete the lock file(s) if there is another JIRA running with the same index directory",
                "instead cleanly shutdown the other instance.").asList();
        startupLog.printMessage(messages, Level.ERROR);

        final Event event = new Event(EventType.get("index-lock-already-exists"),
                "An existing index lock was found.", EventLevel.get(EventLevel.ERROR));
        event.addAttribute("lockfiles", sb.toString());
        final JohnsonEventContainer eventCont = JohnsonEventContainer.get(context);
        if (eventCont != null) {
            eventCont.addEvent(event);
        }
    }

    // Set max clauses even if indexing is disabled so that it takes effect when indexing is enabled again.
    // As max clauses is a static variable in Lucene, this will work.
    int maxClausesCount = 65000;
    try {
        maxClausesCount = Integer.parseInt(ap.getDefaultBackedString(APKeys.JIRA_SEARCH_MAXCLAUSES));
    } catch (final NumberFormatException e) {
        log.warn("Could not read the property '" + APKeys.JIRA_SEARCH_MAXCLAUSES
                + "' for the number of maximum search clauses. Using default " + maxClausesCount);
    }
    BooleanQuery.setMaxClauseCount(maxClausesCount); // Fixes JRA-3127 (JT)
}
From source file:com.baidu.rigel.biplatform.tesseract.util.QueryRequestUtil.java
License:Open Source License
/**
 * transQueryRequest2LuceneQuery: converts a QueryRequest into a Lucene Query.
 *
 * @param query the QueryRequest
 * @return the Query for Lucene
 * @throws ParseException if the request cannot be parsed
 */
public static Query transQueryRequest2LuceneQuery(QueryRequest query) throws ParseException {
    if (query == null || query.getWhere() == null) {
        throw new IllegalArgumentException();
    }
    BooleanQuery queryAll = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);

    // process the where clause: each AND condition becomes a MUST sub-query,
    // with the values for a field OR-ed together as SHOULD clauses
    Map<String, List<String>> andCondition = transQueryRequestAndList2Map(query);
    for (String fieldName : andCondition.keySet()) {
        // QueryParser parser = new QueryParser(fieldName, new StandardAnalyzer());
        BooleanQuery subQuery = new BooleanQuery();
        for (String qs : andCondition.get(fieldName)) {
            subQuery.add(new TermQuery(new Term(fieldName, qs)), Occur.SHOULD);
        }
        queryAll.add(subQuery, Occur.MUST);
    }
    return queryAll;
}
From source file:com.concursive.connect.indexer.LuceneIndexer.java
License:Open Source License
/**
 * Sets up any Lucene Indexer classes
 *
 * @param context
 * @return
 * @throws Exception
 */
public boolean setup(IndexerContext context) throws Exception {
    // Make sure the complex queries can use more than the default clause count
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);

    // Setup the Indexes so they are in a state in which they can immediately be accessed
    ApplicationPrefs prefs = context.getApplicationPrefs();
    {
        // Establish the full directory
        LOG.info("Starting Lucene disk index");
        File path = new File(prefs.get("FILELIBRARY") + Constants.FULL_INDEX);
        boolean create = !path.exists();
        fullIndex = FSDirectory.getDirectory(prefs.get("FILELIBRARY") + Constants.FULL_INDEX);
        if (create) {
            LOG.warn("Lucene index not found, creating new index: " + path.getPath());
            Analyzer fullAnalyzer = new StandardAnalyzer();
            fullWriter = new IndexWriter(fullIndex, fullAnalyzer, true);
            fullWriter.close();
        }
    }
    {
        // Load up the ram directory
        LOG.info("Creating Lucene RAM index...");
        // Create the Ram Directory
        directoryIndex = new RAMDirectory();
        Analyzer directoryAnalyzer = new SnowballAnalyzer("English");
        directoryWriter = new IndexWriter(directoryIndex, directoryAnalyzer, true);
        directoryWriter.close();
        LOG.info("Initialization of RAM index complete...");
    }
    // Search using the updated index
    resetSearchers();
    return true;
}
From source file:com.esri.gpt.catalog.lucene.LuceneConfig.java
License:Apache License
/**
 * Sets the maximum number of clauses within a boolean query.
 * @param maxClauseCount maximum number of clauses within a boolean query
 */
public void setMaxClauseCount(int maxClauseCount) {
    BooleanQuery.setMaxClauseCount(maxClauseCount);
}
From source file:com.github.tteofili.looseen.MinHashClassifier.java
License:Apache License
public MinHashClassifier(IndexReader reader, String textField, String categoryField, int min, int hashCount,
        int hashSize) {
    this.min = min;
    this.hashCount = hashCount;
    this.hashSize = hashSize;
    try {
        Analyzer analyzer = createMinHashAnalyzer(min, hashCount, hashSize);
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        directory = new RAMDirectory();
        IndexWriter writer = new IndexWriter(directory, config);
        for (int i = 0; i < reader.maxDoc(); i++) {
            Document document = new Document();
            Document d = reader.document(i);
            String textValue = d.getField(textField).stringValue();
            String categoryValue = d.getField(categoryField).stringValue();
            document.add(new TextField(TEXT_FIELD, textValue, Field.Store.NO));
            document.add(new StringField(CLASS_FIELD, categoryValue, Field.Store.YES));
            writer.addDocument(document);
        }
        writer.commit();
        writer.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
}