List of usage examples for the org.apache.lucene.search.IndexSearcher constructor
public IndexSearcher(IndexReaderContext context)
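Note that none of the examples below passes an IndexReaderContext directly; they all use the IndexReader overload, which obtains the reader's top-level context internally. A minimal sketch of the context-based form, assuming a Lucene 4.x index already exists under ./myindex (the path and class name are illustrative):

    import java.io.File;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReaderContext;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.store.FSDirectory;

    public class ContextSearcherSketch {
        public static void main(String[] args) throws Exception {
            DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("./myindex")));
            try {
                // getContext() returns the reader's top-level IndexReaderContext
                IndexReaderContext context = reader.getContext();
                IndexSearcher searcher = new IndexSearcher(context);
                System.out.println("maxDoc = " + searcher.getIndexReader().maxDoc());
            } finally {
                reader.close();
            }
        }
    }

Searching through the context overload behaves the same as passing the reader itself; the context form is chiefly useful when one top-level context is shared across several searchers.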
From source file: choco.lucene.IKAnalyzerDemo.java
License: Apache License
public static void main(String[] args) {
    // field name for the Lucene document
    String fieldName = "text";
    String text = "IK Analyzer???????";

    // build the IK analyzer
    Analyzer analyzer = new IKAnalyzer();

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        // build an in-memory index
        directory = new RAMDirectory();

        // configure the IndexWriter
        IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_34, analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);

        // write a document
        Document doc = new Document();
        doc.add(new Field("ID", "10000", Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED));
        iwriter.addDocument(doc);
        iwriter.close();

        // search phase: instantiate the searcher
        ireader = IndexReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        // build a Query with QueryParser
        QueryParser qp = new QueryParser(Version.LUCENE_34, fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);

        // fetch the top 5 hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println(topDocs.totalHits);

        // print the matching documents
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < topDocs.totalHits; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println(targetDoc.toString());
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file: ci6226.eval_index_reader.java
public eval_index_reader(Analyzer _analyzer, String _dir, String[] _searchList, int _topn)
        throws IOException, org.apache.lucene.queryparser.classic.ParseException, InvalidTokenOffsetsException {
    String indexdir = "./" + _dir;
    String field = "text";
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexdir)));
    IndexSearcher searcher = new IndexSearcher(reader);
    PrintWriter writer = new PrintWriter(_dir + ".csv", "UTF-8");

    // run the same query list under three similarity implementations
    Searchit(reader, searcher, _analyzer, field, _searchList, _topn, writer);
    searcher.setSimilarity(new similarity_tf_rm());
    Searchit(reader, searcher, _analyzer, field, _searchList, _topn, writer);
    searcher.setSimilarity(new similiarty_queryNorm());
    Searchit(reader, searcher, _analyzer, field, _searchList, _topn, writer);

    writer.close();
    reader.close();
    // searcher.setSimilarity(null);
}
From source file: ci6226.facetsearch.java
public static void main(String[] args) throws Exception {
    String index = "./myindex";
    String field = "text";
    String queries = null;
    int hitsPerPage = 10;
    boolean raw = false;
    // http://lucene.apache.org/core/4_0_0/facet/org/apache/lucene/facet/doc-files/userguide.html#facet_accumulation

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);

    // :Post-Release-Update-Version.LUCENE_XY:
    // TODO: use the same analyzer that built the index
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);

    BufferedReader in = null;
    if (queries != null) {
        in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }

    // :Post-Release-Update-Version.LUCENE_XY:
    QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer);
    while (true) {
        System.out.println("Enter query: ");
        String line = in.readLine();
        if (line == null) {
            break;
        }
        line = line.trim();
        if (line.length() == 0) {
            break;
        }

        Query query = parser.parse(line);
        System.out.println("Searching for: " + query.toString(field));

        Date start = new Date();
        searcher.search(query, null, 100); // timed search pass
        Date end = new Date();
        System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");

        TopDocs results = searcher.search(query, 5 * hitsPerPage);
        ScoreDoc[] hits = results.scoreDocs;
        int numTotalHits = results.totalHits;
        // N = max docs, df = total matched docs, idf = log(N/df)
        for (int i = 0; i < hits.length; i++) {
            Document doc = searcher.doc(hits[i].doc);
            System.out.println(ANSI_BLUE + (i + 1) + ANSI_RESET + "\nScore=\t" + hits[i].score);
            String rtext = doc.get(field);
            System.out.println("Text=\t" + rtext);

            Terms vector = reader.getTermVector(hits[i].doc, field); // hits[i].doc = docID
            if (vector == null)
                continue;
            // System.out.println(vector.getSumDocFreq());

            // collect per-term frequencies from the term vector
            TermsEnum termsEnum = vector.iterator(null);
            Map<String, Integer> frequencies = new HashMap<>();
            BytesRef text = null;
            while ((text = termsEnum.next()) != null) {
                String term = text.utf8ToString();
                int freq = (int) termsEnum.totalTermFreq();
                frequencies.put(term, freq);
            }
        }
        // String[] facetCatlog = {""};
        System.out.println(numTotalHits + " total matching documents");
    }
    reader.close();
}
From source file: ci6226.loadIndex.java
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n]"
            + " [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\n"
            + "See http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "./myindex";
    String field = "text";
    String queries = null;
    int hitsPerPage = 10;
    boolean raw = false;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);

    // :Post-Release-Update-Version.LUCENE_XY:
    // TODO: use the same analyzer that built the index
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);

    BufferedReader in = null;
    if (queries != null) {
        in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }

    // :Post-Release-Update-Version.LUCENE_XY:
    QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer);
    while (true) {
        System.out.println("Enter query: ");
        String line = in.readLine();
        if (line == null) {
            break;
        }
        line = line.trim();
        if (line.length() == 0) {
            break;
        }

        Query query = parser.parse(line);
        System.out.println("Searching for: " + query.toString(field));

        Date start = new Date();
        searcher.search(query, null, 100); // timed search pass
        Date end = new Date();
        System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");

        doPagingSearch(in, searcher, query, hitsPerPage, raw, true, analyzer);
    }
    reader.close();
}
From source file: cn.codepub.redis.directory.Main.java
License: Apache License
public static void testRedisDirectoryWithShardedJedisPool() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    //indexWriterConfig.setInfoStream(System.out);
    //indexWriterConfig.setRAMBufferSizeMB(2048);
    //LogByteSizeMergePolicy logByteSizeMergePolicy = new LogByteSizeMergePolicy();
    //logByteSizeMergePolicy.setMinMergeMB(1);
    //logByteSizeMergePolicy.setMaxMergeMB(64);
    //logByteSizeMergePolicy.setMaxCFSSegmentSizeMB(64);
    //indexWriterConfig.setRAMBufferSizeMB(1024).setMergePolicy(logByteSizeMergePolicy).setUseCompoundFile(false);
    //GenericObjectPoolConfig genericObjectPoolConfig = new GenericObjectPoolConfig();
    //genericObjectPoolConfig.setMaxWaitMillis(3000); // 10s

    List<JedisShardInfo> shards = new ArrayList<>();
    JedisShardInfo si = new JedisShardInfo("localhost", 6379, Constants.TIME_OUT);
    //JedisShardInfo si2 = new JedisShardInfo("localhost", 6380);
    shards.add(si);
    //shards.add(si2);

    // index 10 million documents into the Redis-backed directory
    JedisPoolConfig jedisPoolConfig = new JedisPoolConfig();
    ShardedJedisPool shardedJedisPool = new ShardedJedisPool(jedisPoolConfig, shards);
    RedisDirectory redisDirectory = new RedisDirectory(new ShardedJedisPoolStream(shardedJedisPool));
    IndexWriter indexWriter = new IndexWriter(redisDirectory, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    redisDirectory.close();
    long end = System.currentTimeMillis();
    log.error("RedisDirectoryWithShardedJedisPool consumes {}s!", (end - start) / 1000);

    // reopen the directory and time the searches
    shardedJedisPool = new ShardedJedisPool(jedisPoolConfig, shards);
    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(
            DirectoryReader.open(new RedisDirectory(new ShardedJedisPoolStream(shardedJedisPool))));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("RedisDirectoryWithShardedJedisPool search consumes {}ms!", (end - start));
}
From source file: cn.edu.thss.iise.beehivez.server.index.labelindex.LabelLuceneIndex.java
License: Open Source License
public boolean contain(String label) {
    try {
        IndexReader reader = IndexReader.open(this.indexDir, true);
        Searcher searcher = new IndexSearcher(reader);

        // tokenize the label and collect its unique terms
        HashSet<String> queryTermSet = new HashSet<String>();
        TokenStream stream = analyzer.tokenStream(LabelDocument.FIELD_LABEL, new StringReader(label));
        TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            queryTermSet.add(termAtt.term());
        }
        stream.end();
        stream.close();

        // construct a boolean query requiring every term
        BooleanQuery bq = new BooleanQuery();
        Iterator<String> it = queryTermSet.iterator();
        while (it.hasNext()) {
            String s = it.next();
            Term term = new Term(LabelDocument.FIELD_LABEL, s);
            TermQuery termQuery = new TermQuery(term);
            bq.add(termQuery, Occur.MUST);
        }

        ExactLabelQueryResultCollector collector = new ExactLabelQueryResultCollector(reader, label);
        searcher.search(bq, collector);
        boolean ret = collector.isExistQueryLabel();
        reader.close();
        return ret;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return false;
}
From source file: cn.edu.thss.iise.beehivez.server.index.labelindex.LabelLuceneIndex.java
License: Open Source License
public TreeSet<SimilarLabelQueryResult> getSimilarLabels(String query, float similarity) {
    TreeSet<SimilarLabelQueryResult> ret = new TreeSet<SimilarLabelQueryResult>();
    if (query == null) {
        ret.add(new SimilarLabelQueryResult(null, 1));
        return ret;
    }
    try {
        IndexReader reader = IndexReader.open(this.indexDir, true);
        Searcher searcher = new IndexSearcher(reader);

        // get terms from the query
        HashSet<String> queryTermSet = new HashSet<String>();
        TokenStream stream = analyzer.tokenStream(LabelDocument.FIELD_LABEL, new StringReader(query));
        TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            queryTermSet.add(termAtt.term());
        }
        stream.end();
        stream.close();

        // construct the query
        BooleanQuery bq = new BooleanQuery();
        Iterator<String> it = queryTermSet.iterator();
        SynonymMap synMap = SynonymIndex.getSynonymMap();
        HashSet<String> expandedQueryTermSet = new HashSet<String>(queryTermSet);
        while (it.hasNext()) {
            String s = it.next();
            Term term = new Term(LabelDocument.FIELD_LABEL, s);
            TermQuery termQuery = new TermQuery(term);
            bq.add(termQuery, Occur.SHOULD);
            // expand using stemmed synonyms
            for (String syn : synMap.getSynonyms(s)) {
                stemer.setCurrent(syn);
                stemer.stem();
                syn = stemer.getCurrent();
                if (expandedQueryTermSet.add(syn)) {
                    term = new Term(LabelDocument.FIELD_LABEL, syn);
                    termQuery = new TermQuery(term);
                    bq.add(termQuery, Occur.SHOULD);
                }
            }
        }

        // search in the label index
        SimilarLabelQueryResultCollector collector = new SimilarLabelQueryResultCollector(reader,
                queryTermSet, similarity);
        searcher.search(bq, collector);
        ret = collector.getQueryResult();
        searcher.close();
        reader.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
From source file: cn.edu.thss.iise.beehivez.server.index.petrinetindex.relationindex.TaskRelationIndex.java
License: Open Source License
@Override
public TreeSet<ProcessQueryResult> getProcessModels(Object o, float similarity) {
    TreeSet<ProcessQueryResult> ret = new TreeSet<ProcessQueryResult>();
    try {
        if (o instanceof String) {
            String query = (String) o;

            // analyze the query
            StringReader sr = new StringReader(query);
            BufferedReader br = new BufferedReader(sr);
            QueryParser parser = new QueryParser(br);
            if (GlobalParameter.isEnableSimilarLabel()) {
                parser.setSemanticAide(this.labelIndex, GlobalParameter.getLabelSemanticSimilarity());
            }
            Query q = parser.parse();
            // System.out.println("before optimization");
            // System.out.println(q.toString());
            br.close();
            sr.close();

            // optimize the query here
            // bq = parser.optimize(bq);
            // System.out.println("after optimization");
            // System.out.println(bq.toString());

            // query the Lucene index
            IndexReader reader = IndexReader.open(FSDirectory.open(INDEX_DIR), true);
            Searcher searcher = new IndexSearcher(reader);
            RelationQueryResultCollector collector = new RelationQueryResultCollector(reader);
            searcher.search(q, collector);
            ret = collector.getQueryResult();
            searcher.close();
            reader.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
From source file: cn.edu.thss.iise.beehivez.server.index.petrinetindex.tarluceneindex.TARLuceneIndex.java
License: Open Source License
@Override
public TreeSet<ProcessQueryResult> getProcessModels(Object o, float similarity) {
    TreeSet<ProcessQueryResult> ret = new TreeSet<ProcessQueryResult>();
    try {
        if (o instanceof PetriNet) {
            PetriNet pn = (PetriNet) o;
            IndexReader reader = IndexReader.open(FSDirectory.open(INDEX_DIR), true);
            Searcher searcher = new IndexSearcher(reader);
            BooleanQuery bq = new BooleanQuery();
            bq.setMaxClauseCount(Integer.MAX_VALUE);

            // make sure that every query term is unique
            HashSet<String> expandedTars = new HashSet<String>();
            // the query TARs, each expanded with its similar ones
            HashSet<HashSet<String>> exQueryTars = new HashSet<HashSet<String>>();

            // calculate the TARs
            Iterator<TransitionLabelPair> itTAR = PetriNetUtil.getTARSFromPetriNetByCFP(pn).iterator();
            if (GlobalParameter.isEnableSimilarLabel()) {
                // label similarity is enabled
                while (itTAR.hasNext()) {
                    TransitionLabelPair tlp = itTAR.next();
                    String tarString = tlp.getFirst().trim() + PetriNetTARsDocument.TARCONNECTOR
                            + tlp.getSecond().trim();
                    HashSet<String> similarTars = new HashSet<String>();
                    TreeSet<SimilarLabelQueryResult> pres = labelIndex.getSimilarLabels(tlp.getFirst().trim(),
                            GlobalParameter.getLabelSemanticSimilarity());
                    TreeSet<SimilarLabelQueryResult> sucs = labelIndex.getSimilarLabels(tlp.getSecond().trim(),
                            GlobalParameter.getLabelSemanticSimilarity());
                    Iterator<SimilarLabelQueryResult> itPre = pres.iterator();
                    while (itPre.hasNext()) {
                        String pre = itPre.next().getLabel();
                        Iterator<SimilarLabelQueryResult> itSuc = sucs.iterator();
                        while (itSuc.hasNext()) {
                            String suc = itSuc.next().getLabel();
                            String tar = pre + PetriNetTARsDocument.TARCONNECTOR + suc;
                            if (similarTars.add(tar)) {
                                if (expandedTars.add(tar)) {
                                    Term term = new Term(PetriNetTARsDocument.FIELDTARS, tar);
                                    TermQuery termQuery = new TermQuery(term);
                                    bq.add(termQuery, Occur.SHOULD);
                                }
                            }
                        }
                    }
                    if (similarTars.size() == 0) {
                        similarTars.add(tarString);
                    }
                    exQueryTars.add(similarTars);
                }
            } else {
                // label similarity is not enabled
                while (itTAR.hasNext()) {
                    TransitionLabelPair tlp = itTAR.next();
                    String tarString = tlp.getFirst().trim() + PetriNetTARsDocument.TARCONNECTOR
                            + tlp.getSecond().trim();
                    HashSet<String> similarTars = new HashSet<String>();
                    similarTars.add(tarString);
                    if (expandedTars.add(tarString)) {
                        Term term = new Term(PetriNetTARsDocument.FIELDTARS, tarString);
                        TermQuery termQuery = new TermQuery(term);
                        bq.add(termQuery, Occur.SHOULD);
                    }
                    exQueryTars.add(similarTars);
                }
            }

            TARsQueryResultCollector collector = new TARsQueryResultCollector(reader, exQueryTars, similarity);
            searcher.search(bq, collector);
            ret = collector.getQueryResult();
            searcher.close();
            reader.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
From source file: cn.edu.thss.iise.beehivez.server.index.petrinetindex.taskedgeindex.TaskEdgeLuceneIndex.java
License: Open Source License
@Override
public TreeSet<ProcessQueryResult> getProcessModels(Object o, float similarity) {
    TreeSet<ProcessQueryResult> ret = new TreeSet<ProcessQueryResult>();
    try {
        if (o instanceof PetriNet) {
            PetriNet pn = (PetriNet) o;
            IndexReader reader = IndexReader.open(FSDirectory.open(INDEX_DIR), true);
            Searcher searcher = new IndexSearcher(reader);
            BooleanQuery bq = new BooleanQuery();
            bq.setMaxClauseCount(Integer.MAX_VALUE);

            // make sure that every query term is unique
            HashSet<String> expandedTaskEdges = new HashSet<String>();
            // the query task edges, each expanded with its similar ones
            HashSet<HashSet<String>> exQueryTaskEdges = new HashSet<HashSet<String>>();

            // calculate the task edges of the query Petri net
            ArrayList<TaskLine4PetriNet> tls = TaskLine4PetriNet.getAllTaskLinesOfPetriNet(pn);
            if (GlobalParameter.isEnableSimilarLabel()) {
                // label similarity is enabled
                for (TaskLine4PetriNet tl : tls) {
                    String taskEdgeString = tl.getSrcTransition().getIdentifier().trim()
                            + PetriNetTaskEdgesDocument.TASKEDGECONNECTOR
                            + tl.getDestTransition().getIdentifier().trim();
                    HashSet<String> similarTaskEdges = new HashSet<String>();
                    TreeSet<SimilarLabelQueryResult> pres = labelIndex.getSimilarLabels(
                            tl.getSrcTransition().getIdentifier().trim(),
                            GlobalParameter.getLabelSemanticSimilarity());
                    TreeSet<SimilarLabelQueryResult> sucs = labelIndex.getSimilarLabels(
                            tl.getDestTransition().getIdentifier().trim(),
                            GlobalParameter.getLabelSemanticSimilarity());
                    Iterator<SimilarLabelQueryResult> itPre = pres.iterator();
                    while (itPre.hasNext()) {
                        String pre = itPre.next().getLabel();
                        Iterator<SimilarLabelQueryResult> itSuc = sucs.iterator();
                        while (itSuc.hasNext()) {
                            String suc = itSuc.next().getLabel();
                            String taskEdge = pre + PetriNetTaskEdgesDocument.TASKEDGECONNECTOR + suc;
                            if (similarTaskEdges.add(taskEdge)) {
                                if (expandedTaskEdges.add(taskEdge)) {
                                    Term term = new Term(PetriNetTaskEdgesDocument.FIELDTASKEDGES, taskEdge);
                                    TermQuery termQuery = new TermQuery(term);
                                    bq.add(termQuery, Occur.SHOULD);
                                }
                            }
                        }
                    }
                    if (similarTaskEdges.size() == 0) {
                        similarTaskEdges.add(taskEdgeString);
                    }
                    exQueryTaskEdges.add(similarTaskEdges);
                }
            } else {
                // label similarity is not enabled
                for (TaskLine4PetriNet tl : tls) {
                    String taskEdgeString = tl.getSrcTransition().getIdentifier().trim()
                            + PetriNetTaskEdgesDocument.TASKEDGECONNECTOR
                            + tl.getDestTransition().getIdentifier().trim();
                    HashSet<String> similarTaskEdges = new HashSet<String>();
                    similarTaskEdges.add(taskEdgeString);
                    if (expandedTaskEdges.add(taskEdgeString)) {
                        Term term = new Term(PetriNetTaskEdgesDocument.FIELDTASKEDGES, taskEdgeString);
                        TermQuery termQuery = new TermQuery(term);
                        bq.add(termQuery, Occur.SHOULD);
                    }
                    exQueryTaskEdges.add(similarTaskEdges);
                }
            }

            TaskEdgesQueryResultCollector collector = new TaskEdgesQueryResultCollector(reader,
                    exQueryTaskEdges, similarity);
            searcher.search(bq, collector);
            TreeSet<ProcessQueryResult> temp = collector.getQueryResult();
            searcher.close();
            reader.close();

            // verify each candidate model by MCES similarity
            Iterator<ProcessQueryResult> it = temp.iterator();
            while (it.hasNext()) {
                ProcessQueryResult pqr = it.next();
                long id = pqr.getProcess_id();
                DataManager dm = DataManager.getInstance();
                PetriNet c = dm.getProcessPetriNet(id);
                float mcesSimilarity = PetriNetUtil.mcesSimilarity(c, pn);
                if (mcesSimilarity >= similarity) {
                    ret.add(new ProcessQueryResult(id, mcesSimilarity));
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}