List of usage examples for org.apache.lucene.index.IndexReader.close()
@Override public final synchronized void close() throws IOException
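Before the per-project examples, here is a minimal, self-contained sketch of the pattern most of them follow: open a reader, search, then release it. This sketch is not taken from any of the source files below; it assumes a recent Lucene release (5.x or later, where FSDirectory.open takes a java.nio.file.Path) and an existing index under the illustrative path "index". Because IndexReader implements Closeable, try-with-resources can invoke close() automatically:

import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

public class CloseReaderSketch {
    public static void main(String[] args) throws Exception {
        // "index" is an illustrative path; try-with-resources closes the reader even if searching throws
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs docs = searcher.search(new MatchAllDocsQuery(), 10);
            System.out.println("total hits: " + docs.totalHits);
        } // reader.close() is called here automatically
    }
}

The examples below instead call reader.close() explicitly, either at the end of the method or in a finally block.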
From source file:eyeskyhigh.lucene.demo.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
    usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String normsField = null;
    boolean paging = true;
    int hitsPerPage = 10;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-norms".equals(args[i])) {
            normsField = args[i + 1];
            i++;
        } else if ("-paging".equals(args[i])) {
            if (args[i + 1].equals("false")) {
                paging = false;
            } else {
                hitsPerPage = Integer.parseInt(args[i + 1]);
                if (hitsPerPage == 0) {
                    paging = false;
                }
            }
            i++;
        }
    }

    Analyzer analyzer = new StandardAnalyzer();
    IndexWriter writer = new IndexWriter(index, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    IndexReader reader = IndexReader.open(index);
    if (normsField != null)
        reader = new OneNormsReader(reader, normsField);

    Searcher searcher = new IndexSearcher(reader);

    BufferedReader in = null;
    if (queries != null) {
        in = new BufferedReader(new FileReader(queries));
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }
    // QueryParser parser = new QueryParser(field, analyzer);
    // parser.setAllowLeadingWildcard(true);
    while (true) {
        if (queries == null) // prompt the user
            System.out.println("Enter query: ");

        String line1 = in.readLine(); //, line2 = in.readLine();

        if (line1 == null || line1.length() == -1)
            break;

        line1 = line1.trim();
        if (line1.length() == 0)
            break;

        Query query;
        // query = parser.parse(QueryParser.escape(line1));
        // System.out.println(QueryParser.escape(line));
        // query = new TermQuery(new Term(field, line1));
        query = new BooleanQuery();
        // ((BooleanQuery) query).add(new PrefixQuery(new Term(field, line1)), BooleanClause.Occur.SHOULD);
        // ((BooleanQuery) query).add(new PrefixQuery(new Term(field, line2)), BooleanClause.Occur.SHOULD);
        ((BooleanQuery) query).add(new WildcardQuery(new Term(field, line1)), BooleanClause.Occur.SHOULD);
        // ((BooleanQuery) query).add(new WildcardQuery(new Term(field, line2)), BooleanClause.Occur.SHOULD);
        // query = new WildcardQuery(new Term(field, line1));
        System.out.println("Searching for: " + query.toString(field));

        if (repeat > 0) { // repeat & time as benchmark
            Date start = new Date();
            for (int i = 0; i < repeat; i++) {
                // searcher.search(query, null, 100);
            }
            Date end = new Date();
            System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        }

        if (paging) {
            doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null);
        } else {
            doStreamingSearch(searcher, query);
        }
    }
    reader.close();
    writer.close();
}
From source file:fi.semantum.strategia.Lucene.java
License:Open Source License
public static synchronized List<String> search(String databaseId, String search) throws IOException {
    ArrayList<String> result = new ArrayList<String>();
    IndexReader reader = null;
    try {
        reader = DirectoryReader.open(getDirectory(databaseId));
        IndexSearcher searcher = new IndexSearcher(reader);
        QueryParser parser = new QueryParser(Version.LUCENE_4_9, "text", getAnalyzer());
        parser.setAllowLeadingWildcard(true);
        Query query = parser.parse(search);
        TopDocs docs = searcher.search(query, Integer.MAX_VALUE);
        for (ScoreDoc scoreDoc : docs.scoreDocs) {
            try {
                DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
                reader.document(scoreDoc.doc, visitor);
                Document doc = visitor.getDocument();
                result.add(doc.get("uuid"));
            } catch (CorruptIndexException e) {
                throw new IOException(e);
            }
        }
    } catch (ParseException e) {
        throw new IOException(e);
    } finally {
        if (reader != null)
            reader.close();
    }
    return result;
}
From source file:FindIO.TextIndex.java
License:Apache License
public Map<String, double[]> searchText(String queryString) throws Exception {
    List<String> terms = Arrays.asList(queryString.trim().split(" "));
    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexFile));
    IndexSearcher searcher = new IndexSearcher(reader);
    // :Post-Release-Update-Version.LUCENE_XY:
    Analyzer analyzer = new StandardAnalyzer();

    BufferedReader in = null;
    in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    // :Post-Release-Update-Version.LUCENE_XY:
    QueryParser parser = new QueryParser(fieldname1, analyzer);

    Query query = parser.parse(queryString);
    if (test)
        System.out.println("Searching for text: " + query.toString(fieldname1));

    TopDocs topDocs;
    if (test) { // repeat & time as benchmark
        long start = System.currentTimeMillis();
        topDocs = searcher.search(query, null, Common.topK);
        long end = System.currentTimeMillis();
        System.out.println("Time: " + (end - start) + " ms");
    } else {
        topDocs = searcher.search(query, null, Common.topK);
    }

    ScoreDoc[] hits = topDocs.scoreDocs;
    Map<String, double[]> mapResults = new HashMap<String, double[]>();
    // print out the top hits documents
    for (ScoreDoc hit : hits) {
        Document doc = searcher.doc(hit.doc);
        String tag = doc.get(fieldname1);
        int index = terms.indexOf(tag);
        if (index == -1) {
            continue;
        }
        String[] images = doc.get(fieldname2).split("\\s+");
        for (int i = 0; i < images.length; i += 2) {
            String imageName = images[i];
            String freq = images[i + 1];
            if (mapResults.get(imageName) == null) {
                mapResults.put(imageName, new double[terms.size()]);
            }
            double[] docTerms = mapResults.get(imageName);
            docTerms[index] = Double.parseDouble(freq);
        }
    }
    reader.close();
    return mapResults;
}
From source file:FindIO.TextIndex.java
License:Apache License
/**
 * Updates scores; mainly used for relevance feedback. The input should be stemmed.
 * @param imageID
 * @param tag_score_pairs
 * @throws Throwable
 */
public void updateScore(String imageID, List<FindIOPair> tag_score_pairs) throws Throwable {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexFile));
    IndexSearcher searcher = new IndexSearcher(reader);
    // :Post-Release-Update-Version.LUCENE_XY:
    Analyzer analyzer = new StandardAnalyzer();

    BufferedReader in = null;
    in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    // :Post-Release-Update-Version.LUCENE_XY:
    QueryParser parser = new QueryParser(fieldname1, analyzer);

    for (FindIOPair pair : tag_score_pairs) {
        String tag = pair.getID();
        double add_score = pair.getValue();

        Query query = parser.parse(tag);
        System.out.println("Updating Text: " + query.toString(fieldname1));

        TopDocs topDocs;
        if (test) { // repeat & time as benchmark
            long start = System.currentTimeMillis();
            topDocs = searcher.search(query, null, Common.topK);
            long end = System.currentTimeMillis();
            System.out.println("Time: " + (end - start) + " ms");
        } else {
            topDocs = searcher.search(query, null, Common.topK);
        }

        ScoreDoc[] hits = topDocs.scoreDocs;
        if (hits.length == 0) { // It's a new tag
            Document doc = new Document();
            String img_score = imageID + " " + (0.1 * add_score) + " ";
            if (add_score > 0) {
                // set fields for document
                this.tag_field.setStringValue(this.textAnalyzer.getStem(tag));
                this.img_field.setStringValue(img_score);
                doc.add(tag_field);
                doc.add(img_field);
                MMwriter.addDocument(doc);
            }
        } else { // The tag is included in the index
            int docId = hits[0].doc;
            // retrieve the old document
            Document doc = searcher.doc(docId);

            // replacement field value
            String currentScores = doc.get(fieldname2);
            String[] img_score_pairs = currentScores.split(" ");
            StringBuilder stringBuilder = new StringBuilder();
            boolean isImageContained = false;
            for (int i = 0; i < img_score_pairs.length; i += 2) {
                String img = img_score_pairs[i];
                double old_score = Double.valueOf(img_score_pairs[i + 1]);
                double new_score = old_score + add_score;
                if (new_score < 0) {
                    new_score = 0;
                }
                String img_score_pair;
                if (img.equals(imageID)) {
                    img_score_pair = img + " " + new_score + " ";
                    isImageContained = true;
                } else {
                    img_score_pair = img + " " + old_score + " ";
                }
                stringBuilder.append(img_score_pair);
            }
            if (!isImageContained) {
                // If the image was not covered by the tag, append it to the tail
                stringBuilder.append(imageID + " " + add_score + " ");
            }

            // remove all occurrences of the old field
            doc.removeFields(fieldname2);
            this.img_field.setStringValue(stringBuilder.toString().trim());
            if (test)
                System.out.println(stringBuilder.toString());
            // insert the replacement
            doc.add(img_field);
            Term tagTerm = new Term(this.fieldname1, tag);
            MMwriter.updateDocument(tagTerm, doc);
        }
        MMwriter.commit();
    }
    reader.close();
    closeWriter();
}
From source file:fr.ericlab.sondy.algo.eventdetection.ET.java
License:Open Source License
public static LinkedList<String> getFrequentBigrams(String tweets, HashSet<String> bigrams) {
    try {
        LinkedList<String> FCB = new LinkedList<String>();
        WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_36);
        RAMDirectory temporaryIndex = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter temporaryWriter = new IndexWriter(temporaryIndex, config);
        Document doc = new Document();
        doc.add(new Field("content", tweets, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        temporaryWriter.addDocument(doc);
        temporaryWriter.commit();
        IndexReader temporaryReader = IndexReader.open(temporaryWriter, true);
        TermEnum allTerms = temporaryReader.terms();
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (bigrams.contains(term)) {
                FCB.add(term);
            }
        }
        temporaryWriter.close();
        temporaryReader.close();
        temporaryIndex.close();
        return FCB;
    } catch (LockObtainFailedException ex) {
        Logger.getLogger(ET.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(ET.class.getName()).log(Level.SEVERE, null, ex);
    }
    return new LinkedList<>();
}
From source file:fr.ericlab.sondy.algo.eventdetection.MABED.java
License:Open Source License
MABEDTopic getRefinedTopic(MABEDTopic simpleTopic, int nbrelatedTerms) {
    MABEDTopic refinedTopic = new MABEDTopic();
    String[] frequentTerms = new String[nbrelatedTerms];
    try {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        RAMDirectory temporaryIndex = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter temporaryWriter = new IndexWriter(temporaryIndex, config);
        Document doc = new Document();
        doc.add(new Field("content", dbAccess.getMessagesAsString(appVariables, simpleTopic.mainTerm,
                simpleTopic.I.timeSliceA, simpleTopic.I.timeSliceB), Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.YES));
        temporaryWriter.addDocument(doc);
        temporaryWriter.commit();
        IndexReader temporaryReader = IndexReader.open(temporaryWriter, true);
        TermEnum allTerms = temporaryReader.terms();
        int minFreq = 0;
        TermInfoList termList = new TermInfoList();
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (!term.equals(simpleTopic.mainTerm) && term.length() > 1 && !appVariables.isStopWord(term)) {
                int cf = IndexAccess.getTermOccurenceCount(temporaryReader, term);
                if (cf > minFreq) {
                    termList.addTermInfo(new TermInfo(term, (int) cf));
                    termList.sortList();
                    if (termList.size() > nbrelatedTerms) {
                        termList.removeLast();
                    }
                    minFreq = termList.get(termList.size() - 1).occurence;
                }
            }
        }
        for (int i = 0; i < termList.size() && i < nbrelatedTerms; i++) {
            frequentTerms[i] = termList.get(i).text;
        }
        temporaryWriter.close();
        temporaryReader.close();
        temporaryIndex.close();

        float ref[] = indexAccess.getTermFrequency(appVariables, simpleTopic.mainTerm);
        float comp[];
        refinedTopic = new MABEDTopic(simpleTopic.mainTerm, simpleTopic.I, simpleTopic.score, simpleTopic.anomaly);
        for (int j = 0; j < nbrelatedTerms && frequentTerms[j] != null; j++) {
            comp = indexAccess.getTermFrequency(appVariables, frequentTerms[j]);
            double w = getErdemCoefficient(ref, comp, simpleTopic.I.timeSliceA, simpleTopic.I.timeSliceB);
            if (w >= _THETA_) {
                refinedTopic.relatedTerms.add(new MABEDWeightedTerm(frequentTerms[j], w));
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(MABED.class.getName()).log(Level.SEVERE, null, ex);
    }
    return refinedTopic;
}
From source file:fr.ericlab.sondy.core.DataManipulation.java
License:Open Source License
public String[] getFrequentCoocurringTerms(String document, int numTerms, String baseTerm,
        AppVariables appVariables) {
    String[] frequentTerms = new String[numTerms];
    try {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        RAMDirectory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter w = new IndexWriter(index, config);
        Document doc = new Document();
        doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        w.addDocument(doc);
        w.commit();
        IndexReader r = IndexReader.open(w, true);
        TermEnum allTerms = r.terms();
        int minFreq = 0;
        TermInfoList termList = new TermInfoList();
        StopWords stopWords = appVariables.currentStopWords;
        HashSet<String> stopWordsSet = stopWords.getSet();
        stopWords.add(baseTerm);
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (term.length() > 1 && !stopWordsSet.contains(term)) {
                float cf = getTermOccurenceCount(r, term);
                if (cf > minFreq) {
                    termList.addTermInfo(new TermInfo(term, (int) cf));
                    termList.sortList();
                    if (termList.size() > numTerms) {
                        termList.removeLast();
                    }
                    minFreq = termList.get(termList.size() - 1).occurence;
                }
            }
        }
        for (int i = 0; i < termList.size(); i++) {
            frequentTerms[i] = termList.get(i).text;
        }
        w.close();
        r.close();
        index.close();
    } catch (Exception ex) {
        Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return frequentTerms;
}
From source file:fr.ericlab.sondy.core.DataManipulation.java
License:Open Source License
public String[] getFrequentCoocurringTermsFromFile(int numTerms, String baseTerm, AppVariables appVariables) {
    String[] frequentTerms = new String[numTerms];
    try {
        BufferedReader input = new BufferedReader(new FileReader("tmp.msg"));
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        RAMDirectory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter w = new IndexWriter(index, config);
        String line = "";
        String document = "";
        int count = 0;
        while ((line = input.readLine()) != null) {
            count++;
            document += line;
            if (count == 2000) {
                Document doc = new Document();
                doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED,
                        Field.TermVector.YES));
                w.addDocument(doc);
                w.commit();
                count = 0;
                document = "";
            }
        }
        Document doc = new Document();
        doc.add(new Field("content", document, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        w.addDocument(doc);
        w.commit();
        input.close();
        IndexReader r = IndexReader.open(w, true);
        TermEnum allTerms = r.terms();
        int minFreq = 0;
        TermInfoList termList = new TermInfoList();
        StopWords stopWords = appVariables.currentStopWords;
        HashSet<String> stopWordsSet = stopWords.getSet();
        stopWords.add(baseTerm);
        while (allTerms.next()) {
            String term = allTerms.term().text();
            if (term.length() > 1 && !stopWordsSet.contains(term)) {
                float cf = getTermOccurenceCount(r, term);
                if (cf > minFreq) {
                    termList.addTermInfo(new TermInfo(term, (int) cf));
                    termList.sortList();
                    if (termList.size() > numTerms) {
                        termList.removeLast();
                    }
                    minFreq = termList.get(termList.size() - 1).occurence;
                }
            }
        }
        for (int i = 0; i < termList.size(); i++) {
            frequentTerms[i] = termList.get(i).text;
        }
        w.close();
        r.close();
        index.close();
    } catch (Exception ex) {
        Logger.getLogger(DataManipulation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return frequentTerms;
}
From source file:fr.lipn.yasemir.ontology.annotation.KNNAnnotator.java
License:Open Source License
public DocumentAnnotation annotate(String document) {
    DocumentAnnotation ret = new DocumentAnnotation();
    try {
        IndexReader reader = IndexReader.open(FSDirectory.open(new File(termIndexPath)));
        IndexSearcher searcher = new IndexSearcher(reader);

        document = document.replaceAll("Support, .+?;", "");
        document = document.replaceAll("\\[.*?\\]", "").trim();
        //document = document.replaceAll( "\\p{Punct}", " " );
        String[] fragments = document.split("[;:\\.,]");

        for (String ofragment : fragments) {
            ofragment = ofragment.replaceAll("\\p{Punct}", " ");
            ofragment = ofragment.trim();
            String sa[] = ofragment.split("(?<=[ \\n])");
            EnglishStemmer st = new EnglishStemmer();
            StringBuffer fbuf = new StringBuffer();
            for (String s : sa) {
                st.setCurrent(s.trim());
                st.stem();
                fbuf.append(st.getCurrent());
                fbuf.append(" ");
            }
            String fragment = fbuf.toString().trim(); // stemmed fragment

            if (fragment.length() == 0)
                continue;

            //System.err.println("Annotating: "+fragment);
            // use K-NN annotation (see Trieschnigg et al. 2009)
            IndexReader docreader = IndexReader.open(FSDirectory.open(new File(this.standardIndexPath)));
            IndexSearcher docsearcher = new IndexSearcher(docreader);

            QueryParser parser = new QueryParser(Version.LUCENE_44, "text", Yasemir.analyzer);
            Query query = parser.parse(fragment);
            System.err.println("Looking for: " + query);

            TopDocs results = docsearcher.search(query, N); // get the first 100 documents
            ScoreDoc[] hits = results.scoreDocs;

            int topLimit = Math.min(results.totalHits, K);
            int bottomLimit = Math.min(results.totalHits, N) - K;
            int numTotalHits = Math.min(results.totalHits, N);
            //System.err.println("top:"+topLimit+" bottom:"+bottomLimit+" total:"+numTotalHits);

            HashMap<String, Double> ttags = new HashMap<String, Double>();
            HashMap<String, Integer> btags = new HashMap<String, Integer>();

            if (topLimit < bottomLimit) {
                // Get the tags used in the top K documents matching the request
                hits = docsearcher.search(query, numTotalHits).scoreDocs;
                for (int i = 0; i < topLimit; i++) {
                    Document doc = docsearcher.doc(hits[i].doc);
                    Vector<String> tags = new Vector<String>();
                    List<IndexableField> docFields = doc.getFields();
                    for (IndexableField f : docFields) {
                        String fname = f.name();
                        if (fname.endsWith("annot")) {
                            tags.add(fname + ":" + doc.get(fname));
                        }
                    }
                    String[] tagStrings = (String[]) tags.toArray();
                    for (String t : tagStrings) {
                        t = t.replaceAll("\\W|_", " ");
                        Double nt = ttags.get(t);
                        if (nt == null)
                            nt = new Double(hits[i].score);
                        else
                            nt = new Double(hits[i].score + nt.doubleValue());
                        ttags.put(t, nt);
                    }
                }
                for (int i = bottomLimit; i < numTotalHits; i++) {
                    Document doc = docsearcher.doc(hits[i].doc);
                    Vector<String> tags = new Vector<String>();
                    List<IndexableField> docFields = doc.getFields();
                    for (IndexableField f : docFields) {
                        String fname = f.name();
                        if (fname.endsWith("annot")) {
                            tags.add(fname + ":" + doc.get(fname));
                        }
                    }
                    String[] tagStrings = (String[]) tags.toArray();
                    for (String t : tagStrings) {
                        t = t.replaceAll("\\W|_", " ");
                        Integer nt = btags.get(t);
                        if (nt == null)
                            nt = new Integer(1);
                        else
                            nt = new Integer((nt.intValue() + 1));
                        btags.put(t, nt);
                    }
                }
            }

            Vector<WeightedTag> tagv = new Vector<WeightedTag>();
            // now find, for all tags, the corresponding MeSH concepts
            double sum = 0;
            for (String tag : ttags.keySet()) {
                double tagStrength = ttags.get(tag).doubleValue();
                double compStrength = 0;
                if (btags.containsKey(tag)) {
                    compStrength = (btags.get(tag).doubleValue()) / ((double) K);
                }
                //System.err.println(tag+ " :str="+tagStrength+", comp="+compStrength);
                double weight = tagStrength * (1 - compStrength);
                sum += weight;
                tagv.add(new WeightedTag(tag, weight));
            }

            double avg = sum / (double) tagv.size();
            double ssum = 0;
            for (WeightedTag wt : tagv) {
                ssum += Math.sqrt(Math.pow(wt.getWeight() - avg, 2d));
            }
            double stddev = ssum / (double) tagv.size();
            //System.err.println("avg w: "+avg+" stddev:"+stddev+" limit:"+(avg+2*stddev));
            double limit = (avg + 2 * stddev); // definition of statistic outlier

            TagComparator comparator = new TagComparator();
            Collections.sort(tagv, comparator);

            int i = 0;
            for (WeightedTag wt : tagv) {
                String tag = wt.getName();
                if (i >= maxTags)
                    break;
                if (wt.getWeight() >= limit) {
                    QueryParser tagparser = new QueryParser(Version.LUCENE_44, "labels", Yasemir.analyzer);
                    Query tagquery = tagparser.parse("\"" + tag + "\"");
                    TopDocs tagresults = searcher.search(tagquery, 5);
                    ScoreDoc[] taghits = tagresults.scoreDocs;
                    int numTagTotalHits = tagresults.totalHits;
                    if (numTagTotalHits > 0) {
                        taghits = searcher.search(tagquery, numTagTotalHits).scoreDocs;
                        Document doc = searcher.doc(taghits[0].doc);
                        Annotation ann = new Annotation(doc.get("id"));
                        //System.err.println("Adding: "+tag+" w:"+wt.getWeight());
                        String ontoID = ann.getRelatedOntology().getOntologyID();
                        Vector<Annotation> annotations = ret.get(ontoID);
                        if (annotations == null)
                            annotations = new Vector<Annotation>();
                        annotations.add(ann);
                        ret.put(ontoID, annotations);
                        i++;
                    }
                }
            }
            docreader.close();
        }
        reader.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
From source file:fr.lipn.yasemir.ontology.annotation.SentenceBasedAnnotator.java
License:Open Source License
/**
 * Implementation of the annotate method by IndexBasedAnnotator.
 *
 * The input text is split into fragments according to punctuation;
 * every fragment is used as a query and sent to a Lucene SE that
 * was used to index the terminology (BM25 weight).
 * Up to the 20 top results returned by the system are taken as the annotation for the
 * fragment text. All the fragment annotations combined compose the document annotation
 * that is returned by this method.
 */
public DocumentAnnotation annotate(String document) {
    DocumentAnnotation ret = new DocumentAnnotation();
    try {
        IndexReader reader = IndexReader.open(FSDirectory.open(new File(termIndexPath)));
        IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setSimilarity(new BM25Similarity());

        /*
        document = document.replaceAll("\\[.*?\\]", "").trim();
        //document = document.replaceAll( "\\p{Punct}", " " );
        String[] fragments = document.split("[;:\\.,]");
        */
        String[] fragments = (String[]) getSentences(document).toArray();

        for (String ofragment : fragments) {
            ofragment = ofragment.replaceAll("\\p{Punct}", " ");
            ofragment = ofragment.trim();
            String sa[] = ofragment.split("(?<=[ \\n])");
            EnglishStemmer st = new EnglishStemmer();
            StringBuffer fbuf = new StringBuffer();
            for (String s : sa) {
                st.setCurrent(s.trim());
                st.stem();
                fbuf.append(st.getCurrent());
                fbuf.append(" ");
            }
            String fragment = fbuf.toString().trim(); // stemmed fragment

            if (fragment.length() == 0)
                continue;

            //System.err.println("Annotating: "+fragment);
            QueryParser parser = new QueryParser(Version.LUCENE_44, "labels", Yasemir.analyzer);
            Query query = parser.parse(fragment);
            String stemmedFragment = query.toString("labels").replaceAll("labels:", "");

            TopDocs results = searcher.search(query, 20);
            ScoreDoc[] hits = results.scoreDocs;
            int numTotalHits = results.totalHits;
            //System.err.println(numTotalHits + " total matching classes");

            if (numTotalHits > 0) {
                hits = searcher.search(query, numTotalHits).scoreDocs;
                for (int i = 0; i < Math.min(numTotalHits, MAX_ANNOTS); i++) {
                    Document doc = searcher.doc(hits[i].doc);
                    String ptrn = "(?i)(" + doc.get("labels").replaceAll(", ", "|") + ")";
                    //System.err.println("OWLClass="+doc.get("id")+" score="+hits[i].score);
                    if (Tools.checkPattern(stemmedFragment, ptrn)) {
                        //System.err.println("OK: OWLClass="+doc.get("id")+" score="+hits[i].score);
                        Annotation ann = new Annotation(doc.get("id"));
                        String ontoID = ann.getRelatedOntology().getOntologyID();
                        Vector<Annotation> annotations = ret.get(ontoID);
                        if (annotations == null)
                            annotations = new Vector<Annotation>();
                        annotations.add(ann);
                        ret.put(ontoID, annotations);
                    }
                }
            }
        }
        reader.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}