List of usage examples for the org.apache.lucene.search.IndexSearcher constructor
public IndexSearcher(IndexReaderContext context)
From source file:at.lux.fotoretrieval.retrievalengines.LuceneRetrievalEngine.java
License:Open Source License
/**
 * Runs a full-text query against the Lucene index located for {@code whereToSearch} and loads
 * the XML document behind each hit.
 *
 * <p>Despite the parameter name, {@code xPath} is handed to a Lucene {@code QueryParser}
 * (default field {@code "all"}), not to an XPath engine. At most {@code maxResults} hits are
 * read; for each one the file named by the stored {@code "file"} field is parsed with JDOM and
 * its root element is wrapped in a {@code ResultListEntry} together with the hit score.
 *
 * @param xPath         query string passed to the Lucene query parser
 * @param whereToSearch value passed to {@code parseFulltextIndexDirectory} to locate the index
 * @param recursive     not referenced by this method
 * @param progress      optional progress bar to update; may be {@code null}
 * @return the entries read before the search finished or failed; never {@code null}. I/O, query
 *         parse and JDOM errors are printed and end the search early, so the list may be
 *         partial or empty.
 */
public List<ResultListEntry> getImagesByXPathSearch(String xPath, String whereToSearch, boolean recursive,
        JProgressBar progress) {
    ArrayList<ResultListEntry> results = new ArrayList<ResultListEntry>(maxResults);
    if (progress != null)
        progress.setString("Searching through index");
    SAXBuilder builder = new SAXBuilder();
    IndexSearcher searcher = null;
    try {
        QueryParser qParse = new QueryParser("all", new StandardAnalyzer());
        searcher = new IndexSearcher(parseFulltextIndexDirectory(whereToSearch));
        Query query = qParse.parse(xPath);
        Hits hits = searcher.search(query);
        int hitsCount = hits.length();
        if (hitsCount > maxResults)
            hitsCount = maxResults;
        if (progress != null) {
            progress.setMinimum(0);
            progress.setMaximum(hitsCount);
            progress.setValue(0);
            progress.setString("Reading results from disk");
        }
        for (int i = 0; i < hitsCount; i++) {
            Document d = hits.doc(i);
            // Close the file handle ourselves: one stream is opened per hit, so leaking them
            // can exhaust file descriptors on large result sets.
            FileInputStream in = new FileInputStream(d.get("file"));
            Element e;
            try {
                e = builder.build(in).getRootElement();
            } finally {
                in.close();
            }
            results.add(new ResultListEntry(hits.score(i), e, d.get("file")));
            if (progress != null)
                progress.setValue(i);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        System.err.println("XPath was: " + xPath);
        e.printStackTrace();
    } catch (JDOMException e) {
        e.printStackTrace();
    } finally {
        // Release the index files held by the searcher, whether or not the search succeeded.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException closeFailure) {
                closeFailure.printStackTrace();
            }
        }
    }
    return results;
}
From source file:au.edu.unimelb.csse.exp.GenerateQueries.java
License:Apache License
public GenerateQueries(String pathToIndex) throws CorruptIndexException, IOException { this.pathToIndex = pathToIndex; this.reader = new IndexSearcher(pathToIndex); }
From source file:au.edu.unimelb.csse.exp.GenerateQueries.java
License:Apache License
private void run(String generatedFileName) throws IOException { getAllTerms();/*from w w w .j av a 2 s .com*/ BufferedWriter writer = new BufferedWriter(new FileWriter(generatedFileName)); System.out.println("Found " + textLabels.size() + " number of text labels."); reader = new IndexSearcher(pathToIndex); System.out.println("Generating 50 queries of length 2"); for (int i = 0; i < 50; i++) { String s = null; do { s = generateExprOfLength(2); } while (generated2.contains(s)); generated2.add(s); // System.out.println(s); } System.out.println(""); print(writer, generated2); System.out.println("Generating 50 queries of length 3"); for (int i = 0; i < 50; i++) { String s = null; do { s = generateExprOfLength(3); } while (generated3.contains(s)); generated3.add(s); // System.out.println(s); } System.out.println(""); print(writer, generated3); System.out.println("Generating 50 queries of length 4"); for (int i = 0; i < 50; i++) { String s = null; do { s = generateExprOfLength(4); } while (generated4.contains(s)); generated4.add(s); // System.out.println(s); } System.out.println(""); print(writer, generated4); System.out.println("Generating 20 filter queries"); for (int i = 0; i < 5; i++) { for (int j = 1; j < 5; j++) { String s = null; do { s = generateFilterQueryOfLength(j, 4); if (s == null) { System.out.println("breaking out after reaching overflow limit"); break; } } while (generatedFilter.contains(s)); generatedFilter.add(s); } } print(writer, generatedFilter); writer.close(); }
From source file:au.edu.unimelb.csse.join.JoinFunctionalTest.java
License:Apache License
/**
 * Indexes a single bracketed parse tree in memory and checks, via
 * {@code assertNumberOfComparisons}, the figure reported for the queries {@code //NP//NP} and
 * {@code //NP//NP//NP} under each {@code TermJoinType}.
 *
 * <p>Per the original author's note, this test is effectively disabled: to run it, match
 * counting has to be enabled in {@code JoinLogic}.
 *
 * @throws Exception if indexing or searching fails
 */
public void testNumberOfCallsToMatch() throws Exception {
    String sent = "(NP" + "(NP" + "(DT The)" + "(NN year))" + "(NP" + "(NP(CD 1956))" + "(PP" + "(IN in)"
            + "(NP(JJ rugby)(NN union))" + ")" + ")" + "(. .)" + ")";
    Analyzer analyser = new FastStringAnalyser();
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, analyser, true, IndexWriter.MaxFieldLength.UNLIMITED);
    Document d = new Document();
    d.add(new Field("sent", sent, Field.Store.NO, Field.Index.ANALYZED_NO_NORMS,
            Field.TermVector.WITH_POSITIONS));
    writer.addDocument(d);
    writer.close();

    IndexSearcher searcher = new IndexSearcher(dir);
    try {
        // Arguments: searcher, query, join type, a boolean flag (presumably lookahead — confirm
        // against assertNumberOfComparisons) and the expected count.
        assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.SIMPLE, false, 6);
        assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.SIMPLE_WITH_FC, false, 1);
        assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.EARLY_STOP, false, 2);
        assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.EARLY_STOP_WITH_FC, false, 1);

        assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.SIMPLE, true, 6);
        assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.SIMPLE_WITH_FC, true, 5);
        assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.EARLY_STOP, true, 6);
        assertNumberOfComparisons(searcher, "//NP//NP", TermJoinType.EARLY_STOP_WITH_FC, true, 5);

        assertNumberOfComparisons(searcher, "//NP//NP//NP", TermJoinType.SIMPLE, false, 23);
        assertNumberOfComparisons(searcher, "//NP//NP//NP", TermJoinType.SIMPLE_WITH_FC, false, 10);
        assertNumberOfComparisons(searcher, "//NP//NP//NP", TermJoinType.EARLY_STOP, false, 10);
        assertNumberOfComparisons(searcher, "//NP//NP//NP", TermJoinType.EARLY_STOP_WITH_FC, false, 8);
    } finally {
        // Release the searcher even when an assertion fails.
        searcher.close();
    }
}
From source file:au.edu.unimelb.csse.join.JoinFunctionalTest.java
License:Apache License
/**
 * Indexes a single bracketed parse tree in memory and checks that filter queries find exactly
 * one hit.
 *
 * <p>{@code //PP[/IN AND /NP]} is run for every {@code TermJoinType}, with lookahead both off
 * and on; {@code //PP[/IN AND /NP/JJ/rugby]} is run once with {@code TermJoinType.SIMPLE} and
 * lookahead on.
 *
 * @throws Exception if indexing, query parsing or searching fails
 */
public void testFilterjoin() throws Exception {
    String sent = "(NP" + "(NP" + "(DT The)" + "(NN year))" + "(NP" + "(NP(CD 1956))" + "(PP" + "(IN in)"
            + "(NP(JJ rugby)(NN union))" + ")" + ")" + "(. .)" + ")";
    Analyzer analyser = new FastStringAnalyser();
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, analyser, true, IndexWriter.MaxFieldLength.UNLIMITED);
    Document d = new Document();
    d.add(new Field("sent", sent, Field.Store.NO, Field.Index.ANALYZED_NO_NORMS,
            Field.TermVector.WITH_POSITIONS));
    writer.addDocument(d);
    writer.close();

    IndexSearcher searcher = new IndexSearcher(dir);
    try {
        boolean[] lookaheadOptions = new boolean[] { false, true };
        for (TermJoinType type : TermJoinType.values()) {
            for (boolean lookahead : lookaheadOptions) {
                QueryBuilder builder = new QueryBuilder("//PP[/IN AND /NP]");
                TreebankQuery query = builder.parse(type, lookahead);
                SimpleHitCollector hitCollector = new SimpleHitCollector(10);
                searcher.search(query, hitCollector);
                assertEquals(1, hitCollector.totalHits);
            }
        }

        QueryBuilder builder = new QueryBuilder("//PP[/IN AND /NP/JJ/rugby]");
        TreebankQuery query = builder.parse(TermJoinType.SIMPLE, true);
        SimpleHitCollector hitCollector = new SimpleHitCollector(10);
        searcher.search(query, hitCollector);
        assertEquals(1, hitCollector.totalHits);
    } finally {
        // Release the searcher even when an assertion fails.
        searcher.close();
    }
}
From source file:au.edu.unimelb.csse.listener.InitialiseIndexSearcherFull.java
License:Apache License
/**
 * Opens an {@code IndexSearcher} over the index directory at {@code resourceLocation} and
 * publishes it as a servlet-context attribute.
 *
 * <p>Any failure is printed to standard error and otherwise ignored, in which case the
 * attribute is not set by this call.
 *
 * @param event            servlet context event supplying the {@code ServletContext}
 * @param resourceLocation file-system path of the index directory
 * @param contextAttrName  name of the context attribute that receives the searcher
 */
private void initSearcher(ServletContextEvent event, final String resourceLocation,
        final String contextAttrName) {
    try {
        File indexLocation = new File(resourceLocation);
        IndexSearcher indexSearcher = new IndexSearcher(FSDirectory.getDirectory(indexLocation));
        event.getServletContext().setAttribute(contextAttrName, indexSearcher);
    } catch (Exception e) {
        // One handler covers CorruptIndexException, IOException and everything else, since all
        // of them are reported the same way.
        e.printStackTrace();
    }
}
From source file:au.edu.unimelb.csse.QueryExpTest.java
License:Apache License
private void run() throws CorruptIndexException, IOException, ParseException { IndexSearcher searcher = new IndexSearcher(indexPath); TreeTerm starterTerm = new TreeTerm(0, TreeAxis.DESCENDANT, new Term("sent", "the")); TreeExpr starterExpr = new TreeExpr(); starterExpr.addTerm(starterTerm);//from w ww . j a va 2 s.com SimpleHitCollector collector = new SimpleHitCollector(1); searcher.search(new TreebankQuery(starterExpr), collector); collector.reset(); try { Thread.sleep(3000); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } searcher.search(new TreebankQuery(starterExpr), collector); for (int i = 0; i < times; i++) { collector.reset(); QueryBuilder builder = new QueryBuilder(query); final TreebankQuery q = builder.parse(joinType, useLookahead); long start = System.nanoTime(); searcher.search(q, collector); long end = System.nanoTime(); System.out.println((end - start) + "\t" + collector.totalHits); } }
From source file:au.org.ala.names.search.ALANameIndexer.java
License:Open Source License
/** * Creates the temporary index that provides a lookup of checklist bank id to * GUID/*w ww . j ava2s .c o m*/ */ private IndexSearcher createTmpGuidIndex(String cbExportFile) throws Exception { System.out.println("Starting to create the tmp guid index..."); IndexWriter iw = createIndexWriter(new File("/data/tmp/guid"), new KeywordAnalyzer(), true); au.com.bytecode.opencsv.CSVReader cbreader = new au.com.bytecode.opencsv.CSVReader( new FileReader(cbExportFile), '\t', '"', '/', 1); for (String[] values = cbreader.readNext(); values != null; values = cbreader.readNext()) { Document doc = new Document(); String id = values[POS_ID]; String guid = values[POS_LSID]; doc.add(new StringField("id", id, Store.YES)); if (StringUtils.isEmpty(id)) guid = id; doc.add(new StoredField("guid", guid)); iw.addDocument(doc); } System.out.println("Finished writing the tmp guid index..."); iw.commit(); iw.forceMerge(1); iw.close(); //As of lucene 4.0 all IndexReaders are read only return new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File("/data/tmp/guid")))); }
From source file:au.org.ala.names.search.ALANameIndexer.java
License:Open Source License
/** * Indexes common names from CoL and ANBG for use in the Common name search. * * @param iw The index writer to write the common documents to * @param exportDir The directory that contains the common name export files. * @param indexDir The directory in which to create the index. * @throws Exception/*from w ww . j a v a 2 s. c o m*/ */ private void indexCommonNames(IndexWriter iw, String exportDir, String indexDir) throws Exception { log.info("Creating Common Names Index ..."); //TODO think about adding additional sources for common names IndexSearcher currentNameSearcher = new IndexSearcher( DirectoryReader.open(FSDirectory.open(new File(indexDir + File.separator + "cb")))); IndexSearcher extraSearcher = new IndexSearcher( DirectoryReader.open(FSDirectory.open(new File(indexDir + File.separator + "id")))); addCoLCommonNames(iw, currentNameSearcher); addAnbgCommonNames(afdFile, iw, currentNameSearcher, extraSearcher, '\t'); addAnbgCommonNames(apniFile, iw, currentNameSearcher, extraSearcher, ','); iw.commit(); iw.forceMerge(1); iw.close(); }
From source file:au.org.ala.names.search.ALANameIndexer.java
License:Open Source License
/** * Creates a temporary index that will provide a lookup up of lsid to "real lsid". * <p/>/*from w ww . j a v a 2 s .c om*/ * This deals with the following situations: * - common names that are sourced from CoL (LSIDs will be mapped to corresponding ANBG LSID) * - Multiple ANBG LSIDs exist for the same scientific name and more than 1 are mapped to the same common name. * * @param idFile * @throws Exception */ private void createExtraIdIndex(String idxLocation, File idFile) throws Exception { CSVReader reader = new CSVReader(new FileReader(idFile), '\t', '"', '~');//CSVReader.build(idFile, "UTF-8", "\t", '"', 0); File indexDir = new File(idxLocation); IndexWriter iw = createIndexWriter(indexDir, new KeywordAnalyzer(), true);//new IndexWriter(FSDirectory.open(indexDir), new KeywordAnalyzer(), true, MaxFieldLength.UNLIMITED); String[] values = null; while ((values = reader.readNext()) != null) { if (values != null && values.length >= 3) { Document doc = new Document(); //doc.add(new Field("lsid", values[2], Store.NO, Index.NOT_ANALYZED)); doc.add(new StringField("lsid", values[2], Store.NO)); //doc.add(new Field("reallsid", values[1], Store.YES, Index.NO)); doc.add(new StoredField("reallsid", values[1])); iw.addDocument(doc); } } iw.commit(); iw.forceMerge(1); iw.close(); idSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(indexDir))); }