Example usage for the org.apache.lucene.search.IndexSearcher constructor

Introduction

On this page you can find example usages of the org.apache.lucene.search.IndexSearcher constructor.

Prototype

public IndexSearcher(IndexReaderContext context)

Source Link

Document

Creates a searcher searching the provided top-level IndexReaderContext.

Usage

From source file:at.lux.fotoretrieval.retrievalengines.LuceneRetrievalEngine.java

License:Open Source License

/**
 * Runs a query against the full-text Lucene index selected by {@code whereToSearch} and
 * loads the XML file referenced by each hit from disk.
 * <p>
 * The query string is parsed with a Lucene {@code QueryParser} on the default field
 * {@code "all"}. At most {@code maxResults} hits are read. For every hit the file named by
 * the stored {@code "file"} field is parsed and its root element is added to the result
 * together with the hit score.
 * <p>
 * I/O, query-parse and XML-parse failures are printed to standard error and are not
 * rethrown; the entries collected up to that point are returned.
 *
 * @param xPath         the query string handed to the query parser
 * @param whereToSearch location passed to {@code parseFulltextIndexDirectory} to find the index
 * @param recursive     not used by this method
 * @param progress      optional progress bar to update; may be {@code null}
 * @return the result entries in hit order, possibly empty, never {@code null}
 */
public List<ResultListEntry> getImagesByXPathSearch(String xPath, String whereToSearch, boolean recursive,
        JProgressBar progress) {
    ArrayList<ResultListEntry> results = new ArrayList<ResultListEntry>(maxResults);
    if (progress != null)
        progress.setString("Searching through index");
    SAXBuilder builder = new SAXBuilder();
    try {
        QueryParser qParse = new QueryParser("all", new StandardAnalyzer());
        IndexSearcher searcher = new IndexSearcher(parseFulltextIndexDirectory(whereToSearch));
        try {
            Query query = qParse.parse(xPath);
            Hits hits = searcher.search(query);
            int hitsCount = hits.length();
            if (hitsCount > maxResults)
                hitsCount = maxResults;
            if (progress != null) {
                progress.setMinimum(0);
                progress.setMaximum(hitsCount);
                progress.setValue(0);
                progress.setString("Reading results from disk");
            }

            for (int i = 0; i < hitsCount; i++) {
                Document d = hits.doc(i);
                String file = d.get("file");
                Element e;
                // Close the stream once the document is parsed so that file handles do not
                // pile up while iterating over the hits.
                FileInputStream in = new FileInputStream(file);
                try {
                    e = builder.build(in).getRootElement();
                } finally {
                    in.close();
                }
                results.add(new ResultListEntry(hits.score(i), e, file));
                // i + 1 entries are done at this point, so the bar reaches its maximum
                // after the last hit.
                if (progress != null)
                    progress.setValue(i + 1);
            }
        } finally {
            // Hits are fetched through the searcher, so it is closed only after the loop.
            searcher.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        System.err.println("XPath was: " + xPath);
        e.printStackTrace();
    } catch (JDOMException e) {
        e.printStackTrace();
    }
    return results;

}

From source file:au.edu.unimelb.csse.exp.GenerateQueries.java

License:Apache License

/**
 * Creates a query generator for the index stored at the given location: opens an
 * {@code IndexSearcher} on it and remembers the path.
 *
 * @param pathToIndex location of the Lucene index
 * @throws CorruptIndexException declared for the searcher construction
 * @throws IOException           declared for the searcher construction
 */
public GenerateQueries(String pathToIndex) throws CorruptIndexException, IOException {
    reader = new IndexSearcher(pathToIndex);
    this.pathToIndex = pathToIndex;
}

From source file:au.edu.unimelb.csse.exp.GenerateQueries.java

License:Apache License

/**
 * Generates random queries and writes them to {@code generatedFileName}.
 * <p>
 * Produces 50 distinct expressions each of length 2, 3 and 4, followed by up to 20 filter
 * queries (5 rounds over lengths 1 to 4). Each group is handed to {@code print} together
 * with the writer. Progress messages go to standard output.
 *
 * @param generatedFileName path of the file the generated queries are written to
 * @throws IOException if the output file cannot be opened, written or closed, or if the
 *                     index cannot be opened
 */
private void run(String generatedFileName) throws IOException {
    getAllTerms();
    BufferedWriter writer = new BufferedWriter(new FileWriter(generatedFileName));
    try {
        System.out.println("Found " + textLabels.size() + " number of text labels.");

        // NOTE(review): this replaces whatever searcher the field held before without
        // closing it -- confirm whether the previous instance needs to be released.
        reader = new IndexSearcher(pathToIndex);
        System.out.println("Generating 50 queries of length 2");
        for (int i = 0; i < 50; i++) {
            String s = null;
            // Retry until an expression is produced that has not been generated before.
            do {
                s = generateExprOfLength(2);
            } while (generated2.contains(s));
            generated2.add(s);
        }
        System.out.println("");
        print(writer, generated2);
        System.out.println("Generating 50 queries of length 3");
        for (int i = 0; i < 50; i++) {
            String s = null;
            do {
                s = generateExprOfLength(3);
            } while (generated3.contains(s));
            generated3.add(s);
        }
        System.out.println("");
        print(writer, generated3);
        System.out.println("Generating 50 queries of length 4");
        for (int i = 0; i < 50; i++) {
            String s = null;
            do {
                s = generateExprOfLength(4);
            } while (generated4.contains(s));
            generated4.add(s);
        }
        System.out.println("");
        print(writer, generated4);
        System.out.println("Generating 20 filter queries");
        for (int i = 0; i < 5; i++) {
            for (int j = 1; j < 5; j++) {
                String s = null;
                do {
                    s = generateFilterQueryOfLength(j, 4);
                    if (s == null) {
                        System.out.println("breaking out after reaching overflow limit");
                        break;
                    }
                } while (generatedFilter.contains(s));
                // The break above only leaves the retry loop; skip the add so that a failed
                // generation does not put null into the collection.
                if (s != null) {
                    generatedFilter.add(s);
                }
            }
        }

        print(writer, generatedFilter);
    } finally {
        // Release the file handle even when generation or printing fails.
        writer.close();
    }
}

From source file:au.edu.unimelb.csse.join.JoinFunctionalTest.java

License:Apache License

/**
 * Checks the number of comparisons reported for descendant queries over a single indexed
 * sentence, for each join type.
 * <p>
 * This test is actually commented out; to run it, match counting has to be enabled in
 * JoinLogic.
 *
 * @throws Exception if indexing or searching fails
 */
public void testNumberOfCallsToMatch() throws Exception {
    String sent = "(NP"
            + "(NP" + "(DT The)" + "(NN year))"
            + "(NP" + "(NP(CD 1956))"
            + "(PP" + "(IN in)" + "(NP(JJ rugby)(NN union))" + ")"
            + ")"
            + "(. .)"
            + ")";

    // Index the sentence as the only document of an in-memory index.
    Analyzer stringAnalyser = new FastStringAnalyser();
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, stringAnalyser, true,
            IndexWriter.MaxFieldLength.UNLIMITED);
    Document sentenceDoc = new Document();
    sentenceDoc.add(new Field("sent", sent, Field.Store.NO, Field.Index.ANALYZED_NO_NORMS,
            Field.TermVector.WITH_POSITIONS));
    indexWriter.addDocument(sentenceDoc);
    indexWriter.close();

    IndexSearcher searcher = new IndexSearcher(ramDir);

    // Expected counts are listed per join type, in the order of joinTypes.
    TermJoinType[] joinTypes = { TermJoinType.SIMPLE, TermJoinType.SIMPLE_WITH_FC, TermJoinType.EARLY_STOP,
            TermJoinType.EARLY_STOP_WITH_FC };
    int[] twoStepsFlagOff = { 6, 1, 2, 1 };
    int[] twoStepsFlagOn = { 6, 5, 6, 5 };
    int[] threeStepsFlagOff = { 23, 10, 10, 8 };

    // NOTE(review): the boolean argument is presumably the lookahead switch -- confirm
    // against assertNumberOfComparisons.
    for (int k = 0; k < joinTypes.length; k++) {
        assertNumberOfComparisons(searcher, "//NP//NP", joinTypes[k], false, twoStepsFlagOff[k]);
    }
    for (int k = 0; k < joinTypes.length; k++) {
        assertNumberOfComparisons(searcher, "//NP//NP", joinTypes[k], true, twoStepsFlagOn[k]);
    }
    for (int k = 0; k < joinTypes.length; k++) {
        assertNumberOfComparisons(searcher, "//NP//NP//NP", joinTypes[k], false, threeStepsFlagOff[k]);
    }
}

From source file:au.edu.unimelb.csse.join.JoinFunctionalTest.java

License:Apache License

/**
 * Verifies that filter queries match the single indexed sentence exactly once: first
 * {@code //PP[/IN AND /NP]} for every join type with the lookahead flag off and on, then
 * {@code //PP[/IN AND /NP/JJ/rugby]} with the SIMPLE join type and lookahead on.
 *
 * @throws Exception if indexing, query parsing or searching fails
 */
public void testFilterjoin() throws Exception {
    String sent = "(NP"
            + "(NP" + "(DT The)" + "(NN year))"
            + "(NP" + "(NP(CD 1956))"
            + "(PP" + "(IN in)" + "(NP(JJ rugby)(NN union))" + ")"
            + ")"
            + "(. .)"
            + ")";

    // Index the sentence as the only document of an in-memory index.
    Analyzer stringAnalyser = new FastStringAnalyser();
    RAMDirectory ramDir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDir, stringAnalyser, true,
            IndexWriter.MaxFieldLength.UNLIMITED);
    Document sentenceDoc = new Document();
    sentenceDoc.add(new Field("sent", sent, Field.Store.NO, Field.Index.ANALYZED_NO_NORMS,
            Field.TermVector.WITH_POSITIONS));
    indexWriter.addDocument(sentenceDoc);
    indexWriter.close();

    IndexSearcher searcher = new IndexSearcher(ramDir);

    for (TermJoinType joinType : TermJoinType.values()) {
        for (boolean lookahead : new boolean[] { false, true }) {
            TreebankQuery filterQuery = new QueryBuilder("//PP[/IN AND /NP]").parse(joinType, lookahead);
            SimpleHitCollector collected = new SimpleHitCollector(10);
            searcher.search(filterQuery, collected);
            assertEquals(1, collected.totalHits);
        }
    }

    TreebankQuery deepFilterQuery = new QueryBuilder("//PP[/IN AND /NP/JJ/rugby]").parse(TermJoinType.SIMPLE,
            true);
    SimpleHitCollector deepCollected = new SimpleHitCollector(10);
    searcher.search(deepFilterQuery, deepCollected);
    assertEquals(1, deepCollected.totalHits);
}

From source file:au.edu.unimelb.csse.listener.InitialiseIndexSearcherFull.java

License:Apache License

/**
 * Opens an {@code IndexSearcher} on the index directory at {@code resourceLocation} and
 * publishes it as a servlet-context attribute.
 * <p>
 * Any failure is printed to standard error and otherwise ignored; in that case the
 * attribute is not set by this method.
 *
 * @param event            the context event that supplies the servlet context
 * @param resourceLocation file-system location of the index
 * @param contextAttrName  name of the context attribute the searcher is stored under
 */
private void initSearcher(ServletContextEvent event, final String resourceLocation,
        final String contextAttrName) {
    try {
        IndexSearcher searcher = new IndexSearcher(FSDirectory.getDirectory(new File(resourceLocation)));
        event.getServletContext().setAttribute(contextAttrName, searcher);
    } catch (Exception e) {
        // Covers the index-specific, I/O and all other exceptions alike: each is only reported.
        e.printStackTrace();
    }
}

From source file:au.edu.unimelb.csse.QueryExpTest.java

License:Apache License

/**
 * Times the configured query against the index at {@code indexPath}.
 * <p>
 * Runs a fixed query for the term {@code "the"} in field {@code "sent"} twice, with a
 * three-second pause in between (presumably to warm up the index before measuring). It
 * then executes the configured {@code query} {@code times} times, printing for each run
 * the elapsed nanoseconds and the collector's total hit count, separated by a tab.
 *
 * @throws CorruptIndexException declared for opening or searching the index
 * @throws IOException           if the index cannot be read or closed
 * @throws ParseException        if the configured query cannot be parsed
 */
private void run() throws CorruptIndexException, IOException, ParseException {
    IndexSearcher searcher = new IndexSearcher(indexPath);
    try {
        TreeTerm starterTerm = new TreeTerm(0, TreeAxis.DESCENDANT, new Term("sent", "the"));
        TreeExpr starterExpr = new TreeExpr();
        starterExpr.addTerm(starterTerm);
        SimpleHitCollector collector = new SimpleHitCollector(1);

        searcher.search(new TreebankQuery(starterExpr), collector);
        collector.reset();
        try {
            Thread.sleep(3000);
        } catch (InterruptedException e) {
            // The pause is best effort: report the interruption and carry on measuring.
            // NOTE(review): the interrupt status is not restored here -- confirm that is intended.
            e.printStackTrace();
        }
        searcher.search(new TreebankQuery(starterExpr), collector);

        for (int i = 0; i < times; i++) {
            collector.reset();
            QueryBuilder builder = new QueryBuilder(query);
            final TreebankQuery q = builder.parse(joinType, useLookahead);
            // Only the search call itself is timed; query parsing stays outside the window.
            long start = System.nanoTime();
            searcher.search(q, collector);
            long end = System.nanoTime();
            System.out.println((end - start) + "\t" + collector.totalHits);
        }
    } finally {
        // Release the index files even when a search or a parse fails.
        searcher.close();
    }
}

From source file:au.org.ala.names.search.ALANameIndexer.java

License:Open Source License

/**
 * Creates the temporary index that provides a lookup of checklist bank id to
 * GUID/*w ww  .  j  ava2s  .c o  m*/
 */
private IndexSearcher createTmpGuidIndex(String cbExportFile) throws Exception {
    System.out.println("Starting to create the tmp guid index...");
    IndexWriter iw = createIndexWriter(new File("/data/tmp/guid"), new KeywordAnalyzer(), true);
    au.com.bytecode.opencsv.CSVReader cbreader = new au.com.bytecode.opencsv.CSVReader(
            new FileReader(cbExportFile), '\t', '"', '/', 1);
    for (String[] values = cbreader.readNext(); values != null; values = cbreader.readNext()) {
        Document doc = new Document();
        String id = values[POS_ID];
        String guid = values[POS_LSID];
        doc.add(new StringField("id", id, Store.YES));
        if (StringUtils.isEmpty(id))
            guid = id;

        doc.add(new StoredField("guid", guid));
        iw.addDocument(doc);
    }
    System.out.println("Finished writing the tmp guid index...");
    iw.commit();
    iw.forceMerge(1);
    iw.close();
    //As of lucene 4.0 all IndexReaders are read only
    return new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File("/data/tmp/guid"))));
}

From source file:au.org.ala.names.search.ALANameIndexer.java

License:Open Source License

/**
 * Indexes common names from CoL and ANBG for use in the Common name search.
 *
 * @param iw  The index writer to write the common documents to
 * @param exportDir  The directory that contains the common name export files.
 * @param indexDir The directory in which to create the index.
 * @throws Exception/*from   w ww .  j a  v  a 2  s. c o m*/
 */
private void indexCommonNames(IndexWriter iw, String exportDir, String indexDir) throws Exception {
    log.info("Creating Common Names Index ...");

    //TODO think about adding additional sources for common names

    IndexSearcher currentNameSearcher = new IndexSearcher(
            DirectoryReader.open(FSDirectory.open(new File(indexDir + File.separator + "cb"))));
    IndexSearcher extraSearcher = new IndexSearcher(
            DirectoryReader.open(FSDirectory.open(new File(indexDir + File.separator + "id"))));

    addCoLCommonNames(iw, currentNameSearcher);
    addAnbgCommonNames(afdFile, iw, currentNameSearcher, extraSearcher, '\t');
    addAnbgCommonNames(apniFile, iw, currentNameSearcher, extraSearcher, ',');

    iw.commit();
    iw.forceMerge(1);
    iw.close();
}

From source file:au.org.ala.names.search.ALANameIndexer.java

License:Open Source License

/**
 * Creates a temporary index that will provide a lookup up of lsid to "real lsid".
 * <p/>/*from   w  ww  . j  a  v  a  2  s .c  om*/
 * This deals with the following situations:
 * - common names that are sourced from CoL (LSIDs will be mapped to corresponding ANBG LSID)
 * - Multiple ANBG LSIDs exist for the same scientific name and more than 1 are mapped to the same common name.
 *
 * @param idFile
 * @throws Exception
 */
private void createExtraIdIndex(String idxLocation, File idFile) throws Exception {
    CSVReader reader = new CSVReader(new FileReader(idFile), '\t', '"', '~');//CSVReader.build(idFile, "UTF-8", "\t", '"', 0);
    File indexDir = new File(idxLocation);
    IndexWriter iw = createIndexWriter(indexDir, new KeywordAnalyzer(), true);//new IndexWriter(FSDirectory.open(indexDir), new KeywordAnalyzer(), true, MaxFieldLength.UNLIMITED);
    String[] values = null;
    while ((values = reader.readNext()) != null) {

        if (values != null && values.length >= 3) {
            Document doc = new Document();
            //doc.add(new Field("lsid", values[2], Store.NO, Index.NOT_ANALYZED));
            doc.add(new StringField("lsid", values[2], Store.NO));
            //doc.add(new Field("reallsid", values[1], Store.YES, Index.NO));
            doc.add(new StoredField("reallsid", values[1]));
            iw.addDocument(doc);
        }
    }
    iw.commit();
    iw.forceMerge(1);
    iw.close();
    idSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(indexDir)));
}