Example usage for the org.apache.lucene.search.IndexSearcher constructor

List of usage examples for the org.apache.lucene.search.IndexSearcher constructor

Introduction

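On this page you can find example usages of the org.apache.lucene.search.IndexSearcher constructor. The examples below are taken from projects built against different Lucene versions, so several of them call other constructor overloads (index path, Directory, or IndexReader) rather than the prototype shown next.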

Prototype

public IndexSearcher(IndexReaderContext context) 

Source Link

Documentation

Creates a searcher searching the provided top-level IndexReaderContext.

Usage

From source file:at.lux.fotoretrieval.retrievalengines.LuceneRetrievalEngine.java

License:Open Source License

/**
 * Searches the full-text index for the given query and loads the XML document behind each hit.
 * <p>
 * Despite the method name, {@code xPath} is parsed with a Lucene {@code QueryParser} whose
 * default field is {@code "all"}. At most {@code maxResults} hits are read. Each hit's stored
 * {@code "file"} field is opened and parsed with JDOM, and the root element is wrapped in a
 * {@code ResultListEntry} together with the hit score and the file path.
 * <p>
 * Failures are printed to standard error and do not propagate; whatever was collected up to
 * that point is returned.
 *
 * @param xPath         query string handed to the Lucene query parser
 * @param whereToSearch location passed to {@code parseFulltextIndexDirectory} to find the index
 * @param recursive     not used by this method
 * @param progress      optional progress bar to update; may be {@code null}
 * @return the entries that could be read, never {@code null}
 */
public List<ResultListEntry> getImagesByXPathSearch(String xPath, String whereToSearch, boolean recursive,
        JProgressBar progress) {
    ArrayList<ResultListEntry> results = new ArrayList<ResultListEntry>(maxResults);
    if (progress != null)
        progress.setString("Searching through index");
    SAXBuilder builder = new SAXBuilder();
    IndexSearcher searcher = null;
    try {
        QueryParser qParse = new QueryParser("all", new StandardAnalyzer());
        searcher = new IndexSearcher(parseFulltextIndexDirectory(whereToSearch));
        Query query = qParse.parse(xPath);
        Hits hits = searcher.search(query);
        int hitsCount = hits.length();
        if (hitsCount > maxResults)
            hitsCount = maxResults;
        if (progress != null) {
            progress.setMinimum(0);
            progress.setMaximum(hitsCount);
            progress.setValue(0);
            progress.setString("Reading results from disk");
        }

        for (int i = 0; i < hitsCount; i++) {
            Document d = hits.doc(i);
            String file = d.get("file");
            Element root;
            // Close every stream explicitly; otherwise one file handle per hit stays open.
            FileInputStream in = new FileInputStream(file);
            try {
                root = builder.build(in).getRootElement();
            } finally {
                in.close();
            }
            results.add(new ResultListEntry(hits.score(i), root, file));
            // Report the number of completed hits so the bar reaches its maximum on the last one.
            if (progress != null)
                progress.setValue(i + 1);
        }

    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        System.err.println("XPath was: " + xPath);
        e.printStackTrace();
    } catch (JDOMException e) {
        e.printStackTrace();
    } finally {
        // Hits loads documents lazily, so the searcher may only be closed after the loop.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException closeError) {
                closeError.printStackTrace();
            }
        }
    }
    return results;

}

From source file:au.edu.unimelb.csse.exp.GenerateQueries.java

License:Apache License

public GenerateQueries(String pathToIndex) throws CorruptIndexException, IOException {
    this.pathToIndex = pathToIndex;
    this.reader = new IndexSearcher(pathToIndex);
}

From source file:au.edu.unimelb.csse.exp.GenerateQueries.java

License:Apache License

/**
 * Generates the query sets and writes them to the given file.
 * <p>
 * Produces 50 distinct expressions each of length 2, 3 and 4, followed by up to 20 filter
 * queries (5 rounds over lengths 1 to 4). Each set is handed to {@code print} as soon as it
 * is complete. The output writer is closed even when generation fails part-way.
 *
 * @param generatedFileName path of the file the generated queries are written to
 * @throws IOException if the output file cannot be opened, written or closed, or if the
 *                     index cannot be opened
 */
private void run(String generatedFileName) throws IOException {
    getAllTerms();
    BufferedWriter writer = new BufferedWriter(new FileWriter(generatedFileName));
    try {
        System.out.println("Found " + textLabels.size() + " number of text labels.");

        // NOTE(review): `reader` is overwritten here without closing whatever it held before;
        // confirm whether the previous searcher should be closed first.
        reader = new IndexSearcher(pathToIndex);
        System.out.println("Generating 50 queries of length 2");
        for (int i = 0; i < 50; i++) {
            String s = null;
            do {
                s = generateExprOfLength(2);
            } while (generated2.contains(s));
            generated2.add(s);
        }
        System.out.println("");
        print(writer, generated2);
        System.out.println("Generating 50 queries of length 3");
        for (int i = 0; i < 50; i++) {
            String s = null;
            do {
                s = generateExprOfLength(3);
            } while (generated3.contains(s));
            generated3.add(s);
        }
        System.out.println("");
        print(writer, generated3);
        System.out.println("Generating 50 queries of length 4");
        for (int i = 0; i < 50; i++) {
            String s = null;
            do {
                s = generateExprOfLength(4);
            } while (generated4.contains(s));
            generated4.add(s);
        }
        System.out.println("");
        print(writer, generated4);
        System.out.println("Generating 20 filter queries");
        for (int i = 0; i < 5; i++) {
            for (int j = 1; j < 5; j++) {
                String s = null;
                do {
                    s = generateFilterQueryOfLength(j, 4);
                    if (s == null) {
                        System.out.println("breaking out after reaching overflow limit");
                        break;
                    }
                } while (generatedFilter.contains(s));
                // A null result means the generator gave up; do not record it as a query.
                if (s != null) {
                    generatedFilter.add(s);
                }
            }
        }

        print(writer, generatedFilter);
    } finally {
        // Release the file handle even if index access or generation threw.
        writer.close();
    }
}

From source file:au.edu.unimelb.csse.join.JoinFunctionalTest.java

License:Apache License

/**
 * Indexes one bracketed parse tree in memory and checks the number of comparisons reported
 * for the queries {@code //NP//NP} and {@code //NP//NP//NP} under each join type.
 * <p>
 * Note from the original author: this test is effectively disabled; match counting has to be
 * enabled in JoinLogic before it can be run.
 *
 * @throws Exception if indexing, searching or an assertion helper fails
 */
public void testNumberOfCallsToMatch() throws Exception {
    // The literal pieces are laid out to mirror the nesting of the parse tree.
    final String tree = "(NP"
            + "(NP" + "(DT The)" + "(NN year))"
            + "(NP"
            + "(NP(CD 1956))"
            + "(PP" + "(IN in)" + "(NP(JJ rugby)(NN union))" + ")"
            + ")"
            + "(. .)"
            + ")";

    final Analyzer fastAnalyser = new FastStringAnalyser();
    final RAMDirectory ramDir = new RAMDirectory();
    final IndexWriter indexWriter = new IndexWriter(ramDir, fastAnalyser, true,
            IndexWriter.MaxFieldLength.UNLIMITED);

    final Document sentenceDoc = new Document();
    final Field sentenceField = new Field("sent", tree, Field.Store.NO, Field.Index.ANALYZED_NO_NORMS,
            Field.TermVector.WITH_POSITIONS);
    sentenceDoc.add(sentenceField);
    indexWriter.addDocument(sentenceDoc);
    indexWriter.close();

    final IndexSearcher searcher = new IndexSearcher(ramDir);
    final String twoSteps = "//NP//NP";
    final String threeSteps = "//NP//NP//NP";

    // Two-step query, flag = false.
    assertNumberOfComparisons(searcher, twoSteps, TermJoinType.SIMPLE, false, 6);
    assertNumberOfComparisons(searcher, twoSteps, TermJoinType.SIMPLE_WITH_FC, false, 1);
    assertNumberOfComparisons(searcher, twoSteps, TermJoinType.EARLY_STOP, false, 2);
    assertNumberOfComparisons(searcher, twoSteps, TermJoinType.EARLY_STOP_WITH_FC, false, 1);

    // Two-step query, flag = true.
    assertNumberOfComparisons(searcher, twoSteps, TermJoinType.SIMPLE, true, 6);
    assertNumberOfComparisons(searcher, twoSteps, TermJoinType.SIMPLE_WITH_FC, true, 5);
    assertNumberOfComparisons(searcher, twoSteps, TermJoinType.EARLY_STOP, true, 6);
    assertNumberOfComparisons(searcher, twoSteps, TermJoinType.EARLY_STOP_WITH_FC, true, 5);

    // Three-step query, flag = false.
    assertNumberOfComparisons(searcher, threeSteps, TermJoinType.SIMPLE, false, 23);
    assertNumberOfComparisons(searcher, threeSteps, TermJoinType.SIMPLE_WITH_FC, false, 10);
    assertNumberOfComparisons(searcher, threeSteps, TermJoinType.EARLY_STOP, false, 10);
    assertNumberOfComparisons(searcher, threeSteps, TermJoinType.EARLY_STOP_WITH_FC, false, 8);
}

From source file:au.edu.unimelb.csse.join.JoinFunctionalTest.java

License:Apache License

/**
 * Indexes one bracketed parse tree in memory and verifies that filter queries on the
 * {@code PP} node find exactly one hit.
 * <p>
 * {@code //PP[/IN AND /NP]} is run for every {@code TermJoinType} with the second parse
 * argument both off and on; {@code //PP[/IN AND /NP/JJ/rugby]} is then run once with
 * {@code TermJoinType.SIMPLE} and that argument on.
 *
 * @throws Exception if indexing, query parsing or searching fails
 */
public void testFilterjoin() throws Exception {
    // The literal pieces are laid out to mirror the nesting of the parse tree.
    final String tree = "(NP"
            + "(NP" + "(DT The)" + "(NN year))"
            + "(NP"
            + "(NP(CD 1956))"
            + "(PP" + "(IN in)" + "(NP(JJ rugby)(NN union))" + ")"
            + ")"
            + "(. .)"
            + ")";

    final Analyzer fastAnalyser = new FastStringAnalyser();
    final RAMDirectory ramDir = new RAMDirectory();
    final IndexWriter indexWriter = new IndexWriter(ramDir, fastAnalyser, true,
            IndexWriter.MaxFieldLength.UNLIMITED);

    final Document sentenceDoc = new Document();
    final Field sentenceField = new Field("sent", tree, Field.Store.NO, Field.Index.ANALYZED_NO_NORMS,
            Field.TermVector.WITH_POSITIONS);
    sentenceDoc.add(sentenceField);
    indexWriter.addDocument(sentenceDoc);
    indexWriter.close();

    final IndexSearcher searcher = new IndexSearcher(ramDir);

    final boolean[] lookaheadChoices = { false, true };
    for (TermJoinType joinType : TermJoinType.values()) {
        for (boolean useLookahead : lookaheadChoices) {
            final TreebankQuery filterQuery = new QueryBuilder("//PP[/IN AND /NP]").parse(joinType, useLookahead);
            final SimpleHitCollector hits = new SimpleHitCollector(10);
            searcher.search(filterQuery, hits);
            assertEquals(1, hits.totalHits);
        }
    }

    // A deeper path inside the filter must still match the same single sentence.
    final TreebankQuery deepFilterQuery = new QueryBuilder("//PP[/IN AND /NP/JJ/rugby]")
            .parse(TermJoinType.SIMPLE, true);
    final SimpleHitCollector deepHits = new SimpleHitCollector(10);
    searcher.search(deepFilterQuery, deepHits);
    assertEquals(1, deepHits.totalHits);
}

From source file:au.edu.unimelb.csse.listener.InitialiseIndexSearcherFull.java

License:Apache License

/**
 * Opens an {@code IndexSearcher} on the given location and publishes it as a servlet-context
 * attribute.
 * <p>
 * Any failure is printed to standard error and otherwise ignored, in which case the
 * attribute is not set.
 *
 * @param event            servlet context event supplying the {@code ServletContext}
 * @param resourceLocation file-system location of the index
 * @param contextAttrName  name of the context attribute the searcher is stored under
 */
private void initSearcher(ServletContextEvent event, final String resourceLocation,
        final String contextAttrName) {
    try {
        final File indexLocation = new File(resourceLocation);
        final IndexSearcher indexSearcher = new IndexSearcher(FSDirectory.getDirectory(indexLocation));
        event.getServletContext().setAttribute(contextAttrName, indexSearcher);
    } catch (Exception failure) {
        // One handler is enough: the index-specific and I/O exceptions were treated
        // exactly the same way as every other exception.
        failure.printStackTrace();
    }
}

From source file:au.edu.unimelb.csse.QueryExpTest.java

License:Apache License

/**
 * Times the configured query against the index.
 * <p>
 * A starter query on the term {@code sent:the} is searched, then searched again after a
 * three-second pause. The configured {@code query} is then built and searched
 * {@code times} times, and each iteration prints the elapsed nanoseconds and the total hit
 * count, separated by a tab.
 * <p>
 * If the thread is interrupted during the pause, the interrupt flag is restored and the run
 * is abandoned. The searcher is always closed before returning.
 *
 * @throws CorruptIndexException propagated from opening or searching the index
 * @throws IOException           if the index cannot be opened, searched or closed
 * @throws ParseException        if the configured query cannot be parsed
 */
private void run() throws CorruptIndexException, IOException, ParseException {
    IndexSearcher searcher = new IndexSearcher(indexPath);
    try {
        TreeTerm starterTerm = new TreeTerm(0, TreeAxis.DESCENDANT, new Term("sent", "the"));
        TreeExpr starterExpr = new TreeExpr();
        starterExpr.addTerm(starterTerm);
        SimpleHitCollector collector = new SimpleHitCollector(1);

        searcher.search(new TreebankQuery(starterExpr), collector);
        collector.reset();
        try {
            Thread.sleep(3000);
        } catch (InterruptedException e) {
            // Restore the flag for the caller and stop: searching the index on an
            // interrupted thread is not safe with every Directory implementation.
            Thread.currentThread().interrupt();
            return;
        }
        searcher.search(new TreebankQuery(starterExpr), collector);

        for (int i = 0; i < times; i++) {
            collector.reset();
            QueryBuilder builder = new QueryBuilder(query);
            final TreebankQuery q = builder.parse(joinType, useLookahead);
            long start = System.nanoTime();
            searcher.search(q, collector);
            long end = System.nanoTime();
            System.out.println((end - start) + "\t" + collector.totalHits);
        }
    } finally {
        // The searcher was opened from a path, so it owns its reader and must release it.
        searcher.close();
    }
}

From source file:au.org.ala.names.search.ALANameIndexer.java

License:Open Source License

/**
 * Creates the temporary index that provides a lookup of checklist bank id to
 * GUID/*w ww  .  j  ava2s  .c o  m*/
 */
private IndexSearcher createTmpGuidIndex(String cbExportFile) throws Exception {
    System.out.println("Starting to create the tmp guid index...");
    IndexWriter iw = createIndexWriter(new File("/data/tmp/guid"), new KeywordAnalyzer(), true);
    au.com.bytecode.opencsv.CSVReader cbreader = new au.com.bytecode.opencsv.CSVReader(
            new FileReader(cbExportFile), '\t', '"', '/', 1);
    for (String[] values = cbreader.readNext(); values != null; values = cbreader.readNext()) {
        Document doc = new Document();
        String id = values[POS_ID];
        String guid = values[POS_LSID];
        doc.add(new StringField("id", id, Store.YES));
        if (StringUtils.isEmpty(id))
            guid = id;

        doc.add(new StoredField("guid", guid));
        iw.addDocument(doc);
    }
    System.out.println("Finished writing the tmp guid index...");
    iw.commit();
    iw.forceMerge(1);
    iw.close();
    //As of lucene 4.0 all IndexReaders are read only
    return new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File("/data/tmp/guid"))));
}

From source file:au.org.ala.names.search.ALANameIndexer.java

License:Open Source License

/**
 * Indexes common names from CoL and ANBG for use in the Common name search.
 *
 * @param iw  The index writer to write the common documents to
 * @param exportDir  The directory that contains the common name export files.
 * @param indexDir The directory in which to create the index.
 * @throws Exception/*from   w ww .  j a  v  a 2  s. c o m*/
 */
private void indexCommonNames(IndexWriter iw, String exportDir, String indexDir) throws Exception {
    log.info("Creating Common Names Index ...");

    //TODO think about adding additional sources for common names

    IndexSearcher currentNameSearcher = new IndexSearcher(
            DirectoryReader.open(FSDirectory.open(new File(indexDir + File.separator + "cb"))));
    IndexSearcher extraSearcher = new IndexSearcher(
            DirectoryReader.open(FSDirectory.open(new File(indexDir + File.separator + "id"))));

    addCoLCommonNames(iw, currentNameSearcher);
    addAnbgCommonNames(afdFile, iw, currentNameSearcher, extraSearcher, '\t');
    addAnbgCommonNames(apniFile, iw, currentNameSearcher, extraSearcher, ',');

    iw.commit();
    iw.forceMerge(1);
    iw.close();
}

From source file:au.org.ala.names.search.ALANameIndexer.java

License:Open Source License

/**
 * Creates a temporary index that will provide a lookup up of lsid to "real lsid".
 * <p/>/*from   w  ww  . j  a  v  a  2  s .c  om*/
 * This deals with the following situations:
 * - common names that are sourced from CoL (LSIDs will be mapped to corresponding ANBG LSID)
 * - Multiple ANBG LSIDs exist for the same scientific name and more than 1 are mapped to the same common name.
 *
 * @param idFile
 * @throws Exception
 */
private void createExtraIdIndex(String idxLocation, File idFile) throws Exception {
    CSVReader reader = new CSVReader(new FileReader(idFile), '\t', '"', '~');//CSVReader.build(idFile, "UTF-8", "\t", '"', 0);
    File indexDir = new File(idxLocation);
    IndexWriter iw = createIndexWriter(indexDir, new KeywordAnalyzer(), true);//new IndexWriter(FSDirectory.open(indexDir), new KeywordAnalyzer(), true, MaxFieldLength.UNLIMITED);
    String[] values = null;
    while ((values = reader.readNext()) != null) {

        if (values != null && values.length >= 3) {
            Document doc = new Document();
            //doc.add(new Field("lsid", values[2], Store.NO, Index.NOT_ANALYZED));
            doc.add(new StringField("lsid", values[2], Store.NO));
            //doc.add(new Field("reallsid", values[1], Store.YES, Index.NO));
            doc.add(new StoredField("reallsid", values[1]));
            iw.addDocument(doc);
        }
    }
    iw.commit();
    iw.forceMerge(1);
    iw.close();
    idSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(indexDir)));
}