Example usage for org.apache.lucene.index DirectoryReader open

Introduction

On this page you can find example usage for org.apache.lucene.index DirectoryReader open.

Prototype

public static DirectoryReader open(final IndexCommit commit) throws IOException

Source Link

Document

Expert: returns an IndexReader reading the index in the given IndexCommit.

Usage

From source file:au.org.ala.names.search.ALANameIndexer.java

License:Open Source License

/**
 * Creates a temporary index that stores the taxon concept LSIDs that were
 * included in the last ANBG exports./*from   ww  w . jav a 2s.c om*/
 *
 * @param tcFileName
 * @return
 * @throws Exception
 */
private IndexSearcher createTmpIndex(String tcFileName) throws Exception {
    //creating the tmp index in the /tmp/taxonConcept directory
    CSVReader reader = new CSVReader(new FileReader(new File(tcFileName)), '\t', '"', '~');
    File indexDir = new File("/tmp/taxonConcept");
    IndexWriter iw = createIndexWriter(indexDir, new KeywordAnalyzer(), true);
    String[] values = null;
    while ((values = reader.readNext()) != null) {
        if (values != null && values.length > 1) {
            //just add the LSID to the index
            Document doc = new Document();

            doc.add(new StringField("lsid", values[0], Store.NO));
            iw.addDocument(doc);

        }
    }
    iw.commit();
    iw.forceMerge(1);
    iw.close();
    return new IndexSearcher(DirectoryReader.open(FSDirectory.open(indexDir)));
}

From source file:au.org.ala.names.search.ALANameSearcher.java

License:Open Source License

/**
 * Creates a new name searcher, using the indexDirectory
 * as the source directory.
 *
 * <p>Readers are opened on the {@code cb}, {@code irmng}, {@code vernacular}
 * and {@code id} sub-directories of the index directory, in that order.
 *
 * @param indexDirectory The directory that contains the index files for the scientific names, irmng and vernacular names.
 * @throws CorruptIndexException
 * @throws IOException
 */
public ALANameSearcher(String indexDirectory) throws CorruptIndexException, IOException {
    //Initialise CB index searching items
    log.debug("Creating the search object for the name matching api...");
    //make the query parsers thread safe
    queryParser = new ThreadLocal<QueryParser>() {
        @Override
        protected QueryParser initialValue() {
            QueryParser qp = new QueryParser(Version.LUCENE_34, "genus", new LowerCaseKeywordAnalyzer());
            qp.setFuzzyMinSim(0.8f); //fuzzy match similarity setting. used to match the authorship.
            return qp;
        }
    };
    idParser = new ThreadLocal<QueryParser>() {
        @Override
        protected QueryParser initialValue() {
            return new QueryParser(Version.LUCENE_34, "lsid",
                    new org.apache.lucene.analysis.core.KeywordAnalyzer());
        }
    };

    // every index lives in its own sub-directory of indexDirectory
    String indexRoot = indexDirectory + File.separator;

    cbReader = DirectoryReader.open(FSDirectory.open(createIfNotExist(indexRoot + "cb")));
    cbSearcher = new IndexSearcher(cbReader);
    //Initialise the IRMNG index searching items
    irmngReader = DirectoryReader.open(FSDirectory.open(createIfNotExist(indexRoot + "irmng")));
    irmngSearcher = new IndexSearcher(irmngReader);
    //Initialise the common name index searching items
    vernReader = DirectoryReader.open(FSDirectory.open(createIfNotExist(indexRoot + "vernacular")));
    vernSearcher = new IndexSearcher(vernReader);
    //Initialise the identifier index
    idSearcher = new IndexSearcher(
            DirectoryReader.open(FSDirectory.open(createIfNotExist(indexRoot + "id"))));
    tnse = new TaxonNameSoundEx();
    parser = new PhraseNameParser();
    crossRankHomonyms = au.org.ala.names.util.FileUtils.streamToSet(
            this.getClass().getClassLoader().getResourceAsStream("au/org/ala/homonyms/cross_rank_homonyms.txt"),
            new java.util.HashSet<String>(), true);
}

From source file:au.org.ala.names.search.DwcaNameIndexer.java

License:Open Source License

/**
 * Creates a loading index to use to generate the hierarchy including the left right values.
 *
 * @param tmpIndexDir/*w  w  w . ja va  2s.  com*/
 * @param archiveDirectory
 * @throws Exception
 */
private void createLoadingIndex(String tmpIndexDir, String archiveDirectory) throws Exception {
    log.info("Starting to create the temporary loading index.");
    File indexDir = new File(tmpIndexDir);
    IndexWriter iw = createIndexWriter(indexDir, new KeywordAnalyzer(), true);
    //create the loading index so that left right values and classifications can be generated
    Archive archive = ArchiveFactory.openArchive(new File(archiveDirectory));
    Iterator<DarwinCoreRecord> it = archive.iteratorDwc();
    int i = 0;
    long start = System.currentTimeMillis();
    while (it.hasNext()) {
        Document doc = new Document();
        DarwinCoreRecord dwcr = it.next();
        String id = dwcr.getId();
        String lsid = dwcr.getTaxonID() == null ? id : dwcr.getTaxonID();
        String acceptedLsid = dwcr.getAcceptedNameUsageID();
        //add and store the identifier for the record
        doc.add(new StringField(NameIndexField.ID.toString(), dwcr.getId(), Field.Store.YES));
        if (StringUtils.isNotBlank(lsid)) {
            doc.add(new StringField(NameIndexField.LSID.toString(), lsid, Field.Store.YES));
        } else {
            System.out.println("LSID is null for " + id + " " + lsid + " " + lsid + " " + acceptedLsid);
        }
        if (StringUtils.isNotBlank(dwcr.getParentNameUsageID())) {
            doc.add(new StringField("parent_id", dwcr.getParentNameUsageID(), Field.Store.YES));
        }
        if (StringUtils.isNotBlank(dwcr.getAcceptedNameUsageID())) {
            doc.add(new StringField(NameIndexField.ACCEPTED.toString(), dwcr.getAcceptedNameUsageID(),
                    Field.Store.YES));
        }
        if (StringUtils.isNotBlank(dwcr.getScientificName())) {
            //stored no need to search on
            doc.add(new StoredField(NameIndexField.NAME.toString(), dwcr.getScientificName()));
        }
        if (StringUtils.isNotBlank(dwcr.getScientificNameAuthorship())) {
            //stored no need to search on
            doc.add(new StoredField(NameIndexField.AUTHOR.toString(), dwcr.getScientificNameAuthorship()));
        }
        if (StringUtils.isNotBlank(dwcr.getGenus())) {
            //stored no need to search on
            doc.add(new StoredField("genus", dwcr.getGenus()));
        }
        if (StringUtils.isNotBlank(dwcr.getSpecificEpithet())) {
            //stored no need to search on
            doc.add(new StoredField(NameIndexField.SPECIFIC.toString(), dwcr.getSpecificEpithet()));
        }
        if (StringUtils.isNotBlank(dwcr.getInfraspecificEpithet())) {
            //stored no need to search on
            doc.add(new StoredField(NameIndexField.INFRA_SPECIFIC.toString(), dwcr.getInfraspecificEpithet()));
        }
        if (StringUtils.isNotBlank(dwcr.getTaxonRank())) {
            //match the supplied rank
            RankType rt = RankType.getForStrRank(dwcr.getTaxonRank());
            if (rt != null) {
                doc.add(new StringField(NameIndexField.RANK.toString(), rt.getRank(), Field.Store.YES));
                doc.add(new StringField(NameIndexField.RANK_ID.toString(), rt.getId().toString(),
                        Field.Store.YES));
            } else {
                doc.add(new StringField(NameIndexField.RANK.toString(), dwcr.getTaxonRank(), Field.Store.YES));
                doc.add(new StringField(NameIndexField.RANK_ID.toString(), RankType.UNRANKED.getId().toString(),
                        Field.Store.YES));
            }
        } else {
            //put in unknown rank
            doc.add(new StringField(NameIndexField.RANK.toString(), "Unknown", Field.Store.YES));
            doc.add(new StringField(NameIndexField.RANK_ID.toString(), RankType.UNRANKED.getId().toString(),
                    Field.Store.YES));
        }
        if (StringUtils.equals(lsid, acceptedLsid) || StringUtils.equals(id, acceptedLsid)
                || acceptedLsid == null) {
            //mark this one as an accepted concept
            doc.add(new StringField(NameIndexField.iS_SYNONYM.toString(), "F", Field.Store.YES));
            if (StringUtils.isBlank(dwcr.getParentNameUsageID())) {
                doc.add(new StringField("root", "T", Field.Store.YES));
            }
        } else {
            doc.add(new StringField(NameIndexField.iS_SYNONYM.toString(), "T", Field.Store.YES));
        }
        iw.addDocument(doc);
        i++;
        if (i % 1000 == 0) {
            long finish = System.currentTimeMillis();
            log.debug("Loading index: " + i + " records per sec: "
                    + (1000 / (((float) (finish / start)) / 1000)));
            start = finish;
        }
    }
    log.info("Finished creating the temporary load index with " + i + " concepts");
    iw.commit();
    iw.forceMerge(1);
    iw.close();
    lsearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(indexDir)));
}

From source file:au.org.ala.names.search.DwcaNameIndexer.java

License:Open Source License

/**
 * Runs a single-term query against the load index, opening the searcher from
 * {@code dirTmpIndex} on first use.
 *
 * @param field name of the field to match
 * @param value exact term value to look for in that field
 * @param max maximum number of hits to return
 * @return the top hits for the term
 * @throws Exception if the index cannot be opened or searched
 */
private TopDocs getLoadIdxResults(String field, String value, int max) throws Exception {
    if (lsearcher == null) {
        File loadIndexDir = new File(dirTmpIndex);
        if (!loadIndexDir.exists()) {
            throw new RuntimeException(
                    "A load index has not been generated. Please run this tool with '-load' before creating the search index.");
        }
        lsearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(loadIndexDir)));
    }
    Term wanted = new Term(field, value);
    return lsearcher.search(new TermQuery(wanted), max);
}

From source file:back.Searcher.java

License:Apache License

/**
 * Simple command-line based search demo.
 *
 * <p>The two flags select which of four index directories is searched and
 * which analyzer parses the query. The query is parsed against the
 * {@code contents} field and handed to {@code doPagingSearch} together with
 * the rows read from {@code .\matriz.csv}.
 *
 * @param query query text; when {@code null}, lines are read from standard input
 * @param stopword whether the stop-word variant of the index/analyzer is used
 * @param stemming whether the stemming variant of the index/analyzer is used
 * @param consulta passed through unchanged to {@code doPagingSearch}
 * @throws Exception if the CSV file or index cannot be read, or the query cannot be parsed
 */
public static void search(String query, boolean stopword, boolean stemming, int consulta) throws Exception {

    String index;
    Analyzer analyzer;
    if (!stopword && !stemming) {
        index = ".\\indexed";
        analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT,
                new CharArraySet(Version.LUCENE_CURRENT, 0, false));
        System.out.println("Nenhum Marcado");
    } else if (stopword && !stemming) {
        index = ".\\indexedNoStpWrd";
        analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
        System.out.println("Primeiro Marcado");
    } else if (!stopword) {
        // stemming only
        index = ".\\indexedStemming";
        analyzer = new EnglishAnalyzer(Version.LUCENE_CURRENT,
                new CharArraySet(Version.LUCENE_CURRENT, 0, false));
        System.out.println("Segundo Marcado");
    } else {
        // both stop words and stemming
        index = ".\\indexedTreated";
        analyzer = new EnglishAnalyzer(Version.LUCENE_CURRENT);
        System.out.println("Dois Marcados");
    }
    String field = "contents";
    // fixed settings: no query file, no benchmark repeats, no raw output
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = query;
    int hitsPerPage = 200;

    // read the whole matrix up front and release the file handle straight away
    List<String[]> myEntries;
    CSVReader csvReader = new CSVReader(new FileReader(".\\matriz.csv"));
    try {
        myEntries = csvReader.readAll();
    } finally {
        csvReader.close();
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    try {
        IndexSearcher searcher = new IndexSearcher(reader);

        BufferedReader in = null;
        if (queries != null) {
            in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
        } else {
            in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
        }
        QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
        while (true) {
            if (queries == null && queryString == null) { // prompt the user
                System.out.println("Enter query: ");
            }

            String line = queryString != null ? queryString : in.readLine();

            // end of input
            if (line == null) {
                break;
            }

            line = line.trim();
            if (line.length() == 0) {
                break;
            }

            Query query1 = parser.parse(line);
            System.out.println("Searching for: " + query1.toString(field));

            if (repeat > 0) { // repeat & time as benchmark
                Date start = new Date();
                for (int i = 0; i < repeat; i++) {
                    searcher.search(query1, null, 100);
                }
                Date end = new Date();
                System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
            }

            doPagingSearch(in, searcher, query1, hitsPerPage, raw, queries == null && queryString == null,
                    myEntries, consulta);

            // a query supplied by the caller is run exactly once
            if (queryString != null) {
                break;
            }
        }
    } finally {
        // release the index even when parsing or searching fails
        reader.close();
    }
}

From source file:bajavista.Buscador.java

/**
 * Searches the index at {@code dirIndexES} for documents whose {@code text}
 * field matches the given query string.
 *
 * <p>Up to 1024 hits are converted to {@code Informacion} objects and appended
 * to {@code listaInfo} (declared outside this method), which is then returned.
 *
 * @param busqueda query string in Lucene query-parser syntax
 * @return {@code listaInfo} after the hits of this search have been appended
 * @throws IOException if the index cannot be opened or read
 * @throws ParseException if the query string cannot be parsed
 */
public ArrayList<Informacion> buscarContenido(String busqueda) throws IOException, ParseException {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);

    // 1. Open the index
    Directory indexES = FSDirectory.open(new File(dirIndexES));
    try {
        // 2. Query
        Query q = new QueryParser(Version.LUCENE_43, "text", analyzer).parse(busqueda);

        // 3. Search
        int hitsPage = 1024;
        IndexReader reader = DirectoryReader.open(indexES);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPage, true);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            // 4. Return results
            for (ScoreDoc hit : hits) {
                Document data = searcher.doc(hit.doc);
                info = new Informacion(Integer.parseInt(data.get("idUser")), Long.parseLong(data.get("timestamp")),
                        data.get("text"), Double.parseDouble(data.get("objective")),
                        Double.parseDouble(data.get("subjective")), Double.parseDouble(data.get("positive")),
                        Double.parseDouble(data.get("negative")), Integer.parseInt(data.get("need")));
                listaInfo.add(info);
            }
        } finally {
            // a malformed stored field makes the parse calls above throw; still release the reader
            reader.close();
        }
    } finally {
        indexES.close();
    }

    return listaInfo;
}

From source file:bbejeck.nosql.lucene.LuceneSqlFileSystemSearchBase.java

License:Apache License

/**
 * Opens a reader on {@code fsDirectory} and builds the searcher on top of it.
 *
 * @throws Exception if the index cannot be opened
 */
@Override
public void openSearcher() throws Exception {
    DirectoryReader opened = DirectoryReader.open(fsDirectory);
    ireader = opened;
    isearcher = new IndexSearcher(opened);
}

From source file:bbejeck.nosql.lucene.LuceneSqlSearchBase.java

License:Apache License

/**
 * Opens a reader on {@code ramDirectory} and builds the searcher on top of it.
 *
 * @throws Exception if the index cannot be opened
 */
public void openSearcher() throws Exception {
    DirectoryReader opened = DirectoryReader.open(ramDirectory);
    ireader = opened;
    isearcher = new IndexSearcher(opened);
}

From source file:biospectra.classify.Classifier.java

License:Apache License

/**
 * Stores the classifier settings and opens a memory-mapped reader and a
 * searcher on the index directory.
 *
 * @param indexPath existing directory that holds the index
 * @param kmerSize passed to the query analyzer
 * @param kmerSkips passed to the query analyzer
 * @param minStrandKmer passed to the query analyzer
 * @param minShouldMatch stored for later use
 * @param queryGenerationAlgorithm stored for later use
 * @param similarity similarity to install on the searcher; {@code null} leaves the searcher's own setting
 * @throws IllegalArgumentException if {@code indexPath} does not exist or is not a directory
 * @throws Exception if the index cannot be opened
 */
private void initialize(File indexPath, int kmerSize, int kmerSkips, boolean minStrandKmer,
        double minShouldMatch, QueryGenerationAlgorithm queryGenerationAlgorithm, Similarity similarity)
        throws Exception {
    boolean usableDirectory = indexPath.exists() && indexPath.isDirectory();
    if (!usableDirectory) {
        throw new IllegalArgumentException("indexPath is not a directory or does not exist");
    }

    this.indexPath = indexPath;
    this.kmerSize = kmerSize;
    this.kmerSkips = kmerSkips;
    this.minStrandKmer = minStrandKmer;
    this.queryAnalyzer = new KmerQueryAnalyzer(kmerSize, kmerSkips, minStrandKmer);

    Directory mappedIndex = new MMapDirectory(indexPath.toPath());
    this.indexReader = DirectoryReader.open(mappedIndex);
    this.indexSearcher = new IndexSearcher(this.indexReader);
    if (null != similarity) {
        this.indexSearcher.setSimilarity(similarity);
    }

    this.minShouldMatch = minShouldMatch;
    this.queryGenerationAlgorithm = queryGenerationAlgorithm;

    BooleanQuery.setMaxClauseCount(10000);
}

From source file:biospectra.utils.IndexUtil.java

License:Apache License

/**
 * Remembers the index location and opens a memory-mapped reader on it.
 *
 * @param indexPath location of the index; must not be {@code null}
 * @throws IllegalArgumentException if {@code indexPath} is {@code null}
 * @throws Exception if the index cannot be opened
 */
private void initialize(File indexPath) throws Exception {
    if (null == indexPath) {
        throw new IllegalArgumentException("indexPath is null");
    }

    this.indexPath = indexPath;
    this.indexReader = DirectoryReader.open(new MMapDirectory(indexPath.toPath()));
}