List of usage examples for org.apache.lucene.index.DirectoryReader.open
public static DirectoryReader open(final Directory directory) throws IOException
public static DirectoryReader open(final IndexCommit commit) throws IOException
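All of the examples below follow the same basic pattern with the Directory overload: obtain a Directory over the index, open a DirectoryReader on it, wrap the reader in an IndexSearcher, run queries, and close the reader. A minimal sketch of that pattern (the path "index", the field name "title", and the query term are placeholders; this assumes Lucene 5 or later, where FSDirectory.open takes a java.nio.file.Path — several 4.x examples below pass a java.io.File instead):

import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class OpenReaderExample {
    public static void main(String[] args) throws Exception {
        // Open the on-disk index directory; "index" is a placeholder path.
        Directory dir = FSDirectory.open(Paths.get("index"));
        // DirectoryReader.open gives a point-in-time view of the index;
        // try-with-resources closes the reader when the search is done.
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs hits = searcher.search(new TermQuery(new Term("title", "lucene")), 10);
            System.out.println("total hits: " + hits.totalHits);
        }
        dir.close();
    }
}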
From source file:br.com.crawlerspring.model.Searcher.java
public List<br.com.crawlerspring.model.Document> parametrizeDocuments(String parameters) throws Exception {
    List<br.com.crawlerspring.model.Document> parametrizedDocuments = new ArrayList<br.com.crawlerspring.model.Document>();
    RegexQuery q = new RegexQuery(new Term("title", ".*" + parameters + ".*"));
    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
    searcher.search(q, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    for (int cont = 0; cont < hits.length; ++cont) {
        br.com.crawlerspring.model.Document document = new br.com.crawlerspring.model.Document();
        int docId = hits[cont].doc;
        org.apache.lucene.document.Document luceneDocument = searcher.doc(docId);
        document.setTitle(luceneDocument.get("title"));
        document.setContent(luceneDocument.get("content"));
        parametrizedDocuments.add(document);
    }
    return parametrizedDocuments;
}
From source file:br.pucminas.ri.jsearch.queryexpansion.RocchioQueryExpansion.java
License:Open Source License
private List<Entry<String, Float>> getTermScoreList(Directory directory) throws CorruptIndexException, IOException {
    Map<String, Float> termScoreMap = new HashMap<>();
    ConcreteTFIDFSimilarity sim = new ConcreteTFIDFSimilarity();
    try (IndexReader idxReader = DirectoryReader.open(directory)) {
        idxReader.leaves().stream().map((leaf) -> leaf.reader()).forEach((reader) -> {
            try {
                Terms terms = reader.terms(Constants.DOC_CONTENT);
                TermsEnum termsEnum = terms.iterator();
                PostingsEnum postings = null;
                int docsNum = idxReader.numDocs();
                BytesRef text;
                while ((text = termsEnum.next()) != null) {
                    postings = termsEnum.postings(postings);
                    while (postings.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                        int freq = postings.freq();
                        float tf = sim.tf(freq);
                        // docsNum replaces the original's reference to an out-of-scope "indexReader".
                        float idf = sim.idf(termsEnum.docFreq(), docsNum);
                        termScoreMap.put(text.utf8ToString(), BETA * (tf * idf));
                    }
                }
            } catch (IOException ex) {
                Logger.getLogger(RocchioQueryExpansion.class.getName()).log(Level.SEVERE, null, ex);
            }
            // The reader is closed by the enclosing try-with-resources; the original also
            // closed it in a finally block here, which would close it while leaves were
            // still being visited.
        });
    }
    return new ArrayList<>(termScoreMap.entrySet());
}
From source file:br.pucminas.ri.jsearch.queryexpansion.RocchioQueryExpansion.java
License:Open Source License
private float getScore(Directory directory, String term) throws CorruptIndexException, IOException {
    try (IndexReader idxReader = DirectoryReader.open(directory)) {
        ConcreteTFIDFSimilarity sim = new ConcreteTFIDFSimilarity();
        for (LeafReaderContext context : idxReader.leaves()) {
            LeafReader reader = context.reader();
            try {
                Terms terms = reader.terms(Constants.DOC_CONTENT);
                TermsEnum termsEnum = terms.iterator();
                PostingsEnum postings = null;
                BytesRef text;
                while ((text = termsEnum.next()) != null) {
                    postings = termsEnum.postings(postings);
                    if (text.utf8ToString().equalsIgnoreCase(term)) {
                        while (postings.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                            int freq = postings.freq();
                            float tf = sim.tf(freq);
                            // idxReader replaces the original's reference to an out-of-scope "indexReader".
                            float idf = sim.idf(termsEnum.docFreq(), idxReader.numDocs());
                            return tf * idf;
                        }
                    }
                }
            } catch (IOException ex) {
                Logger.getLogger(RocchioQueryExpansion.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
    return 0;
}
From source file:br.ufmt.harmonizacao.implementer.PatenteeSearcher.java
public void setPath(String path) {
    try {
        this.path = path + dirName;
        System.out.println(this.path);
        dir = new SimpleFSDirectory(new File(this.path));
        reader = DirectoryReader.open(dir);
        searcher = new IndexSearcher(reader);
    } catch (IOException ex) {
        Logger.getLogger(PatenteeSearcher.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:br.ufmt.harmonizacao.implementer.StandardSearcher.java
public void setPath(String path) {
    try {
        this.path = path + dirName;
        System.out.println(this.path);
        dir = new SimpleFSDirectory(new File(this.path));
        reader = DirectoryReader.open(dir);
        searcher = new IndexSearcher(reader);
    } catch (IOException ex) {
        // Log under this class (the original reused PatenteeSearcher's logger).
        Logger.getLogger(StandardSearcher.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:buscador.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n]"
            + " [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\n"
            + "See http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }
    String index = "Zaguan1";
    String[] fields = { "title", "description", "identifier", "date", "creator" };
    BooleanClause.Occur[] flags = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD,
            BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new SpanishAnalyzer(Version.LATEST);
    BufferedReader in = null;
    if (queries != null) {
        in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }
    while (true) {
        if (queries == null && queryString == null) {
            // prompt the user
            System.out.println("Enter query: ");
        }
        String line = queryString != null ? queryString : in.readLine();
        if (line == null || line.length() == -1) {
            break;
        }
        line = line.trim();
        if (line.length() == 0) {
            break;
        }
        Query query = MultiFieldQueryParser.parse(line, fields, flags, analyzer);
        if (repeat > 0) { // repeat & time as benchmark
            Date start = new Date();
            for (int i = 0; i < repeat; i++) {
                searcher.search(query, 100);
            }
            Date end = new Date();
            System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        }
        doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);
        if (queryString != null) {
            break;
        }
    }
    reader.close();
}
From source file:bzh.terrevirtuelle.navisu.gazetteer.impl.lucene.GeoNameResolver.java
License:Apache License
private IndexReader createIndexReader(String indexerPath) throws IOException {
    File indexfile = new File(indexerPath);
    indexDir = FSDirectory.open(indexfile.toPath());
    if (!DirectoryReader.indexExists(indexDir)) {
        LOG.log(Level.SEVERE, "No Lucene Index Directory Found, Invoke indexBuild() First !");
        System.exit(1);
    }
    return DirectoryReader.open(indexDir);
}
From source file:ca.dracode.ais.indexer.FileSearcher.java
License:Open Source License
public FileSearcher() {
    IndexReader indexReader;
    IndexSearcher indexSearcher = null;
    try {
        File indexDirFile = new File(FileIndexer.getRootStorageDir());
        Directory tmpDir = FSDirectory.open(indexDirFile);
        indexReader = DirectoryReader.open(tmpDir);
        indexSearcher = new IndexSearcher(indexReader);
    } catch (IOException ioe) {
        Log.e(TAG, "Error", ioe);
    }
    this.indexSearcher = indexSearcher;
}
From source file:ca.mcgill.cs.creco.logic.search.CategorySearch.java
License:Apache License
@Override
public List<Category> queryCategories(String pQueryString) {
    List<Category> searchResult = new ArrayList<Category>();
    try {
        DirectoryReader reader = DirectoryReader.open(aDirectory);
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector results = TopScoreDocCollector.create(MAX_NUM_RESULTS, true);
        // Search category names
        Query categoryNameQuery = new QueryParser(VERSION, CATEGORY_NAME, aAnalyzer).parse(pQueryString);
        searcher.search(categoryNameQuery, results);
        // Search flattened text (only product names for now)
        Query flattenedTextQuery = new QueryParser(VERSION, FLATTENED_TEXT, aAnalyzer).parse(pQueryString);
        searcher.search(flattenedTextQuery, results);
        for (ScoreDoc scoredResult : results.topDocs().scoreDocs) {
            Document doc = searcher.doc(scoredResult.doc);
            Category resultCategory = aDataStore.getCategory(doc.get(CATEGORY_ID));
            if (!searchResult.contains(resultCategory) && resultCategory.getNumberOfProducts() > 0) {
                searchResult.add(resultCategory);
            }
        }
    } catch (IOException e) {
        LOG.error(e.getMessage());
    } catch (ParseException e) {
        LOG.error(e.getMessage());
    }
    return searchResult;
}
From source file:calliope.search.AeseSearch.java
License:Open Source License
/**
 * Search the index for the given expression
 * @param expr the expression to be parsed
 * @param langCode the language of the expression and index
 * @param profile the hit profile (where to start from etc)
 * @return the result docs
 */
public static String searchIndex(String expr, String langCode, HitProfile profile) {
    StringBuilder sb = new StringBuilder();
    try {
        Analyzer analyzer = AeseSearch.createAnalyzer(langCode);
        DirectoryReader reader = DirectoryReader.open(AeseSearch.index);
        if (reader != null) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser qp = new QueryParser(Version.LUCENE_45, "text", analyzer);
            Query q = qp.parse(expr);
            TopDocs hits = searcher.search(q, AeseSearch.maxHits);
            ScoreDoc[] docs = hits.scoreDocs;
            for (int j = profile.from; j < profile.to && j < docs.length; j++) {
                Document doc = searcher.doc(docs[j].doc);
                String vid = doc.get(LuceneFields.VID);
                String docID = doc.get(LuceneFields.DOCID);
                Highlighter h = new Highlighter(new QueryScorer(q));
                String text = getCorTexVersion(docID, vid);
                sb.append(formatDocID(docID));
                sb.append(" ");
                sb.append(formatVersionID(vid));
                sb.append(" ");
                String frag = h.getBestFragment(analyzer, "text", text);
                sb.append("<span class=\"found\">");
                sb.append(frag);
                sb.append("</span>\n");
            }
            profile.numHits = docs.length;
        }
        reader.close();
    } catch (Exception e) {
        sb.append(e.getMessage());
    }
    return sb.toString();
}