Example usage for org.apache.lucene.index DirectoryReader open

List of usage examples for org.apache.lucene.index DirectoryReader open

Introduction

In this page you can find the example usage for org.apache.lucene.index DirectoryReader open.

Prototype

public static DirectoryReader open(final IndexCommit commit) throws IOException 

Source Link

Document

Expert: returns an IndexReader reading the index in the given IndexCommit .

Usage

From source file:cc.pp.analyzer.ik.demo.IKAnalyzerDemo.java

License:Apache License

public static void main(String[] args) {

    //Lucene Document??
    String fieldName = "text";
    ////  ww w .  ja v  a2 s .c  o m
    String text = "IK Analyzer???????";

    //IKAnalyzer?
    Analyzer analyzer = new IKAnalyzer(Version.LUCENE_48, true);

    Directory directory = null;
    IndexWriter iwriter = null;
    DirectoryReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        //
        directory = new RAMDirectory();

        //?IndexWriterConfig
        IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_48, analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);
        //
        Document doc = new Document();
        doc.add(new LongField("ID", 1000, Field.Store.YES));
        doc.add(new TextField(fieldName, text, Field.Store.YES));
        iwriter.addDocument(doc);
        iwriter.close();

        //?**********************************
        //?
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        //         String keyword = "";
        //QueryParser?Query
        QueryParser qp = new QueryParser(Version.LUCENE_48, fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        //?5?
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);
        //
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < topDocs.totalHits; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("" + targetDoc.toString());
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:cc.twittertools.index.ExtractTermStatisticsFromIndex.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("index").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("min").create(MIN_OPTION));

    CommandLine cmdline = null;/*from w w w  .  jav a2 s.c  om*/
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(ExtractTermStatisticsFromIndex.class.getName(), options);
        System.exit(-1);
    }

    String indexLocation = cmdline.getOptionValue(INDEX_OPTION);
    int min = cmdline.hasOption(MIN_OPTION) ? Integer.parseInt(cmdline.getOptionValue(MIN_OPTION)) : 1;

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));
    Terms terms = SlowCompositeReaderWrapper.wrap(reader).terms(StatusField.TEXT.name);
    TermsEnum termsEnum = terms.iterator(TermsEnum.EMPTY);

    long missingCnt = 0;
    int skippedTerms = 0;
    BytesRef bytes = new BytesRef();
    while ((bytes = termsEnum.next()) != null) {
        byte[] buf = new byte[bytes.length];
        System.arraycopy(bytes.bytes, 0, buf, 0, bytes.length);
        String term = new String(buf, "UTF-8");
        int df = termsEnum.docFreq();
        long cf = termsEnum.totalTermFreq();

        if (df < min) {
            skippedTerms++;
            missingCnt += cf;
            continue;
        }

        out.println(term + "\t" + df + "\t" + cf);
    }

    reader.close();
    out.close();
    System.err.println("skipped terms: " + skippedTerms + ", cnt: " + missingCnt);
}

From source file:cc.twittertools.index.ExtractTweetidsFromIndex.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(/* w ww .  j  av a2 s  .  co  m*/
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(ExtractTweetidsFromIndex.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    PrintStream out = new PrintStream(System.out, true, "UTF-8");
    for (int i = 0; i < reader.maxDoc(); i++) {
        Document doc = reader.document(i);
        out.println(doc.getField(StatusField.ID.name).stringValue() + "\t"
                + doc.getField(StatusField.SCREEN_NAME.name).stringValue());
    }
    out.close();
    reader.close();
}

From source file:cc.twittertools.search.api.TrecSearchHandler.java

License:Apache License

public TrecSearchHandler(File indexPath, @Nullable Map<String, String> credentials) throws IOException {
    Preconditions.checkNotNull(indexPath);
    Preconditions.checkArgument(indexPath.exists());

    // Can be null, in which case we don't check for credentials.
    this.credentials = credentials;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexPath));
    searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
}

From source file:cc.twittertools.search.api.TrecSearchThriftServer.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(HELP_OPTION, "show help"));
    options.addOption(OptionBuilder.withArgName("port").hasArg().withDescription("port").create(PORT_OPTION));
    options.addOption(//from  ww w  .  jav  a  2s.co m
            OptionBuilder.withArgName("index").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg()
            .withDescription("max number of threads in thread pool").create(MAX_THREADS_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("file containing access tokens").create(CREDENTIALS_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(TrecSearchThriftServer.class.getName(), options);
        System.exit(-1);
    }

    int port = cmdline.hasOption(PORT_OPTION) ? Integer.parseInt(cmdline.getOptionValue(PORT_OPTION))
            : DEFAULT_PORT;
    int maxThreads = cmdline.hasOption(MAX_THREADS_OPTION)
            ? Integer.parseInt(cmdline.getOptionValue(MAX_THREADS_OPTION))
            : DEFAULT_MAX_THREADS;
    File index = new File(cmdline.getOptionValue(INDEX_OPTION));

    Map<String, String> credentials = null;
    if (cmdline.hasOption(CREDENTIALS_OPTION)) {
        credentials = Maps.newHashMap();
        File cfile = new File(cmdline.getOptionValue(CREDENTIALS_OPTION));
        if (!cfile.exists()) {
            System.err.println("Error: " + cfile + " does not exist!");
            System.exit(-1);
        }
        for (String s : Files.readLines(cfile, Charsets.UTF_8)) {
            try {
                String[] arr = s.split(":");
                credentials.put(arr[0], arr[1]);
            } catch (Exception e) {
                // Catch any exceptions from parsing file contain access tokens
                System.err.println("Error reading access tokens from " + cfile + "!");
                System.exit(-1);
            }
        }
    }

    if (!index.exists()) {
        System.err.println("Error: " + index + " does not exist!");
        System.exit(-1);
    }

    IndexReader reader = DirectoryReader.open(MMapDirectory.open(index));
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new LMDirichletSimilarity(DEFAULT_MU));

    QueryLikelihoodModel qlModel = new QueryLikelihoodModel(DEFAULT_MU);

    TServerSocket serverSocket = new TServerSocket(port);
    TrecSearch.Processor<TrecSearch.Iface> searchProcessor = new TrecSearch.Processor<TrecSearch.Iface>(
            new TrecSearchHandler(searcher, qlModel, credentials));

    TThreadPoolServer.Args serverArgs = new TThreadPoolServer.Args(serverSocket);
    serverArgs.maxWorkerThreads(maxThreads);
    TServer thriftServer = new TThreadPoolServer(
            serverArgs.processor(searchProcessor).protocolFactory(new TBinaryProtocol.Factory()));

    thriftServer.serve();
}

From source file:cc.twittertools.search.local.RunQueries.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(//from w  w w  . j a v  a 2  s  .  co m
            OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg()
            .withDescription("file containing topics in TREC format").create(QUERIES_OPTION));
    options.addOption(OptionBuilder.withArgName("similarity").hasArg()
            .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(RunQueries.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;

    String topicsFile = cmdline.getOptionValue(QUERIES_OPTION);

    int numResults = 1000;
    try {
        if (cmdline.hasOption(NUM_RESULTS_OPTION)) {
            numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION));
        }
    } catch (NumberFormatException e) {
        System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION));
        System.exit(-1);
    }

    String similarity = "LM";
    if (cmdline.hasOption(SIMILARITY_OPTION)) {
        similarity = cmdline.getOptionValue(SIMILARITY_OPTION);
    }

    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    IndexSearcher searcher = new IndexSearcher(reader);

    if (similarity.equalsIgnoreCase("BM25")) {
        searcher.setSimilarity(new BM25Similarity());
    } else if (similarity.equalsIgnoreCase("LM")) {
        searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
    }

    QueryParser p = new QueryParser(Version.LUCENE_43, StatusField.TEXT.name, IndexStatuses.ANALYZER);

    TrecTopicSet topics = TrecTopicSet.fromFile(new File(topicsFile));
    for (TrecTopic topic : topics) {
        Query query = p.parse(topic.getQuery());
        Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, topic.getQueryTweetTime(),
                true, true);

        TopDocs rs = searcher.search(query, filter, numResults);

        int i = 1;
        for (ScoreDoc scoreDoc : rs.scoreDocs) {
            Document hit = searcher.doc(scoreDoc.doc);
            out.println(String.format("%s Q0 %s %d %f %s", topic.getId(),
                    hit.getField(StatusField.ID.name).numericValue(), i, scoreDoc.score, runtag));
            if (verbose) {
                out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
            }
            i++;
        }
    }
    reader.close();
    out.close();
}

From source file:cc.twittertools.search.local.SearchStatuses.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(/*from w w  w  .j  a v a 2  s  . c  o m*/
            OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query id").create(QID_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("query text").create(QUERY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("maxid").create(MAX_ID_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
            .create(NUM_RESULTS_OPTION));
    options.addOption(OptionBuilder.withArgName("similarity").hasArg()
            .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(QUERY_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(SearchStatuses.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    String qid = cmdline.hasOption(QID_OPTION) ? cmdline.getOptionValue(QID_OPTION) : DEFAULT_QID;
    String queryText = cmdline.hasOption(QUERY_OPTION) ? cmdline.getOptionValue(QUERY_OPTION) : DEFAULT_Q;
    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;
    long maxId = cmdline.hasOption(MAX_ID_OPTION) ? Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION))
            : DEFAULT_MAX_ID;
    int numResults = cmdline.hasOption(NUM_RESULTS_OPTION)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION))
            : DEFAULT_NUM_RESULTS;
    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    String similarity = "LM";
    if (cmdline.hasOption(SIMILARITY_OPTION)) {
        similarity = cmdline.getOptionValue(SIMILARITY_OPTION);
    }

    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    IndexSearcher searcher = new IndexSearcher(reader);

    if (similarity.equalsIgnoreCase("BM25")) {
        searcher.setSimilarity(new BM25Similarity());
    } else if (similarity.equalsIgnoreCase("LM")) {
        searcher.setSimilarity(new LMDirichletSimilarity(2500.0f));
    }

    QueryParser p = new QueryParser(Version.LUCENE_43, IndexStatuses.StatusField.TEXT.name,
            IndexStatuses.ANALYZER);
    Query query = p.parse(queryText);
    Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, maxId, true, true);

    TopDocs rs = searcher.search(query, filter, numResults);

    int i = 1;
    for (ScoreDoc scoreDoc : rs.scoreDocs) {
        Document hit = searcher.doc(scoreDoc.doc);

        out.println(String.format("%s Q0 %s %d %f %s", qid, hit.getField(StatusField.ID.name).numericValue(), i,
                scoreDoc.score, runtag));
        if (verbose) {
            out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " "));
        }
        i++;
    }

    reader.close();
    out.close();
}

From source file:cc.wikitools.lucene.hadoop.HdfsWikipediaSearcher.java

License:Apache License

public HdfsWikipediaSearcher(Path indexLocation, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Directory directory = new FileSystemDirectory(fs, indexLocation, false, conf);

    reader = DirectoryReader.open(directory);

    init();// www.j  a va2s.c om
}

From source file:cc.wikitools.lucene.WikipediaSearcher.java

License:Apache License

public WikipediaSearcher(File indexLocation) throws IOException {
    Preconditions.checkNotNull(indexLocation);
    Preconditions.checkArgument(indexLocation.exists());

    reader = DirectoryReader.open(FSDirectory.open(indexLocation));

    init();//  w  ww.  jav a 2 s .  co m
}

From source file:ch.admin.isb.hermes5.business.search.SearchEngine.java

License:Apache License

public List<SearchResult> search(String searchInput, String modelIdentifier, String lang) {
    DirectoryReader directoryReader = null;

    try {//from   w  w  w . j  a  va  2  s.co  m
        List<SearchResult> results = new ArrayList<SearchResult>();
        String queryString = buildQueryString(searchInput);
        if (queryString != null) {

            String searchIndexPath = searchIndexManager.getSearchIndexPath(modelIdentifier, lang);
            try {
                directoryReader = DirectoryReader.open(FSDirectory.open(new File(searchIndexPath)));
            } catch (Exception e) {
                logger.warn("No index files found at " + searchIndexPath + ". Will try to restore from S3");
                searchIndexManager.restoreIndexFilesFromS3(modelIdentifier, lang);
                directoryReader = DirectoryReader.open(FSDirectory.open(new File(searchIndexPath)));
            }
            Analyzer analyzer = analyserRepository.getAnalyzer(lang);
            IndexSearcher isearcher = new IndexSearcher(directoryReader);

            Query query = new QueryParser(Version.LUCENE_47, "presentationName", analyzer).parse(queryString);
            ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
            HighlighterWrapper highlighter = highlighterRepository.getHighlighter(analyzer, isearcher, query);
            for (int i = 0; i < hits.length; i++) {
                results.add(buildSearchResult(isearcher, highlighter, hits[i].doc));
            }
        }

        searchLogger.info("<{}> returned {} results", searchInput, results.size());

        return results;
    } catch (Exception e) {
        logger.warn("An exception occurred during search, empty result will be returned", e);
        return new ArrayList<SearchResult>();
    } finally {
        try {
            if (directoryReader != null) {
                directoryReader.close();
            }
        } catch (IOException e) {
            logger.debug("unable to close directory reader", e);
        }
    }
}