Usage examples for `org.apache.lucene.index.DirectoryReader#open`
public static DirectoryReader open(final IndexCommit commit) throws IOException
From source file:com.mycompany.mavenproject1.Main.java
public static void main(String[] args) throws IOException, ParseException { StandardAnalyzer analyzer = new StandardAnalyzer(); // Directory index = new RAMDirectory(); Directory index = new SimpleFSDirectory(Paths.get( "C:\\Users\\slete\\Documents\\NetBeansProjects\\mavenproject1\\src\\main\\java\\com\\mycompany\\mavenproject1\\data")); IndexWriterConfig config = new IndexWriterConfig(analyzer); //config.setOpenMode(OpenMode.CREATE); IndexWriter w = new IndexWriter(index, config); try (ItemProvider provider = new ItemProvider( "C:\\Users\\slete\\Documents\\NetBeansProjects\\mavenproject1\\src\\main\\java\\com\\mycompany\\mavenproject1\\items.xml")) { while (provider.hasNext()) { Item item = provider.next(); addItem(w, item);/*www . ja v a 2s .c o m*/ } } catch (XMLStreamException | IOException ex) { ex.getMessage(); } // w.commit(); w.close(); // String queryStr = "id:1* NOT id:19*"; String a = "id:1* NOT id:19*"; String b = "name:Dekielek AND description:(ty AND obiektywu)"; String c = "category:Dek*"; String ds = "id:1232~2"; String e = "price:[0.0 TO 100.0]"; Query q = new QueryParser("name", analyzer).parse(ds); int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopDocs docs = searcher.search(q, hitsPerPage); ScoreDoc[] hits = docs.scoreDocs; System.out.println("Found " + hits.length + " hits."); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out .println(d.get("id") + "\t" + d.get("price") + "\t" + d.get("name") + "\t" + d.get("category"));//+"\t" + d.get("description")); } }
From source file:com.mylucene.basiclucene.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);// w ww . j a v a 2 s. co m } String index = "index"; String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } QueryParser parser = new QueryParser(Version.LUCENE_44, field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? 
queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, null, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:com.nec.scg.senseRanking.CountTextSimilarity.java
/**
 * Initializes the Lucene analyzer, directory, reader and searcher fields and
 * loads the string/integer cache from {@code cache_file}.
 *
 * NOTE(review): if opening the index fails, the IOException is only printed and
 * {@code ireader}/{@code isearcher} remain unset — callers presumably tolerate
 * that, but verify before relying on these fields.
 */
private void init() {
    analyzer = new StandardAnalyzer(Version.LUCENE_43);
    try {
        // Hard-coded on-disk index location.
        directory = FSDirectory.open(new File("D:\\KBTextIndex"));
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);
    } catch (IOException e) {
        e.printStackTrace();
    }
    cache = new StringIntegerCache(cache_file);
    cache.readCache();
}
From source file:com.netcrest.pado.index.provider.lucene.LuceneSearch.java
License:Open Source License
/**
 * Runs a Lucene query against the supplied directory and collects the identity
 * keys of all matching temporal entries.
 *
 * @param queryString Lucene query text; '-' characters are escaped before parsing
 * @param dir index directory to search
 * @return set of identity keys extracted from matching documents (may be empty)
 * @throws RuntimeException if the index cannot be opened or read
 * @throws PadoException if the query text cannot be parsed
 */
protected Set<Object> getIdentityKeySet(String queryString, Directory dir) {
    Set<Object> identityKeySet = new HashSet<Object>();
    DirectoryReader reader;
    try {
        reader = DirectoryReader.open(dir);
    } catch (IOException e1) { // also covers CorruptIndexException
        e1.printStackTrace();
        throw new RuntimeException(e1);
    }
    try {
        Query query;
        try {
            StandardQueryParser parser = new StandardQueryParser(new StandardAnalyzer(LUCENE_VERSION));
            query = parser.parse(queryString.replaceAll("\\-", "\\\\-"), "__doc");
        } catch (Exception ex) {
            // Lucene bug. Unable to serialize exception. Log the message and
            // throw a new exception with the string message.
            ex.printStackTrace();
            throw new PadoException(ex.getMessage());
        }
        IndexSearcher searcher = new IndexSearcher(reader);
        try {
            TopDocs results = searcher.search(query, null, Integer.MAX_VALUE);
            for (ScoreDoc hit : results.scoreDocs) {
                Document doc;
                try {
                    doc = searcher.doc(hit.doc);
                } catch (IOException e) {
                    e.printStackTrace();
                    throw new RuntimeException(e);
                }
                LuceneField luceneField = new LuceneField();
                ITemporalKey temporalKey = luceneField.getTemporalKey(doc);
                if (temporalKey != null) {
                    identityKeySet.add(temporalKey.getIdentityKey());
                }
            }
        } catch (IOException e1) {
            e1.printStackTrace();
            throw new RuntimeException(e1);
        }
    } finally {
        // Fix: the reader was previously never closed, leaking index file handles.
        try {
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return identityKeySet;
}
From source file:com.netcrest.pado.index.provider.lucene.LuceneSearch.java
License:Open Source License
protected Set<ITemporalKey> getTemporalKeySet(String queryString, Directory dir) { Set<ITemporalKey> temporalKeySet = new HashSet<ITemporalKey>(); DirectoryReader reader;/*from w ww. j a va2s.com*/ try { reader = DirectoryReader.open(dir); } catch (CorruptIndexException e1) { e1.printStackTrace(); throw new RuntimeException(e1); } catch (IOException e1) { e1.printStackTrace(); throw new RuntimeException(e1); } Query query; try { StandardQueryParser parser = new StandardQueryParser(new StandardAnalyzer(LUCENE_VERSION)); query = parser.parse(queryString.replaceAll("\\-", "\\\\-"), "__doc"); } catch (Exception ex) { // Lucene 4.7 bug, internal message not serializable // Send message instead of nesting the cause. throw new RuntimeException(ex.getMessage()); } IndexSearcher searcher = new IndexSearcher(reader); TopDocs results; try { results = searcher.search(query, null, Integer.MAX_VALUE); for (ScoreDoc hit : results.scoreDocs) { Document doc; try { doc = searcher.doc(hit.doc); } catch (CorruptIndexException e) { e.printStackTrace(); throw new RuntimeException(e); } catch (IOException e) { e.printStackTrace(); throw new RuntimeException(e); } LuceneField luceneField = new LuceneField(); ITemporalKey temporalKey = luceneField.getTemporalKey(doc); if (temporalKey != null) { temporalKeySet.add(temporalKey); } } } catch (IOException e1) { e1.printStackTrace(); throw new RuntimeException(e1); } return temporalKeySet; }
From source file:com.netcrest.pado.index.provider.lucene.TopNLuceneSearch.java
License:Open Source License
protected Set<Object> getIdentityKeySet(String queryString, Directory dir) { Set<Object> identityKeySet = new HashSet<Object>(); DirectoryReader reader;//ww w . j a v a 2s . c o m try { reader = DirectoryReader.open(dir); } catch (CorruptIndexException e1) { e1.printStackTrace(); throw new RuntimeException(e1); } catch (IOException e1) { e1.printStackTrace(); throw new RuntimeException(e1); } Query query; try { StandardQueryParser parser = new StandardQueryParser(new StandardAnalyzer(LUCENE_VERSION)); query = parser.parse(queryString.replaceAll("\\-", "\\\\-"), "__doc"); } catch (Exception ex) { // Lucene bug. Unable to serialize exception. Log the message and // throw a new exception with the string message. ex.printStackTrace(); throw new PadoException(ex.getMessage()); } IndexSearcher searcher = new IndexSearcher(reader); TopDocs results; try { results = searcher.search(query, null, Integer.MAX_VALUE); for (ScoreDoc hit : results.scoreDocs) { Document doc; try { doc = searcher.doc(hit.doc); } catch (CorruptIndexException e) { e.printStackTrace(); throw new RuntimeException(e); } catch (IOException e) { e.printStackTrace(); throw new RuntimeException(e); } // IndexableField field = doc.getField("IdentityKey"); // if (field == null) { // continue; // } // Object identityKey = field.stringValue(); // if (identityKey == null) { // identityKey = field.numericValue(); // } // if (identityKey == null) { // BytesRef br = field.binaryValue(); // if (br != null) { // byte[] blob = br.bytes; // try { // identityKey = BlobHelper.deserializeBlob(blob); // identityKeySet.add(identityKey); // } catch (Exception ex) { // Logger.warning("Identity key deserialization error", ex); // } // } else { // identityKey = field.toString(); // } // } LuceneField luceneField = new LuceneField(); ITemporalKey temporalKey = luceneField.getTemporalKey(doc); if (temporalKey != null) { float docScore = hit.score; identityKeySet.add(temporalKey.getIdentityKey()); } } } catch (IOException e1) { 
e1.printStackTrace(); throw new RuntimeException(e1); } return identityKeySet; }
From source file:com.nuvolect.deepdive.lucene.Index.java
public static JSONObject index(final String volumeId, final String searchPath, final boolean forceIndex) { if (m_interrupt[0]) { LogUtil.log(LogUtil.LogType.INDEX, "Index canceled post interrupt"); m_interrupt[0] = false;//from ww w .jav a 2 s . c om return responseInterruptIndexing(); } OmniFile cacheDir = IndexUtil.getCacheDir(volumeId, searchPath); boolean cacheDirCreated = false; try { cacheDirCreated = OmniUtil.forceMkdir(cacheDir); } catch (IOException e) { return responseFolderCreateError(searchPath); } final String luceneDirPath = cacheDir.getAbsolutePath(); boolean cacheDirExists = !cacheDirCreated; boolean indexingOngoing = m_indexThread != null && m_indexThread.isAlive(); boolean indexingRequired = !cacheDirExists || forceIndex; synchronized (m_lock) { if (indexingOngoing) { if (m_fileTreeActive) m_index_state = INDEX_STATE.filetree; else m_index_state = INDEX_STATE.indexing; } else { if (indexingRequired) m_index_state = INDEX_STATE.indexing; else m_index_state = INDEX_STATE.complete; } } if (indexingRequired || indexingOngoing) { if (indexingOngoing) { // Nothing to do, let the background process run. Monitor m_indexedDocs for progress. 
} else { synchronized (m_lock) { m_index_state = INDEX_STATE.filetree; m_totalDocs[0] = 0; m_indexedDocs[0] = 0; m_error[0] = ""; } m_threadGroup = new ThreadGroup(INDEX_THREAD_GROUP); m_indexThread = new Thread(m_threadGroup, new Runnable() { @Override public void run() { // Analyzer analyzer = new org.apache.lucene.analysis.core.WhitespaceAnalyzer(); // Analyzer analyzer = new org.apache.lucene.analysis.core.KeywordAnalyzer(); // Analyzer analyzer = new org.apache.lucene.analysis.standard.StandardAnalyzer(); Analyzer analyzer = new org.apache.lucene.analysis.core.SimpleAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter iwriter = null; try { Directory m_directory = FSDirectory.open(Paths.get(luceneDirPath)); iwriter = new IndexWriter(m_directory, config); iwriter.deleteAll(); iwriter.commit(); } catch (IOException e) { LogUtil.logException(LogUtil.LogType.INDEX, e); m_error[0] = "IndexWriter constructor exception"; } synchronized (m_lock) { m_fileTreeActive = true; m_index_state = INDEX_STATE.filetree; } Collection<OmniFile> files = IndexUtil.getFilePaths(volumeId, searchPath); synchronized (m_lock) { m_index_state = INDEX_STATE.indexing; m_fileTreeActive = false; m_totalDocs[0] = files.size(); m_indexedDocs[0] = 0; } try { for (OmniFile file : files) { if (m_interrupt[0]) { LogUtil.log(LogUtil.LogType.INDEX, "Iterator loop canceled"); break; } String path = file.getPath(); // LogUtil.log(LogUtil.LogType.INDEX, "indexing: " + path);// this is a bit excessive iwriter.addDocument(makeDoc(volumeId, path)); synchronized (m_lock) { ++m_indexedDocs[0]; } } iwriter.commit(); iwriter.close(); synchronized (m_lock) { m_index_state = m_interrupt[0] ? 
INDEX_STATE.interrupted : INDEX_STATE.complete; m_totalDocs[0] = m_indexedDocs[0]; } } catch (Exception e) { LogUtil.logException(LogUtil.LogType.INDEX, e); m_error[0] = "IndexWriter addDocument exception"; } } }, INDEX_THREAD, STACK_SIZE); m_indexThread.setPriority(Thread.MAX_PRIORITY); m_indexThread.start(); } } else { // Indexing is complete // Get number of documents indexed try { Directory directory = FSDirectory.open(Paths.get(luceneDirPath)); DirectoryReader ireader = DirectoryReader.open(directory); synchronized (m_lock) { m_indexedDocs[0] = ireader.numDocs(); m_totalDocs[0] = m_indexedDocs[0]; m_index_state = INDEX_STATE.complete; } ireader.close(); directory.close(); } catch (IOException e) { LogUtil.logException(LogUtil.LogType.INDEX, e); } } JSONObject result = new JSONObject(); try { synchronized (m_lock) { result.put("index_state", m_index_state.toString()); result.put("error", m_error[0]); result.put("indexed_docs", m_indexedDocs[0]); result.put("total_docs", m_totalDocs[0]); // result.put("full_path", cacheDir.getAbsolutePath()); result.put("search_path", searchPath); } } catch (JSONException e) { e.printStackTrace(); } return result; }
From source file:com.nuvolect.deepdive.lucene.Search.java
/** * Return results for a search along a specific path. If the path is changed or new * create an index.//w ww . j av a 2s .com * @param searchQuery * @param searchPath * @return */ public static JSONObject search(String searchQuery, String volumeId, String searchPath) { JSONObject result = new JSONObject(); JSONArray jsonArray = new JSONArray(); Context ctx = App.getContext(); DirectoryReader ireader = null; ScoreDoc[] scoreDocs = null; String error = ""; preSearch(volumeId, searchPath); try { ireader = DirectoryReader.open(m_directory); } catch (IOException e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); error += e.toString(); } IndexSearcher isearcher = new IndexSearcher(ireader); Query query = null; try { LogUtil.log(LogUtil.LogType.SEARCH, "query: " + searchQuery + ", vid: " + volumeId + ", path: " + searchPath); // Parse a simple query that searches for "text": QueryParser parser = new QueryParser(CConst.FIELD_CONTENT, m_analyzer); query = parser.parse(searchQuery); TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_HITS); isearcher.search(query, collector); scoreDocs = collector.topDocs().scoreDocs; } catch (ParseException | IOException e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); error += e.toString(); } // Iterate through the results creating an object for each file HashMap<String, Integer> hitCounts = new HashMap<>(); HashMap<String, Integer> hitIndexes = new HashMap<>(); /** * First iterate the hit list and count duplicates based on file path. 
*/ for (int ii = 0; scoreDocs != null && ii < scoreDocs.length; ++ii) { Document hitDoc = null; int fileHits = 1; try { hitDoc = isearcher.doc(scoreDocs[ii].doc); Explanation explanation = isearcher.explain(query, scoreDocs[ii].doc); Explanation[] details = explanation.getDetails(); String description = details[0].getDescription(); /** * FIXME, find a better way to count hits in each file */ if (description.contains("=")) { String[] lineParts = description.split("="); String[] elementParts = lineParts[2].split(Pattern.quote(")")); if (elementParts.length > 0) { fileHits = ((int) Double.parseDouble(elementParts[0])); } } } catch (IOException e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); error += e.toString(); } String filePath = hitDoc.get((CConst.FIELD_PATH)); if (hitCounts.containsKey(filePath)) { hitCounts.put(filePath, hitCounts.get(filePath) + fileHits); } else { hitCounts.put(filePath, fileHits); hitIndexes.put(filePath, ii); } } /** * Iterate over each unique hit and save the results */ for (Map.Entry<String, Integer> uniqueHit : hitIndexes.entrySet()) { Document hitDoc = null; try { hitDoc = isearcher.doc(scoreDocs[uniqueHit.getValue()].doc); } catch (IOException e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); error += e.toString(); } String file_name = hitDoc.get((CConst.FIELD_FILENAME)); String file_path = hitDoc.get((CConst.FIELD_PATH)); try { String folder_url = OmniHash.getStartPathUrl(ctx, volumeId, file_path); JSONObject hitObj = new JSONObject(); hitObj.put("volume_id", volumeId); hitObj.put("file_path", file_path); hitObj.put("file_name", file_name); hitObj.put("folder_url", folder_url); hitObj.put("num_hits", hitCounts.get(file_path)); hitObj.put("error", error); jsonArray.put(hitObj); } catch (Exception e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); } } int num_hits = scoreDocs != null ? scoreDocs.length : 0; try { result.put("hits", jsonArray != null ? 
jsonArray : new JSONArray()); result.put("num_hits", num_hits); result.put("error", error); ireader.close(); m_directory.close(); } catch (JSONException | IOException e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); } return result; }
From source file:com.o19s.es.explore.ExplorerQueryTests.java
License:Apache License
@Before public void setupIndex() throws Exception { dir = new RAMDirectory(); try (IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) { for (int i = 0; i < docs.length; i++) { Document doc = new Document(); doc.add(new Field("_id", Integer.toString(i + 1), StoredField.TYPE)); doc.add(newTextField("text", docs[i], Field.Store.YES)); indexWriter.addDocument(doc); }//from w w w . j a v a 2 s.co m } reader = DirectoryReader.open(dir); searcher = new IndexSearcher(reader); }
From source file:com.o19s.es.ltr.logging.LoggingFetchSubPhaseTests.java
License:Apache License
/**
 * Builds a randomized test index of 20-100 documents (field "foo" or "bar",
 * random float score field) and exposes a searcher over it. The reader is
 * registered for automatic close at suite teardown.
 */
@BeforeClass
public static void init() throws Exception {
    directory = newDirectory(random());
    try (IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new StandardAnalyzer()))) {
        int docCount = TestUtil.nextInt(random(), 20, 100);
        docs = new HashMap<>();
        for (int n = 0; n < docCount; n++) {
            String fieldName = random().nextBoolean() ? "foo" : "bar";
            Document document = buildDoc(fieldName, random().nextFloat());
            indexWriter.addDocument(document);
            // Occasionally commit mid-stream so the index ends up with multiple segments.
            if (random().nextInt(4) == 0) {
                indexWriter.commit();
            }
            docs.put(document.get("id"), document);
        }
        indexWriter.commit();
    }
    IndexReader indexReader = closeAfterSuite(DirectoryReader.open(directory));
    searcher = new IndexSearcher(indexReader);
}