Example usage for org.apache.lucene.search IndexSearcher IndexSearcher

List of usage examples for org.apache.lucene.search IndexSearcher IndexSearcher

Introduction

In this page you can find the example usage for org.apache.lucene.search IndexSearcher IndexSearcher.

Prototype

public IndexSearcher(IndexReaderContext context) 

Source Link

Document

Creates a searcher searching the provided top-level IndexReaderContext.

Usage

From source file:cn.edu.thss.iise.beehivez.server.index.yawlindex.yawltasksluceneindex.YAWLTasksLuceneIndex.java

License:Open Source License

/**
 * Retrieves stored process models that contain every task of the query net.
 * <p>
 * First filters candidates with a Lucene boolean query over task names (optionally
 * expanding each name to its semantically similar labels), then verifies each
 * candidate with Ullman's sub-graph isomorphism algorithm.
 *
 * @param o          the query model; only {@link YNet} instances are handled,
 *                   anything else yields an empty result
 * @param similarity similarity threshold (currently unused in this implementation)
 * @return the set of matching process query results; empty on error or no match
 */
@Override
public TreeSet<ProcessQueryResult> getProcessModels(Object o, float similarity) {
    TreeSet<ProcessQueryResult> ret = new TreeSet<ProcessQueryResult>();

    try {
        if (o instanceof YNet) {
            YNet query = (YNet) o;

            IndexReader reader = IndexReader.open(FSDirectory.open(INDEX_DIR), true);
            Searcher searcher = new IndexSearcher(reader);
            try {
                BooleanQuery bq = new BooleanQuery();
                // Label expansion can generate many clauses; lift the default limit.
                BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);

                // Tracks task names already processed so duplicates are expanded once.
                HashSet<String> expandedTasks = new HashSet<String>();

                if (GlobalParameter.isEnableSimilarLabel()) {
                    // Label similarity enabled: each distinct task name becomes a
                    // SHOULD-group of its similar labels; every group MUST match.
                    for (YTask task : query.getNetTasks()) {
                        String taskName = task.getName().trim();
                        if (expandedTasks.add(taskName)) {
                            BooleanQuery subq = new BooleanQuery();

                            TreeSet<SimilarLabelQueryResult> similarTasks = labelIndex.getSimilarLabels(taskName,
                                    GlobalParameter.getLabelSemanticSimilarity());
                            Iterator<SimilarLabelQueryResult> it = similarTasks.iterator();
                            while (it.hasNext()) {
                                SimilarLabelQueryResult sl = it.next();
                                String similarTaskName = sl.getLabel();
                                Term term = new Term(YAWLTasksDocument.FIELDTASKS, similarTaskName);
                                TermQuery termQuery = new TermQuery(term);
                                subq.add(termQuery, Occur.SHOULD);
                            }
                            if (subq.getClauses().length > 0) {
                                bq.add(subq, Occur.MUST);
                            } else {
                                // No similar label exists, so no model can match;
                                // the finally block below releases the resources
                                // (the original leaked them on this early return).
                                return ret;
                            }
                        }
                    }
                } else {
                    // Label similarity disabled: every distinct task name MUST match exactly.
                    for (YTask task : query.getNetTasks()) {
                        String taskName = task.getName().trim();
                        if (expandedTasks.add(taskName)) {
                            Term term = new Term(YAWLTasksDocument.FIELDTASKS, taskName);
                            TermQuery termQuery = new TermQuery(term);
                            bq.add(termQuery, Occur.MUST);
                        }
                    }
                }

                YAWLTasksQueryResultCollector collector = new YAWLTasksQueryResultCollector(reader);
                searcher.search(bq, collector);
                ret = collector.getQueryResult();
            } finally {
                // Always release index resources, even on early return or failure.
                searcher.close();
                reader.close();
            }

            // Accurate check: keep only candidates that contain the query as a
            // sub graph (Ullman's sub-graph isomorphism algorithm).
            Iterator<ProcessQueryResult> it = ret.iterator();
            while (it.hasNext()) {
                ProcessQueryResult pqr = it.next();
                long process_id = pqr.getProcess_id();
                DataManager dm = DataManager.getInstance();
                YNet model = YAWLUtil.getYNetFromDefinition(dm.getProcessDefinitionBytes(process_id));
                if (!Ullman4YAWL.subGraphIsomorphism(query, model)) {
                    it.remove();
                }
            }

        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}

From source file:cn.fql.blogspider.SearchMain.java

License:Open Source License

/**
 * Interactive command-line search over a fixed local index.
 * Reads queries from stdin (or a query file, when configured), parses them with
 * an IK analyzer, and delegates result display to {@code doPagingSearch}.
 * Stops on EOF, on a blank line, or after a single {@code -query} run.
 *
 * @param args only {@code -h}/{@code -help} is honored; other demo options were removed
 */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";

    if ((args.length > 0) && ((("-h".equals(args[0])) || ("-help".equals(args[0]))))) {
        System.out.println(usage);
        System.exit(0);
    }

    // Hard-coded settings; the demo's command-line parsing was intentionally dropped.
    String index = "D:\\test\\index";
    String field = "contents";
    String queries = null;

    String queryString = null;
    int hitsPerPage = 10;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new IKAnalyzer();
    BufferedReader in;
    if (queries != null) {
        in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }

    try {
        QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
        while (true) {
            if ((queries == null) && (queryString == null)) {
                System.out.println("Enter query: ");
            }

            String line = (queryString != null) ? queryString : in.readLine();

            // EOF ends the loop. (The original also tested length() == -1,
            // which is impossible — String.length() is never negative.)
            if (line == null) {
                break;
            }

            line = line.trim();
            if (line.length() == 0) {
                break;
            }

            Query query = parser.parse(line);
            System.out.println("Searching for: " + query.toString(field));

            doPagingSearch(in, searcher, query, hitsPerPage, (queries == null) && (queryString == null));

            if (queryString != null) {
                break;
            }
        }
    } finally {
        // Close the query source and index reader even if parsing throws
        // (the original never closed "in" and skipped reader.close() on errors).
        in.close();
        reader.close();
    }
}

From source file:cn.hbu.cs.esearch.service.impl.EsearchSearchServiceImpl.java

License:Apache License

/**
 * Executes a search request against the pooled index readers and returns the
 * hits with highlighted fragments.
 * <p>
 * Flow: flush pending index events (best effort), parse the query (an empty
 * query string matches all documents), search over a combined MultiReader,
 * then — unless this is a COUNT-only request — load each hit, highlight the
 * queried field, and attach the converted document fields.
 * <p>
 * NOTE(review): the parameter name "sResquest" looks like a typo of
 * "sRequest"; kept as-is since renaming is cosmetic.
 *
 * @param sResquest the search request (query string, field, size, search type)
 * @return a populated {@link SearchResult}; for COUNT requests only timing and
 *         hit/doc totals are filled in
 * @throws EsearchException if the search or highlighting fails
 */
@Override
public SearchResult search(SearchRequest sResquest) throws EsearchException {
    try {
        // Best-effort flush (2000 ms budget) so the search sees recent writes;
        // a flush failure is logged but does not abort the search.
        esearchSystem.flushEvents(2000);
    } catch (EsearchException e) {
        LOGGER.error("Esearch flush events error. \n{}", e);
    }
    String queryString = sResquest.getQuery();
    String queryField = sResquest.getField();
    LOGGER.info("The search request coming: queryField:{},queryString:{}", queryField, queryString);

    Analyzer analyzer = esearchSystem.getAnalyzer();
    QueryParser queryParser = new QueryParser(Version.LUCENE_43, queryField, analyzer);
    SearchResult result = new SearchResult();

    List<EsearchMultiReader<R>> readers = null;
    MultiReader multiReader = null;
    IndexSearcher searcher = null;
    try {
        Query query = null;
        if (Strings.isNullOrEmpty(queryString)) {
            // No query text: match everything (used for browsing/counting).
            query = new MatchAllDocsQuery();
        } else {
            query = queryParser.parse(queryString);
        }
        readers = esearchSystem.getIndexReaders();
        // closeSubReaders=false: the pooled readers are returned (not closed)
        // in the finally block below.
        multiReader = new MultiReader(readers.toArray(new IndexReader[readers.size()]), false);
        searcher = new IndexSearcher(multiReader);
        long start = System.currentTimeMillis();
        TopDocs docs = searcher.search(query, null, sResquest.getSize());
        long end = System.currentTimeMillis();

        result.setTime(end - start);
        result.setTotalDocs(multiReader.numDocs());
        result.setTotalHits(docs.totalHits);

        LOGGER.info("Got {} hits. Cost:{} ms", docs.totalHits, end - start);

        // COUNT requests need only the totals computed above.
        if (sResquest.getSearchType() == SearchRequest.SearchType.COUNT) {
            return result;
        }

        ScoreDoc[] scoreDocs = docs.scoreDocs;
        ArrayList<SearchHit> hitList = new ArrayList<SearchHit>(scoreDocs.length);
        for (ScoreDoc scoreDoc : scoreDocs) {
            SearchHit hit = new SearchHit();
            hit.setScore(scoreDoc.score);
            int docID = scoreDoc.doc;

            Document doc = multiReader.document(docID);
            String content = doc.get(queryField);

            Scorer qs = new QueryScorer(query);

            // Wrap the best matching fragment of the queried field in a
            // highlight span for the UI.
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"hl\">", "</span>");
            Highlighter hl = new Highlighter(formatter, qs);
            String[] fragments = hl.getBestFragments(analyzer, queryField, content, 1);

            Map<String, String[]> fields = convert(doc, sResquest.getSearchType());
            fields.put("fragment", fragments);
            hit.setFields(fields);
            hitList.add(hit);
        }
        result.setHits(hitList.toArray(new SearchHit[hitList.size()]));
        return result;
    } catch (Exception e) {
        LOGGER.error(e.getMessage(), e);
        throw new EsearchException(e.getMessage(), e);
    } finally {
        // Close the MultiReader wrapper first, then hand the pooled
        // sub-readers back to the system (returnIndexReaders handles null).
        if (multiReader != null) {
            try {
                multiReader.close();
            } catch (IOException e) {
                LOGGER.error(e.getMessage(), e);
            }
        }
        esearchSystem.returnIndexReaders(readers);
    }
}

From source file:cn.jcenterhome.web.action.CpAction.java

/**
 * Extracts up to five keywords from the given text.
 * <p>
 * Tokenizes the text with IK, counts term frequencies, indexes the full text
 * as a single in-memory document, scores each candidate term against it
 * (relevance score weighted by frequency), and returns the five best terms.
 *
 * @param text the source text; empty or null yields an empty list
 * @return up to five keywords, best first
 * @throws IOException if tokenization fails
 */
private List<String> getKeyWord(String text) throws IOException {
    List<String> keywords = new ArrayList<String>();
    if (!Common.empty(text)) {
        // Term -> occurrence count within the text.
        Map<String, Integer> words = new HashMap<String, Integer>();
        Analyzer analyzer = new IKAnalyzer(true);
        StringReader reader = new StringReader(text);
        TokenStream tokenStream = analyzer.tokenStream("*", reader);
        TermAttribute termAtt = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
        while (tokenStream.incrementToken()) {
            String word = termAtt.term();
            // Ignore single characters and very short byte sequences.
            if (word.length() > 1 && Common.strlen(word) > 2) {
                Integer count = words.get(word);
                if (count == null) {
                    count = 0;
                }
                words.put(word, count + 1);
            }
        }
        if (words.size() > 0) {
            Directory dir = null;
            IndexSearcher searcher = null;
            try {
                // Index the whole text as one RAM-backed document, then score
                // every candidate term against it.
                String fieldName = "text";
                dir = new RAMDirectory();
                IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
                Document doc = new Document();
                doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED));
                writer.addDocument(doc);
                writer.close();
                searcher = new IndexSearcher(dir);
                searcher.setSimilarity(new IKSimilarity());
                Set<String> keys = words.keySet();
                Map<String, Float> temps = new HashMap<String, Float>();
                for (String key : keys) {
                    int count = words.get(key);
                    Query query = IKQueryParser.parse(fieldName, key);
                    TopDocs topDocs = searcher.search(query, 1);
                    if (topDocs.totalHits > 0) {
                        // Weight the relevance score by term frequency.
                        temps.put(key, topDocs.getMaxScore() * count);
                    }
                }
                // Take the five best-scoring terms; stop early once we have them
                // (the original kept iterating the full sorted array).
                Entry<String, Float>[] keywordEntry = getSortedHashtableByValue(temps);
                for (Entry<String, Float> entry : keywordEntry) {
                    if (keywords.size() >= 5) {
                        break;
                    }
                    keywords.add(entry.getKey());
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                // Null-guard: if IndexWriter construction failed, searcher/dir
                // may still be null (the original NPE'd here in that case).
                if (searcher != null) {
                    try {
                        searcher.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
                if (dir != null) {
                    try {
                        dir.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
    }
    return keywords;
}

From source file:collene.Freedb.java

License:Apache License

/**
 * Lists the directory's files, then runs the same sample query ("morrissey")
 * five times, printing search and document-lookup timings for each run.
 * Closes the directory when done.
 *
 * @param directory the Lucene directory containing a built index
 */
public static void DoSearch(Directory directory) throws Exception {

    out.println("I think these are the files:");
    for (String s : directory.listAll()) {
        out.println(s);
    }

    DirectoryReader reader = DirectoryReader.open(directory);
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
        QueryParser parser = new QueryParser(Version.LUCENE_4_9, "any", analyzer);
        for (int i = 0; i < 5; i++) {
            long searchStart = System.currentTimeMillis();
            Query query = parser.parse("morrissey");
            TopDocs docs = searcher.search(query, 10);
            long searchEnd = System.currentTimeMillis();
            out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(),
                    docs.totalHits, searchEnd - searchStart));
            long lookupStart = System.currentTimeMillis();
            for (ScoreDoc d : docs.scoreDocs) {
                Document doc = searcher.doc(d.doc);
                out.println(String.format("%d %.2f %d %s", d.doc, d.score, d.shardIndex,
                        doc.getField("any").stringValue()));
            }
            long lookupEnd = System.currentTimeMillis();
            out.println(String.format("Document lookup took %d ms for %d documents", lookupEnd - lookupStart,
                    docs.scoreDocs.length));
        }
    } finally {
        // The original never closed this reader; close it before the directory.
        reader.close();
    }

    directory.close();
}

From source file:collene.Freedb.java

License:Apache License

/**
 * Streams the freedb dump into the given directory in batches, periodically
 * running a sample query to show progress, then prints timing and the final
 * file list. Closes the writer and the directory when done.
 *
 * @param directory target Lucene directory; null aborts the process
 */
public static void BuildIndex(Directory directory) throws Exception {
    String freedbPath = "/Users/gdusbabek/Downloads/freedb-complete-20140701.tar.bz2";

    if (directory == null) {
        System.out.println("Need to specify: { memory | file | cassandra }. Did you misspell something?");
        System.exit(-1);
    }

    // Background thread that parses the archive into a bounded queue.
    FreeDbReader reader = new FreeDbReader(new File(freedbPath), 50000);
    reader.start();

    long indexStart = System.currentTimeMillis();
    Collection<Document> documents = new ArrayList<Document>(BATCH_SIZE);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(directory, config);

    // stop after this many documents.
    final int maxDocuments = 400000; //Integer.MAX_VALUE;

    FreeDbEntry entry = reader.next();
    int count = 0;
    while (entry != null && count < maxDocuments) {
        // "any" is the only stored catch-all field; the rest are search-only.
        Document doc = new Document();
        String any = entry.toString();
        doc.add(new Field("any", any, TextField.TYPE_STORED));
        doc.add(new Field("artist", entry.getArtist(), TextField.TYPE_NOT_STORED));
        doc.add(new Field("album", entry.getAlbum(), TextField.TYPE_NOT_STORED));
        doc.add(new Field("title", entry.getTitle(), TextField.TYPE_NOT_STORED));
        doc.add(new Field("genre", entry.getGenre(), TextField.TYPE_NOT_STORED));
        doc.add(new Field("year", entry.getYear(), TextField.TYPE_NOT_STORED));
        for (int i = 0; i < entry.getTrackCount(); i++) {
            doc.add(new Field("track", entry.getTrack(i), TextField.TYPE_STORED));
        }
        documents.add(doc);
        if (VERBOSE) {
            out.println(any);
        }

        if (documents.size() == BATCH_SIZE) {
            writer.addDocuments(documents);
            documents.clear();
        }

        count += 1;
        if (count >= MAX_ENTRIES) {
            // done indexing.
            break;
        }
        entry = reader.next();

        if (count % 100000 == 0) {
            out.println(String.format("Indexed %d documents", count));

            // do a quick morrissey search for fun.
            morrisseySearch(writer, directory, analyzer);
        }
    }

    if (documents.size() > 0) {
        out.println(String.format("Adding batch at count %d", count));
        writer.addDocuments(documents);
        out.println("done");
        documents.clear();

        // do a quick morrissey search for fun.
        morrisseySearch(writer, directory, analyzer);
    }

    long indexTime = System.currentTimeMillis() - indexStart;
    out.println(String.format("Indexed %d things in %d ms (%s)", count, indexTime, directory.toString()));

    out.println("I think these are the files:");
    for (String s : directory.listAll()) {
        out.println(s);
    }

    writer.close(true);
    directory.close();
}

/**
 * Runs the sample "morrissey" query against a near-real-time reader over the
 * writer and prints hit count, timing, and per-hit scores. Extracted from the
 * two duplicated copies in BuildIndex; also closes the NRT reader, which the
 * original versions leaked.
 */
private static void morrisseySearch(IndexWriter writer, Directory directory, Analyzer analyzer)
        throws Exception {
    DirectoryReader nrtReader = DirectoryReader.open(writer, false);
    try {
        IndexSearcher searcher = new IndexSearcher(nrtReader);
        QueryParser parser = new QueryParser(Version.LUCENE_4_9, "any", analyzer);
        long searchStart = System.currentTimeMillis();
        Query query = parser.parse("morrissey");
        TopDocs docs = searcher.search(query, 10);
        long searchEnd = System.currentTimeMillis();
        out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(),
                docs.totalHits, searchEnd - searchStart));
        for (ScoreDoc d : docs.scoreDocs) {
            out.println(String.format("%d %.2f %d", d.doc, d.score, d.shardIndex));
        }
    } finally {
        nrtReader.close();
    }
}

From source file:collene.TestIndexing.java

License:Apache License

/**
 * Round-trip test: index 100 documents (committing and force-merging each
 * iteration), verify a hit and a miss through a near-real-time reader, delete
 * one document via tryDeleteDocument, optionally verify the on-disk file set,
 * and finally confirm the deleted document is gone after reopening.
 */
@Test
public void test() throws IOException, ParseException {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);

    // write it out.
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(directory, config);

    for (int i = 0; i < 100; i++) {
        Collection<Document> documents = new ArrayList<Document>();
        Document doc = new Document();
        doc.add(new Field("key", "aaa_" + i, TextField.TYPE_STORED));
        doc.add(new Field("not", "notaaa", TextField.TYPE_NOT_STORED));
        doc.add(new Field("meta", "aaa_meta_aaa_" + i, TextField.TYPE_STORED));
        documents.add(doc);

        writer.addDocuments(documents);

        // Commit and merge every iteration on purpose: this exercises the
        // directory implementation under heavy segment churn.
        writer.commit();
        writer.forceMerge(1);
        writer.forceMergeDeletes(true);
    }

    // now read it back via a near-real-time reader over the writer.
    DirectoryReader nrtReader = DirectoryReader.open(writer, false);
    IndexSearcher searcher = new IndexSearcher(nrtReader);
    QueryParser parser = new QueryParser(Version.LUCENE_4_9, "key", analyzer);

    Query query = parser.parse("aaa_4");
    TopDocs docs = searcher.search(query, 1);
    int idToDelete = docs.scoreDocs[0].doc;
    Assert.assertTrue(docs.totalHits > 0);

    query = parser.parse("fersoius");
    docs = searcher.search(query, 1);
    Assert.assertFalse(docs.totalHits > 0);

    // The original leaked this reader; release it before deleting.
    nrtReader.close();

    // delete that document.
    DirectoryReader reader = DirectoryReader.open(writer, true);
    writer.tryDeleteDocument(reader, idToDelete);

    reader.close();
    writer.close();

    // list files
    Set<String> files = new HashSet<String>();
    System.out.println("Listing files for " + directory.toString());
    for (String file : directory.listAll()) {
        files.add(file);
        System.out.println(" " + file);
    }

    if (strictFileChecking) {
        System.out.println("String file checking...");
        Sets.SetView<String> difference = Sets.difference(expectedFiles, files);
        Assert.assertEquals(Joiner.on(",").join(difference), 0, difference.size());
    }

    // Reopen from the directory: the deleted document must no longer match.
    reader = DirectoryReader.open(directory);
    searcher = new IndexSearcher(reader);
    query = parser.parse("aaa_4");
    docs = searcher.search(query, 1);
    reader.close();
    Assert.assertFalse(docs.totalHits > 0);

    directory.close();
}

From source file:collene.TestLuceneAssumptions.java

License:Apache License

/**
 * Verifies that a near-real-time reader opened from a writer can see documents
 * that were added but not yet committed.
 */
@Test
public void testCanSeeUpdatesAfterAdd() throws Exception {
    // this verifies that any reader can see updates after documents are added.
    File fdir = TestUtil.getRandomTempDir();
    pleaseDelete.add(fdir);

    Directory dir = FSDirectory.open(fdir);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(dir, config);

    Document doc0 = new Document();
    Document doc1 = new Document();
    doc0.add(new Field("f0", "aaa", TextField.TYPE_STORED));
    doc1.add(new Field("f0", "bbb", TextField.TYPE_STORED));
    List<Document> docs = Lists.newArrayList(doc0, doc1);
    writer.addDocuments(docs, analyzer);

    // NRT reader: sees the adds above without a commit.
    DirectoryReader nrtReader = DirectoryReader.open(writer, false);
    try {
        IndexSearcher searcher = new IndexSearcher(nrtReader);
        QueryParser parser = new QueryParser(Version.LUCENE_4_9, "f0", new StandardAnalyzer(Version.LUCENE_4_9));

        Query query = parser.parse("bbb");
        TopDocs topDocs = searcher.search(query, 10);

        Assert.assertEquals(1, topDocs.totalHits);
        Assert.assertEquals(1, topDocs.scoreDocs.length);
    } finally {
        // The original never closed this reader.
        nrtReader.close();
    }

    writer.close();
    dir.close();
}

From source file:collene.TestShakespeare.java

License:Apache License

/**
 * Indexes every non-hidden Shakespeare file line-by-line (flushing in batches
 * of 200), then searches for "trumpet" through a near-real-time reader and
 * prints timings, hits, and the directory's file list.
 */
@Test
public void rest() throws IOException, ParseException {
    File shakespeareDir = new File("src/test/resources/shakespeare");
    File[] files = shakespeareDir.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return !pathname.isHidden();
        }
    });

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(directory, config);

    long startIndexTime = System.currentTimeMillis();
    final int flushLines = 200;
    int totalLines = 0;
    Collection<Document> documents = new ArrayList<Document>();
    for (File f : files) {
        String play = f.getName();
        int lineNumber = 1;
        // Explicit charset: relying on the platform default would make the
        // test machine-dependent (the corpus is plain ASCII/UTF-8 text).
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
        try {
            String line = reader.readLine();
            while (line != null) {
                // index it.
                Document doc = new Document();
                doc.add(new NumericDocValuesField("line", lineNumber));
                doc.add(new Field("play", play, TextField.TYPE_STORED));
                doc.add(new Field("content", line, TextField.TYPE_STORED));
                documents.add(doc);

                totalLines += 1;
                if (totalLines % flushLines == 0) {
                    writer.addDocuments(documents);
                    documents.clear();
                }

                lineNumber += 1;
                line = reader.readLine();
            }
        } finally {
            // Close even if indexing throws (the original leaked on failure).
            reader.close();
        }
    }

    if (documents.size() > 0) {
        writer.addDocuments(documents);
    }
    long endIndexTime = System.currentTimeMillis();

    System.out.println(
            String.format("Index for %s took %d ms", directory.toString(), endIndexTime - startIndexTime));

    // let's search! (NRT reader: sees uncommitted writer state)
    DirectoryReader nrtReader = DirectoryReader.open(writer, false);
    try {
        IndexSearcher searcher = new IndexSearcher(nrtReader);
        QueryParser parser = new QueryParser(Version.LUCENE_4_9, "content", analyzer);

        String[] queryTerms = new String[] { "trumpet" };

        for (String term : queryTerms) {
            long searchStart = System.currentTimeMillis();
            Query query = parser.parse(term);
            TopDocs docs = searcher.search(query, 10);
            long searchEnd = System.currentTimeMillis();
            System.out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(),
                    docs.totalHits, searchEnd - searchStart));
            for (ScoreDoc doc : docs.scoreDocs) {
                System.out.println(String.format("%d %.2f %d", doc.doc, doc.score, doc.shardIndex));
            }
        }
    } finally {
        // The original never closed this reader.
        nrtReader.close();
    }

    writer.close(true);

    System.out.println("I think these are the files:");
    for (String s : directory.listAll()) {
        System.out.println(s);
    }

    directory.close();
}

From source file:com.adanac.module.blog.search.LuceneHelper.java

License:Apache License

/**
 * Searches the index under {@code INDEX_PATH + path} for {@code searchText}
 * and returns, per distinct id, the loaded record augmented with a highlighted
 * title/summary fragment.
 *
 * @param searchText user query, parsed against the "indexedContent" field
 * @param path       index subdirectory appended to INDEX_PATH
 * @param title      result-map key receiving the part before SEPARATOR
 * @param loadQuery  loader resolving a document id to its record
 * @return matching records (duplicates by id collapsed); empty list on failure
 */
private static List<Map<String, String>> search(String searchText, String path, String title,
        LoadQuery loadQuery) {
    // try-with-resources: the original never closed the reader.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_PATH + path)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new SmartChineseAnalyzer();
        QueryParser parser = new QueryParser("indexedContent", analyzer);
        Query query = parser.parse(searchText);
        TopDocs resultDocs = searcher.search(query, 100);
        ScoreDoc[] scoreDocs = resultDocs.scoreDocs;
        // Wrap matched terms in a red span; fragments capped at 150 chars.
        SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<span style=\"color:red;\">",
                "</span>");
        Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(150));
        List<Map<String, String>> result = new ArrayList<>();
        List<Integer> idList = new ArrayList<>();
        for (int i = 0; i < scoreDocs.length; i++) {
            Document doc = searcher.doc(scoreDocs[i].doc);
            Integer id = Integer.valueOf(doc.get("id"));
            if (!idList.contains(id)) {
                String indexedContent = doc.get("indexedContent");
                TokenStream tokenStream = analyzer.tokenStream("indexedContent", indexedContent);
                Map<String, String> data = loadQuery.getById(id);
                String highlighterString = highlighter.getBestFragment(tokenStream, indexedContent);
                if (highlighterString == null) {
                    // getBestFragment returns null when no query term occurs in
                    // this field text; fall back to the raw content instead of
                    // NPE'ing on the contains() call below.
                    highlighterString = indexedContent;
                }
                if (highlighterString.contains(SEPARATOR)) {
                    // Content is "title SEPARATOR summary": split it back apart.
                    String[] array = highlighterString.split(SEPARATOR);
                    data.put(title, array[0]);
                    if (array.length > 1) {
                        data.put("summary", array[1]);
                    }
                } else {
                    data.put("summary", highlighterString);
                }
                result.add(data);
                idList.add(id);
            }
        }
        return result;
    } catch (Exception e) {
        logger.error("search failed ...", e);
    }
    return new ArrayList<>();
}