Example usage for org.apache.lucene.search IndexSearcher doc

Introduction

On this page you can find example usage for org.apache.lucene.search IndexSearcher doc.

Prototype

public Document doc(int docID) throws IOException 

Document

Sugar for .getIndexReader().document(docID)
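
Before the examples, here is a minimal, self-contained sketch of the call against the Lucene 5/6-style API used by several examples below. The index path and the "title" field are illustrative assumptions, not part of the API. Note that newer Lucene releases (9.5+) deprecate IndexSearcher.doc(int) in favor of searcher.storedFields().document(docID).

import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

public class IndexSearcherDocExample {
    public static void main(String[] args) throws Exception {
        // Open an existing index; the path is a placeholder for this sketch
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")));
        IndexSearcher searcher = new IndexSearcher(reader);

        // Any query works; MatchAllDocsQuery keeps the sketch minimal
        TopDocs results = searcher.search(new MatchAllDocsQuery(), 10);
        for (ScoreDoc hit : results.scoreDocs) {
            // doc(int) resolves the internal doc ID to the document's stored fields
            Document doc = searcher.doc(hit.doc);
            System.out.println(doc.get("title")); // "title" is a hypothetical stored field
        }
        reader.close();
    }
}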

Usage

From source file:lucenesearch.TagBodyCount.java

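// Computes precision and recall over answers matching each body term; each hit is
// resolved to its stored fields with searcher.doc(hits[i].doc) to read the "SId" field.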
public void calculatePR(String[] bodyTerms, int N) throws IOException, ParseException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(new Searcher().getPostIndexPath())));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    HashSet<Integer> found = new HashSet<>();
    HashSet<Integer> total = new HashSet<>();

    System.out.println("Calculating word itself: " + searchTag);
    BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
    booleanQuery.add(new QueryParser("Body", analyzer).parse(searchTag), BooleanClause.Occur.MUST);
    booleanQuery.add(IntPoint.newExactQuery("PostTypeId", 2), BooleanClause.Occur.MUST);

    TopDocs results = searcher.search(booleanQuery.build(), N);

    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, N);

    int count_r = 0;
    int count_n = 0;
    int skip = 0;

    for (int i = start; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc);
        if (doc.get("SId") == null) {
            skip++;
            continue;
        }

        int id = Integer.parseInt(doc.get("SId"));

        if (!hasTag(id, mainTag)) {
            continue;
        }

        if (this.acceptedAnswers.contains(id)) {
            found.add(id);
            count_r++;
        } else {
            count_n++;
        }
        total.add(id);
    }

    System.out.println("Total Post Cnt = " + count_r + "/" + this.acceptedAnswers.size());
    System.out.println("Total skipped Post = " + skip);

    double[] P = new double[bodyTerms.length + 1];
    double[] R = new double[bodyTerms.length + 1];
    int cnt = 0;
    P[cnt] = (double) (count_r) / (count_r + count_n);
    R[cnt] = (double) count_r / (acceptedAnswers.size());
    cnt++;

    for (String bodyTerm : bodyTerms) {
        System.out.println("Query for: " + bodyTerm);
        booleanQuery = new BooleanQuery.Builder();
        booleanQuery.add(new QueryParser("Body", analyzer).parse(bodyTerm), BooleanClause.Occur.MUST);
        booleanQuery.add(IntPoint.newExactQuery("PostTypeId", 2), BooleanClause.Occur.MUST);

        results = searcher.search(booleanQuery.build(), N);

        hits = results.scoreDocs;

        numTotalHits = results.totalHits;
        System.out.println(numTotalHits + " total matching documents");

        start = 0;
        end = Math.min(numTotalHits, N);

        count_r = 0;
        count_n = 0;
        skip = 0;

        for (int i = start; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            if (doc.get("SId") == null) {
                skip++;
                continue;
            }

            int id = Integer.parseInt(doc.get("SId"));

            if (!hasTag(id, searchTag)) {
                skip++;
                continue;
            }

            if (this.acceptedAnswers.contains(id)) {
                found.add(id);
                count_r++;
            } else {
                count_n++;
            }
            total.add(id);
        }
        P[cnt] = (double) found.size() / total.size();
        R[cnt] = (double) found.size() / (acceptedAnswers.size());
        cnt++;
        System.out.println("Total Post Cnt = " + count_r + "/" + count_n + "/" + this.acceptedAnswers.size());
        System.out.println("Total skipped Post = " + skip);
        System.out.println("-----------------");
    }
    System.out.println("-----Final Accum Count-----");
    System.out.println("Cnt,Method,Value");
    for (int i = 0; i < cnt; i++) {
        System.out.println((i) + "," + "Precision" + "," + P[i] * 100);
        System.out.println((i) + "," + "Recall" + "," + R[i] * 100);
    }

}

From source file:lucenesearch.TagBodyCount.java

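// Collects per-term hit sets for a Venn-style overlap report, again using
// searcher.doc(...) to read each hit's stored "SId" field.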
public void calculateVenn(String[] bodyTerms, int N) throws IOException, ParseException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(new Searcher().getPostIndexPath())));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    ArrayList<HashSet<Integer>> sets = new ArrayList<>();

    System.out.println("Calculating word itself: " + searchTag);
    BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
    booleanQuery.add(new QueryParser("Body", analyzer).parse(searchTag), BooleanClause.Occur.MUST);
    booleanQuery.add(IntPoint.newExactQuery("PostTypeId", 2), BooleanClause.Occur.MUST);

    TopDocs results = searcher.search(booleanQuery.build(), N);

    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, N);

    int count = 0;
    int skip = 0;

    sets.add(0, acceptedAnswers);

    HashSet<Integer> temp = new HashSet<Integer>();
    sets.add(1, new HashSet<>());

    for (int i = start; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc);
        if (doc.get("SId") == null) {
            skip++;
            continue;
        }

        int id = Integer.parseInt(doc.get("SId"));
        if (this.acceptedAnswers.contains(id)) {
            sets.get(1).add(id);
            count++; // tally matches against acceptedAnswers
        }
    }

    System.out.println("Total Post Cnt = " + count + "/" + this.acceptedAnswers.size());
    System.out.println("Total skipped Post = " + skip);

    int[] counts = new int[bodyTerms.length];
    int[] accum_counts = new int[bodyTerms.length];
    int cnt = 0;
    int arrayIndex = 2;
    for (String bodyTerm : bodyTerms) {
        sets.add(arrayIndex, new HashSet<>());
        System.out.println("Query for: " + bodyTerm);
        booleanQuery = new BooleanQuery.Builder();
        booleanQuery.add(new QueryParser("Body", analyzer).parse(bodyTerm), BooleanClause.Occur.MUST);
        booleanQuery.add(IntPoint.newExactQuery("PostTypeId", 2), BooleanClause.Occur.MUST);

        results = searcher.search(booleanQuery.build(), N);

        hits = results.scoreDocs;

        numTotalHits = results.totalHits;
        System.out.println(numTotalHits + " total matching documents");

        start = 0;
        end = Math.min(numTotalHits, N);

        count = 0;
        skip = 0;

        for (int i = start; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            if (doc.get("SId") == null) {
                skip++;
                continue;
            }

            int id = Integer.parseInt(doc.get("SId"));
            if (this.acceptedAnswers.contains(id)) {
                sets.get(arrayIndex).add(id);
            }
        }
        counts[cnt] = sets.get(arrayIndex).size(); // this term's overlap with acceptedAnswers
        accum_counts[cnt] = cnt == 0 ? counts[cnt] : accum_counts[cnt - 1] + counts[cnt];
        arrayIndex++;
        cnt++;
        System.out.println("-----------------");
    }

    System.out.println("-------------------\nFinal Res\n-------------\n");
    int pow = 1 << (bodyTerms.length + 1); // 2^(bodyTerms.length + 1)

    HashSet<Integer> temp2 = new HashSet<>();
    for (HashSet<Integer> hs : sets) {
        temp2.addAll(hs);
    }
    int size = temp2.size();
    for (int i = 1; i <= pow - 1; i++) {
        ArrayList<Integer> numbers = new ArrayList<>();
        int dig = 2;
        int n = i;
        while (n != 0) {
            if (n % 2 == 1) {
                numbers.add(dig);
            }
            n /= 2;
            dig++;
        }
        temp = new HashSet<>(sets.get(numbers.get(0) - 2));
        for (Integer number : numbers) {
            temp.retainAll(sets.get(number - 2)); //-1 to include self translation and accepted
        }
        String s = numbers.size() == 1 ? "area" : "n";
        for (Integer number : numbers) {
            s = s + (number - 1);
        }
        s += "=" + temp.size() + ",";
        System.out.println(s);
    }
    // build the R category vector without a trailing comma inside the list
    String s = "category = c(\"All\",\"" + this.searchTag + "\"";
    for (String t : bodyTerms) {
        s = s + ",\"" + t + "\"";
    }
    s += "),";
    System.out.println(s);

}

From source file:lucenesearch.TagScorer.java

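// Tallies answers per (tag, user) by reading "SOwnerUserId" from each hit's stored
// fields, then evaluates the ranked user list for each tag against its golden file.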
public void calculate() throws IOException {

    HashMap<String, HashMap<Integer, Double>> tagUserScore = new HashMap<>();

    int hitsPerPage = 2000000;
    String index = new Searcher().getPostIndexPath();
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);

    BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
    booleanQuery.add(IntPoint.newExactQuery("PostTypeId", 2), BooleanClause.Occur.MUST);
    Query q = booleanQuery.build();
    TopDocs results = searcher.search(q, hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;
    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " Total answers found.");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    int errorUsers = 0;
    for (int i = start; i < end; i++) {
        System.out.println("processing answer " + i + "/" + end);
        int docID = hits[i].doc;
        int uid = -1;
        Document doc = searcher.doc(docID);

        Post p = new Post(doc);
        try {
            uid = Integer.parseInt(doc.get("SOwnerUserId"));
            ArrayList<String> tgs = tags.get(p.getId());
            for (String tg : tgs) {
                if (!tagUserScore.containsKey(tg)) {
                    tagUserScore.put(tg, new HashMap<>());
                }
                HashMap<Integer, Double> temp = tagUserScore.get(tg);
                if (!temp.containsKey(uid)) {
                    temp.put(uid, 1.0);
                } else {
                    temp.replace(uid, 1 + temp.get(uid));
                }
            }
        } catch (Exception ex) {
            errorUsers++;
            continue;
        }
    }

    System.out.println("tag,map,p@1,p@5,p@10");
    for (Map.Entry<String, HashMap<Integer, Double>> entryM : tagUserScore.entrySet()) {
        String tag = entryM.getKey();
        HashMap<Integer, Double> userScores = entryM.getValue();

        String goldenFile = Utility.getGoldenFileName(tag);

        ValueComparator bvc = new ValueComparator(userScores);
        TreeMap<Integer, Double> sorted_map = new TreeMap<Integer, Double>(bvc);
        sorted_map.putAll(userScores);

        ArrayList<Integer> lst = new ArrayList<>();

        for (Map.Entry<Integer, Double> entry : sorted_map.entrySet()) {
            lst.add(entry.getKey());
        }
        Evaluator ev = new Evaluator();
        Balog b = new Balog();
        double map = ev.map(lst, b.getGoldenList(goldenFile));
        double p1 = ev.precisionAtK(lst, b.getGoldenList(goldenFile), 1);
        double p5 = ev.precisionAtK(lst, b.getGoldenList(goldenFile), 5);
        double p10 = ev.precisionAtK(lst, b.getGoldenList(goldenFile), 10);
        EvalResult er = new EvalResult(tag, map, p1, p5, p10);
        System.out.println(tag + "," + er.getMap() + "," + er.getP1() + "," + er.getP5() + "," + er.getP10());

    }

}

From source file:lucenesearche.HW3.java

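// Batch variant: builds an index from user-supplied paths, then runs every query in
// query_stopped.txt and writes the top-100 hits per query in TREC-style columns.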
public static void main(String[] args) throws IOException {
    System.out.println(
            "Enter the FULL path where the index will be created: (e.g. /Usr/index or c:\\temp\\index)");

    String indexLocation = null;
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    String s = br.readLine();

    HW3 indexer = null;
    try {
        indexLocation = s;
        indexer = new HW3(s);
    } catch (Exception ex) {
        System.out.println("Cannot create index..." + ex.getMessage());
        System.exit(-1);
    }
    String query1, query2, query3, query4;
    query1 = "Lucene_Results_Stopped.txt";
    query2 = "Lucene_Q2_top100.txt";
    query3 = "Lucene_Q3_top100.txt";
    query4 = "Lucene_Q4_top100.txt";

    File luceneFile = new File(query1); // change filename for each query
    int query_id;

    // ===================================================
    // read input from user until he enters q for quit
    // ===================================================
    while (!s.equalsIgnoreCase("q")) {
        try {
            System.out.println(
                    "Enter the FULL path to add into the index (q=quit): (e.g. /home/mydir/docs or c:\\Users\\mydir\\docs)");
            System.out.println("[Acceptable file types: .xml, .htm, .html, .txt]");
            s = br.readLine();
            if (s.equalsIgnoreCase("q")) {
                break;
            }

            // try to add file into the index
            indexer.indexFileOrDirectory(s);
        } catch (Exception e) {
            System.out.println("Error indexing " + s + " : " + e.getMessage());
        }
    }

    // ===================================================
    // after adding, we always have to call the
    // closeIndex, otherwise the index is not created
    // ===================================================
    indexer.closeIndex();

    // =========================================================
    // Now search
    // =========================================================
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));
    IndexSearcher searcher = new IndexSearcher(reader);

    s = "";
    File file1 = new File(
            "C:\\Users\\shantanu\\Downloads\\NetBeansProjects\\LuceneSearchE\\src\\lucenesearche\\query_stopped.txt");
    ScoreDoc[] hits;
    try {
        query_id = 1;
        Scanner scanner = new Scanner(new FileInputStream(file1)); // read the query file line by line

        luceneFile.createNewFile();
        FileWriter writer = new FileWriter(luceneFile);

        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            System.out.println(line);
            TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);
            Query q = new QueryParser(Version.LUCENE_47, "contents", sAnalyzer).parse(line);
            searcher.search(q, collector);

            hits = collector.topDocs().scoreDocs;

            // 4. display results
            System.out.println("Found " + hits.length + " hits.");
            for (int i = 0; i < hits.length; ++i) {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                String a = d.get("filename");
                String parts = a.substring(0, a.indexOf('.')); // file name without its extension

                writer.append(String.format("%-10s %-10s %-30s %-10s %-30s", query_id, "Q0", parts, (i + 1),
                        hits[i].score));
                writer.append('\n');
                writer.flush();
            }

            query_id += 1;
        }
        writer.close();
    } catch (Exception e) {
        System.out.println("Error searching " + s + " : " + e.toString());
    }

}

From source file:lucenesearche.HW3.java

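// Interactive variant of the above: reads queries from stdin until "q" and writes the
// top-100 hits for each query.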
public static void main(String[] args) throws IOException {
    System.out.println(
            "Enter the FULL path where the index will be created: (e.g. /Usr/index or c:\\temp\\index)");

    String indexLocation = null;
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    String s = br.readLine();

    HW3 indexer = null;
    try {
        indexLocation = s;
        indexer = new HW3(s);
    } catch (Exception ex) {
        System.out.println("Cannot create index..." + ex.getMessage());
        System.exit(-1);
    }

    // ===================================================
    // read input from user until he enters q for quit
    // ===================================================
    while (!s.equalsIgnoreCase("q")) {
        try {
            System.out.println(
                    "Enter the FULL path to add into the index (q=quit): (e.g. /home/mydir/docs or c:\\Users\\mydir\\docs)");
            System.out.println("[Acceptable file types: .xml, .htm, .html, .txt]");
            s = br.readLine();
            if (s.equalsIgnoreCase("q")) {
                break;
            }

            // try to add file into the index
            indexer.indexFileOrDirectory(s);
        } catch (Exception e) {
            System.out.println("Error indexing " + s + " : " + e.getMessage());
        }
    }

    // ===================================================
    // after adding, we always have to call the
    // closeIndex, otherwise the index is not created
    // ===================================================
    indexer.closeIndex();

    // =========================================================
    // Now search
    // =========================================================
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));
    IndexSearcher searcher = new IndexSearcher(reader);

    s = "";
    while (!s.equalsIgnoreCase("q")) {
        try {
            System.out.println("Enter the search query (q=quit):");
            s = br.readLine();
            if (s.equalsIgnoreCase("q")) {
                break;
            }

            // create a fresh collector and Formatter for each query; both accumulate state
            TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);
            Formatter f = new Formatter();
            Query q = new QueryParser(Version.LUCENE_47, "contents", sAnalyzer).parse(s);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            // 4. display results
            String query1, query2, query3, query4;
            query1 = "Lucene_Q1_top100.txt";
            query2 = "Lucene_Q2_top100.txt";
            query3 = "Lucene_Q3_top100.txt";
            query4 = "Lucene_Q4_top100.txt";
            File luceneFile = new File(query4); // change filename for each query
            int query_id;
            query_id = 4; // change this for new query 
            luceneFile.createNewFile();
            FileWriter writer = new FileWriter(luceneFile);
            writer.write(String.format("%-10s %-10s %-80s %-10s %-40s %-20s", "Query ID", "Q0", "Document Name",
                    "Rank", "Cosine Similarity Score", "System Name\n"));
            System.out.println("Found " + hits.length + " hits.");
            System.out.println(f.format("%-10s %-10s %-80s %-10s %-40s %-20s", "Query ID", "Q0",
                    "Document Name", "Rank", "Cosine Similarity Score", "System Name"));
            for (int i = 0; i < hits.length; ++i) {
                Formatter fmt = new Formatter();
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                //System.out.println((i+1) +". " + d.get("path")+" "+ hits[i].score);
                writer.write(String.format("%-10s %-10s %-80s %-10s %-40s %-20s", "" + query_id, "Q0",
                        "" + d.get("path"), "" + (i + 1), "" + hits[i].score, "Shantanu-SYS-001\n"));
                writer.flush();
                System.out.println(fmt.format("%-10s %-10s %-80s %-10s %-40s %-20s", "" + query_id, "Q0",
                        "" + d.get("path"), "" + (i + 1), "" + hits[i].score, "Shantanu-SYS-001"));
            }
            writer.close();

            // 5. term stats --> watch out for which "version" of the term
            // must be checked here instead!
            Term termInstance = new Term("contents", s);
            long termFreq = reader.totalTermFreq(termInstance);
            long docCount = reader.docFreq(termInstance);
            System.out.println(s + " Term Frequency " + termFreq + " - Document Frequency " + docCount);

        } catch (Exception e) {
            System.out.println("Error searching " + s + " : " + e.getMessage());
            break;
        }

    }

}

From source file:magoffin.matt.lucene.LuceneSearchService.java

License:Open Source License

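// Materializes one page [start, end) of hits, loading each document with searcher.doc()
// and delegating object construction to the index's plugin.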
@Override
public List<?> build(String index, final TopDocCollector hits, final int start, final int end) {
    final LucenePlugin plugin = getPluginForString(index);
    final int length = end > start ? end - start : 0;
    final ScoreDoc[] docs = hits.topDocs().scoreDocs;
    final int hitLength = docs.length;
    final List<Object> searchMatches = new ArrayList<Object>(length);
    doIndexSearcherOp(index, new IndexSearcherOp() {

        @Override
        public void doSearcherOp(String type, IndexSearcher searcher) throws IOException {
            for (int i = start; i < end && i < hitLength; i++) {
                int docId = docs[i].doc;
                Document doc = searcher.doc(docId);
                searchMatches.add(plugin.build(doc));
            }
        }
    });
    return searchMatches;
}

From source file:magoffin.matt.lucene.LuceneSearchService.java

License:Open Source License

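// Streams each hit to the handler as a field-name -> values map, stopping early when
// the handler's processMatch returns false.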
@Override
public void search(String index, String query, final LuceneSearchResultHandler handler) {
    Query luceneQuery = parseQuery(index, query);
    doIndexQueryOp(index, luceneQuery, ASYNCHRONOUS, new IndexQueryOp() {
        @SuppressWarnings("unchecked")
        @Override
        public void doSearcherOp(String indexType, IndexSearcher searcher, Query myQuery, TopDocCollector hits)
                throws IOException {
            int numHits = hits == null ? 0 : hits.getTotalHits();
            handler.setTotalMatches(numHits);
            ScoreDoc[] docs = hits == null ? null : hits.topDocs().scoreDocs;
            // guard against numHits exceeding the number of collected docs
            for (int i = 0; i < numHits && i < docs.length; i++) {
                int docId = docs[i].doc;
                Document doc = searcher.doc(docId);
                List<Field> fields = doc.getFields();

                Map<String, String[]> match = new LinkedHashMap<String, String[]>();
                for (Field field : fields) {
                    match.put(field.name(), doc.getValues(field.name()));
                }

                if (!handler.processMatch(match)) {
                    break;
                }
            }
        }
    });
}

From source file:magoffin.matt.lucene.LuceneSearchService.java

License:Open Source License

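// Paged search returning field maps; earlier rows are back-filled with null entries so
// every result exposes the same key set.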
@Override
public LuceneSearchResults search(String type, String query, final int maxResults, final int pageSize,
        final int page) {
    Query luceneQuery = parseQuery(type, query);
    final LuceneSearchResultsImpl results = new LuceneSearchResultsImpl();
    doIndexQueryOp(type, luceneQuery, ASYNCHRONOUS, new IndexQueryOp() {
        @SuppressWarnings({ "unchecked" })
        @Override
        public void doSearcherOp(String indexType, IndexSearcher searcher, Query myQuery, TopDocCollector hits)
                throws IOException {
            int numHits = hits == null ? 0 : hits.getTotalHits();
            results.totalMatches = numHits;
            if (numHits > 0) {
                Set<String> seenFieldNames = new HashSet<String>();
                results.results = new LinkedList<Map<String, String[]>>();
                int start = 0;
                int max = -1;
                if (pageSize > 0) {
                    start = pageSize * (page - 1);
                    max = pageSize;
                }
                int maxr = maxResults < 1 ? numHits : maxResults;
                ScoreDoc[] docs = hits == null ? null : hits.topDocs().scoreDocs;
                for (int i = start; i < numHits && i < maxr && ((max--) != 0); i++) {
                    int docId = docs[i].doc;
                    Document doc = searcher.doc(docId);
                    List<Field> fields = doc.getFields();

                    // use a TreeMap to keep keys sorted
                    Map<String, String[]> data = new TreeMap<String, String[]>();
                    for (Field field : fields) {
                        data.put(field.name(), doc.getValues(field.name()));
                    }

                    Set<String> fieldSet = new HashSet<String>();
                    fieldSet.addAll(data.keySet());

                    // see if doc was missing any seen fields...
                    Collection<String> fill = CollectionUtils.subtract(seenFieldNames, fieldSet);
                    if (fill.size() > 0) {
                        for (String fieldName : fill) {
                            data.put(fieldName, null);
                        }
                    }

                    // see if any fields we have not seen yet...
                    Collection<String> missing = CollectionUtils.subtract(fieldSet, seenFieldNames);

                    // any keys in 'missing' need to be added to all previous
                    // results so they all have same keys
                    if (missing.size() > 0) {
                        for (Map<String, String[]> map : results.results) {
                            for (Iterator<String> itr = missing.iterator(); itr.hasNext();) {
                                map.put(itr.next(), null);
                            }
                        }
                        seenFieldNames.addAll(missing);
                    }
                    results.results.add(data);
                }
            }
        }
    });
    return results;
}

From source file:main.java.run.SearchFiles.java

License:Apache License

/**
 * This demonstrates a typical paging search scenario, where the search engine presents 
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 * 
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 * 
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                double score = hits[i].score;
                System.out.println((i + 1) + ". " + path + ". " + score);
                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title: " + doc.get("title"));
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }

        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}

From source file:Main.WebAPI.Search.java

/**
 * @param args args[0] is a query
 * 
 * @throws IOException
 * @throws ParseException
 * @throws InvalidTokenOffsetsException 
 */

public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException {
    // open an existing Thai-analyzed index, search it, and highlight the best fragments of each hit
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);

    Directory index = FSDirectory.open(new File("data/indexing"));
    String querystr = args.length > 0 ? args[0] : "mike lab";
    // "content" is the default field to use when the
    // query does not explicitly name a field.
    Query query = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer)
            .parse(querystr);

    // 3. search
    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);

    TopDocs hits = searcher.search(query, hitsPerPage);

    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
    String preview;
    // iterate over at most hitsPerPage hits; fewer may have matched
    for (int i = 0; i < Math.min(hits.scoreDocs.length, hitsPerPage); i++) {
        int id = hits.scoreDocs[i].doc;
        Document doc = searcher.doc(id);
        String text;
        preview = "";
        System.out.println(doc.get("url"));
        System.out.println(doc.get("title"));
        text = doc.get("content");
        TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content",
                analyzer);
        TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
        int k = 0;
        for (TextFragment frag1 : frag) {
            if ((frag1 != null) && (frag1.getScore() > 0)) {
                preview += frag1.toString() + "...<br>";
                k++;
                // Get 2 Line Preview
                if (k >= 2)
                    break;
            }
        }
        System.out.println("-------------");
    }
}