Example usage for org.apache.lucene.queryparser.classic QueryParser QueryParser

List of usage examples for org.apache.lucene.queryparser.classic QueryParser QueryParser

Introduction

On this page you can find example usage of the org.apache.lucene.queryparser.classic QueryParser constructor, QueryParser(String, Analyzer).

Prototype

public QueryParser(String f, Analyzer a) 

Source Link

Document

Create a query parser.

Usage

From source file:SearchFiles.java

License:Apache License

/** Simple command-line based search demo. */
/**
 * Simple command-line based search demo.
 *
 * <p>Reads queries either from a file ({@code -queries}), a single
 * {@code -query} argument, or interactively from stdin, parses each line with
 * a {@link QueryParser} bound to {@code -field} (default {@code "contents"}),
 * and pages through the hits via {@code doPagingSearch}.
 *
 * @param args command-line flags; see {@code usage} below
 * @throws Exception on index-open, parse, or I/O failure (demo code: no recovery)
 */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;

    // Each value-taking flag consumes the following argument (hence the extra i++).
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    BufferedReader in = null;
    if (queries != null) {
        in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    }
    QueryParser parser = new QueryParser(field, analyzer);
    while (true) {
        if (queries == null && queryString == null) { // prompt the user
            System.out.println("Enter query: ");
        }

        String line = queryString != null ? queryString : in.readLine();

        // null signals end of input. (The original also tested length() == -1,
        // which can never be true: String.length() is non-negative.)
        if (line == null) {
            break;
        }

        line = line.trim();
        if (line.length() == 0) {
            break;
        }

        Query query = parser.parse(line);
        System.out.println("Searching for: " + query.toString(field));

        if (repeat > 0) { // repeat & time as benchmark
            Date start = new Date();
            for (int i = 0; i < repeat; i++) {
                searcher.search(query, 100);
            }
            Date end = new Date();
            System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        }

        doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);

        if (queryString != null) {
            break;
        }
    }
    reader.close();
}

From source file:MyServlet.java

/**
 * Handles a search request: parses the {@code keyword} request parameter
 * against the "Classes" field, collects up to 10 hits, converts each hit into
 * a {@code Course}, and forwards the list to {@code /table.jsp}.
 *
 * <p>NOTE(review): {@code analyzer}, {@code index}, {@code log} and
 * {@code courseList} are instance fields declared elsewhere in this servlet.
 *
 * @param out      unused here; kept for signature compatibility with callers
 * @param request  carries the {@code keyword} parameter
 * @param response forwarded to the JSP dispatcher
 */
private void gotoSearch(PrintWriter out, HttpServletRequest request, HttpServletResponse response) {
    try {
        // Text to search
        String querystr = request.getParameter("keyword");

        log.addHistory(querystr);

        // "Classes" is the default field used when the query names no field explicitly.
        Query q = new QueryParser("Classes", analyzer).parse(querystr);

        // Searching code. try-with-resources closes the reader even when
        // parsing the hits or forwarding throws (the original leaked it).
        int hitsPerPage = 10;
        try (IndexReader reader = DirectoryReader.open(index)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            // Convert each hit's stored fields into a Course bean.
            courseList = new ArrayList<>();
            for (ScoreDoc hit : hits) {
                Document d = searcher.doc(hit.doc);
                courseList.add(
                        new Course(d.get("Number"), d.get("Classes"), d.get("Time"), d.get("Department")));
            }
        }
        request.setAttribute("course", courseList);
        RequestDispatcher de = request.getRequestDispatcher("/table.jsp");
        de.forward(request, response);
    } catch (Exception e) {
        // Keep the original best-effort handling, but preserve the stack trace
        // instead of printing only the message.
        e.printStackTrace();
    }
}

From source file:SearcherTest.java

/**
 * ??/*  w ww . jav  a 2s.  c om*/
 * QueryParser?????Query?
 *
 * @throws Exception
 */
@Test
public void testQueryParser() throws Exception {
    Analyzer analyzer = new StandardAnalyzer(); // ?
    String searchField = "contents";
    String q = "xxxxxxxxx$";
    //??
    QueryParser parser = new QueryParser(searchField, analyzer);
    //
    Query query = parser.parse(q);
    TopDocs hits = is.search(query, 100);
    System.out.println("? " + q + "" + hits.totalHits + "");
    for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);
        System.out.println(doc.get("fullPath"));
    }
}

From source file:IrqaQuery.java

License:Apache License

/**
 * Searches {@code index} for {@code question} (escaped, so Lucene syntax in
 * the question is treated literally) and returns up to {@code numResult}
 * matching documents.
 *
 * @param index     path to the Lucene index directory
 * @param stoppath  path to the stopword file consumed by {@code mygetStopwords}
 * @param question  raw query text; special characters are escaped before parsing
 * @param numResult maximum number of documents to return
 * @param sim       "TFIDF" selects ClassicSimilarity; anything else uses BM25
 * @return the stored-field documents of the top hits (possibly empty)
 * @throws Exception on index-open, parse, or search failure
 */
public static List<Document> query(String index, String stoppath, String question, int numResult, String sim)
        throws Exception {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    try {
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath)));

        if (sim.equals("TFIDF"))
            searcher.setSimilarity(new ClassicSimilarity());
        else // BM25 both when explicitly requested and as the default
            searcher.setSimilarity(new BM25Similarity());

        String field = "contents";
        QueryParser parser = new QueryParser(field, analyzer);
        // escape() is static; call it via the class rather than the instance.
        Query query = parser.parse(QueryParser.escape(question));

        TopDocs results = searcher.search(query, numResult);
        ScoreDoc[] hits = results.scoreDocs;
        List<Document> docs = new ArrayList<Document>();

        // hits.length is already min(totalHits, numResult); iterate it directly.
        for (int i = 0; i < hits.length; i++) {
            docs.add(searcher.doc(hits[i].doc));
        }

        return docs;
    } finally {
        // The original leaked the reader; documents are fully loaded before close.
        reader.close();
    }
}

From source file:Get_Top_Documents_Based_on_Lucene.java

License:Apache License

/** Simple command-line based search demo. */
/**
 * Batch search driver: reads a TSV training file, issues each question
 * (column 2) as a Lucene query, and writes the paged results to an output
 * file via {@code doPagingSearch}.
 *
 * <p>Fixes in this revision: the header-row check used {@code ==} on Strings
 * (identity, never true for a split() token), so the "question" header was
 * never skipped — replaced with {@code equals}. The writer is now closed via
 * try-with-resources so it is not leaked on exceptions, and a large block of
 * commented-out dead code was removed.
 *
 * @param args command-line flags; see {@code usage} below
 * @throws Exception on index-open or I/O failure (demo code: no recovery)
 */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index_external_links_v1/";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 20;

    // Each value-taking flag consumes the following argument (hence the extra i++).
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    BufferedReader in = null;
    if (queries != null) {
        in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    }
    QueryParser parser = new QueryParser(field, analyzer);

    // Read questions from the training TSV line by line; column 2 is the query.
    String path_train = "data/training_set.tsv";
    String path_output = "data/lucene_search_result_train_index_wiki_external_links_v1.txt";
    try (Writer writer = new BufferedWriter(
                 new OutputStreamWriter(new FileOutputStream(path_output), "UTF-8"));
         BufferedReader br = new BufferedReader(new FileReader(path_train))) {
        String line;
        while ((line = br.readLine()) != null) {
            line = line.trim();
            String[] lst = line.split("\t");
            String query_s = lst[1];
            // Skip the TSV header row. (Bug fix: the original used ==, which
            // compares identity and never matched a split() token.)
            if ("question".equals(query_s)) {
                continue;
            }
            System.out.println("query_s: " + query_s);
            writer.write(query_s + "\t");
            try {
                Query query = parser.parse(query_s);
                System.out.println("Searching for: " + query.toString(field));
                doPagingSearch(in, writer, searcher, query, hitsPerPage, raw,
                        queries == null && queryString == null);
            } catch (org.apache.lucene.queryparser.classic.ParseException e) {
                // Unparseable question: skip it and continue with the next row.
                continue;
            }
        }
    }
    reader.close();
}

From source file:luceneInterface.java

License:Apache License

/**
 * Searches {@code index} for {@code question} and returns up to
 * {@code numResult} documents. The effective query is a BooleanQuery that
 * SHOULD-matches the escaped question against both the "contents" and "sec"
 * fields, re-parsed through the QueryParser (original behavior preserved).
 *
 * @param index     path to the Lucene index directory
 * @param stoppath  path to the stopword file consumed by {@code mygetStopwords}
 * @param question  raw query text; special characters are escaped before parsing
 * @param numResult maximum number of documents to return
 * @param sim       "TFIDF" selects ClassicSimilarity; anything else uses BM25
 * @return the stored-field documents of the top hits (possibly empty)
 * @throws Exception on index-open, parse, or search failure
 */
public static List<Document> query(String index, String stoppath, String question, int numResult, String sim)
        throws Exception {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    try {
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stoppath)));

        if (sim.equals("TFIDF"))
            searcher.setSimilarity(new ClassicSimilarity());
        else // BM25 both when explicitly requested and as the default
            searcher.setSimilarity(new BM25Similarity());

        String field = "contents";
        QueryParser parser = new QueryParser(field, analyzer);
        // escape() is static; call it via the class rather than the instance.
        String escaped = QueryParser.escape(question);

        // SHOULD-match the raw escaped text as a single term in both fields,
        // then round-trip the BooleanQuery through the parser (as the
        // original did) so the terms are analyzed.
        BooleanQuery.Builder bqb = new BooleanQuery.Builder();
        bqb.add(new TermQuery(new Term("contents", escaped)), BooleanClause.Occur.SHOULD);
        bqb.add(new TermQuery(new Term("sec", escaped)), BooleanClause.Occur.SHOULD);

        TopDocs results = searcher.search(parser.parse(bqb.build().toString()), numResult);

        ScoreDoc[] hits = results.scoreDocs;
        List<Document> docs = new ArrayList<Document>();

        // hits.length is already min(totalHits, numResult); iterate it directly.
        for (int i = 0; i < hits.length; i++) {
            docs.add(searcher.doc(hits[i].doc));
        }

        return docs;
    } finally {
        // The original leaked the reader; documents are fully loaded before close.
        reader.close();
    }
}

From source file:ac.simons.tweetarchive.tweets.TweetRepositoryImpl.java

License:Apache License

/**
 * Full-text search over stored tweets. Parses {@code query} against the
 * "content" field with the analyzer registered for {@link TweetEntity};
 * an unparseable query is logged and yields an empty list.
 *
 * @param query raw user-supplied Lucene query string
 * @return matching tweets, or an empty list when the query cannot be parsed
 */
@Override
@Transactional(readOnly = true)
public List<TweetEntity> searchByQuery(final String query) {
    final FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
    try {
        final Analyzer tweetAnalyzer =
                fullTextEntityManager.getSearchFactory().getAnalyzer(TweetEntity.class);
        final QueryParser queryParser = new QueryParser("content", tweetAnalyzer);
        return fullTextEntityManager
                .createFullTextQuery(queryParser.parse(query), TweetEntity.class)
                .getResultList();
    } catch (ParseException e) {
        log.error("Could not parse query", e);
        return new ArrayList<>();
    }
}

From source file:ai.castor.idf.FetchTermIDF.java

License:Apache License

/**
 * Computes the classic (TF-IDF) inverse document frequency of {@code term}
 * against the index behind the {@code reader} field. Best-effort: any
 * parse/lookup failure is reported on stderr and 0.0 is returned.
 *
 * @param term raw term text; Lucene special characters are escaped first
 * @return the IDF of the term, or 0.0 on failure
 * @throws ParseException declared for signature compatibility
 */
public double getTermIDF(String term) throws ParseException {
    Analyzer analyzer = new EnglishAnalyzer(CharArraySet.EMPTY_SET);
    QueryParser queryParser = new QueryParser(FIELD_BODY, analyzer);
    ClassicSimilarity similarity = new ClassicSimilarity();

    String escaped = queryParser.escape(term);
    double idf = 0.0;
    try {
        // Parsing a single escaped token yields a TermQuery; extract its term.
        TermQuery parsed = (TermQuery) queryParser.parse(escaped);
        Term indexTerm = parsed.getTerm();
        idf = similarity.idf(reader.docFreq(indexTerm), reader.numDocs());

        System.out.println(term + '\t' + escaped + '\t' + parsed + '\t' + indexTerm + '\t' + idf);
    } catch (Exception e) {
        // Deliberate best-effort: report and fall through to the 0.0 default.
        System.err.println("Exception in fetching IDF(" + term + "): " + e.toString());
    }
    return idf;
}

From source file:ai.castor.idf.IDFScorer.java

License:Apache License

/**
 * Sums the IDF of each distinct answer term that also appears in the parsed
 * question. Terms that fail to parse are skipped (best-effort, as before).
 *
 * @param query   the question text; escaped and parsed to build the term set
 * @param answer  whitespace-separated candidate answer terms
 * @param analyze true to analyze with EnglishAnalyzer + stopwords,
 *                false for plain whitespace tokenization
 * @return the summed IDF of overlapping terms (0.0 when none overlap)
 * @throws ParseException if the question itself cannot be parsed
 */
public double calcIDF(String query, String answer, boolean analyze) throws ParseException {
    Analyzer analyzer;
    if (analyze) {
        analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(stopWords));
    } else {
        analyzer = new WhitespaceAnalyzer();
    }

    QueryParser qp = new QueryParser(FIELD_BODY, analyzer);
    ClassicSimilarity similarity = new ClassicSimilarity();

    String escapedQuery = qp.escape(query);
    Query question = qp.parse(escapedQuery);
    // The parsed query's string form is "field:term ..." tokens; use them as the overlap set.
    HashSet<String> questionTerms = new HashSet<>(Arrays.asList(question.toString().trim().split("\\s+")));

    double idf = 0.0;
    HashSet<String> seenTerms = new HashSet<>();

    String[] terms = answer.split("\\s+");
    for (String term : terms) {
        try {
            TermQuery q = (TermQuery) qp.parse(term);
            String termKey = q.getTerm().toString();

            // Set.add returns true only for a first occurrence, which replaces
            // the original contains()+add() pair; the no-op `idf += 0.0` else
            // branch has been dropped.
            if (questionTerms.contains(termKey) && seenTerms.add(termKey)) {
                idf += similarity.idf(reader.docFreq(q.getTerm()), reader.numDocs());
            }
        } catch (Exception e) {
            // Unparseable answer token: skip it (deliberate best-effort).
            continue;
        }
    }
    return idf;
}

From source file:app.finder.topicsource.service.SearchFiles.java

License:Apache License

/**
 * Searches the index at {@code indexDir} for {@code queryString} and wraps
 * each hit's file into a {@link TopicSource} (numbered name, canonical path,
 * full text).
 *
 * <p>Fixes in this revision: a redundant {@code searcher.search(query, null,
 * SEARCH_MAX_SIZE)} call whose results were discarded has been removed, and
 * the reader is closed even when searching or file reading throws.
 *
 * @param queryString raw Lucene query text
 * @return one TopicSource per hit, in rank order
 * @throws IOException    on index or file access failure
 * @throws ParseException if the query cannot be parsed
 */
public List<TopicSource> getTopicSources(String queryString) throws IOException, ParseException {
    String field = "contents";
    String queries = null; // kept for the doSearch interactive-mode flag below
    int repeat = 0;
    boolean raw = false;

    int hitsPerPage = SEARCH_MAX_SIZE; // 100;
    List<String> list;
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexDir)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();

        BufferedReader in = null;
        QueryParser parser = new QueryParser(field, analyzer);

        Query query = parser.parse(queryString);

        // NOTE(review): queries is always null here, so the last flag reduces
        // to (queryString == null); preserved verbatim for doSearch's contract.
        list = doSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);
    }

    List<TopicSource> topicSourceList = new ArrayList<TopicSource>();
    int counter = 0;
    for (String fileName : list) {
        TopicSource topicSource = new TopicSource();
        File file = new File(fileName);

        topicSource.setFileName("" + (++counter) + ". " + file.getName());
        topicSource.setPath(file.getCanonicalPath());
        topicSource.setText(readFile(file));
        topicSourceList.add(topicSource);
    }

    return topicSourceList;
}