List of usage examples for org.apache.lucene.document Document toString
@Override public final String toString()
From source file:ca.dracode.ais.service.SearchService.java
License:Open Source License
/** * Tells the indexer to load a file's metadata into memory for use in searches. * The function can be called multiple times to load several files. Files remain loaded until the unload * function is called. Please make sure to call unload when you are finished with the document. * @param filePath - the location of the file to prepare; is also the identifier for the file's data in the index * @return 0 if the file exists in the index and was not already loaded; * 1 if the file was already loaded; * 2 if the file was not loaded and does not exist in the index; * -1 if there was an error//from w w w .j a va2s . com */ public int load(final String filePath) { if (this.data.containsKey(filePath)) { return 1; } SearchData tmpData = new SearchData(); Document tmp; if (SearchService.this.sm.searcher == null) { Log.e(TAG, "Searcher is null"); return -1; } Log.i(TAG, "Loading: " + filePath + " " + new File(filePath).getAbsolutePath()); if ((tmp = this.sm.searcher.getMetaFile(new File(filePath).getAbsolutePath())) != null) { try { IndexableField f = tmp.getField("pages"); if (f == null) { Log.e(TAG, "Cannot find pages in metafile: " + tmp.toString()); return -1; } else { tmpData.pages = f.numericValue().intValue(); } } catch (Exception e) { Log.e(TAG, "Error", e); return -1; } this.data.put(filePath, tmpData); return 0; } else { return 2; } }
From source file:cc.pp.analyzer.ik.demo.IKAnalyzerDemo.java
License:Apache License
public static void main(String[] args) { //Lucene Document?? String fieldName = "text"; ////w ww. j a va2 s .c o m String text = "IK Analyzer???????"; //IKAnalyzer? Analyzer analyzer = new IKAnalyzer(Version.LUCENE_48, true); Directory directory = null; IndexWriter iwriter = null; DirectoryReader ireader = null; IndexSearcher isearcher = null; try { // directory = new RAMDirectory(); //?IndexWriterConfig IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_48, analyzer); iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); iwriter = new IndexWriter(directory, iwConfig); // Document doc = new Document(); doc.add(new LongField("ID", 1000, Field.Store.YES)); doc.add(new TextField(fieldName, text, Field.Store.YES)); iwriter.addDocument(doc); iwriter.close(); //?********************************** //? ireader = DirectoryReader.open(directory); isearcher = new IndexSearcher(ireader); String keyword = "?"; // String keyword = ""; //QueryParser?Query QueryParser qp = new QueryParser(Version.LUCENE_48, fieldName, analyzer); qp.setDefaultOperator(QueryParser.AND_OPERATOR); Query query = qp.parse(keyword); System.out.println("Query = " + query); //?5? TopDocs topDocs = isearcher.search(query, 5); System.out.println("" + topDocs.totalHits); // ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (int i = 0; i < topDocs.totalHits; i++) { Document targetDoc = isearcher.doc(scoreDocs[i].doc); System.out.println("" + targetDoc.toString()); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { e.printStackTrace(); } } if (directory != null) { try { directory.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:cc.twittertools.search.local.RunQueries.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(/*from ww w . jav a 2s . c o m*/ OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return") .create(NUM_RESULTS_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg() .withDescription("file containing topics in TREC format").create(QUERIES_OPTION)); options.addOption(OptionBuilder.withArgName("similarity").hasArg() .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION)); options.addOption(new Option(VERBOSE_OPTION, "print out complete document")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(QUERIES_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(RunQueries.class.getName(), options); System.exit(-1); } File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION)); if (!indexLocation.exists()) { System.err.println("Error: " + indexLocation + " does not exist!"); System.exit(-1); } String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG; String topicsFile = cmdline.getOptionValue(QUERIES_OPTION); int numResults = 1000; try { if (cmdline.hasOption(NUM_RESULTS_OPTION)) { numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION)); } } catch (NumberFormatException e) { System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION)); System.exit(-1); } String similarity = "LM"; if (cmdline.hasOption(SIMILARITY_OPTION)) { similarity = cmdline.getOptionValue(SIMILARITY_OPTION); } boolean verbose = cmdline.hasOption(VERBOSE_OPTION); PrintStream out = new PrintStream(System.out, true, "UTF-8"); IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation)); IndexSearcher searcher = new IndexSearcher(reader); if (similarity.equalsIgnoreCase("BM25")) { searcher.setSimilarity(new BM25Similarity()); } else if (similarity.equalsIgnoreCase("LM")) { searcher.setSimilarity(new LMDirichletSimilarity(2500.0f)); } QueryParser p = new QueryParser(Version.LUCENE_43, StatusField.TEXT.name, IndexStatuses.ANALYZER); TrecTopicSet topics = TrecTopicSet.fromFile(new File(topicsFile)); for (TrecTopic topic : topics) { Query query = p.parse(topic.getQuery()); Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, topic.getQueryTweetTime(), true, true); TopDocs rs = searcher.search(query, filter, numResults); int i = 1; for (ScoreDoc scoreDoc : rs.scoreDocs) { Document hit = searcher.doc(scoreDoc.doc); out.println(String.format("%s Q0 %s %d %f %s", topic.getId(), hit.getField(StatusField.ID.name).numericValue(), i, scoreDoc.score, runtag)); if (verbose) { out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " ")); } i++; } } reader.close(); out.close(); }
From source file:cc.twittertools.search.local.SearchStatuses.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(//from ww w. j a va 2s.c o m OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("query id").create(QID_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("query text").create(QUERY_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("runtag").create(RUNTAG_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("maxid").create(MAX_ID_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return") .create(NUM_RESULTS_OPTION)); options.addOption(OptionBuilder.withArgName("similarity").hasArg() .withDescription("similarity to use (BM25, LM)").create(SIMILARITY_OPTION)); options.addOption(new Option(VERBOSE_OPTION, "print out complete document")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(QUERY_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(SearchStatuses.class.getName(), options); System.exit(-1); } File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION)); if (!indexLocation.exists()) { System.err.println("Error: " + indexLocation + " does not exist!"); System.exit(-1); } String qid = cmdline.hasOption(QID_OPTION) ? cmdline.getOptionValue(QID_OPTION) : DEFAULT_QID; String queryText = cmdline.hasOption(QUERY_OPTION) ? cmdline.getOptionValue(QUERY_OPTION) : DEFAULT_Q; String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG; long maxId = cmdline.hasOption(MAX_ID_OPTION) ? Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION)) : DEFAULT_MAX_ID; int numResults = cmdline.hasOption(NUM_RESULTS_OPTION) ? Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION)) : DEFAULT_NUM_RESULTS; boolean verbose = cmdline.hasOption(VERBOSE_OPTION); String similarity = "LM"; if (cmdline.hasOption(SIMILARITY_OPTION)) { similarity = cmdline.getOptionValue(SIMILARITY_OPTION); } PrintStream out = new PrintStream(System.out, true, "UTF-8"); IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation)); IndexSearcher searcher = new IndexSearcher(reader); if (similarity.equalsIgnoreCase("BM25")) { searcher.setSimilarity(new BM25Similarity()); } else if (similarity.equalsIgnoreCase("LM")) { searcher.setSimilarity(new LMDirichletSimilarity(2500.0f)); } QueryParser p = new QueryParser(Version.LUCENE_43, IndexStatuses.StatusField.TEXT.name, IndexStatuses.ANALYZER); Query query = p.parse(queryText); Filter filter = NumericRangeFilter.newLongRange(StatusField.ID.name, 0L, maxId, true, true); TopDocs rs = searcher.search(query, filter, numResults); int i = 1; for (ScoreDoc scoreDoc : rs.scoreDocs) { Document hit = searcher.doc(scoreDoc.doc); out.println(String.format("%s Q0 %s %d %f %s", qid, hit.getField(StatusField.ID.name).numericValue(), i, scoreDoc.score, runtag)); if (verbose) { out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " ")); } i++; } reader.close(); out.close(); }
From source file:cc.wikitools.lucene.hadoop.SearchWikipediaHdfs.java
License:Apache License
@SuppressWarnings("static-access") @Override/*from w ww . j av a2 s. co m*/ public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("query text").create(QUERY_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return") .create(NUM_RESULTS_OPTION)); options.addOption(new Option(VERBOSE_OPTION, "print out complete document")); options.addOption(new Option(TITLE_OPTION, "search title")); options.addOption(new Option(ARTICLE_OPTION, "search article")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(QUERY_OPTION) || !cmdline.hasOption(INDEX_OPTION) || !cmdline.hasOption(QUERY_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(SearchWikipediaHdfs.class.getName(), options); System.exit(-1); } String indexLocation = cmdline.getOptionValue(INDEX_OPTION); String queryText = cmdline.getOptionValue(QUERY_OPTION); int numResults = cmdline.hasOption(NUM_RESULTS_OPTION) ? Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION)) : DEFAULT_NUM_RESULTS; boolean verbose = cmdline.hasOption(VERBOSE_OPTION); boolean searchArticle = !cmdline.hasOption(TITLE_OPTION); PrintStream out = new PrintStream(System.out, true, "UTF-8"); HdfsWikipediaSearcher searcher = new HdfsWikipediaSearcher(new Path(indexLocation), getConf()); TopDocs rs = null; if (searchArticle) { rs = searcher.searchArticle(queryText, numResults); } else { rs = searcher.searchTitle(queryText, numResults); } int i = 1; for (ScoreDoc scoreDoc : rs.scoreDocs) { Document hit = searcher.doc(scoreDoc.doc); out.println(String.format("%d. %s (id = %s) %f", i, hit.getField(IndexField.TITLE.name).stringValue(), hit.getField(IndexField.ID.name).stringValue(), scoreDoc.score)); if (verbose) { out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " ")); } i++; } searcher.close(); out.close(); return 0; }
From source file:cc.wikitools.lucene.SearchWikipedia.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(//from w ww .j a va 2 s.com OptionBuilder.withArgName("path").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption( OptionBuilder.withArgName("string").hasArg().withDescription("query text").create(QUERY_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return") .create(NUM_RESULTS_OPTION)); options.addOption(new Option(VERBOSE_OPTION, "print out complete document")); options.addOption(new Option(TITLE_OPTION, "search title")); options.addOption(new Option(ARTICLE_OPTION, "search article")); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(QUERY_OPTION) || !cmdline.hasOption(INDEX_OPTION) || !cmdline.hasOption(QUERY_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(SearchWikipedia.class.getName(), options); System.exit(-1); } File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION)); if (!indexLocation.exists()) { System.err.println("Error: " + indexLocation + " does not exist!"); System.exit(-1); } String queryText = cmdline.getOptionValue(QUERY_OPTION); int numResults = cmdline.hasOption(NUM_RESULTS_OPTION) ? Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION)) : DEFAULT_NUM_RESULTS; boolean verbose = cmdline.hasOption(VERBOSE_OPTION); boolean searchArticle = !cmdline.hasOption(TITLE_OPTION); PrintStream out = new PrintStream(System.out, true, "UTF-8"); WikipediaSearcher searcher = new WikipediaSearcher(indexLocation); TopDocs rs = null; if (searchArticle) { rs = searcher.searchArticle(queryText, numResults); } else { rs = searcher.searchTitle(queryText, numResults); } int i = 1; for (ScoreDoc scoreDoc : rs.scoreDocs) { Document hit = searcher.doc(scoreDoc.doc); out.println(String.format("%d. %s (wiki id = %s, lucene id = %d) %f", i, hit.getField(IndexField.TITLE.name).stringValue(), hit.getField(IndexField.ID.name).stringValue(), scoreDoc.doc, scoreDoc.score)); if (verbose) { out.println("# " + hit.toString().replaceAll("[\\n\\r]+", " ")); } i++; } searcher.close(); out.close(); }
From source file:choco.lucene.IKAnalyzerDemo.java
License:Apache License
public static void main(String[] args) { //Lucene Document?? String fieldName = "text"; // //from w w w. j a va 2 s . c o m String text = "IK Analyzer???????"; //IKAnalyzer? Analyzer analyzer = new IKAnalyzer(); Directory directory = null; IndexWriter iwriter = null; IndexReader ireader = null; IndexSearcher isearcher = null; try { // directory = new RAMDirectory(); //?IndexWriterConfig IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_34, analyzer); iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); iwriter = new IndexWriter(directory, iwConfig); // Document doc = new Document(); doc.add(new Field("ID", "10000", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED)); iwriter.addDocument(doc); iwriter.close(); //?********************************** //? ireader = IndexReader.open(directory); isearcher = new IndexSearcher(ireader); String keyword = "?"; //QueryParser?Query QueryParser qp = new QueryParser(Version.LUCENE_34, fieldName, analyzer); qp.setDefaultOperator(QueryParser.AND_OPERATOR); Query query = qp.parse(keyword); //?5? TopDocs topDocs = isearcher.search(query, 5); System.out.println("" + topDocs.totalHits); // ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (int i = 0; i < topDocs.totalHits; i++) { Document targetDoc = isearcher.doc(scoreDocs[i].doc); System.out.println("" + targetDoc.toString()); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { e.printStackTrace(); } } if (directory != null) { try { directory.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:com.cohesionforce.search.EMFIndex.java
License:Open Source License
/** * Searches the index for an attribute with a matching value. * /* w w w. jav a 2 s .c om*/ * @param feature * - the attribute to match when searching * @param value * - the value to match when searching * @return a list of search results * @throws IllegalArgumentException * if any parameters are null, or if the attribute is not marked * with the search annotation * @throws IOException * if there are issues reading the index */ public List<SearchResult> search(EStructuralFeature feature, String value) throws IllegalArgumentException, IOException { if (feature == null) { throw new IllegalArgumentException("Attribute cannot be null"); } if (value == null) { throw new IllegalArgumentException("Value cannot be null"); } List<SearchResult> rvalue = new ArrayList<SearchResult>(); boolean tokenize = false; EAnnotation annotation = feature.getEAnnotation(EMFIndexUtil.SOURCE); if (annotation != null) { if (annotation.getDetails().containsKey(EMFIndexUtil.TOKENIZE_KEY)) { tokenize = true; } } else { // Bail out early if this feature should not be indexed throw new IllegalArgumentException("Attribute is not annotated to be indexed"); } String key = EMFIndexUtil.getKey(feature); DirectoryReader reader = DirectoryReader.open(fsDir); IndexSearcher searcher = new IndexSearcher(reader); try { Query query = null; if (tokenize) { QueryParser parser = new QueryParser(version, key, analyzer); query = parser.parse(value); } else { Term term = new Term(key, value); query = new TermQuery(term); } ScoreDoc[] hits = searcher.search(query, null, MAX_SEARCH_RESULT).scoreDocs; // Iterate through the results: for (int i = 0; i < hits.length; i++) { Document hitDoc = searcher.doc(hits[i].doc); SearchResult result = new SearchResult(hitDoc); rvalue.add(result); logger.debug(hitDoc.toString()); } } catch (ParseException e) { logger.error(e.getMessage()); } finally { reader.close(); } return rvalue; }
From source file:com.cohesionforce.search.EMFIndex.java
License:Open Source License
/** * Searches the index for an attribute with a matching value and a matching * eclass/*from w w w .j a v a 2s . c om*/ * * @param eclass * - the EClass to match when searching * @param attr * - the EAttribute to match when searching * @param value * - the value to match when searching * @return a list of search results * @throws IllegalArgumentException * if any parameters are null, or if the attribute is not marked * with the search annotation * @throws IOException * if there are issues reading the index */ public List<SearchResult> search(EClass eclass, EAttribute attr, String value) throws IllegalArgumentException, IOException { if (eclass == null) { throw new IllegalArgumentException("EClass cannot be null"); } if (attr == null) { throw new IllegalArgumentException("Attribute cannot be null"); } if (value == null) { throw new IllegalArgumentException("Value cannot be null"); } EAnnotation annotation = eclass.getEAnnotation(EMFIndexUtil.SOURCE); if (annotation == null) { // Bail out early if this feature should not be indexed throw new IllegalArgumentException("EClass is not annotated to be indexed"); } List<SearchResult> rvalue = new ArrayList<SearchResult>(); boolean tokenize = false; annotation = attr.getEAnnotation(EMFIndexUtil.SOURCE); if (annotation != null) { if (annotation.getDetails().containsKey(EMFIndexUtil.TOKENIZE_KEY)) { tokenize = true; } } else { // Bail out early if this feature should not be indexed throw new IllegalArgumentException("Attribute is not annotated to be indexed"); } String key = EMFIndexUtil.getKey(attr); DirectoryReader reader = DirectoryReader.open(fsDir); IndexSearcher searcher = new IndexSearcher(reader); try { BooleanQuery bquery = new BooleanQuery(); TermQuery classQuery = new TermQuery(new Term(EMFIndexUtil.ETYPE_KEY, eclass.getName())); bquery.add(classQuery, Occur.MUST); Query query = null; if (tokenize) { QueryParser parser = new QueryParser(version, key, analyzer); query = parser.parse(value); } else { Term term = new Term(key, value); query = new TermQuery(term); } bquery.add(query, Occur.MUST); ScoreDoc[] hits = searcher.search(bquery, null, MAX_SEARCH_RESULT).scoreDocs; // Iterate through the results: for (int i = 0; i < hits.length; i++) { Document hitDoc = searcher.doc(hits[i].doc); SearchResult result = new SearchResult(hitDoc); rvalue.add(result); logger.debug(hitDoc.toString()); } } catch (ParseException e) { logger.error(e.getMessage()); } finally { reader.close(); } return rvalue; }
From source file:com.cohesionforce.search.EMFIndex.java
License:Open Source License
/** * Searches the index for a matching eclass * //from ww w. j a v a 2 s . c o m * @param eclass * - the EClass to match when searching * @return a list of search results * @throws IllegalArgumentException * if the eclass is null or is not annotated for indexing * @throws IOException * if there are issues reading the index */ public List<SearchResult> search(EClass eclass) throws IllegalArgumentException, IOException { if (eclass == null) { throw new IllegalArgumentException("EClass cannot be null"); } EAnnotation annotation = eclass.getEAnnotation(EMFIndexUtil.SOURCE); if (annotation == null) { throw new IllegalArgumentException("EClass is not annotated for indexing"); } List<SearchResult> rvalue = new ArrayList<SearchResult>(); DirectoryReader reader = DirectoryReader.open(fsDir); IndexSearcher searcher = new IndexSearcher(reader); try { TermQuery classQuery = new TermQuery(new Term(EMFIndexUtil.ETYPE_KEY, eclass.getName())); ScoreDoc[] hits = searcher.search(classQuery, null, MAX_SEARCH_RESULT).scoreDocs; // Iterate through the results: for (int i = 0; i < hits.length; i++) { Document hitDoc = searcher.doc(hits[i].doc); SearchResult result = new SearchResult(hitDoc); rvalue.add(result); logger.debug(hitDoc.toString()); } } finally { reader.close(); } return rvalue; }