List of usage examples for org.apache.lucene.util Version LATEST
Version LATEST
To view the source code for org.apache.lucene.util Version LATEST, click the Source Link.
WARNING: if you use this setting, and then upgrade to a newer release of Lucene, sizable changes may happen.
From source file:GUIFrame.java
public Directory InitializeIndex(Directory Index, PorterStemAnalyzer Analyzer) throws IOException { // Create the index Index = new RAMDirectory(); IndexWriterConfig myConfig = new IndexWriterConfig(Version.LATEST, Analyzer); try (IndexWriter indoWriter = new IndexWriter(Index, myConfig)) { BufferedReader buffR = new BufferedReader(new FileReader("npl\\doc-text")); String currLine;//from ww w .j a va2 s . co m StringBuilder idDoc = new StringBuilder(); StringBuilder sumDoc = new StringBuilder(); currLine = buffR.readLine(); while (currLine != null) { if (currLine.matches(".*\\d+.*")) { // Check if current line contains a document's id // Capture the document's id and summary idDoc.append(currLine); currLine = buffR.readLine(); // While the reader holds line of summary while (!(currLine.contains("/"))) { sumDoc.append(currLine).append(" "); currLine = buffR.readLine(); } // Add the new document to the index AddDocument(indoWriter, idDoc.toString(), sumDoc.toString()); //System.out.println(idDoc.toString()); //System.out.println(sumDoc.toString()); // Clear the string builders idDoc.delete(0, idDoc.length()); sumDoc.delete(0, sumDoc.length()); // Continue to next line currLine = buffR.readLine(); } else { // Continue to next line currLine = buffR.readLine(); } } } return Index; }
From source file:GUIFrame.java
public int SearchResults(PorterStemAnalyzer Analyzer, Directory Index, String userInput, DefaultListModel DocList) throws ParseException, IOException { // The query/*from ww w.jav a 2s .com*/ userInput = userInput.replace("\"", ""); Query q = new QueryParser(Version.LATEST, "summary", Analyzer).parse(userInput); // The search int hitsPerPage = 20; // return 20 top documents IndexReader indoReader = DirectoryReader.open(Index); IndexSearcher indoSearcher = new IndexSearcher(indoReader); TopScoreDocCollector docCollector = TopScoreDocCollector.create(hitsPerPage, true); indoSearcher.search(q, docCollector); ScoreDoc[] hits = docCollector.topDocs().scoreDocs; // Copy results to list models for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = indoSearcher.doc(docId); DocList.addElement(d.get("docID")); SumListModel.addElement(d.get("summary")); } GetTerms(Index, Analyzer, "summary", userInput); return hits.length; }
From source file:analyzers.FormalAnalyzer.java
License:Apache License
/** * Define how tokens are processed.//w ww . java 2 s . c om * * @param fieldName required input * @param reader reader for document */ @Override protected Analyzer.TokenStreamComponents createComponents(final String fieldName, final Reader reader) { Tokenizer tokenizer = new StandardTokenizer(reader); TokenStream chain = tokenizer; if (!tokenOpts.disableAllFilters) { // the chain of token filters... chain = new StandardFilter(chain); // discard tokens based on their type attribute chain = new StandardTagFilter(chain, tokenOpts); // convert tokens to lowercase chain = new LowerCaseFilter(chain); // replace accented chars with non-accented ASCII equivalents chain = new ASCIIFoldingFilter(chain); // remove stop words (must come after lowercasing) chain = new StopFilter(chain, stopWordSet); // remove 's chain = new EnglishPossessiveFilter(Version.LATEST, chain); // spelling correction if (!spellingHashtable.isEmpty()) chain = new SpellingCorrectionFilter(chain, spellingHashtable); if (!tokenOpts.disableStemming) { // Krovets stemmer (smarter than the Porter stemmer) chain = new KStemFilter(chain); } } return new Analyzer.TokenStreamComponents(tokenizer, chain); }
From source file:buscador.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);//from w w w .j a va 2 s. c o m } String index = "Zaguan1"; String[] fields = { "title", "description", "identifier", "date", "creator" }; BooleanClause.Occur[] flags = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD }; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new SpanishAnalyzer(Version.LATEST); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? 
queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = MultiFieldQueryParser.parse(line, fields, flags, analyzer); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:com.github.msarhan.lucene.ArabicRootExtractorAnalyzer.java
License:Open Source License
/** * Creates//from w ww . java 2s. c o m * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * used to tokenize all the text in the provided {@link Reader}. * * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link LowerCaseFilter}, {@link DecimalDigitFilter}, {@link StopFilter}, * {@link ArabicRootExtractorStemFilter}, {@link SetKeywordMarkerFilter} */ @Override protected TokenStreamComponents createComponents(String fieldName) { final Tokenizer source; if (getVersion().onOrAfter(Version.LATEST)) { source = new StandardTokenizer(); } else { source = new StandardTokenizer40(); } TokenStream result = new LowerCaseFilter(source); if (getVersion().onOrAfter(Version.LUCENE_5_4_0)) { result = new DecimalDigitFilter(result); } // the order here is important: the stopword list is not normalized! result = new StopFilter(result, stopwords); result = new ArabicRootExtractorStemFilter(result); if (!stemExclusionSet.isEmpty()) { result = new SetKeywordMarkerFilter(result, stemExclusionSet); } return new TokenStreamComponents(source, result); }
From source file:com.grantingersoll.opengrok.analysis.BaseTokenStreamFactoryTestCase.java
License:Apache License
/** * Returns a fully initialized TokenizerFactory with the specified name and key-value arguments. * {@link ClasspathResourceLoader} is used for loading resources, so any required ones should * be on the test classpath./*from w ww. j a v a 2 s .c om*/ */ protected TokenizerFactory tokenizerFactory(String name, String... keysAndValues) throws Exception { return tokenizerFactory(name, Version.LATEST, keysAndValues); }
From source file:com.grantingersoll.opengrok.analysis.BaseTokenStreamFactoryTestCase.java
License:Apache License
/** * Returns a fully initialized TokenFilterFactory with the specified name and key-value arguments. * {@link ClasspathResourceLoader} is used for loading resources, so any required ones should * be on the test classpath./* w ww. ja va2s. c o m*/ */ protected TokenFilterFactory tokenFilterFactory(String name, String... keysAndValues) throws Exception { return tokenFilterFactory(name, Version.LATEST, keysAndValues); }
From source file:com.grantingersoll.opengrok.analysis.BaseTokenStreamFactoryTestCase.java
License:Apache License
/** * Returns a fully initialized CharFilterFactory with the specified name and key-value arguments. * {@link ClasspathResourceLoader} is used for loading resources, so any required ones should * be on the test classpath./*from w w w . ja v a 2 s .com*/ */ protected CharFilterFactory charFilterFactory(String name, String... keysAndValues) throws Exception { return charFilterFactory(name, Version.LATEST, new ClasspathResourceLoader(getClass()), keysAndValues); }
From source file:com.justinleegrant.myluceneplayground.SimpleFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(Version.LATEST, new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); doc.add(new FacetField("Author", "Bob")); doc.add(new FacetField("Publish Date", "2010", "10", "15")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2010", "10", "20")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2012", "1", "1")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Susan")); doc.add(new FacetField("Publish Date", "2012", "1", "7")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Frank")); doc.add(new FacetField("Publish Date", "1999", "5", "5")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close();/*from w w w .j a va 2s. c o m*/ taxoWriter.close(); }
From source file:com.lithium.flow.filer.lucene.LuceneFiler.java
License:Apache License
/**
 * Wraps a delegate {@link Filer} with a Lucene-backed near-real-time index.
 * Reads all tuning knobs from {@code config}, opens (or creates) the index at
 * {@code index.path}, and starts a background reopen thread that keeps
 * searchers fresh within the configured staleness bounds.
 *
 * @param delegate the underlying filer being indexed
 * @param config   source of the {@code index.*} settings
 * @throws IOException if the index directory cannot be opened
 */
public LuceneFiler(@Nonnull Filer delegate, @Nonnull Config config) throws IOException {
    super(delegate);
    // Tuning knobs, all read from config with defaults.
    String path = config.getString("index.path");
    maxAge = config.getTime("index.maxAge", "-1"); // presumably -1 disables age-based expiry — TODO confirm
    double maxMergeMb = config.getDouble("index.maxMergeMb", 4);
    double maxCachedMb = config.getDouble("index.maxCacheMb", 64);
    long targetMaxStale = config.getTime("index.targetMaxStale", "5s");
    long targetMinStale = config.getTime("index.targetMinStale", "1s");

    Version version = Version.LATEST;
    Directory dir = FSDirectory.open(new File(path));
    // NRTCachingDirectory buffers small, recently flushed segments in RAM
    // (up to maxCachedMb) before they hit disk.
    NRTCachingDirectory cachingDir = new NRTCachingDirectory(dir, maxMergeMb, maxCachedMb);
    // null analyzer: this writer is fed pre-built documents, no text analysis here.
    IndexWriterConfig writerConfig = new IndexWriterConfig(version, null);
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    writer = new TrackingIndexWriter(new IndexWriter(cachingDir, writerConfig));
    manager = new SearcherManager(writer.getIndexWriter(), true, new SearcherFactory());
    // Background thread reopens searchers so queries see writes within
    // [targetMinStale, targetMaxStale].
    // NOTE(review): starting a thread in the constructor lets `this` escape
    // before construction completes — confirm no subclass state is read by it.
    thread = new ControlledRealTimeReopenThread<>(writer, manager, targetMaxStale, targetMinStale);
    thread.start();
}