List of usage examples for org.apache.lucene.analysis.miscellaneous LimitTokenCountAnalyzer LimitTokenCountAnalyzer
public LimitTokenCountAnalyzer(Analyzer delegate, int maxTokenCount)
From source file:FileIndexer.java
License:Apache License
public static void main(String[] args) { String usage = "java FileIndexer" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-excludes FILE] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles\n" + "excludes is an optional list of files to be excluded, one per line."; String indexPath = "index"; String docsPath = null;// ww w. j a v a 2s .com boolean create = true; List<String> excludes = new ArrayList<String>(); for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-excludes".equals(args[i])) { Scanner sc = null; try { sc = new Scanner(new File(args[i + 1])); i++; } catch (FileNotFoundException fnfe) { System.err.println(fnfe.getMessage()); System.exit(1); } while (sc.hasNext()) { excludes.add(sc.next()); } sc.close(); } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new LimitTokenCountAnalyzer(new StandardAnalyzer(), 1000000); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. 
But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir, excludes); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:jfix.search.FullTextIndex.java
License:Open Source License
/**
 * Creates an empty in-memory full-text index.
 * <p>
 * A single {@code StandardAnalyzer} is shared between the writer (wrapped so
 * the per-field token count is effectively unlimited) and the query parser,
 * which uses AND as its default operator. A reusable {@code Document} is
 * prepared with the searchable "text" field plus a constant "true" field that
 * serves as the base set for NOT-queries.
 *
 * @throws RuntimeException if the Lucene index cannot be initialized
 */
public FullTextIndex() {
    try {
        objects = new ArrayList<>();

        Analyzer baseAnalyzer = new StandardAnalyzer();
        IndexWriterConfig writerConfig = new IndexWriterConfig(
                new LimitTokenCountAnalyzer(baseAnalyzer, Integer.MAX_VALUE));

        indexDirectory = new RAMDirectory();
        indexWriter = new IndexWriter(indexDirectory, writerConfig);

        queryParser = new QueryParser("text", baseAnalyzer);
        queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);

        fulltext = new TextField("text", "", Field.Store.NO);
        // Used as base-set for a NOT-Query
        Field inverseField = new TextField("true", "yes", Field.Store.NO);

        document = new Document();
        document.add(fulltext);
        document.add(inverseField);
    } catch (Exception e) {
        throw new RuntimeException(e.getMessage(), e);
    }
}
From source file:net.sf.logsaw.index.internal.LuceneIndexServiceImpl.java
License:Open Source License
/**
 * Returns the Lucene analyzer to use for indexing text fields.
 * <p>
 * Defaults to a {@code StandardAnalyzer} with Lucene 4.1 semantics, capped at
 * 10,000 tokens per field.
 *
 * @return the Lucene analyzer to use
 */
protected Analyzer getAnalyzer() {
    Analyzer standardAnalyzer = new StandardAnalyzer(getMatchVersion());
    return new LimitTokenCountAnalyzer(standardAnalyzer, 10000);
}
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.java
License:Apache License
private Analyzer createAnalyzer() { Analyzer result;//from w ww. java2 s . co m Analyzer defaultAnalyzer = LuceneIndexConstants.ANALYZER; if (analyzers.containsKey(LuceneIndexConstants.ANL_DEFAULT)) { defaultAnalyzer = analyzers.get(LuceneIndexConstants.ANL_DEFAULT); } if (!evaluatePathRestrictions()) { result = defaultAnalyzer; } else { Map<String, Analyzer> analyzerMap = ImmutableMap.<String, Analyzer>builder() .put(FieldNames.ANCESTORS, new TokenizerChain( new PathHierarchyTokenizerFactory(Collections.<String, String>emptyMap()))) .build(); result = new PerFieldAnalyzerWrapper(defaultAnalyzer, analyzerMap); } //In case of negative value no limits would be applied if (maxFieldLength < 0) { return result; } return new LimitTokenCountAnalyzer(result, maxFieldLength); }
From source file:org.apache.roller.weblogger.business.search.IndexManagerImpl.java
License:Apache License
private void createIndex(Directory dir) { IndexWriter writer = null;// w w w .j a v a 2 s .c o m try { IndexWriterConfig config = new IndexWriterConfig(FieldConstants.LUCENE_VERSION, new LimitTokenCountAnalyzer(IndexManagerImpl.getAnalyzer(), IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL)); writer = new IndexWriter(dir, config); } catch (IOException e) { mLogger.error("Error creating index", e); } finally { try { if (writer != null) { writer.close(); } } catch (IOException e) { } } }
From source file:org.apache.roller.weblogger.business.search.IndexManagerImpl.java
License:Apache License
/**
 * Builds the operation that persists the current index to disk: it copies the
 * contents of the working index directory into the filesystem directory via
 * {@code IndexWriter.addIndexes}, commits, and clears the consistency marker.
 *
 * @return a write operation that saves the index when run
 */
private IndexOperation getSaveIndexOperation() {
    return new WriteToIndexOperation(this) {
        public void doRun() {
            // Source: the current working index; target: the filesystem index.
            Directory dir = getIndexDirectory();
            Directory fsdir = getFSDirectory(true);

            IndexWriter writer = null;
            try {
                // NOTE(review): DEFAULT_TERM_INDEX_INTERVAL is used as the
                // analyzer's maxTokenCount here — confirm this is intentional.
                IndexWriterConfig config = new IndexWriterConfig(FieldConstants.LUCENE_VERSION,
                        new LimitTokenCountAnalyzer(IndexManagerImpl.getAnalyzer(),
                                IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL));
                writer = new IndexWriter(fsdir, config);
                writer.addIndexes(new Directory[] { dir });
                writer.commit();
                // Saved successfully: the marker flagging an inconsistent
                // on-disk index is no longer needed.
                indexConsistencyMarker.delete();
            } catch (IOException e) {
                mLogger.error("Problem saving index to disk", e);
                // Delete the directory, since there was a problem saving the RAM contents
                getFSDirectory(true);
            } finally {
                try {
                    if (writer != null) {
                        writer.close();
                    }
                } catch (IOException e1) {
                    mLogger.warn("Unable to close IndexWriter.");
                }
            }
        }
    };
}
From source file:org.apache.roller.weblogger.business.search.operations.IndexOperation.java
License:Apache License
/** * Begin writing./*ww w .j a va 2 s . c o m*/ * * @return the index writer */ protected IndexWriter beginWriting() { try { LimitTokenCountAnalyzer analyzer = new LimitTokenCountAnalyzer(IndexManagerImpl.getAnalyzer(), WebloggerConfig.getIntProperty("lucene.analyzer.maxTokenCount")); IndexWriterConfig config = new IndexWriterConfig(FieldConstants.LUCENE_VERSION, analyzer); writer = new IndexWriter(manager.getIndexDirectory(), config); } catch (IOException e) { mLogger.error("ERROR creating writer", e); } return writer; }
From source file:org.dspace.search.DSIndexer.java
License:BSD License
/**
 * Prepares the index: opens an {@code IndexWriter} over the index directory,
 * wiping out the existing index first if requested.
 *
 * @param wipeExisting whether to recreate the index from scratch
 * @return the opened writer
 * @throws IOException if the directory or writer cannot be opened
 */
private static IndexWriter openIndex(boolean wipeExisting) throws IOException {
    Directory dir = FSDirectory.open(new File(indexDirectory));

    // A maxfieldlength of -1 in dspace.cfg means "no limit" on indexed terms.
    int tokenLimit = (maxfieldlength == -1) ? Integer.MAX_VALUE : maxfieldlength;
    LimitTokenCountAnalyzer cappedAnalyzer = new LimitTokenCountAnalyzer(getAnalyzer(), tokenLimit);

    IndexWriterConfig iwc = new IndexWriterConfig(luceneVersion, cappedAnalyzer);
    iwc.setOpenMode(wipeExisting
            ? IndexWriterConfig.OpenMode.CREATE
            : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    return new IndexWriter(dir, iwc);
}
From source file:org.dspace.search.LuceneIndex.java
License:BSD License
/** * Get the Lucene analyzer to use according to current configuration (or * default). TODO: Should have multiple analyzers (and maybe indices?) for * multi-lingual DSpaces.//ww w.j av a 2s . co m * * @return <code>Analyzer</code> to use * @throws IllegalStateException * if the configured analyzer can't be instantiated */ Analyzer getAnalyzer() { if (analyzer == null) { // We need to find the analyzer class from the configuration String analyzerClassName = ConfigurationManager.getProperty("search", "analyzer.default"); if (analyzerClassName == null) { // Use default analyzerClassName = "org.dspace.search.DSAnalyzer"; } try { Class analyzerClass = Class.forName(analyzerClassName); Constructor constructor = analyzerClass.getDeclaredConstructor(Version.class); constructor.setAccessible(true); analyzer = (Analyzer) constructor.newInstance(Version.LUCENE_36); if (maxFieldLength > -1) { analyzer = new LimitTokenCountAnalyzer(analyzer, maxFieldLength); } } catch (Exception e) { log.error(LogManager.getHeader(null, "no_search_analyzer", "search.analyzer=" + analyzerClassName), e); throw new IllegalStateException(e.toString()); } } return analyzer; }
From source file:org.silverpeas.core.index.indexing.model.IndexManager.java
License:Open Source License
/** * Return the analyzer used to parse indexed texts and queries in the given language. * * @param language the language used in a document or a query. * @return the analyzer for the required language or a default analyzer. *///ww w.j a v a 2 s . co m public Analyzer getAnalyzer(String language) { Analyzer analyzer = WAAnalyzer.getAnalyzer(language); if (analyzer == null) { analyzer = new LimitTokenCountAnalyzer(new StandardAnalyzer(), maxFieldLength); } return analyzer; }