Example usage for org.apache.lucene.analysis.miscellaneous LimitTokenCountAnalyzer LimitTokenCountAnalyzer

List of usage examples for org.apache.lucene.analysis.miscellaneous LimitTokenCountAnalyzer LimitTokenCountAnalyzer

Introduction

In this page you can find the example usage for org.apache.lucene.analysis.miscellaneous LimitTokenCountAnalyzer LimitTokenCountAnalyzer.

Prototype

public LimitTokenCountAnalyzer(Analyzer delegate, int maxTokenCount) 

Source Link

Document

Build an analyzer that limits the maximum number of tokens per field.

Usage

From source file: FileIndexer.java

License: Apache License

/**
 * Command-line entry point: indexes the documents under DOCS_PATH into a
 * Lucene index at INDEX_PATH, optionally skipping files listed in an
 * excludes file and optionally updating an existing index.
 */
public static void main(String[] args) {
    String usage = "java FileIndexer" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-excludes FILE] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles\n"
            + "excludes is an optional list of files to be excluded, one per line.";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    List<String> excludes = new ArrayList<String>();
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            // Guard against a flag given without its value.
            if (i + 1 >= args.length) {
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            if (i + 1 >= args.length) {
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            docsPath = args[i + 1];
            i++;
        } else if ("-excludes".equals(args[i])) {
            if (i + 1 >= args.length) {
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            // try-with-resources ensures the Scanner is closed even if reading fails.
            try (Scanner sc = new Scanner(new File(args[i + 1]))) {
                i++;
                // One excluded file name per whitespace-separated token.
                while (sc.hasNext()) {
                    excludes.add(sc.next());
                }
            } catch (FileNotFoundException fnfe) {
                System.err.println(fnfe.getMessage());
                System.exit(1);
            }
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        // Cap the number of tokens indexed per field at one million.
        Analyzer analyzer = new LimitTokenCountAnalyzer(new StandardAnalyzer(), 1000000);
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir, excludes);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }

}

From source file: jfix.search.FullTextIndex.java

License: Open Source License

/**
 * Sets up an in-memory Lucene full-text index: a RAM-backed writer with an
 * effectively unlimited per-field token count, a query parser over the
 * single "text" field, and reusable field/document instances.
 */
public FullTextIndex() {
    try {
        objects = new ArrayList<>();

        // The writer's analyzer is token-limited (Integer.MAX_VALUE = effectively
        // unlimited); the query parser uses the same undecorated StandardAnalyzer.
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(
                new LimitTokenCountAnalyzer(analyzer, Integer.MAX_VALUE));

        // Entire index lives in memory.
        indexDirectory = new RAMDirectory();
        indexWriter = new IndexWriter(indexDirectory, config);

        // All content is searched through one "text" field; query terms are
        // combined with AND by default.
        queryParser = new QueryParser("text", analyzer);
        queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);

        // Reusable field instance re-populated for each indexed object.
        fulltext = new TextField("text", "", Field.Store.NO);

        // Used as base-set for a NOT-Query
        Field inverse = new TextField("true", "yes", Field.Store.NO);

        // Single reusable Document holding both fields.
        document = new Document();
        document.add(fulltext);
        document.add(inverse);
    } catch (Exception e) {
        // Initialization failure is unrecoverable here; rethrow unchecked,
        // preserving the original exception as the cause.
        throw new RuntimeException(e.getMessage(), e);
    }
}

From source file: net.sf.logsaw.index.internal.LuceneIndexServiceImpl.java

License: Open Source License

/**
 * Returns the Lucene analyzer to use for indexing text fields.
 * <p>
 * Defaults to a <code>StandardAnalyzer</code> with Lucene 4.1 semantics.
 *
 * @return the Lucene analyzer to use
 */
protected Analyzer getAnalyzer() {
    // Standard tokenization, capped at 10,000 tokens per field.
    Analyzer base = new StandardAnalyzer(getMatchVersion());
    return new LimitTokenCountAnalyzer(base, 10000);
}

From source file: org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.java

License: Apache License

/**
 * Builds the analyzer for this index definition: the configured default
 * analyzer, optionally wrapped per-field for ancestor paths, and optionally
 * token-limited.
 */
private Analyzer createAnalyzer() {
    // Start from the globally configured analyzer, overridden by an explicit
    // "default" entry in the analyzers map if one exists.
    Analyzer base = LuceneIndexConstants.ANALYZER;
    if (analyzers.containsKey(LuceneIndexConstants.ANL_DEFAULT)) {
        base = analyzers.get(LuceneIndexConstants.ANL_DEFAULT);
    }

    Analyzer wrapped;
    if (evaluatePathRestrictions()) {
        // The ancestors field gets a path-hierarchy tokenizer so path
        // restrictions can be evaluated against it.
        Map<String, Analyzer> perField = ImmutableMap.<String, Analyzer>builder()
                .put(FieldNames.ANCESTORS,
                        new TokenizerChain(
                                new PathHierarchyTokenizerFactory(Collections.<String, String>emptyMap())))
                .build();
        wrapped = new PerFieldAnalyzerWrapper(base, perField);
    } else {
        wrapped = base;
    }

    // In case of negative value no limits would be applied
    return maxFieldLength < 0 ? wrapped : new LimitTokenCountAnalyzer(wrapped, maxFieldLength);
}

From source file: org.apache.roller.weblogger.business.search.IndexManagerImpl.java

License: Apache License

/**
 * Creates an empty index in the given directory by opening and immediately
 * closing an IndexWriter over it.
 *
 * @param dir the directory in which to create the index
 */
private void createIndex(Directory dir) {
    IndexWriter writer = null;

    try {

        IndexWriterConfig config = new IndexWriterConfig(FieldConstants.LUCENE_VERSION,
                new LimitTokenCountAnalyzer(IndexManagerImpl.getAnalyzer(),
                        IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL));

        writer = new IndexWriter(dir, config);

    } catch (IOException e) {
        mLogger.error("Error creating index", e);
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            // Do not fail the caller on close, but do not swallow the problem
            // silently either (consistent with getSaveIndexOperation()).
            mLogger.warn("Unable to close IndexWriter.");
        }
    }
}

From source file: org.apache.roller.weblogger.business.search.IndexManagerImpl.java

License: Apache License

/**
 * Returns an index operation that copies the current (in-memory) index into
 * the file-system index directory.
 */
private IndexOperation getSaveIndexOperation() {
    return new WriteToIndexOperation(this) {
        public void doRun() {
            Directory dir = getIndexDirectory();
            Directory fsdir = getFSDirectory(true);
            IndexWriter writer = null;
            try {
                IndexWriterConfig config = new IndexWriterConfig(FieldConstants.LUCENE_VERSION,
                        new LimitTokenCountAnalyzer(IndexManagerImpl.getAnalyzer(),
                                IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL));
                writer = new IndexWriter(fsdir, config);
                // Merge the source index into the file-system index and persist it.
                writer.addIndexes(new Directory[] { dir });
                writer.commit();
                // NOTE(review): presumably the marker flags an in-progress/unsaved
                // index; it is removed only after a successful commit — confirm.
                indexConsistencyMarker.delete();
            } catch (IOException e) {
                mLogger.error("Problem saving index to disk", e);
                // Delete the directory, since there was a problem saving the RAM contents
                getFSDirectory(true);
            } finally {
                try {
                    if (writer != null) {
                        writer.close();
                    }
                } catch (IOException e1) {
                    // Close failure is logged but never propagated to the operation.
                    mLogger.warn("Unable to close IndexWriter.");
                }
            }
        }
    };
}

From source file: org.apache.roller.weblogger.business.search.operations.IndexOperation.java

License: Apache License

/**
 * Begin writing: opens an IndexWriter over the manager's index directory and
 * stores it in the {@code writer} field.
 *
 * @return the index writer
 */
protected IndexWriter beginWriting() {
    try {

        // Cap the number of tokens indexed per field; the limit comes from the
        // "lucene.analyzer.maxTokenCount" weblogger configuration property.
        LimitTokenCountAnalyzer analyzer = new LimitTokenCountAnalyzer(IndexManagerImpl.getAnalyzer(),
                WebloggerConfig.getIntProperty("lucene.analyzer.maxTokenCount"));

        IndexWriterConfig config = new IndexWriterConfig(FieldConstants.LUCENE_VERSION, analyzer);

        writer = new IndexWriter(manager.getIndexDirectory(), config);

    } catch (IOException e) {
        // NOTE(review): on failure the previous value of the "writer" field
        // (possibly null) is returned — callers must be prepared for that.
        mLogger.error("ERROR creating writer", e);
    }

    return writer;
}

From source file: org.dspace.search.DSIndexer.java

License: BSD License

/**
 * Prepare the index: open a writer over the configured index directory,
 * wiping out the existing index first if requested.
 *
 * @param wipeExisting true to create a fresh index, false to create-or-append
 * @return an open IndexWriter positioned on the index directory
 * @throws IOException if the index cannot be opened
 */
private static IndexWriter openIndex(boolean wipeExisting) throws IOException {
    Directory dir = FSDirectory.open(new File(indexDirectory));

    // Honor the maximum-terms-per-field setting from dspace.cfg; -1 means unlimited.
    int tokenLimit = (maxfieldlength == -1) ? Integer.MAX_VALUE : maxfieldlength;
    LimitTokenCountAnalyzer decoratorAnalyzer = new LimitTokenCountAnalyzer(getAnalyzer(), tokenLimit);

    IndexWriterConfig iwc = new IndexWriterConfig(luceneVersion, decoratorAnalyzer);
    iwc.setOpenMode(wipeExisting ? IndexWriterConfig.OpenMode.CREATE
            : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    return new IndexWriter(dir, iwc);
}

From source file: org.dspace.search.LuceneIndex.java

License: BSD License

/**
 * Get the Lucene analyzer to use according to current configuration (or
 * default). TODO: Should have multiple analyzers (and maybe indices?) for
 * multi-lingual DSpaces.
 *
 * @return <code>Analyzer</code> to use
 * @throws IllegalStateException
 *             if the configured analyzer can't be instantiated
 */
Analyzer getAnalyzer() {
    if (analyzer == null) {
        // We need to find the analyzer class from the configuration
        String analyzerClassName = ConfigurationManager.getProperty("search", "analyzer.default");

        if (analyzerClassName == null) {
            // Use default
            analyzerClassName = "org.dspace.search.DSAnalyzer";
        }

        try {
            // Reflectively instantiate the analyzer with a (Version) constructor.
            Class<?> analyzerClass = Class.forName(analyzerClassName);
            Constructor<?> constructor = analyzerClass.getDeclaredConstructor(Version.class);
            constructor.setAccessible(true);
            analyzer = (Analyzer) constructor.newInstance(Version.LUCENE_36);
            if (maxFieldLength > -1) {
                // Cap the number of tokens indexed per field when a limit is configured.
                analyzer = new LimitTokenCountAnalyzer(analyzer, maxFieldLength);
            }
        } catch (Exception e) {
            log.error(LogManager.getHeader(null, "no_search_analyzer", "search.analyzer=" + analyzerClassName),
                    e);

            // Preserve the original exception as the cause instead of discarding it.
            throw new IllegalStateException(e.toString(), e);
        }
    }

    return analyzer;
}

From source file: org.silverpeas.core.index.indexing.model.IndexManager.java

License: Open Source License

/**
 * Return the analyzer used to parse indexed texts and queries in the given language.
 *
 * @param language the language used in a document or a query.
 * @return the analyzer for the required language or a default analyzer.
 */
public Analyzer getAnalyzer(String language) {
    Analyzer languageAnalyzer = WAAnalyzer.getAnalyzer(language);
    if (languageAnalyzer != null) {
        return languageAnalyzer;
    }
    // No language-specific analyzer available: fall back to a token-limited
    // StandardAnalyzer.
    return new LimitTokenCountAnalyzer(new StandardAnalyzer(), maxFieldLength);
}