Usage examples for subclasses of org.apache.lucene.analysis.Tokenizer

List of usage examples for subclasses of org.apache.lucene.analysis.Tokenizer

Introduction

On this page you can find usage examples of classes that extend org.apache.lucene.analysis.Tokenizer.

Usage

From source file analysis.StandardTokenizer.java

/**
 * A grammar-based tokenizer constructed with JFlex
 * <p>
 * This should be a good tokenizer for most European-language documents:
 * <ul>
 * <li>Splits words at punctuation characters, removing punctuation. However, a

From source file au.edu.unimelb.csse.analyser.FastStringPerThreadTokenizer.java

public class FastStringPerThreadTokenizer extends Tokenizer {

    private final FastStringParser actualParser;
    private final char[] ioBuffer = new char[512];
    char[] sent = new char[512];

From source file au.edu.unimelb.csse.analyser.NodeTreebankSentenceTokenizer.java

public class NodeTreebankSentenceTokenizer extends Tokenizer {
    protected static final int BUFFER_SIZE = 512;
    private final char[] ioBuffer = new char[BUFFER_SIZE];
    private JsonSentenceParser elementTokenizer;
    private int read = -1;
    private static final String NONE = "";

From source file au.edu.unimelb.csse.analyser.TreebankSentenceTokenizer.java

public class TreebankSentenceTokenizer extends Tokenizer {
    protected static final int BUFFER_SIZE = 512;
    private final char[] ioBuffer = new char[BUFFER_SIZE];
    private Tknzr tokenizer;
    private int read = -1;

From source file au.edu.unimelb.csse.analyser.TreeTokenizer.java

public class TreeTokenizer extends Tokenizer {

    private String sentence;
    private TreeLexer lexer = new TreeLexer();
    private int numTokens; // total number of tokens in the sentence
    private int tokenPos; // current position of the returned token

From source file biospectra.lucene.KmerSequenceTokenizer.java

/**
 *
 * @author iychoi
 */
public class KmerSequenceTokenizer extends Tokenizer {

From source file br.bireme.ngrams.NGTokenizer.java

/**
 *
 * @author Heitor Barbieri
 * date: 20151216
 */
public class NGTokenizer extends Tokenizer {

From source file br.bireme.ngrams.OneTokenTokenizer.java

/**
 * Tokenizer that generates only one token, the whole input.
 * @author Heitor Barbieri
 * date: 20170327
 */
public class OneTokenTokenizer extends Tokenizer {

From source file byrne.mitre.NGramTokenizer.java

/**
 * Tokenizes the input into n-grams of the given size(s).
 */
public final class NGramTokenizer extends Tokenizer {

    public static final int DEFAULT_MIN_NGRAM_SIZE = 2;

From source file cc.pp.analyzer.ik.lucene.IKTokenizer.java

/**
 * IK tokenizer: adapter class for the Lucene Tokenizer API.
 * Compatible with Lucene 4.6.
 */
public final class IKTokenizer extends Tokenizer {