Example usage for org.apache.lucene.util AttributeFactory DEFAULT_ATTRIBUTE_FACTORY

List of usage examples for org.apache.lucene.util AttributeFactory DEFAULT_ATTRIBUTE_FACTORY

Introduction

In this page you can find the example usage for org.apache.lucene.util AttributeFactory DEFAULT_ATTRIBUTE_FACTORY.

Prototype

AttributeFactory DEFAULT_ATTRIBUTE_FACTORY

Follow the Source Link to view the source code for org.apache.lucene.util AttributeFactory DEFAULT_ATTRIBUTE_FACTORY.

Click Source Link

Document

This is the default factory that creates AttributeImpl instances using the class name of the supplied Attribute interface class, by appending "Impl" to it.

Usage

From source file:cc.redpen.tokenizer.NeologdJapaneseTokenizer.java

License:Apache License

/**
 * Creates a tokenizer backed by Lucene's JapaneseTokenizer in NORMAL mode,
 * built with the default attribute factory.
 */
public NeologdJapaneseTokenizer() {
    // Second arg is null — presumably "no user dictionary"; third arg false —
    // NOTE(review): likely discardPunctuation=false, confirm against JapaneseTokenizer javadoc.
    this.tokenizer = new JapaneseTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, null, false,
            JapaneseTokenizer.Mode.NORMAL);
}

From source file:com.o19s.RegexPathHierarchyTokenizer.java

License:Apache License

/**
 * Convenience constructor: delegates with the default attribute factory,
 * default buffer size, and default depth-prefix character count.
 *
 * @param input     reader supplying the text to tokenize
 * @param delimiter path-component delimiter
 */
public RegexPathHierarchyTokenizer(Reader input, String delimiter) {
    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, DEFAULT_BUFFER_SIZE, delimiter,
            DEFAULT_DEPTH_PREFIX_NUM_CHARS);
}

From source file:com.o19s.RegexPathHierarchyTokenizer.java

License:Apache License

/**
 * Convenience constructor: delegates with the default attribute factory and
 * default buffer size, but a caller-supplied depth-prefix character count.
 *
 * @param input               reader supplying the text to tokenize
 * @param delimiter           path-component delimiter
 * @param depthPrefixNumChars number of characters reserved for the depth prefix
 */
public RegexPathHierarchyTokenizer(Reader input, String delimiter, int depthPrefixNumChars) {
    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, DEFAULT_BUFFER_SIZE, delimiter,
            depthPrefixNumChars);
}

From source file:com.o19s.RegexPathHierarchyTokenizer.java

License:Apache License

/**
 * Convenience constructor: delegates with the default attribute factory while
 * letting the caller control buffer size, delimiter, and depth-prefix length.
 *
 * @param input               reader supplying the text to tokenize
 * @param bufferSize          internal read-buffer size
 * @param delimiter           path-component delimiter
 * @param depthPrefixNumChars number of characters reserved for the depth prefix
 */
public RegexPathHierarchyTokenizer(Reader input, int bufferSize, String delimiter, int depthPrefixNumChars) {
    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, depthPrefixNumChars);
}

From source file:com.sindicetech.siren.analysis.NumericTokenizer.java

License:Open Source License

/**
 * Creates a token stream for numeric values with the specified
 * <code>precisionStep</code>, using the default attribute factory.
 *
 * @param input         reader supplying the value to parse
 * @param parser        numeric parser used to decode the input
 * @param precisionStep trie precision step for indexing the numeric value
 */
public NumericTokenizer(final Reader input, final NumericParser<? extends Number> parser,
        final int precisionStep) {
    this(input, parser, precisionStep, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
}

From source file:common.TextTokenize.java

public static void main(String[] args) throws IOException {
    // Define your attribute factory (or use the default) - same between 4.x and 5.x
    // NOTE(review): `factory` is currently unused because the tokenizer construction
    // below is commented out; it is kept here as the DEFAULT_ATTRIBUTE_FACTORY example.
    AttributeFactory factory = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;

    // Create the tokenizer and prepare it for reading
    //  Lucene 5.x
    //        StandardTokenizer tokenizer = new StandardTokenizer(factory);
    String text = "sgra.jpl.nasa.gov, U.S.A, www.isical.ac.in:8080/~mandar/dwaipayan";
    // The sample text is instead analyzed with EnglishAnalyzer and printed via a
    // project helper (CommonMethods.analyzeText).
    Analyzer analyzer = new EnglishAnalyzer();
    System.out.println(CommonMethods.analyzeText(analyzer, text, ""));
}

From source file:fi.nationallibrary.ndl.solrvoikko2.TestApp.java

License:Open Source License

/**
 * Interactive driver: reads words/phrases from stdin, prints the raw Voikko
 * analysis for each, then the tokens produced by VoikkoFilter over a
 * StandardTokenizer built with the default attribute factory.
 *
 * Fixes vs. original: the Voikko instance was constructed twice (the first
 * native instance leaked, never terminated); the finally block called
 * voikko.terminate() without a null check (NPE if setup threw before
 * construction); and voikkoFilter.close() was skipped when the loop threw.
 */
public static void main(String[] args) throws IOException {
    BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
    Voikko voikko = null;
    VoikkoFilter voikkoFilter = null;
    try {
        ConcurrentMap<String, List<CompoundToken>> cache = new ConcurrentLinkedHashMap.Builder<String, List<CompoundToken>>()
                .maximumWeightedCapacity(100).build();

        // Single Voikko instance, shared by both the raw analysis and the filter.
        voikko = new Voikko("fi-x-morphoid");

        StringReader reader = new StringReader("");
        Tokenizer tokenizer = new StandardTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
        tokenizer.setReader(reader);
        tokenizer.reset();

        voikkoFilter = new VoikkoFilter(tokenizer, voikko, true,
                VoikkoFilter.DEFAULT_MIN_WORD_SIZE, VoikkoFilter.DEFAULT_MIN_SUBWORD_SIZE,
                VoikkoFilter.DEFAULT_MAX_SUBWORD_SIZE, true, cache, 0);

        String text;
        System.out.println();
        System.out.println("Enter word or phrase");
        while ((text = stdin.readLine()) != null) {
            // Raw morphological analysis straight from Voikko.
            List<Analysis> analysisList = voikko.analyze(text);
            if (analysisList.isEmpty()) {
                System.out.println("No analysis available");
            }
            for (Analysis analysis : analysisList) {
                System.out.println("Analysis:");
                if (analysis.containsKey(BASEFORM)) {
                    WordComponent component = new WordComponent();
                    component.component = analysis.get(BASEFORM);
                    component.startInOriginal = 0;
                    component.lengthInOriginal = text.length();
                    print(component);
                }
                if (analysis.containsKey(WORDBASES)) {
                    System.out.println(analysis.get(WORDBASES));
                }
            }

            // Rewind the tokenizer for the new input line (close, then setReader + reset).
            tokenizer.close();
            reader = new StringReader(text);
            tokenizer.setReader(reader);
            tokenizer.reset();

            System.out.println("\nVoikkoFilter results:");
            while (voikkoFilter.incrementToken()) {
                System.out.println(
                        voikkoFilter.termAtt.toString() + " [" + voikkoFilter.posIncAtt.getPositionIncrement()
                                + ":" + voikkoFilter.offsetAtt.startOffset() + ":"
                                + voikkoFilter.offsetAtt.endOffset() + "]");
            }

            System.out.println();
            System.out.println("Enter word or phrase");
        }
    } finally {
        // Release resources even when the loop throws; guard against partial setup.
        if (voikkoFilter != null) {
            voikkoFilter.close();
        }
        if (voikko != null) {
            voikko.terminate();
        }
    }
}

From source file:fi.nationallibrary.ndl.solrvoikko2.VoikkoTest.java

License:Open Source License

/**
 * Execute Voikko analysis and return results in a string.
 *
 * Fixes vs. original: the Voikko native instance was never terminated and the
 * filter close was not exception-safe (both leaked when incrementToken threw);
 * result accumulation used repeated String concatenation (O(n^2)) — replaced
 * with a StringBuilder.
 *
 * @param term String to analyze
 * @return Comma-separated list of results
 * @throws IOException if tokenization fails
 */
final protected String getVoikkoWords(String term) throws IOException {
    ConcurrentMap<String, List<CompoundToken>> cache = new ConcurrentLinkedHashMap.Builder<String, List<CompoundToken>>()
            .maximumWeightedCapacity(100).build();

    Tokenizer tokenizer = new StandardTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
    tokenizer.setReader(new StringReader(term));
    tokenizer.reset();

    Voikko voikko = new Voikko("fi-x-morphoid");
    StringBuilder results = new StringBuilder();
    try {
        VoikkoFilter voikkoFilter = new VoikkoFilter(tokenizer, voikko, true,
                VoikkoFilter.DEFAULT_MIN_WORD_SIZE, VoikkoFilter.DEFAULT_MIN_SUBWORD_SIZE,
                VoikkoFilter.DEFAULT_MAX_SUBWORD_SIZE, true, cache, 0);
        try {
            while (voikkoFilter.incrementToken()) {
                if (results.length() > 0) {
                    results.append(',');
                }
                results.append(voikkoFilter.termAtt.toString()).append(" [")
                        .append(voikkoFilter.posIncAtt.getPositionIncrement()).append(':')
                        .append(voikkoFilter.offsetAtt.startOffset()).append(':')
                        .append(voikkoFilter.offsetAtt.endOffset()).append(']');
            }
        } finally {
            // Closing the filter also closes the wrapped tokenizer (Lucene TokenFilter contract).
            voikkoFilter.close();
        }
    } finally {
        // Release the Voikko native resources in all cases.
        voikko.terminate();
    }

    return results.toString();
}

From source file:org.apache.solr.legacy.LegacyNumericTokenStream.java

License:Apache License

/**
 * Creates a token stream for numeric values using the default <code>precisionStep</code>
 * {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). The stream is not yet initialized,
 * before using set a value using the various set<em>???</em>Value() methods.
 */
public LegacyNumericTokenStream() {
    // Delegates to the factory+precisionStep constructor with the default factory.
    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, LegacyNumericUtils.PRECISION_STEP_DEFAULT);
}

From source file:org.apache.solr.legacy.LegacyNumericTokenStream.java

License:Apache License

/**
 * Creates a token stream for numeric values with the specified
 * <code>precisionStep</code>. The stream is not yet initialized,
 * before using set a value using the various set<em>???</em>Value() methods.
 *
 * @param precisionStep trie precision step to use when indexing the value
 */
public LegacyNumericTokenStream(final int precisionStep) {
    // Delegates to the factory+precisionStep constructor with the default factory.
    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, precisionStep);
}