List of usage examples for org.apache.lucene.util AttributeFactory DEFAULT_ATTRIBUTE_FACTORY
AttributeFactory DEFAULT_ATTRIBUTE_FACTORY
To view the source code for org.apache.lucene.util AttributeFactory DEFAULT_ATTRIBUTE_FACTORY, click the Source Link.
From source file: cc.redpen.tokenizer.NeologdJapaneseTokenizer.java
License:Apache License
/**
 * Builds the wrapped Kuromoji {@link JapaneseTokenizer} with the default
 * attribute factory, no user dictionary, punctuation kept
 * ({@code discardPunctuation = false}), and NORMAL segmentation mode.
 */
public NeologdJapaneseTokenizer() {
    this.tokenizer = new JapaneseTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
            null, false, JapaneseTokenizer.Mode.NORMAL);
}
From source file:com.o19s.RegexPathHierarchyTokenizer.java
License:Apache License
/**
 * Creates a tokenizer over {@code input} splitting on {@code delimiter},
 * delegating to the full constructor with the default attribute factory,
 * default buffer size, and default depth-prefix length.
 */
public RegexPathHierarchyTokenizer(Reader input, String delimiter) {
    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input,
            DEFAULT_BUFFER_SIZE, delimiter, DEFAULT_DEPTH_PREFIX_NUM_CHARS);
}
From source file:com.o19s.RegexPathHierarchyTokenizer.java
License:Apache License
/**
 * Creates a tokenizer over {@code input} splitting on {@code delimiter} with an
 * explicit {@code depthPrefixNumChars}; delegates to the full constructor using
 * the default attribute factory and default buffer size.
 */
public RegexPathHierarchyTokenizer(Reader input, String delimiter, int depthPrefixNumChars) {
    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input,
            DEFAULT_BUFFER_SIZE, delimiter, depthPrefixNumChars);
}
From source file:com.o19s.RegexPathHierarchyTokenizer.java
License:Apache License
/**
 * Creates a tokenizer over {@code input} with explicit buffer size, delimiter,
 * and depth-prefix length; delegates to the full constructor using the default
 * attribute factory.
 */
public RegexPathHierarchyTokenizer(Reader input, int bufferSize, String delimiter, int depthPrefixNumChars) {
    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input,
            bufferSize, delimiter, depthPrefixNumChars);
}
From source file:com.sindicetech.siren.analysis.NumericTokenizer.java
License:Open Source License
/** * Creates a token stream for numeric values with the specified * <code>precisionStep</code>. *///from www.jav a2 s . c o m public NumericTokenizer(final Reader input, final NumericParser<? extends Number> parser, final int precisionStep) { this(input, parser, precisionStep, AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY); }
From source file:common.TextTokenize.java
public static void main(String[] args) throws IOException { // Define your attribute factory (or use the default) - same between 4.x and 5.x AttributeFactory factory = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY; // Create the tokenizer and prepare it for reading // Lucene 5.x // StandardTokenizer tokenizer = new StandardTokenizer(factory); String text = "sgra.jpl.nasa.gov, U.S.A, www.isical.ac.in:8080/~mandar/dwaipayan"; Analyzer analyzer = new EnglishAnalyzer(); System.out.println(CommonMethods.analyzeText(analyzer, text, "")); }
From source file:fi.nationallibrary.ndl.solrvoikko2.TestApp.java
License:Open Source License
public static void main(String[] args) throws IOException { BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in)); Voikko voikko = null;//from w w w . j a va 2 s . c om try { ConcurrentMap<String, List<CompoundToken>> cache = new ConcurrentLinkedHashMap.Builder<String, List<CompoundToken>>() .maximumWeightedCapacity(100).build(); voikko = new Voikko("fi-x-morphoid"); StringReader reader = new StringReader(""); Tokenizer tokenizer = new StandardTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY); tokenizer.setReader(reader); tokenizer.reset(); voikko = new Voikko("fi-x-morphoid"); VoikkoFilter voikkoFilter = new VoikkoFilter(tokenizer, voikko, true, VoikkoFilter.DEFAULT_MIN_WORD_SIZE, VoikkoFilter.DEFAULT_MIN_SUBWORD_SIZE, VoikkoFilter.DEFAULT_MAX_SUBWORD_SIZE, true, cache, 0); String text; System.out.println(); System.out.println("Enter word or phrase"); while ((text = stdin.readLine()) != null) { List<Analysis> analysisList = voikko.analyze(text); if (analysisList.isEmpty()) { System.out.println("No analysis available"); } for (Analysis analysis : analysisList) { System.out.println("Analysis:"); if (analysis.containsKey(BASEFORM)) { WordComponent component = new WordComponent(); component.component = analysis.get(BASEFORM); component.startInOriginal = 0; component.lengthInOriginal = text.length(); print(component); } if (analysis.containsKey(WORDBASES)) { System.out.println(analysis.get(WORDBASES)); } } tokenizer.close(); reader = new StringReader(text); tokenizer.setReader(reader); tokenizer.reset(); System.out.println("\nVoikkoFilter results:"); while (voikkoFilter.incrementToken()) { System.out.println( voikkoFilter.termAtt.toString() + " [" + voikkoFilter.posIncAtt.getPositionIncrement() + ":" + voikkoFilter.offsetAtt.startOffset() + ":" + voikkoFilter.offsetAtt.endOffset() + "]"); } System.out.println(); System.out.println("Enter word or phrase"); } voikkoFilter.close(); } finally { voikko.terminate(); } }
From source file:fi.nationallibrary.ndl.solrvoikko2.VoikkoTest.java
License:Open Source License
/** * Execute Voikko analysis and return results in a string * //from www . j a va2 s . com * @param term String to analyze * * @return Comma-separated list of results * @throws IOException */ final protected String getVoikkoWords(String term) throws IOException { ConcurrentMap<String, List<CompoundToken>> cache = new ConcurrentLinkedHashMap.Builder<String, List<CompoundToken>>() .maximumWeightedCapacity(100).build(); Tokenizer tokenizer = new StandardTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY); tokenizer.setReader(new StringReader(term)); tokenizer.reset(); Voikko voikko = new Voikko("fi-x-morphoid"); VoikkoFilter voikkoFilter = new VoikkoFilter(tokenizer, voikko, true, VoikkoFilter.DEFAULT_MIN_WORD_SIZE, VoikkoFilter.DEFAULT_MIN_SUBWORD_SIZE, VoikkoFilter.DEFAULT_MAX_SUBWORD_SIZE, true, cache, 0); String results = ""; //voikkoFilter.reset(); while (voikkoFilter.incrementToken()) { if (!results.isEmpty()) { results += ","; } results += voikkoFilter.termAtt.toString() + " [" + voikkoFilter.posIncAtt.getPositionIncrement() + ":" + voikkoFilter.offsetAtt.startOffset() + ":" + voikkoFilter.offsetAtt.endOffset() + "]"; } voikkoFilter.close(); return results; }
From source file:org.apache.solr.legacy.LegacyNumericTokenStream.java
License:Apache License
/** * Creates a token stream for numeric values using the default <code>precisionStep</code> * {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). The stream is not yet initialized, * before using set a value using the various set<em>???</em>Value() methods. */// w ww . j av a 2 s .c o m public LegacyNumericTokenStream() { this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, LegacyNumericUtils.PRECISION_STEP_DEFAULT); }
From source file:org.apache.solr.legacy.LegacyNumericTokenStream.java
License:Apache License
/**
 * Creates a token stream for numeric values with the specified
 * {@code precisionStep}, using the default attribute factory. The stream is
 * not yet initialized; before use, set a value with one of the various
 * set<em>???</em>Value() methods.
 *
 * @param precisionStep precision step used when indexing the value
 */
public LegacyNumericTokenStream(final int precisionStep) {
    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, precisionStep);
}