List of usage examples for the org.apache.lucene.util.CharsRef(String) constructor
public CharsRef(String string)
From source file:com.bizosys.unstructured.CustomAnalyzerExample.java
License:Apache License
/**
 * Builds the analysis chain for a field: standard tokenization, lower-casing,
 * Porter stemming, stop-word removal ("a", "in") and synonym expansion
 * (abinash -> abinasha/abinashak, bangalor -> bangaloru).
 *
 * @param field  the field name (unused by this analyzer)
 * @param reader source of the text to analyze
 * @return the fully wrapped token stream
 */
@Override
public TokenStream tokenStream(String field, Reader reader) {
    Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_36, reader);
    TokenStream ts = new LowerCaseFilter(Version.LUCENE_36, tokenizer);
    ts = new PorterStemFilter(ts);
    Set<String> stopwords = new HashSet<String>();
    stopwords.add("a");
    stopwords.add("in");
    ts = new StopFilter(Version.LUCENE_36, ts, stopwords);
    SynonymMap smap = null;
    try {
        SynonymMap.Builder sb = new SynonymMap.Builder(true);
        String base1 = "abinash";
        String syn1 = "abinasha";
        String syn11 = "abinashak";
        sb.add(new CharsRef(base1), new CharsRef(syn1), true);
        sb.add(new CharsRef(base1), new CharsRef(syn11), true);
        String base2 = "bangalor";
        String syn2 = "bangaloru";
        sb.add(new CharsRef(base2), new CharsRef(syn2), true);
        smap = sb.build();
    } catch (IOException ex) {
        ex.printStackTrace(System.err);
    }
    // Bug fix: only wrap with SynonymFilter when the map was built successfully.
    // The original passed a null map to SynonymFilter after a build failure,
    // which would throw NullPointerException instead of degrading gracefully.
    if (smap != null) {
        ts = new SynonymFilter(ts, smap, true);
    }
    return ts;
}
From source file:com.bizosys.unstructured.StopwordAndSynonymAnalyzer.java
License:Apache License
@Override public TokenStream tokenStream(String field, Reader reader) { TokenStream ts = new HSearchTokenizer(Version.LUCENE_36, reader); ts = new LowerCaseFilter(Version.LUCENE_36, ts); SynonymMap smap = null;//ww w . j ava2 s. c o m try { if (null != conceptWithPipeSeparatedSynonums) { SynonymMap.Builder sb = new SynonymMap.Builder(true); List<String> tempList = new ArrayList<String>(); for (String concept : conceptWithPipeSeparatedSynonums.keySet()) { tempList.clear(); LineReaderUtil.fastSplit(tempList, conceptWithPipeSeparatedSynonums.get(concept), this.conceptWordSeparator); for (String syn : tempList) { int synLen = (null == syn) ? 0 : syn.length(); if (synLen == 0) continue; sb.add(new CharsRef(syn), new CharsRef(concept), false); } } if (conceptWithPipeSeparatedSynonums.size() > 0) { smap = sb.build(); if (null != smap) ts = new SynonymFilter(ts, smap, true); } } if (isStopFilterEnabled) { int stopwordsT = (null == stopwords) ? 0 : stopwords.size(); if (stopwordsT > 0) { ts = new StopFilter(Version.LUCENE_36, ts, stopwords); } } if (isAccentFilterEnabled) ts = new ASCIIFoldingFilter(ts); if (isSnoballStemEnabled) ts = new SnowballFilter(ts, new EnglishStemmer()); return ts; } catch (IOException ex) { ex.printStackTrace(System.err); throw new NullPointerException(ex.toString()); } }
From source file:com.isotrol.impe3.lucene.PortalSpanishAnalyzer.java
License:Open Source License
public void afterPropertiesSet() throws Exception { if (postSynonyms == null || postSynonyms.isEmpty()) { return;//from ww w. j a v a 2 s . co m } final SynonymMap.Builder b = new SynonymMap.Builder(true); boolean used = false; for (Entry<String, String> entry : postSynonyms.entrySet()) { final String input = entry.getKey(); final String output = entry.getValue(); if (!Strings.isNullOrEmpty(input) && !Strings.isNullOrEmpty(output) && !input.equals(output)) { b.add(new CharsRef(input), new CharsRef(output), false); used = true; } } if (used) { postSynonymMap = b.build(); } }
From source file:com.shaie.fst.FstExample.java
License:Apache License
public static void main(String[] args) throws Exception { final CharsRef output = new CharsRef("color"); final SynonymMap.Builder builder = new SynonymMap.Builder(true); builder.add(SynonymMap.Builder.join("blue".split(" "), new CharsRefBuilder()), output, true); builder.add(SynonymMap.Builder.join("green".split(" "), new CharsRefBuilder()), output, true); builder.add(SynonymMap.Builder.join("pale green".split(" "), new CharsRefBuilder()), output, true); builder.add(SynonymMap.Builder.join("pale blue".split(" "), new CharsRefBuilder()), output, true); builder.add(SynonymMap.Builder.join("dark sea green".split(" "), new CharsRefBuilder()), output, true); final SynonymMap synMap = builder.build(); try (PrintWriter pw = new PrintWriter("d:/tmp/syns.dot");) { Util.toDot(synMap.fst, pw, true, true); }//from www . ja v a2 s .co m System.out.println("Done!"); }
From source file:edu.sdsc.scigraph.lucene.SynonymMapSupplier.java
License:Apache License
@Override public SynonymMap get() { try {//w w w . j a v a2 s . c o m return Resources.readLines(Resources.getResource("lemmatization.txt"), Charsets.UTF_8, new LineProcessor<SynonymMap>() { SynonymMap.Builder builder = new SynonymMap.Builder(true); @Override public boolean processLine(String line) throws IOException { List<String> synonyms = newArrayList(Splitter.on(',').trimResults().split(line)); for (String term : synonyms) { for (String synonym : synonyms) { if (!term.equals(synonym)) { builder.add(new CharsRef(term), new CharsRef(synonym), true); } } } return true; } @Override public SynonymMap getResult() { try { return builder.build(); } catch (IOException e) { e.printStackTrace(); return null; } } }); } catch (Exception e) { logger.log(Level.WARNING, "Failed to build synonym map", e); return null; } }
From source file:edu.upenn.library.solrplugins.FilingPrefixIgnorer.java
License:Apache License
/**
 * Applies the string-based {@code transform(String)} overload to the input's
 * text and re-wraps the result as a new CharsRef.
 *
 * @param input the characters to transform
 * @return a new CharsRef holding the transformed text
 */
@Override
public CharsRef transform(CharsRef input) {
    final String transformed = transform(input.toString());
    return new CharsRef(transformed);
}
From source file:elhuyar.bilakit.Dictionary.java
License:Apache License
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException { Map<String, String> mappings = new TreeMap<>(); for (int i = 0; i < num; i++) { String line = reader.readLine(); String parts[] = line.split("\\s+"); if (parts.length != 3) { throw new ParseException("invalid syntax: " + line, reader.getLineNumber()); }// ww w . ja va2 s. c o m if (mappings.put(parts[1], parts[2]) != null) { throw new IllegalStateException("duplicate mapping specified for: " + parts[1]); } } Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton(); Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs); IntsRefBuilder scratchInts = new IntsRefBuilder(); for (Map.Entry<String, String> entry : mappings.entrySet()) { Util.toUTF16(entry.getKey(), scratchInts); builder.add(scratchInts.get(), new CharsRef(entry.getValue())); } return builder.finish(); }
From source file:org.alfresco.solr.component.AsyncBuildSuggestComponent.java
License:Open Source License
/**
 * Handles a suggest request: resolves the target suggesters, derives the
 * query string from the request parameters and collects each suggester's
 * suggestions into the response.
 *
 * @param rb the current response builder
 * @throws IOException if a suggester lookup fails
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
    final SolrParams params = rb.req.getParams();
    LOG.debug("SuggestComponent process with : " + params);
    if (!params.getBool(COMPONENT_NAME, false) || suggesters.isEmpty()) {
        return;
    }
    final boolean buildAll = params.getBool(SUGGEST_BUILD_ALL, false);
    final boolean reloadAll = params.getBool(SUGGEST_RELOAD_ALL, false);
    Set<SolrSuggester> targets;
    try {
        targets = getSuggesters(params);
    } catch (IllegalArgumentException ex) {
        // An unknown suggester name is fatal unless this is a bulk
        // build/reload request, in which case we proceed with no targets.
        if (!buildAll && !reloadAll) {
            throw ex;
        }
        targets = new HashSet<>();
    }
    // Query resolution order: explicit suggest.q, then the parsed query
    // string, then the plain q parameter.
    String query = params.get(SUGGEST_Q);
    if (query == null) {
        query = rb.getQueryString();
    }
    if (query == null) {
        query = params.get(CommonParams.Q);
    }
    if (query == null) {
        return;
    }
    final int count = params.getInt(SUGGEST_COUNT, 1);
    final boolean highlight = params.getBool(SUGGEST_HIGHLIGHT, false);
    final boolean allTermsRequired = params.getBool(SUGGEST_ALL_TERMS_REQUIRED, true);
    String contextFilter = params.get(SUGGEST_CONTEXT_FILTER_QUERY);
    if (contextFilter != null) {
        contextFilter = contextFilter.trim();
        if (contextFilter.length() == 0) {
            contextFilter = null;
        }
    }
    final SuggesterOptions options =
            new SuggesterOptions(new CharsRef(query), count, contextFilter, allTermsRequired, highlight);
    final Map<String, SimpleOrderedMap<NamedList<Object>>> namedListResults = new HashMap<>();
    for (SolrSuggester target : targets) {
        toNamedList(target.getSuggestions(options), namedListResults);
    }
    rb.rsp.add(SuggesterResultLabels.SUGGEST, namedListResults);
}
From source file:org.alfresco.solr.component.AsyncBuildSuggestComponent.java
License:Open Source License
/**
 * Converts a NamedList-shaped suggester response (as produced by
 * {@code toNamedList}) back into a {@link SuggesterResult}.
 *
 * The input maps suggester name -> (token string -> suggestion NamedList);
 * each suggestion NamedList may carry a "suggestions" property holding a
 * list of term/weight/payload entries.
 */
private SuggesterResult toSuggesterResult(Map<String, SimpleOrderedMap<NamedList<Object>>> suggestionsMap) {
    SuggesterResult result = new SuggesterResult();
    if (suggestionsMap == null) {
        return result;
    }
    // Outer loop: one entry per suggester.
    for (Map.Entry<String, SimpleOrderedMap<NamedList<Object>>> entry : suggestionsMap.entrySet()) {
        String suggesterName = entry.getKey();
        // Middle loop: one entry per queried token for this suggester.
        for (Iterator<Map.Entry<String, NamedList<Object>>> suggestionsIter = entry.getValue()
                .iterator(); suggestionsIter.hasNext();) {
            Map.Entry<String, NamedList<Object>> suggestions = suggestionsIter.next();
            String tokenString = suggestions.getKey();
            List<LookupResult> lookupResults = new ArrayList<>();
            NamedList<Object> suggestion = suggestions.getValue();
            // Inner loop: scan the suggestion's properties for the
            // "suggestions" list and rebuild LookupResults from it.
            for (int j = 0; j < suggestion.size(); j++) {
                String property = suggestion.getName(j);
                if (property.equals(SuggesterResultLabels.SUGGESTIONS)) {
                    @SuppressWarnings("unchecked")
                    List<NamedList<Object>> suggestionEntries = (List<NamedList<Object>>) suggestion.getVal(j);
                    for (NamedList<Object> suggestionEntry : suggestionEntries) {
                        String term = (String) suggestionEntry.get(SuggesterResultLabels.SUGGESTION_TERM);
                        Long weight = (Long) suggestionEntry.get(SuggesterResultLabels.SUGGESTION_WEIGHT);
                        String payload = (String) suggestionEntry.get(SuggesterResultLabels.SUGGESTION_PAYLOAD);
                        LookupResult res = new LookupResult(new CharsRef(term), weight, new BytesRef(payload));
                        lookupResults.add(res);
                    }
                }
                // NOTE(review): result.add is invoked once per property of the
                // suggestion NamedList, not once per token; if a suggestion
                // ever carries more than one property, the same lookupResults
                // list is registered multiple times — confirm this is intended
                // before changing it.
                result.add(suggesterName, tokenString, lookupResults);
            }
        }
    }
    return result;
}
From source file:org.apache.solr.handler.AnalysisRequestHandlerBase.java
License:Apache License
/**
 * Converts the list of tokens to a list of NamedLists representing the tokens.
 *
 * @param tokenList tokens to convert
 * @param context   the analysis context
 * @return list of NamedLists containing the relevant information taken from the tokens
 */
private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokenList,
        AnalysisContext context) {
    final List<NamedList> tokensNamedLists = new ArrayList<NamedList>();
    final FieldType fieldType = context.getFieldType();
    final AttributeSource[] tokens = tokenList.toArray(new AttributeSource[tokenList.size()]);

    // Sort the tokens by absolute position, comparing position arrays
    // lexicographically element by element.
    ArrayUtil.timSort(tokens, new Comparator<AttributeSource>() {
        @Override
        public int compare(AttributeSource a, AttributeSource b) {
            return arrayCompare(a.getAttribute(TokenTrackingAttribute.class).getPositions(),
                    b.getAttribute(TokenTrackingAttribute.class).getPositions());
        }

        // Lexicographic compare of two position arrays.
        private int arrayCompare(int[] a, int[] b) {
            int p = 0;
            final int stop = Math.min(a.length, b.length);
            while (p < stop) {
                int diff = a[p] - b[p];
                if (diff != 0)
                    return diff;
                p++;
            }
            // One is a prefix of the other, or they are equal:
            // the shorter array sorts first.
            return a.length - b.length;
        }
    });

    for (int i = 0; i < tokens.length; i++) {
        AttributeSource token = tokens[i];
        final NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
        final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
        BytesRef rawBytes = termAtt.getBytesRef();
        // fillBytesRef() populates the BytesRef obtained above with the
        // current term — NOTE(review): this getBytesRef-then-fill call order
        // matches the older Lucene attribute API; do not reorder.
        termAtt.fillBytesRef();
        // Human-readable form of the indexed term.
        final String text = fieldType.indexedToReadable(rawBytes, new CharsRef(rawBytes.length)).toString();
        tokenNamedList.add("text", text);
        // Only emit raw_text when it differs from the readable form.
        if (token.hasAttribute(CharTermAttribute.class)) {
            final String rawText = token.getAttribute(CharTermAttribute.class).toString();
            if (!rawText.equals(text)) {
                tokenNamedList.add("raw_text", rawText);
            }
        }
        tokenNamedList.add("raw_bytes", rawBytes.toString());
        // Flag tokens that match one of the requested query terms.
        if (context.getTermsToMatch().contains(rawBytes)) {
            tokenNamedList.add("match", true);
        }
        // Copy the remaining attributes into the NamedList, mapping well-known
        // attribute keys to their short names.
        token.reflectWith(new AttributeReflector() {
            @Override
            public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
                // Leave out position and bytes term (already emitted above).
                if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
                    return;
                if (CharTermAttribute.class.isAssignableFrom(attClass))
                    return;
                if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
                    return;
                String k = attClass.getName() + '#' + key;
                // Map keys for "standard attributes":
                if (ATTRIBUTE_MAPPING.containsKey(k)) {
                    k = ATTRIBUTE_MAPPING.get(k);
                }
                // Render BytesRef values as their string form.
                if (value instanceof BytesRef) {
                    final BytesRef p = (BytesRef) value;
                    value = p.toString();
                }
                tokenNamedList.add(k, value);
            }
        });
        tokensNamedLists.add(tokenNamedList);
    }
    return tokensNamedLists;
}