Example usage for org.apache.lucene.util CharsRef CharsRef

List of usage examples for org.apache.lucene.util CharsRef CharsRef

Introduction

In this page you can find the example usage for org.apache.lucene.util CharsRef CharsRef.

Prototype

public CharsRef(String string) 

Source Link

Document

Creates a new CharsRef initialized with the given String's character array

Usage

From source file:com.bizosys.unstructured.CustomAnalyzerExample.java

License:Apache License

/**
 * Builds the analysis chain for a field: standard tokenizer, lower-casing,
 * Porter stemming, stop-word removal ("a", "in"), then synonym expansion
 * over a small hard-coded synonym map.
 *
 * @param field  field name (unused by this chain)
 * @param reader source of the raw text
 * @return the fully wrapped token stream
 */
@Override
public TokenStream tokenStream(String field, Reader reader) {
    Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_36, reader);
    TokenStream ts = new LowerCaseFilter(Version.LUCENE_36, tokenizer);
    ts = new PorterStemFilter(ts);

    Set<String> stopwords = new HashSet<String>();
    stopwords.add("a");
    stopwords.add("in");
    ts = new StopFilter(Version.LUCENE_36, ts, stopwords);

    SynonymMap smap = null;
    try {
        // dedup=true: identical synonym entries are collapsed.
        SynonymMap.Builder sb = new SynonymMap.Builder(true);

        String base1 = "abinash";
        String syn1 = "abinasha";
        String syn11 = "abinashak";
        sb.add(new CharsRef(base1), new CharsRef(syn1), true);
        sb.add(new CharsRef(base1), new CharsRef(syn11), true);

        String base2 = "bangalor";
        String syn2 = "bangaloru";
        sb.add(new CharsRef(base2), new CharsRef(syn2), true);

        smap = sb.build();

    } catch (IOException ex) {
        ex.printStackTrace(System.err);
    }

    // Fix: if build() threw, smap is still null and SynonymFilter would throw a
    // NullPointerException; only add the filter when the map was actually built.
    if (smap != null) {
        ts = new SynonymFilter(ts, smap, true);
    }

    return ts;
}

From source file:com.bizosys.unstructured.StopwordAndSynonymAnalyzer.java

License:Apache License

/**
 * Builds the analysis chain for a field: custom tokenizer, lower-casing,
 * optional synonym rewriting (each synonym maps to its canonical concept),
 * optional stop-word filtering, optional accent folding and Snowball stemming.
 *
 * @param field  field name (unused by this chain)
 * @param reader source of the raw text
 * @return the fully wrapped token stream
 * @throws NullPointerException if the synonym map cannot be built (unchecked,
 *         rethrown with the underlying IOException attached as the cause)
 */
@Override
public TokenStream tokenStream(String field, Reader reader) {

    TokenStream ts = new HSearchTokenizer(Version.LUCENE_36, reader);
    ts = new LowerCaseFilter(Version.LUCENE_36, ts);

    SynonymMap smap = null;
    try {
        if (null != conceptWithPipeSeparatedSynonums) {
            SynonymMap.Builder sb = new SynonymMap.Builder(true);
            List<String> tempList = new ArrayList<String>();

            // Each concept maps to a separator-delimited synonym list; every
            // non-empty synonym is rewritten to the canonical concept term.
            for (String concept : conceptWithPipeSeparatedSynonums.keySet()) {
                tempList.clear();
                LineReaderUtil.fastSplit(tempList, conceptWithPipeSeparatedSynonums.get(concept),
                        this.conceptWordSeparator);
                for (String syn : tempList) {
                    int synLen = (null == syn) ? 0 : syn.length();
                    if (synLen == 0)
                        continue;
                    sb.add(new CharsRef(syn), new CharsRef(concept), false);
                }
            }
            if (conceptWithPipeSeparatedSynonums.size() > 0) {
                smap = sb.build();
                if (null != smap)
                    ts = new SynonymFilter(ts, smap, true);
            }
        }

        if (isStopFilterEnabled) {
            int stopwordsT = (null == stopwords) ? 0 : stopwords.size();
            if (stopwordsT > 0) {
                ts = new StopFilter(Version.LUCENE_36, ts, stopwords);
            }
        }

        if (isAccentFilterEnabled)
            ts = new ASCIIFoldingFilter(ts);
        if (isSnoballStemEnabled)
            ts = new SnowballFilter(ts, new EnglishStemmer());

        return ts;

    } catch (IOException ex) {
        ex.printStackTrace(System.err);
        // Fix: keep the historical unchecked NullPointerException contract for
        // callers, but attach the root cause instead of discarding it.
        NullPointerException wrapped = new NullPointerException(ex.toString());
        wrapped.initCause(ex);
        throw wrapped;
    }
}

From source file:com.isotrol.impe3.lucene.PortalSpanishAnalyzer.java

License:Open Source License

/**
 * Compiles the configured post-analysis synonym pairs into a SynonymMap.
 * Entries with an empty key, an empty value, or identical key and value are
 * skipped; the map is only built when at least one usable pair was added.
 */
public void afterPropertiesSet() throws Exception {
    if (postSynonyms == null || postSynonyms.isEmpty()) {
        return;
    }
    final SynonymMap.Builder builder = new SynonymMap.Builder(true);
    boolean anyAdded = false;
    for (Entry<String, String> mapping : postSynonyms.entrySet()) {
        final String from = mapping.getKey();
        final String to = mapping.getValue();
        // Skip blank or self-referential mappings.
        if (Strings.isNullOrEmpty(from) || Strings.isNullOrEmpty(to) || from.equals(to)) {
            continue;
        }
        builder.add(new CharsRef(from), new CharsRef(to), false);
        anyAdded = true;
    }
    if (anyAdded) {
        postSynonymMap = builder.build();
    }
}

From source file:com.shaie.fst.FstExample.java

License:Apache License

/**
 * Builds a small synonym FST mapping several color phrases to "color" and
 * dumps it in Graphviz DOT format to d:/tmp/syns.dot.
 */
public static void main(String[] args) throws Exception {
    final CharsRef output = new CharsRef("color");
    final SynonymMap.Builder builder = new SynonymMap.Builder(true);
    // Multi-word phrases are joined into a single SynonymMap input entry.
    final String[] phrases = { "blue", "green", "pale green", "pale blue", "dark sea green" };
    for (String phrase : phrases) {
        builder.add(SynonymMap.Builder.join(phrase.split(" "), new CharsRefBuilder()), output, true);
    }
    final SynonymMap synMap = builder.build();
    try (PrintWriter pw = new PrintWriter("d:/tmp/syns.dot")) {
        Util.toDot(synMap.fst, pw, true, true);
    }
    System.out.println("Done!");
}

From source file:edu.sdsc.scigraph.lucene.SynonymMapSupplier.java

License:Apache License

/**
 * Builds a SynonymMap from the bundled "lemmatization.txt" resource.
 * Each line is a comma-separated group of terms; every ordered pair of
 * distinct terms within a group is added as a synonym entry, so lookups
 * work in both directions. Returns null if the resource cannot be read
 * or the map cannot be built.
 */
@Override
public SynonymMap get() {
    try {
        return Resources.readLines(Resources.getResource("lemmatization.txt"), Charsets.UTF_8,
                new LineProcessor<SynonymMap>() {

                    // dedup=true: identical entries are collapsed by the builder.
                    SynonymMap.Builder builder = new SynonymMap.Builder(true);

                    @Override
                    public boolean processLine(String line) throws IOException {
                        List<String> synonyms = newArrayList(Splitter.on(',').trimResults().split(line));
                        // O(k^2) over the terms on one line: every distinct ordered pair.
                        for (String term : synonyms) {
                            for (String synonym : synonyms) {
                                if (!term.equals(synonym)) {
                                    builder.add(new CharsRef(term), new CharsRef(synonym), true);
                                }
                            }
                        }
                        return true; // keep processing subsequent lines
                    }

                    @Override
                    public SynonymMap getResult() {
                        try {
                            return builder.build();
                        } catch (IOException e) {
                            // Best-effort: report and fall back to "no synonym map".
                            e.printStackTrace();
                            return null;
                        }
                    }
                });
    } catch (Exception e) {
        logger.log(Level.WARNING, "Failed to build synonym map", e);
        return null;
    }
}

From source file:edu.upenn.library.solrplugins.FilingPrefixIgnorer.java

License:Apache License

/**
 * CharsRef adapter: delegates to the String-based transform and wraps the
 * result back into a CharsRef.
 */
@Override
public CharsRef transform(CharsRef input) {
    final String transformed = transform(input.toString());
    return new CharsRef(transformed);
}

From source file:elhuyar.bilakit.Dictionary.java

License:Apache License

/**
 * Reads {@code num} conversion lines ("KEYWORD from to") from the reader and
 * compiles them into an FST mapping each input string to its replacement.
 *
 * @param reader source positioned at the first conversion line
 * @param num    number of conversion lines to consume
 * @return the compiled conversion FST
 * @throws ParseException        if a line does not have exactly three fields
 * @throws IllegalStateException if the same input string is mapped twice
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
    // TreeMap: FST construction requires inputs in sorted order.
    final Map<String, String> conversions = new TreeMap<>();

    for (int i = 0; i < num; i++) {
        final String line = reader.readLine();
        final String[] fields = line.split("\\s+");
        if (fields.length != 3) {
            throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
        }
        if (conversions.put(fields[1], fields[2]) != null) {
            throw new IllegalStateException("duplicate mapping specified for: " + fields[1]);
        }
    }

    final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
    final Builder<CharsRef> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
    final IntsRefBuilder scratch = new IntsRefBuilder();
    for (Map.Entry<String, String> conversion : conversions.entrySet()) {
        Util.toUTF16(conversion.getKey(), scratch);
        fstBuilder.add(scratch.get(), new CharsRef(conversion.getValue()));
    }

    return fstBuilder.finish();
}

From source file:org.alfresco.solr.component.AsyncBuildSuggestComponent.java

License:Open Source License

/**
 * Runs each selected suggester against the request's query text and writes
 * the collected suggestions into the response. Does nothing when the
 * component is disabled, no suggesters are registered, or no query text
 * can be resolved.
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
    final SolrParams params = rb.req.getParams();
    LOG.debug("SuggestComponent process with : " + params);
    if (!params.getBool(COMPONENT_NAME, false) || suggesters.isEmpty()) {
        return;
    }

    final boolean buildAll = params.getBool(SUGGEST_BUILD_ALL, false);
    final boolean reloadAll = params.getBool(SUGGEST_RELOAD_ALL, false);
    Set<SolrSuggester> selected;
    try {
        selected = getSuggesters(params);
    } catch (IllegalArgumentException ex) {
        // Unknown suggester names are tolerated only on build-all/reload-all requests.
        if (!buildAll && !reloadAll) {
            throw ex;
        }
        selected = new HashSet<>();
    }

    // Resolve the query text: explicit suggest.q wins, then the parsed query string, then q.
    String query = params.get(SUGGEST_Q);
    if (query == null) {
        query = rb.getQueryString();
    }
    if (query == null) {
        query = params.get(CommonParams.Q);
    }
    if (query == null) {
        return;
    }

    final int count = params.getInt(SUGGEST_COUNT, 1);
    final boolean highlight = params.getBool(SUGGEST_HIGHLIGHT, false);
    final boolean allTermsRequired = params.getBool(SUGGEST_ALL_TERMS_REQUIRED, true);

    // A blank context filter is treated the same as none at all.
    String contextFilter = params.get(SUGGEST_CONTEXT_FILTER_QUERY);
    if (contextFilter != null) {
        contextFilter = contextFilter.trim();
        if (contextFilter.length() == 0) {
            contextFilter = null;
        }
    }

    final SuggesterOptions options = new SuggesterOptions(new CharsRef(query), count, contextFilter,
            allTermsRequired, highlight);
    final Map<String, SimpleOrderedMap<NamedList<Object>>> namedListResults = new HashMap<>();
    for (SolrSuggester suggester : selected) {
        toNamedList(suggester.getSuggestions(options), namedListResults);
    }
    rb.rsp.add(SuggesterResultLabels.SUGGEST, namedListResults);
}

From source file:org.alfresco.solr.component.AsyncBuildSuggestComponent.java

License:Open Source License

/**
 * Converts a previously serialized suggester response (suggester name ->
 * token -> suggestion entries) back into a {@link SuggesterResult}.
 * Returns an empty result when the input map is null.
 */
private SuggesterResult toSuggesterResult(Map<String, SimpleOrderedMap<NamedList<Object>>> suggestionsMap) {
    SuggesterResult result = new SuggesterResult();
    if (suggestionsMap == null) {
        return result;
    }
    // for each suggester
    for (Map.Entry<String, SimpleOrderedMap<NamedList<Object>>> entry : suggestionsMap.entrySet()) {
        String suggesterName = entry.getKey();
        // for each token queried against that suggester
        for (Iterator<Map.Entry<String, NamedList<Object>>> suggestionsIter = entry.getValue()
                .iterator(); suggestionsIter.hasNext();) {
            Map.Entry<String, NamedList<Object>> suggestions = suggestionsIter.next();
            String tokenString = suggestions.getKey();
            List<LookupResult> lookupResults = new ArrayList<>();
            NamedList<Object> suggestion = suggestions.getValue();
            // for each property of the serialized suggestion node
            for (int j = 0; j < suggestion.size(); j++) {
                String property = suggestion.getName(j);
                if (property.equals(SuggesterResultLabels.SUGGESTIONS)) {
                    @SuppressWarnings("unchecked")
                    List<NamedList<Object>> suggestionEntries = (List<NamedList<Object>>) suggestion.getVal(j);
                    for (NamedList<Object> suggestionEntry : suggestionEntries) {
                        String term = (String) suggestionEntry.get(SuggesterResultLabels.SUGGESTION_TERM);
                        Long weight = (Long) suggestionEntry.get(SuggesterResultLabels.SUGGESTION_WEIGHT);
                        String payload = (String) suggestionEntry.get(SuggesterResultLabels.SUGGESTION_PAYLOAD);
                        // NOTE(review): new BytesRef(payload) would NPE if the
                        // serialized entry carries no payload — verify the writer
                        // always emits one.
                        LookupResult res = new LookupResult(new CharsRef(term), weight, new BytesRef(payload));
                        lookupResults.add(res);
                    }
                }
                // NOTE(review): this add runs once per property, not once per
                // token, so the same (suggester, token) pair may be added
                // repeatedly when the node has several properties — looks
                // intentional only if result.add overwrites; confirm.
                result.add(suggesterName, tokenString, lookupResults);
            }
        }
    }
    return result;
}

From source file:org.apache.solr.handler.AnalysisRequestHandlerBase.java

License:Apache License

/**
 * Converts the list of Tokens to a list of NamedLists representing the tokens.
 *
 * @param tokenList  Tokens to convert
 * @param context The analysis context
 *
 * @return List of NamedLists containing the relevant information taken from the tokens
 */
private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokenList,
        AnalysisContext context) {
    final List<NamedList> tokensNamedLists = new ArrayList<NamedList>();
    final FieldType fieldType = context.getFieldType();
    final AttributeSource[] tokens = tokenList.toArray(new AttributeSource[tokenList.size()]);

    // sort the tokens by absolute position (lexicographic compare of the
    // per-token position arrays recorded by TokenTrackingAttribute)
    ArrayUtil.timSort(tokens, new Comparator<AttributeSource>() {
        @Override
        public int compare(AttributeSource a, AttributeSource b) {
            return arrayCompare(a.getAttribute(TokenTrackingAttribute.class).getPositions(),
                    b.getAttribute(TokenTrackingAttribute.class).getPositions());
        }

        // Lexicographic comparison of two int arrays; shorter array wins ties.
        private int arrayCompare(int[] a, int[] b) {
            int p = 0;
            final int stop = Math.min(a.length, b.length);
            while (p < stop) {
                int diff = a[p] - b[p];
                if (diff != 0)
                    return diff;
                p++;
            }
            // One is a prefix of the other, or, they are equal:
            return a.length - b.length;
        }
    });

    for (int i = 0; i < tokens.length; i++) {
        AttributeSource token = tokens[i];
        final NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
        final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
        BytesRef rawBytes = termAtt.getBytesRef();
        termAtt.fillBytesRef();
        // Human-readable form of the indexed term, per the field type's encoding.
        final String text = fieldType.indexedToReadable(rawBytes, new CharsRef(rawBytes.length)).toString();
        tokenNamedList.add("text", text);

        // Only emit raw_text when it differs from the readable form.
        if (token.hasAttribute(CharTermAttribute.class)) {
            final String rawText = token.getAttribute(CharTermAttribute.class).toString();
            if (!rawText.equals(text)) {
                tokenNamedList.add("raw_text", rawText);
            }
        }

        tokenNamedList.add("raw_bytes", rawBytes.toString());

        // Flag tokens the caller asked to highlight as matches.
        if (context.getTermsToMatch().contains(rawBytes)) {
            tokenNamedList.add("match", true);
        }

        // Copy the remaining attributes into the named list via reflection.
        token.reflectWith(new AttributeReflector() {
            @Override
            public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
                // leave out position and bytes term (already emitted above)
                if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
                    return;
                if (CharTermAttribute.class.isAssignableFrom(attClass))
                    return;
                if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
                    return;

                String k = attClass.getName() + '#' + key;

                // map keys for "standard attributes":
                if (ATTRIBUTE_MAPPING.containsKey(k)) {
                    k = ATTRIBUTE_MAPPING.get(k);
                }

                // BytesRef values are rendered through their string form.
                if (value instanceof BytesRef) {
                    final BytesRef p = (BytesRef) value;
                    value = p.toString();
                }

                tokenNamedList.add(k, value);
            }
        });

        tokensNamedLists.add(tokenNamedList);
    }

    return tokensNamedLists;
}