Java tutorial
/**
 * Constellio, Open Source Enterprise Search
 * Copyright (C) 2010 DocuLibre inc.
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA 02110-1301 USA
 */
package com.doculibre.constellio.utils;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;

import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
import org.apache.solr.client.solrj.SolrServer;

import com.doculibre.analyzer.FrenchAccentPlurielAnalyzer;
import com.doculibre.constellio.entities.IndexField;
import com.doculibre.constellio.entities.RecordCollection;
import com.doculibre.constellio.services.SolrServices;

/**
 * Static helpers that run text through the default Lucene analyzer
 * ({@link FrenchAccentPlurielAnalyzer}) and return the resulting terms as a string.
 */
public class AnalyzerUtils {

    /**
     * Analyzes {@code input} for the given collection.
     *
     * <p>The Solr server of the collection is looked up and handed to
     * {@link #analyze(String, String, SolrServer, boolean)} together with
     * {@link IndexField#DEFAULT_SEARCH_FIELD}; that overload currently ignores both.
     *
     * @param input      the text to analyze
     * @param collection the collection whose Solr server is looked up
     * @return the analyzed text, or {@code input} itself when it is blank
     */
    public static String analyze(String input, RecordCollection collection) {
        SolrServices solrServices = ConstellioSpringUtils.getSolrServices();
        SolrServer server = solrServices.getSolrServer(collection);
        return analyze(input, IndexField.DEFAULT_SEARCH_FIELD, server, false);
    }

    /**
     * Analyzes {@code input}; equivalent to
     * {@code analyze(input, indexFieldName, server, false)}.
     *
     * @param input          the text to analyze
     * @param indexFieldName name of the index field (currently ignored downstream)
     * @param server         Solr server (currently ignored downstream)
     * @return the analyzed text, or {@code input} itself when it is blank
     */
    public static String analyze(String input, String indexFieldName, SolrServer server) {
        return analyze(input, indexFieldName, server, false);
    }

    /**
     * Analyzes {@code input} with the default analyzer, stop words enabled.
     *
     * <p>Only {@code input} is used. A previous implementation sent the text to the
     * Solr {@code /analysis/field} request handler using {@code indexFieldName},
     * {@code server} and {@code queryAnalyzer}; it has been disabled and the call is
     * now delegated to {@link #analyzePhrase(String, boolean)}.
     *
     * @param input          the text to analyze
     * @param indexFieldName ignored
     * @param server         ignored
     * @param queryAnalyzer  ignored
     * @return the analyzed text, or {@code input} itself when it is blank
     */
    public static String analyze(String input, String indexFieldName, SolrServer server,
            boolean queryAnalyzer) {
        return analyzePhrase(input, true);
    }

    /**
     * Analyzes {@code phrase} with stop words enabled.
     *
     * @param phrase the text to analyze
     * @return the analyzed text, or {@code phrase} itself when it is blank
     */
    public static String analyzePhrase(String phrase) {
        return analyzePhrase(phrase, true);
    }

    /**
     * Runs {@code phrase} through the default analyzer and concatenates the emitted terms.
     *
     * <p>Written by Rida, modified by N.
     *
     * @param phrase       the text to analyze; returned unchanged (possibly {@code null})
     *                     when blank
     * @param useStopWords selects the analyzer variant, see
     *                     {@link #getDefaultAnalyzer(boolean)}
     * @return the trimmed concatenation of the analyzed terms
     * @throws RuntimeException wrapping any {@link IOException} raised by the token stream
     */
    public static String analyzePhrase(String phrase, boolean useStopWords) {
        if (!StringUtils.isNotBlank(phrase)) {
            return phrase;
        }

        StringBuilder norm = new StringBuilder();
        // A new analyzer is built for every call, so it is safe to close it here.
        Analyzer analyzer = getDefaultAnalyzer(useStopWords);
        try {
            TokenStream tokens = analyzer.tokenStream("", new StringReader(phrase));
            try {
                tokens.reset();
                CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class);
                while (tokens.incrementToken()) {
                    // NOTE(review): terms are appended without a separator, so a multi-word
                    // phrase yields one run-together string. The trim() below suggests a
                    // space may have been intended - confirm with callers before changing.
                    norm.append(termAtt.buffer(), 0, termAtt.length());
                }
                // Complete the TokenStream consumer workflow: end() after the last token.
                tokens.end();
            } finally {
                // Always release the stream, even when tokenization fails.
                tokens.close();
            }
            return norm.toString().trim();
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            analyzer.close();
        }
    }

    /**
     * Returns the default analyzer with stop words enabled.
     *
     * @return a new {@link FrenchAccentPlurielAnalyzer}
     */
    public static Analyzer getDefaultAnalyzer() {
        return getDefaultAnalyzer(true);
    }

    /**
     * Builds a new {@link FrenchAccentPlurielAnalyzer} for Lucene 4.4.
     *
     * @param useStopWords when {@code true} the analyzer is created with its single-argument
     *                     constructor (presumably its built-in stop words - confirm in
     *                     FrenchAccentPlurielAnalyzer); when {@code false} an empty
     *                     stop-word set is supplied
     * @return a new analyzer instance; the caller owns it
     */
    public static Analyzer getDefaultAnalyzer(boolean useStopWords) {
        if (useStopWords) {
            return new FrenchAccentPlurielAnalyzer(Version.LUCENE_44);
        }
        // Empty, case-insensitive stop-word set.
        CharArraySet emptyStopWords =
                new CharArraySet(Version.LUCENE_44, new HashSet<String>(), true);
        return new FrenchAccentPlurielAnalyzer(Version.LUCENE_44, emptyStopWords);
    }

    /**
     * Ad-hoc manual check: prints the analyzed form of two sample inputs.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println(analyzePhrase("carra"));
        System.out.println(analyzePhrase("CARRA"));
    }
}