Example usage for org.apache.solr.schema FieldType getQueryAnalyzer

List of usage examples for org.apache.solr.schema FieldType getQueryAnalyzer

Introduction

On this page you can find example usages of org.apache.solr.schema FieldType getQueryAnalyzer.

Prototype

public Analyzer getQueryAnalyzer() 

Source Link

Document

Returns the Analyzer to be used when searching fields of this type.

Usage

From source file:com.billiger.solr.handler.component.QLTBComponent.java

License:Apache License

/**
 * Inform component of core reload.
 *
 * This will both set the analyzer according to the configured
 * queryFieldType, and load the QLTB data. Data source can be (in this
 * order) ZooKeeper, the conf/ directory or the data/ directory.
 */
@Override
public final void inform(final SolrCore core) {
    // Resolve the query-time analyzer from the configured field type;
    // when no field type is configured the analyzer is left null.
    String queryFieldType = initArgs.get(FIELD_TYPE);
    if (queryFieldType != null) {
        FieldType ft = core.getLatestSchema().getFieldTypes().get(queryFieldType);
        if (ft == null) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                    "unknown FieldType \"" + queryFieldType + "\" used in QLTBComponent");
        }
        analyzer = ft.getQueryAnalyzer();
    } else {
        analyzer = null;
    }
    // Rebuild the QLTB cache under its lock so concurrent readers never
    // observe a partially loaded cache during a core reload.
    synchronized (qltbCache) {
        qltbCache.clear();
        try {
            // retrieve QLTB data filename (mandatory argument)
            String qltbFile = initArgs.get(QLTB_FILE);
            if (qltbFile == null) {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                        "QLTBComponent must specify argument: \"" + QLTB_FILE + "\" - path to QLTB data");
            }
            boolean exists = false;
            // First preference: file stored in ZooKeeper (SolrCloud mode).
            // NOTE(review): when the file exists in ZooKeeper nothing is
            // loaded into the cache here — presumably loaded lazily via
            // getQLTBMap; confirm against the rest of the class.
            ZkController zkController = core.getCoreDescriptor().getCoreContainer().getZkController();
            if (zkController != null) {
                exists = zkController.configFileExists(zkController.readConfigName(
                        core.getCoreDescriptor().getCloudDescriptor().getCollectionName()), qltbFile);
            } else {
                // no ZooKeeper, check conf/ and data/ directories
                File fConf = new File(core.getResourceLoader().getConfigDir(), qltbFile);
                File fData = new File(core.getDataDir(), qltbFile);
                if (fConf.exists() == fData.exists()) {
                    // both or neither exist: ambiguous or missing configuration
                    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                            "QLTBComponent missing config file: \"" + qltbFile + "\": either "
                                    + fConf.getAbsolutePath() + " or " + fData.getAbsolutePath()
                                    + " must exist, but not both");
                }
                if (fConf.exists()) {
                    // conf/ found, load it directly via a Config resource
                    exists = true;
                    log.info("QLTB source conf/: " + fConf.getAbsolutePath());
                    Config cfg = new Config(core.getResourceLoader(), qltbFile);
                    qltbCache.put(null, loadQLTBMap(cfg, core));
                }
            }
            if (!exists) {
                // Neither ZooKeeper nor conf/, so the data must live in data/.
                // Loading requires an IndexReader, borrowed from the newest
                // searcher and always released in the finally block.
                RefCounted<SolrIndexSearcher> searcher = null;
                try {
                    searcher = core.getNewestSearcher(false);
                    IndexReader reader = searcher.get().getIndexReader();
                    getQLTBMap(reader, core);
                } finally {
                    if (searcher != null) {
                        searcher.decref();
                    }
                }
            }
        } catch (Exception ex) {
            // wrap any initialization failure so Solr reports a 500 with cause
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error initializing QltbComponent.",
                    ex);
        }
    }
}

From source file:NomusSolrPlugins.NomusDismaxQParserPlugin.java

License:Apache License

/**
 * Resolve the query-time analyzer for the given field and return it as a
 * {@link TokenizerChain}, or {@code null} when the analyzer is of any other kind.
 */
public static TokenizerChain getQueryTokenizerChain(QParser parser, String fieldName) {
    final Analyzer queryAnalyzer =
            parser.getReq().getSchema().getFieldType(fieldName).getQueryAnalyzer();
    if (queryAnalyzer instanceof TokenizerChain) {
        return (TokenizerChain) queryAnalyzer;
    }
    return null;
}

From source file:NomusSolrPlugins.NomusDismaxQParserPlugin.java

License:Apache License

/**
 * Return a token stream for the given field, optionally stripping the stop
 * filter from the field's query analyzer.
 *
 * When stop-filter removal is disabled, the plain query analyzer is used.
 * Otherwise the analyzer to use is resolved once per field and cached in
 * {@code map}: the query analyzer's filter chain is rebuilt without its
 * first StopFilterFactory, but only when the index-time chain contains no
 * stop filter itself (removing it then would change matching behavior).
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    if (!removeStopFilter) {
        return queryAnalyzer.tokenStream(fieldName, reader);
    }

    // fast path: analyzer already resolved and cached for this field
    Analyzer cached = map.get(fieldName);
    if (cached != null) {
        return cached.tokenStream(fieldName, reader);
    }

    FieldType ft = parser.getReq().getSchema().getFieldType(fieldName);
    Analyzer qa = ft.getQueryAnalyzer();
    Analyzer effective = stopFreeQueryAnalyzer(ft, qa, fieldName);
    map.put(fieldName, effective);
    return effective.tokenStream(fieldName, reader);
}

/**
 * Build the analyzer to use for a field: the query analyzer with its first
 * stop filter removed, or the query analyzer unchanged when removal is not
 * possible (not a TokenizerChain, shared with the index analyzer, no stop
 * filter present) or not safe (index chain also stop-filters).
 */
private Analyzer stopFreeQueryAnalyzer(FieldType ft, Analyzer qa, String fieldName) {
    if (!(qa instanceof TokenizerChain)) {
        return qa;
    }
    TokenizerChain tcq = (TokenizerChain) qa;

    Analyzer ia = ft.getAnalyzer();
    if (ia == qa || !(ia instanceof TokenizerChain)) {
        return qa;
    }
    TokenizerChain tci = (TokenizerChain) ia;

    // make sure that there isn't a stop filter in the indexer: removing the
    // query-side stop filter would otherwise query terms never indexed
    if (indexOfStopFilter(tci.getTokenFilterFactories()) != -1) {
        return qa;
    }

    // now if there is a stop filter in the query analyzer, remove it
    TokenFilterFactory[] facs = tcq.getTokenFilterFactories();
    int stopIdx = indexOfStopFilter(facs);
    if (stopIdx == -1) {
        // no stop filter exists
        return qa;
    }

    // copy the filter chain, skipping the stop filter
    TokenFilterFactory[] newtf = new TokenFilterFactory[facs.length - 1];
    System.arraycopy(facs, 0, newtf, 0, stopIdx);
    System.arraycopy(facs, stopIdx + 1, newtf, stopIdx, facs.length - stopIdx - 1);

    TokenizerChain newa = new TokenizerChain(tcq.getTokenizerFactory(), newtf);
    newa.setPositionIncrementGap(tcq.getPositionIncrementGap(fieldName));
    return newa;
}

/** Index of the first StopFilterFactory in the chain, or -1 when absent. */
private static int indexOfStopFilter(TokenFilterFactory[] factories) {
    for (int i = 0; i < factories.length; i++) {
        if (factories[i] instanceof StopFilterFactory) {
            return i;
        }
    }
    return -1;
}

From source file:org.apache.lucene.queryparser.classic.PreAnalyzedQueryParser.java

License:Apache License

/**
 * Perform tokenization and term analysis using the Solr query analyzer of
 * the schema field type {@code "text_" + langCode}.
 *
 * Wildcard markers ('*') adjacent to a token in the raw input are
 * re-attached to the emitted token, and the surrounding double quotes of a
 * phrase query are preserved on the first and last emitted tokens.
 *
 * @param langCode   language suffix used to build the field type name
 * @param rawContent the raw query text to analyze
 * @return the analyzed tokens; empty when analysis fails
 */
private ArrayList<String> getTokensArrayUsingFieldTypes(String langCode, String rawContent) {
    ArrayList<String> tokens = new ArrayList<String>();
    String trimmed = rawContent.trim();
    // primitive boolean — no need for the boxed Boolean here
    boolean isPhrase = trimmed.startsWith("\"") && trimmed.endsWith("\"");
    try {
        // Build Solr field type name based on language and get its query analyzer
        FieldType fType = schema.getFieldTypeByName("text_" + langCode);
        Analyzer an = fType.getQueryAnalyzer();

        TokenStream ts = an.tokenStream("", new StringReader(rawContent));
        try {
            ts.reset();
            // getAttribute is generic — no cast needed
            CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
            int termsCount = 0;
            while (ts.incrementToken()) { // For each detected token
                if (termsCount == 0 && isPhrase) {
                    tokens.add("\"" + term.toString()); // preserve opening quote
                } else {
                    String tok = term.toString();
                    if (rawContent.contains(tok + "*")) { // preserve trailing wildcard operator
                        tok += "*";
                    }
                    if (rawContent.contains("*" + tok)) { // preserve leading wildcard operator
                        tok = "*" + tok;
                    }
                    tokens.add(tok);
                }
                termsCount++;
            }
            // re-attach the closing quote to the last token of a phrase;
            // guard against an input that produced no tokens at all
            if (isPhrase && !tokens.isEmpty()) {
                tokens.set(tokens.size() - 1, tokens.get(tokens.size() - 1) + "\"");
            }
            ts.end();
        } finally {
            // always release the token stream, even when analysis throws
            ts.close();
        }
    } catch (Exception e) {
        // NOTE(review): analysis failures are deliberately swallowed so the
        // caller receives a (possibly empty) token list; consider logging
        // the exception for diagnosability.
    }

    return tokens;
}

From source file:org.dice.solrenhancements.spellchecker.DiceMultipleCaseSuggester.java

License:Apache License

/**
 * Initialize the suggester from its configuration.
 *
 * Reads the threshold/location/lookup settings, resolves the optional
 * suggestion analyzer from the schema's field types, maps legacy lookup
 * class names to their -Factory equivalents, creates the lookup and, when
 * a store directory is configured, attempts to reload previously persisted
 * lookup data from it.
 *
 * @param config suggester configuration
 * @param core   the owning Solr core
 * @return the dictionary name assigned by the superclass
 */
@Override
public String init(NamedList config, SolrCore core) {
    LOG.info("init: " + config);
    String name = super.init(config, core);
    threshold = config.get(THRESHOLD_TOKEN_FREQUENCY) == null ? 0.0f
            : (Float) config.get(THRESHOLD_TOKEN_FREQUENCY);
    sourceLocation = (String) config.get(LOCATION);
    lookupImpl = (String) config.get(LOOKUP_IMPL);

    // resolve the (optional) analyzer used for suggestions from the schema
    IndexSchema schema = core.getLatestSchema();
    suggestionAnalyzerFieldTypeName = (String) config.get(SUGGESTION_ANALYZER_FIELDTYPE);
    if (schema.getFieldTypes().containsKey(suggestionAnalyzerFieldTypeName)) {
        FieldType fieldType = schema.getFieldTypes().get(suggestionAnalyzerFieldTypeName);
        suggestionAnalyzer = fieldType.getQueryAnalyzer();
    }

    // support the old classnames without -Factory for config file backwards compatibility.
    if (lookupImpl == null || "org.apache.solr.spelling.suggest.jaspell.JaspellLookup".equals(lookupImpl)) {
        lookupImpl = JaspellLookupFactory.class.getName();
    } else if ("org.apache.solr.spelling.suggest.tst.TSTLookup".equals(lookupImpl)) {
        lookupImpl = TSTLookupFactory.class.getName();
    } else if ("org.apache.solr.spelling.suggest.fst.FSTLookup".equals(lookupImpl)) {
        lookupImpl = FSTLookupFactory.class.getName();
    }

    factory = core.getResourceLoader().newInstance(lookupImpl, LookupFactory.class);

    lookup = factory.create(config, core);
    String store = (String) config.get(STORE_DIR);
    if (store != null) {
        storeDir = new File(store);
        if (!storeDir.isAbsolute()) {
            // relative store paths are resolved against the core's data dir
            storeDir = new File(core.getDataDir() + File.separator + storeDir);
        }
        if (!storeDir.exists()) {
            // surface creation failure instead of silently ignoring mkdirs()
            if (!storeDir.mkdirs()) {
                LOG.warn("Could not create store directory " + storeDir.getAbsolutePath());
            }
        } else {
            // attempt reload of the stored lookup; try-with-resources closes
            // the stream even when load() throws (lookup.load may also close
            // it itself — double-close on FileInputStream is harmless)
            try (FileInputStream in = new FileInputStream(new File(storeDir, factory.storeFileName()))) {
                lookup.load(in);
            } catch (IOException e) {
                LOG.warn("Loading stored lookup data failed", e);
            }
        }
    }
    return name;
}

From source file:org.dice.solrenhancements.spellchecker.DiceSpellCheckComponent.java

License:Apache License

/**
 * Initialize all configured spell checkers and the query converter.
 *
 * For each "spellchecker" entry in the init params, instantiate and
 * initialize the checker (defaulting to IndexBasedSpellChecker when no
 * classname is given), register it under its dictionary name — at most one
 * checker may occupy the default dictionary slot — and hook up searcher
 * listeners so indices are built on first search and, optionally, on
 * commit/optimize. Finally, wire the (single) query converter to the
 * analyzer of the configured queryAnalyzerFieldType, falling back to a
 * whitespace analyzer when that field type is unknown.
 */
@Override
public void inform(SolrCore core) {
    if (initParams == null) {
        return;
    }
    LOG.info("Initializing spell checkers");
    boolean hasDefault = false;
    for (int i = 0; i < initParams.size(); i++) {
        if (!"spellchecker".equals(initParams.getName(i))) {
            continue;
        }
        NamedList spellchecker = (NamedList) initParams.getVal(i);
        String className = (String) spellchecker.get("classname");
        // TODO: this is a little bit sneaky: warn if class isn't supplied
        // so that it's mandatory in a future release?
        if (className == null) {
            className = IndexBasedSpellChecker.class.getName();
        }
        SolrResourceLoader loader = core.getResourceLoader();
        SolrSpellChecker checker = loader.newInstance(className, SolrSpellChecker.class);
        if (checker == null) {
            throw new RuntimeException("Can't load spell checker: " + className);
        }
        String dictionary = checker.init(spellchecker, core);
        if (dictionary != null) {
            boolean isDefault = dictionary.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME);
            if (isDefault && hasDefault) {
                // two checkers without an explicit name both map to the default
                throw new RuntimeException("More than one dictionary is missing name.");
            }
            if (isDefault) {
                hasDefault = true;
            }
            spellCheckers.put(dictionary, checker);
        } else if (!hasDefault) {
            // unnamed dictionary claims the default slot
            spellCheckers.put(SolrSpellChecker.DEFAULT_DICTIONARY_NAME, checker);
            hasDefault = true;
        } else {
            throw new RuntimeException("More than one dictionary is missing name.");
        }
        // Register event listeners for this SpellChecker
        core.registerFirstSearcherListener(new SpellCheckerListener(core, checker, false, false));
        boolean buildOnCommit = Boolean.parseBoolean((String) spellchecker.get("buildOnCommit"));
        boolean buildOnOptimize = Boolean.parseBoolean((String) spellchecker.get("buildOnOptimize"));
        if (buildOnCommit || buildOnOptimize) {
            LOG.info("Registering newSearcher listener for spellchecker: "
                    + checker.getDictionaryName());
            core.registerNewSearcherListener(
                    new SpellCheckerListener(core, checker, buildOnCommit, buildOnOptimize));
        }
    }

    Map<String, QueryConverter> queryConverters = new HashMap<String, QueryConverter>();
    core.initPlugins(queryConverters, QueryConverter.class);

    // ensure that there is at least one query converter defined
    if (queryConverters.size() == 0) {
        LOG.info("No queryConverter defined, using default converter");
        queryConverters.put("queryConverter", new SpellingQueryConverter());
    }

    // there should only be one
    if (queryConverters.size() == 1) {
        queryConverter = queryConverters.values().iterator().next();
        IndexSchema schema = core.getLatestSchema();
        String fieldTypeName = (String) initParams.get("queryAnalyzerFieldType");
        FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
        // unknown field type -> whitespace analysis as a safe fallback
        Analyzer analyzer = fieldType == null
                ? new WhitespaceAnalyzer(core.getSolrConfig().luceneMatchVersion)
                : fieldType.getQueryAnalyzer();
        // TODO: There's got to be a better way!  Where's Spring when you need it?
        queryConverter.setAnalyzer(analyzer);
    }
}

From source file:org.sindice.siren.solr.qparser.keyword.KeywordQParser.java

License:Apache License

/**
 * Build the "per-field" analyzer wrapper for the boosted fields.
 *
 * For every field in the boost map, look up its query analyzer. When that
 * analyzer is a {@link MultiQueryAnalyzerWrapper}, the keyword variant —
 * registered under the field type's name with "-keyword" appended — is used
 * instead; its absence is a server error. Fields not covered fall back to
 * whitespace analysis.
 *
 * @param boosts the field boosts
 * @return the per-field analyzer wrapper
 */
private Analyzer initAnalyzers(final Map<String, Float> boosts) {
    final PerFieldAnalyzerWrapper perField =
            new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_31));

    for (final String field : boosts.keySet()) {
        final FieldType type = req.getSchema().getFieldType(field);
        final Analyzer queryAnalyzer = type.getQueryAnalyzer();
        if (queryAnalyzer instanceof MultiQueryAnalyzerWrapper) {
            // extract the associated keyword query analyzer from the wrapper
            final MultiQueryAnalyzerWrapper multi = (MultiQueryAnalyzerWrapper) queryAnalyzer;
            final Analyzer keywordAnalyzer = multi.getAnalyzer(type.getTypeName() + "-keyword");
            if (keywordAnalyzer == null) {
                throw new SolrException(ErrorCode.SERVER_ERROR,
                        "Field type definition " + type.getTypeName() + "-keyword not defined");
            }
            perField.addAnalyzer(field, keywordAnalyzer);
        } else {
            perField.addAnalyzer(field, queryAnalyzer);
        }
    }
    return perField;
}

From source file:org.sindice.siren.solr.qparser.ntriple.NTripleQParser.java

License:Apache License

/**
 * Find the associated query analyzers for the given field type.
 * The query analyzers should be provided through the
 * {@link MultiQueryAnalyzerWrapper}.<br>
 * Derive the names of the field types from the name of the given field type.
 * There is three field types expected:/*from  ww  w . j av a  2  s.c  o m*/
 * <ul>
 * <li> main: define the main NTriple query analyzer
 * <li> uri: define the analyzer for URIs
 * <li> literal: define the analyzer for literals
 * <ul>
 *
 * @param fieldType The field type of the ntriple fields to query
 */
private void initAnalyzers(final FieldType fieldType) {
    if (!(fieldType.getQueryAnalyzer() instanceof MultiQueryAnalyzerWrapper)) {
        throw new SolrException(ErrorCode.SERVER_ERROR,
                "MultiQueryAnalyzerWrapper" + " expected for field type " + fieldType.getTypeName());
    }
    final MultiQueryAnalyzerWrapper wrapper = (MultiQueryAnalyzerWrapper) fieldType.getQueryAnalyzer();
    if ((ntripleAnalyzer = wrapper.getAnalyzer(fieldType.getTypeName() + "-main")) == null) {
        throw new SolrException(ErrorCode.SERVER_ERROR,
                "Field type definition " + fieldType.getTypeName() + "-main not defined");
    }
    if ((uriAnalyzer = wrapper.getAnalyzer(fieldType.getTypeName() + "-uri")) == null) {
        throw new SolrException(ErrorCode.SERVER_ERROR,
                "Field type definition " + fieldType.getTypeName() + "-uri not defined");
    }
    if ((literalAnalyzer = wrapper.getAnalyzer(fieldType.getTypeName() + "-literal")) == null) {
        throw new SolrException(ErrorCode.SERVER_ERROR,
                "Field type definition " + fieldType.getTypeName() + "-literal not defined");
    }
}

From source file:org.sindice.siren.solr.schema.TestSirenField.java

License:Open Source License

@Test
public void testSirenFieldTopLevelAnalyzer() throws Exception {
    final FieldType fieldType = h.getCore().getSchema().getField("ntriple").getType();

    // Index-time analyzer: tuple tokenizer followed by 3 filters.
    assertTrue(fieldType.getAnalyzer() instanceof TokenizerChain);
    final TokenizerChain indexChain = (TokenizerChain) fieldType.getAnalyzer();
    assertNotNull(indexChain.getTokenizerFactory());
    assertTrue(indexChain.getTokenizerFactory() instanceof TupleTokenizerFactory);
    assertNotNull(indexChain.getTokenFilterFactories());
    assertEquals(3, indexChain.getTokenFilterFactories().length);

    // Query-time analyzer: whitespace tokenizer followed by 6 filters
    // (keyword-query configuration).
    assertTrue(fieldType.getQueryAnalyzer() instanceof TokenizerChain);
    final TokenizerChain queryChain = (TokenizerChain) fieldType.getQueryAnalyzer();
    assertNotNull(queryChain.getTokenizerFactory());
    assertTrue(queryChain.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
    assertNotNull(queryChain.getTokenFilterFactories());
    assertEquals(6, queryChain.getTokenFilterFactories().length);
}