List of usage examples for `org.apache.solr.schema.FieldType#readableToIndexed`
public String readableToIndexed(String val)
From source file: com.billiger.solr.handler.component.QLTBComponent.java
License: Apache License
/** * Load the QLTB map from a Config.// w w w. j ava2 s.com * * Read and process the "boosts/query" XPath nodes from the given * Config, and build them into a QLTB map. The XML format is described * in the class documentation. * * The result of this function is a map of (analyzed) query strings * with their respective lists of boosted query terms. These are * ConstantScoreQuery instances for each term with the corresponding * boost factor. (Invalid - i.e. non-numerical - boost factors are * logged as warnings). * * The SOLR core that is passed into this function is necessary for * determinating the FieldType of the boosted fields. Only with the * correct field type is it possible to boost non-string fields, as * these non-string values need to be ft.readableToIndexed(). * * @param cfg * Config object to read the XML QLTB from * @param core * SOLR Core the query is performed on * @return QLTB map * * @throws IOException * If the query could not be analysed */ private Map<String, List<Query>> loadQLTBMap(final Config cfg, final SolrCore core) throws IOException { Map<String, List<Query>> map = new HashMap<String, List<Query>>(); NodeList nodes = (NodeList) cfg.evaluate("boosts/query", XPathConstants.NODESET); for (int i = 0; i < nodes.getLength(); i++) { Node node = nodes.item(i); String qstr = DOMUtil.getAttr(node, "text", "missing query 'text'"); qstr = getAnalyzedQuery(qstr); NodeList children = node.getChildNodes(); List<Query> termBoosts = new ArrayList<Query>(); for (int j = 0; j < children.getLength(); j++) { Node child = children.item(j); if (!child.getNodeName().equals("term")) { continue; } String field = DOMUtil.getAttr(child, "field", "missing 'field'"); String value = DOMUtil.getAttr(child, "value", "missing 'value'"); String boost = DOMUtil.getAttr(child, "boost", "missing 'boost'"); float termBoost = 1; try { termBoost = Float.parseFloat(boost); } catch (NumberFormatException e) { log.warn("invalid boost " + boost + " for query \"" + qstr + "\", term: 
\"" + field + ":" + value + "\": " + e.getMessage()); continue; } // without readableToIndexed QLTB boosting would only work // for string field types FieldType ft = core.getLatestSchema().getField(field).getType(); value = ft.readableToIndexed(value); Term t = new Term(field, value); TermQuery tq = new TermQuery(t); ConstantScoreQuery csq = new ConstantScoreQuery(tq); csq.setBoost(termBoost); termBoosts.add(csq); } map.put(qstr, termBoosts); } return map; }
From source file: org.alfresco.solr.SolrInformationServer.java
License: Open Source License
/**
 * Returns the tenant/ACL/DB ids of documents whose content is marked dirty or new
 * (i.e. content still to be fetched), skipping transactions already recorded in
 * {@code cleanContentCache}.
 *
 * NOTE(review): the {@code start} and {@code rows} parameters are not used anywhere
 * in this body — presumably paging was intended; confirm against callers.
 *
 * @param start unused in this implementation
 * @param rows  unused in this implementation
 * @return list of ids for documents with unclean content
 * @throws IOException on index access failure
 */
@Override
public List<TenantAclIdDbId> getDocsWithUncleanContent(int start, int rows) throws IOException {
    RefCounted<SolrIndexSearcher> refCounted = null;
    try {
        List<TenantAclIdDbId> docIds = new ArrayList<>();
        refCounted = this.core.getSearcher();
        SolrIndexSearcher searcher = refCounted.get();
        /*
         * Below is the code for purging the cleanContentCache.
         * The cleanContentCache is an in-memory LRU cache of the transactions that have already
         * had their content fetched. This is needed because the ContentTracker does not have an up-to-date
         * snapshot of the index to determine which nodes are marked as dirty/new. The cleanContentCache is used
         * to filter out nodes that belong to transactions that have already been processed, which stops them from
         * being re-processed.
         *
         * The cleanContentCache needs to be purged periodically to support retrying of failed content fetches.
         * This is because fetches for individual nodes within the transaction may have failed, but the
         * transaction will still be in the cleanContentCache, which prevents it from being retried.
         *
         * Once a transaction is purged from the cleanContentCache it will be retried automatically if it is
         * marked dirty/new in the current snapshot of the index.
         *
         * The code below runs every two minutes and purges transactions from the
         * cleanContentCache that are more than 20 minutes old.
         */
        long purgeTime = System.currentTimeMillis();
        if (purgeTime - cleanContentLastPurged > 120000) { // 2 minutes between purge sweeps
            Iterator<Entry<Long, Long>> entries = cleanContentCache.entrySet().iterator();
            while (entries.hasNext()) {
                Entry<Long, Long> entry = entries.next();
                long txnTime = entry.getValue();
                if (purgeTime - txnTime > 1200000) {
                    // Purge the clean content cache of records more than 20 minutes old.
                    entries.remove();
                }
            }
            cleanContentLastPurged = purgeTime;
        }
        long txnFloor;
        // Find the lowest transaction id among dirty/new docs: sort ascending on
        // FIELD_INTXID and collect only the single top hit.
        Sort sort = new Sort(new SortField(FIELD_INTXID, SortField.Type.LONG));
        sort = sort.rewrite(searcher);
        TopFieldCollector collector = TopFieldCollector.create(sort, 1, null, false, false, false);
        DelegatingCollector delegatingCollector = new TxnCacheFilter(cleanContentCache);
        // Filter transactions that have already been processed.
        delegatingCollector.setLastDelegate(collector);
        searcher.search(dirtyOrNewContentQuery(), delegatingCollector);
        if (collector.getTotalHits() == 0) {
            // Nothing dirty/new outside the cache: done.
            return docIds;
        }
        ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
        List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
        int index = ReaderUtil.subIndex(scoreDocs[0].doc, leaves);
        LeafReaderContext context = leaves.get(index);
        // Read the txn id of the top hit from doc values (docBase converts the
        // global doc id to a segment-local one).
        NumericDocValues longs = context.reader().getNumericDocValues(FIELD_INTXID);
        txnFloor = longs.get(scoreDocs[0].doc - context.docBase);
        // Find the next N transactions.
        // The TxnCollector collects the transaction ids from the matching documents.
        // The txnIds are limited to a range >= the txnFloor and < an arbitrary transaction ceiling.
        TxnCollector txnCollector = new TxnCollector(txnFloor);
        searcher.search(dirtyOrNewContentQuery(), txnCollector);
        LongHashSet txnSet = txnCollector.getTxnSet();
        if (txnSet.size() == 0) {
            // This should really never be the case; at a minimum the transaction floor
            // should be collected.
            return docIds;
        }
        FieldType fieldType = searcher.getSchema().getField(FIELD_INTXID).getType();
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (LongCursor cursor : txnSet) {
            long txnID = cursor.value;
            // Build up the query for the filter of transactions we need to pull the
            // dirty content for. readableToIndexed converts the readable long to the
            // field's indexed term form so the TermQuery matches non-string fields.
            TermQuery txnIDQuery = new TermQuery(
                    new Term(FIELD_INTXID, fieldType.readableToIndexed(Long.toString(txnID))));
            builder.add(new BooleanClause(txnIDQuery, BooleanClause.Occur.SHOULD));
        }
        BooleanQuery txnFilterQuery = builder.build();
        // Get the docs with dirty content for the transactions gathered above.
        DocListCollector docListCollector = new DocListCollector();
        BooleanQuery.Builder builder2 = new BooleanQuery.Builder();
        builder2.add(dirtyOrNewContentQuery(), BooleanClause.Occur.MUST);
        builder2.add(new QueryWrapperFilter(txnFilterQuery), BooleanClause.Occur.MUST);
        searcher.search(builder2.build(), docListCollector);
        IntArrayList docList = docListCollector.getDocs();
        int size = docList.size();
        List<Long> processedTxns = new ArrayList<>();
        for (int i = 0; i < size; ++i) {
            int doc = docList.get(i);
            Document document = searcher.doc(doc, REQUEST_ONLY_ID_FIELD);
            index = ReaderUtil.subIndex(doc, leaves);
            context = leaves.get(index);
            longs = context.reader().getNumericDocValues(FIELD_INTXID);
            long txnId = longs.get(doc - context.docBase);
            if (!cleanContentCache.containsKey(txnId)) {
                processedTxns.add(txnId);
                IndexableField id = document.getField(FIELD_SOLR4_ID);
                String idString = id.stringValue();
                TenantAclIdDbId tenantAndDbId = AlfrescoSolrDataModel.decodeNodeDocumentId(idString);
                docIds.add(tenantAndDbId);
            }
        }
        long txnTime = System.currentTimeMillis();
        for (Long l : processedTxns) {
            // Save the timestamp so we know when we can clean out this entry.
            cleanContentCache.put(l, txnTime);
        }
        return docIds;
    } finally {
        // Always release the searcher reference, even on early return or exception.
        ofNullable(refCounted).ifPresent(RefCounted::decref);
    }
}
From source file: org.alfresco.solr.SolrInformationServer.java
License: Open Source License
private boolean isInIndex(long id, LRU cache, String fieldName, boolean populateCache, SolrCore core) throws IOException { if (cache.containsKey(id)) { return true; } else {//ww w . j a v a 2 s. co m RefCounted<SolrIndexSearcher> refCounted = null; try { if (populateCache) { cache.put(id, null); // Safe to add this here because we reset this on rollback. } refCounted = core.getSearcher(); SolrIndexSearcher searcher = refCounted.get(); FieldType fieldType = searcher.getSchema().getField(fieldName).getType(); TermQuery q = new TermQuery(new Term(fieldName, fieldType.readableToIndexed(Long.toString(id)))); TopDocs topDocs = searcher.search(q, 1); return topDocs.totalHits > 0; } finally { ofNullable(refCounted).ifPresent(RefCounted::decref); } } }