List of usage examples for org.apache.solr.update AddUpdateCommand getSolrInputDocument
public SolrInputDocument getSolrInputDocument()
From source file:com.cominvent.solr.update.processor.MappingUpdateProcessor.java
License:Apache License
@Override public void processAdd(AddUpdateCommand cmd) throws IOException { if (isEnabled()) { SolrInputDocument doc = cmd.getSolrInputDocument(); // Fetch document id String docId = ""; if (doc.containsKey(docIdField)) docId = (String) doc.getFieldValue(docIdField); String inValue = (String) doc.getFieldValue(inputField); String outValue;/*from w w w . j a v a2 s . c o m*/ if (map.containsKey(inValue)) { outValue = map.get(inValue); } else { outValue = fallbackValue; } if (outValue != null && outValue.length() > 0) { log.debug("Mapping done for document " + docId + ": " + inValue + " => " + outValue); doc.setField(outputField, outValue); } } else { log.debug("MappingUpdateProcessor is not enabled. Skipping"); } super.processAdd(cmd); }
From source file:com.francelabs.datafari.updateprocessor.DatafariUpdateProcessor.java
License:Apache License
/**
 * Post-processes crawled/Tika-extracted documents before indexing:
 * de-duplicates "id" and "last_modified", promotes "ignored_filelastmodified"
 * into "last_modified", copies the id into "url", derives "title" and "source"
 * from the url, and computes a lowercase "extension" field.
 * NOTE(review): assumes every document carries an "id" field —
 * doc.getFieldValues("id") is dereferenced without a null check and would
 * throw a NullPointerException otherwise; confirm upstream always sets it.
 */
@Override
public void processAdd(final AddUpdateCommand cmd) throws IOException {
    final SolrInputDocument doc = cmd.getSolrInputDocument();
    // Sometimes Tika puts several ids, so we keep the first one, which is
    // always the right one.
    if (doc.getFieldValues("id").size() > 1) {
        final Object id = doc.getFieldValue("id");
        doc.remove("id");
        doc.addField("id", id);
    }
    // If the connector provided ignored_filelastmodified, use it as the
    // authoritative last_modified value.
    if (doc.getFieldValue("ignored_filelastmodified") != null) {
        final Object last_modified = doc.getFieldValue("ignored_filelastmodified");
        doc.remove("last_modified");
        doc.addField("last_modified", last_modified);
    }
    // Sometimes Tika puts several last_modified dates, so we keep the first
    // one, which is always the right one.
    if ((doc.getFieldValues("last_modified") != null) && (doc.getFieldValues("last_modified").size() > 1)) {
        final Object last_modified = doc.getFieldValue("last_modified");
        doc.remove("last_modified");
        doc.addField("last_modified", last_modified);
    }
    final String url = (String) doc.getFieldValue("id");
    // Currently unused: placeholder for a disabled path-hierarchy facet
    // feature that used to populate a "urlHierarchy" field.
    final List<String> urlHierarchy = new ArrayList<>();
    doc.addField("url", url);
    // Resolve a display filename: prefer the stream name reported by the
    // connector, otherwise take the last path segment of the url.
    String filename = "";
    final SolrInputField streamNameField = doc.get("ignored_stream_name");
    if (streamNameField != null) {
        filename = (String) streamNameField.getFirstValue();
    } else {
        final Pattern pattern = Pattern.compile("[^/]*$");
        final Matcher matcher = pattern.matcher(url);
        if (matcher.find()) {
            filename = matcher.group();
        }
    }
    // Web documents keep their extracted title when present; file documents
    // always have their title replaced by the filename.
    if (url.startsWith("http")) {
        if (doc.get("title") == null) {
            doc.addField("title", filename);
        }
        doc.addField("source", "web");
    }
    if (url.startsWith("file")) {
        doc.removeField("title");
        doc.addField("title", filename);
        doc.addField("source", "file");
    }
    // Compute the extension either from the url path or from the Tika mime
    // type (per the extensionFromName switch), falling back to the other
    // source when the preferred one yields an implausible length (2-4 chars).
    String extension = "";
    URL urlObject = new URL(url);
    String path = urlObject.getPath();
    final SolrInputField mimeTypeField = doc.get("ignored_content_type");
    String nameExtension = FilenameUtils.getExtension(path);
    String tikaExtension = mimeTypeField == null ? "" : extensionFromMimeTypeField(mimeTypeField);
    // NOTE(review): if extensionFromMimeTypeField can return null, the
    // tikaExtension.length() call below would NPE — confirm its contract.
    if (extensionFromName) {
        extension = nameExtension.length() > 1 && nameExtension.length() < 5 ? nameExtension : tikaExtension;
    } else {
        extension = tikaExtension.length() > 1 && tikaExtension.length() < 5 ? tikaExtension : nameExtension;
    }
    doc.addField("extension", extension.toLowerCase());
    super.processAdd(cmd);
}
From source file:com.github.le11.nls.solr.UIMAAsyncUpdateRequestProcessor.java
License:Apache License
@Override public void processAdd(AddUpdateCommand cmd) throws IOException { String text = null;//from w w w.j a v a2 s . c o m try { /* get Solr document */ SolrInputDocument solrInputDocument = cmd.getSolrInputDocument(); /* get the fields to analyze */ String[] texts = getTextsToAnalyze(solrInputDocument); for (int i = 0; i < texts.length; i++) { text = texts[i]; if (text != null && text.length() > 0) { /* process the text value */ JCas jcas = UIMAAnalyzersUtils.getInstance() .analyzeAsynchronously(new StringReader(text), solrUIMAConfiguration.getAePath()) .getJCas(); UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas); /* get field mapping from config */ /* map type features on fields */ for (String typeFQN : solrUIMAConfiguration.getTypesFeaturesFieldsMapping().keySet()) { uimaToSolrMapper.map(typeFQN, solrUIMAConfiguration.getTypesFeaturesFieldsMapping().get(typeFQN)); } } } } catch (Exception e) { String logField = solrUIMAConfiguration.getLogField(); if (logField == null) { SchemaField uniqueKeyField = solrCore.getSchema().getUniqueKeyField(); if (uniqueKeyField != null) { logField = uniqueKeyField.getName(); } } String optionalFieldInfo = logField == null ? "." : new StringBuilder(". ").append(logField).append("=") .append((String) cmd.getSolrInputDocument().getField(logField).getValue()).append(", ") .toString(); int len = Math.min(text.length(), 100); if (solrUIMAConfiguration.isIgnoreErrors()) { log.warn(new StringBuilder("skip the text processing due to ").append(e.getLocalizedMessage()) .append(optionalFieldInfo).append(" text=\"").append(text.substring(0, len)).append("...\"") .toString()); } else { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, new StringBuilder("processing error: ").append(e.getLocalizedMessage()) .append(optionalFieldInfo).append(" text=\"").append(text.substring(0, len)) .append("...\"").toString(), e); } } super.processAdd(cmd); }
From source file:com.grantingersoll.intell.index.BayesUpdateRequestProcessor.java
License:Apache License
/**
 * Classifies the incoming document and, when the classifier produces a real
 * label, stores it in the configured output field before continuing the chain.
 */
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
    final SolrInputDocument document = cmd.getSolrInputDocument();
    final ClassifierResult classification = classifyDocument(document);
    // Identity comparison against the NULL sentinel label is deliberate here
    // (presumably a shared constant — matches the original behavior).
    if (classification != null && classification.getLabel() != NULL) {
        document.addField(outputField, classification.getLabel());
    }
    super.processAdd(cmd);
}
From source file:com.gu.solr.MergeUpdateRequestProcessorFactory.java
License:Apache License
/**
 * Merges the incoming update with documents already in the index. Two modes:
 * - no mergeQuery configured: look up the existing document by unique id,
 *   merge the incoming fields into it, and index the result;
 * - mergeQuery configured: merge the incoming fields (minus the id) into
 *   every document matching the query, re-indexing each match.
 */
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
    if (mergeQuery == null || mergeQuery.trim().equals("")) {
        // By-id mode: merge into the single existing document, if any.
        String id = cmd.getIndexedId(schema);
        log.debug("MergeUpdateRequest: add " + id);
        Searcher search = new Searcher(schema, searcher, log);
        SolrDocument doc = search.findById(id);
        if (doc != null) {
            if (log.isDebugEnabled()) {
                log.debug("MergeUpdateRequest: Merging with existing document.");
            }
            // Replace the command's document in place with the merged result.
            cmd.solrDoc = MergeUtils.merge(cmd.getSolrInputDocument(), doc, schema, overwriteMultivalues);
        } else {
            if (log.isDebugEnabled()) {
                log.debug("MergeUpdateRequest: New insert.");
            }
        }
        super.processAdd(cmd);
    } else {
        // Query mode: fold the incoming fields into every matching document.
        if (log.isDebugEnabled()) {
            log.debug("MergeUpdateRequest: add " + mergeQuery);
            log.debug(String.format("MergeUpdateRequest: Merge into existing documents(%s)", mergeQuery));
        }
        Query q = QueryParsing.parseQuery(mergeQuery, schema);
        DocIterator docs = searcher.getDocSet(q).iterator();
        // Strip the unique id so it is not copied onto the matched documents.
        SolrInputDocument merge = MergeUtils.withoutId(cmd.getSolrInputDocument(), schema);
        while (docs.hasNext()) {
            Document luceneDoc = searcher.doc(docs.nextDoc());
            SolrDocument doc = MergeUtils.toSolrDocument(luceneDoc, schema);
            SolrInputDocument merged = MergeUtils.merge(merge, doc, schema, overwriteMultivalues);
            log.debug("MergeUpdateRequest: merged = " + merged);
            super.processAdd(MergeUtils.addCommandFor(merged));
        }
        // NOTE(review): in query mode the original incoming command itself is
        // never passed down the chain — confirm this is intentional.
    }
}
From source file:com.ifactory.press.db.solr.processor.FieldMergingProcessor.java
License:Apache License
@Override public void processAdd(AddUpdateCommand cmd) throws IOException { if (sourceAnalyzers != null && destinationField != null) { SolrInputDocument doc = cmd.getSolrInputDocument(); for (Map.Entry<String, PoolingAnalyzerWrapper> entry : sourceAnalyzers.entrySet()) { String sourceFieldName = entry.getKey(); Analyzer fieldAnalyzer = entry.getValue(); Collection<Object> fieldValues = doc.getFieldValues(sourceFieldName); if (fieldValues != null) { for (Object value : fieldValues) { IndexableField fieldValue = new TextField(destinationField, fieldAnalyzer.tokenStream(sourceFieldName, value.toString())); doc.addField(destinationField, fieldValue); }//from w ww .j a v a2s .c o m } } } if (next != null) next.processAdd(cmd); // and then release all the analyzers, readying them for re-use for (Map.Entry<String, PoolingAnalyzerWrapper> entry : sourceAnalyzers.entrySet()) { entry.getValue().release(); } }
From source file:com.sindicetech.siren.solr.facet.SirenFacetProcessor.java
License:Open Source License
/**
 * Extracts facet entries from the SIREn document, dynamically adds any missing
 * facet fields to the (mutable) managed schema, then copies the facet values
 * onto the document before handing it down the chain. Values longer than the
 * per-type maximum (or DEFAULT_MAX_FACET_VALUE_LENGTH) are skipped entirely.
 */
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
    SolrCore core = cmd.getReq().getCore();
    IndexSchema schema = core.getLatestSchema();
    // Dynamic field creation requires a managed (mutable) schema.
    if (!schema.isMutable()) {
        throw new SolrException(BAD_REQUEST,
                String.format("This IndexSchema, of core %s, is not mutable.", core.getName()));
    }
    SolrInputDocument doc = cmd.getSolrInputDocument();
    extractor.setSchema(schema);
    List<SirenFacetEntry> entries = extractor.extractFacets(doc);
    // update schema
    // use Sets so that we add a fieldname only once even if it is generated
    // multiple times (for multiple paths)
    Set<SchemaField> newFields = new HashSet<SchemaField>();
    for (SirenFacetEntry entry : entries) {
        // skip entry if the field is already in the schema
        if (schema.getFieldOrNull(entry.toFieldName()) != null) {
            continue;
        }
        TypeMapping typeMapping = getTypeMappingValueClass(entry.datatype.xsdDatatype);
        // skip facet values that are too large altogether - they don't make
        // sense for faceting
        if (entry.value instanceof String && ((String) entry.value)
                .length() > (typeMapping.maxFieldSize != null ? typeMapping.maxFieldSize
                        : DEFAULT_MAX_FACET_VALUE_LENGTH)) {
            continue;
        }
        // NOTE(review): this second lookup duplicates the typeMapping call
        // just above; typeMapping.fieldType would do.
        String fieldTypeName = getTypeMappingValueClass(entry.datatype.xsdDatatype).fieldType;
        // see FieldProperties.propertyNames[]
        Map<String, Boolean> options = new HashMap<String, Boolean>();
        options.put("indexed", false);
        options.put("stored", false);
        options.put("docValues", true);
        options.put("multiValued", true);
        newFields.add(schema.newField(entry.toFieldName(), fieldTypeName, options));
    }
    if (!newFields.isEmpty()) {
        IndexSchema newSchema = schema.addFields(newFields);
        cmd.getReq().getCore().setLatestSchema(newSchema);
        cmd.getReq().updateSchemaToLatest();
        logger.debug("Successfully added field(s) to the schema.");
    }
    // update document: add each size-acceptable facet value under its field name
    for (SirenFacetEntry entry : entries) {
        TypeMapping typeMapping = getTypeMappingValueClass(entry.datatype.xsdDatatype);
        // skip facet values that are too large altogether - they don't make
        // sense for faceting
        if (entry.value instanceof String && ((String) entry.value)
                .length() > (typeMapping.maxFieldSize != null ? typeMapping.maxFieldSize
                        : DEFAULT_MAX_FACET_VALUE_LENGTH)) {
            continue;
        }
        doc.addField(entry.toFieldName(), entry.value);
    }
    // call the next one in chain
    super.processAdd(cmd);
}
From source file:com.tamingtext.classifier.bayes.BayesUpdateRequestProcessor.java
License:Apache License
/**
 * Runs the Bayes classifier over the incoming document (which attaches its
 * result to the document) and then continues the processor chain.
 */
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
    final SolrInputDocument document = cmd.getSolrInputDocument();
    classifyDocument(document);
    super.processAdd(cmd);
}
From source file:io.yucca.solr.processor.HierarchyExtractorUpdateProcessor.java
License:Apache License
/**
 * Extracts hierarchy information from the incoming document in place, then
 * continues the processor chain.
 */
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
    final SolrInputDocument document = cmd.getSolrInputDocument();
    process(document);
    super.processAdd(cmd);
}
From source file:lux.solr.CloudIdProcessor.java
License:Mozilla Public License
@Override public void processAdd(final AddUpdateCommand cmd) throws IOException { SolrInputDocument solrInputDocument = cmd.getSolrInputDocument(); String uri = (String) solrInputDocument.getFieldValue(uriFieldName); if (uri != null) { // we actually only need about 42 bits to count up to about to 2070, so use the remaining 22 // for some bits from a uri hash to make this (more likely to be) globally unique. long t = System.currentTimeMillis() << 22; long hashCode = uri.hashCode() & 0x2fffff; // would the high-order bits be more random? long luxDocId = t | hashCode; solrInputDocument.addField(idFieldName, luxDocId); }/*w w w . jav a2 s . co m*/ if (next != null) { next.processAdd(cmd); } }