Example usage for org.apache.solr.update AddUpdateCommand getSolrInputDocument

List of usage examples for org.apache.solr.update AddUpdateCommand getSolrInputDocument

Introduction

On this page you can find example usage for org.apache.solr.update AddUpdateCommand getSolrInputDocument.

Prototype

public SolrInputDocument getSolrInputDocument() 

Source Link

Usage

From source file:com.cominvent.solr.update.processor.MappingUpdateProcessor.java

License:Apache License

@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
    if (isEnabled()) {
        SolrInputDocument doc = cmd.getSolrInputDocument();

        // Fetch the document id; it is used only for the debug log message.
        String docId = "";
        if (doc.containsKey(docIdField)) {
            docId = (String) doc.getFieldValue(docIdField);
        }

        // Look up the mapped value for the input field, falling back to the
        // configured default when no mapping exists for this value.
        String inValue = (String) doc.getFieldValue(inputField);
        String outValue = map.containsKey(inValue) ? map.get(inValue) : fallbackValue;

        // Only write the output field when the mapping produced a non-empty value.
        if (outValue != null && !outValue.isEmpty()) {
            log.debug("Mapping done for document " + docId + ": " + inValue + " => " + outValue);
            doc.setField(outputField, outValue);
        }
    } else {
        log.debug("MappingUpdateProcessor is not enabled. Skipping");
    }

    super.processAdd(cmd);
}

From source file:com.francelabs.datafari.updateprocessor.DatafariUpdateProcessor.java

License:Apache License

@Override
public void processAdd(final AddUpdateCommand cmd) throws IOException {
    final SolrInputDocument doc = cmd.getSolrInputDocument();

    // Sometimes Tika puts several ids, so we keep the first one, which is
    // always the right one. Null-guarded: the original NPE'd when the "id"
    // field was absent from the document.
    if ((doc.getFieldValues("id") != null) && (doc.getFieldValues("id").size() > 1)) {
        final Object id = doc.getFieldValue("id");
        doc.remove("id");
        doc.addField("id", id);
    }

    // If Tika stored the date in ignored_filelastmodified, copy its value
    // into the last_modified field.
    if (doc.getFieldValue("ignored_filelastmodified") != null) {
        final Object lastModified = doc.getFieldValue("ignored_filelastmodified");
        doc.remove("last_modified");
        doc.addField("last_modified", lastModified);
    }

    // Sometimes Tika puts several last_modified dates, so we keep the first
    // one, which is always the right one.
    if ((doc.getFieldValues("last_modified") != null) && (doc.getFieldValues("last_modified").size() > 1)) {
        final Object lastModified = doc.getFieldValue("last_modified");
        doc.remove("last_modified");
        doc.addField("last_modified", lastModified);
    }

    // The document id doubles as the document url.
    final String url = (String) doc.getFieldValue("id");
    doc.addField("url", url);

    // Derive the filename: prefer the stream name set by Tika, otherwise
    // take the last path segment of the url.
    String filename = "";
    final SolrInputField streamNameField = doc.get("ignored_stream_name");
    if (streamNameField != null) {
        filename = (String) streamNameField.getFirstValue();
    } else {
        final Pattern pattern = Pattern.compile("[^/]*$");
        final Matcher matcher = pattern.matcher(url);
        if (matcher.find()) {
            filename = matcher.group();
        }
    }

    // Tag the document source and make sure a title is present.
    if (url.startsWith("http")) {
        if (doc.get("title") == null) {
            doc.addField("title", filename);
        }
        doc.addField("source", "web");
    }

    if (url.startsWith("file")) {
        doc.removeField("title");
        doc.addField("title", filename);
        doc.addField("source", "file");
    }

    // Determine the file extension, either from the file name or from the
    // Tika-detected mime type depending on configuration. A candidate is
    // only accepted when its length is plausible (2 to 4 characters);
    // otherwise the other source is used.
    final URL urlObject = new URL(url);
    final String path = urlObject.getPath();
    final SolrInputField mimeTypeField = doc.get("ignored_content_type");

    final String nameExtension = FilenameUtils.getExtension(path);
    String tikaExtension = mimeTypeField == null ? "" : extensionFromMimeTypeField(mimeTypeField);
    if (tikaExtension == null) {
        // extensionFromMimeTypeField may return null for unknown mime types;
        // normalize to "" so the length checks below cannot NPE.
        tikaExtension = "";
    }

    final String extension;
    if (extensionFromName) {
        extension = nameExtension.length() > 1 && nameExtension.length() < 5 ? nameExtension : tikaExtension;
    } else {
        extension = tikaExtension.length() > 1 && tikaExtension.length() < 5 ? tikaExtension : nameExtension;
    }
    doc.addField("extension", extension.toLowerCase());

    super.processAdd(cmd);
}

From source file:com.github.le11.nls.solr.UIMAAsyncUpdateRequestProcessor.java

License:Apache License

@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
    String text = null;
    try {
        /* get Solr document */
        SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();

        /* get the fields to analyze */
        String[] texts = getTextsToAnalyze(solrInputDocument);
        for (int i = 0; i < texts.length; i++) {
            text = texts[i];
            if (text != null && text.length() > 0) {
                /* process the text value */
                JCas jcas = UIMAAnalyzersUtils.getInstance()
                        .analyzeAsynchronously(new StringReader(text), solrUIMAConfiguration.getAePath())
                        .getJCas();

                UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas);
                /* get field mapping from config */
                /* map type features on fields */
                for (String typeFQN : solrUIMAConfiguration.getTypesFeaturesFieldsMapping().keySet()) {
                    uimaToSolrMapper.map(typeFQN,
                            solrUIMAConfiguration.getTypesFeaturesFieldsMapping().get(typeFQN));
                }
            }
        }
    } catch (Exception e) {
        // Build a best-effort context string for the error report. Every
        // lookup here is null-guarded: the original code NPE'd when the
        // exception fired before any text was assigned (text == null), and
        // when the log field was absent from the document.
        String logField = solrUIMAConfiguration.getLogField();
        if (logField == null) {
            /* fall back to the schema's unique key, if any, for identifying the doc */
            SchemaField uniqueKeyField = solrCore.getSchema().getUniqueKeyField();
            if (uniqueKeyField != null) {
                logField = uniqueKeyField.getName();
            }
        }
        String optionalFieldInfo = ".";
        if (logField != null && cmd.getSolrInputDocument().getField(logField) != null) {
            // String.valueOf also avoids the original's blind (String) cast,
            // which threw ClassCastException for non-String field values.
            optionalFieldInfo = ". " + logField + "="
                    + String.valueOf(cmd.getSolrInputDocument().getField(logField).getValue()) + ", ";
        }
        // Truncate the offending text to at most 100 chars for the message;
        // text can still be null if the failure happened before the loop ran.
        String snippet = text == null ? "" : text.substring(0, Math.min(text.length(), 100));
        if (solrUIMAConfiguration.isIgnoreErrors()) {
            log.warn("skip the text processing due to " + e.getLocalizedMessage() + optionalFieldInfo
                    + " text=\"" + snippet + "...\"");
        } else {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                    "processing error: " + e.getLocalizedMessage() + optionalFieldInfo + " text=\"" + snippet
                            + "...\"",
                    e);
        }
    }
    super.processAdd(cmd);
}

From source file:com.grantingersoll.intell.index.BayesUpdateRequestProcessor.java

License:Apache License

@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
    // Classify the incoming document and, when the classifier yields a real
    // label (reference-distinct from the NULL sentinel), store it in the
    // configured output field before passing the command down the chain.
    SolrInputDocument document = cmd.getSolrInputDocument();
    ClassifierResult classification = classifyDocument(document);

    boolean hasLabel = classification != null && classification.getLabel() != NULL;
    if (hasLabel) {
        document.addField(outputField, classification.getLabel());
    }

    super.processAdd(cmd);
}

From source file:com.gu.solr.MergeUpdateRequestProcessorFactory.java

License:Apache License

@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
    if (mergeQuery == null || mergeQuery.trim().isEmpty()) {
        // No merge query configured: merge the incoming document with the
        // single existing document that shares its id, if one exists.
        String id = cmd.getIndexedId(schema);
        // Guard every debug call consistently; the original left this one
        // (and the in-loop one below) unguarded while guarding the others.
        if (log.isDebugEnabled()) {
            log.debug("MergeUpdateRequest: add " + id);
        }

        Searcher search = new Searcher(schema, searcher, log);
        SolrDocument doc = search.findById(id);

        if (doc != null) {
            if (log.isDebugEnabled()) {
                log.debug("MergeUpdateRequest: Merging with existing document.");
            }
            cmd.solrDoc = MergeUtils.merge(cmd.getSolrInputDocument(), doc, schema, overwriteMultivalues);
        } else {
            if (log.isDebugEnabled()) {
                log.debug("MergeUpdateRequest: New insert.");
            }
        }

        super.processAdd(cmd);

    } else {
        // A merge query is configured: merge the incoming document (minus
        // its id) into every matching document, issuing one add per match.
        if (log.isDebugEnabled()) {
            log.debug("MergeUpdateRequest: add " + mergeQuery);
            log.debug(String.format("MergeUpdateRequest: Merge into existing documents(%s)", mergeQuery));
        }

        Query q = QueryParsing.parseQuery(mergeQuery, schema);
        DocIterator docs = searcher.getDocSet(q).iterator();

        SolrInputDocument merge = MergeUtils.withoutId(cmd.getSolrInputDocument(), schema);

        while (docs.hasNext()) {
            Document luceneDoc = searcher.doc(docs.nextDoc());
            SolrDocument doc = MergeUtils.toSolrDocument(luceneDoc, schema);
            SolrInputDocument merged = MergeUtils.merge(merge, doc, schema, overwriteMultivalues);
            if (log.isDebugEnabled()) {
                log.debug("MergeUpdateRequest: merged = " + merged);
            }

            super.processAdd(MergeUtils.addCommandFor(merged));
        }
    }
}

From source file:com.ifactory.press.db.solr.processor.FieldMergingProcessor.java

License:Apache License

@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
    try {
        if (sourceAnalyzers != null && destinationField != null) {
            SolrInputDocument doc = cmd.getSolrInputDocument();
            // For every configured source field, re-tokenize each of its
            // values with that field's analyzer and append the result to the
            // destination field.
            for (Map.Entry<String, PoolingAnalyzerWrapper> entry : sourceAnalyzers.entrySet()) {
                String sourceFieldName = entry.getKey();
                Analyzer fieldAnalyzer = entry.getValue();
                Collection<Object> fieldValues = doc.getFieldValues(sourceFieldName);
                if (fieldValues != null) {
                    for (Object value : fieldValues) {
                        IndexableField fieldValue = new TextField(destinationField,
                                fieldAnalyzer.tokenStream(sourceFieldName, value.toString()));
                        doc.addField(destinationField, fieldValue);
                    }
                }
            }
        }

        if (next != null) {
            next.processAdd(cmd);
        }
    } finally {
        // Release all pooled analyzers, readying them for re-use. Done in a
        // finally block so they are released even when processing throws, and
        // null-guarded: the original release loop NPE'd when sourceAnalyzers
        // was not configured (the processing loop above was guarded, this
        // loop was not).
        if (sourceAnalyzers != null) {
            for (PoolingAnalyzerWrapper wrapper : sourceAnalyzers.values()) {
                wrapper.release();
            }
        }
    }
}

From source file:com.sindicetech.siren.solr.facet.SirenFacetProcessor.java

License:Open Source License

@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
    SolrCore core = cmd.getReq().getCore();
    IndexSchema schema = core.getLatestSchema();

    if (!schema.isMutable()) {
        throw new SolrException(BAD_REQUEST,
                String.format("This IndexSchema, of core %s, is not mutable.", core.getName()));
    }

    SolrInputDocument doc = cmd.getSolrInputDocument();

    extractor.setSchema(schema);
    List<SirenFacetEntry> entries = extractor.extractFacets(doc);

    // Update the schema first.
    // Use a Set so that we add a fieldname only once even if it is generated
    // multiple times (for multiple paths).
    Set<SchemaField> newFields = new HashSet<SchemaField>();
    for (SirenFacetEntry entry : entries) {
        // skip entry if the field is already in the schema
        if (schema.getFieldOrNull(entry.toFieldName()) != null) {
            continue;
        }

        TypeMapping typeMapping = getTypeMappingValueClass(entry.datatype.xsdDatatype);

        // skip facet values that are too large altogether - they don't make
        // sense for faceting
        if (isValueTooLargeForFaceting(entry, typeMapping)) {
            continue;
        }

        Map<String, Boolean> options = new HashMap<String, Boolean>();
        // see FieldProperties.propertyNames[]
        options.put("indexed", false);
        options.put("stored", false);
        options.put("docValues", true);
        options.put("multiValued", true);

        // reuse typeMapping rather than looking it up a second time as the
        // original did
        newFields.add(schema.newField(entry.toFieldName(), typeMapping.fieldType, options));
    }

    if (!newFields.isEmpty()) {
        IndexSchema newSchema = schema.addFields(newFields);
        cmd.getReq().getCore().setLatestSchema(newSchema);
        cmd.getReq().updateSchemaToLatest();
        logger.debug("Successfully added field(s) to the schema.");
    }

    // Then update the document, skipping the same oversized values.
    for (SirenFacetEntry entry : entries) {
        TypeMapping typeMapping = getTypeMappingValueClass(entry.datatype.xsdDatatype);
        if (isValueTooLargeForFaceting(entry, typeMapping)) {
            continue;
        }
        doc.addField(entry.toFieldName(), entry.value);
    }

    // call the next one in chain
    super.processAdd(cmd);
}

/**
 * Returns true when the entry's value is a String longer than the allowed
 * facet-value limit: the type mapping's maxFieldSize when set, otherwise
 * DEFAULT_MAX_FACET_VALUE_LENGTH. Such values don't make sense for faceting.
 */
private boolean isValueTooLargeForFaceting(SirenFacetEntry entry, TypeMapping typeMapping) {
    int maxLength = typeMapping.maxFieldSize != null ? typeMapping.maxFieldSize
            : DEFAULT_MAX_FACET_VALUE_LENGTH;
    return entry.value instanceof String && ((String) entry.value).length() > maxLength;
}

From source file:com.tamingtext.classifier.bayes.BayesUpdateRequestProcessor.java

License:Apache License

@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
    // Classify the incoming document in place, then continue down the
    // update processor chain.
    classifyDocument(cmd.getSolrInputDocument());
    super.processAdd(cmd);
}

From source file:io.yucca.solr.processor.HierarchyExtractorUpdateProcessor.java

License:Apache License

@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
    // Extract hierarchy information from the incoming document, then hand
    // the command on to the rest of the chain.
    SolrInputDocument document = cmd.getSolrInputDocument();
    process(document);
    super.processAdd(cmd);
}

From source file:lux.solr.CloudIdProcessor.java

License:Mozilla Public License

@Override
public void processAdd(final AddUpdateCommand cmd) throws IOException {
    SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();

    String uri = (String) solrInputDocument.getFieldValue(uriFieldName);
    if (uri != null) {
        // We only need about 42 bits of millisecond timestamp to count up to
        // about 2070, so shift it left and use the remaining low 22 bits for
        // some bits from a uri hash, making the id (more likely to be)
        // globally unique.
        long t = System.currentTimeMillis() << 22;
        // 0x3fffff is the full 22-bit mask; the original mask 0x2fffff
        // silently dropped bit 20 of the hash, wasting one bit of uniqueness
        // while the comment promised 22 bits.
        long hashCode = uri.hashCode() & 0x3fffffL;
        long luxDocId = t | hashCode;
        solrInputDocument.addField(idFieldName, luxDocId);
    }

    if (next != null) {
        next.processAdd(cmd);
    }

}