Example usage for org.apache.solr.common SolrInputDocument removeField

List of usage examples for org.apache.solr.common SolrInputDocument removeField

Introduction

On this page you can find usage examples for org.apache.solr.common.SolrInputDocument.removeField.

Prototype

public SolrInputField removeField(String name) 

Source Link

Document

Remove a field from the document

Usage

From source file:com.francelabs.datafari.updateprocessor.DatafariUpdateProcessor.java

License:Apache License

/**
 * Normalizes a document before passing it to the next processor in the chain.
 *
 * <p>Collapses multi-valued {@code id} and {@code last_modified} fields to their first value,
 * copies {@code ignored_filelastmodified} into {@code last_modified}, and derives the
 * {@code url}, {@code title}, {@code source} and {@code extension} fields from the document id.
 * A document without an id is handed on unchanged apart from the {@code last_modified} handling.
 *
 * @param cmd the add command whose document is modified in place
 * @throws IOException if the id cannot be parsed as a URL or the downstream processor fails
 */
@Override
public void processAdd(final AddUpdateCommand cmd) throws IOException {
    final SolrInputDocument doc = cmd.getSolrInputDocument();

    // Sometimes Tika puts several ids, so we keep the first one, which is
    // always the right one. The field may be absent, hence the null guard
    // (same shape as the last_modified check below).
    if ((doc.getFieldValues("id") != null) && (doc.getFieldValues("id").size() > 1)) {
        final Object id = doc.getFieldValue("id");
        doc.remove("id");
        doc.addField("id", id);
    }

    // Try to retrieve the ignored_filelastmodified field to set its
    // value in the last_modified field
    if (doc.getFieldValue("ignored_filelastmodified") != null) {
        final Object last_modified = doc.getFieldValue("ignored_filelastmodified");
        doc.remove("last_modified");
        doc.addField("last_modified", last_modified);
    }

    // Sometimes Tika puts several last_modified dates, so we keep the first
    // one, which is always the right one
    if ((doc.getFieldValues("last_modified") != null) && (doc.getFieldValues("last_modified").size() > 1)) {
        final Object last_modified = doc.getFieldValue("last_modified");
        doc.remove("last_modified");
        doc.addField("last_modified", last_modified);
    }

    final String url = (String) doc.getFieldValue("id");
    if (url == null) {
        // Every field below is derived from the id. Without one, hand the
        // document on unchanged instead of failing with a NullPointerException.
        super.processAdd(cmd);
        return;
    }

    doc.addField("url", url);

    // The file name comes from the stream name when present, otherwise from
    // the last path segment of the id.
    String filename = "";
    final SolrInputField streamNameField = doc.get("ignored_stream_name");
    if (streamNameField != null) {
        filename = (String) streamNameField.getFirstValue();
    } else {
        final Pattern pattern = Pattern.compile("[^/]*$");
        final Matcher matcher = pattern.matcher(url);
        if (matcher.find()) {
            filename = matcher.group();
        }
    }

    if (url.startsWith("http")) {
        // Web documents keep an existing title; the file name is only a fallback.
        if (doc.get("title") == null) {
            doc.addField("title", filename);
        }
        doc.addField("source", "web");
    }

    if (url.startsWith("file")) {
        // File documents always get the file name as title.
        doc.removeField("title");
        doc.addField("title", filename);
        doc.addField("source", "file");
    }

    final URL urlObject = new URL(url);
    final String path = urlObject.getPath();
    final SolrInputField mimeTypeField = doc.get("ignored_content_type");

    final String nameExtension = FilenameUtils.getExtension(path);
    String tikaExtension = mimeTypeField == null ? "" : extensionFromMimeTypeField(mimeTypeField);
    if (tikaExtension == null) {
        // NOTE(review): earlier revisions of this method treated a null result
        // from extensionFromMimeTypeField as possible; normalise it so the
        // length checks below cannot throw.
        tikaExtension = "";
    }

    // Use the preferred source when it yields a 2-4 character extension,
    // otherwise fall back to the other source.
    final String extension;
    if (extensionFromName) {
        extension = nameExtension.length() > 1 && nameExtension.length() < 5 ? nameExtension : tikaExtension;
    } else {
        extension = tikaExtension.length() > 1 && tikaExtension.length() < 5 ? tikaExtension : nameExtension;
    }

    // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
    doc.addField("extension", extension.toLowerCase(java.util.Locale.ROOT));

    super.processAdd(cmd);
}

From source file:com.gu.solr.MergeUtils.java

License:Apache License

/**
 * Builds a new document from {@code existing} with the fields of {@code solrInputDocument}
 * applied on top.
 *
 * <p>Each incoming field first replaces the field of the same name in the copy. The exception
 * is a field that the schema declares multi-valued while {@code overwriteMultivalues} is
 * {@code false}: its incoming values are added to the values already present.
 *
 * @param solrInputDocument the incoming fields to apply
 * @param existing the stored document used as the starting point
 * @param schema the schema consulted for each incoming field's multi-valued flag
 * @param overwriteMultivalues whether multi-valued fields are replaced instead of extended
 * @return the merged document
 */
public static SolrInputDocument merge(SolrInputDocument solrInputDocument, SolrDocument existing,
        IndexSchema schema, boolean overwriteMultivalues) {
    SolrInputDocument result = copy(existing);

    // Pass 1: clear every field that is going to be replaced rather than extended.
    for (SolrInputField incoming : solrInputDocument) {
        String name = incoming.getName();
        boolean extendExisting = !overwriteMultivalues && schema.getField(name).multiValued();
        if (!extendExisting) {
            result.removeField(name);
        }
    }

    // Pass 2: add all incoming values.
    for (SolrInputField incoming : solrInputDocument) {
        result.addField(incoming.getName(), incoming.getValue());
    }

    return result;
}

From source file:com.gu.solr.MergeUtils.java

License:Apache License

/**
 * Returns a copy of {@code document} with the schema's unique-key field removed.
 *
 * @param document the document to copy
 * @param schema the schema that names the unique-key field
 * @return the copy without its unique-key field
 */
public static SolrInputDocument withoutId(SolrInputDocument document, IndexSchema schema) {
    SolrInputDocument stripped = copy(document);
    String uniqueKeyName = schema.getUniqueKeyField().getName();
    stripped.removeField(uniqueKeyName);
    return stripped;
}

From source file:com.gu.solr.MergeUtils.java

License:Apache License

/**
 * Returns a copy of {@code existing} with the named fields removed.
 *
 * @param deleteFields names of the fields to drop from the copy
 * @param existing the stored document to copy
 * @return the copy without the listed fields
 */
public static SolrInputDocument delete(List<String> deleteFields, SolrDocument existing) {
    SolrInputDocument remaining = copy(existing);

    for (String fieldToDrop : deleteFields) {
        remaining.removeField(fieldToDrop);
    }

    return remaining;
}

From source file:com.hurence.logisland.service.solr.api.SolrClientService.java

License:Apache License

/**
 * Copies every document of the {@code src} collection into the {@code dst} collection.
 *
 * <p>Pages through the source with a Solr cursor (1000 rows per page, sorted by {@code id}),
 * strips the {@code _version_} field from each document, and commits {@code dst} once after
 * the last page.
 *
 * @param reindexScrollTimeout not used by this implementation
 * @param src name of the collection to read from
 * @param dst name of the collection to write to
 * @throws DatastoreClientServiceException wrapping any failure while querying, adding or committing
 */
@Override
public void copyCollection(String reindexScrollTimeout, String src, String dst)
        throws DatastoreClientServiceException {
    SolrQuery solrQuery = new SolrQuery();
    solrQuery.setRows(1000);
    solrQuery.setQuery("*:*");
    // Cursor paging needs a sort on the unique key; assumes "id" is that key.
    solrQuery.addSort("id", SolrQuery.ORDER.asc);

    String cursorMark = CursorMarkParams.CURSOR_MARK_START;
    boolean done = false;
    try {
        while (!done) {
            solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
            QueryResponse response = getClient().query(src, solrQuery);

            List<SolrInputDocument> documents = new ArrayList<>();
            for (SolrDocument document : response.getResults()) {
                SolrInputDocument inputDocument = getConverter().toSolrInputDocument(document);
                inputDocument.removeField("_version_");
                documents.add(inputDocument);
            }

            // The final page of a cursor walk can be empty; do not send an empty update.
            if (!documents.isEmpty()) {
                getClient().add(dst, documents);
            }

            // The walk is finished when the cursor no longer advances.
            String nextCursorMark = response.getNextCursorMark();
            done = nextCursorMark == null || cursorMark.equals(nextCursorMark);
            cursorMark = nextCursorMark;
        }

        getClient().commit(dst);
    } catch (Exception e) {
        throw new DatastoreClientServiceException(e);
    }
}

From source file:de.hybris.platform.solrfacetsearch.indexer.impl.SolrDocumentFactoryTest.java

License:Open Source License

/**
 * Creates the input document for product "HW2310-1004" and verifies it field by field.
 * Each verified field is removed, so the document must be empty once all checks are done.
 */
@Test
public void testCreateSolrInputDocument() throws Exception {
    // Fixtures: search configuration, product and the single indexed type.
    final FacetSearchConfig facetSearchConfig = facetSearchConfigService.getConfiguration("productSearch");
    assertNotNull("Facet Search Config must not be null", facetSearchConfig);
    final ProductModel productModel = productService.getProduct("HW2310-1004");
    assertNotNull("Product must not be null", productModel);
    final IndexConfig indexConfig = facetSearchConfig.getIndexConfig();
    assertNotNull("Index config must not be null", indexConfig);
    final Collection<IndexedType> types = indexConfig.getIndexedTypes().values();
    assertNotNull("Collection of indexed types must not be null", types);
    assertEquals("Size of collection of indexed types", 1, types.size());
    final IndexedType onlyType = types.iterator().next();

    final SolrInputDocument document = solrDocumentFactory.createInputDocument(productModel,
            facetSearchConfig.getIndexConfig(), onlyType);

    // Technical fields: presence only.
    assertNotNull("id must not be null", document.getField("id"));
    document.removeField("id");
    assertNotNull("pk must not be null", document.getField("pk"));
    document.removeField("pk");

    // Catalog fields: presence and value.
    assertNotNull("catalogId must not be null", document.getField("catalogId"));
    assertEquals("Catalog ID", productModel.getCatalogVersion().getCatalog().getId(),
            document.getField("catalogId").getValue());
    document.removeField("catalogId");
    assertNotNull("catalogVersion must not be null", document.getField("catalogVersion"));
    assertEquals("Catalog Version", productModel.getCatalogVersion().getVersion(),
            document.getField("catalogVersion").getValue());
    document.removeField("catalogVersion");

    // Indexed properties: every provided value must be in the document.
    for (final IndexedProperty property : onlyType.getIndexedProperties().values()) {
        final Collection<FieldValue> expectedValues = IndexedProperties.getFieldValueProvider(property)
                .getFieldValues(indexConfig, property, productModel);
        assertContainsValues(document, expectedValues);
    }

    assertTrue("Input document has too many fields", document.getFieldNames().isEmpty());
}

From source file:de.hybris.platform.solrfacetsearch.indexer.impl.SolrDocumentFactoryTest.java

License:Open Source License

/**
 * @param inputDocument/*ww  w .  ja  va  2 s . c  o  m*/
 * @param fieldValues
 */
private void assertContainsValues(final SolrInputDocument inputDocument,
        final Collection<FieldValue> fieldValues) {
    for (final FieldValue fieldValue : fieldValues) {
        final String fieldName = fieldValue.getFieldName();
        final Object value = inputDocument.getFieldValue(fieldName);
        assertEquals("Field value for " + fieldName, fieldValue.getValue(), value);
        inputDocument.removeField(fieldName);
    }
}

From source file:edu.cornell.mannlib.vitro.webapp.search.solr.CleanAllText.java

License:Open Source License

/**
 * Replaces the values of the all-text field with their cleaned form.
 *
 * <p>Only String values are cleaned and kept; values of any other type are dropped. The
 * document is left untouched when the field is absent or has no values.
 *
 * @param ind the individual being indexed (not used here)
 * @param doc the document whose all-text field is rewritten in place
 * @param arg2 not used here
 * @throws SkipIndividualException declared by the overridden method; not thrown by this code directly
 */
@Override
public void modifyDocument(Individual ind, SolrInputDocument doc, StringBuffer arg2)
        throws SkipIndividualException {
    SolrInputField alltext = doc.getField(multiSiteTerm.alltext);
    if (alltext == null || alltext.getValues() == null) {
        return;
    }

    List<String> cleanValues = new ArrayList<String>();
    for (Object obj : alltext.getValues()) {
        if (obj instanceof String) {
            cleanValues.add(clean((String) obj));
        }
    }

    // Swap the raw values for the cleaned ones. The original field object must not be
    // added back afterwards, or the uncleaned content would end up in the field again.
    doc.removeField(multiSiteTerm.alltext);
    for (String cleaned : cleanValues) {
        doc.addField(multiSiteTerm.alltext, cleaned);
    }
}

From source file:elaborate.editor.solr.ElaborateSolrIndexer.java

License:Open Source License

/**
 * Replaces the single-valued facet field with its multi-valued {@code mv_} counterpart.
 *
 * <p>The field named {@code facetName} is removed, then every non-blank value parsed from
 * {@code multiValue} is added to {@code "mv_" + facetName}. When parsing yields no values at
 * all, the empty-value symbol is added instead.
 *
 * @param facetName name of the facet field to convert
 * @param multiValue raw string holding the values
 * @param doc document to modify in place
 */
private static void handleMultiValuedFields(String facetName, String multiValue, SolrInputDocument doc) {
    Log.info("facetName={}", facetName);
    doc.removeField(facetName);

    final Iterable<String> parsedValues = StringUtil.getValues(multiValue);
    final String multiValuedName = "mv_" + facetName;

    // No values at all: mark the field as explicitly empty.
    if (!parsedValues.iterator().hasNext()) {
        doc.addField(multiValuedName, EMPTYVALUE_SYMBOL, 1.0f);
        return;
    }

    for (String candidate : parsedValues) {
        if (StringUtils.isNotBlank(candidate)) {
            doc.addField(multiValuedName, candidate, 1.0f);
        }
    }
}

From source file:eu.clarin.cmdi.vlo.importer.MetadataImporter.java

/**
 * Adds the FIELD_FORMAT and FIELD_RESOURCE fields (plus FIELD_RESOURCE_COUNT) for the data
 * resources of a record.
 *
 * <p>The type can be specified in the "ResourceType" element of an imdi file, or possibly be
 * overwritten by a more specific xpath (as in the LRT cmdi files). When a resource carries no
 * mime type of its own, the value already present in the document at the same position is
 * used instead.
 *
 * @param solrDocument the document to add the fields to
 * @param cmdiData source of the data resources
 */
protected void addResourceData(SolrInputDocument solrDocument, CMDIData cmdiData) {
    // Remember the formats already in the document before clearing the field.
    final List<Object> previousFormats;
    if (solrDocument.containsKey(FacetConstants.FIELD_FORMAT)) {
        previousFormats = new ArrayList<>(solrDocument.getFieldValues(FacetConstants.FIELD_FORMAT));
    } else {
        previousFormats = null;
    }
    // Remove old values; they might be overwritten.
    solrDocument.removeField(FacetConstants.FIELD_FORMAT);

    final List<Resource> dataResources = cmdiData.getDataResources();
    for (int idx = 0; idx < dataResources.size(); idx++) {
        final Resource current = dataResources.get(idx);

        String format = current.getMimeType();
        if (format == null) {
            // Fall back to the previously stored value at this position, if any.
            final boolean hasPrevious = previousFormats != null && idx < previousFormats.size();
            final String rawFormat = hasPrevious ? previousFormats.get(idx).toString() : "";
            format = CommonUtils.normalizeMimeType(rawFormat);
        }

        final FormatPostProcessor postProcessor = new FormatPostProcessor();
        format = postProcessor.process(format).get(0);

        // TODO check should probably be moved into Solr (by using some minimum length filter)
        if (!format.isEmpty()) {
            solrDocument.addField(FacetConstants.FIELD_FORMAT, format);
        }
        solrDocument.addField(FacetConstants.FIELD_RESOURCE,
                format + FacetConstants.FIELD_RESOURCE_SPLIT_CHAR + current.getResourceName());
    }

    solrDocument.addField(FacetConstants.FIELD_RESOURCE_COUNT, dataResources.size());
}