List of usage examples for org.apache.solr.common SolrInputDocument removeField
public SolrInputField removeField(String name)
From source file:com.francelabs.datafari.updateprocessor.DatafariUpdateProcessor.java
License:Apache License
@Override public void processAdd(final AddUpdateCommand cmd) throws IOException { final SolrInputDocument doc = cmd.getSolrInputDocument(); // Sometimes Tika put several ids so we keep the first one which is // always the right one if (doc.getFieldValues("id").size() > 1) { final Object id = doc.getFieldValue("id"); doc.remove("id"); doc.addField("id", id); }/*w w w. j a va 2 s. c om*/ // Try to retrieve at the ignored_filelastmodified field to set it's // value in the last_modified field if (doc.getFieldValue("ignored_filelastmodified") != null) { final Object last_modified = doc.getFieldValue("ignored_filelastmodified"); doc.remove("last_modified"); doc.addField("last_modified", last_modified); } // Sometimes Tika put several last_modified dates, so we keep the first // one which is always the right one if ((doc.getFieldValues("last_modified") != null) && (doc.getFieldValues("last_modified").size() > 1)) { final Object last_modified = doc.getFieldValue("last_modified"); doc.remove("last_modified"); doc.addField("last_modified", last_modified); } final String url = (String) doc.getFieldValue("id"); // Create path hierarchy for facet final List<String> urlHierarchy = new ArrayList<>(); /* * // Create path hierarchy for facet * * final List<String> urlHierarchy = new ArrayList<String>(); * * final String path = url.replace("file:", ""); int previousIndex = 1; int * depth = 0; // Tokenize the path and add the depth as first character for * each token // (like: 0/home, 1/home/project ...) 
for (int i = 0; i < * path.split("/").length - 2; i++) { int endIndex = path.indexOf('/', * previousIndex); if (endIndex == -1) { endIndex = path.length() - 1; } * urlHierarchy.add(depth + path.substring(0, endIndex)); depth++; * previousIndex = endIndex + 1; } * * // Add the tokens to the urlHierarchy field doc.addField("urlHierarchy", * urlHierarchy); */ doc.addField("url", url); String filename = ""; final SolrInputField streamNameField = doc.get("ignored_stream_name"); if (streamNameField != null) { filename = (String) streamNameField.getFirstValue(); } else { final Pattern pattern = Pattern.compile("[^/]*$"); final Matcher matcher = pattern.matcher(url); if (matcher.find()) { filename = matcher.group(); } } if (url.startsWith("http")) { if (doc.get("title") == null) { doc.addField("title", filename); } doc.addField("source", "web"); } if (url.startsWith("file")) { doc.removeField("title"); doc.addField("title", filename); doc.addField("source", "file"); } String extension = ""; URL urlObject = new URL(url); String path = urlObject.getPath(); final SolrInputField mimeTypeField = doc.get("ignored_content_type"); String nameExtension = FilenameUtils.getExtension(path); String tikaExtension = mimeTypeField == null ? "" : extensionFromMimeTypeField(mimeTypeField); if (extensionFromName) { extension = nameExtension.length() > 1 && nameExtension.length() < 5 ? nameExtension : tikaExtension; } else { extension = tikaExtension.length() > 1 && tikaExtension.length() < 5 ? tikaExtension : nameExtension; } /* if (extensionFromName || mimeTypeField == null) { if (path.contains(".")){ extension = FilenameUtils.getExtension(path); if (extension.length() > 4 || extension.length() < 1) { // If length is too long, try extracting from tika information if available String tryExtension = mimeTypeField==null ? 
null : extensionFromMimeTypeField(mimeTypeField); if (tryExtension != null) { extension = tryExtension; } else { // Else default to bin for anything else extension = "bin"; } } } else if (urlObject.getProtocol().equals("http") || urlObject.getProtocol().equals("https")) { extension = null; if (mimeTypeField != null) { extension = extensionFromMimeTypeField(mimeTypeField); } if (extension == null) { extension = "html"; } } } else { extension = extensionFromMimeTypeField(mimeTypeField); if (extension == null) { extension = FilenameUtils.getExtension(path); } } */ doc.addField("extension", extension.toLowerCase()); super.processAdd(cmd); }
From source file:com.gu.solr.MergeUtils.java
License:Apache License
public static SolrInputDocument merge(SolrInputDocument solrInputDocument, SolrDocument existing, IndexSchema schema, boolean overwriteMultivalues) { SolrInputDocument merged = copy(existing); for (SolrInputField field : solrInputDocument) { String fieldName = field.getName(); if (!overwriteMultivalues && schema.getField(fieldName).multiValued()) { // leave for additions } else {/*from w w w . j av a 2 s . com*/ merged.removeField(fieldName); } } for (SolrInputField field : solrInputDocument) { merged.addField(field.getName(), field.getValue()); } return merged; }
From source file:com.gu.solr.MergeUtils.java
License:Apache License
/**
 * Returns a copy of {@code document} from which the schema's unique key field has been removed.
 *
 * @param document the document to copy
 * @param schema the schema that names the unique key field
 * @return the copy without its unique key field
 */
public static SolrInputDocument withoutId(SolrInputDocument document, IndexSchema schema) {
    SolrInputDocument stripped = copy(document);
    String uniqueKeyName = schema.getUniqueKeyField().getName();
    stripped.removeField(uniqueKeyName);
    return stripped;
}
From source file:com.gu.solr.MergeUtils.java
License:Apache License
/**
 * Returns a copy of {@code existing} from which the named fields have been removed.
 *
 * @param deleteFields the names of the fields to remove
 * @param existing the stored document to copy
 * @return the copy without the listed fields
 */
public static SolrInputDocument delete(List<String> deleteFields, SolrDocument existing) {
    final SolrInputDocument remaining = copy(existing);
    for (final String fieldToDrop : deleteFields) {
        remaining.removeField(fieldToDrop);
    }
    return remaining;
}
From source file:com.hurence.logisland.service.solr.api.SolrClientService.java
License:Apache License
@Override public void copyCollection(String reindexScrollTimeout, String src, String dst) throws DatastoreClientServiceException { SolrQuery solrQuery = new SolrQuery(); solrQuery.setRows(1000);// w ww.ja va 2s . co m solrQuery.setQuery("*:*"); solrQuery.addSort("id", SolrQuery.ORDER.asc); // Pay attention to this line String cursorMark = CursorMarkParams.CURSOR_MARK_START; boolean done = false; QueryResponse response; try { do { solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark); response = getClient().query(src, solrQuery); List<SolrInputDocument> documents = new ArrayList<>(); for (SolrDocument document : response.getResults()) { SolrInputDocument inputDocument = getConverter().toSolrInputDocument(document); inputDocument.removeField("_version_"); documents.add(inputDocument); } getClient().add(dst, documents); } while (cursorMark.equals(response.getNextCursorMark())); getClient().commit(dst); } catch (Exception e) { throw new DatastoreClientServiceException(e); } }
From source file:de.hybris.platform.solrfacetsearch.indexer.impl.SolrDocumentFactoryTest.java
License:Open Source License
@Test public void testCreateSolrInputDocument() throws Exception { final FacetSearchConfig config = facetSearchConfigService.getConfiguration("productSearch"); assertNotNull("Facet Search Config must not be null", config); final ProductModel product = productService.getProduct("HW2310-1004"); assertNotNull("Product must not be null", product); final IndexConfig indexConfig = config.getIndexConfig(); assertNotNull("Index config must not be null", indexConfig); final Collection<IndexedType> indexedTypes = indexConfig.getIndexedTypes().values(); assertNotNull("Collection of indexed types must not be null", indexedTypes); assertEquals("Size of collection of indexed types", 1, indexedTypes.size()); final IndexedType indexedType = indexedTypes.iterator().next(); final SolrInputDocument inputDocument = solrDocumentFactory.createInputDocument(product, config.getIndexConfig(), indexedType); assertNotNull("id must not be null", inputDocument.getField("id")); inputDocument.removeField("id"); assertNotNull("pk must not be null", inputDocument.getField("pk")); inputDocument.removeField("pk"); assertNotNull("catalogId must not be null", inputDocument.getField("catalogId")); assertEquals("Catalog ID", product.getCatalogVersion().getCatalog().getId(), inputDocument.getField("catalogId").getValue()); inputDocument.removeField("catalogId"); assertNotNull("catalogVersion must not be null", inputDocument.getField("catalogVersion")); assertEquals("Catalog Version", product.getCatalogVersion().getVersion(), inputDocument.getField("catalogVersion").getValue()); inputDocument.removeField("catalogVersion"); final Collection<IndexedProperty> indexedProperties = indexedType.getIndexedProperties().values(); for (final IndexedProperty indexedProperty : indexedProperties) { final Collection<FieldValue> fieldValues = IndexedProperties.getFieldValueProvider(indexedProperty) .getFieldValues(indexConfig, indexedProperty, product); assertContainsValues(inputDocument, fieldValues); }//from ww w .jav 
a 2s .co m assertTrue("Input document has too many fields", inputDocument.getFieldNames().isEmpty()); }
From source file:de.hybris.platform.solrfacetsearch.indexer.impl.SolrDocumentFactoryTest.java
License:Open Source License
/** * @param inputDocument/*ww w . ja va 2 s . c o m*/ * @param fieldValues */ private void assertContainsValues(final SolrInputDocument inputDocument, final Collection<FieldValue> fieldValues) { for (final FieldValue fieldValue : fieldValues) { final String fieldName = fieldValue.getFieldName(); final Object value = inputDocument.getFieldValue(fieldName); assertEquals("Field value for " + fieldName, fieldValue.getValue(), value); inputDocument.removeField(fieldName); } }
From source file:edu.cornell.mannlib.vitro.webapp.search.solr.CleanAllText.java
License:Open Source License
@Override public void modifyDocument(Individual ind, SolrInputDocument doc, StringBuffer arg2) throws SkipIndividualException { List<String> cleanValues = new ArrayList<String>(); SolrInputField alltext = doc.getField(multiSiteTerm.alltext); if (alltext != null) { for (Object obj : alltext.getValues()) { if (obj instanceof String) { cleanValues.add(clean((String) obj)); }//from ww w. j av a 2 s.c o m } doc.removeField(multiSiteTerm.alltext); for (String cleaned : cleanValues) { doc.addField(multiSiteTerm.alltext, cleaned); } } doc.addField(multiSiteTerm.alltext, alltext); }
From source file:elaborate.editor.solr.ElaborateSolrIndexer.java
License:Open Source License
private static void handleMultiValuedFields(String facetName, String multiValue, SolrInputDocument doc) { Log.info("facetName={}", facetName); doc.removeField(facetName); Iterable<String> values = StringUtil.getValues(multiValue); if (!values.iterator().hasNext()) { doc.addField("mv_" + facetName, EMPTYVALUE_SYMBOL, 1.0f); } else {//w ww . j a v a 2s . co m for (String value : values) { if (StringUtils.isNotBlank(value)) { doc.addField("mv_" + facetName, value, 1.0f); } } } }
From source file:eu.clarin.cmdi.vlo.importer.MetadataImporter.java
/** * Adds two fields FIELD_FORMAT and FIELD_RESOURCE. The Type can be * specified in the "ResourceType" element of an imdi file or possibly * overwritten by some more specific xpath (as in the LRT cmdi files). So if * a type is overwritten and already in the solrDocument we take that type. * * @param solrDocument//from w w w . j a v a 2s .c o m * @param cmdiData */ protected void addResourceData(SolrInputDocument solrDocument, CMDIData cmdiData) { List<Object> fieldValues = solrDocument.containsKey(FacetConstants.FIELD_FORMAT) ? new ArrayList<>(solrDocument.getFieldValues(FacetConstants.FIELD_FORMAT)) : null; solrDocument.removeField(FacetConstants.FIELD_FORMAT); //Remove old values they might be overwritten. List<Resource> resources = cmdiData.getDataResources(); for (int i = 0; i < resources.size(); i++) { Resource resource = resources.get(i); String mimeType = resource.getMimeType(); if (mimeType == null) { if (fieldValues != null && i < fieldValues.size()) { mimeType = CommonUtils.normalizeMimeType(fieldValues.get(i).toString()); } else { mimeType = CommonUtils.normalizeMimeType(""); } } FormatPostProcessor processor = new FormatPostProcessor(); mimeType = processor.process(mimeType).get(0); // TODO check should probably be moved into Solr (by using some minimum length filter) if (!mimeType.equals("")) { solrDocument.addField(FacetConstants.FIELD_FORMAT, mimeType); } solrDocument.addField(FacetConstants.FIELD_RESOURCE, mimeType + FacetConstants.FIELD_RESOURCE_SPLIT_CHAR + resource.getResourceName()); } solrDocument.addField(FacetConstants.FIELD_RESOURCE_COUNT, resources.size()); }