List of usage examples for org.apache.solr.common SolrInputDocument containsKey
@Override
public boolean containsKey(Object key)
From source file:com.cominvent.solr.update.processor.MappingUpdateProcessor.java
License:Apache License
@Override public void processAdd(AddUpdateCommand cmd) throws IOException { if (isEnabled()) { SolrInputDocument doc = cmd.getSolrInputDocument(); // Fetch document id String docId = ""; if (doc.containsKey(docIdField)) docId = (String) doc.getFieldValue(docIdField); String inValue = (String) doc.getFieldValue(inputField); String outValue;/*from www . j a va 2 s .c o m*/ if (map.containsKey(inValue)) { outValue = map.get(inValue); } else { outValue = fallbackValue; } if (outValue != null && outValue.length() > 0) { log.debug("Mapping done for document " + docId + ": " + inValue + " => " + outValue); doc.setField(outputField, outValue); } } else { log.debug("MappingUpdateProcessor is not enabled. Skipping"); } super.processAdd(cmd); }
From source file:com.ngdata.hbaseindexer.parse.tika.TikaSolrDocumentExtractorTest.java
License:Apache License
@Test public void testExtractDocument() throws IOException { byte[] columnFamily = Bytes.toBytes("cf"); byte[] columnQualifier = Bytes.toBytes("qualifier"); final String applicableValue = "this is the test data"; final String nonApplicableValue = "not-applicable value"; KeyValue applicableKeyValue = new KeyValue(Bytes.toBytes("row"), columnFamily, columnQualifier, Bytes.toBytes(applicableValue)); KeyValue nonApplicableKeyValue = new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("other cf"), columnQualifier, Bytes.toBytes(nonApplicableValue)); Result result = new Result(new KeyValue[] { applicableKeyValue, nonApplicableKeyValue }); SolrDocumentExtractor documentExtractor = new TikaSolrDocumentExtractor(indexSchema, new SingleCellExtractor(columnFamily, columnQualifier), "prefix_", "text/plain"); SolrInputDocument solrInputDocument = new SolrInputDocument(); documentExtractor.extractDocument(result, solrInputDocument); // Make sure that the input text got in here somehow assertTrue(solrInputDocument.containsKey("prefix_content")); assertTrue(solrInputDocument.get("prefix_content").getValues().toString().contains(applicableValue)); assertFalse(solrInputDocument.get("prefix_content").getValues().toString().contains(nonApplicableValue)); }
From source file:eu.clarin.cmdi.vlo.importer.MetadataImporter.java
/** * Adds some additional information from DataRoot to solrDocument, add * solrDocument to document list, submits list to SolrServer every 1000 * files/*ww w. jav a2 s .co m*/ * * @param solrDocument * @param cmdiData * @param file * @param dataOrigin * @throws SolrServerException * @throws IOException */ protected void updateDocument(SolrInputDocument solrDocument, CMDIData cmdiData, File file, DataRoot dataOrigin) throws SolrServerException, IOException { if (!solrDocument.containsKey(FacetConstants.FIELD_COLLECTION)) { solrDocument.addField(FacetConstants.FIELD_COLLECTION, dataOrigin.getOriginName()); } solrDocument.addField(FacetConstants.FIELD_DATA_PROVIDER, dataOrigin.getOriginName()); solrDocument.addField(FacetConstants.FIELD_ID, cmdiData.getId()); solrDocument.addField(FacetConstants.FIELD_FILENAME, file.getAbsolutePath()); String metadataSourceUrl = dataOrigin.getPrefix(); metadataSourceUrl += file.getAbsolutePath().substring(dataOrigin.getToStrip().length()); solrDocument.addField(FacetConstants.FIELD_COMPLETE_METADATA, metadataSourceUrl); // add SearchServices (should be CQL endpoint) for (Resource resource : cmdiData.getSearchResources()) { solrDocument.addField(FacetConstants.FIELD_SEARCH_SERVICE, resource.getResourceName()); } // add landing page resource for (Resource resource : cmdiData.getLandingPageResources()) { solrDocument.addField(FacetConstants.FIELD_LANDINGPAGE, resource.getResourceName()); } // add search page resource for (Resource resource : cmdiData.getSearchPageResources()) { solrDocument.addField(FacetConstants.FIELD_SEARCHPAGE, resource.getResourceName()); } // add timestamp Date dt = new Date(); SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); solrDocument.addField(FacetConstants.FIELD_LAST_SEEN, df.format(dt)); // set number of days since last import to '0' solrDocument.addField(FacetConstants.FIELD_DAYS_SINCE_LAST_SEEN, 0); // add resource proxys addResourceData(solrDocument, cmdiData); LOG.debug("Adding 
document for submission to SOLR: {}", file); docs.add(solrDocument); if (docs.size() == config.getMaxDocsInList()) { sendDocs(); } }
From source file:eu.clarin.cmdi.vlo.importer.MetadataImporter.java
/** * Adds two fields FIELD_FORMAT and FIELD_RESOURCE. The Type can be * specified in the "ResourceType" element of an imdi file or possibly * overwritten by some more specific xpath (as in the LRT cmdi files). So if * a type is overwritten and already in the solrDocument we take that type. * * @param solrDocument// w w w . j a va 2 s.c o m * @param cmdiData */ protected void addResourceData(SolrInputDocument solrDocument, CMDIData cmdiData) { List<Object> fieldValues = solrDocument.containsKey(FacetConstants.FIELD_FORMAT) ? new ArrayList<>(solrDocument.getFieldValues(FacetConstants.FIELD_FORMAT)) : null; solrDocument.removeField(FacetConstants.FIELD_FORMAT); //Remove old values they might be overwritten. List<Resource> resources = cmdiData.getDataResources(); for (int i = 0; i < resources.size(); i++) { Resource resource = resources.get(i); String mimeType = resource.getMimeType(); if (mimeType == null) { if (fieldValues != null && i < fieldValues.size()) { mimeType = CommonUtils.normalizeMimeType(fieldValues.get(i).toString()); } else { mimeType = CommonUtils.normalizeMimeType(""); } } FormatPostProcessor processor = new FormatPostProcessor(); mimeType = processor.process(mimeType).get(0); // TODO check should probably be moved into Solr (by using some minimum length filter) if (!mimeType.equals("")) { solrDocument.addField(FacetConstants.FIELD_FORMAT, mimeType); } solrDocument.addField(FacetConstants.FIELD_RESOURCE, mimeType + FacetConstants.FIELD_RESOURCE_SPLIT_CHAR + resource.getResourceName()); } solrDocument.addField(FacetConstants.FIELD_RESOURCE_COUNT, resources.size()); }
From source file:eu.europeana.corelib.edm.utils.SolrConstructor.java
License:Open Source License
/**
 * Adds the web resources referenced by the given aggregations to the
 * {@code edm_webResource} field of the document, skipping values that are
 * already present.
 *
 * <p>For each aggregation the "is shown by" resource, the "object" resource and
 * every "has view" resource (trimmed) are considered. Nothing is added for an
 * aggregation while the document has no {@code edm_webResource} field.
 *
 * @param solrInputDocument the document to extend
 * @param aggregationList the aggregations to read from; may be {@code null}
 * @return the same document instance that was passed in
 */
private SolrInputDocument generateWRFromAggregation(SolrInputDocument solrInputDocument,
        List<Aggregation> aggregationList) {
    if (aggregationList == null) {
        return solrInputDocument;
    }
    for (Aggregation aggr : aggregationList) {
        // Only extend an existing field; documents without it are left alone.
        if (!solrInputDocument.containsKey("edm_webResource")) {
            continue;
        }
        if (aggr.getIsShownBy() != null) {
            addWebResourceIfAbsent(solrInputDocument, aggr.getIsShownBy().getResource());
        }
        if (aggr.getObject() != null) {
            addWebResourceIfAbsent(solrInputDocument, aggr.getObject().getResource());
        }
        if (aggr.getHasViewList() != null) {
            for (HasView hasView : aggr.getHasViewList()) {
                addWebResourceIfAbsent(solrInputDocument, hasView.getResource().trim());
            }
        }
    }
    return solrInputDocument;
}

/**
 * Appends {@code resource} to the {@code edm_webResource} field unless one of
 * the current field values already has the same string form.
 */
private void addWebResourceIfAbsent(SolrInputDocument solrInputDocument, String resource) {
    for (Object existing : solrInputDocument.getFieldValues("edm_webResource")) {
        if (StringUtils.equals(existing.toString(), resource)) {
            return; // already present, nothing to add
        }
    }
    solrInputDocument.addField("edm_webResource", resource);
}
From source file:io.yucca.solr.processor.FieldBoost.java
License:Apache License
/**
 * Writes the given values into this boost's field on the document.
 *
 * <p>The field is replaced when {@code overwrite} is true or the document does
 * not contain it yet. Otherwise the values are appended, but only if the schema
 * declares the field explicitly as multi-valued; in all other cases the
 * document is left unchanged.
 *
 * @param values
 *            String[] values to set
 * @param document
 *            SolrInputDocument to set field in
 * @param schema
 *            IndexSchema, may be null
 * @param overwrite
 *            boolean if true existing values are overwritten
 */
public void set(String[] values, SolrInputDocument document, IndexSchema schema, boolean overwrite) {
    final boolean multivalued = schema != null
            && schema.hasExplicitField(field)
            && schema.getField(field).multiValued();

    if (overwrite || !document.containsKey(field)) {
        document.setField(field, values, boost);
        return;
    }

    // From here on overwrite is false and the field already exists.
    if (multivalued) {
        document.addField(field, values, boost);
    }
}
From source file:net.yacy.cora.federate.solr.connector.SolrServerConnector.java
License:Open Source License
@Override public void add(final SolrInputDocument solrdoc) throws IOException, SolrException { if (this.server == null) return;/*from w ww . j ava 2 s .c o m*/ if (solrdoc.containsKey("_version_")) solrdoc.setField("_version_", 0L); // prevent Solr "version conflict" synchronized (this.server) { try { this.server.add(solrdoc, -1); } catch (final Throwable e) { clearCaches(); // prevent further OOM if this was caused by OOM ConcurrentLog.logException(e); // catches "version conflict for": try this again and delete the document in advance try { this.server.deleteById((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName())); } catch (final SolrServerException e1) { ConcurrentLog.logException(e1); } try { this.server.add(solrdoc, -1); } catch (final Throwable ee) { ConcurrentLog.logException(ee); try { this.server.commit(); } catch (final Throwable eee) { ConcurrentLog.logException(eee); // a time-out may occur here } try { this.server.add(solrdoc, -1); } catch (final Throwable eee) { ConcurrentLog.logException(eee); throw new IOException(eee); } } } } }
From source file:net.yacy.cora.federate.solr.connector.SolrServerConnector.java
License:Open Source License
@Override public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException { if (this.server == null) return;/*from ww w . j a v a 2 s. c o m*/ for (SolrInputDocument solrdoc : solrdocs) { if (solrdoc.containsKey("_version_")) solrdoc.setField("_version_", 0L); // prevent Solr "version conflict" } synchronized (this.server) { try { this.server.add(solrdocs, -1); } catch (final Throwable e) { clearCaches(); // prevent further OOM if this was caused by OOM ConcurrentLog.logException(e); // catches "version conflict for": try this again and delete the document in advance List<String> ids = new ArrayList<String>(); for (SolrInputDocument solrdoc : solrdocs) ids.add((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName())); try { this.server.deleteById(ids); } catch (final SolrServerException e1) { ConcurrentLog.logException(e1); } try { this.server.commit(); } catch (final Throwable eee) { ConcurrentLog.logException(eee); // a time-out may occur here } try { this.server.add(solrdocs, -1); } catch (final Throwable ee) { ConcurrentLog.logException(ee); log.warn(e.getMessage() + " IDs=" + ids.toString()); throw new IOException(ee); } } } }
From source file:nl.minbzk.dwr.zoeken.enricher.uploader.SolrResultUploader.java
License:Open Source License
/** * Determine the database name, based on the composite key, or null if any of the composite key replacements could not be resolved. * * XXX: We only consider the replacement values from the first document given. * * @param name/*from w w w .j av a2 s.c o m*/ * @param nameComposition * @param namePrerequisitesExpression * @param documents * @return String */ private String determineAlternateDatabaseName(final String name, final String nameComposition, final String namePrerequisitesExpression, final List<SolrInputDocument> documents) { GregorianCalendar calendar = new GregorianCalendar(); calendar.setTime(new Date()); String result; result = nameComposition.replace("{name}", name).trim(); result = result.replace("{year}", String.format("%04d", calendar.get(calendar.YEAR))); result = result.replace("{month}", String.format("%02d", calendar.get(calendar.MONTH))); if (documents.size() > 0) { final SolrInputDocument document = documents.get(0); while (result.contains("{") && result.indexOf("}") > result.indexOf("{")) { String fieldName = result.substring(result.indexOf("{") + 1, result.indexOf("}")); if (document.containsKey(fieldName)) result = result.replace("{" + fieldName + "}", document.getFieldValue(fieldName).toString()); else { if (logger.isDebugEnabled()) logger.debug(String.format( "Field '%s' was missing from document with ID '%s' - will revert back to default collection '%s'", fieldName, document.getFieldValue(REFERENCE_FIELD), name)); return null; } } // Also check the pre-requisite expression - only return a composite database name if it's met if (StringUtils.hasText(namePrerequisitesExpression)) { final ExpressionParser parser = new SpelExpressionParser(); final Map<String, Object> values = new HashMap<String, Object>(); for (Map.Entry<String, SolrInputField> entry : document.entrySet()) { // XXX: Always get just the first value /* if (entry.getValue().getValueCount() > 1) values.put(entry.getKey(), entry.getValue().getValues()); else */ 
values.put(entry.getKey(), entry.getValue().getFirstValue()); } StandardEvaluationContext context = new StandardEvaluationContext(new Object() { public Map<String, Object> getValues() { return values; } }); if (!parser.parseExpression(namePrerequisitesExpression).getValue(context, Boolean.class)) { if (logger.isDebugEnabled()) logger.debug(String.format( "Pre-requisite expression '%s' failed to match against document with ID '%s' - will revert back to default collection '%s'", namePrerequisitesExpression, document.get(REFERENCE_FIELD).toString(), name)); return null; } } } return result; }
From source file:org.apache.sentry.tests.e2e.solr.AbstractSolrSentryTestBase.java
License:Apache License
/** * Function to validate the content of Solr response with that of input document. * @param solrInputDoc - Solr doc inserted into Solr * @param solrRespDocs - List of Solr doc obtained as response * (NOTE: This function ignores "_version_" field in validating Solr doc content) */// w w w . java 2s .c om public void validateSolrDocContent(SolrInputDocument solrInputDoc, SolrDocumentList solrRespDocs) { for (SolrDocument solrRespDoc : solrRespDocs) { String expFieldValue = (String) solrInputDoc.getFieldValue("id"); String resFieldValue = (String) solrRespDoc.getFieldValue("id"); if (expFieldValue.equals(resFieldValue)) { int expectedRespFieldCount = solrRespDoc.size(); if (solrRespDoc.containsKey("_version_")) { expectedRespFieldCount = expectedRespFieldCount - 1; } int expectedOrigFieldCount = solrInputDoc.size(); if (solrInputDoc.containsKey("_version_")) { expectedOrigFieldCount = expectedOrigFieldCount - 1; } assertEquals("Expected " + expectedOrigFieldCount + " fields. But, found " + expectedRespFieldCount + " fields", expectedOrigFieldCount, expectedRespFieldCount); for (String field : solrInputDoc.getFieldNames()) { if (field.equals("_version_") == true) { continue; } expFieldValue = (String) solrInputDoc.getFieldValue(field); resFieldValue = (String) solrRespDoc.getFieldValue(field); assertEquals("Expected value for field: " + field + " is " + expFieldValue + "; But, found " + resFieldValue, expFieldValue, resFieldValue); } return; } } fail("Solr doc not found in Solr collection"); }