List of usage examples for org.apache.solr.common.SolrInputDocument#getField
public SolrInputField getField(String field)
From source file: at.newmedialab.lmf.search.services.indexing.SolrIndexingServiceImpl.java
License: Apache License
@Override public void indexResource(Resource resource, SolrCoreRuntime runtime) { Program<Value> program = runtime.getConfiguration().getProgram(); if (program == null) { try {// ww w.jav a 2 s . c om program = solrProgramService .parseProgram(new StringReader(runtime.getConfiguration().getProgramString())); runtime.getConfiguration().setProgram(program); } catch (LDPathParseException e) { log.error("error parsing path program for engine {}", runtime.getConfiguration().getName(), e); return; } } if (resource == null) return; final String coreName = runtime.getConfiguration().getName(); final String rID = getResourceId(resource); try { final RepositoryConnection connection = sesameService.getConnection(); try { connection.begin(); //if (resource instanceof KiWiResource && ((KiWiResource) resource).isDeleted()) { // runtime.queueDeletion(rID); //} //FIXME: find a proper way to do this with the new api boolean deleted = true; RepositoryResult<Statement> statements = connection.getStatements(resource, null, null, false); while (statements.hasNext()) { if (!ResourceUtils.isDeleted(connection, statements.next())) { deleted = false; break; } } if (deleted) { runtime.queueDeletion(rID); } final Resource[] contexts; if (program.getGraphs().isEmpty()) { contexts = new Resource[0]; } else { contexts = Collections2.transform(program.getGraphs(), new Function<java.net.URI, URI>() { @Override public URI apply(java.net.URI in) { return connection.getValueFactory().createURI(in.toString()); } }).toArray(new Resource[0]); } final SesameConnectionBackend backend = ContextAwareSesameConnectionBackend .withConnection(connection, contexts); if (program.getFilter() != null && !program.getFilter().apply(backend, resource, Collections.singleton((Value) resource))) { if (log.isDebugEnabled()) { log.debug("({}) <{}> does not match filter '{}', ignoring", coreName, resource, program.getFilter().getPathExpression(backend)); } // Some resources might be still in the index, so delete it. 
runtime.queueDeletion(rID); connection.commit(); return; } else if (log.isTraceEnabled() && program.getFilter() != null) { log.trace("({}) <{}> matches filter '{}', indexing...", coreName, resource, program.getFilter().getPathExpression(backend)); } SolrInputDocument doc = new SolrInputDocument(); doc.addField("id", rID); doc.addField("lmf.indexed", new Date()); if (resource instanceof KiWiUriResource) { doc.addField("lmf.created", ((KiWiUriResource) resource).getCreated()); } if (resource instanceof URI) { URI r = (URI) resource; doc.addField("lmf.uri", r.stringValue()); } else if (resource instanceof BNode) { BNode r = (BNode) resource; doc.addField("lmf.anon_id", r.getID()); } else { // This should not happen, but never the less... log.warn("Tried to index a Resource that is neither a URI nor BNode: {}", resource); runtime.queueDeletion(rID); connection.rollback(); return; } for (Resource type : getTypes(connection, resource)) { if (type instanceof KiWiUriResource) { doc.addField("lmf.type", type.stringValue()); } } // Set the document boost if (program.getBooster() != null) { final Collection<Float> boostValues = program.getBooster().getValues(backend, resource); if (boostValues.size() > 0) { final Float docBoost = boostValues.iterator().next(); if (boostValues.size() > 1) { log.warn("found more than one boostFactor for <{}>, using {}", resource, docBoost); } doc.setDocumentBoost(docBoost); } } // set shortcut fields Set<Value> dependencies = new HashSet<Value>(); for (FieldMapping<?, Value> rule : program.getFields()) { // Map<Value, List<Value>> paths = new HashMap<Value, List<Value>>(); // Collection<?> values = rule.getValues(backend, resource, paths); //FIXME: Temporary fixing due LDPath reverse properties selector bug Map<Value, List<Value>> paths = null; Collection<?> values = null; if (runtime.getConfiguration().isUpdateDependencies()) { paths = new HashMap<Value, List<Value>>(); values = rule.getValues(backend, resource, paths); } else { values = 
rule.getValues(backend, resource); } // try { final boolean isSinge = !isMultiValuedField(rule); for (Object value : values) { if (value != null) { doc.addField(rule.getFieldName(), value); if (isSinge) { break; } } } if (rule.getFieldConfig() != null) { final String b = rule.getFieldConfig().get("boost"); try { if (b != null) { doc.getField(rule.getFieldName()).setBoost(Float.parseFloat(b)); } } catch (NumberFormatException e) { throw new NumberFormatException("could not parse boost value: '" + b + "'"); } } } catch (Exception ex) { log.error("({}) exception while building path indexes for <{}>, field {}: {}", coreName, resource, rule.getFieldName(), ex.getMessage()); log.debug("(" + coreName + ") stacktrace", ex); } if (runtime.getConfiguration().isUpdateDependencies()) { for (List<Value> path : paths.values()) { dependencies.addAll(path); } } } if (runtime.getConfiguration().isUpdateDependencies()) { for (Value dependency : dependencies) { if (dependency instanceof URI && !dependency.equals(resource)) { doc.addField("lmf.dependencies", dependency.stringValue()); } } } runtime.queueInputDocument(doc); connection.commit(); } finally { connection.close(); } } catch (RepositoryException e) { log.warn("Could not build index document for " + resource.stringValue(), e); } catch (Throwable t) { log.error("unknown error while indexing document", t); } }
From source file: at.pagu.soldockr.repository.SimpleSolrRepository.java
License: Apache License
/**
 * Extracts the value of the configured id field from the given document.
 *
 * @param solrInputDocument the document to read the id from; must carry a
 *     non-null value under {@code idFieldName}
 * @return the id value rendered as a String
 * @throws IllegalArgumentException if the id field is missing or its value is null
 *     (raised by {@link Assert#notNull})
 */
private String extractIdFromSolrInputDocument(SolrInputDocument solrInputDocument) {
    // Look the field up once instead of repeating getField(idFieldName) three times.
    SolrInputField idField = solrInputDocument.getField(idFieldName);
    Assert.notNull(idField, "Unable to find field '" + idFieldName + "' in SolrDocument.");
    Assert.notNull(idField.getValue(), "ID must not be 'null'.");
    return idField.getValue().toString();
}
From source file: com.grantingersoll.intell.index.BayesUpdateRequestProcessor.java
License: Apache License
public ClassifierResult classifyDocument(SolrInputDocument doc) throws IOException { SolrInputField field = doc.getField(inputField); if (field == null) return null; if (!(field.getValue() instanceof String)) return null; String[] tokens = tokenizeField((String) field.getValue()); try {//from w w w.j a va 2s. c o m return ctx.classifyDocument(tokens, defaultCategory); } catch (InvalidDatastoreException e) { throw new IOException("Invalid Classifier Datastore", e); } }
From source file: com.ngdata.hbaseindexer.indexer.FusionDocumentWriter.java
License: Apache License
/** * shs: This method was modified from its original to add the input parameters 'parent' and 'docCount'. This was done * to enable recursion to be used to find all parent/child relationships to any level. The method will merge the * fields in the parent document into the child document and will then convert that merged document into JSON * format and return that JSON document to the caller. * @param parent The parent document for the child document being passed in. Parent may be null if the child being * passed in is a member of the initial documents submitted. * @param child This is the child document. It will have the parent's fields merged into it. * @param docCount This is a count of the number of documents that have been added in this processing. * @return The merged parent and child documents as a JSON formatted document, in a format acceptable to * Fusion./*from w ww. j a v a 2s . c o m*/ */ protected Map<String, Object> doc2json(SolrInputDocument parent, SolrInputDocument child, int docCount) { Map<String, Object> json = new HashMap<String, Object>(); if (child != null) { String docId = (String) child.getFieldValue("id"); if (docId == null) { if (parent != null) { String parentId = (String) parent.getFieldValue("id"); docId = parentId + "-" + docCount; } if (docId == null) throw new IllegalStateException("Couldn't resolve the id for document: " + child); } json.put("id", docId); List fields = new ArrayList(); if (parent != null) { if (log.isDebugEnabled()) log.debug("Method:doc2json - Merging parent and child docs, parent:[" + parent.toString() + "]; child[" + child.toString() + "]."); // have a parent doc ... 
flatten by adding all parent doc fields to the child with prefix _p_ for (String f : parent.getFieldNames()) { if ("id".equals(f)) { fields.add(mapField("_p_id", null /* field name prefix */, parent.getField("id").getFirstValue())); } else { appendField(parent, f, "_p_", fields); } } } for (String f : child.getFieldNames()) { if (!"id".equals(f)) { // id already added appendField(child, f, null, fields); } } // keep track of the time we saw this doc on the hbase side String tdt = DateUtil.getThreadLocalDateFormat().format(new Date()); fields.add(mapField("_hbasets_tdt", null, tdt)); if (log.isDebugEnabled()) log.debug(strIndexName + " Reconcile id = " + docId + " and timestamp = " + tdt); json.put("fields", fields); } else { log.warn("method:doc2json - Input parameter 'child' was null."); } return json; }
From source file: com.ngdata.hbaseindexer.indexer.FusionDocumentWriter.java
License: Apache License
protected void appendField(SolrInputDocument doc, String f, String pfx, List fields) { SolrInputField field = doc.getField(f); int vc = field.getValueCount(); if (vc <= 0) return; // no values to add for this field if (vc == 1) { Map<String, Object> fieldMap = mapField(f, pfx, field.getFirstValue()); if (fieldMap != null) fields.add(fieldMap);/*ww w. j av a 2 s .co m*/ } else { for (Object val : field.getValues()) { Map<String, Object> fieldMap = mapField(f, pfx, val); if (fieldMap != null) fields.add(fieldMap); } } }
From source file: com.ngdata.hbaseindexer.indexer.IdAddingSolrUpdateWriter.java
License: Apache License
/** * Add a SolrInputDocument to this writer. * <p>/*from ww w .j ava 2 s. c om*/ * Adding multiple documents without ids will result in an IllegalStateException being thrown. */ @Override public void add(SolrInputDocument solrDocument) { String docId = documentId; SolrInputField uniqueKeySolrField = solrDocument.getField(uniqueKeyField); if (uniqueKeySolrField == null) { if (idUsed) { throw new IllegalStateException( "Document id '" + documentId + "' has already been used by this record"); } solrDocument.addField(uniqueKeyField, documentId); idUsed = true; } else { docId = uniqueKeySolrField.getValue().toString(); } if (tableNameField != null) { solrDocument.addField(tableNameField, tableName); } updateCollector.add(docId, solrDocument); }
From source file: com.ngdata.hbaseindexer.indexer.IdAddingSolrUpdateWriterTest.java
License: Apache License
/**
 * Documents carrying their own unique key field keep that id; the record-level
 * document id is not assigned to either of them.
 */
@Test
public void testAdd_MultipleDocumentsWithTheirOwnIds() {
    String firstId = DOCUMENT_ID + "A";
    String secondId = DOCUMENT_ID + "B";

    IdAddingSolrUpdateWriter writer = new IdAddingSolrUpdateWriter(UNIQUE_KEY_FIELD, DOCUMENT_ID, null,
            TABLE_NAME, updateCollector);

    SolrInputDocument firstDoc = mock(SolrInputDocument.class);
    SolrInputDocument secondDoc = mock(SolrInputDocument.class);

    SolrInputField firstKeyField = new SolrInputField(UNIQUE_KEY_FIELD);
    firstKeyField.setValue(firstId, 1.0f);
    SolrInputField secondKeyField = new SolrInputField(UNIQUE_KEY_FIELD);
    secondKeyField.setValue(secondId, 1.0f);

    when(firstDoc.getField(UNIQUE_KEY_FIELD)).thenReturn(firstKeyField);
    when(secondDoc.getField(UNIQUE_KEY_FIELD)).thenReturn(secondKeyField);

    writer.add(firstDoc);
    writer.add(secondDoc);

    verify(updateCollector).add(firstId, firstDoc);
    verify(updateCollector).add(secondId, secondDoc);
}
From source file: com.ngdata.hbaseindexer.parse.SolrInputDocumentBuilderTest.java
License: Apache License
@Test public void testAdd() { SolrInputDocumentBuilder builder = new SolrInputDocumentBuilder(); SolrInputDocument docA = new SolrInputDocument(); SolrInputDocument docB = new SolrInputDocument(); docA.addField("fieldA", "valueA1"); docA.addField("fieldA", "valueA2"); docB.addField("fieldB", "valueB"); builder.add(docA);// w ww . j a v a 2s.co m builder.add(docB); SolrInputDocument merged = builder.getDocument(); assertEquals(Sets.newHashSet("fieldA", "fieldB"), merged.keySet()); assertEquals(Lists.newArrayList("valueA1", "valueA2"), merged.getField("fieldA").getValues()); assertEquals(Lists.newArrayList("valueB"), merged.getField("fieldB").getValues()); }
From source file: com.ngdata.hbaseindexer.parse.SolrInputDocumentBuilderTest.java
License: Apache License
@Test public void testAdd_WithPrefix() { SolrInputDocumentBuilder builder = new SolrInputDocumentBuilder(); SolrInputDocument docA = new SolrInputDocument(); SolrInputDocument docB = new SolrInputDocument(); docA.addField("fieldA", "valueA"); docB.addField("fieldB", "valueB"); builder.add(docA, "A_"); builder.add(docB);//from ww w .j a v a 2s . co m SolrInputDocument merged = builder.getDocument(); assertEquals(Sets.newHashSet("A_fieldA", "fieldB"), merged.keySet()); assertEquals(Lists.newArrayList("valueA"), merged.getField("A_fieldA").getValues()); assertEquals(Lists.newArrayList("valueB"), merged.getField("fieldB").getValues()); }
From source file: com.ngdata.hbaseindexer.parse.SolrInputDocumentBuilderTest.java
License: Apache License
@Test public void testAdd_OverlappingFields() { SolrInputDocumentBuilder builder = new SolrInputDocumentBuilder(); SolrInputDocument docA = new SolrInputDocument(); SolrInputDocument docB = new SolrInputDocument(); docA.addField("field", "A1", 0.5f); docA.addField("field", "A2", 0.5f); docB.addField("field", "B1", 1.5f); docB.addField("field", "B2", 1.5f); builder.add(docA);//from w ww .j av a 2s . c o m builder.add(docB); SolrInputDocument merged = builder.getDocument(); assertEquals(Sets.newHashSet("field"), merged.keySet()); assertEquals(Lists.newArrayList("A1", "A2", "B1", "B2"), merged.get("field").getValues()); // The boost of the first-added definition of a field is the definitive version assertEquals(0.5f * 0.5f * 1.5f * 1.5f, merged.getField("field").getBoost(), 0f); }