Example usage for org.apache.solr.common SolrInputDocument getField

List of usage examples for org.apache.solr.common SolrInputDocument getField

Introduction

On this page you can find example usage for org.apache.solr.common SolrInputDocument getField.

Prototype

public SolrInputField getField(String field) 

Source Link

Usage

From source file:at.newmedialab.lmf.search.services.indexing.SolrIndexingServiceImpl.java

License:Apache License

/**
 * Builds a Solr input document for the given resource by evaluating the core's
 * LDPath program and queues it on the core runtime. Resources that are fully
 * deleted, fail the program filter, or are neither URI nor BNode are queued for
 * deletion from the index instead.
 *
 * @param resource the RDF resource to (re-)index; method returns immediately when null
 * @param runtime  the Solr core runtime holding configuration, program and queues
 */
@Override
public void indexResource(Resource resource, SolrCoreRuntime runtime) {
    // Lazily parse and cache the LDPath program on first use.
    Program<Value> program = runtime.getConfiguration().getProgram();
    if (program == null) {
        try {
            program = solrProgramService
                    .parseProgram(new StringReader(runtime.getConfiguration().getProgramString()));
            runtime.getConfiguration().setProgram(program);
        } catch (LDPathParseException e) {
            log.error("error parsing path program for engine {}", runtime.getConfiguration().getName(), e);
            return;
        }
    }

    if (resource == null)
        return;
    final String coreName = runtime.getConfiguration().getName();
    final String rID = getResourceId(resource);

    try {
        final RepositoryConnection connection = sesameService.getConnection();
        try {
            connection.begin();

            //if (resource instanceof KiWiResource && ((KiWiResource) resource).isDeleted()) {
            //    runtime.queueDeletion(rID);
            //}
            //FIXME: find a proper way to do this with the new api
            // A resource counts as deleted when every one of its statements is deleted.
            boolean deleted = true;
            RepositoryResult<Statement> statements = connection.getStatements(resource, null, null, false);
            while (statements.hasNext()) {
                if (!ResourceUtils.isDeleted(connection, statements.next())) {
                    deleted = false;
                    break;
                }
            }
            if (deleted) {
                runtime.queueDeletion(rID);
            }

            // Restrict LDPath evaluation to the graphs named by the program
            // (an empty array means: all contexts).
            final Resource[] contexts;
            if (program.getGraphs().isEmpty()) {
                contexts = new Resource[0];
            } else {
                contexts = Collections2.transform(program.getGraphs(), new Function<java.net.URI, URI>() {
                    @Override
                    public URI apply(java.net.URI in) {
                        return connection.getValueFactory().createURI(in.toString());
                    }
                }).toArray(new Resource[0]);
            }

            final SesameConnectionBackend backend = ContextAwareSesameConnectionBackend
                    .withConnection(connection, contexts);
            // Resources that do not pass the program's filter are removed from the
            // index, since an older version may still be stored there.
            if (program.getFilter() != null
                    && !program.getFilter().apply(backend, resource, Collections.singleton((Value) resource))) {
                if (log.isDebugEnabled()) {
                    log.debug("({}) <{}> does not match filter '{}', ignoring", coreName, resource,
                            program.getFilter().getPathExpression(backend));
                }
                // Some resources might be still in the index, so delete it.
                runtime.queueDeletion(rID);
                connection.commit();
                return;
            } else if (log.isTraceEnabled() && program.getFilter() != null) {
                log.trace("({}) <{}> matches filter '{}', indexing...", coreName, resource,
                        program.getFilter().getPathExpression(backend));
            }

            SolrInputDocument doc = new SolrInputDocument();

            // Bookkeeping fields present on every indexed document.
            doc.addField("id", rID);
            doc.addField("lmf.indexed", new Date());
            if (resource instanceof KiWiUriResource) {
                doc.addField("lmf.created", ((KiWiUriResource) resource).getCreated());
            }

            if (resource instanceof URI) {
                URI r = (URI) resource;

                doc.addField("lmf.uri", r.stringValue());
            } else if (resource instanceof BNode) {
                BNode r = (BNode) resource;
                doc.addField("lmf.anon_id", r.getID());
            } else {
                // This should not happen, but nevertheless...
                log.warn("Tried to index a Resource that is neither a URI nor BNode: {}", resource);
                runtime.queueDeletion(rID);
                connection.rollback();
                return;
            }

            // Index only URI-typed rdf:type values.
            for (Resource type : getTypes(connection, resource)) {
                if (type instanceof KiWiUriResource) {
                    doc.addField("lmf.type", type.stringValue());
                }
            }

            // Set the document boost; when the boost path yields several values,
            // the first one wins and a warning is logged.
            if (program.getBooster() != null) {
                final Collection<Float> boostValues = program.getBooster().getValues(backend, resource);
                if (boostValues.size() > 0) {
                    final Float docBoost = boostValues.iterator().next();
                    if (boostValues.size() > 1) {
                        log.warn("found more than one boostFactor for <{}>, using {}", resource, docBoost);
                    }
                    doc.setDocumentBoost(docBoost);
                }
            }

            // Evaluate each field mapping rule; when dependency tracking is enabled,
            // also collect every value touched along the evaluated paths.
            Set<Value> dependencies = new HashSet<Value>();
            for (FieldMapping<?, Value> rule : program.getFields()) {
                //                    Map<Value, List<Value>> paths = new HashMap<Value, List<Value>>();
                //                    Collection<?> values = rule.getValues(backend, resource, paths);
                //FIXME: Temporary fixing due LDPath reverse properties selector bug
                Map<Value, List<Value>> paths = null;
                Collection<?> values = null;
                if (runtime.getConfiguration().isUpdateDependencies()) {
                    paths = new HashMap<Value, List<Value>>();
                    values = rule.getValues(backend, resource, paths);
                } else {
                    values = rule.getValues(backend, resource);
                }
                //
                try {
                    // Single-valued fields only take the first non-null value.
                    final boolean isSinge = !isMultiValuedField(rule);
                    for (Object value : values) {
                        if (value != null) {
                            doc.addField(rule.getFieldName(), value);
                            if (isSinge) {
                                break;
                            }
                        }
                    }
                    if (rule.getFieldConfig() != null) {
                        final String b = rule.getFieldConfig().get("boost");
                        try {
                            if (b != null) {
                                doc.getField(rule.getFieldName()).setBoost(Float.parseFloat(b));
                            }
                        } catch (NumberFormatException e) {
                            // NOTE(review): rethrow drops the original exception as cause
                            throw new NumberFormatException("could not parse boost value: '" + b + "'");
                        }
                    }
                } catch (Exception ex) {
                    // A failing rule only skips this field; the rest of the document is still built.
                    log.error("({}) exception while building path indexes for <{}>, field {}: {}", coreName,
                            resource, rule.getFieldName(), ex.getMessage());
                    log.debug("(" + coreName + ") stacktrace", ex);
                }
                if (runtime.getConfiguration().isUpdateDependencies()) {
                    for (List<Value> path : paths.values()) {
                        dependencies.addAll(path);
                    }
                }
            }

            // Record which other resources this document depends on, so updates to
            // them can trigger reindexing of this resource.
            if (runtime.getConfiguration().isUpdateDependencies()) {
                for (Value dependency : dependencies) {
                    if (dependency instanceof URI && !dependency.equals(resource)) {
                        doc.addField("lmf.dependencies", dependency.stringValue());
                    }
                }
            }

            runtime.queueInputDocument(doc);

            connection.commit();
        } finally {
            connection.close();
        }
    } catch (RepositoryException e) {
        log.warn("Could not build index document for " + resource.stringValue(), e);
    } catch (Throwable t) {
        log.error("unknown error while indexing document", t);
    }
}

From source file:at.pagu.soldockr.repository.SimpleSolrRepository.java

License:Apache License

/**
 * Extracts the unique id value from a {@link SolrInputDocument}.
 *
 * @param solrInputDocument document to read the configured id field from
 * @return the id field's value rendered as a String
 * @throws IllegalArgumentException (via {@code Assert}) if the id field is
 *         missing from the document or its value is null
 */
private String extractIdFromSolrInputDocument(SolrInputDocument solrInputDocument) {
    // Look the field up once instead of three separate getField() calls.
    SolrInputField idField = solrInputDocument.getField(idFieldName);
    Assert.notNull(idField, "Unable to find field '" + idFieldName + "' in SolrDocument.");
    Assert.notNull(idField.getValue(), "ID must not be 'null'.");

    return idField.getValue().toString();
}

From source file:com.grantingersoll.intell.index.BayesUpdateRequestProcessor.java

License:Apache License

/**
 * Classifies the configured input field of the given document.
 *
 * @param doc document whose {@code inputField} is tokenized and classified
 * @return the classification result, or null when the field is absent or its
 *         value is not a String
 * @throws IOException when the classifier datastore is invalid
 */
public ClassifierResult classifyDocument(SolrInputDocument doc) throws IOException {
    final SolrInputField input = doc.getField(inputField);
    // Only String-valued, present fields can be tokenized.
    if (input == null || !(input.getValue() instanceof String)) {
        return null;
    }

    final String[] tokens = tokenizeField((String) input.getValue());
    try {
        return ctx.classifyDocument(tokens, defaultCategory);
    } catch (InvalidDatastoreException e) {
        throw new IOException("Invalid Classifier Datastore", e);
    }
}

From source file:com.ngdata.hbaseindexer.indexer.FusionDocumentWriter.java

License:Apache License

/**
 * shs: This method was modified from its original to add the input parameters 'parent' and 'docCount'. This was done
 *      to enable recursion to be used to find all parent/child relationships to any level. The method will merge the
 *      fields in the parent document into the child document and will then convert that merged document into JSON
 *      format and return that JSON document to the caller.
 * @param parent    The parent document for the child document being passed in. Parent may be null if the child being
 *                  passed in is a member of the initial documents submitted.
 * @param child     This is the child document. It will have the parent's fields merged into it.
 * @param docCount  This is a count of the number of documents that have been added in this processing.
 * @return          The merged parent and child documents as a JSON formatted document, in a format acceptable to
 *                  Fusion./*from w ww. j  a  v a  2s . c o m*/
 */
protected Map<String, Object> doc2json(SolrInputDocument parent, SolrInputDocument child, int docCount) {
    Map<String, Object> json = new HashMap<String, Object>();
    if (child != null) {
        String docId = (String) child.getFieldValue("id");
        if (docId == null) {
            if (parent != null) {
                String parentId = (String) parent.getFieldValue("id");
                docId = parentId + "-" + docCount;
            }
            if (docId == null)
                throw new IllegalStateException("Couldn't resolve the id for document: " + child);
        }
        json.put("id", docId);

        List fields = new ArrayList();
        if (parent != null) {
            if (log.isDebugEnabled())
                log.debug("Method:doc2json - Merging parent and child docs, parent:[" + parent.toString()
                        + "]; child[" + child.toString() + "].");

            // have a parent doc ... flatten by adding all parent doc fields to the child with prefix _p_
            for (String f : parent.getFieldNames()) {
                if ("id".equals(f)) {
                    fields.add(mapField("_p_id", null /* field name prefix */,
                            parent.getField("id").getFirstValue()));
                } else {
                    appendField(parent, f, "_p_", fields);
                }
            }
        }
        for (String f : child.getFieldNames()) {
            if (!"id".equals(f)) { // id already added
                appendField(child, f, null, fields);
            }
        }
        // keep track of the time we saw this doc on the hbase side
        String tdt = DateUtil.getThreadLocalDateFormat().format(new Date());
        fields.add(mapField("_hbasets_tdt", null, tdt));
        if (log.isDebugEnabled())
            log.debug(strIndexName + " Reconcile id = " + docId + " and timestamp = " + tdt);

        json.put("fields", fields);
    } else {
        log.warn("method:doc2json - Input parameter 'child' was null.");
    }
    return json;
}

From source file:com.ngdata.hbaseindexer.indexer.FusionDocumentWriter.java

License:Apache License

/**
 * Appends all values of the given document field to {@code fields}, one mapped
 * entry per value. Entries for which {@code mapField} returns null are skipped.
 *
 * @param doc    document to read the field from
 * @param f      field name
 * @param pfx    optional field-name prefix passed through to {@code mapField}
 * @param fields target list receiving the mapped field entries
 */
protected void appendField(SolrInputDocument doc, String f, String pfx, List fields) {
    final SolrInputField field = doc.getField(f);
    final int valueCount = field.getValueCount();
    if (valueCount <= 0) {
        return; // no values to add for this field
    }

    // Single-valued fields skip iterating the value collection.
    if (valueCount == 1) {
        final Map<String, Object> single = mapField(f, pfx, field.getFirstValue());
        if (single != null) {
            fields.add(single);
        }
        return;
    }

    for (Object val : field.getValues()) {
        final Map<String, Object> mapped = mapField(f, pfx, val);
        if (mapped != null) {
            fields.add(mapped);
        }
    }
}

From source file:com.ngdata.hbaseindexer.indexer.IdAddingSolrUpdateWriter.java

License:Apache License

/**
 * Add a SolrInputDocument to this writer.
 * <p>/*from  ww w .j ava  2  s. c  om*/
 * Adding multiple documents without ids will result in an IllegalStateException being thrown.
 */
@Override
public void add(SolrInputDocument solrDocument) {
    String docId = documentId;
    SolrInputField uniqueKeySolrField = solrDocument.getField(uniqueKeyField);
    if (uniqueKeySolrField == null) {
        if (idUsed) {
            throw new IllegalStateException(
                    "Document id '" + documentId + "' has already been used by this record");
        }
        solrDocument.addField(uniqueKeyField, documentId);
        idUsed = true;
    } else {
        docId = uniqueKeySolrField.getValue().toString();
    }

    if (tableNameField != null) {
        solrDocument.addField(tableNameField, tableName);
    }

    updateCollector.add(docId, solrDocument);
}

From source file:com.ngdata.hbaseindexer.indexer.IdAddingSolrUpdateWriterTest.java

License:Apache License

@Test
public void testAdd_MultipleDocumentsWithTheirOwnIds() {

    String idA = DOCUMENT_ID + "A";
    String idB = DOCUMENT_ID + "B";

    // No record-level document id is forced (null) — each document must carry its own key.
    IdAddingSolrUpdateWriter updateWriter = new IdAddingSolrUpdateWriter(UNIQUE_KEY_FIELD, DOCUMENT_ID, null,
            TABLE_NAME, updateCollector);

    SolrInputDocument docA = mock(SolrInputDocument.class);
    SolrInputDocument docB = mock(SolrInputDocument.class);

    // Give each mocked document its own unique-key field.
    SolrInputField keyFieldA = new SolrInputField(UNIQUE_KEY_FIELD);
    keyFieldA.setValue(idA, 1.0f);
    SolrInputField keyFieldB = new SolrInputField(UNIQUE_KEY_FIELD);
    keyFieldB.setValue(idB, 1.0f);

    when(docA.getField(UNIQUE_KEY_FIELD)).thenReturn(keyFieldA);
    when(docB.getField(UNIQUE_KEY_FIELD)).thenReturn(keyFieldB);

    updateWriter.add(docA);
    updateWriter.add(docB);

    // Each document must be collected under its own id, not the record id.
    verify(updateCollector).add(idA, docA);
    verify(updateCollector).add(idB, docB);
}

From source file:com.ngdata.hbaseindexer.parse.SolrInputDocumentBuilderTest.java

License:Apache License

@Test
public void testAdd() {
    // Merging two documents with disjoint fields keeps every field and all of its values.
    SolrInputDocumentBuilder builder = new SolrInputDocumentBuilder();

    SolrInputDocument first = new SolrInputDocument();
    first.addField("fieldA", "valueA1");
    first.addField("fieldA", "valueA2");

    SolrInputDocument second = new SolrInputDocument();
    second.addField("fieldB", "valueB");

    builder.add(first);
    builder.add(second);

    SolrInputDocument merged = builder.getDocument();

    assertEquals(Sets.newHashSet("fieldA", "fieldB"), merged.keySet());
    assertEquals(Lists.newArrayList("valueA1", "valueA2"), merged.getField("fieldA").getValues());
    assertEquals(Lists.newArrayList("valueB"), merged.getField("fieldB").getValues());
}

From source file:com.ngdata.hbaseindexer.parse.SolrInputDocumentBuilderTest.java

License:Apache License

@Test
public void testAdd_WithPrefix() {
    // Fields of a document added with a prefix are renamed; others stay untouched.
    SolrInputDocumentBuilder builder = new SolrInputDocumentBuilder();

    SolrInputDocument prefixed = new SolrInputDocument();
    prefixed.addField("fieldA", "valueA");

    SolrInputDocument plain = new SolrInputDocument();
    plain.addField("fieldB", "valueB");

    builder.add(prefixed, "A_");
    builder.add(plain);

    SolrInputDocument merged = builder.getDocument();

    assertEquals(Sets.newHashSet("A_fieldA", "fieldB"), merged.keySet());
    assertEquals(Lists.newArrayList("valueA"), merged.getField("A_fieldA").getValues());
    assertEquals(Lists.newArrayList("valueB"), merged.getField("fieldB").getValues());
}

From source file:com.ngdata.hbaseindexer.parse.SolrInputDocumentBuilderTest.java

License:Apache License

@Test
public void testAdd_OverlappingFields() {
    SolrInputDocumentBuilder builder = new SolrInputDocumentBuilder();

    SolrInputDocument docA = new SolrInputDocument();
    SolrInputDocument docB = new SolrInputDocument();

    docA.addField("field", "A1", 0.5f);
    docA.addField("field", "A2", 0.5f);
    docB.addField("field", "B1", 1.5f);
    docB.addField("field", "B2", 1.5f);

    builder.add(docA);
    builder.add(docB);

    SolrInputDocument merged = builder.getDocument();

    // Values for the same field from both documents are appended in add order.
    assertEquals(Sets.newHashSet("field"), merged.keySet());
    assertEquals(Lists.newArrayList("A1", "A2", "B1", "B2"), merged.get("field").getValues());

    // The merged field's boost is the product of all boosts supplied for that
    // field (0.5 * 0.5 * 1.5 * 1.5), not just the first-added one.
    assertEquals(0.5f * 0.5f * 1.5f * 1.5f, merged.getField("field").getBoost(), 0f);
}