Example usage for org.apache.solr.common SolrInputDocument getFieldValue

List of usage examples for org.apache.solr.common SolrInputDocument getFieldValue

Introduction

In this page you can find the example usage for org.apache.solr.common SolrInputDocument getFieldValue.

Prototype

@Override
public Object getFieldValue(String name) 

Source Link

Document

Get the first value for a field.

Usage

From source file:actors.SolrActor.java

License:Apache License

/**
 * Handles a {@link SolrIndexEvent}: performs a Solr real-time GET to check whether the
 * event's document (identified by its "literal.id" / "literal.rev" fields) is already
 * indexed, and if not, streams the document content from its "literal.links" URL to the
 * /update/extract handler together with its literal.* metadata, committing immediately.
 *
 * NOTE(review): every getFieldValue(...).toString() below throws a NullPointerException
 * when the corresponding "literal.*" field is absent — presumably the event producer
 * guarantees these fields; confirm upstream.
 */
public void indexUpdated(SolrIndexEvent msg) {
    try { // broad try/catch: any failure is printed and the event is dropped (see below)
        System.out.println("SolrIndexEvent");
        SolrInputDocument doc = msg.getDocuement();
        // Making a real-time GET against the /get handler to see whether this
        // id/rev pair is already indexed.
        System.out.println("GET");
        SolrQuery parameters = new SolrQuery();
        parameters.setRequestHandler("/get");
        String f1 = doc.getFieldValue("literal.id").toString();
        String f2 = doc.getFieldValue("literal.rev").toString();
        parameters.set("id", f1);
        parameters.set("rev", f2);
        //System.out.println(parameters);

        QueryResponse response = server.query(parameters);

        NamedList<Object> result = response.getResponse();
        //System.out.println(response.getResponse());
        //System.out.println(result.size() );
        //System.out.println();
        //System.out.println(result);
        // Validate that the doc exists: a missing "doc" entry in the real-time GET
        // response means it is not indexed yet and must be submitted for extraction.
        if (result == null || result.get("doc") == null) {
            System.out.println("/update/extract");
            ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update/extract");
            // Document content is streamed from its (Dropbox) URL rather than uploaded.
            URL url = new URL(doc.getFieldValue("literal.links").toString());
            ContentStreamBase content = new ContentStreamBase.URLStream(url);
            System.out.println("ContentStreamBase");
            req.addContentStream(content);
            // Additional metadata passed through as literal.* parameters.
            req.setParam("literal.id", doc.getFieldValue("literal.id").toString());
            req.setParam("literal.title", doc.getFieldValue("literal.title").toString());
            req.setParam("literal.rev", doc.getFieldValue("literal.rev").toString());
            req.setParam("literal.when", doc.getFieldValue("literal.when").toString());
            req.setParam("literal.path", doc.getFieldValue("literal.path").toString());
            req.setParam("literal.icon", doc.getFieldValue("literal.icon").toString());
            req.setParam("literal.size", doc.getFieldValue("literal.size").toString());
            req.setParam("literal.url", doc.getFieldValue("literal.links").toString());

            // uprefix/fmap route unmapped Tika-extracted fields into attr_* fields.
            req.setParam("uprefix", "attr_");
            req.setParam("fmap.content", "attr_content");
            req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
            // Requesting Solr
            result = server.request(req);
            //System.out.println("Result: " + result.toString());

        } else {
            System.out.println("It's already update");

        }

    } catch (Exception e) {
        // NOTE(review): catching Exception broadly and only printing the stack trace
        // effectively swallows all errors — consider proper logging or rethrowing.
        e.printStackTrace();
    }
}

From source file:at.newmedialab.lmf.search.services.indexing.SolrCoreRuntime.java

License:Apache License

/**
 * Submits the given document to this core's Solr server, committing within 10 seconds.
 * Documents lacking an "id" field are rejected with a warning; null documents are
 * ignored. The server interaction is serialized through {@code serverLock}.
 *
 * @param doc the document to be added to the Solr core; may be null (no-op)
 */
public void queueInputDocument(SolrInputDocument doc) {
    if (doc == null) {
        return;
    }
    serverLock.lock();
    try {
        if (doc.getFieldValue("id") == null) {
            // An id is mandatory for updates; refuse the document otherwise.
            log.warn("({}) rejected document without 'id' for update", config.getName());
            return;
        }
        UpdateRequest update = new UpdateRequest();
        update.setCommitWithin(10000);
        update.add(doc);
        server.request(update);
    } catch (IOException e) {
        log.warn("I/O exception while adding SOLR document to index", e);
    } catch (SolrServerException e) {
        log.warn("server exception while adding SOLR document to index", e);
    } finally {
        serverLock.unlock();
    }
}

From source file:at.pagu.soldockr.core.convert.MappingSolrConverterTest.java

License:Apache License

@Test
public void testWrite() {
    // Writing a bean into a Solr document must copy both properties verbatim.
    SolrInputDocument target = new SolrInputDocument();
    ConvertableBean source = new ConvertableBean("j73x73r", 1979);

    converter.write(source, target);

    Assert.assertEquals(source.getStringProperty(), target.getFieldValue("stringProperty"));
    Assert.assertEquals(source.getIntProperty(), target.getFieldValue("intProperty"));
}

From source file:com.cloudera.cdk.morphline.solrcell.SolrCellMorphlineTest.java

License:Apache License

/**
 * Verifies that the ContentHandler strips illegal (non-)characters from field
 * values when a new document is generated from metadata.
 */
@Test
public void testTransformValue() {
    String field = "user_name";
    // Sanity check: the fixture value really does contain illegal characters.
    assertFalse("foobar".equals(getFoobarWithNonChars()));

    // Load the illegal-char string into a metadata field and generate a new
    // document, which causes the ContentHandler to be invoked.
    Metadata metadata = new Metadata();
    metadata.set(field, getFoobarWithNonChars());
    StripNonCharSolrContentHandlerFactory factory = new StripNonCharSolrContentHandlerFactory(
            DateUtil.DEFAULT_DATE_FORMATS);
    IndexSchema schema = h.getCore().getLatestSchema();
    SolrContentHandler handler = factory.createSolrContentHandler(metadata,
            new MapSolrParams(new HashMap()), schema);
    String stripped = handler.newDocument().getFieldValue(field).toString();
    assertTrue("foobar".equals(stripped));
}

From source file:com.cominvent.solr.update.processor.MappingUpdateProcessor.java

License:Apache License

/**
 * Maps the value of the configured input field through the configured value map and
 * writes the result to the output field. When the input value has no mapping, the
 * configured fallback value is used; null or empty results leave the document
 * untouched. Documents pass straight through when the processor is disabled.
 *
 * @param cmd the add command carrying the document to process
 * @throws IOException propagated from the next processor in the chain
 */
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
    if (isEnabled()) {

        SolrInputDocument doc = cmd.getSolrInputDocument();

        // Fetch document id (used for logging only; empty when absent)
        String docId = "";
        if (doc.containsKey(docIdField))
            docId = (String) doc.getFieldValue(docIdField);

        String inValue = (String) doc.getFieldValue(inputField);
        String outValue;
        if (map.containsKey(inValue)) {
            outValue = map.get(inValue);
        } else {
            outValue = fallbackValue;
        }

        if (outValue != null && outValue.length() > 0) {
            // Guard the debug message so the string concatenation is only paid
            // when debug logging is actually enabled.
            if (log.isDebugEnabled()) {
                log.debug("Mapping done for document " + docId + ": " + inValue + " => " + outValue);
            }
            doc.setField(outputField, outValue);
        }

    } else {
        log.debug("MappingUpdateProcessor is not enabled. Skipping");
    }

    super.processAdd(cmd);
}

From source file:com.francelabs.datafari.updateprocessor.DatafariUpdateProcessor.java

License:Apache License

/**
 * Normalizes crawled documents before indexing:
 * deduplicates the "id" and "last_modified" values that Tika sometimes emits several
 * times (keeping the first, which is always the right one); copies
 * "ignored_filelastmodified" into "last_modified" when present; and derives the
 * "url", "title", "source" and "extension" fields from the document id (a URL).
 *
 * @param cmd the add command carrying the document to normalize
 * @throws IOException if the id is not a well-formed URL, or from the processor chain
 */
@Override
public void processAdd(final AddUpdateCommand cmd) throws IOException {
    final SolrInputDocument doc = cmd.getSolrInputDocument();

    // Sometimes Tika puts several ids so we keep the first one which is
    // always the right one. The null check guards against documents with no
    // "id" field at all (previously a NullPointerException), mirroring the
    // guard used for "last_modified" below.
    if ((doc.getFieldValues("id") != null) && (doc.getFieldValues("id").size() > 1)) {
        final Object id = doc.getFieldValue("id");
        doc.remove("id");
        doc.addField("id", id);
    }

    // Try to retrieve the ignored_filelastmodified field to set its
    // value in the last_modified field
    if (doc.getFieldValue("ignored_filelastmodified") != null) {
        final Object last_modified = doc.getFieldValue("ignored_filelastmodified");
        doc.remove("last_modified");
        doc.addField("last_modified", last_modified);
    }

    // Sometimes Tika puts several last_modified dates, so we keep the first
    // one which is always the right one
    if ((doc.getFieldValues("last_modified") != null) && (doc.getFieldValues("last_modified").size() > 1)) {
        final Object last_modified = doc.getFieldValue("last_modified");
        doc.remove("last_modified");
        doc.addField("last_modified", last_modified);
    }

    // The document id is its URL; expose it as an explicit "url" field too.
    final String url = (String) doc.getFieldValue("id");
    doc.addField("url", url);

    // Determine the file name: prefer the stream name provided by the crawler,
    // otherwise take the last path segment of the URL.
    String filename = "";
    final SolrInputField streamNameField = doc.get("ignored_stream_name");
    if (streamNameField != null) {
        filename = (String) streamNameField.getFirstValue();
    } else {
        final Pattern pattern = Pattern.compile("[^/]*$");
        final Matcher matcher = pattern.matcher(url);
        if (matcher.find()) {
            filename = matcher.group();
        }
    }

    if (url.startsWith("http")) {
        // Web documents: keep an existing title, defaulting to the file name.
        if (doc.get("title") == null) {
            doc.addField("title", filename);
        }
        doc.addField("source", "web");
    }

    if (url.startsWith("file")) {
        // File-share documents: the file name always becomes the title.
        doc.removeField("title");
        doc.addField("title", filename);
        doc.addField("source", "file");
    }

    // Derive the extension either from the file name or from the MIME type detected
    // by Tika (per the extensionFromName setting); each strategy falls back to the
    // other when its candidate looks implausible (empty or 5+ characters long).
    String extension = "";
    URL urlObject = new URL(url);
    String path = urlObject.getPath();
    final SolrInputField mimeTypeField = doc.get("ignored_content_type");

    String nameExtension = FilenameUtils.getExtension(path);
    String tikaExtension = mimeTypeField == null ? "" : extensionFromMimeTypeField(mimeTypeField);

    if (extensionFromName) {
        extension = nameExtension.length() > 1 && nameExtension.length() < 5 ? nameExtension : tikaExtension;
    } else {
        extension = tikaExtension.length() > 1 && tikaExtension.length() < 5 ? tikaExtension : nameExtension;
    }
    doc.addField("extension", extension.toLowerCase());

    super.processAdd(cmd);
}

From source file:com.github.le11.nls.solr.UIMAAsyncUpdateRequestProcessor.java

License:Apache License

/**
 * Collects the text values of the configured analysis fields from the document.
 * When field merging is enabled, all values are concatenated into a single-element
 * array; otherwise one element per configured field is returned (missing fields
 * yield the string "null", as String.valueOf(null) does).
 *
 * @param solrInputDocument the document whose field values are extracted
 * @return the texts to analyze, merged or one per field
 */
private String[] getTextsToAnalyze(SolrInputDocument solrInputDocument) {
    String[] fields = solrUIMAConfiguration.getFieldsToAnalyze();
    if (solrUIMAConfiguration.isFieldsMerging()) {
        StringBuilder merged = new StringBuilder();
        for (String field : fields) {
            merged.append(String.valueOf(solrInputDocument.getFieldValue(field)));
        }
        return new String[] { merged.toString() };
    }
    String[] values = new String[fields.length];
    for (int idx = 0; idx < fields.length; idx++) {
        values[idx] = String.valueOf(solrInputDocument.getFieldValue(fields[idx]));
    }
    return values;
}

From source file:com.gu.solr.MergeUtils.java

License:Apache License

/**
 * Creates a shallow copy of the given document, preserving every value of
 * multi-valued fields. (The previous implementation used getFieldValue, which
 * returns only the FIRST value of a field, silently dropping the rest.)
 *
 * @param document the document to copy; must not be null
 * @return a new SolrInputDocument containing all field values of the input
 */
private static SolrInputDocument copy(SolrInputDocument document) {
    SolrInputDocument copy = new SolrInputDocument();

    for (String name : document.getFieldNames()) {
        // getFieldValues returns all values of the field, not just the first.
        for (Object value : document.getFieldValues(name)) {
            copy.addField(name, value);
        }
    }

    return copy;
}

From source file:com.ibm.watson.developer_cloud.professor_languo.ingestion.indexing.RetrieveAndRankIndexerTest.java

License:Open Source License

/**
 * Checks that the deserialized candidate answers are exactly the candidate answers
 * we expect. Since the query is "question Title: what is right", the expected
 * candidate answers should be the entire corpus: the index must be the same size
 * as the corpus, and the indexed title/id and serialized binary fields must match
 * the values produced from the corpus thread.
 *
 * @throws IngestionException if the corpus thread cannot be deserialized
 */
private void compare_indexed_records_to_corpus() throws IngestionException {
    indexdCorpus = corpusBuilder.getUniqueThreadSetFromBinFiles();

    // The index must contain exactly one document per corpus thread.
    assertTrue("Wrong number of documents indexed", indexedRecords.size() == indexdCorpus.size());

    // Rebuild the expected Lucene document from the first serialized corpus thread.
    File binFile = new File(corpusBuilder.getUniqueThreadDirPath()).listFiles()[0];
    StackExchangeThread thread = StackExchangeThreadSerializer.deserializeThreadFromBinFile(binFile.getPath());

    final Document luceneDoc = new LuceneDocumentMapper().createDocument(thread);
    SolrInputDocument indexedDoc = indexedRecords.get(0);

    for (IndexableField field : luceneDoc.getFields()) {
        String name = field.name();
        BytesRef expectedBytes = luceneDoc.getBinaryValue(name);

        // The indexed fields (title and id) must round-trip as strings.
        if (name.equals(IndexDocumentFieldName.THREAD_TITLE.toString())
                || name.equals(IndexDocumentFieldName.THREAD_POST_ID.toString())) {
            assertEquals(luceneDoc.get(name), indexedDoc.getFieldValue(name).toString());
        }

        // The serialized field must round-trip byte-for-byte.
        if (expectedBytes != null) {
            assertEquals(expectedBytes, new BytesRef((byte[]) indexedDoc.getFieldValue(name)));
        }

    }
}

From source file:com.ngdata.hbaseindexer.indexer.FusionDocumentWriter.java

License:Apache License

/**
 * Flattens a parent/child SolrInputDocument pair into the JSON map format expected by
 * Fusion. This method was modified from its original to add the input parameters
 * 'parent' and 'docCount', enabling recursion to find all parent/child relationships
 * to any level: the parent's fields are merged into the child (prefixed with "_p_"),
 * and the merged document is returned as a JSON-shaped map.
 *
 * @param parent   the parent document of the child being passed in; may be null when
 *                 the child is a member of the initially submitted documents
 * @param child    the child document; the parent's fields are merged into it
 * @param docCount count of documents added so far; used to derive an id for children
 *                 that have none (parentId + "-" + docCount)
 * @return the merged parent and child documents as a Fusion-compatible JSON map
 *         (empty when {@code child} is null)
 * @throws IllegalStateException if no id can be resolved for the child document
 */
protected Map<String, Object> doc2json(SolrInputDocument parent, SolrInputDocument child, int docCount) {
    Map<String, Object> json = new HashMap<String, Object>();
    if (child != null) {
        String docId = (String) child.getFieldValue("id");
        if (docId == null) {
            if (parent != null) {
                // Derive a child id from the parent id and the running count.
                String parentId = (String) parent.getFieldValue("id");
                docId = parentId + "-" + docCount;
            }
            if (docId == null)
                throw new IllegalStateException("Couldn't resolve the id for document: " + child);
        }
        json.put("id", docId);

        // Parameterized (was a raw List): elements are the maps built by mapField/appendField.
        List<Object> fields = new ArrayList<Object>();
        if (parent != null) {
            if (log.isDebugEnabled())
                log.debug("Method:doc2json - Merging parent and child docs, parent:[" + parent.toString()
                        + "]; child[" + child.toString() + "].");

            // have a parent doc ... flatten by adding all parent doc fields to the child with prefix _p_
            for (String f : parent.getFieldNames()) {
                if ("id".equals(f)) {
                    fields.add(mapField("_p_id", null /* field name prefix */,
                            parent.getField("id").getFirstValue()));
                } else {
                    appendField(parent, f, "_p_", fields);
                }
            }
        }
        for (String f : child.getFieldNames()) {
            if (!"id".equals(f)) { // id already added
                appendField(child, f, null, fields);
            }
        }
        // keep track of the time we saw this doc on the hbase side
        String tdt = DateUtil.getThreadLocalDateFormat().format(new Date());
        fields.add(mapField("_hbasets_tdt", null, tdt));
        if (log.isDebugEnabled())
            log.debug(strIndexName + " Reconcile id = " + docId + " and timestamp = " + tdt);

        json.put("fields", fields);
    } else {
        log.warn("method:doc2json - Input parameter 'child' was null.");
    }
    return json;
}