Example usage for org.apache.solr.common SolrInputField getFirstValue

List of usage examples for org.apache.solr.common SolrInputField getFirstValue

Introduction

On this page you can find example usages of org.apache.solr.common SolrInputField getFirstValue.

Prototype

public Object getFirstValue() 

Source Link

Usage

From source file:com.francelabs.datafari.updateprocessor.DatafariUpdateProcessor.java

License:Apache License

/**
 * Normalizes a document before handing it to the next step of the update chain.
 *
 * <p>The method:
 * <ul>
 *   <li>keeps only the first value of {@code id} and of {@code last_modified} when several
 *       are present;</li>
 *   <li>copies {@code ignored_filelastmodified}, when present, into {@code last_modified};</li>
 *   <li>copies the id into the {@code url} field;</li>
 *   <li>sets {@code title} and {@code source} depending on whether the url starts with
 *       {@code http} or {@code file};</li>
 *   <li>sets a lower-cased {@code extension} field taken either from the url path or from
 *       the {@code ignored_content_type} field.</li>
 * </ul>
 *
 * @param cmd the add command whose document is modified in place
 * @throws IOException if the id is not a valid URL, or if the delegate fails
 */
@Override
public void processAdd(final AddUpdateCommand cmd) throws IOException {
    final SolrInputDocument doc = cmd.getSolrInputDocument();

    // Tika sometimes emits several ids; the first one is always the right one.
    // NOTE(review): assumes every incoming document carries an id field - confirm.
    if (doc.getFieldValues("id").size() > 1) {
        final Object id = doc.getFieldValue("id");
        doc.remove("id");
        doc.addField("id", id);
    }

    // When ignored_filelastmodified is available, its value replaces last_modified.
    final Object fileLastModified = doc.getFieldValue("ignored_filelastmodified");
    if (fileLastModified != null) {
        doc.remove("last_modified");
        doc.addField("last_modified", fileLastModified);
    }

    // Tika sometimes emits several last_modified dates; the first one is always
    // the right one.
    if ((doc.getFieldValues("last_modified") != null) && (doc.getFieldValues("last_modified").size() > 1)) {
        final Object lastModified = doc.getFieldValue("last_modified");
        doc.remove("last_modified");
        doc.addField("last_modified", lastModified);
    }

    final String url = (String) doc.getFieldValue("id");
    doc.addField("url", url);

    // The file name comes from the stream name when there is one; otherwise it is
    // whatever follows the last '/' of the url (the whole url when it has no '/').
    final String filename;
    final SolrInputField streamNameField = doc.get("ignored_stream_name");
    if (streamNameField != null) {
        filename = (String) streamNameField.getFirstValue();
    } else {
        filename = url.substring(url.lastIndexOf('/') + 1);
    }

    if (url.startsWith("http")) {
        // Web documents keep an existing title.
        if (doc.get("title") == null) {
            doc.addField("title", filename);
        }
        doc.addField("source", "web");
    }

    if (url.startsWith("file")) {
        // File documents always get the file name as title.
        doc.removeField("title");
        doc.addField("title", filename);
        doc.addField("source", "file");
    }

    final URL urlObject = new URL(url);
    final String path = urlObject.getPath();
    final SolrInputField mimeTypeField = doc.get("ignored_content_type");

    final String nameExtension = FilenameUtils.getExtension(path);
    final String tikaExtension = mimeTypeField == null ? "" : extensionFromMimeTypeField(mimeTypeField);

    // The preferred candidate is used only when it is 2 to 4 characters long;
    // otherwise the other candidate is used as is.
    final String extension;
    if (extensionFromName) {
        extension = nameExtension.length() > 1 && nameExtension.length() < 5 ? nameExtension : tikaExtension;
    } else {
        extension = tikaExtension.length() > 1 && tikaExtension.length() < 5 ? tikaExtension : nameExtension;
    }

    // Locale.ROOT keeps the lower-casing independent of the JVM default locale
    // (e.g. "GIF" must not become "g\u0131f" under a Turkish locale).
    doc.addField("extension", extension.toLowerCase(java.util.Locale.ROOT));

    super.processAdd(cmd);
}

From source file:com.ngdata.hbaseindexer.indexer.FusionDocumentWriter.java

License:Apache License

/**
 * Maps every value of field {@code f} with {@code mapField} and appends each non-null
 * mapping to {@code fields}.
 *
 * <p>Nothing is appended when the document has no such field or the field has no values.
 *
 * @param doc    the document to read the field from
 * @param f      the name of the field to append
 * @param pfx    the prefix passed through to {@code mapField}
 * @param fields the list that receives the mapped values
 */
protected void appendField(SolrInputDocument doc, String f, String pfx, List fields) {
    SolrInputField field = doc.getField(f);
    if (field == null) {
        return; // the document does not contain this field
    }

    int vc = field.getValueCount();
    if (vc <= 0) {
        return; // no values to add for this field
    }

    if (vc == 1) {
        Map<String, Object> fieldMap = mapField(f, pfx, field.getFirstValue());
        if (fieldMap != null) {
            fields.add(fieldMap);
        }
    } else {
        for (Object val : field.getValues()) {
            Map<String, Object> fieldMap = mapField(f, pfx, val);
            if (fieldMap != null) {
                fields.add(fieldMap);
            }
        }
    }
}

From source file:com.talis.rdf.solr.DefaultDocumentBuilderTest.java

License:Apache License

/**
 * Builds two quads that share graph, subject and predicate - one with a URI object and
 * one with the literal "Aloha" - and expects the field named after the predicate URI to
 * contain exactly one value, the literal.
 */
@Test
public void literalValuesAreIndexedByPredicateUri() {
    final String predicate = PREDICATE_BASE + "first";

    final Quad resourceQuad = new Quad(Node.createURI(GRAPH_URI), Node.createURI(SUBJECT_URI),
            Node.createURI(predicate), Node.createURI("http://example.com/resource"));
    final Quad literalQuad = new Quad(Node.createURI(GRAPH_URI), Node.createURI(SUBJECT_URI),
            Node.createURI(predicate), Node.createLiteral("Aloha"));

    final ArrayList<Quad> input = new ArrayList<Quad>();
    input.add(resourceQuad);
    input.add(literalQuad);

    final SolrInputField indexed = quadsToDoc.getDocument(DOCUMENT_KEY, input).getField(predicate);

    assertEquals("Aloha", indexed.getFirstValue());
    assertEquals(1, indexed.getValues().size());
}

From source file:edu.cornell.mannlib.vitro.webapp.search.solr.ThumbnailImageURLTest.java

License:Open Source License

/**
 * Test method for {@link edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ThumbnailImageURL#modifyDocument(edu.cornell.mannlib.vitro.webapp.beans.Individual, org.apache.solr.common.SolrInputDocument, java.lang.StringBuffer)}.
 *
 * <p>Runs the modifier on an individual whose URI is {@code personsURI} and expects the
 * thumbnail field to end up with exactly one value, the expected image URL. The test
 * fails if the individual is skipped.
 */
@Test
public void testModifyDocument() {
    Individual person = new IndividualImpl();
    person.setURI(personsURI);

    SolrInputDocument solrDoc = new SolrInputDocument();
    ThumbnailImageURL modifier = new ThumbnailImageURL(testModel);
    try {
        modifier.modifyDocument(person, solrDoc, null);
    } catch (SkipIndividualException skipped) {
        Assert.fail("person was skipped: " + skipped.getMessage());
    }

    SolrInputField thumbnail = solrDoc.getField(fieldForThumbnailURL);
    Assert.assertNotNull(thumbnail);
    Assert.assertNotNull(thumbnail.getValues());
    Assert.assertEquals(1, thumbnail.getValueCount());
    Assert.assertEquals("http://vivo.cornell.edu/individual/n54945", thumbnail.getFirstValue());
}

From source file:fr.cnes.sitools.metacatalogue.index.solr.SolrMetadataIndexer.java

License:Open Source License

/**
 * Builds the solr input./*w ww . j a  va 2  s  . c o m*/
 * 
 * @param fields
 *          the {@link MetadataRecords} object containing the document field
 * @return the solr input document
 * @throws Exception
 *           if there is an error
 */
protected SolrInputDocument buildSolrInput(MetadataRecords fields) throws Exception {
    SolrInputDocument document = new SolrInputDocument();
    String text;
    SolrInputField solrField;
    MetacatalogField indexField;
    for (Field field : fields.getList()) {
        String name = field.getName();
        indexField = MetacatalogField.getField(name);
        if (indexField == null) {
            logger.info("Unknown field " + name + " add it to the index as a String object");
            indexField = MetacatalogField._ANY;
        }
        if (field.getValue() != null) {
            text = field.getValue().toString();
            solrField = document.getField(indexField.getField());
            if (StringUtils.isNotBlank(text)
                    && (solrField == null || !text.equals(solrField.getFirstValue()))) {
                if (indexField.isDate()) {
                    Date date = parseDate(text.toUpperCase());
                    if (date != null) {
                        document.addField(name, date);
                    }
                } else if (indexField.isBoolean()) {
                    Boolean bool = Boolean.parseBoolean(text);
                    if (bool != null) {
                        document.addField(name, bool);
                    }
                } else {
                    document.addField(name, indexField.valueToString(text));
                }
            }
        }
    }
    return document;
}

From source file:lux.solr.LuxUpdateProcessor.java

License:Mozilla Public License

/**
 * Indexes the XML stored in the document and forwards the add command to the next
 * processor.
 *
 * <p>The XML-store field and the id field are removed from the Solr input document
 * before the Lucene document is requested from {@code cmd}. When both a URI and an XML
 * field are present, the XML field is put back, the XML is indexed, and a new
 * {@code UpdateDocCommand} carrying the enriched Lucene document is forwarded instead
 * of {@code cmd}. Otherwise {@code cmd} itself is forwarded, with the two fields still
 * removed from its input document.
 *
 * @param cmd the add command to process
 * @throws IOException wrapping any exception raised while building the Lucene document
 */
@Override
public void processAdd(final AddUpdateCommand cmd) throws IOException {
    SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
    String xmlFieldName = indexConfig.getFieldName(FieldRole.XML_STORE);
    String idFieldName = indexConfig.getFieldName(FieldRole.ID);

    // remove and stash the xml field value
    SolrInputField xmlField = solrInputDocument.removeField(xmlFieldName);
    // the id field is removed as well; this method never puts it back on the input document
    SolrInputField luxIdField = solrInputDocument.removeField(idFieldName);
    String uri = (String) solrInputDocument.getFieldValue(indexConfig.getFieldName(FieldRole.URI));
    // NOTE(review): requested after the two removals above, presumably so that the
    // Lucene document is built without those fields - confirm before reordering.
    Document luceneDocument = cmd.getLuceneDocument();
    UpdateDocCommand luxCommand = null;
    if (uri != null && xmlField != null) {
        // restore the xml field value
        solrInputDocument.put(xmlFieldName, xmlField);
        XmlIndexer xmlIndexer = solrIndexConfig.checkoutXmlIndexer();
        Object xml = xmlField.getFirstValue();
        try {
            try {
                // the xml value may be a String, a byte[] or a NodeInfo;
                // any other type is silently left unindexed
                if (xml instanceof String) {
                    xmlIndexer.index(new StringReader((String) xml), uri);
                } else if (xml instanceof byte[]) {
                    TinyBinary xmlbin = new TinyBinary((byte[]) xml, Charset.forName("utf-8"));
                    xmlIndexer.index(xmlbin.getTinyDocument(saxonConfig), uri);
                } else if (xml instanceof NodeInfo) {
                    xmlIndexer.index((NodeInfo) xml, uri);
                }
                // why is this here?  we're getting double values now since we also call 
                // addDocumentFIelds below?
                //luceneDocument = xmlIndexer.createLuceneDocument();
            } catch (XMLStreamException e) {
                // a parse failure is logged but not rethrown; processing continues below
                logger.error("Failed to parse " + FieldRole.XML_STORE, e);
            }
            addDocumentFields(xmlIndexer, solrIndexConfig.getSchema(), luceneDocument);
            if (luxIdField != null) {
                Object id = luxIdField.getValue();
                if (!(id instanceof Long)) {
                    // solr cloud distributes these as Strings
                    id = Long.valueOf(id.toString());
                }
                luceneDocument.add(new LongField(idFieldName, (Long) id, Store.YES));
            }
            luxCommand = new UpdateDocCommand(req, solrInputDocument, luceneDocument, uri);
        } catch (Exception e) {
            // any other failure is logged and rethrown wrapped in an IOException
            logger.error("An error occurred while indexing " + uri, e);
            throw new IOException(e);
        } finally {
            // the indexer is always handed back, whether or not indexing succeeded
            solrIndexConfig.returnXmlIndexer(xmlIndexer);
        }
        // logger.debug ("Indexed XML document " + uri);
    }
    if (next != null) {
        // forward the original command when no UpdateDocCommand was built
        next.processAdd(luxCommand == null ? cmd : luxCommand);
    }
}

From source file:nl.knaw.huygens.timbuctoo.index.solr.SolrInputDocGeneratorTest.java

License:Open Source License

/**
 * Creates an entity from the string "test", processes its display-name method into the
 * "desc" field, and expects the first value of that field to be "test".
 */
@Test
public void testGetResultOneDocumentWithoutEmptyFields() throws NoSuchMethodException, SecurityException {
    final String expectedValue = "test";
    final Entity source = createEntity(expectedValue);
    final SolrInputDocGenerator docGenerator = new SolrInputDocGenerator(source);

    processMethod(source, docGenerator, GET_DISPLAY_NAME, false, "desc", FacetType.LIST);

    final SolrInputField descField = docGenerator.getResult().getField("desc");

    assertEquals(expectedValue, descField.getFirstValue());
}

From source file:nl.knaw.huygens.timbuctoo.index.solr.SolrInputDocGeneratorTest.java

License:Open Source License

/**
 * Creates an entity from a {@code null} description, processes its display-name method
 * into the "desc" field, and expects the first value of that field to be the
 * placeholder "(empty)".
 */
@Test
public void testGetResultOneDocumentWithEmptyFieldsThatShouldBeIndexed() throws NoSuchMethodException {
    final String missingDescription = null;
    final Entity source = createEntity(missingDescription);
    final SolrInputDocGenerator docGenerator = new SolrInputDocGenerator(source);

    processMethod(source, docGenerator, GET_DISPLAY_NAME, false, "desc", FacetType.LIST);

    final SolrInputField descField = docGenerator.getResult().getField("desc");

    assertEquals("(empty)", descField.getFirstValue());
}

From source file:org.apache.blur.slur.RowMutationHelper.java

License:Apache License

/**
 * Converts a Solr document into a record mutation that replaces the entire record.
 *
 * <p>Only fields whose name contains a '.' are copied; the part of the name after the
 * first '.' becomes the column name, and every non-null value of the field becomes one
 * column. Fields without any value are skipped.
 *
 * @param doc the document to convert
 * @param id  the record id to set on the resulting record
 * @return the mutation wrapping the newly built record
 */
private static RecordMutation createRecordMutation(SolrInputDocument doc, String id) {
    RecordMutation recordMutation = new RecordMutation();
    // TODO: what's solr default behavior?
    recordMutation.setRecordMutationType(RecordMutationType.REPLACE_ENTIRE_RECORD);
    Record record = new Record();
    record.setFamily(findFamily(doc));
    record.setRecordId(id);

    for (String fieldName : doc.getFieldNames()) {
        int dot = fieldName.indexOf('.');
        if (dot < 0) {
            continue;
        }
        SolrInputField field = doc.getField(fieldName);
        if (field.getValueCount() == 0) {
            // getValues() may be null and getFirstValue() is null for a valueless field
            continue;
        }
        String rawColumnName = fieldName.substring(dot + 1);

        // getValues() also covers the single-value case, so one loop handles both.
        for (Object fieldVal : field.getValues()) {
            if (fieldVal != null) {
                record.addToColumns(new Column(rawColumnName, fieldVal.toString()));
            }
        }
    }
    recordMutation.setRecord(record);
    return recordMutation;
}

From source file:org.opencastproject.archive.opencast.solr.Schema.java

License:Educational Community License

/**
 * Reads the {@code ID} field of a document.
 *
 * @param doc the document to read from
 * @return the first value of the field converted with {@code mkString}, or
 *         {@code null} when the document has no such field
 */
public static String getId(SolrInputDocument doc) {
    final SolrInputField idField = doc.get(ID);
    if (idField == null) {
        return null;
    }
    return mkString(idField.getFirstValue());
}