Example usage for org.apache.solr.common SolrInputDocument get

List of usage examples for org.apache.solr.common SolrInputDocument get

Introduction

On this page you can find an example usage of org.apache.solr.common SolrInputDocument get.

Prototype

@Override
    public SolrInputField get(Object key) 

Source Link

Usage

From source file:bamboo.trove.rule.RuleChangeUpdateManager.java

License:Apache License

@Override
public void errorProcessing(SolrInputDocument doc, Throwable error) {
    // Pull the document id out of its SolrInputField so the error message
    // identifies which document failed.
    String id = (String) doc.get("id").getValue();

    // Fix: corrected "updateing" -> "updating" in the error message.
    this.setError("Error updating document " + id, error);
    stopProcessing();
}

From source file:bamboo.trove.rule.RuleChangeUpdateManager.java

License:Apache License

@Override
public void acknowledge(SolrInputDocument doc) {
    // Drop the acknowledged document from the in-flight map, keyed by its id.
    synchronized (documents) {
        final String docId = doc.get("id").getValue().toString();
        documents.remove(docId);
    }
}

From source file:com.francelabs.datafari.updateprocessor.DatafariUpdateProcessor.java

License:Apache License

@Override
public void processAdd(final AddUpdateCommand cmd) throws IOException {
    final SolrInputDocument doc = cmd.getSolrInputDocument();

    // Tika sometimes emits several "id" values; keep only the first one,
    // which is always the right one.
    // Fix: added a null guard — a document with no "id" field at all
    // previously threw an NPE here (the last_modified check below already
    // null-checks; this one now matches it).
    if ((doc.getFieldValues("id") != null) && (doc.getFieldValues("id").size() > 1)) {
        final Object id = doc.getFieldValue("id");
        doc.remove("id");
        doc.addField("id", id);
    }

    // If the connector supplied an "ignored_filelastmodified" field, use its
    // value as the authoritative "last_modified".
    if (doc.getFieldValue("ignored_filelastmodified") != null) {
        final Object lastModified = doc.getFieldValue("ignored_filelastmodified");
        doc.remove("last_modified");
        doc.addField("last_modified", lastModified);
    }

    // Tika sometimes emits several "last_modified" dates; keep only the
    // first one, which is always the right one.
    if ((doc.getFieldValues("last_modified") != null) && (doc.getFieldValues("last_modified").size() > 1)) {
        final Object lastModified = doc.getFieldValue("last_modified");
        doc.remove("last_modified");
        doc.addField("last_modified", lastModified);
    }

    // The document id doubles as the source URL.
    final String url = (String) doc.getFieldValue("id");
    doc.addField("url", url);

    // Derive the file name: prefer the stream name reported by the
    // connector, otherwise take the last path segment of the URL.
    String filename = "";
    final SolrInputField streamNameField = doc.get("ignored_stream_name");
    if (streamNameField != null) {
        filename = (String) streamNameField.getFirstValue();
    } else {
        final Pattern pattern = Pattern.compile("[^/]*$");
        final Matcher matcher = pattern.matcher(url);
        if (matcher.find()) {
            filename = matcher.group();
        }
    }

    // Tag the source type and default the title from the filename.
    if (url.startsWith("http")) {
        if (doc.get("title") == null) {
            doc.addField("title", filename);
        }
        doc.addField("source", "web");
    }

    if (url.startsWith("file")) {
        doc.removeField("title");
        doc.addField("title", filename);
        doc.addField("source", "file");
    }

    // Determine the file extension from either the file name or the
    // Tika-detected MIME type, depending on configuration. An extension is
    // only trusted when its length is between 2 and 4 characters; otherwise
    // the other source is used as a fallback.
    final URL urlObject = new URL(url);
    final String path = urlObject.getPath();
    final SolrInputField mimeTypeField = doc.get("ignored_content_type");

    final String nameExtension = FilenameUtils.getExtension(path);
    final String tikaExtension = mimeTypeField == null ? "" : extensionFromMimeTypeField(mimeTypeField);

    final String extension;
    if (extensionFromName) {
        extension = nameExtension.length() > 1 && nameExtension.length() < 5 ? nameExtension : tikaExtension;
    } else {
        extension = tikaExtension.length() > 1 && tikaExtension.length() < 5 ? tikaExtension : nameExtension;
    }
    // NOTE(review): default-locale toLowerCase — confirm whether
    // Locale.ROOT is wanted for locale-independent extensions.
    doc.addField("extension", extension.toLowerCase());

    super.processAdd(cmd);
}

From source file:com.ngdata.hbaseindexer.indexer.FusionDocumentWriter.java

License:Apache License

/**
 * Fallback for documents submitted as atomic updates. It is the final option
 * to get documents into the index if the previous indexing attempt failed
 * when sending the entire collection of documents to the Solr proxy: each
 * document is then submitted to the Solr proxy one at a time, so a single
 * bad document cannot block the rest.
 *
 * @param docs the SolrInputDocuments to be added, one at a time
 * @throws SolrServerException
 * @throws IOException
 */
private void retrySolrAtomicUpdatesIndividually(Collection<SolrInputDocument> docs)
        throws SolrServerException, IOException {
    for (SolrInputDocument nextDoc : docs) {
        try {
            solrProxy.add(nextDoc);
            solrAtomicUpdatesMeter.mark();
        } catch (Exception e) {
            // Fix: added the missing "; " separator before "solrProxy:[" so
            // the proxy detail no longer runs into the document dump.
            log.error("Failed to index atomic update document [" + nextDoc.get("id") + "] due to: " + e
                    + "; doc: " + nextDoc + "; solrProxy:[" + solrProxy.toString() + "]");
            solrAtomicUpdatesErrorMeter.mark();
        }
    }
}

From source file:com.ngdata.hbaseindexer.parse.DefaultResultToSolrMapperTest.java

License:Apache License

@Test
public void testMap() {
    // Two field definitions: one mapped with the built-in "int" type, the
    // other through a custom value-mapper class loaded by name.
    FieldDefinition intFieldDef = new FieldDefinition("fieldA", "cfA:qualifierA", ValueSource.VALUE, "int");
    FieldDefinition customFieldDef = new FieldDefinition("fieldB", "cfB:qualifierB", ValueSource.VALUE,
            DummyValueMapper.class.getName());
    DefaultResultToSolrMapper mapper = new DefaultResultToSolrMapper("index-name",
            Lists.newArrayList(intFieldDef, customFieldDef), Collections.<DocumentExtractDefinition>emptyList());

    // One HBase cell per field definition.
    KeyValue intCell = new KeyValue(ROW, COLUMN_FAMILY_A, QUALIFIER_A, Bytes.toBytes(42));
    KeyValue customCell = new KeyValue(ROW, COLUMN_FAMILY_B, QUALIFIER_B, Bytes.toBytes("dummy value"));
    Result row = newResult(Lists.newArrayList(intCell, customCell));

    mapper.map(row, solrUpdateWriter);
    verify(solrUpdateWriter).add(solrInputDocCaptor.capture());

    SolrInputDocument mappedDoc = solrInputDocCaptor.getValue();

    // Exactly the two defined fields must be present, with the int value
    // decoded and the dummy mapper's expansion applied.
    assertEquals(Sets.newHashSet("fieldA", "fieldB"), mappedDoc.keySet());

    SolrInputField intField = mappedDoc.get("fieldA");
    SolrInputField customField = mappedDoc.get("fieldB");

    assertEquals(Lists.newArrayList(42), intField.getValues());
    assertEquals(Lists.newArrayList("A", "B", "C"), customField.getValues());
}

From source file:com.ngdata.hbaseindexer.parse.HBaseSolrDocumentExtractorTest.java

License:Apache License

@Test
public void testExtractDocument() {
    // Two raw byte values extracted from the Result, each mapped to a
    // single string by the stubbed value mapper.
    byte[] firstBytes = new byte[] { 1, 2 };
    byte[] secondBytes = new byte[] { 3, 4 };

    Result result = mock(Result.class);

    when(valueExtractor.extract(result)).thenReturn(Lists.newArrayList(firstBytes, secondBytes));
    doReturn(Lists.newArrayList("A")).when(valueMapper).map(firstBytes);
    doReturn(Lists.newArrayList("B")).when(valueMapper).map(secondBytes);

    SolrInputDocument extractedDoc = new SolrInputDocument();
    documentExtractor.extractDocument(result, extractedDoc);

    // Both mapped values land in the single configured field, in order.
    assertEquals(Sets.newHashSet("fieldName"), extractedDoc.keySet());
    assertEquals(Lists.newArrayList("A", "B"), extractedDoc.get("fieldName").getValues());
}

From source file:com.ngdata.hbaseindexer.parse.SolrInputDocumentBuilderTest.java

License:Apache License

@Test
public void testAdd_OverlappingFields() {
    SolrInputDocumentBuilder builder = new SolrInputDocumentBuilder();

    // Two documents contributing values to the same field name.
    SolrInputDocument firstDoc = new SolrInputDocument();
    SolrInputDocument secondDoc = new SolrInputDocument();

    firstDoc.addField("field", "A1", 0.5f);
    firstDoc.addField("field", "A2", 0.5f);
    secondDoc.addField("field", "B1", 1.5f);
    secondDoc.addField("field", "B2", 1.5f);

    builder.add(firstDoc);
    builder.add(secondDoc);

    SolrInputDocument merged = builder.getDocument();

    // All values are concatenated under the single shared field name.
    assertEquals(Sets.newHashSet("field"), merged.keySet());
    assertEquals(Lists.newArrayList("A1", "A2", "B1", "B2"), merged.get("field").getValues());

    // The merged field's boost is the product of every added boost.
    assertEquals(0.5f * 0.5f * 1.5f * 1.5f, merged.getField("field").getBoost(), 0f);
}

From source file:com.ngdata.hbaseindexer.parse.tika.TikaSolrDocumentExtractorTest.java

License:Apache License

@Test
public void testExtractDocument() throws IOException {

    byte[] family = Bytes.toBytes("cf");
    byte[] qualifier = Bytes.toBytes("qualifier");
    final String applicableValue = "this is the test data";
    final String nonApplicableValue = "not-applicable value";

    // One cell matches the configured family/qualifier, one does not.
    KeyValue matchingCell = new KeyValue(Bytes.toBytes("row"), family, qualifier,
            Bytes.toBytes(applicableValue));
    KeyValue otherFamilyCell = new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("other cf"),
            qualifier, Bytes.toBytes(nonApplicableValue));
    Result result = new Result(new KeyValue[] { matchingCell, otherFamilyCell });

    SolrDocumentExtractor extractor = new TikaSolrDocumentExtractor(indexSchema,
            new SingleCellExtractor(family, qualifier), "prefix_", "text/plain");
    SolrInputDocument extractedDoc = new SolrInputDocument();
    extractor.extractDocument(result, extractedDoc);

    // The matching cell's text must have been extracted into the prefixed
    // content field; the non-matching cell's text must not appear.
    assertTrue(extractedDoc.containsKey("prefix_content"));
    assertTrue(extractedDoc.get("prefix_content").getValues().toString().contains(applicableValue));
    assertFalse(extractedDoc.get("prefix_content").getValues().toString().contains(nonApplicableValue));
}

From source file:com.ngdata.hbaseindexer.parse.tika.TikaSolrDocumentExtractorTest.java

License:Apache License

@Test
public void testExtractDocument_CustomTikaParser() throws IOException {
    byte[] family = Bytes.toBytes("cf");
    byte[] qualifier = Bytes.toBytes("qualifier");
    final String cellValue = "this is the test data";

    Result result = new Result(new KeyValue[] {
            new KeyValue(Bytes.toBytes("row"), family, qualifier, Bytes.toBytes(cellValue)) });

    // "application/dummy" routes extraction through the dynamically-loaded
    // DummyParser rather than a stock Tika parser.
    SolrDocumentExtractor extractor = new TikaSolrDocumentExtractor(indexSchema,
            new SingleCellExtractor(family, qualifier), null, "application/dummy");
    SolrInputDocument extractedDoc = new SolrInputDocument();
    extractor.extractDocument(result, extractedDoc);

    // The dummy parser writes its fixed value into its well-known field.
    assertEquals(DummyParser.INDEX_VALUE, extractedDoc.get(DummyParser.INDEX_FIELD).getFirstValue());
}

From source file:cz.zcu.kiv.eegdatabase.logic.indexing.Indexer.java

License:Apache License

/**
 * Performs indexing of a POJO object.
 *
 * @param instance The POJO to be indexed.
 * @throws IllegalAccessException
 * @throws IOException
 * @throws SolrServerException
 */
public void index(T instance) throws IllegalAccessException, IOException, SolrServerException {
    SolrInputDocument document = prepareForIndexing(instance);
    log.debug(document);

    // Nothing to index: the mapper produced no document, or an empty one.
    if (document == null || document.isEmpty()) {
        return;
    }
    solrServer.add(document); // if the document already exists, it is replaced by the new one
    UpdateResponse response = solrServer.commit();
    // Fix: use the null-safe getFieldValue — a document without a "uuid"
    // field previously threw an NPE while building this debug message.
    log.debug("Document " + document.getFieldValue("uuid") + " added to the solr index.");
    logCommitResponse(response);
}