List of usage examples for org.apache.solr.common SolrInputDocument get
@Override
public SolrInputField get(Object key)
From source file:bamboo.trove.rule.RuleChangeUpdateManager.java
License:Apache License
@Override public void errorProcessing(SolrInputDocument doc, Throwable error) { // documents.remove((Integer)doc.get("id").getValue()); String id = (String) doc.get("id").getValue(); this.setError("Error updateing document " + id, error); stopProcessing();/*w ww .j a va 2s . c o m*/ }
From source file:bamboo.trove.rule.RuleChangeUpdateManager.java
License:Apache License
@Override public void acknowledge(SolrInputDocument doc) { synchronized (documents) { documents.remove(doc.get("id").getValue().toString()); }//w w w . jav a 2 s . c o m }
From source file:com.francelabs.datafari.updateprocessor.DatafariUpdateProcessor.java
License:Apache License
@Override public void processAdd(final AddUpdateCommand cmd) throws IOException { final SolrInputDocument doc = cmd.getSolrInputDocument(); // Sometimes Tika put several ids so we keep the first one which is // always the right one if (doc.getFieldValues("id").size() > 1) { final Object id = doc.getFieldValue("id"); doc.remove("id"); doc.addField("id", id); }// ww w.jav a 2s. c o m // Try to retrieve at the ignored_filelastmodified field to set it's // value in the last_modified field if (doc.getFieldValue("ignored_filelastmodified") != null) { final Object last_modified = doc.getFieldValue("ignored_filelastmodified"); doc.remove("last_modified"); doc.addField("last_modified", last_modified); } // Sometimes Tika put several last_modified dates, so we keep the first // one which is always the right one if ((doc.getFieldValues("last_modified") != null) && (doc.getFieldValues("last_modified").size() > 1)) { final Object last_modified = doc.getFieldValue("last_modified"); doc.remove("last_modified"); doc.addField("last_modified", last_modified); } final String url = (String) doc.getFieldValue("id"); // Create path hierarchy for facet final List<String> urlHierarchy = new ArrayList<>(); /* * // Create path hierarchy for facet * * final List<String> urlHierarchy = new ArrayList<String>(); * * final String path = url.replace("file:", ""); int previousIndex = 1; int * depth = 0; // Tokenize the path and add the depth as first character for * each token // (like: 0/home, 1/home/project ...) 
for (int i = 0; i < * path.split("/").length - 2; i++) { int endIndex = path.indexOf('/', * previousIndex); if (endIndex == -1) { endIndex = path.length() - 1; } * urlHierarchy.add(depth + path.substring(0, endIndex)); depth++; * previousIndex = endIndex + 1; } * * // Add the tokens to the urlHierarchy field doc.addField("urlHierarchy", * urlHierarchy); */ doc.addField("url", url); String filename = ""; final SolrInputField streamNameField = doc.get("ignored_stream_name"); if (streamNameField != null) { filename = (String) streamNameField.getFirstValue(); } else { final Pattern pattern = Pattern.compile("[^/]*$"); final Matcher matcher = pattern.matcher(url); if (matcher.find()) { filename = matcher.group(); } } if (url.startsWith("http")) { if (doc.get("title") == null) { doc.addField("title", filename); } doc.addField("source", "web"); } if (url.startsWith("file")) { doc.removeField("title"); doc.addField("title", filename); doc.addField("source", "file"); } String extension = ""; URL urlObject = new URL(url); String path = urlObject.getPath(); final SolrInputField mimeTypeField = doc.get("ignored_content_type"); String nameExtension = FilenameUtils.getExtension(path); String tikaExtension = mimeTypeField == null ? "" : extensionFromMimeTypeField(mimeTypeField); if (extensionFromName) { extension = nameExtension.length() > 1 && nameExtension.length() < 5 ? nameExtension : tikaExtension; } else { extension = tikaExtension.length() > 1 && tikaExtension.length() < 5 ? tikaExtension : nameExtension; } /* if (extensionFromName || mimeTypeField == null) { if (path.contains(".")){ extension = FilenameUtils.getExtension(path); if (extension.length() > 4 || extension.length() < 1) { // If length is too long, try extracting from tika information if available String tryExtension = mimeTypeField==null ? 
null : extensionFromMimeTypeField(mimeTypeField); if (tryExtension != null) { extension = tryExtension; } else { // Else default to bin for anything else extension = "bin"; } } } else if (urlObject.getProtocol().equals("http") || urlObject.getProtocol().equals("https")) { extension = null; if (mimeTypeField != null) { extension = extensionFromMimeTypeField(mimeTypeField); } if (extension == null) { extension = "html"; } } } else { extension = extensionFromMimeTypeField(mimeTypeField); if (extension == null) { extension = FilenameUtils.getExtension(path); } } */ doc.addField("extension", extension.toLowerCase()); super.processAdd(cmd); }
From source file:com.ngdata.hbaseindexer.indexer.FusionDocumentWriter.java
License:Apache License
/**
 * shs: Last-resort retry for atomic updates. When sending the entire batch to
 * the Solr proxy failed, each document is re-submitted individually so a single
 * bad document cannot sink the rest of the batch.
 *
 * Fixes over the previous revision: the failure log now passes the exception
 * as the throwable argument (the old code concatenated it, losing the stack
 * trace) and adds the separator that was missing before "solrProxy".
 *
 * @param docs the SolrInputDocuments to be added, one at a time
 * @throws SolrServerException declared for interface compatibility; per-document
 *         failures are logged and metered rather than rethrown
 * @throws IOException declared for interface compatibility
 */
private void retrySolrAtomicUpdatesIndividually(Collection<SolrInputDocument> docs)
        throws SolrServerException, IOException {
    for (SolrInputDocument nextDoc : docs) {
        try {
            solrProxy.add(nextDoc);
            solrAtomicUpdatesMeter.mark();
        } catch (Exception e) {
            // Pass the exception as the final argument so the logger records
            // the full stack trace.
            log.error("Failed to index atomic update document [" + nextDoc.get("id") + "]; doc: " + nextDoc
                    + "; solrProxy: [" + solrProxy.toString() + "]", e);
            solrAtomicUpdatesErrorMeter.mark();
        }
    }
}
From source file:com.ngdata.hbaseindexer.parse.DefaultResultToSolrMapperTest.java
License:Apache License
/**
 * Maps a two-cell HBase Result through two field definitions — a built-in
 * "int" mapper and a custom DummyValueMapper — and verifies both fields land
 * in the emitted SolrInputDocument with the expected values.
 */
@Test
public void testMap() {
    FieldDefinition intFieldDef = new FieldDefinition("fieldA", "cfA:qualifierA", ValueSource.VALUE, "int");
    FieldDefinition customFieldDef = new FieldDefinition("fieldB", "cfB:qualifierB", ValueSource.VALUE,
            DummyValueMapper.class.getName());
    DefaultResultToSolrMapper resultMapper = new DefaultResultToSolrMapper("index-name",
            Lists.newArrayList(intFieldDef, customFieldDef),
            Collections.<DocumentExtractDefinition>emptyList());

    // One cell per column family, matching the two definitions above.
    KeyValue intCell = new KeyValue(ROW, COLUMN_FAMILY_A, QUALIFIER_A, Bytes.toBytes(42));
    KeyValue customCell = new KeyValue(ROW, COLUMN_FAMILY_B, QUALIFIER_B, Bytes.toBytes("dummy value"));
    Result result = newResult(Lists.newArrayList(intCell, customCell));

    resultMapper.map(result, solrUpdateWriter);
    verify(solrUpdateWriter).add(solrInputDocCaptor.capture());

    SolrInputDocument mappedDocument = solrInputDocCaptor.getValue();
    assertEquals(Sets.newHashSet("fieldA", "fieldB"), mappedDocument.keySet());
    SolrInputField mappedIntField = mappedDocument.get("fieldA");
    SolrInputField mappedCustomField = mappedDocument.get("fieldB");
    assertEquals(Lists.newArrayList(42), mappedIntField.getValues());
    assertEquals(Lists.newArrayList("A", "B", "C"), mappedCustomField.getValues());
}
From source file:com.ngdata.hbaseindexer.parse.HBaseSolrDocumentExtractorTest.java
License:Apache License
/**
 * Verifies that every byte[] produced by the value extractor is run through
 * the value mapper and that all mapped values accumulate under the single
 * configured field name.
 */
@Test
public void testExtractDocument() {
    byte[] firstChunk = new byte[] { 1, 2 };
    byte[] secondChunk = new byte[] { 3, 4 };

    Result result = mock(Result.class);
    when(valueExtractor.extract(result)).thenReturn(Lists.newArrayList(firstChunk, secondChunk));
    doReturn(Lists.newArrayList("A")).when(valueMapper).map(firstChunk);
    doReturn(Lists.newArrayList("B")).when(valueMapper).map(secondChunk);

    SolrInputDocument extractedDocument = new SolrInputDocument();
    documentExtractor.extractDocument(result, extractedDocument);

    assertEquals(Sets.newHashSet("fieldName"), extractedDocument.keySet());
    assertEquals(Lists.newArrayList("A", "B"), extractedDocument.get("fieldName").getValues());
}
From source file:com.ngdata.hbaseindexer.parse.SolrInputDocumentBuilderTest.java
License:Apache License
@Test public void testAdd_OverlappingFields() { SolrInputDocumentBuilder builder = new SolrInputDocumentBuilder(); SolrInputDocument docA = new SolrInputDocument(); SolrInputDocument docB = new SolrInputDocument(); docA.addField("field", "A1", 0.5f); docA.addField("field", "A2", 0.5f); docB.addField("field", "B1", 1.5f); docB.addField("field", "B2", 1.5f); builder.add(docA);//from ww w. ja va2s . co m builder.add(docB); SolrInputDocument merged = builder.getDocument(); assertEquals(Sets.newHashSet("field"), merged.keySet()); assertEquals(Lists.newArrayList("A1", "A2", "B1", "B2"), merged.get("field").getValues()); // The boost of the first-added definition of a field is the definitive version assertEquals(0.5f * 0.5f * 1.5f * 1.5f, merged.getField("field").getBoost(), 0f); }
From source file:com.ngdata.hbaseindexer.parse.tika.TikaSolrDocumentExtractorTest.java
License:Apache License
@Test public void testExtractDocument() throws IOException { byte[] columnFamily = Bytes.toBytes("cf"); byte[] columnQualifier = Bytes.toBytes("qualifier"); final String applicableValue = "this is the test data"; final String nonApplicableValue = "not-applicable value"; KeyValue applicableKeyValue = new KeyValue(Bytes.toBytes("row"), columnFamily, columnQualifier, Bytes.toBytes(applicableValue)); KeyValue nonApplicableKeyValue = new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("other cf"), columnQualifier, Bytes.toBytes(nonApplicableValue)); Result result = new Result(new KeyValue[] { applicableKeyValue, nonApplicableKeyValue }); SolrDocumentExtractor documentExtractor = new TikaSolrDocumentExtractor(indexSchema, new SingleCellExtractor(columnFamily, columnQualifier), "prefix_", "text/plain"); SolrInputDocument solrInputDocument = new SolrInputDocument(); documentExtractor.extractDocument(result, solrInputDocument); // Make sure that the input text got in here somehow assertTrue(solrInputDocument.containsKey("prefix_content")); assertTrue(solrInputDocument.get("prefix_content").getValues().toString().contains(applicableValue)); assertFalse(solrInputDocument.get("prefix_content").getValues().toString().contains(nonApplicableValue)); }
From source file:com.ngdata.hbaseindexer.parse.tika.TikaSolrDocumentExtractorTest.java
License:Apache License
@Test public void testExtractDocument_CustomTikaParser() throws IOException { byte[] columnFamily = Bytes.toBytes("cf"); byte[] columnQualifier = Bytes.toBytes("qualifier"); final String applicableValue = "this is the test data"; KeyValue kv = new KeyValue(Bytes.toBytes("row"), columnFamily, columnQualifier, Bytes.toBytes(applicableValue)); Result result = new Result(new KeyValue[] { kv }); SolrDocumentExtractor documentExtractor = new TikaSolrDocumentExtractor(indexSchema, new SingleCellExtractor(columnFamily, columnQualifier), null, "application/dummy"); SolrInputDocument solrInputDocument = new SolrInputDocument(); documentExtractor.extractDocument(result, solrInputDocument); // Just make sure that the data came through the dynamically-loaded Tika parser assertEquals(DummyParser.INDEX_VALUE, solrInputDocument.get(DummyParser.INDEX_FIELD).getFirstValue()); }
From source file:cz.zcu.kiv.eegdatabase.logic.indexing.Indexer.java
License:Apache License
/** * Performs indexing of a POJO object.//from ww w . jav a2s . c om * @param instance The POJO to be indexed. * @throws IllegalAccessException * @throws IOException * @throws SolrServerException */ public void index(T instance) throws IllegalAccessException, IOException, SolrServerException { SolrInputDocument document = prepareForIndexing(instance); log.debug(document); if (document == null || document.isEmpty()) { return; } solrServer.add(document); // if the document already exists, it is replaced by the new one UpdateResponse response = solrServer.commit(); log.debug("Document " + document.get("uuid").getValue().toString() + " added to the solr index."); logCommitResponse(response); }