List of usage examples for org.apache.solr.handler.extraction.SolrContentHandler#newDocument()
public SolrInputDocument newDocument()
From source file:com.cloudera.cdk.morphline.solrcell.SolrCellMorphlineTest.java
License:Apache License
/** * Test that the ContentHandler properly strips the illegal characters *///from w w w . j av a 2s.c om @Test public void testTransformValue() { String fieldName = "user_name"; assertFalse("foobar".equals(getFoobarWithNonChars())); Metadata metadata = new Metadata(); // load illegal char string into a metadata field and generate a new document, // which will cause the ContentHandler to be invoked. metadata.set(fieldName, getFoobarWithNonChars()); StripNonCharSolrContentHandlerFactory contentHandlerFactory = new StripNonCharSolrContentHandlerFactory( DateUtil.DEFAULT_DATE_FORMATS); IndexSchema schema = h.getCore().getLatestSchema(); SolrContentHandler contentHandler = contentHandlerFactory.createSolrContentHandler(metadata, new MapSolrParams(new HashMap()), schema); SolrInputDocument doc = contentHandler.newDocument(); String foobar = doc.getFieldValue(fieldName).toString(); assertTrue("foobar".equals(foobar)); }
From source file:com.ngdata.hbaseindexer.parse.tika.TikaSolrDocumentExtractor.java
License:Apache License
private SolrInputDocument extractInternal(byte[] input) { Metadata metadata = new Metadata(); metadata.add(LiteralMimeDetector.MIME_TYPE, mimeType); Map<String, String> cellParams = (params == null || params.isEmpty()) ? DEFAULT_CELL_PARAMS : params; SolrContentHandler handler = new SolrContentHandler(metadata, new MapSolrParams(cellParams), indexSchema); try {//from ww w .ja v a 2 s . c o m parser.parse(new ByteArrayInputStream(input), handler, metadata, new ParseContext()); } catch (Exception e) { throw new RuntimeException(e); } return handler.newDocument(); }