Example usage for org.xml.sax.helpers DefaultHandler DefaultHandler

List of usage examples for org.xml.sax.helpers DefaultHandler DefaultHandler

Introduction

On this page you can find example usage for the org.xml.sax.helpers DefaultHandler no-argument constructor.

Prototype

DefaultHandler

Source Link

Usage

From source file:org.apache.tika.server.MetadataEP.java

/**
 * Get a specific metadata field. If the input stream cannot be parsed, but a
 * value was found for the given metadata field, then the value of the field
 * is returned as part of a 200 OK response; otherwise a
 * {@link Status#BAD_REQUEST} is generated. If the stream was successfully
 * parsed but the specific metadata field was not found, then a
 * {@link Status#NOT_FOUND} is returned.
 * <p>
 * Note that this method handles multivalue fields and returns possibly more
 * metadata than requested: every value stored under the requested field name
 * is returned, while all other fields are stripped from the response.
 * 
 * @param field
 *          the tika metadata field name
 * @param is
 *          the document stream
 * @return one of {@link Status#OK}, {@link Status#NOT_FOUND}, or
 *         {@link Status#BAD_REQUEST}
 * @throws Exception
 */
@POST
@Path("{field}")
public Response getMetadataField(@PathParam("field") String field, InputStream is) throws Exception {

    // Until the parse succeeds, a missing field may simply mean we could not
    // read the document, so report BAD_REQUEST rather than NOT_FOUND.
    Status defaultErrorResponse = Status.BAD_REQUEST;
    try {
        parser.parse(is, new DefaultHandler(), metadata);
        // once we've parsed the document successfully, we should use NOT_FOUND
        // if we did not see the field
        defaultErrorResponse = Status.NOT_FOUND;
    } catch (Exception e) {
        // Deliberate best-effort: a failed parse may still have populated the
        // requested field, so fall through and check the metadata anyway.
        logger.info("Failed to process field " + field, e);
    }
    String[] values = metadata.getValues(field);
    if (values.length == 0) {
        return Response.status(defaultErrorResponse).entity("Failed to get metadata field " + field).build();
    }
    // remove fields we don't care about for the response
    for (String name : metadata.names()) {
        if (!field.equals(name)) {
            metadata.remove(name);
        }
    }
    return Response.ok(metadata).build();
}

From source file:org.apache.tika.server.MetadataResource.java

/**
 * Parses the posted document and streams its metadata back as CSV, one row
 * per metadata field: the field name followed by all of that field's values.
 *
 * @param is          the document stream to parse
 * @param httpHeaders request headers, used to pre-populate metadata hints
 * @param info        request URI info, used for logging
 * @return a streaming CSV body with one row per metadata field
 * @throws Exception if the parse fails
 */
@PUT
@Produces("text/csv")
public StreamingOutput getMetadata(InputStream is, @Context HttpHeaders httpHeaders, @Context UriInfo info)
        throws Exception {
    final Metadata metadata = new Metadata();
    AutoDetectParser parser = TikaResource.createParser();
    TikaResource.fillMetadata(parser, metadata, httpHeaders);
    TikaResource.logRequest(logger, info, metadata);

    parser.parse(is, new DefaultHandler(), metadata);

    return new StreamingOutput() {
        public void write(OutputStream outputStream) throws IOException, WebApplicationException {
            // try-with-resources guarantees the writer (and the wrapped
            // response stream) is flushed and closed even if a row fails;
            // the original only closed on the success path.
            try (CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream))) {
                for (String name : metadata.names()) {
                    String[] values = metadata.getValues(name);
                    List<String> row = new ArrayList<String>(values.length + 1);
                    row.add(name);
                    row.addAll(Arrays.asList(values));
                    // Use a zero-length array instead of reusing "values":
                    // the row is always one element larger than "values", so
                    // the old call allocated a fresh array anyway and merely
                    // obscured intent.
                    writer.writeNext(row.toArray(new String[0]));
                }
            }
        }
    };
}

From source file:org.apache.tika.server.RecursiveMetadataResource.java

/**
 * Parses the supplied stream with a RecursiveParserWrapper so that metadata
 * is collected for the container document and every embedded document.
 *
 * @param is          the document stream to parse
 * @param httpHeaders request headers, used to pre-populate metadata and the
 *                    parse context
 * @param info        request URI info, used for logging
 * @return the list of per-document metadata gathered by the wrapper
 * @throws Exception if the parse fails
 */
private MetadataList parseMetadata(InputStream is, MultivaluedMap<String, String> httpHeaders, UriInfo info)
        throws Exception {
    final Metadata metadata = new Metadata();
    final ParseContext context = new ParseContext();
    AutoDetectParser parser = TikaResource.createParser(tikaConfig);
    //TODO: parameterize choice of handler and max chars?
    // -1 means no write limit on the per-document content handlers.
    BasicContentHandlerFactory.HANDLER_TYPE type = BasicContentHandlerFactory.HANDLER_TYPE.TEXT;
    RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser,
            new BasicContentHandlerFactory(type, -1));
    TikaResource.fillMetadata(parser, metadata, context, httpHeaders);
    TikaResource.fillParseContext(context, httpHeaders);
    TikaResource.logRequest(logger, info, metadata);

    // The no-op DefaultHandler discards top-level SAX events; per-document
    // content is captured by the wrapper's handler factory instead.
    wrapper.parse(is, new DefaultHandler(), metadata, context);
    return new MetadataList(wrapper.getMetadata());
}

From source file:org.apache.tika.server.resource.UnpackerResource.java

/**
 * Parses the stream and collects each embedded document into a map of
 * file name to bytes. When {@code saveAll} is true, the container's
 * extracted text and a CSV dump of its metadata are added to the map too.
 *
 * @param is          the container document stream
 * @param httpHeaders injected request headers; metadata hints are read from them
 * @param info        request URI info, used for logging
 * @param saveAll     whether to also save extracted text and metadata
 * @return map of extracted file name to file contents
 * @throws WebApplicationException with NO_CONTENT when nothing was embedded
 *         and {@code saveAll} is false
 * @throws Exception if the parse fails
 */
private Map<String, byte[]> process(InputStream is, @Context HttpHeaders httpHeaders, @Context UriInfo info,
        boolean saveAll) throws Exception {
    Metadata metadata = new Metadata();
    ParseContext pc = new ParseContext();

    Parser parser = TikaResource.createParser();
    if (parser instanceof DigestingParser) {
        //no need to digest for unwrapping
        parser = ((DigestingParser) parser).getWrappedParser();
    }

    TikaResource.fillMetadata(parser, metadata, pc, httpHeaders.getRequestHeaders());
    TikaResource.logRequest(LOG, info, metadata);

    ContentHandler ch;
    ByteArrayOutputStream text = new ByteArrayOutputStream();

    if (saveAll) {
        // Capture the container's text so it can be returned as TEXT_FILENAME.
        ch = new BodyContentHandler(new RichTextContentHandler(new OutputStreamWriter(text, UTF_8)));
    } else {
        // Text is not wanted; use a no-op SAX handler.
        ch = new DefaultHandler();
    }

    Map<String, byte[]> files = new HashMap<>();
    MutableInt count = new MutableInt();

    // The extractor saves each embedded document into "files" and bumps "count".
    pc.set(EmbeddedDocumentExtractor.class, new MyEmbeddedDocumentExtractor(count, files));
    TikaResource.parse(parser, LOG, info.getPath(), is, ch, metadata, pc);

    if (count.intValue() == 0 && !saveAll) {
        throw new WebApplicationException(Response.Status.NO_CONTENT);
    }

    if (saveAll) {
        files.put(TEXT_FILENAME, text.toByteArray());

        ByteArrayOutputStream metaStream = new ByteArrayOutputStream();
        metadataToCsv(metadata, metaStream);

        files.put(META_FILENAME, metaStream.toByteArray());
    }

    return files;
}

From source file:org.apache.tika.server.UnpackerResource.java

/**
 * Unpacks every embedded document found in the request body and returns
 * them to the caller as a zip archive.
 *
 * @param is          the container document stream
 * @param httpHeaders request headers, used to pre-populate metadata hints
 * @param info        request URI info, used for logging
 * @return a streaming zip containing the extracted embedded documents
 * @throws WebApplicationException with NO_CONTENT when no embedded
 *         documents were found
 * @throws Exception if setup fails
 */
@PUT
@Produces("application/zip")
public StreamingOutput getText(InputStream is, @Context HttpHeaders httpHeaders, @Context UriInfo info)
        throws Exception {
    Metadata metadata = new Metadata();
    AutoDetectParser parser = TikaResource.createParser();
    TikaResource.fillMetadata(parser, metadata, httpHeaders);
    TikaResource.logRequest(logger, info, metadata);

    // Top-level document text is discarded; only embedded files matter here.
    ContentHandler contentHandler = new DefaultHandler();

    ZipOutput zipOutput = new ZipOutput();
    MutableInt embeddedCount = new MutableInt();

    ParseContext parseContext = new ParseContext();
    parseContext.set(EmbeddedDocumentExtractor.class,
            new MyEmbeddedDocumentExtractor(embeddedCount, zipOutput));

    try {
        parser.parse(is, contentHandler, metadata, parseContext);
    } catch (TikaException ex) {
        // Best-effort: whatever was extracted before the failure is still returned.
        logger.warn(String.format("%s: Unpacker failed", info.getPath()), ex);
    }

    if (embeddedCount.intValue() == 0) {
        throw new WebApplicationException(Response.Status.NO_CONTENT);
    }

    return zipOutput;
}

From source file:org.datacleaner.cli.MainTest.java

/**
 * Runs the CLI end-to-end with HTML output and verifies that the generated
 * file is well-formed HTML of the expected approximate size.
 */
public void testWriteHtmlToFile() throws Throwable {
    String filename = "target/test_write_html_to_file.html";
    Main.main(
            ("-conf src/test/resources/cli-examples/conf.xml -job src/test/resources/cli-examples/employees_job.xml -of "
                    + filename + " -ot HTML").split(" "));

    File file = new File(filename);
    assertTrue(file.exists());

    {
        String result = FileHelper.readFileAsString(file);
        String[] lines = result.split("\n");

        // Line 0 is presumably a doctype/declaration; the <html> root tag
        // is expected on the second line.
        assertEquals("<html>", lines[1]);
    }

    InputStream in = FileHelper.getInputStream(file);
    try {
        // parse it with validator.nu for HTML correctness
        final HtmlParser htmlParser = new HtmlParser(XmlViolationPolicy.FATAL);
        final AtomicInteger elementCounter = new AtomicInteger();
        // Count every element so we can sanity-check the document size below.
        htmlParser.setContentHandler(new DefaultHandler() {
            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes)
                    throws SAXException {
                elementCounter.incrementAndGet();
            }
        });
        // Collect warnings/errors for inspection; fatal errors abort the parse.
        final List<Exception> warningsAndErrors = new ArrayList<Exception>();
        htmlParser.setErrorHandler(new ErrorHandler() {
            @Override
            public void warning(SAXParseException exception) throws SAXException {
                System.err.println("Warning: " + exception.getMessage());
                warningsAndErrors.add(exception);
            }

            @Override
            public void fatalError(SAXParseException exception) throws SAXException {
                System.out.println("Fatal error: " + exception.getMessage());
                throw exception;
            }

            @Override
            public void error(SAXParseException exception) throws SAXException {
                System.err.println("Error: " + exception.getMessage());
                warningsAndErrors.add(exception);
            }
        });

        htmlParser.parse(new InputSource(in));

        // the output has approx 3600 XML elements
        int elementCount = elementCounter.get();
        assertTrue("Element count: " + elementCount, elementCount > 3000);
        assertTrue("Element count: " + elementCount, elementCount < 5000);

        if (!warningsAndErrors.isEmpty()) {
            for (Exception error : warningsAndErrors) {
                String message = error.getMessage();
                // Missing-encoding warnings are expected and tolerated; any
                // other warning or error fails the test.
                if (message.startsWith("No explicit character encoding declaration has been seen yet")
                        || message.startsWith("The character encoding of the document was not declared.")) {
                    // ignore/accept this one
                    continue;
                }
                error.printStackTrace();
                fail("Got " + warningsAndErrors.size() + " warnings and errors, see log for details");
            }
        }
    } finally {
        in.close();
    }
}

From source file:org.dita.dost.module.GenMapAndTopicListModule.java

/**
 * Initialize reusable filters: the list reader, the optional profiling
 * filter, the export-anchors and keydef filters, and a no-op SAX handler.
 */
private void initFilters() {
    // Reader that scans the primary ditamap for referenced files.
    listFilter = new GenListModuleReader();
    listFilter.setLogger(logger);
    listFilter.setPrimaryDitamap(rootFile);
    listFilter.setJob(job);

    // Conditional-processing filter, only when profiling is turned on.
    if (profilingEnabled) {
        filterUtils = parseFilterFile();
    }

    exportAnchorsFilter = new ExportAnchorsFilter();
    exportAnchorsFilter.setInputFile(rootFile);

    keydefFilter = new KeydefFilter();
    keydefFilter.setLogger(logger);
    keydefFilter.setCurrentFile(rootFile);
    keydefFilter.setJob(job);

    // No-op SAX handler, used where parse events should be discarded.
    nullHandler = new DefaultHandler();
}

From source file:org.dita.dost.module.reader.AbstractReaderModule.java

/**
 * Initialize reusable filters: temp-file naming, the list reader, the
 * optional profiling filter, transtype-specific filters, the keydef filter,
 * a no-op SAX handler, and the DITA writer/topic-fragment filters.
 */
void initFilters() {
    tempFileNameScheme.setBaseDir(job.getInputDir());

    // Reader that scans the primary ditamap for referenced files.
    listFilter = new GenListModuleReader();
    listFilter.setLogger(logger);
    listFilter.setPrimaryDitamap(rootFile);
    listFilter.setJob(job);
    listFilter.setFormatFilter(formatFilter);

    // Conditional-processing filter, only when profiling is turned on.
    if (profilingEnabled) {
        filterUtils = parseFilterFile();
    }

    // Export anchors are only needed for the Eclipse help transtype.
    if (INDEX_TYPE_ECLIPSEHELP.equals(transtype)) {
        exportAnchorsFilter = new ExportAnchorsFilter();
        exportAnchorsFilter.setInputFile(rootFile);
    }

    keydefFilter = new KeydefFilter();
    keydefFilter.setLogger(logger);
    keydefFilter.setCurrentFile(rootFile);
    keydefFilter.setJob(job);

    // No-op SAX handler, used where parse events should be discarded.
    nullHandler = new DefaultHandler();

    ditaWriterFilter = new DitaWriterFilter();
    ditaWriterFilter.setTempFileNameScheme(tempFileNameScheme);
    ditaWriterFilter.setLogger(logger);
    ditaWriterFilter.setJob(job);
    ditaWriterFilter.setEntityResolver(reader.getEntityResolver());

    topicFragmentFilter = new TopicFragmentFilter(ATTRIBUTE_NAME_CONREF, ATTRIBUTE_NAME_CONREFEND);

}

From source file:org.dita.dost.reader.TestGenListModuleReader.java

/**
 * Configures the GenListModuleReader for the given root file and drives a
 * non-validating XML parse through it.
 *
 * @param rootFile the map/topic file to parse
 * @throws Exception on any configuration or parse failure
 */
private void run(final File rootFile) throws Exception {
    final File ditaDir = new File("src" + File.separator + "main").getAbsoluteFile();

    reader = new GenListModuleReader();
    reader.setLogger(new TestUtils.TestLogger());
    reader.setCurrentFile(rootFile.toURI());
    reader.setPrimaryDitamap(rootFile.toURI());
    reader.setJob(new Job(tempDir));
    // Downstream output is irrelevant for this test, so discard SAX events.
    reader.setContentHandler(new DefaultHandler());

    final boolean validate = false;
    final XMLReader xmlReader = initXMLReader(ditaDir, validate,
            new File(rootFile.getPath()).getCanonicalFile());
    xmlReader.setContentHandler(reader);

    xmlReader.parse(rootFile.toURI().toString());
}

From source file:org.eclim.plugin.core.util.XmlUtils.java

/**
 * Gets an aggregate handler which delegates accordingly to the supplied
 * handlers.
 *
 * @param handler Main DefaultHandler to delegate to (may be null).
 * @param errorHandler DefaultHandler to delegate errors to (may be null).
 * @param entityResolver EntityResolver to delegate to (may be null).
 * @return an AggregateHandler wrapping the supplied delegates.
 */
private static DefaultHandler getHandler(DefaultHandler handler, DefaultHandler errorHandler,
        EntityResolver entityResolver) {
    // Fall back to a no-op handler when the caller supplied none.
    DefaultHandler delegate = handler;
    if (delegate == null) {
        delegate = new DefaultHandler();
    }
    return new AggregateHandler(delegate, errorHandler, entityResolver);
}