Example usage for org.apache.poi.hwpf.usermodel Picture getMimeType

List of usage examples for org.apache.poi.hwpf.usermodel Picture getMimeType

Introduction

On this page you can find example usages of the getMimeType method of org.apache.poi.hwpf.usermodel.Picture.

Prototype

public String getMimeType() 

Source Link

Document

Returns the MIME type for the image.

Usage

From source file:mj.ocraptor.extraction.tika.parser.microsoft.WordExtractor.java

License:Apache License

/**
 * Writes an {@code img} element for the picture attached to a character run and, the
 * first time a given picture is seen, passes its content on as an embedded resource.
 *
 * @param cr       the character run being processed; checked with {@code isRendered}
 * @param picture  the picture to emit; when {@code null} nothing is written
 * @param pictures supplies the picture number and remembers which pictures were output
 * @param xhtml    handler that receives the {@code img} element
 * @throws SAXException  propagated from the XHTML handler or embedded-resource handling
 * @throws IOException   propagated from the calls made here
 * @throws TikaException propagated from the calls made here
 */
private void handlePictureCharacterRun(CharacterRun cr, Picture picture, PicturesSource pictures,
        XHTMLContentHandler xhtml) throws SAXException, IOException, TikaException {
    if (!isRendered(cr)) {
        return;
    }
    if (picture == null) {
        // No picture left to pair with this run. The original author suspected several
        // placeholder characters (U+0008) referencing the same real image.
        return;
    }

    // Identify the picture
    String extension = picture.suggestFileExtension();
    int pictureNumber = pictures.pictureNumber(picture);

    // The file carries no name for the picture, so invent one that both the img tag
    // and the embedded resource can refer to
    String filename = "image" + pictureNumber;
    if (extension.length() > 0) {
        filename += "." + extension;
    }

    String mimeType = picture.getMimeType();

    // Emit the (empty) img element
    AttributesImpl imgAttributes = new AttributesImpl();
    imgAttributes.addAttribute("", "src", "src", "CDATA", "embedded:" + filename);
    imgAttributes.addAttribute("", "alt", "alt", "CDATA", filename);
    xhtml.startElement("img", imgAttributes);
    xhtml.endElement("img");

    // Each individual image is exposed as an embedded resource only once
    if (pictures.hasOutput(picture)) {
        return;
    }
    TikaInputStream content = TikaInputStream.get(picture.getContent());
    handleEmbeddedResource(content, filename, null, mimeType, xhtml, false);
    pictures.recordOutput(picture);
}

From source file:org.apache.tika.parser.microsoft.WordExtractor.java

License:Apache License

/**
 * Emits an {@code img} tag referencing the given picture and, unless that picture has
 * already been output, forwards the picture bytes as an embedded resource.
 *
 * @param cr       character run the picture belongs to; skipped unless {@code isRendered}
 * @param picture  picture to reference; {@code null} means there is nothing to emit
 * @param pictures source of picture numbers and of the already-output bookkeeping
 * @param xhtml    destination for the generated {@code img} element
 * @throws SAXException  propagated from the XHTML handler or embedded-resource handling
 * @throws IOException   propagated from the calls made here
 * @throws TikaException propagated from the calls made here
 */
private void handlePictureCharacterRun(CharacterRun cr, Picture picture, PicturesSource pictures,
        XHTMLContentHandler xhtml) throws SAXException, IOException, TikaException {
    // A missing picture is probably caused by multiple placeholder characters (U+0008)
    // referencing the same real image, per the original author's note.
    if (isRendered(cr) && picture != null) {
        // Work out which picture this is
        final String suffix = picture.suggestFileExtension();
        final int index = pictures.pictureNumber(picture);

        // Synthesise a name: the document stores none, yet the img tag and the
        // embedded resource must agree on one
        final StringBuilder nameBuilder = new StringBuilder("image").append(index);
        if (suffix.length() > 0) {
            nameBuilder.append(".").append(suffix);
        }
        final String filename = nameBuilder.toString();

        final String mimeType = picture.getMimeType();

        // Write the img tag
        final AttributesImpl attributes = new AttributesImpl();
        attributes.addAttribute("", "src", "src", "CDATA", "embedded:" + filename);
        attributes.addAttribute("", "alt", "alt", "CDATA", filename);
        xhtml.startElement("img", attributes);
        xhtml.endElement("img");

        // Expose the image content only the first time it is encountered
        final boolean alreadyOutput = pictures.hasOutput(picture);
        if (!alreadyOutput) {
            handleEmbeddedResource(TikaInputStream.get(picture.getContent()), filename, null, mimeType,
                    xhtml, false);
            pictures.recordOutput(picture);
        }
    }
}

From source file:org.opf_labs.aqua.OfficeAnalyser.java

License:Apache License

/**
 * Writes an XML report to standard output describing each Word document named on the
 * command line.
 *
 * <p>The report has a single {@code properties} root element with one {@code document}
 * child per argument. Each {@code document} carries the path in its {@code href}
 * attribute and contains the sections {@code SummaryInformation}, {@code TextTable},
 * {@code DocumentSummaryInformation}, {@code FontTable} and {@code PicturesTable}.
 *
 * <p>The output document is closed once, after all arguments have been processed, so
 * that several files can be reported in one run and an empty argument list still
 * produces a closed document.
 *
 * @param args paths of the Word documents to analyse
 * @throws Exception if a file cannot be read or parsed, or the XML cannot be written
 */
public static void main(String[] args) throws Exception {
    SMOutputDocument xmldoc = SMOutputFactory.createOutputDocument(
            SMOutputFactory.getGlobalXMLOutputFactory().createXMLStreamWriter(System.out, "UTF-8"), "1.1",
            "UTF-8", true);

    xmldoc.setIndentation("\n ", 1, 2); // for unix linefeed, 2 spaces per level

    SMOutputElement xmlroot = xmldoc.addElement("properties");

    // Loop through arguments:
    for (int i = 0; i < args.length; i++) {
        SMOutputElement xd = xmlroot.addElement("document");
        xd.addAttribute("href", args[i]);

        // Close the file stream explicitly once the document object has been built.
        // NOTE(review): assumes HWPFDocument reads everything it needs in its constructor - confirm.
        HWPFDocument doc;
        FileInputStream in = new FileInputStream(args[i]);
        try {
            doc = new HWPFDocument(in);
        } finally {
            in.close();
        }

        writeSummaryInformation(doc, xd);
        writeTextTable(doc, xd);
        writeDocumentSummaryInformation(doc, xd);
        writeFontTable(doc, xd);
        writePicturesTable(doc, xd);
    }

    // Important: flushes and closes the output. Must happen after the loop, otherwise
    // only the first document could be written.
    xmldoc.closeRoot();
}

/** Adds a child element called {@code name} whose text is the string form of {@code value}. */
private static void addTextElement(SMOutputElement parent, String name, Object value)
        throws javax.xml.stream.XMLStreamException {
    // String.valueOf turns a null value into the text "null" rather than handing null to the writer.
    parent.addElement(name).addCharacters(String.valueOf(value));
}

/** Writes the {@code SummaryInformation} section of one document. */
private static void writeSummaryInformation(HWPFDocument doc, SMOutputElement xd)
        throws javax.xml.stream.XMLStreamException {
    SMOutputElement sie = xd.addElement("SummaryInformation");
    addTextElement(sie, "ApplicationName", doc.getSummaryInformation().getApplicationName());
    addTextElement(sie, "OSVersion", doc.getSummaryInformation().getOSVersion());
    addTextElement(sie, "Author", doc.getSummaryInformation().getAuthor());
    addTextElement(sie, "CharCount", doc.getSummaryInformation().getCharCount());
    addTextElement(sie, "Comments", doc.getSummaryInformation().getComments());
    addTextElement(sie, "EditTime", doc.getSummaryInformation().getEditTime());
    addTextElement(sie, "Format", doc.getSummaryInformation().getFormat());
    addTextElement(sie, "Keywords", doc.getSummaryInformation().getKeywords());
    addTextElement(sie, "LastAuthor", doc.getSummaryInformation().getLastAuthor());
    addTextElement(sie, "PageCount", doc.getSummaryInformation().getPageCount());
    addTextElement(sie, "RevNumber", doc.getSummaryInformation().getRevNumber());
    addTextElement(sie, "SectionCount", doc.getSummaryInformation().getSectionCount());
    addTextElement(sie, "Security", doc.getSummaryInformation().getSecurity());
    addTextElement(sie, "Subject", doc.getSummaryInformation().getSubject());
    addTextElement(sie, "Template", doc.getSummaryInformation().getTemplate());
    addTextElement(sie, "Title", doc.getSummaryInformation().getTitle());
    addTextElement(sie, "WordCount", doc.getSummaryInformation().getWordCount());
    addTextElement(sie, "CreatedDateTime", doc.getSummaryInformation().getCreateDateTime());
    addTextElement(sie, "LastPrinted", doc.getSummaryInformation().getLastPrinted());
    addTextElement(sie, "LastSaveDateTime", doc.getSummaryInformation().getLastSaveDateTime());
    // NOTE(review): if getThumbnail() returns an array this prints its identity string,
    // not the image data - confirm the intended output.
    addTextElement(sie, "Thumbnail", doc.getSummaryInformation().getThumbnail());
}

/** Writes the {@code TextTable} section: one {@code TextPiece} element per text piece. */
private static void writeTextTable(HWPFDocument doc, SMOutputElement xd)
        throws javax.xml.stream.XMLStreamException {
    SMOutputElement tte = xd.addElement("TextTable");
    for (TextPiece tp : doc.getTextTable().getTextPieces()) {
        SMOutputElement tpe = tte.addElement("TextPiece");
        tpe.addAttribute("isUnicode", "" + tp.getPieceDescriptor().isUnicode());
        tpe.addCharacters(tp.getStringBuilder().toString());
    }
}

/** Writes the {@code DocumentSummaryInformation} section, including every section's properties. */
private static void writeDocumentSummaryInformation(HWPFDocument doc, SMOutputElement xd)
        throws javax.xml.stream.XMLStreamException {
    SMOutputElement dsie = xd.addElement("DocumentSummaryInformation");
    addTextElement(dsie, "ParCount", doc.getDocumentSummaryInformation().getParCount());
    addTextElement(dsie, "ByteCount", doc.getDocumentSummaryInformation().getByteCount());
    addTextElement(dsie, "HiddenCount", doc.getDocumentSummaryInformation().getHiddenCount());
    addTextElement(dsie, "LineCount", doc.getDocumentSummaryInformation().getLineCount());
    addTextElement(dsie, "MMClipCount", doc.getDocumentSummaryInformation().getMMClipCount());
    addTextElement(dsie, "NoteCount", doc.getDocumentSummaryInformation().getNoteCount());
    addTextElement(dsie, "SectionCount", doc.getDocumentSummaryInformation().getSectionCount());
    addTextElement(dsie, "SlideCount", doc.getDocumentSummaryInformation().getSlideCount());
    addTextElement(dsie, "Format", doc.getDocumentSummaryInformation().getFormat());
    addTextElement(dsie, "PresentationFormat", doc.getDocumentSummaryInformation().getPresentationFormat());
    addTextElement(dsie, "Company", doc.getDocumentSummaryInformation().getCompany());
    addTextElement(dsie, "Category", doc.getDocumentSummaryInformation().getCategory());

    // Sections
    for (Object os : doc.getDocumentSummaryInformation().getSections()) {
        Section s = (Section) os;
        SMOutputElement se = dsie.addElement("Section");
        addTextElement(se, "FormatID", s.getFormatID());
        addTextElement(se, "CodePage", s.getCodepage());
        addTextElement(se, "PropertyCount", s.getPropertyCount());
        for (Property sp : s.getProperties()) {
            SMOutputElement pe = se.addElement("Property");
            pe.addAttribute("class", sp.getValue().getClass().getCanonicalName());
            pe.addCharacters(sp.getValue().toString());
        }
    }
}

/** Writes the {@code FontTable} section: one {@code Font} element per font name entry. */
private static void writeFontTable(HWPFDocument doc, SMOutputElement xd)
        throws javax.xml.stream.XMLStreamException {
    SMOutputElement fte = xd.addElement("FontTable");
    for (Ffn f : doc.getFontTable().getFontNames()) {
        SMOutputElement fe = fte.addElement("Font");
        addTextElement(fe, "MainFontName", f.getMainFontName());
        try {
            // The element is created before the name is read, so a failure leaves an
            // empty AltFontName element in the output.
            fe.addElement("AltFontName").addCharacters(f.getAltFontName());
        } catch (Exception ignored) {
            // Deliberately best-effort: reading the alternative name seems to fail for
            // some fonts, and no safe pre-check has been found as yet.
        }
        addTextElement(fe, "Size", f.getSize());
        addTextElement(fe, "Weight", f.getWeight());
    }
}

/** Writes the {@code PicturesTable} section: one {@code Picture} element per picture. */
private static void writePicturesTable(HWPFDocument doc, SMOutputElement xd)
        throws javax.xml.stream.XMLStreamException {
    SMOutputElement pte = xd.addElement("PicturesTable");
    for (Picture p : doc.getPicturesTable().getAllPictures()) {
        SMOutputElement pe = pte.addElement("Picture");
        addTextElement(pe, "MimeType", p.getMimeType());
        addTextElement(pe, "Width", p.getWidth());
        addTextElement(pe, "Height", p.getHeight());
        addTextElement(pe, "HorizontalScalingFactor", p.getHorizontalScalingFactor());
        addTextElement(pe, "VerticalScalingFactor", p.getVerticalScalingFactor());
        // NOTE(review): if getContent() returns an array this prints its identity string,
        // not the picture bytes - confirm the intended output.
        addTextElement(pe, "Content", p.getContent());
    }
}