Example usage for org.apache.pdfbox.pdmodel.PDDocument.load

List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.load

Introduction

On this page you can find usage examples for the load method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

public static PDDocument load(byte[] input) throws IOException 

Source Link

Document

Parses a PDF.

Usage

From source file:org.mabb.fontverter.opentype.TtfInstructions.TestFullTtfPrograms.java

License:Open Source License

/**
 * Converts the Type0 font "TCQDAA+HelveticaNeue-Light-Identity-H" found in the test PDF to
 * OpenType and runs the instruction program of its second non-empty glyph through a
 * {@code TtfVirtualMachine}. The test succeeds when nothing throws.
 *
 * @throws Exception if the PDF cannot be loaded, the font cannot be converted, or the
 *                   instructions fail to execute
 */
@Test
public void executeSecondGlyphIn_BrokenHelveticaNeueTtf() throws Exception {
    // try-with-resources: the document was previously never closed, leaking it on every run.
    try (PDDocument doc = PDDocument.load(TestUtils.readTestFile("pdf/HorariosMadrid_Segovia.pdf"))) {
        PDFont rawType0Font = extractFont(doc, "TCQDAA+HelveticaNeue-Light-Identity-H");
        OpenTypeFont font = (OpenTypeFont) PdfFontExtractor.convertType0FontToOpenType((PDType0Font) rawType0Font);

        List<TtfGlyph> glyphs = font.getGlyfTable().getNonEmptyGlyphs();
        // Index 1 == the second non-empty glyph, as the test name states.
        TtfGlyph glyph = glyphs.get(1);
        List<TtfInstruction> instructions = glyph.getInstructions();

        TtfVirtualMachine vm = new TtfVirtualMachine(font);
        vm.execute(instructions);
    }
}

From source file:org.mabb.fontverter.pdf.PdfFontExtractor.java

License:Open Source License

/**
 * Extracts the fonts of a PDF file into a directory, using the given output font format.
 *
 * @param extractPath directory the fonts are extracted to; created (including missing parent
 *                    directories) when it does not exist yet
 * @param pdfFile     the PDF file to load
 * @param format      format handed to {@code PdfFontExtractor.setExtractFormat}
 * @throws IOException if the directory cannot be created, the PDF cannot be loaded, or the
 *                     extraction fails
 */
private static void extractPdfFonts(String extractPath, File pdfFile, FontFormat format) throws IOException {
    File fontExtractDir = new File(extractPath);
    // mkdirs() also handles nested paths; the trailing isDirectory() tolerates a concurrent creator.
    if (!fontExtractDir.exists() && !fontExtractDir.mkdirs() && !fontExtractDir.isDirectory())
        throw new IOException("Could not create font extraction directory: " + fontExtractDir);

    // try-with-resources: the document is closed even when the extraction throws.
    try (PDDocument pdf = PDDocument.load(pdfFile)) {
        PdfFontExtractor fontExtractor = new PdfFontExtractor();
        fontExtractor.setExtractFormat(format);
        fontExtractor.extractFontsToDir(pdf, extractPath);
    }
}

From source file:org.mabb.fontverter.pdf.PdfFontExtractor.java

License:Open Source License

/**
 * Loads the given PDF file and extracts its fonts into {@code path} by delegating to the
 * {@code PDDocument} overload of this method.
 *
 * @param pdf  the PDF file to load
 * @param path target location passed through to the delegate
 * @throws IOException if the PDF cannot be loaded or the extraction fails
 */
public void extractFontsToDir(File pdf, String path) throws IOException {
    // try-with-resources: the document is closed even when the extraction throws.
    try (PDDocument doc = PDDocument.load(pdf)) {
        extractFontsToDir(doc, path);
    }
}

From source file:org.mabb.fontverter.pdf.PdfFontExtractor.java

License:Open Source License

/**
 * Parses the given in-memory PDF and extracts its fonts into {@code path} by delegating to the
 * {@code PDDocument} overload of this method.
 *
 * @param pdf  the raw bytes of the PDF
 * @param path target location passed through to the delegate
 * @throws IOException if the PDF cannot be parsed or the extraction fails
 */
public void extractFontsToDir(byte[] pdf, String path) throws IOException {
    // try-with-resources: the document is closed even when the extraction throws.
    try (PDDocument doc = PDDocument.load(pdf)) {
        extractFontsToDir(doc, path);
    }
}

From source file:org.mabb.fontverter.pdf.TestPdfFontExtractor.java

License:Open Source License

/**
 * Extracts the fonts of {@code pdf/brno30.pdf} as {@code FVFont} objects and checks the count.
 *
 * NOTE(review): the method name mentions 2 fonts while the assertion expects 3 - confirm which
 * one reflects the test file.
 *
 * @throws IOException if the PDF cannot be loaded or the extraction fails
 */
@Test
public void givenPdfWith2Fonts_extractFontsToFVFontList_thenListHasSameNumberOfFonts() throws IOException {
    // try-with-resources: previously a failing assertion skipped doc.close() and leaked the document.
    try (PDDocument doc = PDDocument.load(TestUtils.readTestFile("pdf/brno30.pdf"))) {
        PdfFontExtractor extractor = new PdfFontExtractor();

        List<FVFont> fonts = extractor.extractToFVFonts(doc);

        Assert.assertEquals(3, fonts.size());
    }
}

From source file:org.mabb.fontverter.pdf.TestPdfFontExtractor.java

License:Open Source License

/**
 * Extracts the fonts of {@code pdf/brno30.pdf} into the temporary folder's root and checks that
 * exactly three files were written, each with the extension {@code ttf}.
 *
 * @throws IOException if the PDF cannot be loaded or the extraction fails
 */
@Test
public void givenPdfWith2Fonts_extractFontsToDir_thenDirectoryHasThreeTtfFiles() throws IOException {
    // try-with-resources: previously a failing assertion skipped doc.close() and leaked the document.
    try (PDDocument doc = PDDocument.load(TestUtils.readTestFile("pdf/brno30.pdf"))) {
        PdfFontExtractor extractor = new PdfFontExtractor();

        File extractDir = folder.getRoot();
        extractor.extractFontsToDir(doc, extractDir);
        File[] fontFiles = extractDir.listFiles();

        // listFiles() returns null on I/O error or when the path is not a directory;
        // fail with a clear message instead of a NullPointerException.
        Assert.assertNotNull("Could not list extraction directory " + extractDir, fontFiles);
        Assert.assertEquals(3, fontFiles.length);
        for (File fileOn : fontFiles)
            Assert.assertEquals("ttf", FilenameUtils.getExtension(fileOn.getPath()));
    }
}

From source file:org.mabb.fontverter.pdf.TestPdfFontExtractor.java

License:Open Source License

/**
 * Extracts the fonts of {@code pdf/brno30.pdf} into the temporary folder's root with the WOFF1
 * extract format set and checks that exactly three files were written, each with the
 * extension {@code woff}.
 *
 * @throws IOException if the PDF cannot be loaded or the extraction fails
 */
@Test
public void givenPdfWith2Fonts_extractFontsToDirWithWoff1FormatSet_thenDirectoryHasThreeWoffFiles()
        throws IOException {
    File extractDir = folder.getRoot();
    // try-with-resources: previously a failing assertion skipped doc.close() and leaked the document.
    try (PDDocument doc = PDDocument.load(TestUtils.readTestFile("pdf/brno30.pdf"))) {
        PdfFontExtractor extractor = new PdfFontExtractor();
        extractor.setExtractFormat(FontVerter.FontFormat.WOFF1);

        extractor.extractFontsToDir(doc, extractDir);
        File[] fontFiles = extractDir.listFiles();

        // listFiles() returns null on I/O error or when the path is not a directory;
        // fail with a clear message instead of a NullPointerException.
        Assert.assertNotNull("Could not list extraction directory " + extractDir, fontFiles);
        Assert.assertEquals(3, fontFiles.length);
        for (File fileOn : fontFiles)
            Assert.assertEquals("woff", FilenameUtils.getExtension(fileOn.getPath()));
    }
}

From source file:org.mabb.fontverter.pdf.TestType0ToOpenTypeConverter.java

License:Open Source License

public TestType0ToOpenTypeConverter() throws IOException {
    doc = PDDocument.load(TestUtils.readTestFile("pdf/HorariosMadrid_Segovia.pdf"));
}

From source file:org.MagicBeans.latexFileType.PDFGenerator.java

/**
 * Returns one page of the temporary PDF belonging to the working directory.
 *
 * The path is re-resolved on every call and stored in {@code PDF_PATH}; the loaded document is
 * stored in {@code inputPDF} and is not closed here.
 * NOTE(review): a document loaded by an earlier call is replaced without being closed - confirm
 * that it is released elsewhere.
 *
 * @param number 1-based page number
 * @return the requested page, or {@code null} when the PDF file does not exist, cannot be
 *         loaded, or {@code number} is outside the range of available pages
 */
public PDPage getPDFPage(int number) {
    PDF_PATH = ApplicationUtils.getTempPDFFile(workingDir);

    final File source = new File(PDF_PATH);
    if (!source.exists()) {
        return null;
    }

    try {
        inputPDF = PDDocument.load(source);
        List<PDPage> pages = inputPDF.getDocumentCatalog().getAllPages();
        // Valid page numbers are 1..pages.size(); an empty list therefore never matches.
        boolean inRange = pages != null && number > 0 && number <= pages.size();
        return inRange ? pages.get(number - 1) : null;
    } catch (IOException ex) {
        // A PDF that cannot be loaded is reported as "no page".
        return null;
    }
}

From source file:org.mitre.xtext.converters.PDFConverter.java

License:Apache License

/**
 * Converts a PDF file into a {@code ConvertedDocument} holding its text and selected metadata.
 * Implementation is informed by PDFBox authors.
 *
 * Encrypted documents are not decrypted; they are only tagged with the property
 * {@code encrypted=YES}. For all other documents the text is extracted with the shared
 * {@code stripper} and author, creation date, creator tool, keywords, subject and title are
 * copied from the document information when it is present.
 *
 * @param doc the PDF file to convert
 * @return the converted document
 * @throws IOException if there is an error loading or parsing the document
 */
@Override
public synchronized ConvertedDocument convert(java.io.File doc) throws IOException {

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *      http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /*
     * Adapted from LucenePDFDocument.java from the PDFBox lucene project, a class that creates
     * a document for the lucene search engine and populates fields such as path, url, contents,
     * summary, modified, uid, CreationDate, Creator, Keywords, ModificationDate, Producer,
     * Subject, Trapped and Encrypted from the PDF and its meta-data.
     *
     * Original author: Ben Litchfield (ben@benlitchfield.com), revision 1.23.
     */
    final ConvertedDocument converted = new ConvertedDocument(doc);
    PDDocument pdf = null;

    try {
        pdf = PDDocument.load(doc);

        if (pdf.isEncrypted()) {
            // Decryption with the default password "" is deliberately not attempted: it needs
            // the BouncyCastle crypto JARs (NoClassDefFoundError for
            // org/bouncycastle/jce/provider/BouncyCastleProvider otherwise). Just flag and move on.
            converted.addProperty("encrypted", "YES");
            return converted;
        }

        // Collect the extracted text in memory.
        final StringWriter text = new StringWriter();
        stripper.resetEngine();
        stripper.writeText(pdf, text);

        final PDDocumentInformation meta = pdf.getDocumentInformation();
        if (meta != null) {
            converted.addAuthor(meta.getAuthor());
            try {
                converted.addCreateDate(meta.getCreationDate());
            } catch (IOException ignored) {
                // Bad date: skip it but keep converting.
            }
            converted.addProperty("creator_tool", meta.getCreator());
            converted.addProperty("keywords", meta.getKeywords());
            // ModificationDate, Producer and Trapped are intentionally not copied.
            converted.addProperty("subject", meta.getSubject());

            // Fall back to the file name when the PDF has no usable title.
            final String title = meta.getTitle();
            final boolean untitled = title == null || "untitled".equalsIgnoreCase(title);
            converted.addTitle(untitled ? converted.filename : title);

            // TODO: Character set is what?
            converted.setEncoding("UTF-8");
        }

        converted.setPayload(text.toString());
        return converted;

    } finally {
        // Runs on every exit path, including the early return for encrypted documents.
        if (pdf != null) {
            pdf.close();
        }
    }
}