Example usage for org.apache.pdfbox.pdmodel PDDocument close

List of usage examples for org.apache.pdfbox.pdmodel PDDocument close

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel PDDocument close.

Prototype

@Override
public void close() throws IOException 

Source Link

Document

This will close the underlying COSDocument object.

Usage

From source file:eu.transkribus.languageresources.extractor.pdf.PDFExtractor.java

/**
 * Parses the PDF at the given path and collects the text of every page.
 *
 * <p>For each page index from 0 up to the document's page count, the result of
 * {@code extractTextFromPage(pdDoc, pageId)} is appended to the returned list.
 * An {@link IOException} raised while reading or parsing is logged at SEVERE
 * level and not propagated; whatever was collected up to that point is returned.
 *
 * @param pathToFile path of the PDF file to read
 * @return the collected per-page text, in page-index order; empty if the file
 *         could not be opened or parsed
 */
@Override
public List<String> extractTextFromDocumentPagewise(String pathToFile) {
    List<String> pageWiseText = new LinkedList<>();

    COSDocument cosDoc = null;
    PDDocument pdDoc = null;
    // The stream is owned by this method, so release it here no matter how the
    // parser constructor, parse() or the extraction loop exits.
    try (FileInputStream input = new FileInputStream(new File(pathToFile))) {
        PDFParser parser = new PDFParser(input);
        parser.parse();
        cosDoc = parser.getDocument();
        pdDoc = new PDDocument(cosDoc);

        int pageCount = pdDoc.getNumberOfPages();
        for (int pageId = 0; pageId < pageCount; pageId++) {
            pageWiseText.add(extractTextFromPage(pdDoc, pageId));
        }
    } catch (IOException ex) {
        Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        // Closing the PDDocument closes its underlying COSDocument as well.
        if (pdDoc != null) {
            try {
                pdDoc.close();
            } catch (IOException ex) {
                Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        // Kept as a fallback for the case where the COSDocument was obtained
        // but never wrapped (or the close above failed).
        if (cosDoc != null) {
            try {
                cosDoc.close();
            } catch (IOException ex) {
                Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }

    return pageWiseText;
}

From source file:eu.transkribus.languageresources.extractor.pdf.PDFExtractor.java

/**
 * Parses the PDF at the given path and returns the text of a single page.
 *
 * <p>The text is obtained from {@code extractTextFromPage(pdDoc, page)}. An
 * {@link IOException} raised while reading or parsing is logged at SEVERE
 * level and not propagated.
 *
 * @param pathToFile path of the PDF file to read
 * @param page       page index, passed through unchanged to the document-based overload
 * @return the extracted text, or an empty string if the file could not be read
 */
@Override
public String extractTextFromPage(String pathToFile, int page) {
    StringBuilder sb = new StringBuilder();

    COSDocument cosDoc = null;
    PDDocument pdDoc = null;
    // The stream is owned by this method, so release it here no matter how the
    // parser constructor, parse() or the extraction exits.
    try (FileInputStream input = new FileInputStream(new File(pathToFile))) {
        PDFParser parser = new PDFParser(input);
        parser.parse();
        cosDoc = parser.getDocument();
        pdDoc = new PDDocument(cosDoc);
        sb.append(extractTextFromPage(pdDoc, page));
    } catch (IOException ex) {
        Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        // Closing the PDDocument closes its underlying COSDocument as well.
        if (pdDoc != null) {
            try {
                pdDoc.close();
            } catch (IOException ex) {
                Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        // Kept as a fallback for the case where the COSDocument was obtained
        // but never wrapped (or the close above failed).
        if (cosDoc != null) {
            try {
                cosDoc.close();
            } catch (IOException ex) {
                Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }

    return sb.toString();
}

From source file:extractor.Extractor.java

/**
 * Loads each named PDF, extracts its full text and wraps it in a {@code Document}.
 *
 * <p>Files that cannot be found are reported on standard output and skipped;
 * other I/O failures are logged at SEVERE level and the file is skipped.
 *
 * @param pathBase prefix prepended verbatim to every entry of {@code files}
 * @param files    file names to load, each resolved as {@code pathBase + file}
 * @return one {@code Document} per file that was read successfully, in input order
 */
public static ArrayList<Document> returnDocuments(String pathBase, String[] files) {

    ArrayList<Document> documents = new ArrayList<>();

    for (String file : files) {
        try {
            String paperString;
            // try-with-resources closes the PDF even when text extraction throws;
            // previously close() was skipped on that path and the document leaked.
            try (PDDocument pdDocument = PDDocument.load(new File(pathBase + file))) {
                paperString = new PDFTextStripper().getText(pdDocument);
            }
            // Added only after a successful close, matching the original ordering.
            documents.add(new Document(paperString));
        } catch (FileNotFoundException ex) {
            System.out.println("Arquivo no encontrado! Detalhes: " + ex.getLocalizedMessage());
        } catch (IOException ex) {
            Logger.getLogger(Classifierdoc.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    return documents;
}

From source file:extractor.pdftotext.PdfToText.java

/**
 * Extracts the raw text of a PDF with page delimiters inserted.
 *
 * <p>Each page is bracketed by the markers {@code "PAGE START"} and
 * {@code "PAGE END"}. Every character in the Unicode categories Cc, Cf, Co
 * and Cn is replaced by a newline.
 *
 * @param file the PDF file to read
 * @return the processed text, or an empty string if an {@link IOException}
 *         occurred (the exception is logged at SEVERE level)
 */
private String getPdfBoxRaw(File file) {
    // try-with-resources closes the document even when stripping fails;
    // previously close() was skipped on that path and the document leaked.
    try (PDDocument doc = PDDocument.load(file)) {
        PDFTextStripper stripper = new PDFTextStripper();

        stripper.setPageStart("PAGE START");
        stripper.setPageEnd("PAGE END");
        // Get the text from the document and replace control, format,
        // private-use and unassigned characters with \n.
        return stripper.getText(doc).replaceAll("[\\p{Cc}\\p{Cf}\\p{Co}\\p{Cn}]", "\n");
    } catch (IOException ex) {
        Logger.getLogger(PdfToText.class.getName()).log(Level.SEVERE, null, ex);
    }
    return "";
}

From source file:FeatureExtraction.FeatureExtractorPDFStructuralPaths.java

/**
 * Checks whether a PDF can be loaded and its structure traversed.
 *
 * <p>The file is loaded and its trailer is walked with
 * {@code ExtractPDFStructuralPathsRecursively}; the extracted paths themselves
 * are discarded. The PDF counts as compatible unless an {@link IOException}
 * is thrown while loading or traversing it.
 *
 * @param filePath pdf file path
 * @return true if the PDF is compatible
 */
public boolean IsCompatiblePDF2(String filePath) {
    File pdfFile = new File(filePath);
    Map<String, Integer> structuralPaths = new HashMap<>();
    HashSet<COSBase> visitedObjects = new HashSet<>();
    boolean compatible = true;
    String parentPath = "";
    String pdfObjectName = "Trailer";
    // Start from null rather than throwaway PDDocument/COSDocument instances:
    // those placeholders were replaced by the loaded document and never closed.
    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(pdfFile);
        COSDocument pdfDocument = pdf.getDocument();
        ExtractPDFStructuralPathsRecursively(pdfDocument.getTrailer().getCOSObject(), pdfObjectName, parentPath,
                structuralPaths, visitedObjects, 1);
    } catch (IOException e) {
        compatible = false;
    } finally {
        if (pdf != null) {
            try {
                // Closing the PDDocument also closes its underlying COSDocument.
                pdf.close();
            } catch (IOException e) {
                Console.PrintException(String.format("Error closing PDF file: '%s'", filePath), e);
            }
        }
    }
    return compatible;
}

From source file:FeatureExtraction.FeatureExtractorPDFStructuralPathsOld.java

/**
 * Checks whether a PDF can be loaded and its structure traversed.
 *
 * <p>The file is loaded and its trailer is walked with
 * {@code ExtractPDFStructuralPathsRecursively}; the extracted paths themselves
 * are discarded. The PDF counts as compatible unless an {@link IOException}
 * is thrown while loading or traversing it.
 *
 * @param filePath pdf file path
 * @return true if the PDF is compatible
 */
public boolean IsCompatiblePDF2(String filePath) {
    File pdfFile = new File(filePath);
    Map<String, Integer> structuralPaths = new HashMap<>();
    HashSet<COSBase> visitedObjects = new HashSet<>();
    boolean compatible = true;
    // Start from null rather than throwaway PDDocument/COSDocument instances:
    // those placeholders were replaced by the loaded document and never closed.
    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(pdfFile);
        COSDocument pdfDocument = pdf.getDocument();
        ExtractPDFStructuralPathsRecursively(pdfDocument.getTrailer().getCOSObject(), "Trailer", "",
                structuralPaths, visitedObjects);
    } catch (IOException e) {
        compatible = false;
    } finally {
        if (pdf != null) {
            try {
                // Closing the PDDocument also closes its underlying COSDocument.
                pdf.close();
            } catch (IOException e) {
                Console.PrintException(String.format("Error closing PDF file: '%s'", filePath), e);
            }
        }
    }
    return compatible;
}

From source file:fi.nls.oskari.printout.printing.pdfbox.UsingTextMatrix.java

License:Apache License

/**
 * creates a sample document with some text using a text matrix.
 * /*from w  ww . j  a v a2 s .c  o  m*/
 * @param message
 *            The message to write in the file.
 * @param outfile
 *            The resulting PDF.
 * 
 * @throws IOException
 *             If there is an error writing the data.
 * @throws COSVisitorException
 *             If there is an error writing the PDF.
 */
public void doIt(String message, String outfile) throws IOException, COSVisitorException {
    // the document
    PDDocument doc = null;
    try {
        doc = new PDDocument();

        // Page 1
        PDFont font = PDType1Font.HELVETICA;
        PDPage page = new PDPage();
        page.setMediaBox(PDPage.PAGE_SIZE_A4);
        doc.addPage(page);
        float fontSize = 12.0f;

        PDRectangle pageSize = page.findMediaBox();

        System.err.println("pageSize " + pageSize);
        System.err.println(
                "pageSize cm " + pageSize.getWidth() / 72 * 2.54 + "," + pageSize.getHeight() / 72 * 2.54);

        float centeredXPosition = (pageSize.getWidth() - fontSize / 1000f) / 2f;
        float stringWidth = font.getStringWidth(message);
        float centeredYPosition = (pageSize.getHeight() - (stringWidth * fontSize) / 1000f) / 3f;

        PDPageContentStream contentStream = new PDPageContentStream(doc, page, false, false);
        contentStream.setFont(font, fontSize);
        contentStream.beginText();
        // counterclockwise rotation
        for (int i = 0; i < 8; i++) {
            contentStream.setTextRotation(i * Math.PI * 0.25, centeredXPosition,
                    pageSize.getHeight() - centeredYPosition);
            contentStream.drawString(message + " " + i);
        }
        // clockwise rotation
        for (int i = 0; i < 8; i++) {
            contentStream.setTextRotation(-i * Math.PI * 0.25, centeredXPosition, centeredYPosition);
            contentStream.drawString(message + " " + i);
        }

        contentStream.endText();
        contentStream.close();

        // Page 2
        page = new PDPage();
        page.setMediaBox(PDPage.PAGE_SIZE_A4);
        doc.addPage(page);
        fontSize = 1.0f;

        contentStream = new PDPageContentStream(doc, page, false, false);
        contentStream.setFont(font, fontSize);
        contentStream.beginText();

        // text scaling
        for (int i = 0; i < 10; i++) {
            contentStream.setTextScaling(12 + (i * 6), 12 + (i * 6), 100, 100 + i * 50);
            contentStream.drawString(message + " " + i);
        }
        contentStream.endText();
        contentStream.close();

        // Page 3
        page = new PDPage();
        page.setMediaBox(PDPage.PAGE_SIZE_A4);
        doc.addPage(page);
        fontSize = 1.0f;

        contentStream = new PDPageContentStream(doc, page, false, false);
        contentStream.setFont(font, fontSize);
        contentStream.beginText();

        int i = 0;
        // text scaling combined with rotation
        contentStream.setTextMatrix(12, 0, 0, 12, centeredXPosition, centeredYPosition * 1.5);
        contentStream.drawString(message + " " + i++);

        contentStream.setTextMatrix(0, 18, -18, 0, centeredXPosition, centeredYPosition * 1.5);
        contentStream.drawString(message + " " + i++);

        contentStream.setTextMatrix(-24, 0, 0, -24, centeredXPosition, centeredYPosition * 1.5);
        contentStream.drawString(message + " " + i++);

        contentStream.setTextMatrix(0, -30, 30, 0, centeredXPosition, centeredYPosition * 1.5);
        contentStream.drawString(message + " " + i++);

        contentStream.endText();
        contentStream.close();

        // Page 4
        {
            page = new PDPage();
            page.setMediaBox(PDPage.PAGE_SIZE_A4);
            doc.addPage(page);
            fontSize = 1.0f;

            contentStream = new PDPageContentStream(doc, page, false, false);
            contentStream.setFont(font, fontSize);
            contentStream.beginText();

            AffineTransform root = new AffineTransform();
            root.scale(72.0 / 2.54, 72.0 / 2.54);

            for (i = 0; i < pageSize.getHeight() / 72 * 2.54; i++) {
                // text scaling combined with rotation
                {
                    AffineTransform rowMatrix = new AffineTransform(root);
                    rowMatrix.translate(1, i);
                    contentStream.setTextMatrix(rowMatrix);
                    contentStream.drawString(message + " " + i);
                }

            }

            contentStream.endText();
            contentStream.close();
        }

        doc.save(outfile);
    } finally {
        if (doc != null) {
            doc.close();
        }
    }
}

From source file:FileIOAux.PrintAux.java

/**
 * @see/*w w  w .  j a v a  2 s. com*/
 * http://stackoverflow.com/questions/23326562/apache-pdfbox-convert-pdf-to-images
 * @param fil
 * @return
 */
public static BufferedImage[] pdfToImage(String fil) {
    BufferedImage[] bim = null;
    try {
        PDDocument document = PDDocument.load(new File(fil));
        if (document != null) {
            PDFRenderer pdfRenderer = new PDFRenderer(document);
            bim = new BufferedImage[document.getNumberOfPages()];
            for (int i = 0; i < document.getNumberOfPages(); i++) {
                bim[i] = pdfRenderer.renderImage(i);
            }
            document.close();
        }
    } catch (IOException ex) {
        Logger.getLogger(PrintAux.class.getName()).log(Level.SEVERE, null, ex);
    }
    return bim;
}

From source file:firmaapp.MainFormController.java

/**
 * Creates a work order for the date chosen in the date picker, renders the
 * first page of its PDF to a 300 DPI PNG, and refreshes the work-order list.
 *
 * <p>The PDF is read from {@code pdf_docs/work_orders} and the PNG is written to
 * {@code pdf_docs/work_orders_picture}; both use the file name stem
 * {@code work_order_<number>_<yy>}, where {@code <number>} is the part of the
 * work-order number before the first {@code "/"}.
 *
 * @param event the triggering UI event (not used)
 * @throws IOException if the PDF cannot be loaded or rendered, or the PNG
 *                     cannot be written
 */
@FXML
void workOrderDateButtonAction(ActionEvent event) throws IOException {
    LocalDate date = workOrderDatePicker.getValue();
    ProductMySQL prodMySQL = new ProductMySQL();
    String numOfWO = prodMySQL.createWorkOrder(Date.from(date.atStartOfDay(ZoneId.systemDefault()).toInstant()));

    WorkOrderPDF woPDF = new WorkOrderPDF(numOfWO,
            Date.from(date.atStartOfDay(ZoneId.systemDefault()).toInstant()), RootController.workOrderProducts,
            RootController.workOrderSupplies);
    woPDF.createWorkOrder();

    DateFormat df = new SimpleDateFormat("yy"); // Just the year, with 2 digits
    String formatedDate = df.format(woPDF.getDateOfWorkOrder());

    // Shared file name stem of the generated PDF and of its PNG preview.
    String baseName = "work_order_" + woPDF.getNumOfWorkOrder().split("/")[0] + "_" + formatedDate;
    File pdfFile = new File("pdf_docs" + File.separator + "work_orders" + File.separator + baseName + ".pdf");
    File pngFile = new File(
            "pdf_docs" + File.separator + "work_orders_picture" + File.separator + baseName + ".png");

    // try-with-resources closes the document even when rendering or writing the
    // image fails; previously close() was skipped on that path.
    try (PDDocument doc = PDDocument.load(pdfFile)) {
        PDFRenderer renderer = new PDFRenderer(doc);
        // Render once: the original rendered the page twice and discarded the first result.
        BufferedImage image = renderer.renderImageWithDPI(0, 300);
        ImageIO.write(image, "PNG", pngFile);
    }

    WorkOrderMySQL woMySQL = new WorkOrderMySQL();
    ArrayList<WorkOrder> orders = woMySQL.getAllWokrOrders();
    RootController.allWorkOrders = orders;
    Collections.sort(RootController.allWorkOrders, new WorkOrderCompare());
    dataWorkOrders.clear();
    for (WorkOrder w : RootController.allWorkOrders) {
        dataWorkOrders.add(w);
    }
}

From source file:firmaapp.MainFormController.java

/**
 * Prints a PDF file in landscape orientation after showing the print dialog.
 *
 * <p>The imageable area starts at the paper origin and is the paper's width
 * and height each reduced by 20. Nothing is printed when the user cancels
 * the dialog.
 *
 * @param file the PDF file to print
 * @throws Exception if the file cannot be loaded or printing fails
 */
public void printPDFDocument(File file) throws Exception {
    PrinterJob job = PrinterJob.getPrinterJob();
    PageFormat pf = job.defaultPage();
    Paper paper = new Paper();
    double margin = 20;
    paper.setImageableArea(0, 0, paper.getWidth() - margin, paper.getHeight() - margin);
    pf.setPaper(paper);
    pf.setOrientation(PageFormat.LANDSCAPE);

    // try-with-resources closes the document even when the dialog or the print
    // job throws; previously close() was skipped on that path.
    try (PDDocument doc = PDDocument.load(file)) {
        Book book = new Book();
        book.append(new PDFPrintable(doc), pf);
        job.setPageable(book);

        if (job.printDialog()) {
            job.print();
        }
    }
}