Example usage for org.apache.pdfbox.text PDFTextStripper getLineSeparator

Introduction

On this page you can find example usage of org.apache.pdfbox.text.PDFTextStripper.getLineSeparator().

Prototype

public String getLineSeparator()

Source Link

Document

This will get the line separator.

Usage

From source file: it.myideas.bancamarcheextractor.Distinta.java

/**
 * Parses a PDF file into a {@code Distinta}.
 *
 * <p>The document text is extracted with {@code PDFTextStripper} and scanned line by line:
 * <ul>
 *   <li>a line starting with {@code "Tipo disposizione"} supplies {@code tipoDisposizione}
 *       (the line with that label removed, trimmed and lower-cased);</li>
 *   <li>a line starting with {@code "1 Esecuzione"} is split on single spaces; token 2 is
 *       parsed as a {@code dd/MM/yyyy} date into {@code data}, and tokens 4 onward are
 *       lower-cased and joined with {@code "_"} into {@code beneficiario}.</li>
 * </ul>
 * If several matching lines are present, the last one wins.
 *
 * @param file path of the PDF to read
 * @return the populated {@code Distinta}, or {@code null} when the file cannot be read, a
 *         matching line is malformed, or the extracted fields fail validation (the failure
 *         is logged at error level)
 */
public static Distinta parse(Path file) {

    try (PDDocument doc = PDDocument.load(file.toFile())) {

        Distinta distinta = new Distinta();

        PDFTextStripper stripper = new PDFTextStripper();
        String contents = stripper.getText(doc);
        // String.split interprets its argument as a regex; quote the separator so that it is
        // always matched literally, whatever the stripper is configured with.
        String[] lines = contents.split(java.util.regex.Pattern.quote(stripper.getLineSeparator()));

        log.debug("FILE:" + file.toString());
        log.debug(contents);

        // Built once instead of once per matching line.
        DateTimeFormatter dateFormat = DateTimeFormatter.ofPattern("dd/MM/yyyy");

        for (String line : lines) {
            if (line.startsWith("Tipo disposizione")) {
                // Locale.ROOT keeps the result independent of the JVM default locale.
                distinta.tipoDisposizione = line.replace("Tipo disposizione", "").trim()
                        .toLowerCase(java.util.Locale.ROOT);
            } else if (line.startsWith("1 Esecuzione")) {
                String[] p = line.split(" ");

                // Tokens 2 (date) and 4.. (beneficiary) are read below; a shorter line would
                // otherwise raise an unchecked exception that bypasses the catch block.
                if (p.length < 4) {
                    throw new IOException("Malformed line in file " + file.toString() + ": " + line);
                }

                distinta.beneficiario = Arrays.stream(p, 4, p.length)
                        .map(token -> token.toLowerCase(java.util.Locale.ROOT))
                        .collect(Collectors.joining("_"));

                try {
                    distinta.data = LocalDate.parse(p[2], dateFormat);
                } catch (java.time.format.DateTimeParseException e) {
                    // Route bad dates through the same failure path as every other parse error.
                    throw new IOException("Invalid date '" + p[2] + "' in file " + file.toString(), e);
                }
            }
        }

        // isOk is defined elsewhere in this class; presumably it rejects null/empty values.
        if (!isOk(distinta.beneficiario) || !isOk(distinta.tipoDisposizione) || distinta.data == null) {
            throw new IOException("Parser failure for file " + file.toString());
        }

        return distinta;
    } catch (IOException e) {
        log.error("Error parsing PDF", e);
        return null;
    }
}