List of usage examples for org.apache.pdfbox.text.PDFTextStripper#getLineSeparator()
public String getLineSeparator()
From source file: it.myideas.bancamarcheextractor.Distinta.java
public static Distinta parse(Path file) { try (PDDocument doc = PDDocument.load(file.toFile())) { Distinta distinta = new Distinta(); PDFTextStripper stripper = new PDFTextStripper(); String contents = stripper.getText(doc); Stream<String> lines = Arrays.stream(contents.split(stripper.getLineSeparator())); log.debug("FILE:" + file.toString()); log.debug(contents);//from w ww. j a v a 2 s. c om lines.forEach(line -> { if (line.startsWith("Tipo disposizione")) { distinta.tipoDisposizione = line.replace("Tipo disposizione", "").trim().toLowerCase(); } else if (line.startsWith("1 Esecuzione")) { String[] p = line.split(" "); distinta.beneficiario = Arrays.stream(Arrays.copyOfRange(p, 4, p.length)) .map(String::toLowerCase).collect(Collectors.joining("_")); distinta.data = LocalDate.parse(p[2], DateTimeFormatter.ofPattern("dd/MM/yyyy")); } }); if (!isOk(distinta.beneficiario) || !isOk(distinta.tipoDisposizione) || distinta.data == null) { throw new IOException("Parser failure for file " + file.toString()); } return distinta; } catch (IOException e) { log.error("Error parsing PDF", e); return null; } }