Example usage for org.apache.poi.xwpf.extractor XWPFWordExtractor close

Introduction

On this page you can find example usages of the close() method of org.apache.poi.xwpf.extractor.XWPFWordExtractor.

Prototype

@Override
    public void close() throws IOException

Source Link

Usage

From source file:com.aurel.track.lucene.index.associatedFields.textExctractor.DocxExtractor.java

License:Open Source License

/**
 * Gets the text from file content /*from  ww w .j av a2 s  .  c  o m*/
 * @param file
 * @param fileExtension
 * @return
 */
@Override
public String getText(File file, String fileExtension) {
    FileInputStream fis = null;
    XWPFWordExtractor ex = null;
    try {
        fis = new FileInputStream(file);
        XWPFDocument doc = new XWPFDocument(fis);
        if (doc != null) {
            ex = new XWPFWordExtractor(doc);
            return ex.getText();
        }
    } catch (FileNotFoundException e) {
        LOGGER.info("File " + file.getName() + " not found. " + e.getMessage());
        LOGGER.debug(ExceptionUtils.getStackTrace(e));
    } catch (Exception e) {
        LOGGER.debug("Extracting text from the .doc file " + file.getName() + " failed with " + e.getMessage());
        LOGGER.error(ExceptionUtils.getStackTrace(e));
    } finally {
        try {
            if (fis != null) {
                fis.close();
            }
        } catch (IOException e) {
            LOGGER.debug("Closing the FileInputStream for file " + file.getName() + " failed with "
                    + e.getMessage());
            LOGGER.error(ExceptionUtils.getStackTrace(e));
        }
        if (ex != null) {
            try {
                ex.close();
            } catch (IOException e) {
                LOGGER.debug("Closing the text extractor from the .docx file " + file.getName()
                        + " failed with " + e.getMessage());
                LOGGER.error(ExceptionUtils.getStackTrace(e));
            }
        }
    }
    return null;
}

From source file:de.iisys.schub.processMining.similarity.parsing.DocxParser.java

License:Apache License

/**
 * Parses the loaded .docx Word document and returns its text.
 * <p>
 * Only use this method if you never need chapters; otherwise use the
 * 'parseDocxAndChapters' and 'getFullText' methods.
 *
 * @return
 *       the full text (incl. tables) as string; when no document is loaded,
 *       the currently stored full text is returned unchanged
 */
public String parseDocxSimple() {
    if (theDoc != null) {
        // try-with-resources closes the extractor even when getText() throws.
        try (XWPFWordExtractor extr = new XWPFWordExtractor(theDoc)) {
            this.fullText = extr.getText();
        } catch (IOException e) {
            // Raised while closing the extractor; the text has already been stored.
            e.printStackTrace();
        }
    }
    return this.fullText;
}

From source file:eu.modelwriter.ide.ui.command.ExtractTextHandler.java

License:Open Source License

/**
 * Extracts text from the given .docx {@link IFile} and writes it to a
 * workspace file with the same path but the "txt" extension, replacing any
 * existing file at that path. Failures are reported to the plug-in log.
 *
 * @param file
 *            the .docx {@link IFile}
 */
private void exctractDocx(final IFile file) {
    // try-with-resources releases the extractor, the document and the stream
    // (in that order) even when a workspace operation below throws.
    try (FileInputStream fis = new FileInputStream(file.getLocation().toFile());
            XWPFDocument docx = new XWPFDocument(fis);
            XWPFWordExtractor we = new XWPFWordExtractor(docx)) {
        final IPath textPath = file.getFullPath().removeFileExtension().addFileExtension("txt");
        final IFile textFile = ResourcesPlugin.getWorkspace().getRoot().getFile(textPath);
        if (textFile.exists()) {
            textFile.delete(true, new NullProgressMonitor());
        }
        // NOTE(review): getBytes() uses the platform default charset - confirm
        // that this matches the encoding expected for the created text file.
        textFile.create(new ByteArrayInputStream(we.getText().getBytes()), true, new NullProgressMonitor());
    } catch (IOException | CoreException e) {
        Activator.getDefault().getLog().log(new Status(IStatus.ERROR, Activator.PLUGIN_ID,
                UNABLE_TO_EXTRACT_TEXT_FROM + file.getFullPath(), e));
    }
}

From source file:mc.program.Importer.java

/**
 * Reads the .docx file referenced by {@code sourceFile} and appends every
 * whitespace-delimited token of its text to {@code sourceText}.
 * <p>
 * Any exception is printed to standard output and otherwise ignored.
 */
public void importDOCX() {
    // try-with-resources closes the scanner, the extractor and the stream
    // (in that order) even when reading or parsing fails part-way through.
    try (FileInputStream fis = new FileInputStream(sourceFile.getAbsolutePath());
            XWPFWordExtractor extractor = new XWPFWordExtractor(new XWPFDocument(fis));
            Scanner scanner = new Scanner(extractor.getText())) {
        // Put text into array list, one token at a time
        while (scanner.hasNext()) {
            sourceText.add(scanner.next());
        }
    } catch (Exception ex) {
        System.out.print(ex);
    }
}

From source file:steffen.haertlein.file.FileObject.java

License:Apache License

/**
 * Reads the Word (.docx) file {@code f} and appends each line of its text
 * to {@code lines}.
 * <p>
 * Each failure type is reported to the user in an error dialog and its stack
 * trace is printed; no exception is propagated to the caller.
 */
private void readWordDocument() {
    // try-with-resources closes the extractor and then the stream even when
    // opening or parsing the document fails.
    try (FileInputStream fs = new FileInputStream(f);
            XWPFWordExtractor docxReader = new XWPFWordExtractor(new XWPFDocument(OPCPackage.open(fs)))) {
        String text = docxReader.getText();
        for (String line : text.split("\n")) {
            lines.add(line);
        }
    } catch (InvalidFormatException e) {
        JOptionPane.showMessageDialog(null, "InvalidFormatException in readWordDocument", "Fehler",
                JOptionPane.ERROR_MESSAGE);
        e.printStackTrace();
    } catch (FileNotFoundException e) {
        JOptionPane.showMessageDialog(null, "FileNotFoundException in readWordDocument", "Fehler",
                JOptionPane.ERROR_MESSAGE);
        e.printStackTrace();
    } catch (IOException e) {
        JOptionPane.showMessageDialog(null, "IOException in readWordDocument", "Fehler",
                JOptionPane.ERROR_MESSAGE);
        e.printStackTrace();
    }
}