Example usage for org.apache.poi.xwpf.extractor XWPFWordExtractor getText

Introduction

This page collects example usages of the getText() method of org.apache.poi.xwpf.extractor.XWPFWordExtractor.

Prototype

public String getText()

Source Link

Usage

From source file:authorslilhelper.FXMLDocumentController.java

License:Open Source License

/**
 * Lets the user pick a Word (.docx) file, extracts its text and adds it to {@code Primary},
 * re-wrapped so that lines break at spaces near {@code characterCapacity} characters.
 *
 * <p>The user chooses between appending the text at the end and inserting it at the current
 * caret position. After the text has been added, {@code onClickOrKeyPress()} is called and the
 * absolute path of the loaded file is appended to the per-user "FileTracker" file under
 * {@code installationPath}. Nothing happens if either dialog is dismissed.
 *
 * <p>Any exception raised along the way is reported to the user through a message dialog.
 */
public void loadWordDocument() {
    try {
        JFileChooser chooser = new JFileChooser();
        if (chooser.showOpenDialog(null) != JFileChooser.APPROVE_OPTION) {
            return;
        }
        File selectedFile = chooser.getSelectedFile();

        // Extract the text while the stream is open, then release the file handle.
        String fullText;
        try (FileInputStream in = new FileInputStream(selectedFile)) {
            XWPFDocument docToAppend = new XWPFDocument(in);
            XWPFWordExtractor extract = new XWPFWordExtractor(docToAppend);
            fullText = extract.getText();
        }

        String[] buttons = { "Append to end", "Insert at cursor location" };
        int result = JOptionPane.showOptionDialog(null, "How would you like to insert the text? ",
                "Insert Text", JOptionPane.WARNING_MESSAGE, 0, null, buttons, buttons[1]);

        if (result == 0) {
            appendWordWrapped(fullText);
        } else if (result == 1) {
            Primary.insertText(Primary.getCaretPosition(), "\n");
            int desiredCaretPosition = Primary.getCaretPosition();

            // Append the wrapped text at the end first, then cut exactly that appended range
            // and re-insert it at the position the caret had before the append.
            Primary.end();
            int start = Primary.getCaretPosition();
            appendWordWrapped(fullText);
            Primary.end();
            int end = Primary.getCaretPosition();

            Primary.selectRange(start, end);
            String appendedText = Primary.getSelectedText();
            Primary.deleteText(start, end);
            Primary.insertText(desiredCaretPosition, appendedText);
        } else {
            // Option dialog was closed without choosing: leave the editor untouched.
            return;
        }

        onClickOrKeyPress();

        // Adds the loaded file's path to the file tracker; the writer is closed even on failure.
        File filesLoadedLog = new File(installationPath + "/FileTracker" + currentUser);
        try (FileWriter tracker = new FileWriter(filesLoadedLog, true)) {
            tracker.append(selectedFile.getAbsolutePath() + "\n");
        }
    } catch (Exception e) {
        JOptionPane.showMessageDialog(null,
                "Images are not supported. If your document contains images, it will not be loaded. Please remove the images or paste the text into another document and try again.");
    }
}

/**
 * Appends {@code text} to {@code Primary}, one paragraph per input line, breaking each
 * paragraph at the last space at or before {@code characterCapacity}. Every non-empty
 * paragraph is followed by an extra blank line; empty input lines are skipped.
 *
 * <p>When a chunk has no usable space to break at (a single word longer than the capacity),
 * it is hard-broken at {@code characterCapacity} so the loop always makes progress.
 */
private void appendWordWrapped(String text) {
    int capacity = characterCapacity;
    // Same line terminators BufferedReader.readLine() recognises: \r\n, \r and \n.
    for (String line : text.split("\r\n|\r|\n")) {
        if (line.isEmpty()) {
            continue;
        }
        String rest = line;
        // A non-positive capacity cannot be wrapped to; such lines are appended unwrapped.
        while (capacity > 0 && rest.length() > capacity + 1) {
            int breakAt = rest.lastIndexOf(' ', capacity);
            if (breakAt <= 0) {
                // No space (or only the leading one carried over from the previous break).
                breakAt = capacity;
            }
            Primary.appendText(rest.substring(0, breakAt) + "\n");
            // The remainder keeps the space it was broken at as its first character.
            rest = rest.substring(breakAt);
        }
        Primary.appendText(rest + "\n");
        Primary.appendText("\n");
    }
}

From source file:avoking.com.documentos.scheduler.core.Core.java

/**
 * Reads the text content of a .docx document supplied as a stream.
 *
 * @param docx stream positioned at the start of the .docx data
 * @return the text returned by the POI word extractor
 * @throws IOException if the document cannot be read from the stream
 */
private String leerDocx(InputStream docx) throws IOException {
    // Build a document POI understands from the stream ...
    XWPFDocument documento = new XWPFDocument(docx);
    // ... and hand it to the extractor that pulls out its content.
    XWPFWordExtractor extractor = new XWPFWordExtractor(documento);
    return extractor.getText();
}

From source file:br.gov.lexml.parser.documentoarticulado.LexMLParserFromTextTest.java

License:Open Source License

/**
 * Loads a .docx test resource from the classpath and returns its text.
 *
 * @param resourceName resource name, resolved relative to {@code TestUtil}
 * @return the extracted text, or {@code null} if the resource could not be read
 *         (the failure is printed to standard error)
 */
private String sampleDocx(String resourceName) {
    String content = null;
    // try-with-resources closes the resource stream, which was previously left open.
    try (InputStream input = new BOMInputStream(TestUtil.class.getResourceAsStream(resourceName))) {
        XWPFDocument document = new XWPFDocument(OPCPackage.open(input));
        @SuppressWarnings("resource")
        XWPFWordExtractor wordExtractor = new XWPFWordExtractor(document);
        content = wordExtractor.getText();
    } catch (Exception exep) {
        exep.printStackTrace();
    }
    return content;
}

From source file:com.artech.prototype2.bardakov.utils.impl.MultiParserImpl.java

/**
 *      doc/docx/*from   w w w .j a  va  2  s .c om*/
 * @param FilePath -   
 * @return ?? ?
 */
private ArrayList<String> getListOfWordsFromDoc(String FilePath) {
    FileInputStream fis;
    List<String> result = new ArrayList<String>();
    if (FilePath.substring(FilePath.length() - 1).equals("x")) { //is a docx
        try {
            fis = new FileInputStream(new File(FilePath));
            XWPFDocument doc = new XWPFDocument(fis);
            XWPFWordExtractor extract = new XWPFWordExtractor(doc);
            // System.out.println(extract.getText());
            StringBuilder builder = new StringBuilder();
            builder.append(extract.getText());
            String[] words = builder.toString().split(" ");
            for (String s : words) {
                result.add(s);
            }
        } catch (IOException e) {

            e.printStackTrace();
        }
    } else { //is not a docx
        try {
            fis = new FileInputStream(new File(FilePath));
            HWPFDocument doc = new HWPFDocument(fis);
            WordExtractor extractor = new WordExtractor(doc);
            StringBuilder builder = new StringBuilder();
            builder.append(extractor.getText());
            String[] words = builder.toString().split(" ");
            for (String s : words) {
                result.add(s);
            }

        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return (ArrayList<String>) result;
}

From source file:com.aurel.track.lucene.index.associatedFields.textExctractor.DocxExtractor.java

License:Open Source License

/**
 * Gets the text from file content /* w  w w.ja v a  2  s .  co m*/
 * @param file
 * @param fileExtension
 * @return
 */
@Override
public String getText(File file, String fileExtension) {
    FileInputStream fis = null;
    XWPFWordExtractor ex = null;
    try {
        fis = new FileInputStream(file);
        XWPFDocument doc = new XWPFDocument(fis);
        if (doc != null) {
            ex = new XWPFWordExtractor(doc);
            return ex.getText();
        }
    } catch (FileNotFoundException e) {
        LOGGER.info("File " + file.getName() + " not found. " + e.getMessage());
        LOGGER.debug(ExceptionUtils.getStackTrace(e));
    } catch (Exception e) {
        LOGGER.debug("Extracting text from the .doc file " + file.getName() + " failed with " + e.getMessage());
        LOGGER.error(ExceptionUtils.getStackTrace(e));
    } finally {
        try {
            if (fis != null) {
                fis.close();
            }
        } catch (IOException e) {
            LOGGER.debug("Closing the FileInputStream for file " + file.getName() + " failed with "
                    + e.getMessage());
            LOGGER.error(ExceptionUtils.getStackTrace(e));
        }
        if (ex != null) {
            try {
                ex.close();
            } catch (IOException e) {
                LOGGER.debug("Closing the text extractor from the .docx file " + file.getName()
                        + " failed with " + e.getMessage());
                LOGGER.error(ExceptionUtils.getStackTrace(e));
            }
        }
    }
    return null;
}

From source file:com.bluetech.reader.WordReader.java

License:Apache License

/**
 * Reads a .docx file and returns its text.
 *
 * @param filePath path of the .docx file
 * @return the text returned by the POI word extractor
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException if the document cannot be read
 */
public static String readWordDoc(String filePath) throws FileNotFoundException, IOException {
    File file = new File(filePath);

    // try-with-resources closes the file stream, which was previously never closed.
    try (FileInputStream fis = new FileInputStream(file.getAbsolutePath())) {
        XWPFDocument document = new XWPFDocument(fis);
        XWPFWordExtractor extractor = new XWPFWordExtractor(document);
        return extractor.getText();
    }
}

From source file:com.docdoku.server.esindexer.ESTools.java

License:Open Source License

/**
 * Extracts the text of a Word document supplied as a stream.
 *
 * <p>If the buffered stream starts with a POIFS header, the text is read with
 * {@code WordExtractor}; otherwise it is read with {@code XWPFWordExtractor}. The stream is
 * closed before this method returns.
 *
 * @param inputStream stream containing the Word document
 * @return the extracted text
 * @throws IOException if reading the document fails
 */
private static String microsoftWordDocumentToString(InputStream inputStream) throws IOException {
    try (InputStream buffered = new BufferedInputStream(inputStream)) {
        if (!POIFSFileSystem.hasPOIFSHeader(buffered)) {
            XWPFDocument docx = new XWPFDocument(buffered);
            return new XWPFWordExtractor(docx).getText();
        }
        return new WordExtractor(buffered).getText();
    }
}

From source file:com.jaeksoft.searchlib.parser.DocxParser.java

License:Open Source License

/**
 * Parses a .docx stream into a new parser result item: copies the core properties
 * (title, creator, subject, description, keywords) when present, stores the text with
 * consecutive spaces replaced by a single space, and runs language detection on the content
 * field. The extractor is closed when parsing finishes or fails.
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    final ParserResultItem item = getNewParserResultItem();
    final XWPFDocument docx = new XWPFDocument(streamLimiter.getNewInputStream());

    XWPFWordExtractor extractor = null;
    try {
        extractor = new XWPFWordExtractor(docx);

        final CoreProperties props = extractor.getCoreProperties();
        if (props != null) {
            item.addField(ParserFieldEnum.title, props.getTitle());
            item.addField(ParserFieldEnum.creator, props.getCreator());
            item.addField(ParserFieldEnum.subject, props.getSubject());
            item.addField(ParserFieldEnum.description, props.getDescription());
            item.addField(ParserFieldEnum.keywords, props.getKeywords());
        }

        final String rawText = extractor.getText();
        final String normalized = StringUtils.replaceConsecutiveSpaces(rawText, " ");
        item.addField(ParserFieldEnum.content, normalized);

        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        IOUtils.close(extractor);
    }
}

From source file:com.jgaap.generics.DocumentHelper.java

License:Open Source License

/**
 * Extracts text from a Word document and stores it in the document.
 * /*  w  ww  . j a v a  2 s  .  c  o m*/
 * @param inputStream
 *            An input stream pointing to the Word document to be read.
 * @throws IOException
 */
static private char[] loadMSWordDocx(InputStream inputStream) throws IOException {
    XWPFDocument docx = new XWPFDocument(inputStream);
    XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
    return extractor.getText().toCharArray();
}

From source file:com.min.word.core.ReadWordFileTest.java

License:Apache License

/**
 * Prints the text of {@code test.docx} (resolved against the working directory) to standard
 * output, between start and end marker lines.
 *
 * @param args not used
 * @throws Exception if the file cannot be opened or read
 */
public static void main(String[] args) throws Exception {
    System.out.println("---------------- Read File Start ------------------");
    // try-with-resources closes the file stream, which was previously never closed.
    try (FileInputStream in = new FileInputStream("test.docx")) {
        XWPFDocument document = new XWPFDocument(in);
        XWPFWordExtractor we = new XWPFWordExtractor(document);
        System.out.println(we.getText());
    }
    System.out.println("---------------- Read File End ------------------");
}