Example usage for org.apache.poi.xwpf.extractor XWPFWordExtractor getText

List of usage examples for org.apache.poi.xwpf.extractor.XWPFWordExtractor#getText()

Introduction

On this page you can find example usages of the getText() method of org.apache.poi.xwpf.extractor.XWPFWordExtractor.

Prototype

public String getText() 

Source Link

Usage

From source file:authorslilhelper.FXMLDocumentController.java

License:Open Source License

/**
 * Lets the user pick a .docx file, extracts its plain text and inserts it into
 * {@code Primary}, either at the end of the existing text or at the caret position.
 *
 * <p>The extracted text is re-wrapped paragraph by paragraph to {@code characterCapacity}
 * columns (see {@link #appendWrappedParagraphs(String)}). After a successful insert,
 * {@code onClickOrKeyPress()} is invoked and the absolute path of the loaded file is
 * appended to the per-user file tracker.
 *
 * <p>If the file chooser or the option dialog is dismissed, nothing is changed. Any
 * exception is reported to the user through a message dialog.
 */
public void loadWordDocument() {
    try {
        JFileChooser chooser = new JFileChooser();
        if (chooser.showOpenDialog(null) != JFileChooser.APPROVE_OPTION) {
            return;
        }
        File selectedFile = chooser.getSelectedFile();

        // XWPFDocument loads the document from the stream in its constructor, so the
        // stream can be released as soon as the text has been extracted.
        String fullText;
        try (FileInputStream docxStream = new FileInputStream(selectedFile)) {
            fullText = new XWPFWordExtractor(new XWPFDocument(docxStream)).getText();
        }

        String[] buttons = { "Append to end", "Insert at cursor location" };
        // NOTE(review): WARNING_MESSAGE is passed in the optionType slot and 0 in the
        // messageType slot; the arguments look swapped but are kept as-is so the dialog
        // appearance does not change. Confirm the intended icon.
        int result = JOptionPane.showOptionDialog(null, "How would you like to insert the text? ",
                "Insert Text", JOptionPane.WARNING_MESSAGE, 0, null, buttons, buttons[1]);

        if (result == 0) {
            appendWrappedParagraphs(fullText);
        } else if (result == 1) {
            Primary.insertText(Primary.getCaretPosition(), "\n");

            // Remember where the text has to end up, then append the wrapped text at the
            // end of the control and move that freshly appended range to the caret.
            int desiredCaretPosition = Primary.getCaretPosition();
            Primary.end();
            int start = Primary.getCaretPosition();

            appendWrappedParagraphs(fullText);

            Primary.end();
            int end = Primary.getCaretPosition();
            Primary.selectRange(start, end);
            String appendedText = Primary.getSelectedText();
            Primary.deleteText(start, end);

            Primary.insertText(desiredCaretPosition, appendedText);
        } else {
            // Option dialog closed without a choice: leave the editor untouched.
            return;
        }

        onClickOrKeyPress();
        recordLoadedFilePath(selectedFile.getAbsolutePath());
    } catch (Exception e) {
        // NOTE(review): this message is shown for every failure (unreadable file, I/O
        // error on the tracker file, ...), not only for documents containing images.
        JOptionPane.showMessageDialog(null,
                "Images are not supported. If your document contains images, it will not be loaded. Please remove the images or paste the text into another document and try again.");
    }
}

/**
 * Appends {@code text} to {@code Primary}, wrapping every paragraph (input line) so that
 * no emitted line is longer than {@code characterCapacity + 1} characters.
 *
 * <p>Lines are broken at the last space at or before column {@code characterCapacity};
 * the space stays at the start of the continuation line. When no usable space exists
 * (a single word longer than the capacity), the line is split at the capacity instead.
 * Previously that case either looped forever or threw and silently dropped the rest of
 * the document. Empty input lines produce no output; every non-empty paragraph is
 * followed by one blank line.
 *
 * @param text the text to append; line terminators separate paragraphs
 * @throws java.io.IOException if reading from the in-memory reader fails
 */
private void appendWrappedParagraphs(String text) throws java.io.IOException {
    // A non-positive capacity cannot make progress; clamp it so the loop always terminates.
    final int capacity = Math.max(1, characterCapacity);

    try (BufferedReader paragraphs = new BufferedReader(new StringReader(text))) {
        String paragraph;
        while ((paragraph = paragraphs.readLine()) != null) {
            if (paragraph.isEmpty()) {
                continue;
            }

            String rest = paragraph;
            while (rest.length() - 1 > capacity) {
                int breakAt = rest.lastIndexOf(' ', capacity);
                if (breakAt <= 0) {
                    // Either no space at all, or only the leading space carried over
                    // from the previous split: force a split so the remainder shrinks.
                    breakAt = capacity;
                }
                Primary.appendText(rest.substring(0, breakAt) + "\n");
                rest = rest.substring(breakAt);
            }

            Primary.appendText(rest + "\n");
            // Blank line that separates paragraphs.
            Primary.appendText("\n");
        }
    }
}

/**
 * Appends {@code pathToSave} as one line to the current user's file tracker
 * ({@code installationPath + "/FileTracker" + currentUser}).
 *
 * @param pathToSave absolute path of the document that was just loaded
 * @throws java.io.IOException if the tracker file cannot be opened or written
 */
private void recordLoadedFilePath(String pathToSave) throws java.io.IOException {
    File filesLoadedLog = new File(installationPath + "/FileTracker" + currentUser);
    // Opened in append mode; try-with-resources closes the writer even when the write fails.
    try (BufferedWriter trackerWriter = new BufferedWriter(new FileWriter(filesLoadedLog, true))) {
        trackerWriter.append(pathToSave + "\n");
    }
}

From source file:avoking.com.documentos.scheduler.core.Core.java

private String leerDocx(InputStream docx) throws IOException {
    // Build a document model that POI understands from the incoming stream.
    XWPFDocument documento = new XWPFDocument(docx);

    // The extractor is created from that document and yields its plain text.
    XWPFWordExtractor extractor = new XWPFWordExtractor(documento);
    String texto = extractor.getText();
    return texto;
}

From source file:br.gov.lexml.parser.documentoarticulado.LexMLParserFromTextTest.java

License:Open Source License

private String sampleDocx(String resourceName) {
    // Any failure is printed and reported to the caller as a null result.
    try {
        InputStream resource = TestUtil.class.getResourceAsStream(resourceName);
        InputStream bomAware = new BOMInputStream(resource);
        XWPFDocument docx = new XWPFDocument(OPCPackage.open(bomAware));
        @SuppressWarnings("resource")
        XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
        return extractor.getText();
    } catch (Exception exep) {
        exep.printStackTrace();
        return null;
    }
}

From source file:com.artech.prototype2.bardakov.utils.impl.MultiParserImpl.java

/**
 *      doc/docx/*from   w w w .j a  va  2  s .c om*/
 * @param FilePath -   
 * @return ?? ?
 */
private ArrayList<String> getListOfWordsFromDoc(String FilePath) {
    FileInputStream fis;
    List<String> result = new ArrayList<String>();
    if (FilePath.substring(FilePath.length() - 1).equals("x")) { //is a docx
        try {
            fis = new FileInputStream(new File(FilePath));
            XWPFDocument doc = new XWPFDocument(fis);
            XWPFWordExtractor extract = new XWPFWordExtractor(doc);
            // System.out.println(extract.getText());
            StringBuilder builder = new StringBuilder();
            builder.append(extract.getText());
            String[] words = builder.toString().split(" ");
            for (String s : words) {
                result.add(s);
            }
        } catch (IOException e) {

            e.printStackTrace();
        }
    } else { //is not a docx
        try {
            fis = new FileInputStream(new File(FilePath));
            HWPFDocument doc = new HWPFDocument(fis);
            WordExtractor extractor = new WordExtractor(doc);
            StringBuilder builder = new StringBuilder();
            builder.append(extractor.getText());
            String[] words = builder.toString().split(" ");
            for (String s : words) {
                result.add(s);
            }

        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return (ArrayList<String>) result;
}

From source file:com.aurel.track.lucene.index.associatedFields.textExctractor.DocxExtractor.java

License:Open Source License

/**
 * Gets the text from file content /* w  w w.ja v a  2  s .  co m*/
 * @param file
 * @param fileExtension
 * @return
 */
@Override
public String getText(File file, String fileExtension) {
    FileInputStream fis = null;
    XWPFWordExtractor ex = null;
    try {
        fis = new FileInputStream(file);
        XWPFDocument doc = new XWPFDocument(fis);
        if (doc != null) {
            ex = new XWPFWordExtractor(doc);
            return ex.getText();
        }
    } catch (FileNotFoundException e) {
        LOGGER.info("File " + file.getName() + " not found. " + e.getMessage());
        LOGGER.debug(ExceptionUtils.getStackTrace(e));
    } catch (Exception e) {
        LOGGER.debug("Extracting text from the .doc file " + file.getName() + " failed with " + e.getMessage());
        LOGGER.error(ExceptionUtils.getStackTrace(e));
    } finally {
        try {
            if (fis != null) {
                fis.close();
            }
        } catch (IOException e) {
            LOGGER.debug("Closing the FileInputStream for file " + file.getName() + " failed with "
                    + e.getMessage());
            LOGGER.error(ExceptionUtils.getStackTrace(e));
        }
        if (ex != null) {
            try {
                ex.close();
            } catch (IOException e) {
                LOGGER.debug("Closing the text extractor from the .docx file " + file.getName()
                        + " failed with " + e.getMessage());
                LOGGER.error(ExceptionUtils.getStackTrace(e));
            }
        }
    }
    return null;
}

From source file:com.bluetech.reader.WordReader.java

License:Apache License

/**
 * Reads a .docx file and returns its plain text.
 *
 * @param filePath path of the .docx file to read
 * @return the text extracted from the document
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException if the document cannot be read
 */
public static String readWordDoc(String filePath) throws FileNotFoundException, IOException {

    File file = new File(filePath);

    FileInputStream fis = new FileInputStream(file.getAbsolutePath());
    try {
        XWPFDocument document = new XWPFDocument(fis);
        XWPFWordExtractor extractor = new XWPFWordExtractor(document);
        return extractor.getText();
    } finally {
        // The document is loaded from the stream up front, so the file handle can be
        // released here; previously it was never closed.
        fis.close();
    }
}

From source file:com.docdoku.server.esindexer.ESTools.java

License:Open Source License

private static String microsoftWordDocumentToString(InputStream inputStream) throws IOException {
    // The header probe runs on the buffered wrapper, which is then handed to the extractor.
    try (InputStream buffered = new BufferedInputStream(inputStream)) {
        boolean hasOle2Header = POIFSFileSystem.hasPOIFSHeader(buffered);
        if (!hasOle2Header) {
            // No POIFS header: read the stream as a .docx document.
            XWPFDocument docx = new XWPFDocument(buffered);
            return new XWPFWordExtractor(docx).getText();
        }
        // POIFS header present: read the stream with the legacy WordExtractor.
        return new WordExtractor(buffered).getText();
    }
}

From source file:com.jaeksoft.searchlib.parser.DocxParser.java

License:Open Source License

@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    ParserResultItem item = getNewParserResultItem();
    XWPFDocument docx = new XWPFDocument(streamLimiter.getNewInputStream());

    XWPFWordExtractor extractor = null;
    try {
        extractor = new XWPFWordExtractor(docx);

        // Copy the core document properties into the result, when the document has any.
        CoreProperties props = extractor.getCoreProperties();
        if (props != null) {
            result_addCoreFields:
            {
                item.addField(ParserFieldEnum.title, props.getTitle());
                item.addField(ParserFieldEnum.creator, props.getCreator());
                item.addField(ParserFieldEnum.subject, props.getSubject());
                item.addField(ParserFieldEnum.description, props.getDescription());
                item.addField(ParserFieldEnum.keywords, props.getKeywords());
            }
        }

        // Body text with runs of spaces collapsed to a single space.
        String body = extractor.getText();
        String collapsed = StringUtils.replaceConsecutiveSpaces(body, " ");
        item.addField(ParserFieldEnum.content, collapsed);

        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        IOUtils.close(extractor);
    }
}

From source file:com.jgaap.generics.DocumentHelper.java

License:Open Source License

/**
 * Extracts text from a Word document and stores it in the document.
 * /*  w  ww  . j a v a  2 s  .  c  o m*/
 * @param inputStream
 *            An input stream pointing to the Word document to be read.
 * @throws IOException
 */
static private char[] loadMSWordDocx(InputStream inputStream) throws IOException {
    XWPFDocument docx = new XWPFDocument(inputStream);
    XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
    return extractor.getText().toCharArray();
}

From source file:com.min.word.core.ReadWordFileTest.java

License:Apache License

/**
 * Prints the text of {@code test.docx} (resolved against the working directory)
 * between a start and an end banner.
 */
public static void main(String[] args) throws Exception {
    System.out.println("---------------- Read File Start ------------------");
    FileInputStream source = new FileInputStream("test.docx");
    XWPFWordExtractor extractor = new XWPFWordExtractor(new XWPFDocument(source));
    String text = extractor.getText();
    System.out.println(text);
    System.out.println("---------------- Read File End ------------------");
}