Example usage for org.apache.poi.poifs.eventfilesystem.POIFSReader#registerListener

List of usage examples for org.apache.poi.poifs.eventfilesystem.POIFSReader#registerListener

Introduction

On this page you can find example usages of org.apache.poi.poifs.eventfilesystem.POIFSReader#registerListener.

Prototype


public void registerListener(final POIFSReaderListener listener, final String name) 

Source Link

Document

Registers a POIFSReaderListener for a document in the root directory.

Usage

From source file:com.flexive.extractor.ExcelExtractor.java

License:Open Source License

/**
 * Extracts the text and the summary information from an Excel file.
 *
 * @param in the input stream to read from
 * @return the extracted data, or null if no text extraction was possible
 */
public ExtractedData extract(final InputStream in) {

    BufferedInputStream buffered = null;
    try {

        writer = new ByteArrayOutputStream();

        // The stream is consumed twice (summary pass, then workbook pass), so it is
        // wrapped in a buffered stream that is marked at the start and rewound in between.
        buffered = new BufferedInputStream(in);
        buffered.mark(Integer.MAX_VALUE);

        // Pass 1: summary information; this object is registered as the listener
        // (presumably the listener callback is what populates fxsi -- confirm in the class)
        final POIFSReader summaryReader = new POIFSReader();
        summaryReader.registerListener(this, "\005SummaryInformation");
        summaryReader.read(buffered);
        buffered.reset();

        // Pass 2: hand every sheet of the workbook to processSheet
        final HSSFWorkbook workbook = new HSSFWorkbook(buffered);
        for (int sheetIdx = 0; sheetIdx < workbook.getNumberOfSheets(); sheetIdx++) {
            processSheet(workbook.getSheetAt(sheetIdx));
        }

        // The summary info, when present, is appended after the sheet text
        if (fxsi != null) {
            writer.write(FxSharedUtils.getBytes(fxsi.getFTIndexInformations()));
        }
        writer.flush();

        return new ExtractedData(fxsi, writer.toString());
    } catch (Exception exc) {
        // Any failure means "no extraction possible": report it and signal with null
        exc.printStackTrace();
        return null;
    } finally {
        // Best-effort cleanup; failures while closing are deliberately ignored
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception ignored) {
                /*ignore*/
            }
        }
        if (buffered != null) {
            try {
                buffered.close();
            } catch (Exception ignored) {
                /*ignore*/
            }
        }
    }
}

From source file:com.flexive.extractor.FxSummaryInformation.java

License:Open Source License

/**
 * Reads the summary information from a document.
 *
 * @param input the input stream to read from, will not be closed at the end
 * @return the summary information, or null if reading failed or none was captured
 */
public static FxSummaryInformation getSummaryInformation(InputStream input) {
    // Listener that remembers the summary information built from the event it receives
    class SummaryCollector implements POIFSReaderListener {
        private FxSummaryInformation collected = null;

        /**
         * Builds the summary information from the stream carried by the event.
         *
         * @param event the summary section event.
         */
        @Override
        public void processPOIFSReaderEvent(POIFSReaderEvent event) {
            try {
                final SummaryInformation summary =
                        (SummaryInformation) PropertySetFactory.create(event.getStream());
                collected = new FxSummaryInformation(summary);
            } catch (Exception ignored) {
                /* ignore: the collected value simply stays as it was */
            }
        }

        protected FxSummaryInformation getCollected() {
            return collected;
        }
    }

    final SummaryCollector collector = new SummaryCollector();
    try {
        final POIFSReader poifsReader = new POIFSReader();
        poifsReader.registerListener(collector, "\005SummaryInformation");
        poifsReader.read(input);
    } catch (Exception ex) {
        // A failed read yields no summary information at all
        return null;
    }
    return collector.getCollected();
}

From source file:com.flexive.extractor.WordExtractor.java

License:Open Source License

/**
 * Extracts the text and the summary information from a Word file.
 *
 * @param in the input stream to read from
 * @return the extracted data, or null if no text extraction was possible
 */
public ExtractedData extract(final InputStream in) {
    ByteArrayOutputStream textBytes = null;
    PrintWriter textOut = null;
    BufferedInputStream buffered = null;
    try {

        textBytes = new ByteArrayOutputStream();
        textOut = new PrintWriter(textBytes);

        // The stream is consumed twice (summary pass, then document pass), so it is
        // wrapped in a buffered stream that is marked at the start and rewound in between.
        buffered = new BufferedInputStream(in);
        buffered.mark(Integer.MAX_VALUE);

        // Pass 1: summary information; this object is registered as the listener
        // (presumably the listener callback is what populates fxsi -- confirm in the class)
        final POIFSReader summaryReader = new POIFSReader();
        summaryReader.registerListener(this, "\005SummaryInformation");
        summaryReader.read(buffered);
        buffered.reset();

        // Pass 2: write the document text, followed by the summary info when present
        final WordDocument document = new WordDocument(buffered);
        document.writeAllText(textOut);
        if (fxsi != null) {
            textOut.write(fxsi.getFTIndexInformations());
        }
        textOut.flush();

        return new ExtractedData(fxsi, textBytes.toString());
    } catch (Exception exc) {
        // Any failure means "no extraction possible"
        return null;
    } finally {
        // Best-effort cleanup; failures while closing are deliberately ignored
        if (textOut != null) {
            try {
                textOut.close();
            } catch (Exception ignored) {
                /*ignore*/
            }
        }
        if (textBytes != null) {
            try {
                textBytes.close();
            } catch (Exception ignored) {
                /*ignore*/
            }
        }
        if (buffered != null) {
            try {
                buffered.close();
            } catch (Exception ignored) {
                /*ignore*/
            }
        }
    }
}

From source file:com.krawler.esp.fileparser.word.ExtractWordFile.java

License:Open Source License

/**
 * Reads the given stream with a POIFS reader and returns the properties held by the
 * broker that is handed to the "\005SummaryInformation" listener.
 *
 * @param in the input stream to read from; it is not closed by this method
 * @return the properties exposed by the broker after the read
 * @throws IOException declared for failures while reading the stream
 */
public Properties extractProperties(InputStream in) throws IOException {
    final PropertiesBroker broker = new PropertiesBroker();
    final PropertiesReaderListener summaryListener = new PropertiesReaderListener(broker);

    final POIFSReader poifsReader = new POIFSReader();
    poifsReader.registerListener(summaryListener, "\005SummaryInformation");
    poifsReader.read(in);

    return broker.getProperties();
}

From source file:net.sourceforge.docfetcher.model.parse.MSOfficeParser.java

License:Open Source License

/**
 * Renders the text of the file and enriches the result with the metadata captured
 * from the "\005SummaryInformation" document.
 *
 * @param file the file to parse
 * @param context the parse context; only its filename is used here
 * @return the parse result; metadata is left unset when reading it fails with an IOException
 * @throws ParseException if the POIFS reader rejects the file with an IllegalArgumentException
 */
protected final ParseResult parse(File file, ParseContext context) throws ParseException {
    final ParseResult result = new ParseResult(renderText(file, context.getFilename()));

    final POIFSReader poifsReader = new POIFSReader();
    final MyReaderListener metadata = new MyReaderListener();
    poifsReader.registerListener(metadata, "\005SummaryInformation"); //$NON-NLS-1$

    InputStream stream = null;
    try {
        stream = new FileInputStream(file);
        try {
            poifsReader.read(stream);
        } catch (IllegalArgumentException e) {
            // Bug #3537738: "IllegalArgumentException: name cannot be empty"
            throw new ParseException(e);
        }

        result.setTitle(metadata.title)
                .addAuthor(metadata.author)
                .addMiscMetadata(metadata.subject)
                .addMiscMetadata(metadata.keywords)
                .addMiscMetadata(metadata.comments);
    } catch (IOException e) {
        // Metadata is optional: report the problem and return the text-only result
        Util.printErr(e);
    } finally {
        Closeables.closeQuietly(stream);
    }

    return result;
}

From source file:net.sourceforge.docfetcher.model.parse.TestParseFromZip.java

License:Open Source License

@Test
public void testZippedOffice() throws Exception {
    // First run: event-based POIFS read of the stream with a listener that does nothing
    new ZipAndRun(TestFiles.doc) {
        protected void handleInputStream(InputStream in) throws Exception {
            POIFSReaderListener noOpListener = new POIFSReaderListener() {
                public void processPOIFSReaderEvent(POIFSReaderEvent event) {
                    // Nothing
                }
            };
            POIFSReader poifsReader = new POIFSReader();
            poifsReader.registerListener(noOpListener, "\005SummaryInformation"); //$NON-NLS-1$
            poifsReader.read(in);
        }
    };
    // Second run, on a separate ZipAndRun instance: text extraction with WordExtractor
    new ZipAndRun(TestFiles.doc) {
        protected void handleInputStream(InputStream in) throws Exception {
            WordExtractor wordExtractor = null;
            try {
                wordExtractor = new WordExtractor(in);
                wordExtractor.getText();
            } finally {
                Closeables.closeQuietly(wordExtractor);
            }
        }
    };
}

From source file:net.sourceforge.docfetcher.model.parse.TestParseFromZip.java

License:Open Source License

@Test(expected = IOException.class)
public void testZippedOfficeFail() throws Exception {
    // Expected to fail with an IOException: the same InputStream is read twice,
    // first by the POIFS reader and then by the WordExtractor
    new ZipAndRun(TestFiles.doc) {
        protected void handleInputStream(InputStream in) throws Exception {
            POIFSReaderListener noOpListener = new POIFSReaderListener() {
                public void processPOIFSReaderEvent(POIFSReaderEvent event) {
                    // Nothing
                }
            };
            POIFSReader poifsReader = new POIFSReader();
            poifsReader.registerListener(noOpListener, "\005SummaryInformation"); //$NON-NLS-1$
            poifsReader.read(in);

            // Second read of the very same stream
            WordExtractor wordExtractor = null;
            try {
                wordExtractor = new WordExtractor(in);
                wordExtractor.getText();
            } finally {
                Closeables.closeQuietly(wordExtractor);
            }
        }
    };
}

From source file:net.sourceforge.docfetcher.parse.MSOfficeParser.java

License:Open Source License

/**
 * Renders the text of the file, appends the non-null metadata fields captured from the
 * "\005SummaryInformation" document, and wraps everything in a Document.
 *
 * @param file the file to parse
 * @return the document built from the file, the captured title, the combined contents
 *         and the captured author
 * @throws ParseException if the file cannot be found or cannot be read
 */
public Document parse(File file) throws ParseException {
    StringBuffer contents = new StringBuffer(renderText(file));

    POIFSReader reader = new POIFSReader();
    MyReaderListener listener = new MyReaderListener();
    reader.registerListener(listener, "\005SummaryInformation"); //$NON-NLS-1$

    try {
        InputStream in = new FileInputStream(file);
        try {
            reader.read(in);
        } finally {
            // Close even when read() throws, so the file handle is never leaked.
            // An IOException from close() is still mapped to "file not readable" below.
            in.close();
        }
    } catch (FileNotFoundException e) {
        throw new ParseException(file, Msg.file_not_found.value());
    } catch (IOException e) {
        throw new ParseException(file, Msg.file_not_readable.value());
    }

    // Append every metadata field that was captured, each separated by a single space
    String[] metaData = new String[] { listener.author, listener.title, listener.subject, listener.keywords,
            listener.comments };
    for (String field : metaData) {
        if (field != null) {
            contents.append(" ").append(field); //$NON-NLS-1$
        }
    }

    return new Document(file, listener.title, contents).addAuthor(listener.author);
}

From source file:net.sourceforge.vaticanfetcher.model.parse.TestParseFromZip.java

License:Open Source License

@Test
public void testZippedOffice() throws Exception {
    // First run: event-based POIFS read of the stream with a listener that does nothing
    new ZipAndRun(TestFiles.doc) {
        protected void handleInputStream(InputStream in) throws Exception {
            POIFSReaderListener noOpListener = new POIFSReaderListener() {
                public void processPOIFSReaderEvent(POIFSReaderEvent event) {
                    // Nothing
                }
            };
            POIFSReader poifsReader = new POIFSReader();
            poifsReader.registerListener(noOpListener, "\005SummaryInformation"); //$NON-NLS-1$
            poifsReader.read(in);
        }
    };
    // Second run, on a separate ZipAndRun instance: text extraction with WordExtractor
    new ZipAndRun(TestFiles.doc) {
        protected void handleInputStream(InputStream in) throws Exception {
            WordExtractor wordExtractor = new WordExtractor(in);
            wordExtractor.getText();
        }
    };
}

From source file:net.sourceforge.vaticanfetcher.model.parse.TestParseFromZip.java

License:Open Source License

@Test(expected = IOException.class)
public void testZippedOfficeFail() throws Exception {
    // Expected to fail with an IOException: the same InputStream is read twice,
    // first by the POIFS reader and then by the WordExtractor
    new ZipAndRun(TestFiles.doc) {
        protected void handleInputStream(InputStream in) throws Exception {
            POIFSReaderListener noOpListener = new POIFSReaderListener() {
                public void processPOIFSReaderEvent(POIFSReaderEvent event) {
                    // Nothing
                }
            };
            POIFSReader poifsReader = new POIFSReader();
            poifsReader.registerListener(noOpListener, "\005SummaryInformation"); //$NON-NLS-1$
            poifsReader.read(in);

            // Second read of the very same stream
            WordExtractor wordExtractor = new WordExtractor(in);
            wordExtractor.getText();
        }
    };
}