Example usage for org.apache.poi.hdgf.extractor VisioTextExtractor getText

List of usage examples for org.apache.poi.hdgf.extractor VisioTextExtractor getText

Introduction

On this page you can find usage examples for the getText method of org.apache.poi.hdgf.extractor.VisioTextExtractor.

Prototype

@Override
public String getText() 

Source Link

Document

Returns the textual contents of the file.

Usage

From source file:net.sourceforge.docfetcher.parse.MSVisioParser.java

License:Open Source License

/**
 * Extracts the text of the given file using a {@link VisioTextExtractor}.
 *
 * @param file the file to read
 * @return the text reported by the extractor
 * @throws ParseException with the "file corrupted" message if the extractor
 *         cannot be constructed from the file, with the "file not found"
 *         message if the file cannot be opened, and with the "file not
 *         readable" message on any other I/O error (including a failure to
 *         close the stream)
 */
public String renderText(File file) throws ParseException {
    try {
        final InputStream stream = new FileInputStream(file);
        VisioTextExtractor visio;
        try {
            visio = new VisioTextExtractor(stream);
        } catch (Exception e) {
            // A file may carry the "vsd" extension without being a Visio document.
            throw new ParseException(file, Msg.file_corrupted.value());
        } finally {
            // The stream is closed right after the extractor is constructed,
            // whether construction succeeded or not.
            stream.close();
        }
        return visio.getText();
    } catch (FileNotFoundException e) {
        throw new ParseException(file, Msg.file_not_found.value());
    } catch (IOException ioe) {
        throw new ParseException(file, Msg.file_not_readable.value());
    }
}

From source file:net.yacy.document.parser.vsdParser.java

License:Open Source License

/**
 * Parses a Visio (vsd) stream into a single-element {@link Document} array.
 *
 * <p>Text and summary information are read with a {@link VisioTextExtractor}.
 * A failure of the extractor is only logged as a warning; in that case the
 * document is built with empty text and without author, keywords or a
 * document-supplied title.
 *
 * @param location       URL of the source document; also used as the title
 *                       when the summary information provides none
 * @param mimeType       MIME type passed through to the resulting document
 * @param charset        not used by this method; the document is always
 *                       created with charset "UTF-8"
 * @param scraper        not used by this method
 * @param timezoneOffset not used by this method
 * @param source         stream the Visio document is read from
 * @return an array containing exactly one parsed document
 * @throws Parser.Failure if building the document fails
 * @throws InterruptedException if one is raised while building the document
 */
@Override
public Document[] parse(final AnchorURL location, final String mimeType, final String charset,
        final VocabularyScraper scraper, final int timezoneOffset, final InputStream source)
        throws Parser.Failure, InterruptedException {

    Document theDoc = null;
    // Set once the catch block has taken over, so the finally block does not
    // replace the specific failure with the generic fallback below.
    boolean failureReported = false;

    try {
        String contents = "";
        SummaryInformation summary = null;
        try {
            final VisioTextExtractor extractor = new VisioTextExtractor(source);
            contents = extractor.getText();
            summary = extractor.getSummaryInformation();
        } catch (final Exception e) {
            // Best effort: continue with whatever was extracted so far.
            ConcurrentLog.warn("vsdParser", e.getMessage());
        }

        String author = null;
        String[] keywords = null;
        String title = null;
        if (summary != null) {
            author = summary.getAuthor();
            if (summary.getKeywords() != null) {
                keywords = summary.getKeywords().split("[ ,;]");
            }
            title = summary.getTitle();
        }

        // The abstract is the first 80 characters of the text (or the trimmed
        // text if it is shorter), with line breaks and tabs turned into spaces.
        List<String> abstrct = new ArrayList<String>();
        if (contents.length() > 0) {
            abstrct.add(((contents.length() > 80) ? contents.substring(0, 80) : contents.trim())
                    .replaceAll("\r\n", " ").replaceAll("\n", " ").replaceAll("\r", " ").replaceAll("\t", " "));
        }

        if (title == null) {
            title = location.toNormalform(true);
        }

        // Assign theDoc BEFORE returning: the finally block treats a null
        // theDoc as a failure, and a throw from finally would discard the
        // pending return value.
        theDoc = new Document(location, // url of the source document
                mimeType, // the documents mime type
                "UTF-8", // charset of the document text
                this, null, // language
                keywords, singleList(title), author, "", null, // an array of section headlines
                abstrct, // an abstract
                0.0f, 0.0f, contents, // the parsed document text
                null, // a map of extracted anchors
                null, null, // a treeset of image URLs
                false, new Date());
        return new Document[] { theDoc };
    } catch (final Exception e) {
        failureReported = true;
        if (e instanceof InterruptedException) {
            throw (InterruptedException) e;
        }

        // if an unexpected error occurs just log the error and raise a new Parser.Failure
        final String errorMsg = "Unable to parse the vsd document '" + location + "':" + e.getMessage();
        AbstractParser.log.severe(errorMsg);
        throw new Parser.Failure(errorMsg, location);
    } finally {
        // Reached with a null document and no handled exception only when a
        // non-Exception throwable (e.g. an Error) escaped the try block.
        if (theDoc == null && !failureReported) {
            final String errorMsg = "Unable to parse the vsd document '" + location
                    + "': possibly out of memory";
            AbstractParser.log.severe(errorMsg);
            throw new Parser.Failure(errorMsg, location);
        }
    }
}

From source file:org.paxle.parser.msoffice.impl.MsVisioParser.java

License:Open Source License

/**
 * Appends the plain text of a Visio document to the given parser document.
 * Nothing is appended when the extractor yields {@code null} or an empty string.
 *
 * @param fs        the POI file system the Visio document is read from
 * @param parserDoc the document that receives the extracted text
 */
@Override
protected void extractText(POIFSFileSystem fs, IParserDocument parserDoc) throws ParserException, IOException {
    final String extracted = new VisioTextExtractor(fs).getText();
    if (extracted == null || extracted.length() == 0) {
        return;
    }
    parserDoc.append(extracted);
}