List of usage examples for the org.apache.commons.io.input.CloseShieldInputStream constructor CloseShieldInputStream(InputStream)
public CloseShieldInputStream(InputStream in)
From source file:org.apache.solr.servlet.HttpSolrCall.java
private void remoteQuery(String coreUrl, HttpServletResponse resp) throws IOException { HttpRequestBase method = null;/*w ww . j a v a 2s. c o m*/ HttpEntity httpEntity = null; try { String urlstr = coreUrl + queryParams.toQueryString(); boolean isPostOrPutRequest = "POST".equals(req.getMethod()) || "PUT".equals(req.getMethod()); if ("GET".equals(req.getMethod())) { method = new HttpGet(urlstr); } else if ("HEAD".equals(req.getMethod())) { method = new HttpHead(urlstr); } else if (isPostOrPutRequest) { HttpEntityEnclosingRequestBase entityRequest = "POST".equals(req.getMethod()) ? new HttpPost(urlstr) : new HttpPut(urlstr); InputStream in = new CloseShieldInputStream(req.getInputStream()); // Prevent close of container streams HttpEntity entity = new InputStreamEntity(in, req.getContentLength()); entityRequest.setEntity(entity); method = entityRequest; } else if ("DELETE".equals(req.getMethod())) { method = new HttpDelete(urlstr); } else { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unexpected method type: " + req.getMethod()); } for (Enumeration<String> e = req.getHeaderNames(); e.hasMoreElements();) { String headerName = e.nextElement(); if (!"host".equalsIgnoreCase(headerName) && !"authorization".equalsIgnoreCase(headerName) && !"accept".equalsIgnoreCase(headerName)) { method.addHeader(headerName, req.getHeader(headerName)); } } // These headers not supported for HttpEntityEnclosingRequests if (method instanceof HttpEntityEnclosingRequest) { method.removeHeaders(TRANSFER_ENCODING_HEADER); method.removeHeaders(CONTENT_LENGTH_HEADER); } final HttpResponse response = solrDispatchFilter.httpClient.execute(method, HttpClientUtil.createNewHttpClientRequestContext()); int httpStatus = response.getStatusLine().getStatusCode(); httpEntity = response.getEntity(); resp.setStatus(httpStatus); for (HeaderIterator responseHeaders = response.headerIterator(); responseHeaders.hasNext();) { Header header = responseHeaders.nextHeader(); // We pull out these two 
headers below because they can cause chunked // encoding issues with Tomcat if (header != null && !header.getName().equalsIgnoreCase(TRANSFER_ENCODING_HEADER) && !header.getName().equalsIgnoreCase(CONNECTION_HEADER)) { resp.addHeader(header.getName(), header.getValue()); } } if (httpEntity != null) { if (httpEntity.getContentEncoding() != null) resp.setCharacterEncoding(httpEntity.getContentEncoding().getValue()); if (httpEntity.getContentType() != null) resp.setContentType(httpEntity.getContentType().getValue()); InputStream is = httpEntity.getContent(); OutputStream os = resp.getOutputStream(); IOUtils.copyLarge(is, os); } } catch (IOException e) { sendError(new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error trying to proxy request for url: " + coreUrl, e)); } finally { Utils.consumeFully(httpEntity); } }
From source file:org.apache.tika.cli.TikaCLI.java
public void process(String arg) throws Exception { if (arg.equals("-?") || arg.equals("--help")) { pipeMode = false;/*from ww w . j a v a2s . c o m*/ usage(); } else if (arg.equals("-V") || arg.equals("--version")) { pipeMode = false; version(); } else if (arg.equals("-v") || arg.equals("--verbose")) { org.apache.log4j.Logger.getRootLogger().setLevel(Level.DEBUG); } else if (arg.equals("-g") || arg.equals("--gui")) { pipeMode = false; if (configFilePath != null) { TikaGUI.main(new String[] { configFilePath }); } else { TikaGUI.main(new String[0]); } } else if (arg.equals("--list-parser") || arg.equals("--list-parsers")) { pipeMode = false; displayParsers(false, false); } else if (arg.equals("--list-detector") || arg.equals("--list-detectors")) { pipeMode = false; displayDetectors(); } else if (arg.equals("--list-parser-detail") || arg.equals("--list-parser-details")) { pipeMode = false; displayParsers(true, false); } else if (arg.equals("--list-parser-detail-apt") || arg.equals("--list-parser-details-apt")) { pipeMode = false; displayParsers(true, true); } else if (arg.equals("--list-met-models")) { pipeMode = false; displayMetModels(); } else if (arg.equals("--list-supported-types")) { pipeMode = false; displaySupportedTypes(); } else if (arg.startsWith("--compare-file-magic=")) { pipeMode = false; compareFileMagic(arg.substring(arg.indexOf('=') + 1)); } else if (arg.equals("--dump-minimal-config")) { pipeMode = false; dumpConfig(TikaConfigSerializer.Mode.MINIMAL); } else if (arg.equals("--dump-current-config")) { pipeMode = false; dumpConfig(TikaConfigSerializer.Mode.CURRENT); } else if (arg.equals("--dump-static-config")) { pipeMode = false; dumpConfig(TikaConfigSerializer.Mode.STATIC); } else if (arg.equals("--dump-static-full-config")) { pipeMode = false; dumpConfig(TikaConfigSerializer.Mode.STATIC_FULL); } else if (arg.equals("--container-aware") || arg.equals("--container-aware-detector")) { // ignore, as container-aware detectors are now always used } else 
if (arg.equals("-f") || arg.equals("--fork")) { fork = true; } else if (arg.startsWith("--config=")) { configure(arg.substring("--config=".length())); } else if (arg.startsWith("--digest=")) { CommonsDigester.DigestAlgorithm[] algos = CommonsDigester.parse(arg.substring("--digest=".length())); digester = new CommonsDigester(MAX_MARK, algos); parser = new DigestingParser(parser, digester); } else if (arg.startsWith("-e")) { encoding = arg.substring("-e".length()); } else if (arg.startsWith("--encoding=")) { encoding = arg.substring("--encoding=".length()); } else if (arg.startsWith("-p") && !arg.equals("-p")) { password = arg.substring("-p".length()); } else if (arg.startsWith("--password=")) { password = arg.substring("--password=".length()); } else if (arg.equals("-j") || arg.equals("--json")) { type = JSON; } else if (arg.equals("-J") || arg.equals("--jsonRecursive")) { recursiveJSON = true; } else if (arg.equals("-y") || arg.equals("--xmp")) { type = XMP; } else if (arg.equals("-x") || arg.equals("--xml")) { type = XML; } else if (arg.equals("-h") || arg.equals("--html")) { type = HTML; } else if (arg.equals("-t") || arg.equals("--text")) { type = TEXT; } else if (arg.equals("-T") || arg.equals("--text-main")) { type = TEXT_MAIN; } else if (arg.equals("-m") || arg.equals("--metadata")) { type = METADATA; } else if (arg.equals("-l") || arg.equals("--language")) { type = LANGUAGE; } else if (arg.equals("-d") || arg.equals("--detect")) { type = DETECT; } else if (arg.startsWith("--extract-dir=")) { extractDir = new File(arg.substring("--extract-dir=".length())); } else if (arg.equals("-z") || arg.equals("--extract")) { type = NO_OUTPUT; context.set(EmbeddedDocumentExtractor.class, new FileEmbeddedDocumentExtractor()); } else if (arg.equals("-r") || arg.equals("--pretty-print")) { prettyPrint = true; } else if (arg.equals("-p") || arg.equals("--port") || arg.equals("-s") || arg.equals("--server")) { serverMode = true; pipeMode = false; } else if 
(arg.startsWith("-c")) { URI uri = new URI(arg.substring("-c".length())); parser = new NetworkParser(uri); } else if (arg.startsWith("--client=")) { URI uri = new URI(arg.substring("--client=".length())); parser = new NetworkParser(uri); } else { pipeMode = false; if (serverMode) { new TikaServer(Integer.parseInt(arg)).start(); } else if (arg.equals("-")) { try (InputStream stream = TikaInputStream.get(new CloseShieldInputStream(System.in))) { type.process(stream, System.out, new Metadata()); } } else { URL url; File file = new File(arg); if (file.isFile()) { url = file.toURI().toURL(); } else { url = new URL(arg); } if (recursiveJSON) { handleRecursiveJson(url, System.out); } else { Metadata metadata = new Metadata(); try (InputStream input = TikaInputStream.get(url, metadata)) { type.process(input, System.out, metadata); } finally { System.out.flush(); } } } } }
From source file:org.apache.tika.parser.apple.AppleSingleFileParser.java
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { EmbeddedDocumentExtractor ex = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context); short numEntries = readThroughNumEntries(stream); long bytesRead = 26; List<FieldInfo> fieldInfoList = getSortedFieldInfoList(stream, numEntries); bytesRead += 12 * numEntries;//from w w w .ja v a 2 s . c o m Metadata embeddedMetadata = new Metadata(); bytesRead = processFieldEntries(stream, fieldInfoList, embeddedMetadata, bytesRead); FieldInfo contentFieldInfo = getContentFieldInfo(fieldInfoList); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); if (contentFieldInfo != null) { long diff = contentFieldInfo.offset - bytesRead; IOUtils.skipFully(stream, diff); if (ex.shouldParseEmbedded(embeddedMetadata)) { // TODO: we should probably add a readlimiting wrapper around this // stream to ensure that not more than contentFieldInfo.length bytes // are read ex.parseEmbedded(new CloseShieldInputStream(stream), xhtml, embeddedMetadata, false); } } xhtml.endDocument(); }
From source file:org.apache.tika.parser.code.SourceCodeParser.java
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { try (AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata, getEncodingDetector(context))) { Charset charset = reader.getCharset(); String mediaType = metadata.get(Metadata.CONTENT_TYPE); String name = metadata.get(Metadata.RESOURCE_NAME_KEY); if (mediaType != null && name != null) { MediaType type = MediaType.parse(mediaType); metadata.set(Metadata.CONTENT_TYPE, type.toString()); metadata.set(Metadata.CONTENT_ENCODING, charset.name()); StringBuilder out = new StringBuilder(); String line;//from w w w . ja v a 2s . co m int nbLines = 0; while ((line = reader.readLine()) != null) { out.append(line + System.getProperty("line.separator")); String author = parserAuthor(line); if (author != null) { metadata.add(TikaCoreProperties.CREATOR, author); } nbLines++; } metadata.set("LoC", String.valueOf(nbLines)); Renderer renderer = getRenderer(type.toString()); String codeAsHtml = renderer.highlight(name, out.toString(), charset.name(), false); Schema schema = context.get(Schema.class, HTML_SCHEMA); org.ccil.cowan.tagsoup.Parser parser = new org.ccil.cowan.tagsoup.Parser(); parser.setProperty(org.ccil.cowan.tagsoup.Parser.schemaProperty, schema); parser.setContentHandler(handler); parser.parse(new InputSource(new StringReader(codeAsHtml))); } } }
From source file:org.apache.tika.parser.crypto.Pkcs7Parser.java
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { try {//w ww . jav a 2s .c om DigestCalculatorProvider digestCalculatorProvider = new JcaDigestCalculatorProviderBuilder() .setProvider("BC").build(); CMSSignedDataParser parser = new CMSSignedDataParser(digestCalculatorProvider, new CloseShieldInputStream(stream)); try { CMSTypedStream content = parser.getSignedContent(); if (content == null) { throw new TikaException("cannot parse detached pkcs7 signature (no signed data to parse)"); } try (InputStream input = content.getContentStream()) { Parser delegate = context.get(Parser.class, EmptyParser.INSTANCE); delegate.parse(input, handler, metadata, context); } } finally { parser.close(); } } catch (OperatorCreationException e) { throw new TikaException("Unable to create DigestCalculatorProvider", e); } catch (CMSException e) { throw new TikaException("Unable to parse pkcs7 signed data", e); } }
From source file:org.apache.tika.parser.csv.TextAndCSVParser.java
private Reader detect(CSVParams params, InputStream stream, Metadata metadata, ParseContext context) throws IOException, TikaException { //if the file was already identified as not .txt, .csv or .tsv //don't even try to csv or not String mediaString = metadata.get(Metadata.CONTENT_TYPE); if (mediaString != null) { MediaType mediaType = MediaType.parse(mediaString); if (!SUPPORTED_TYPES.contains(mediaType.getBaseType())) { params.setMediaType(mediaType); return new AutoDetectReader(new CloseShieldInputStream(stream), metadata, getEncodingDetector(context)); }/*from ww w . ja v a 2 s .c o m*/ } Reader reader = null; if (params.getCharset() == null) { reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata, getEncodingDetector(context)); params.setCharset(((AutoDetectReader) reader).getCharset()); if (params.isComplete()) { return reader; } } else { reader = new BufferedReader( new InputStreamReader(new CloseShieldInputStream(stream), params.getCharset())); } if (params.getDelimiter() == null && (params.getMediaType() == null || isCSVOrTSV(params.getMediaType()))) { CSVSniffer sniffer = new CSVSniffer(markLimit, delimiters); CSVResult result = sniffer.getBest(reader, metadata); params.setMediaType(result.getMediaType()); params.setDelimiter(result.getDelimiter()); } return reader; }
From source file:org.apache.tika.parser.dif.DIFParser.java
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // TODO Auto-generated method stub final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument();/*from ww w . j ava 2 s .c o m*/ xhtml.startElement("p"); TaggedContentHandler tagged = new TaggedContentHandler(handler); try { context.getSAXParser().parse(new CloseShieldInputStream(stream), new OfflineContentHandler( new EmbeddedContentHandler(getContentHandler(tagged, metadata, context)))); } catch (SAXException e) { tagged.throwIfCauseOf(e); throw new TikaException("XML parse error", e); } finally { xhtml.endElement("p"); xhtml.endDocument(); } }
From source file:org.apache.tika.parser.envi.EnviHeaderParser.java
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // Only outputting the MIME type as metadata metadata.set(Metadata.CONTENT_TYPE, ENVI_MIME_TYPE); // The following code was taken from the TXTParser // Automatically detect the character encoding TikaConfig tikaConfig = context.get(TikaConfig.class); if (tikaConfig == null) { tikaConfig = TikaConfig.getDefaultConfig(); }/*www . j a va 2 s.com*/ try (AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata, getEncodingDetector(context))) { Charset charset = reader.getCharset(); MediaType type = new MediaType(MediaType.TEXT_PLAIN, charset); // deprecated, see TIKA-431 metadata.set(Metadata.CONTENT_ENCODING, charset.name()); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); // text contents of the xhtml String line; while ((line = reader.readLine()) != null) { xhtml.startElement("p"); xhtml.characters(line); xhtml.endElement("p"); } xhtml.endDocument(); } }
From source file:org.apache.tika.parser.epub.EpubContentParser.java
/**
 * Parses the stream with the SAX parser from {@code context}, sending events to
 * {@code handler} through an {@code OfflineContentHandler}.
 *
 * @param stream   XML input; shielded so the SAX parser cannot close it
 * @param handler  receives the SAX events
 * @param metadata not used by this method
 * @param context  supplies the SAX parser
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    context.getSAXParser().parse(
            new CloseShieldInputStream(stream),
            new OfflineContentHandler(handler));
}
From source file:org.apache.tika.parser.feed.FeedParser.java
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // set the encoding? try {/*ww w. ja va 2 s . co m*/ SyndFeed feed = new SyndFeedInput().build(new InputSource(new CloseShieldInputStream(stream))); String title = stripTags(feed.getTitleEx()); String description = stripTags(feed.getDescriptionEx()); metadata.set(TikaCoreProperties.TITLE, title); metadata.set(TikaCoreProperties.DESCRIPTION, description); // store the other fields in the metadata XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); xhtml.element("h1", title); xhtml.element("p", description); xhtml.startElement("ul"); for (Object e : feed.getEntries()) { SyndEntry entry = (SyndEntry) e; String link = entry.getLink(); if (link != null) { xhtml.startElement("li"); xhtml.startElement("a", "href", link); xhtml.characters(stripTags(entry.getTitleEx())); xhtml.endElement("a"); SyndContent content = entry.getDescription(); if (content != null) { xhtml.newline(); xhtml.characters(stripTags(content)); } xhtml.endElement("li"); } } xhtml.endElement("ul"); xhtml.endDocument(); } catch (FeedException e) { throw new TikaException("RSS parse error", e); } }