List of usage examples for org.apache.commons.httpclient.HttpParser#readLine
public static String readLine(InputStream paramInputStream, String paramString) throws IOException
From source file:com.cyberway.issue.io.warc.WARCRecord.java
/** * Parse WARC Header Line and Named Fields. * @param in Stream to read.//from w w w . j a v a2 s. c o m * @param identifier Identifier for the hosting Reader. * @param offset Absolute offset into Reader. * @param strict Whether to be loose parsing or not. * @return An ArchiveRecordHeader. * @throws IOException */ protected ArchiveRecordHeader parseHeaders(final InputStream in, final String identifier, final long offset, final boolean strict) throws IOException { final Map<Object, Object> m = new HashMap<Object, Object>(); m.put(ABSOLUTE_OFFSET_KEY, new Long(offset)); m.put(READER_IDENTIFIER_FIELD_KEY, identifier); long startPosition = -1; if (in instanceof RepositionableStream) { startPosition = ((RepositionableStream) in).position(); } String firstLine = new String(HttpParser.readLine(in, WARC_HEADER_ENCODING)); if (firstLine == null || firstLine.length() <= 0) { throw new IOException("Failed to read WARC_MAGIC"); } if (!firstLine.startsWith(WARC_MAGIC)) { throw new IOException("Failed to find WARC MAGIC: " + firstLine); } // Here we start reading off the inputstream but we're reading the // stream direct rather than going via WARCRecord#read. The latter will // keep count of bytes read, digest and fail properly if EOR too soon... // We don't want digesting while reading Headers. // Header[] h = HttpParser.parseHeaders(in, WARC_HEADER_ENCODING); for (int i = 0; i < h.length; i++) { m.put(h[i].getName(), h[i].getValue()); } int headerLength = -1; if (in instanceof RepositionableStream) { headerLength = (int) (((RepositionableStream) in).position() - startPosition); } final int contentOffset = headerLength; incrementPosition(contentOffset); return new ArchiveRecordHeader() { private Map<Object, Object> headers = m; private int contentBegin = contentOffset; public String getDate() { return (String) this.headers.get(HEADER_KEY_DATE); } public String getDigest() { return null; // TODO: perhaps return block-digest? 
// superclass def implies this is calculated ("only after // read in totality"), not pulled from header // return (String)this.headers.get(HEADER_KEY_CHECKSUM); } public String getReaderIdentifier() { return (String) this.headers.get(READER_IDENTIFIER_FIELD_KEY); } public Set getHeaderFieldKeys() { return this.headers.keySet(); } public Map getHeaderFields() { return this.headers; } public Object getHeaderValue(String key) { return this.headers.get(key); } public long getLength() { Object o = this.headers.get(CONTENT_LENGTH); if (o == null) { return -1; } long contentLength = (o instanceof Long) ? ((Long) o).longValue() : Long.parseLong((String) o); return contentLength + contentOffset; } public String getMimetype() { return (String) this.headers.get(CONTENT_TYPE); } public long getOffset() { Object o = this.headers.get(ABSOLUTE_OFFSET_KEY); if (o == null) { return -1; } return (o instanceof Long) ? ((Long) o).longValue() : Long.parseLong((String) o); } public String getRecordIdentifier() { return (String) this.headers.get(RECORD_IDENTIFIER_FIELD_KEY); } public String getUrl() { return (String) this.headers.get(HEADER_KEY_URI); } public String getVersion() { return (String) this.headers.get(VERSION_FIELD_KEY); } public int getContentBegin() { return this.contentBegin; } @Override public String toString() { return this.headers.toString(); } }; }
From source file:com.eviware.soapui.impl.wsdl.monitor.TcpMonWsdlMonitorMessageExchange.java
/**
 * Parses a captured raw HTTP response into this exchange's response fields.
 *
 * <p>Skips leading empty lines, consumes the status line, stores the parsed
 * headers in {@code responseHeaders}, and then either builds multipart support
 * (when the Content-Type starts with "multipart", compared case-insensitively)
 * or pretty-prints the rest of the stream as XML into {@code responseContent}.
 * Finally hands {@code responseWss} to {@code processResponseWss}.
 *
 * <p>Failures are logged rather than propagated, so a malformed capture leaves
 * the exchange partially populated instead of aborting the caller.
 *
 * @param capturedResponseData raw bytes of the captured response
 * @param responseWss incoming WS-Security configuration passed to {@code processResponseWss}
 */
private void parseReponseData(byte[] capturedResponseData, IncomingWss responseWss) {
    responseContentLength = capturedResponseData.length;
    ByteArrayInputStream in = new ByteArrayInputStream(capturedResponseData);
    try {
        String line = null;
        do {
            line = HttpParser.readLine(in, HTTP_ELEMENT_CHARSET);
        } while (line != null && line.length() == 0);

        if (line == null) {
            throw new IOException("Missing response status line");
        }

        Header[] headers = HttpParser.parseHeaders(in, HTTP_ELEMENT_CHARSET);
        if (headers != null) {
            for (Header header : headers) {
                responseHeaders.put(header.getName(), header.getValue());
            }
        }

        responseContentType = responseHeaders.get("Content-Type", "");
        // regionMatches(ignoreCase=true) is locale-independent, unlike
        // toUpperCase(), which maps 'i' to a dotted capital under a Turkish
        // default locale and would then miss "multipart/..." content types.
        if (responseContentType != null
                && responseContentType.regionMatches(true, 0, "MULTIPART", 0, "MULTIPART".length())) {
            StringToStringMap values = StringToStringMap.fromHttpHeader(responseContentType);
            responseMmSupport = new MultipartMessageSupport(
                    new MonitorMessageExchangeDataSource("monitor response", in, responseContentType),
                    values.get("start"), null, true,
                    SoapUI.getSettings().getBoolean(WsdlSettings.PRETTY_PRINT_RESPONSE_MESSAGES));
            responseContentType = responseMmSupport.getRootPart().getContentType();
        } else {
            this.responseContent = XmlUtils.prettyPrintXml(Tools.readAll(in, 0).toString());
        }

        processResponseWss(responseWss);
    } catch (Exception e) {
        // Previously swallowed without a trace; record it so broken captures can be diagnosed.
        SoapUI.logError(e);
        try {
            in.close();
        } catch (IOException e1) {
            SoapUI.logError(e1);
        }
    }
}
From source file:com.eviware.soapui.impl.wsdl.monitor.TcpMonWsdlMonitorMessageExchange.java
private void parseRequestData(byte[] capturedRequestData, IncomingWss requestWss) { requestContentLength = capturedRequestData.length; ByteArrayInputStream in = new ByteArrayInputStream(capturedRequestData); try {//from w ww . j av a2 s . com String line = null; do { line = HttpParser.readLine(in, HTTP_ELEMENT_CHARSET); } while (line != null && line.length() == 0); if (line == null) { throw new Exception("Missing request status line"); } Header[] headers = HttpParser.parseHeaders(in, HTTP_ELEMENT_CHARSET); if (headers != null) { for (Header header : headers) { requestHeaders.put(header.getName(), header.getValue()); } } requestContentType = requestHeaders.get("Content-Type", ""); if (requestContentType != null && requestContentType.toUpperCase().startsWith("MULTIPART")) { StringToStringMap values = StringToStringMap.fromHttpHeader(requestContentType); requestMmSupport = new MultipartMessageSupport( new MonitorMessageExchangeDataSource("monitor request", in, requestContentType), values.get("start"), null, true, SoapUI.getSettings().getBoolean(WsdlSettings.PRETTY_PRINT_RESPONSE_MESSAGES)); requestContentType = requestMmSupport.getRootPart().getContentType(); } else { this.requestContent = XmlUtils.prettyPrintXml(Tools.readAll(in, 0).toString()); } processRequestWss(requestWss); operation = findOperation(); } catch (Exception e) { try { in.close(); } catch (IOException e1) { e1.printStackTrace(); } } }
From source file:com.tasktop.c2c.server.ssh.server.commands.AbstractInteractiveProxyCommand.java
private void readHttpResponse(InputStream proxyInput) throws IOException, CommandException { String statusLineText = HttpParser.readLine(proxyInput, HTTP_ENTITY_CHARSET); StatusLine statusLine = new StatusLine(statusLineText); if (statusLine.getStatusCode() != HttpServletResponse.SC_OK) { String message = Integer.toString(statusLine.getStatusCode()); String reasonPhrase = statusLine.getReasonPhrase(); if (reasonPhrase != null && !reasonPhrase.isEmpty()) { message += ": " + reasonPhrase; }//from ww w . j a va 2s . com throw new CommandException(-1, message); } Header[] parsedHeaders = HttpParser.parseHeaders(proxyInput, HTTP_ENTITY_CHARSET); HeaderGroup headerGroup = new HeaderGroup(); headerGroup.setHeaders(parsedHeaders); Header transferEncoding = headerGroup.getFirstHeader("Transfer-Encoding"); if (transferEncoding == null || !transferEncoding.getValue().equals("chunked")) { throw new IOException("Expected Transfer-Encoding of \"chunked\" but received " + transferEncoding); } Header contentType = headerGroup.getFirstHeader("Content-Type"); if (contentType == null || !contentType.getValue().equals(MIME_TYPE_APPLICATION_OCTET_STREAM)) { throw new IOException("Unexpected Content-Type " + contentType); } }
From source file:org.archive.jbs.arc.ArchiveRecordProxy.java
/** * Construct an WARCRecord proxy. Read at most sizeLimit * bytes from the record body./* w w w . ja v a2 s . co m*/ */ public ArchiveRecordProxy(WARCRecord warc, int sizeLimit) throws IOException { ArchiveRecordHeader header = warc.getHeader(); this.warcRecordType = (String) header.getHeaderValue(WARCConstants.HEADER_KEY_TYPE); this.warcContentType = (String) header.getHeaderValue(WARCConstants.CONTENT_TYPE); this.url = header.getUrl(); this.digest = (String) header.getHeaderValue(WARCConstants.HEADER_KEY_PAYLOAD_DIGEST); // Convert to familiar YYYYMMDDHHMMSS format String warcDate = (String) header.getHeaderValue(WARCConstants.HEADER_KEY_DATE); this.date = new StringBuilder().append(warcDate, 0, 4).append(warcDate, 5, 7).append(warcDate, 8, 10) .append(warcDate, 11, 13).append(warcDate, 14, 16).append(warcDate, 17, 19).toString(); // Check if HTTP (not dns or the like) and then read the HTTP // headers so that the file position is at the response body if (WARCConstants.HTTP_RESPONSE_MIMETYPE.equals(this.warcContentType)) { // Sometimes an HTTP response will have whitespace (such as // blank lines) before the actual HTTP status line like: // [blank] // HTTP/1.0 200 OK // so we have to gobble them up. String line; while ((line = HttpParser.readLine(warc, "utf-8")) != null) { line = line.trim(); // If an empty line, or an invalid HTTP-status line: skip it! if (line.length() == 0) continue; if (!line.startsWith("HTTP")) continue; try { // Now get on with parsing the status line. StatusLine statusLine = new StatusLine(line); this.code = Integer.toString(statusLine.getStatusCode()); break; } catch (HttpException e) { // The line started with "HTTP", but was not a full, // valid HTTP-Status line. Assume that we won't see // one, so break out of the loop. // But first, set the HTTP code to a value indicating // there was no valid HTTP code. this.code = ""; break; } } // Skip over the HTTP headers, we just want the body of the HTTP response. 
skipHttpHeaders(warc); // The length of the HTTP response body is equal to the number // of bytes remaining in the WARC record. this.length = header.getLength() - warc.getPosition(); this.body = readBytes(warc, this.length, sizeLimit); } else if (WARCConstants.WARCRecordType.resource.toString().equals(this.warcRecordType) && // We check for "ftp://" here because we don't want to waste the time to copy the // bytes for resource record types we don't care about. If we want to pass along // all resource records, simply remove this check. this.url.startsWith("ftp://")) { // HACK: We set a bogus HTTP status code 200 here because // later in the indexing workflow, non-200 codes are // filtered out so that we don't index 404 pages and // such. this.code = "200"; // The length of the FTP response body is equal to the number // of bytes remaining in the WARC record. this.length = header.getLength() - warc.getPosition(); this.body = readBytes(warc, this.length, sizeLimit); } }
From source file:org.mule.transport.http.functional.MockHttpServer.java
/**
 * Builds an {@code HttpRequest} from the given stream: the first line is
 * parsed as the request line, the following lines as headers, and the stream
 * itself is handed to the request for the remainder.
 *
 * @param in stream positioned at the start of an HTTP request
 * @param encoding charset name used to decode the request line and headers
 * @return the parsed request
 * @throws RuntimeException wrapping any exception raised while reading or parsing
 */
protected HttpRequest parseRequest(InputStream in, String encoding) {
    final HttpRequest parsed;
    try {
        final RequestLine firstLine = RequestLine.parseLine(HttpParser.readLine(in, encoding));
        parsed = new HttpRequest(firstLine, HttpParser.parseHeaders(in, encoding), in, encoding);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return parsed;
}
From source file:org.mule.transport.http.functional.SingleRequestMockHttpServer.java
@Override protected void processRequests(InputStream in, OutputStream out) throws Exception { String line = HttpParser.readLine(in, encoding); RequestLine requestLine = RequestLine.parseLine(line); HttpRequest request = new HttpRequest(requestLine, HttpParser.parseHeaders(in, encoding), in, encoding); processSingleRequest(request);//from www. j av a2 s. co m out.write(statusLine.getBytes()); out.write('\n'); out.flush(); }
From source file:org.mule.transport.http.HttpServerConnection.java
/**
 * Returns the next non-empty line from the connection's input, skipping any
 * empty lines before it.
 *
 * @return the first non-empty line, or {@code null} if the input ended first,
 *         in which case keep-alive is switched off
 * @throws IOException if reading from the input fails
 */
private String readLine() throws IOException {
    String current = HttpParser.readLine(in, encoding);
    while (current != null && current.length() == 0) {
        current = HttpParser.readLine(in, encoding);
    }

    if (current == null) {
        // Nothing more to read: make sure the connection is not kept alive.
        setKeepAlive(false);
    }
    return current;
}
From source file:org.zaproxy.zap.network.ZapHttpParser.java
@SuppressWarnings({ "rawtypes", "unchecked", "null" }) public static Header[] parseHeaders(InputStream is, String charset) throws IOException, HttpException { ArrayList headers = new ArrayList(); String name = null;//w w w.j a v a 2s . co m StringBuffer value = null; for (;;) { String line = HttpParser.readLine(is, charset); if ((line == null) || (line.trim().length() < 1)) { break; } // Parse the header name and value // Check for folded headers first // Detect LWS-char see HTTP/1.0 or HTTP/1.1 Section 2.2 // discussion on folded headers if ((line.charAt(0) == ' ') || (line.charAt(0) == '\t')) { // we have continuation folded header // so append value if (value != null) { value.append(' '); value.append(line.trim()); } } else { // make sure we save the previous name,value pair if present if (name != null) { headers.add(new Header(name, value.toString())); } // Otherwise we should have normal HTTP header line // Parse the header name and value int colon = line.indexOf(":"); if (colon < 0) { // Do not thrown the exception ignore it instead // throw new ProtocolException("Unable to parse header: " + line); logger.warn("Ignoring malformed HTTP header line: \"" + line + "\""); name = null; value = null; } else { name = line.substring(0, colon).trim(); value = new StringBuffer(line.substring(colon + 1).trim()); } } } // make sure we save the last name,value pair if present if (name != null) { headers.add(new Header(name, value.toString())); } return (Header[]) headers.toArray(new Header[headers.size()]); }
From source file:uk.bl.wa.hadoop.mapreduce.warcstats.WARCRawStatsMapper.java
@Override public void map(Text key, WritableArchiveRecord value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { ArchiveRecord record = value.getRecord(); ArchiveRecordHeader header = record.getHeader(); // The header.url() might be encapsulated in '<>', which normally gives problems as they are passed back // when using header.getUrl(), but this code looks like stats extraction, so we leave them be. // Logging for debug info: log.debug("Processing @" + header.getOffset() + "+" + record.available() + "," + header.getLength() + ": " + header.getUrl());//from w w w .j a v a2 s . c o m for (String h : header.getHeaderFields().keySet()) { log.debug("ArchiveHeader: " + h + " -> " + header.getHeaderValue(h)); } try { MDX mdx = new MDX(); Date crawl_date = ArchiveUtils.parse14DigitISODate(header.getDate(), null); if (crawl_date != null) { mdx.setTs(ArchiveUtils.get14DigitDate(crawl_date)); } else { mdx.setTs(header.getDate()); } mdx.setUrl(header.getUrl()); mdx.setHash(header.getDigest()); // Data from WARC record: mdx.put("source-file", key.toString()); mdx.put("content-type", header.getMimetype()); mdx.put("content-length", "" + header.getContentLength()); mdx.put("length", "" + header.getLength()); mdx.put("source-offset", "" + header.getOffset()); mdx.put("record-identifier", header.getRecordIdentifier()); for (String k : header.getHeaderFieldKeys()) { mdx.put("HEADER-" + k, "" + header.getHeaderValue(k)); } // check record type and look for HTTP data: Header[] httpHeaders = null; if (record instanceof WARCRecord) { mdx.setRecordType("warc." + header.getHeaderValue(HEADER_KEY_TYPE)); mdx.setHash("" + header.getHeaderValue(WARCConstants.HEADER_KEY_PAYLOAD_DIGEST)); // There are not always headers! The code should check first. 
String statusLine = HttpParser.readLine(record, "UTF-8"); if (statusLine != null && statusLine.startsWith("HTTP")) { String firstLine[] = statusLine.split(" "); if (firstLine.length > 1) { String statusCode = firstLine[1].trim(); mdx.put("status-code", statusCode); try { httpHeaders = HttpParser.parseHeaders(record, "UTF-8"); } catch (ProtocolException p) { log.error("ProtocolException [" + statusCode + "]: " + header.getHeaderValue(WARCConstants.HEADER_KEY_FILENAME) + "@" + header.getHeaderValue(WARCConstants.ABSOLUTE_OFFSET_KEY), p); } } else { log.warn("Could not parse status line: " + statusLine); } } else { log.warn("Invalid status line: " + header.getHeaderValue(WARCConstants.HEADER_KEY_FILENAME) + "@" + header.getHeaderValue(WARCConstants.ABSOLUTE_OFFSET_KEY)); } } else if (record instanceof ARCRecord) { mdx.setRecordType("arc"); ARCRecord arcr = (ARCRecord) record; mdx.put("status-code", "" + arcr.getStatusCode()); httpHeaders = arcr.getHttpHeaders(); } else { mdx.setRecordType("unknown"); } // Add in http headers if (httpHeaders != null) { for (Header h : httpHeaders) { mdx.put("HTTP-" + h.getName(), h.getValue()); } } // URL: String uri = header.getUrl(); if (uri != null) { UsableURI uuri = UsableURIFactory.getInstance(uri); // Hosts: if ("https".contains(uuri.getScheme())) { mdx.put("host", uuri.getAuthority()); } } else { mdx.put("errors", "malformed-url"); } // Year String date = header.getDate(); if (date != null && date.length() > 4) { mdx.put("year", date.substring(0, 4)); } else { mdx.put("errors", "malformed-date"); } // And collect: String outKey = mdx.getHash(); if (outKey == null || outKey == "" || "null".equals(outKey)) { outKey = mdx.getRecordType() + ":" + header.getMimetype(); } else { outKey = mdx.getRecordType() + ":" + outKey; } output.collect(new Text(outKey), new Text(mdx.toString())); } catch (JSONException e) { // TODO Auto-generated catch block e.printStackTrace(); } }