Example usage for org.apache.commons.httpclient HttpParser readLine

List of usage examples for org.apache.commons.httpclient HttpParser readLine

Introduction

On this page you can find example usages of org.apache.commons.httpclient HttpParser readLine.

Prototype

public static String readLine(InputStream paramInputStream, String paramString) throws IOException 

Source Link

Usage

From source file:com.cyberway.issue.io.warc.WARCRecord.java

/**
 * Parse WARC Header Line and Named Fields.
 *
 * <p>Reads the first line from the stream, verifies it starts with
 * {@code WARC_MAGIC}, then parses the named fields that follow into a map
 * that backs the returned {@link ArchiveRecordHeader}.
 *
 * @param in Stream to read.
 * @param identifier Identifier for the hosting Reader.
 * @param offset Absolute offset into Reader.
 * @param strict Whether to be loose parsing or not (not consulted by this method).
 * @return An ArchiveRecordHeader.
 * @throws IOException if the stream ends before a first line is read, the
 *         first line is empty, or it does not start with {@code WARC_MAGIC}.
 */
protected ArchiveRecordHeader parseHeaders(final InputStream in, final String identifier, final long offset,
        final boolean strict) throws IOException {
    final Map<Object, Object> m = new HashMap<Object, Object>();
    m.put(ABSOLUTE_OFFSET_KEY, Long.valueOf(offset));
    m.put(READER_IDENTIFIER_FIELD_KEY, identifier);

    // Remember where the header starts so its length can be computed below
    // (only possible when the stream can report its position).
    long startPosition = -1;
    if (in instanceof RepositionableStream) {
        startPosition = ((RepositionableStream) in).position();
    }
    // readLine returns null at end of stream; do not wrap it in
    // new String(...), which would throw a NullPointerException before the
    // check below could report the problem as an IOException.
    final String firstLine = HttpParser.readLine(in, WARC_HEADER_ENCODING);
    if (firstLine == null || firstLine.length() <= 0) {
        throw new IOException("Failed to read WARC_MAGIC");
    }
    if (!firstLine.startsWith(WARC_MAGIC)) {
        throw new IOException("Failed to find WARC MAGIC: " + firstLine);
    }
    // Here we start reading off the inputstream but we're reading the
    // stream direct rather than going via WARCRecord#read.  The latter will
    // keep count of bytes read, digest and fail properly if EOR too soon...
    // We don't want digesting while reading Headers.
    //
    Header[] h = HttpParser.parseHeaders(in, WARC_HEADER_ENCODING);
    for (int i = 0; i < h.length; i++) {
        m.put(h[i].getName(), h[i].getValue());
    }
    // Stays at -1 when the stream cannot report its position.
    int headerLength = -1;
    if (in instanceof RepositionableStream) {
        headerLength = (int) (((RepositionableStream) in).position() - startPosition);
    }
    final int contentOffset = headerLength;
    incrementPosition(contentOffset);

    return new ArchiveRecordHeader() {
        private Map<Object, Object> headers = m;
        private int contentBegin = contentOffset;

        public String getDate() {
            return (String) this.headers.get(HEADER_KEY_DATE);
        }

        public String getDigest() {
            return null;
            // TODO: perhaps return block-digest?
            // superclass def implies this is calculated ("only after
            // read in totality"), not pulled from header
            //            return (String)this.headers.get(HEADER_KEY_CHECKSUM);
        }

        public String getReaderIdentifier() {
            return (String) this.headers.get(READER_IDENTIFIER_FIELD_KEY);
        }

        public Set getHeaderFieldKeys() {
            return this.headers.keySet();
        }

        public Map getHeaderFields() {
            return this.headers;
        }

        public Object getHeaderValue(String key) {
            return this.headers.get(key);
        }

        // Content-Length header value plus the header length, or -1 when
        // no Content-Length field is present.
        public long getLength() {
            Object o = this.headers.get(CONTENT_LENGTH);
            if (o == null) {
                return -1;
            }
            long contentLength = (o instanceof Long) ? ((Long) o).longValue() : Long.parseLong((String) o);
            return contentLength + contentOffset;
        }

        public String getMimetype() {
            return (String) this.headers.get(CONTENT_TYPE);
        }

        // Absolute offset stored under ABSOLUTE_OFFSET_KEY, or -1 if absent.
        public long getOffset() {
            Object o = this.headers.get(ABSOLUTE_OFFSET_KEY);
            if (o == null) {
                return -1;
            }
            return (o instanceof Long) ? ((Long) o).longValue() : Long.parseLong((String) o);
        }

        public String getRecordIdentifier() {
            return (String) this.headers.get(RECORD_IDENTIFIER_FIELD_KEY);
        }

        public String getUrl() {
            return (String) this.headers.get(HEADER_KEY_URI);
        }

        public String getVersion() {
            return (String) this.headers.get(VERSION_FIELD_KEY);
        }

        public int getContentBegin() {
            return this.contentBegin;
        }

        @Override
        public String toString() {
            return this.headers.toString();
        }
    };
}

From source file:com.eviware.soapui.impl.wsdl.monitor.TcpMonWsdlMonitorMessageExchange.java

/**
 * Parses a captured raw HTTP response: skips leading empty lines, reads the
 * status line and headers, then either sets up multipart support or stores
 * the pretty-printed body, and finally runs response WSS processing.
 *
 * <p>Parsing is best-effort: a failure does not propagate to the caller, but
 * it is logged so that a half-populated exchange can be diagnosed.
 *
 * @param capturedResponseData raw bytes of the captured response
 * @param responseWss incoming WSS configuration handed to {@code processResponseWss}
 */
private void parseReponseData(byte[] capturedResponseData, IncomingWss responseWss) {
    responseContentLength = capturedResponseData.length;
    ByteArrayInputStream in = new ByteArrayInputStream(capturedResponseData);
    try {

        // Skip any empty lines that precede the status line.
        String line = null;
        do {
            line = HttpParser.readLine(in, HTTP_ELEMENT_CHARSET);
        } while (line != null && line.length() == 0);

        if (line == null) {
            throw new Exception("Missing response status line");
        }

        Header[] headers = HttpParser.parseHeaders(in, HTTP_ELEMENT_CHARSET);
        if (headers != null) {
            for (Header header : headers) {
                responseHeaders.put(header.getName(), header.getValue());
            }
        }

        responseContentType = responseHeaders.get("Content-Type", "");
        if (responseContentType != null && responseContentType.toUpperCase().startsWith("MULTIPART")) {
            StringToStringMap values = StringToStringMap.fromHttpHeader(responseContentType);
            responseMmSupport = new MultipartMessageSupport(
                    new MonitorMessageExchangeDataSource("monitor response", in, responseContentType),
                    values.get("start"), null, true,
                    SoapUI.getSettings().getBoolean(WsdlSettings.PRETTY_PRINT_RESPONSE_MESSAGES));
            responseContentType = responseMmSupport.getRootPart().getContentType();
        } else {
            this.responseContent = XmlUtils.prettyPrintXml(Tools.readAll(in, 0).toString());
        }

        processResponseWss(responseWss);
    } catch (Exception e) {
        // Do not fail the caller, but do not lose the cause either.
        SoapUI.logError(e);
        try {
            in.close();
        } catch (IOException e1) {
            SoapUI.logError(e1);
        }
    }
}

From source file:com.eviware.soapui.impl.wsdl.monitor.TcpMonWsdlMonitorMessageExchange.java

/**
 * Parses a captured raw HTTP request: skips leading empty lines, reads the
 * request line and headers, then either sets up multipart support or stores
 * the pretty-printed body, runs request WSS processing and resolves the
 * operation via {@code findOperation()}.
 *
 * <p>Parsing is best-effort: a failure does not propagate to the caller, but
 * it is logged so that a half-populated exchange can be diagnosed.
 *
 * @param capturedRequestData raw bytes of the captured request
 * @param requestWss incoming WSS configuration handed to {@code processRequestWss}
 */
private void parseRequestData(byte[] capturedRequestData, IncomingWss requestWss) {
    requestContentLength = capturedRequestData.length;
    ByteArrayInputStream in = new ByteArrayInputStream(capturedRequestData);
    try {

        // Skip any empty lines that precede the request line.
        String line = null;
        do {
            line = HttpParser.readLine(in, HTTP_ELEMENT_CHARSET);
        } while (line != null && line.length() == 0);

        if (line == null) {
            throw new Exception("Missing request status line");
        }

        Header[] headers = HttpParser.parseHeaders(in, HTTP_ELEMENT_CHARSET);
        if (headers != null) {
            for (Header header : headers) {
                requestHeaders.put(header.getName(), header.getValue());
            }
        }

        requestContentType = requestHeaders.get("Content-Type", "");
        if (requestContentType != null && requestContentType.toUpperCase().startsWith("MULTIPART")) {
            StringToStringMap values = StringToStringMap.fromHttpHeader(requestContentType);
            requestMmSupport = new MultipartMessageSupport(
                    new MonitorMessageExchangeDataSource("monitor request", in, requestContentType),
                    values.get("start"), null, true,
                    SoapUI.getSettings().getBoolean(WsdlSettings.PRETTY_PRINT_RESPONSE_MESSAGES));
            requestContentType = requestMmSupport.getRootPart().getContentType();
        } else {
            this.requestContent = XmlUtils.prettyPrintXml(Tools.readAll(in, 0).toString());
        }

        processRequestWss(requestWss);

        operation = findOperation();
    } catch (Exception e) {
        // Do not fail the caller, but do not lose the cause either.
        SoapUI.logError(e);
        try {
            in.close();
        } catch (IOException e1) {
            SoapUI.logError(e1);
        }
    }
}

From source file:com.tasktop.c2c.server.ssh.server.commands.AbstractInteractiveProxyCommand.java

/**
 * Reads and validates the head of the proxied HTTP response: the status
 * line must carry {@code SC_OK}, the response must use chunked transfer
 * encoding and its content type must be
 * {@code MIME_TYPE_APPLICATION_OCTET_STREAM}.
 *
 * @param proxyInput stream positioned at the start of the HTTP response
 * @throws IOException if the stream ends before a status line is read, or the
 *         Transfer-Encoding / Content-Type headers are missing or unexpected
 * @throws CommandException if the status code is not {@code SC_OK}
 */
private void readHttpResponse(InputStream proxyInput) throws IOException, CommandException {
    String statusLineText = HttpParser.readLine(proxyInput, HTTP_ENTITY_CHARSET);
    if (statusLineText == null) {
        // readLine yields null at end of stream; report it as an I/O problem
        // instead of handing null to the StatusLine parser.
        throw new IOException("Unexpected end of stream while reading HTTP status line");
    }
    StatusLine statusLine = new StatusLine(statusLineText);
    if (statusLine.getStatusCode() != HttpServletResponse.SC_OK) {
        String message = Integer.toString(statusLine.getStatusCode());
        String reasonPhrase = statusLine.getReasonPhrase();
        if (reasonPhrase != null && !reasonPhrase.isEmpty()) {
            message += ": " + reasonPhrase;
        }
        throw new CommandException(-1, message);
    }
    Header[] parsedHeaders = HttpParser.parseHeaders(proxyInput, HTTP_ENTITY_CHARSET);
    HeaderGroup headerGroup = new HeaderGroup();
    headerGroup.setHeaders(parsedHeaders);

    Header transferEncoding = headerGroup.getFirstHeader("Transfer-Encoding");
    // Transfer-coding names are case-insensitive, so accept e.g. "Chunked" too.
    if (transferEncoding == null || !"chunked".equalsIgnoreCase(transferEncoding.getValue())) {
        throw new IOException("Expected Transfer-Encoding of \"chunked\" but received " + transferEncoding);
    }
    Header contentType = headerGroup.getFirstHeader("Content-Type");
    // Constant-first comparison so a header without a value cannot cause an NPE.
    if (contentType == null || !MIME_TYPE_APPLICATION_OCTET_STREAM.equals(contentType.getValue())) {
        throw new IOException("Unexpected Content-Type " + contentType);
    }
}

From source file:org.archive.jbs.arc.ArchiveRecordProxy.java

/**
 * Construct a WARCRecord proxy.  Read at most sizeLimit
 * bytes from the record body.
 *
 * <p>For HTTP response records the HTTP status line and headers are consumed
 * first so that the body starts at the HTTP entity.  For "resource" records
 * whose URL starts with {@code ftp://} the whole remaining record is read and
 * the status code is forced to "200".  Other records get no body.
 *
 * @param warc      the WARC record to read; its stream position is advanced
 * @param sizeLimit limit passed to {@code readBytes} for the body
 * @throws IOException if reading from the record fails
 */
public ArchiveRecordProxy(WARCRecord warc, int sizeLimit) throws IOException {
    ArchiveRecordHeader header = warc.getHeader();

    this.warcRecordType = (String) header.getHeaderValue(WARCConstants.HEADER_KEY_TYPE);
    this.warcContentType = (String) header.getHeaderValue(WARCConstants.CONTENT_TYPE);

    this.url = header.getUrl();
    this.digest = (String) header.getHeaderValue(WARCConstants.HEADER_KEY_PAYLOAD_DIGEST);

    // Convert to familiar YYYYMMDDHHMMSS format
    // NOTE(review): assumes the date header is present and at least 19
    // characters long in "YYYY-MM-DDTHH:MM:SS" layout - confirm against callers.
    String warcDate = (String) header.getHeaderValue(WARCConstants.HEADER_KEY_DATE);
    this.date = new StringBuilder().append(warcDate, 0, 4).append(warcDate, 5, 7).append(warcDate, 8, 10)
            .append(warcDate, 11, 13).append(warcDate, 14, 16).append(warcDate, 17, 19).toString();

    // Check if HTTP (not dns or the like) and then read the HTTP
    // headers so that the file position is at the response body
    if (WARCConstants.HTTP_RESPONSE_MIMETYPE.equals(this.warcContentType)) {
        // Sometimes an HTTP response will have whitespace (such as
        // blank lines) before the actual HTTP status line like:
        //   [blank]
        //   HTTP/1.0 200 OK
        // so we have to gobble them up.
        String line;
        while ((line = HttpParser.readLine(warc, "utf-8")) != null) {
            line = line.trim();

            // If an empty line, or an invalid HTTP-status line: skip it!
            if (line.length() == 0)
                continue;
            if (!line.startsWith("HTTP"))
                continue;

            try {
                // Now get on with parsing the status line.
                StatusLine statusLine = new StatusLine(line);
                this.code = Integer.toString(statusLine.getStatusCode());
                break;
            } catch (HttpException e) {
                // The line started with "HTTP", but was not a full,
                // valid HTTP-Status line.  Assume that we won't see
                // one, so break out of the loop.
                // But first, set the HTTP code to a value indicating
                // there was no valid HTTP code.
                this.code = "";
                break;
            }
        }

        // Skip over the HTTP headers, we just want the body of the HTTP response.
        skipHttpHeaders(warc);

        // The length of the HTTP response body is equal to the number
        // of bytes remaining in the WARC record.
        this.length = header.getLength() - warc.getPosition();

        this.body = readBytes(warc, this.length, sizeLimit);
    } else if (WARCConstants.WARCRecordType.resource.toString().equals(this.warcRecordType) &&
    // We check for "ftp://" here because we don't want to waste the time to copy the
    // bytes for resource record types we don't care about.  If we want to pass along
    // all resource records, simply remove this check.
    // The null guard keeps a resource record without a target URI from
    // failing with a NullPointerException; it is simply not read.
            this.url != null && this.url.startsWith("ftp://")) {
        // HACK: We set a bogus HTTP status code 200 here because
        //       later in the indexing workflow, non-200 codes are
        //       filtered out so that we don't index 404 pages and
        //       such.
        this.code = "200";

        // The length of the FTP response body is equal to the number
        // of bytes remaining in the WARC record.
        this.length = header.getLength() - warc.getPosition();

        this.body = readBytes(warc, this.length, sizeLimit);
    }

}

From source file:org.mule.transport.http.functional.MockHttpServer.java

/**
 * Reads one request line plus the headers that follow it from the stream
 * and wraps them, together with the stream itself, in an {@code HttpRequest}.
 * Any failure is rethrown wrapped in a {@link RuntimeException}.
 *
 * @param in stream positioned at the start of a request
 * @param encoding charset name used to decode the request line and headers
 * @return the parsed request
 */
protected HttpRequest parseRequest(InputStream in, String encoding) {
    try {
        RequestLine firstLine = RequestLine.parseLine(HttpParser.readLine(in, encoding));
        return new HttpRequest(firstLine, HttpParser.parseHeaders(in, encoding), in, encoding);
    } catch (Exception failure) {
        throw new RuntimeException(failure);
    }
}

From source file:org.mule.transport.http.functional.SingleRequestMockHttpServer.java

/**
 * Handles exactly one request: parses the request line and headers from
 * {@code in}, passes the request to {@code processSingleRequest}, then writes
 * the configured status line followed by a newline to {@code out}.
 */
@Override
protected void processRequests(InputStream in, OutputStream out) throws Exception {
    RequestLine parsedLine = RequestLine.parseLine(HttpParser.readLine(in, encoding));
    processSingleRequest(new HttpRequest(parsedLine, HttpParser.parseHeaders(in, encoding), in, encoding));

    // Reply with the canned status line and make sure it is sent.
    out.write(statusLine.getBytes());
    out.write('\n');
    out.flush();
}

From source file:org.mule.transport.http.HttpServerConnection.java

/**
 * Returns the next non-empty line from the connection's input, skipping
 * empty lines. When the parser returns {@code null}, keep-alive is switched
 * off and {@code null} is returned.
 */
private String readLine() throws IOException {
    for (;;) {
        String candidate = HttpParser.readLine(in, encoding);
        if (candidate == null) {
            setKeepAlive(false);
            return null;
        }
        if (candidate.length() != 0) {
            return candidate;
        }
    }
}

From source file:org.zaproxy.zap.network.ZapHttpParser.java

/**
 * Lenient header parser: reads lines until the stream ends or a blank line
 * is met and turns them into {@code Header} objects. Lines starting with a
 * space or tab are appended to the previous header's value; lines without a
 * colon are logged and ignored instead of raising an exception.
 *
 * @param is stream to read header lines from
 * @param charset charset name used to decode each line
 * @return the parsed headers, in the order they were read
 */
@SuppressWarnings({ "rawtypes", "unchecked", "null" })
public static Header[] parseHeaders(InputStream is, String charset) throws IOException, HttpException {
    ArrayList parsed = new ArrayList();
    String currentName = null;
    StringBuffer currentValue = null;

    String rawLine;
    while ((rawLine = HttpParser.readLine(is, charset)) != null && rawLine.trim().length() >= 1) {
        char first = rawLine.charAt(0);

        // Folded header (leading LWS, see HTTP/1.0 or HTTP/1.1 Section 2.2):
        // extend the value currently being built, if there is one.
        if (first == ' ' || first == '\t') {
            if (currentValue != null) {
                currentValue.append(' ');
                currentValue.append(rawLine.trim());
            }
            continue;
        }

        // A new header line starts: flush the pending name/value pair first.
        if (currentName != null) {
            parsed.add(new Header(currentName, currentValue.toString()));
        }

        int separator = rawLine.indexOf(":");
        if (separator >= 0) {
            currentName = rawLine.substring(0, separator).trim();
            currentValue = new StringBuffer(rawLine.substring(separator + 1).trim());
        } else {
            // Malformed line: warn and drop it rather than throwing.
            logger.warn("Ignoring malformed HTTP header line: \"" + rawLine + "\"");
            currentName = null;
            currentValue = null;
        }
    }

    // Flush the last pending name/value pair, if any.
    if (currentName != null) {
        parsed.add(new Header(currentName, currentValue.toString()));
    }

    return (Header[]) parsed.toArray(new Header[parsed.size()]);
}

From source file:uk.bl.wa.hadoop.mapreduce.warcstats.WARCRawStatsMapper.java

/**
 * Builds an MDX statistics record for one archive record and emits it keyed
 * by record type plus payload hash (or record type plus MIME type when no
 * usable hash is available).
 *
 * @param key      source file name, stored as "source-file"
 * @param value    wrapper holding the archive record to inspect
 * @param output   collector receiving the (key, MDX) pair
 * @param reporter unused here
 * @throws IOException if reading from the record or parsing its URL fails
 */
@Override
public void map(Text key, WritableArchiveRecord value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    ArchiveRecord record = value.getRecord();
    ArchiveRecordHeader header = record.getHeader();

    // The header.url() might be encapsulated in '<>', which normally gives problems as they are passed back
    // when using header.getUrl(), but this code looks like stats extraction, so we leave them be.

    // Logging for debug info:
    log.debug("Processing @" + header.getOffset() + "+" + record.available() + "," + header.getLength() + ": "
            + header.getUrl());
    for (String h : header.getHeaderFields().keySet()) {
        log.debug("ArchiveHeader: " + h + " -> " + header.getHeaderValue(h));
    }

    try {
        MDX mdx = new MDX();
        Date crawl_date = ArchiveUtils.parse14DigitISODate(header.getDate(), null);
        if (crawl_date != null) {
            mdx.setTs(ArchiveUtils.get14DigitDate(crawl_date));
        } else {
            mdx.setTs(header.getDate());
        }
        mdx.setUrl(header.getUrl());
        mdx.setHash(header.getDigest());

        // Data from WARC record:
        mdx.put("source-file", key.toString());
        mdx.put("content-type", header.getMimetype());
        mdx.put("content-length", "" + header.getContentLength());
        mdx.put("length", "" + header.getLength());
        mdx.put("source-offset", "" + header.getOffset());
        mdx.put("record-identifier", header.getRecordIdentifier());
        for (String k : header.getHeaderFieldKeys()) {
            mdx.put("HEADER-" + k, "" + header.getHeaderValue(k));
        }

        // check record type and look for HTTP data:
        Header[] httpHeaders = null;
        if (record instanceof WARCRecord) {
            mdx.setRecordType("warc." + header.getHeaderValue(HEADER_KEY_TYPE));
            mdx.setHash("" + header.getHeaderValue(WARCConstants.HEADER_KEY_PAYLOAD_DIGEST));
            // There are not always headers! The code should check first.
            String statusLine = HttpParser.readLine(record, "UTF-8");
            if (statusLine != null && statusLine.startsWith("HTTP")) {
                String firstLine[] = statusLine.split(" ");
                if (firstLine.length > 1) {
                    String statusCode = firstLine[1].trim();
                    mdx.put("status-code", statusCode);
                    try {
                        httpHeaders = HttpParser.parseHeaders(record, "UTF-8");
                    } catch (ProtocolException p) {
                        log.error("ProtocolException [" + statusCode + "]: "
                                + header.getHeaderValue(WARCConstants.HEADER_KEY_FILENAME) + "@"
                                + header.getHeaderValue(WARCConstants.ABSOLUTE_OFFSET_KEY), p);
                    }
                } else {
                    log.warn("Could not parse status line: " + statusLine);
                }
            } else {
                log.warn("Invalid status line: " + header.getHeaderValue(WARCConstants.HEADER_KEY_FILENAME)
                        + "@" + header.getHeaderValue(WARCConstants.ABSOLUTE_OFFSET_KEY));
            }

        } else if (record instanceof ARCRecord) {
            mdx.setRecordType("arc");
            ARCRecord arcr = (ARCRecord) record;
            mdx.put("status-code", "" + arcr.getStatusCode());
            httpHeaders = arcr.getHttpHeaders();

        } else {
            mdx.setRecordType("unknown");
        }

        // Add in http headers
        if (httpHeaders != null) {
            for (Header h : httpHeaders) {
                mdx.put("HTTP-" + h.getName(), h.getValue());
            }
        }

        // URL:
        String uri = header.getUrl();
        if (uri != null) {
            UsableURI uuri = UsableURIFactory.getInstance(uri);
            // Hosts: only recorded for http/https URLs.  Compare the scheme
            // explicitly; "https".contains(scheme) would also accept any
            // substring of "https" (including the empty string) and would
            // throw a NullPointerException for a null scheme.
            String scheme = uuri.getScheme();
            if ("http".equals(scheme) || "https".equals(scheme)) {
                mdx.put("host", uuri.getAuthority());
            }
        } else {
            mdx.put("errors", "malformed-url");
        }

        // Year
        String date = header.getDate();
        if (date != null && date.length() > 4) {
            mdx.put("year", date.substring(0, 4));
        } else {
            mdx.put("errors", "malformed-date");
        }

        // And collect:
        // Fall back to the MIME type when there is no usable hash.  The
        // literal "null" occurs because the WARC branch builds the hash by
        // string concatenation.  The empty-string test must compare content,
        // not references.
        String outKey = mdx.getHash();
        if (outKey == null || "".equals(outKey) || "null".equals(outKey)) {
            outKey = mdx.getRecordType() + ":" + header.getMimetype();
        } else {
            outKey = mdx.getRecordType() + ":" + outKey;
        }

        output.collect(new Text(outKey), new Text(mdx.toString()));
    } catch (JSONException e) {
        // Nothing is emitted for this record; log the cause through the
        // mapper's logger so it shows up with the rest of the task logs.
        log.error("Failed to build MDX record for " + header.getUrl(), e);
    }

}