List of usage examples for org.apache.commons.httpclient URIException printStackTrace
public void printStackTrace()
From source file:org.archive.wayback.archivalurl.requestparser.ReplayRequestParser.java
public WaybackRequest parse(String requestPath) { WaybackRequest wbRequest = null;/* www . j a v a 2s .c o m*/ Matcher matcher = WB_REQUEST_REGEX.matcher(requestPath); String urlStr = null; if (matcher != null && matcher.matches()) { wbRequest = new WaybackRequest(); String dateStr = matcher.group(1); urlStr = matcher.group(2); if (!urlStr.startsWith("http://")) { urlStr = "http://" + urlStr; } // The logic of the classic WM wrt timestamp bounding: // if 14-digits are specified, assume min-max range boundaries // if less than 14 are specified, assume min-max range boundaries // based upon amount given (2001 => 20010101... - 20011231...) // AND assume the user asked for the LATEST possible date // within that range... // // ...don't ask me, I just work here. String startDate = null; String endDate = null; if (dateStr.length() == 14) { startDate = getEarliestTimestamp(); endDate = getLatestTimestamp(); } else { // classic behavior: // startDate = Timestamp.parseBefore(dateStr).getDateStr(); // endDate = Timestamp.parseAfter(dateStr).getDateStr(); // dateStr = endDate; // "better" behavior: startDate = getEarliestTimestamp(); endDate = getLatestTimestamp(); dateStr = Timestamp.parseAfter(dateStr).getDateStr(); } wbRequest.put(WaybackConstants.REQUEST_EXACT_DATE, dateStr); //wbRequest.put(WaybackConstants.REQUEST_START_DATE, startDate); BUG MC 120608 //wbRequest.put(WaybackConstants.REQUEST_END_DATE, endDate); BUG MC 120608 wbRequest.put(WaybackConstants.REQUEST_TYPE, WaybackConstants.REQUEST_REPLAY_QUERY); try { // String wbPrefix = wbRequest.getDefaultWaybackPrefix(); // if (urlStr.startsWith(wbPrefix)) { // wbRequest.setBetterRequestURI(urlStr); // } wbRequest.setRequestUrl(urlStr); } catch (URIException e) { if (urlStr != null) { LOGGER.severe("Failed parse of url(" + urlStr + ")"); } e.printStackTrace(); wbRequest = null; } } return wbRequest; }
From source file:org.archive.wayback.domainprefix.DomainPrefixRequestParser.java
public WaybackRequest parse(HttpServletRequest httpRequest, AccessPoint wbContext) throws BadQueryException { WaybackRequest wbRequest = null;//from w w w . j ava2 s .c o m String server = httpRequest.getServerName() + ":" + httpRequest.getServerPort(); if (server.endsWith(hostPort)) { int length = server.length() - hostPort.length(); if (server.length() > hostPort.length()) { String prefix = server.substring(0, length - 1); Matcher replayMatcher = REPLAY_REGEX.matcher(prefix); if (replayMatcher != null && replayMatcher.matches()) { wbRequest = new WaybackRequest(); String dateStr = replayMatcher.group(1); String host = replayMatcher.group(2); String requestUrl = getRequestString(host, httpRequest); wbRequest.put(WaybackConstants.REQUEST_EXACT_DATE, dateStr); wbRequest.put(WaybackConstants.REQUEST_TYPE, WaybackConstants.REQUEST_REPLAY_QUERY); try { wbRequest.setRequestUrl(requestUrl); } catch (URIException e) { e.printStackTrace(); wbRequest = null; } } else { Matcher queryMatcher = QUERY_REGEX.matcher(prefix); if (queryMatcher != null && queryMatcher.matches()) { wbRequest = new WaybackRequest(); String dateStr = queryMatcher.group(1); String host = queryMatcher.group(2); String startDate; String endDate; if (dateStr.length() == 0) { startDate = getEarliestTimestamp(); endDate = getLatestTimestamp(); } else { startDate = Timestamp.parseBefore(dateStr).getDateStr(); endDate = Timestamp.parseAfter(dateStr).getDateStr(); } wbRequest.put(WaybackConstants.REQUEST_START_DATE, startDate); wbRequest.put(WaybackConstants.REQUEST_END_DATE, endDate); wbRequest.put(WaybackConstants.REQUEST_TYPE, WaybackConstants.REQUEST_URL_QUERY); String requestUrl = getRequestString(host, httpRequest); try { wbRequest.setRequestUrl(requestUrl); } catch (URIException e) { e.printStackTrace(); wbRequest = null; } } } } } return wbRequest; }
From source file:org.archive.wayback.liveweb.ARCRecordingProxy.java
/**
 * Caches the requested URL through {@code cacher} and streams the
 * resulting file region back as the response body.
 *
 * <p>The URL is rebuilt from the request URL plus the query string, if
 * any. On success the response gets status 200, the region length as
 * content length, {@code ARC_RECORD_CONTENT_TYPE} as content type and an
 * expiry header computed from {@code fakeExpiresMS} or {@code expiresMS},
 * depending on whether the region is flagged as fake.
 *
 * @param httpRequest incoming request
 * @param httpResponse response to write to
 * @return always {@code true}
 * @throws ServletException declared by the signature
 * @throws IOException if caching or writing the response fails
 */
public boolean handleRequest(HttpServletRequest httpRequest, HttpServletResponse httpResponse)
        throws ServletException, IOException {
    StringBuffer sb = httpRequest.getRequestURL();
    String query = httpRequest.getQueryString();
    if (query != null) {
        sb.append("?").append(query);
    }
    // Built outside the try so the failure path can report it.
    String url = sb.toString();

    try {
        LOGGER.info("Caching URL(" + url + ")");
        FileRegion r = cacher.cacheURL(url, arcCacheDir);

        httpResponse.setStatus(HttpServletResponse.SC_OK);
        httpResponse.setContentLength((int) r.getLength());
        httpResponse.setContentType(ARC_RECORD_CONTENT_TYPE);

        long exp = System.currentTimeMillis();
        exp += (r.isFake ? fakeExpiresMS : expiresMS);
        httpResponse.setDateHeader(EXPIRES_HEADER, exp);

        r.copyToOutputStream(httpResponse.getOutputStream());
    } catch (URIException e) {
        // Log through the class logger (with the offending URL and the
        // cause) rather than dumping the stack trace to stderr.
        LOGGER.log(java.util.logging.Level.WARNING,
                "Unable to cache URL(" + url + ")", e);
        httpResponse.sendError(HttpServletResponse.SC_NOT_FOUND);
    }
    return true;
}
From source file:org.archive.wayback.liveweb.LiveWebCache.java
private SearchResult forgeFailedSearchResult(URL url) { SearchResult result = new SearchResult(); result.put(WaybackConstants.RESULT_ARC_FILE, "-"); result.put(WaybackConstants.RESULT_OFFSET, "0"); result.put(WaybackConstants.RESULT_HTTP_CODE, "0"); result.put(WaybackConstants.RESULT_MD5_DIGEST, "-"); result.put(WaybackConstants.RESULT_MIME_TYPE, "-"); result.put(WaybackConstants.RESULT_CAPTURE_DATE, Timestamp.currentTimestamp().getDateStr()); result.put(WaybackConstants.RESULT_ORIG_HOST, url.getHost()); result.put(WaybackConstants.RESULT_REDIRECT_URL, "-"); result.put(WaybackConstants.RESULT_URL, url.toString()); String indexUrl;/*from w w w . j a v a2 s .c o m*/ try { indexUrl = canonicalizer.urlStringToKey(url.toString()); } catch (URIException e) { // not gonna happen... e.printStackTrace(); indexUrl = url.toString(); } result.put(WaybackConstants.RESULT_URL_KEY, indexUrl); return result; }
From source file:org.archive.wayback.liveweb.URLCacher.java
/** * @param args//from w w w . j a v a 2 s. com */ public static void main(String[] args) { int DEFAULT_MAX_ARC_FILE_SIZE = 1024 * 1024 * 100; File arcDir = new File(args[0]); URL url; if (!arcDir.isDirectory()) { arcDir.mkdir(); } File[] files = { arcDir }; boolean compress = true; ARCWriter writer = new ARCWriter(new AtomicInteger(), Arrays.asList(files), "test", compress, DEFAULT_MAX_ARC_FILE_SIZE); Properties p = new Properties(); p.setProperty(ARCCacheDirectory.LIVE_WEB_ARC_DIR, args[0]); p.setProperty(ARCCacheDirectory.LIVE_WEB_ARC_PREFIX, "test"); p.setProperty(CACHE_PATH, arcDir.getAbsolutePath()); URLCacher uc = new URLCacher(); ARCCacheDirectory cache = new ARCCacheDirectory(); // try { //// cache.init(p); //// uc.init(p); // } catch (ConfigurationException e) { // e.printStackTrace(); // System.exit(1); // } for (int k = 1; k < args.length; k++) { try { url = new URL(args[k]); } catch (MalformedURLException e1) { e1.printStackTrace(); continue; } try { uc.cache(cache, url.toString()); } catch (URIException e) { e.printStackTrace(); } catch (LiveDocumentNotAvailableException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } try { writer.close(); } catch (IOException e) { e.printStackTrace(); } }
From source file:org.archive.wayback.liveweb.URLtoARCCacher.java
/** * @param url to cache/*from w ww . j a v a2s . c om*/ * @param cache ARCCacheDirectory for storing result or faked result * @return FileRegion of compressed byte range for ARCRecord. * @throws IOException for the usual reasons * @throws URIException if url argument isn't really an URL.. */ public FileRegion cacheURL(String url, ARCCacheDirectory cache) throws IOException, URIException { FileRegion region = null; // to track if we got a response (any response) or an exception. boolean gotUrl = false; boolean isTimeout = false; String fName = backingFileBase + "-" + Thread.currentThread().getId(); Recorder recorder = new Recorder(recorderCacheDir, fName, outBufferSize, inBufferSize); ExtendedGetMethod getMethod = null; // TWO STEPS: // first do the GET, using a Recorder to get the response. // then, if that worked, save the recorded value into an ARC // and return it's region // if we didn't get a response, forge a fake record and return that. try { Recorder.setHttpRecorder(recorder); LaxURI lURI = new LaxURI(url, true); getMethod = new ExtendedGetMethod(url, recorder); getMethod.setURI(lURI); HttpClient client = getHttpClient(); getMethod.getParams().setCookiePolicy(CookiePolicy.IGNORE_COOKIES); getMethod.setFollowRedirects(false); getMethod.setRequestHeader("User-Agent", userAgent); int code = client.executeMethod(getMethod); LOGGER.info("URL(" + url + ") HTTP:" + code); InputStream responseIS = getMethod.getResponseBodyAsStream(); if (responseIS != null) { ByteOp.discardStream(responseIS); responseIS.close(); } gotUrl = true; } catch (URIException e) { e.printStackTrace(); } catch (UnknownHostException e) { LOGGER.warning("Unknown host for " + url); } catch (ConnectTimeoutException e) { // TODO: should we act like it's a full block? 
LOGGER.warning("Timeout out connecting to " + url); isTimeout = true; } catch (SocketTimeoutException e) { LOGGER.warning("Timeout out socket for " + url); isTimeout = true; } catch (ConnectException e) { LOGGER.warning("ConnectionRefused to " + url); } catch (NoRouteToHostException e) { LOGGER.warning("NoRouteToHost for " + url); } catch (SocketException e) { // should only be things like "Connection Reset", etc.. LOGGER.warning("SocketException for " + url); } catch (HttpException e) { e.printStackTrace(); // we have to let IOExceptions out, problems caused by local disk // NEED to return errors, indicating that there is not an // authoritative answer, and thus... NOTHING can be shown. // } catch (IOException e) { // e.printStackTrace(); } finally { recorder.closeRecorders(); Recorder.setHttpRecorder(null); if (getMethod != null) { getMethod.releaseConnection(); } } // now write the content, or a fake record: ARCWriter writer = null; ReplayInputStream replayIS = null; try { writer = cache.getWriter(); if (gotUrl) { RecordingInputStream ris = recorder.getRecordedInput(); replayIS = ris.getReplayInputStream(); region = storeInputStreamARCRecord(writer, url, getMethod.getMime(), getMethod.getRemoteIP(), getMethod.getCaptureDate(), replayIS, (int) ris.getSize()); } else if (isTimeout) { region = storeTimeout(writer, url); } else { region = storeNotAvailable(writer, url); } } finally { IOUtils.closeQuietly(replayIS); if (writer != null) { cache.returnWriter(writer); } } recorder.close(); return region; }
From source file:org.archive.wayback.proxy.ProxyReplayRequestParser.java
@Override public WaybackRequest parse(HttpServletRequest httpRequest, AccessPoint wbContext) throws BadQueryException { if (isLocalRequest(httpRequest)) { // local means query: let the following RequestParsers have a go // at it. return null; }// ww w .j a v a 2s. com WaybackRequest wbRequest = null; String requestServer = httpRequest.getServerName(); String requestPath = httpRequest.getRequestURI(); //int port = httpRequest.getServerPort(); String requestQuery = httpRequest.getQueryString(); String requestScheme = httpRequest.getScheme(); if (requestQuery != null) { requestPath = requestPath + "?" + requestQuery; } String requestUrl = requestScheme + "://" + requestServer + requestPath; wbRequest = new WaybackRequest(); try { wbRequest.setRequestUrl(requestUrl); } catch (URIException e) { e.printStackTrace(); return null; } wbRequest.put(WaybackConstants.REQUEST_TYPE, WaybackConstants.REQUEST_REPLAY_QUERY); return wbRequest; }
From source file:org.archive.wayback.query.UIQueryResults.java
/** * @param url/*from w ww. ja va 2 s. c o m*/ * @return String url that will make a query for all captures of an URL. */ public String makeCaptureQueryUrl(String url) { WaybackRequest newWBR = wbRequest.clone(); newWBR.put(WaybackConstants.REQUEST_TYPE, WaybackConstants.REQUEST_URL_QUERY); try { newWBR.setRequestUrl(url); } catch (URIException e) { // should not happen... e.printStackTrace(); } return newWBR.getContextPrefix() + "query?" + newWBR.getQueryArguments(1); }
From source file:org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter.java
/** * @param line/* w w w.j a v a 2s. c o m*/ * @return SearchResult representation of input line */ public static SearchResult doAdapt(String line) { SearchResult result = new SearchResult(); String[] tokens = line.split(" "); if (tokens.length != 9) { return null; //throw new IllegalArgumentException("Need 9 columns("+line+")"); } String url = tokens[0]; String captureDate = tokens[1]; String origHost = tokens[2]; String mimeType = tokens[3]; String httpResponseCode = tokens[4]; String md5Fragment = tokens[5]; String redirectUrl = tokens[6]; long compressedOffset = -1; if (!tokens[7].equals("-")) { compressedOffset = Long.parseLong(tokens[7]); } String arcFileName = tokens[8]; String origUrl = url; if (!url.startsWith(WaybackConstants.DNS_URL_PREFIX)) { try { UURI uri = UURIFactory.getInstance(WaybackConstants.HTTP_URL_PREFIX + url); if (uri.getPort() != -1) { origHost += ":" + uri.getPort(); } origUrl = origHost + uri.getEscapedPathQuery(); } catch (URIException e) { // TODO Stifle? throw an error? e.printStackTrace(); return null; } } result.put(WaybackConstants.RESULT_URL, origUrl); result.put(WaybackConstants.RESULT_URL_KEY, url); result.put(WaybackConstants.RESULT_CAPTURE_DATE, captureDate); result.put(WaybackConstants.RESULT_ORIG_HOST, origHost); result.put(WaybackConstants.RESULT_MIME_TYPE, mimeType); result.put(WaybackConstants.RESULT_HTTP_CODE, httpResponseCode); result.put(WaybackConstants.RESULT_MD5_DIGEST, md5Fragment); result.put(WaybackConstants.RESULT_REDIRECT_URL, redirectUrl); // HACKHACK: result.put(WaybackConstants.RESULT_OFFSET, String.valueOf(compressedOffset)); result.put(WaybackConstants.RESULT_ARC_FILE, arcFileName); return result; }
From source file:org.archive.wayback.surt.SURTTokenizerTest.java
private SURTTokenizer toSurtT(final String u) { SURTTokenizer tok = null;//from w ww. j a v a 2 s . c o m try { tok = new SURTTokenizer(u, false); } catch (URIException e) { e.printStackTrace(); assertFalse("URL Exception " + e.getLocalizedMessage(), true); } return tok; }