List of usage examples for com.google.common.io ByteStreams readBytes
public static <T> T readBytes(InputStream input, ByteProcessor<T> processor) throws IOException
From source file:org.commoncrawl.service.parser.client.Dispatcher.java
public static void main(String[] args) throws IOException { Configuration conf = new Configuration(); CrawlEnvironment.setHadoopConfig(conf); String baseURL = "http://unknown.com/"; if (args.length != 0) { baseURL = args[0];//from w w w. ja v a 2 s .c o m } URL baseURLObj; try { baseURLObj = new URL(baseURL); } catch (MalformedURLException e2) { throw new IOException("Invalid Base Link"); } final URL finalBaseURL = (baseURLObj != null) ? baseURLObj : null; final DataOutputBuffer headerBuffer = new DataOutputBuffer(); final DataOutputBuffer contentBuffer = new DataOutputBuffer(); try { ByteStreams.readBytes(new InputSupplier<InputStream>() { @Override public InputStream getInput() throws IOException { return System.in; } }, new ByteProcessor<Long>() { @Override public Long getResult() { return 0L; } int currLineCharCount = 0; boolean processingHeaders = true; @Override public boolean processBytes(byte[] buf, int start, int length) throws IOException { if (processingHeaders) { int current = start; int end = current + length; while (processingHeaders && current != end) { if (buf[current] != '\r' && buf[current] != '\n') { currLineCharCount++; } else if (buf[current] == '\n') { if (currLineCharCount == 0) { headerBuffer.write(buf, start, current - start + 1); processingHeaders = false; } currLineCharCount = 0; } current++; } if (processingHeaders) { headerBuffer.write(buf, start, length); } else { length -= current - start; start = current; } } if (!processingHeaders) { contentBuffer.write(buf, start, length); } return true; } }); LOG.info("HEADER LEN:" + headerBuffer.getLength()); // System.out.println(new String(headerBuffer.getData(),0,headerBuffer.getLength(),Charset.forName("UTF-8"))); LOG.info("CONTENT LEN:" + contentBuffer.getLength()); //System.out.println(new String(contentBuffer.getData(),0,contentBuffer.getLength(),Charset.forName("UTF-8"))); // decode header bytes ... 
String header = ""; if (headerBuffer.getLength() != 0) { try { header = new String(headerBuffer.getData(), 0, headerBuffer.getLength(), Charset.forName("UTF-8")); } catch (Exception e) { LOG.warn(CCStringUtils.stringifyException(e)); header = new String(headerBuffer.getData(), 0, headerBuffer.getLength(), Charset.forName("ASCII")); } } final String headersFinal = (header != null) ? header : ""; LOG.info("Starting Event Loop"); final EventLoop eventLoop = new EventLoop(); eventLoop.start(); try { // create fake hosts file ... //String hosts = "10.0.20.101:8072"; // reader //Reader reader = new StringReader(hosts); // dispatcher init LOG.info("initializing Dispatcher"); final Dispatcher dispatcher = new Dispatcher(eventLoop, "parserNodes"); LOG.info("Waiting for a few seconds"); Thread.sleep(5000); Thread threads[] = new Thread[TEST_THREAD_COUNT]; final Semaphore threadWaitSem = new Semaphore(-TEST_THREAD_COUNT - 1); // start 100 threads for (int threadIdx = 0; threadIdx < TEST_THREAD_COUNT; ++threadIdx) { threads[threadIdx] = new Thread(new Runnable() { @Override public void run() { for (int i = 0; i < ITERATIONS_PER_THREAD; ++i) { // build parse request ParseRequest request = new ParseRequest(); request.setDocId(1); request.setDomainId(1); request.setDocURL(finalBaseURL.toString()); request.setDocHeaders(headersFinal); request.setDocContent( new FlexBuffer(contentBuffer.getData(), 0, contentBuffer.getLength())); //LOG.info("Dispatching parse request"); ParseResult result = dispatcher.dispatchRequest(request); LOG.info("TID[" + Thread.currentThread().getId() + "]ReqID[" + i + "]" + " Success:" + ((result != null) ? result.getParseSuccessful() : false) + " LinkCount:" + ((result != null) ? 
result.getExtractedLinks().size() : 0)); } LOG.info("Thread:" + Thread.currentThread().getId() + " Exiting"); threadWaitSem.release(); } }); threads[threadIdx].start(); } LOG.info("Waiting for threads to die"); threadWaitSem.acquireUninterruptibly(); LOG.info("All Threads dead."); } finally { eventLoop.stop(); } } catch (IOException e) { LOG.error(CCStringUtils.stringifyException(e)); } catch (InterruptedException e) { } }
From source file:com.google.copybara.util.RenameDetector.java
/**
 * Hashes a single file until the end of the stream.
 *
 * <p>The stream is always closed before this method returns. If both the
 * read and the close fail, the read failure is the one that propagates and
 * the close failure is attached to it as a suppressed exception.
 *
 * @param input the stream to hash; closed by this method
 * @return the result produced by {@code HashingByteProcessor}
 * @throws IOException if reading or closing the stream fails
 */
private int[] hashes(InputStream input) throws IOException {
    try (InputStream in = input) {
        return ByteStreams.readBytes(in, new HashingByteProcessor());
    }
}
From source file:org.commoncrawl.service.parser.server.ParseWorker.java
public static void main(String[] args) throws IOException { String baseURL = "http://unknown.com/"; NIOHttpHeaders headers = null;//ww w. j a v a 2 s .c o m if (args.length != 0) { for (int i = 0; i < args.length; ++i) { if (args[i].equalsIgnoreCase("--noHeaders")) { headers = new NIOHttpHeaders(); headers.add("content-type", "text/html"); } else if (args[i].equalsIgnoreCase("--baseURL")) { baseURL = args[++i]; } } } URL baseURLObj; try { baseURLObj = new URL(baseURL); } catch (MalformedURLException e2) { LOG.error(CCStringUtils.stringifyException(e2)); throw new IOException("Invalid Base Link"); } final DataOutputBuffer headerBuffer = new DataOutputBuffer(); final DataOutputBuffer contentBuffer = new DataOutputBuffer(); final boolean processHeaders = (headers == null); try { ByteStreams.readBytes(new InputSupplier<InputStream>() { @Override public InputStream getInput() throws IOException { return System.in; } }, new ByteProcessor<Long>() { @Override public Long getResult() { return 0L; } int currLineCharCount = 0; boolean processingHeaders = processHeaders; @Override public boolean processBytes(byte[] buf, int start, int length) throws IOException { if (processingHeaders) { int current = start; int end = current + length; while (processingHeaders && current != end) { if (buf[current] != '\r' && buf[current] != '\n') { currLineCharCount++; } else if (buf[current] == '\n') { if (currLineCharCount == 0) { headerBuffer.write(buf, start, current - start + 1); processingHeaders = false; } currLineCharCount = 0; } current++; } if (processingHeaders) { headerBuffer.write(buf, start, length); } else { length -= current - start; start = current; } } if (!processingHeaders) { contentBuffer.write(buf, start, length); } return true; } }); LOG.info("CONTENT LEN:" + contentBuffer.getLength()); //System.out.println(new String(contentBuffer.getData(),0,contentBuffer.getLength(),Charset.forName("UTF-8"))); // decode header bytes ... 
String header = ""; if (headerBuffer.getLength() != 0) { try { header = new String(headerBuffer.getData(), 0, headerBuffer.getLength(), Charset.forName("UTF-8")); } catch (Exception e) { LOG.warn(CCStringUtils.stringifyException(e)); header = new String(headerBuffer.getData(), 0, headerBuffer.getLength(), Charset.forName("ASCII")); } } else { if (headers != null) { header = headers.toString(); } } LOG.info("HEADER LEN:" + header.length()); System.out.println(header); //LOG.info("Parsing Document"); ParseWorker worker = new ParseWorker(); ParseResult result = new ParseResult(); worker.parseDocument(result, 0L, 0L, baseURLObj, header, new FlexBuffer(contentBuffer.getData(), 0, contentBuffer.getLength())); LOG.info("Parse Result:" + result.getParseSuccessful()); //LOG.info("Parse Data:" + result.toString()); OutputStreamWriter outputWriter = new OutputStreamWriter(System.out, "UTF-8"); JsonElement resultObj = parseResultToJSON(result); JsonWriter writer = new JsonWriter(outputWriter); writer.setIndent(" "); writer.setHtmlSafe(true); writer.setLenient(true); Streams.write(resultObj, writer); writer.flush(); outputWriter.write("******** TEXT OUTPUT **********\n"); outputWriter.write(result.getText()); outputWriter.flush(); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } }
From source file:co.cask.cdap.test.internal.DefaultStreamManager.java
@Override public void send(File file, String contentType) throws Exception { String path = String.format("/v3/namespaces/%s/streams/%s/batch", streamId.getNamespaceId(), streamId.getId());// w ww . j a v a 2 s .com HttpRequest request = new DefaultHttpRequest(HttpVersion.HTTP_1_1, HttpMethod.POST, path); request.setHeader(HttpHeaders.Names.CONTENT_TYPE, contentType); final MockResponder responder = new MockResponder(); final BodyConsumer bodyConsumer = streamHandler.batch(request, responder, streamId.getNamespaceId(), streamId.getId()); Preconditions.checkNotNull(bodyConsumer, "BodyConsumer from stream batch load call should not be null"); ByteStreams.readBytes(Files.newInputStreamSupplier(file), new ByteProcessor<BodyConsumer>() { @Override public boolean processBytes(byte[] buf, int off, int len) throws IOException { bodyConsumer.chunk(ChannelBuffers.wrappedBuffer(buf, off, len), responder); return true; } @Override public BodyConsumer getResult() { bodyConsumer.finished(responder); return bodyConsumer; } }); Preconditions.checkState(HttpResponseStatus.OK.equals(responder.getStatus()), "Failed to load events to stream %s in batch", streamId); }