Example usage for java.util.concurrent Semaphore acquireUninterruptibly

Introduction

On this page you can find examples of how java.util.concurrent.Semaphore.acquireUninterruptibly() is used in real source files.

Prototype

public void acquireUninterruptibly() 

Document

Acquires a permit from this semaphore, blocking until one is available.
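
Unlike acquire(), this method keeps waiting even when the calling thread is interrupted: no InterruptedException is thrown, and the thread's interrupt status is simply set once the call returns. Most of the examples below rely on the same basic pattern, where a semaphore created with zero permits acts as a one-shot completion latch that an asynchronous worker releases. Here is a minimal sketch of that pattern (the class and names are illustrative, not taken from the examples):

import java.util.concurrent.Semaphore;

public class AcquireUninterruptiblySketch {

    public static void main(String[] args) {
        // zero permits: the first acquire blocks until somebody releases
        final Semaphore done = new Semaphore(0);

        new Thread(() -> {
            // ... perform some background work ...
            done.release(); // signal completion
        }).start();

        // blocks until the worker releases; an interrupt does not abort the
        // wait, the interrupt flag is just set when this call returns
        done.acquireUninterruptibly();
        System.out.println("worker finished");
    }
}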

Usage

From source file:org.commoncrawl.service.listcrawler.CacheManager.java

public static void main(String[] args) {

    final EventLoop eventLoop = new EventLoop();
    eventLoop.start();

    final CacheManager manager = new CacheManager(eventLoop);
    // delete active log if it exists ... 
    manager.getActiveLogFilePath().delete();
    try {
        manager.initialize(INIT_FLAG_SKIP_CACHE_WRITER_INIT | INIT_FLAG_SKIP_HDFS_WRITER_INIT);
    } catch (IOException e1) {
        LOG.error(CCStringUtils.stringifyException(e1));
        return;
    }

    MessageDigest digester;
    try {
        digester = MessageDigest.getInstance("MD5");
    } catch (NoSuchAlgorithmException e1) {
        LOG.error(CCStringUtils.stringifyException(e1));
        return;
    }

    final byte[] randomBytes = new byte[1 << 15];
    LOG.info("Building Random Digest");
    for (int i = 0; i < randomBytes.length; i += 16) {
        long time = System.nanoTime();
        digester.update((new UID() + "@" + time).getBytes());
        System.arraycopy(digester.digest(), 0, randomBytes, i, 16);
    }

    final Semaphore semaphore = new Semaphore(0);

    if (args[0].equals("populate")) {

        manager.startCacheWriterThread();
        manager.startHDFSFlusherThread();

        try {

            LOG.info("Done Building Random Digest");

            LOG.info("Writing Items To Disk");
            for (int i = 0; i < 1000000; ++i) {

                if (i % 1000 == 0) {
                    LOG.info("Wrote:" + i + " entries");
                }

                final CacheItem item1 = new CacheItem();
                item1.setUrl(manager.normalizeURL("http://www.domain.com/foobar/" + i));
                item1.setContent(new Buffer(randomBytes));
                item1.setUrlFingerprint(URLFingerprint.generate64BitURLFPrint(item1.getUrl()));
                manager.cacheItem(item1, null);
                Thread.sleep(1);

                if (i != 0 && i % 10000 == 0) {
                    LOG.info("Hit 10000 items.. sleeping for 20 seconds");
                    Thread.sleep(20 * 1000);
                }
            }

            Thread.sleep(30000);

            for (int i = 0; i < 1000000; ++i) {

                final String url = "http://www.domain.com/foobar/" + i;
                manager.checkCacheForItem(url, new CacheItemCheckCallback() {

                    @Override
                    public void cacheItemAvailable(String url, CacheItem item) {
                        Assert.assertTrue(item.getUrl().equals(url));
                        String itemIndex = url.substring("http://www.domain.com/foobar/".length());
                        int itemNumber = Integer.parseInt(itemIndex);
                        if (itemNumber == 999999) {
                            semaphore.release();
                        }
                    }

                    @Override
                    public void cacheItemNotFound(String url) {
                        Assert.assertTrue(false);
                    }
                });
            }
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
        } catch (InterruptedException e2) {
            // interrupted during one of the test sleeps; just fall through
        }
    } else if (args[0].equals("read")) {

        try {
            final CacheItem item1 = new CacheItem();
            item1.setUrl(manager.normalizeURL("http://www.domain.com/barz/"));
            item1.setUrlFingerprint(URLFingerprint.generate64BitURLFPrint(item1.getUrl()));
            item1.setContent(new Buffer(randomBytes));
            manager.cacheItem(item1, null);

            // queue up cache load requests .... 
            for (int i = 0; i < 10000; ++i) {

                final String url = "http://www.domain.com/foobar/" + i;

                eventLoop.setTimer(new Timer(1, false, new Timer.Callback() {

                    @Override
                    public void timerFired(Timer timer) {
                        manager.checkCacheForItem(url, new CacheItemCheckCallback() {

                            @Override
                            public void cacheItemAvailable(String url, CacheItem item) {
                                LOG.info("FOUND Item for URL:" + url + " ContentSize:"
                                        + item.getContent().getCount());
                            }

                            @Override
                            public void cacheItemNotFound(String url) {
                                LOG.info("DIDNOT Find Item for URL:" + url);
                            }

                        });
                    }
                }));
            }

            eventLoop.setTimer(new Timer(1, false, new Timer.Callback() {

                @Override
                public void timerFired(Timer timer) {
                    manager.checkCacheForItem(item1.getUrl(), new CacheItemCheckCallback() {

                        @Override
                        public void cacheItemAvailable(String url, CacheItem item) {
                            LOG.info("FOUND Item for URL:" + url + " ContentSize:"
                                    + item.getContent().getCount());
                        }

                        @Override
                        public void cacheItemNotFound(String url) {
                            LOG.info("DIDNOT Find Item for URL:" + url);
                        }

                    });
                }

            }));
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
        }
    }
    semaphore.acquireUninterruptibly();

}

From source file:org.commoncrawl.service.listcrawler.CrawlHistoryManager.java

private static void testWriteMapFileToHDFS(EventLoop eventLoop) {
    try {
        // initialize log manager
        CrawlHistoryManager logManager = initializeTestLogManager(eventLoop, true);

        // initialize item list
        TreeMap<URLFP, ProxyCrawlHistoryItem> items = buildTestList(urlList1);
        final TreeMap<String, URLFP> urlToURLFPMap = new TreeMap<String, URLFP>();

        for (Map.Entry<URLFP, ProxyCrawlHistoryItem> item : items.entrySet()) {
            urlToURLFPMap.put(item.getValue().getOriginalURL(), item.getKey());
        }

        // add to local item map in log manager
        for (ProxyCrawlHistoryItem item : items.values()) {
            logManager.appendItemToLog(item);
        }
        // ok shutdown log manager ...
        logManager.shutdown();

        // restart - reload log file ...
        logManager = initializeTestLogManager(eventLoop, false);

        // write to 'hdfs'
        logManager.doCheckpoint();

        syncAndValidateItems(items, logManager);

        logManager.shutdown();

        // restart
        logManager = initializeTestLogManager(eventLoop, false);

        // tweak original items
        updateTestItemStates(items);

        // ok append items
        for (ProxyCrawlHistoryItem item : items.values()) {
            logManager.appendItemToLog(item);
        }

        syncAndValidateItems(items, logManager);

        // ok now checkpoint the items
        logManager.doCheckpoint();

        // ok now validate one last time
        syncAndValidateItems(items, logManager);

        // shutdown
        logManager.shutdown();

        logManager = null;

        {
            // start from scratch ...
            final CrawlHistoryManager logManagerTest = initializeTestLogManager(eventLoop, true);

            // create a final version of the tree map reference
            final TreeMap<URLFP, ProxyCrawlHistoryItem> itemList = items;
            // create filename
            File urlInputFile = new File(logManagerTest.getLocalDataDir(),
                    "testURLS-" + System.currentTimeMillis());
            // ok create a crawl list from urls
            CrawlList.generateTestURLFile(urlInputFile, urlList1);
            long listId = logManagerTest.loadList(urlInputFile, 0);

            CrawlList listObject = logManagerTest.getList(listId);

            final Semaphore listCompletionSemaphore = new Semaphore(-(itemList.size() - 1));

            listObject.setEventListener(new CrawlList.CrawlListEvents() {

                @Override
                public void itemUpdated(URLFP itemFingerprint) {
                    listCompletionSemaphore.release();
                }
            });

            // ok start the appropriate threads
            logManagerTest.startLogWriterThread(0);
            logManagerTest.startListLoaderThread();
            logManagerTest.startQueueLoaderThread(new CrawlQueueLoader() {

                @Override
                public void queueURL(URLFP urlfp, String url) {
                    logManagerTest.crawlComplete(
                            proxyCrawlHitoryItemToCrawlURL(itemList.get(urlToURLFPMap.get(url))));
                }

                @Override
                public void flush() {
                    // no-op: nothing buffered to flush
                }
            });

            LOG.info("Waiting for Release");

            // and wait for the finish
            listCompletionSemaphore.acquireUninterruptibly();

            LOG.info("Got Here");

        }

    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    }
}
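
The new Semaphore(-(itemList.size() - 1)) construction above is a countdown idiom: Semaphore accepts a negative initial permit count, and starting at -(N - 1) means the single acquireUninterruptibly() at the end can only succeed after all N release() calls have happened. A distilled sketch of the idiom (names are illustrative):

import java.util.concurrent.Semaphore;

public class NegativePermitCountdown {

    public static void main(String[] args) {
        final int events = 5;
        // -(N - 1) initial permits: the Nth release() raises the count to 1,
        // which is exactly when acquireUninterruptibly() can proceed
        final Semaphore allDone = new Semaphore(-(events - 1));

        for (int i = 0; i < events; ++i) {
            new Thread(allDone::release).start();
        }

        allDone.acquireUninterruptibly(); // returns only after all 5 releases
        System.out.println("all events observed");
    }
}

java.util.concurrent.CountDownLatch expresses the same intent more directly; the negative-permit form used in these examples is equivalent for a single waiter.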

From source file:org.commoncrawl.service.listcrawler.CrawlHistoryManager.java

private static void launchInTestMode() {

    File baseTestDir = new File("/tmp/logManagerTest");
    FileUtils.recursivelyDeleteFile(baseTestDir);
    baseTestDir.mkdir();
    File remoteDir = new File(baseTestDir, "remote");
    File localDir = new File(baseTestDir, "local");
    remoteDir.mkdir();
    localDir.mkdir();

    final TreeMap<String, URLFP> urlToFPMap = new TreeMap<String, URLFP>();
    final TreeMap<URLFP, String> urlFPToString = new TreeMap<URLFP, String>();

    Set<String> list1 = Sets.newHashSet(urlList1);
    Set<String> list2 = Sets.newHashSet(urlList2);
    final Set<String> combined = Sets.union(list1, list2);
    Set<String> difference = Sets.difference(list1, list2);
    final Set<String> completedURLS = new HashSet<String>();
    for (String url : combined) {
        URLFP fingerprint = URLUtils.getURLFPFromURL(url, true);
        urlToFPMap.put(url, fingerprint);
        urlFPToString.put(fingerprint, url);
    }

    File testInputFile1 = new File(localDir, "INPUT_LIST-" + System.currentTimeMillis());
    File testInputFile2 = new File(localDir, "INPUT_LIST-" + (System.currentTimeMillis() + 1));

    try {

        generateTestURLFile(testInputFile1, urlList1);
        generateTestURLFile(testInputFile2, urlList2);

        FileSystem localFileSystem = FileSystem.getLocal(CrawlEnvironment.getHadoopConfig());

        EventLoop eventLoop = new EventLoop();
        eventLoop.start();

        final CrawlHistoryManager logManager = new CrawlHistoryManager(localFileSystem,
                new Path(remoteDir.getAbsolutePath()), localDir, eventLoop, 0);

        final LinkedBlockingQueue<ProxyCrawlHistoryItem> queue = new LinkedBlockingQueue<ProxyCrawlHistoryItem>();

        final Semaphore initialListComplete = new Semaphore(0);

        logManager.startQueueLoaderThread(new CrawlQueueLoader() {

            @Override
            public void queueURL(URLFP urlfp, String url) {
                ProxyCrawlHistoryItem item = new ProxyCrawlHistoryItem();
                item.setOriginalURL(url);
                queue.add(item);
            }

            @Override
            public void flush() {
                // no-op: nothing buffered to flush
            }
        });

        Thread queueTestThread = new Thread(new Runnable() {

            @Override
            public void run() {
                while (true) {
                    try {
                        ProxyCrawlHistoryItem item = queue.take();

                        if (item.getOriginalURL().length() == 0) {
                            break;
                        } else {

                            System.out.println("Got:" + item.getOriginalURL());

                            CrawlURL urlObject = new CrawlURL();

                            Assert.assertTrue(!completedURLS.contains(item.getOriginalURL()));
                            completedURLS.add(item.getOriginalURL());

                            urlObject.setLastAttemptResult((byte) CrawlURL.CrawlResult.SUCCESS);
                            urlObject.setUrl(item.getOriginalURL());
                            urlObject.setResultCode(200);

                            logManager.crawlComplete(urlObject);

                            if (completedURLS.equals(combined)) {
                                System.out.println("Hit Trigger URL. Releasing InitialListComplete Sempahore");
                                initialListComplete.release(1);
                            }
                        }

                    } catch (InterruptedException e) {
                        // interrupted while blocked on the queue; retry the take
                    }
                }
            }

        });

        queueTestThread.start();

        logManager.loadList(testInputFile1, 0);
        logManager.loadList(testInputFile2, 0);
        System.out.println("Waiting for Initial List to Complete");
        initialListComplete.acquireUninterruptibly();
        System.out.println("Woke Up");

        try {
            eventLoop.getEventThread().join();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }

    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:org.commoncrawl.service.listcrawler.DataTransferAgent.java

static int uploadSingeFile(CCBridgeServerMapping mapping, FileSystem fs, Configuration conf, Path hdfsFilePath,
        String uploadName, EventLoop eventLoop) throws IOException {

    final FileStatus fileStatus = fs.getFileStatus(hdfsFilePath);
    LOG.info("Uploading:" + uploadName + " size:" + fileStatus.getLen() + " to:" + mapping._internalName);

    {
        // construct url 
        URL fePostURL = new URL("http://" + mapping._externalName + ":8090/");
        LOG.info("POST URL IS:" + fePostURL.toString());

        // open input stream 
        final FSDataInputStream is = fs.open(hdfsFilePath);
        final Semaphore blockingSemaphore = new Semaphore(0);
        NIOHttpConnection connection = null;
        try {
            // create connection 
            connection = new NIOHttpConnection(fePostURL, eventLoop.getSelector(), eventLoop.getResolver(),
                    null);
            // set listener 
            connection.setListener(new Listener() {

                @Override
                public void HttpConnectionStateChanged(NIOHttpConnection theConnection, State oldState,
                        State state) {
                    LOG.info("Connection State Changed to:" + state.toString());
                    if (state == State.DONE || state == State.ERROR) {
                        //LOG.info("Connection Transition to Done or Error");
                        //LOG.info("Response Headers:" + theConnection.getResponseHeaders().toString());
                        blockingSemaphore.release();
                    }
                }

                @Override
                public void HttpContentAvailable(NIOHttpConnection theConnection, NIOBufferList contentBuffer) {
                    // no-op: the response body is not consumed here
                }
            });
            // set headers 
            connection.getRequestHeaders().reset();
            connection.getRequestHeaders().prepend("PUT /put?src=" + uploadName + " HTTP/1.1", null);
            connection.getRequestHeaders().set("Host", mapping._internalName + ":8090");
            connection.getRequestHeaders().set("Content-Length", Long.toString(fileStatus.getLen()));
            connection.getRequestHeaders().set("Connection", "keep-alive");
            connection.setPopulateDefaultHeaderItems(false);

            final LinkedBlockingDeque<BufferStruct> _loaderQueue = new LinkedBlockingDeque<BufferStruct>(20);
            final AtomicBoolean eof = new AtomicBoolean();
            final ByteBuffer sentinel = ByteBuffer.allocate(4096);
            // the sentinel is recognized by reference below; leaving its position at 0
            // means it contributes no data when written at EOF
            final Thread loaderThread = new Thread(new Runnable() {

                int _id = 0;

                @Override
                public void run() {
                    int bytesRead;
                    byte incomingBuffer[] = new byte[4096 * 10];
                    try {
                        while ((bytesRead = is.read(incomingBuffer)) != -1) {
                            ByteBuffer buffer = ByteBuffer.wrap(incomingBuffer, 0, bytesRead);
                            buffer.position(bytesRead);

                            //LOG.info("Loader Thread Read:"+ bytesRead + " Buffer:" + ++_id);
                            try {
                                _loaderQueue.put(new BufferStruct(buffer, _id));
                            } catch (InterruptedException e) {
                                LOG.error(CCStringUtils.stringifyException(e));
                                break;
                            }
                            incomingBuffer = new byte[4096 * 10];
                        }
                        try {
                            _loaderQueue.put(new BufferStruct(sentinel, ++_id));
                        } catch (InterruptedException e) {
                            // interrupted before the sentinel could be queued; give up quietly
                        }
                    } catch (IOException e) {
                        LOG.error(CCStringUtils.stringifyException(e));
                        return;
                    }
                }

            });

            loaderThread.start();

            // set data source ... 
            connection.setDataSource(new DataSource() {

                int bytesTransferred = 0;

                @Override
                public boolean read(NIOBufferList dataBuffer) throws IOException {
                    if (eof.get())
                        return true;
                    //LOG.info("Connect read callback triggered");
                    BufferStruct buffer = _loaderQueue.poll();
                    if (buffer != null) {
                        if (buffer._buffer != sentinel) {
                            //LOG.info("Got Buffer:"+ buffer._id);
                            if (buffer._id == 1) {
                                //LOG.info("Inital Buffer Bytes:" + new String(buffer._buffer.array(),0,10).toString());
                            }
                            dataBuffer.write(buffer._buffer);
                            bytesTransferred += buffer._buffer.limit();
                            //LOG.info("Read:" + buffer._buffer.limit() + " Transfered:" + bytesTransferred);
                            return false;
                        } else {
                            //LOG.info("EOF Condition");
                            dataBuffer.write(sentinel);
                            eof.set(true);
                            return true;
                        }
                    }
                    return false;
                }
            });

            // open connection 
            connection.open();
            // wait for connection to complete ... 
            blockingSemaphore.acquireUninterruptibly();
            // kill loader thread 
            loaderThread.interrupt();
            try {
                LOG.info("Waiting for Loader Thread");
                loaderThread.join();
                LOG.info("Done Waiting for Loader Thread");
            } catch (InterruptedException e) {
                // proceed to cleanup even if the join is interrupted
            }
        } finally {
            is.close();
            if (connection != null) {
                connection.close();
                LOG.info("Response Code for File:" + uploadName + "to Host: " + mapping._internalName + " is:"
                        + connection.getResponseHeaders().getHttpResponseCode());
                return connection.getResponseHeaders().getHttpResponseCode();
                /*
                if (connection.getResponseHeaders().getHttpResponseCode() != 200) { 
                  throw new IOException("Failed to upload file:" + dataFile.getName() + " responseCode:" + connection.getResponseHeaders().getHttpResponseCode());
                }
                */
            }
        }
    }
    // something went wrong ??? 
    LOG.error("Failed to upload file:" + uploadName + " unknown response code");
    return 500;
}
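
Besides the blocking semaphore, the upload above uses a second idiom worth noting: a dedicated sentinel buffer is pushed through the blocking queue as a poison pill to mark end-of-stream, and the consumer detects it by reference comparison, so its contents never matter. A distilled sketch of that handoff (names are illustrative):

import java.util.concurrent.LinkedBlockingDeque;

public class PoisonPillSketch {

    // detected by identity (==), never by content
    static final byte[] SENTINEL = new byte[0];

    public static void main(String[] args) throws InterruptedException {
        final LinkedBlockingDeque<byte[]> queue = new LinkedBlockingDeque<byte[]>(20);

        Thread producer = new Thread(() -> {
            try {
                for (int i = 0; i < 3; ++i) {
                    queue.put(new byte[] { (byte) i });
                }
                queue.put(SENTINEL); // end-of-stream marker
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        });
        producer.start();

        byte[] chunk;
        while ((chunk = queue.take()) != SENTINEL) {
            System.out.println("got chunk of length " + chunk.length);
        }
        System.out.println("producer done");
    }
}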

From source file:org.commoncrawl.service.listcrawler.ProxyServlet.java

@Override
public void doGet(final HttpServletRequest req, final HttpServletResponse response)
        throws ServletException, IOException {

    // allocate a response data object ... which will be used by async thread to pass data to calling thread...
    final AsyncResponse responseData = new AsyncResponse();

    String queryString = req.getQueryString();
    final String originalPath = req.getParameter("url");
    final String format = (req.getParameter("renderAs") != null) ? req.getParameter("renderAs")
            : PROXY_RENDER_TYPE_NONE;
    final String timeoutStr = req.getParameter("timeout");
    final String skipHTTPGET = req.getParameter("nocachenodice");

    final long desiredTimeOutInMS = (timeoutStr != null) ? Long.parseLong(timeoutStr) : 30000;
    final boolean skipHTTPGet = (skipHTTPGET != null && skipHTTPGET.equals("1"));
    final Semaphore semaphore = new Semaphore(0);

    //LOG.info("Got Request:" + originalPath);

    final long requestStartTime = System.currentTimeMillis();

    //LOG.info("Processing Request:" + originalPath);

    String hostName = (originalPath != null) ? URLUtils.fastGetHostFromURL(originalPath) : "";
    String fullPath = null;
    if (originalPath == null || !originalPath.startsWith("http:") || hostName.length() == 0
            || queryString == null) {
        LOG.info("URL From Proxy Request:" + originalPath + " is Invalid. Sending 400 Result Code");
        responseData.setHttpErrorResponse(400, "URL From Proxy Request:" + originalPath + " is Invalid");
    } else {

        // build url path from query string 
        int pathIndex = queryString.indexOf("url=");
        // grab the whole path ... 
        fullPath = queryString.substring(pathIndex + "url=".length());
        // unescape it 
        fullPath = URLDecoder.decode(fullPath, "UTF-8");

        //LOG.info("Doing Cache Lookup for URL:" + fullPath);
        boolean isAsyncOperation = checkCacheForURLV2(fullPath, responseData, semaphore, desiredTimeOutInMS,
                skipHTTPGet);
        if (isAsyncOperation) {
            //LOG.info("Waiting on Async Completion for URL:" + fullPath);
            semaphore.acquireUninterruptibly();
            //LOG.info("Done Waiting for Async Completion for URL:" + fullPath);
        }
    }

    // upon return we need to check the response object ... 
    if (responseData.getResponseType() == AsyncResponse.ResponseType.CacheItemResponse) {
        // send cache item response ... 
        sendCacheItemResponse(req, response, responseData.getCacheItem(), false, format, responseData,
                requestStartTime);
    } else if (responseData.getResponseType() == AsyncResponse.ResponseType.CrawlURLResponse) {
        sendCrawlURLResponse(req, response, responseData.getCrawlURL(), format, responseData, requestStartTime);
    } else if (responseData.getResponseType() == AsyncResponse.ResponseType.S3Response) {
        sendS3ItemResponse(req, response, responseData.getArcFileItem(), format, responseData,
                requestStartTime);
    } else {
        response.sendError(responseData.getHttpErrorCode(), responseData.getHttpErrorDesc());
        ProxyServer.getSingleton().logProxyFailure(responseData.getHttpErrorCode(),
                responseData.getHttpErrorDesc(), fullPath, "", responseData.getStartTime());
    }
}
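
One caveat in the servlet above: the request's timeout parameter is parsed into desiredTimeOutInMS, but acquireUninterruptibly() itself waits with no deadline, so the bound has to be honored by whichever asynchronous path eventually calls release(). If a hard deadline were wanted at the wait itself, tryAcquire with a timeout is the standard alternative. A sketch of what such a bounded wait could look like (a hypothetical variant, not the servlet's actual code):

import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;

public class BoundedWaitSketch {

    // returns true if the async side released within the deadline
    static boolean awaitWithDeadline(Semaphore semaphore, long timeoutMs) {
        try {
            // unlike acquireUninterruptibly(), tryAcquire waits at most timeoutMs
            return semaphore.tryAcquire(timeoutMs, TimeUnit.MILLISECONDS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // preserve the interrupt
            return false;
        }
    }
}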

From source file:org.commoncrawl.service.parser.client.Dispatcher.java

public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    CrawlEnvironment.setHadoopConfig(conf);
    String baseURL = "http://unknown.com/";
    if (args.length != 0) {
        baseURL = args[0];
    }
    URL baseURLObj;
    try {
        baseURLObj = new URL(baseURL);
    } catch (MalformedURLException e2) {
        throw new IOException("Invalid Base Link");
    }
    final URL finalBaseURL = (baseURLObj != null) ? baseURLObj : null;
    final DataOutputBuffer headerBuffer = new DataOutputBuffer();
    final DataOutputBuffer contentBuffer = new DataOutputBuffer();

    try {
        ByteStreams.readBytes(new InputSupplier<InputStream>() {

            @Override
            public InputStream getInput() throws IOException {
                return System.in;
            }
        }, new ByteProcessor<Long>() {

            @Override
            public Long getResult() {
                return 0L;
            }

            int currLineCharCount = 0;
            boolean processingHeaders = true;

            @Override
            public boolean processBytes(byte[] buf, int start, int length) throws IOException {

                if (processingHeaders) {
                    int current = start;
                    int end = current + length;
                    while (processingHeaders && current != end) {
                        if (buf[current] != '\r' && buf[current] != '\n') {
                            currLineCharCount++;
                        } else if (buf[current] == '\n') {
                            if (currLineCharCount == 0) {
                                headerBuffer.write(buf, start, current - start + 1);
                                processingHeaders = false;
                            }
                            currLineCharCount = 0;
                        }
                        current++;
                    }
                    if (processingHeaders) {
                        headerBuffer.write(buf, start, length);
                    } else {
                        length -= current - start;
                        start = current;
                    }
                }
                if (!processingHeaders) {
                    contentBuffer.write(buf, start, length);
                }
                return true;
            }
        });

        LOG.info("HEADER LEN:" + headerBuffer.getLength());
        // System.out.println(new String(headerBuffer.getData(),0,headerBuffer.getLength(),Charset.forName("UTF-8")));
        LOG.info("CONTENT LEN:" + contentBuffer.getLength());
        //System.out.println(new String(contentBuffer.getData(),0,contentBuffer.getLength(),Charset.forName("UTF-8")));
        // decode header bytes ... 
        String header = "";
        if (headerBuffer.getLength() != 0) {
            try {
                header = new String(headerBuffer.getData(), 0, headerBuffer.getLength(),
                        Charset.forName("UTF-8"));
            } catch (Exception e) {
                LOG.warn(CCStringUtils.stringifyException(e));
                header = new String(headerBuffer.getData(), 0, headerBuffer.getLength(),
                        Charset.forName("ASCII"));
            }
        }
        final String headersFinal = (header != null) ? header : "";

        LOG.info("Starting Event Loop");
        final EventLoop eventLoop = new EventLoop();
        eventLoop.start();

        try {
            // create fake hosts file ...  
            //String hosts = "10.0.20.101:8072";
            // reader 
            //Reader reader = new StringReader(hosts);
            // dispatcher init 
            LOG.info("initializing Dispatcher");
            final Dispatcher dispatcher = new Dispatcher(eventLoop, "parserNodes");
            LOG.info("Waiting for a few seconds");
            Thread.sleep(5000);
            Thread threads[] = new Thread[TEST_THREAD_COUNT];
            // -(N - 1) initial permits: the Nth release() brings the count to 1 and unblocks the waiter
            final Semaphore threadWaitSem = new Semaphore(-(TEST_THREAD_COUNT - 1));
            // start TEST_THREAD_COUNT worker threads
            for (int threadIdx = 0; threadIdx < TEST_THREAD_COUNT; ++threadIdx) {
                threads[threadIdx] = new Thread(new Runnable() {

                    @Override
                    public void run() {
                        for (int i = 0; i < ITERATIONS_PER_THREAD; ++i) {
                            // build parse request 
                            ParseRequest request = new ParseRequest();
                            request.setDocId(1);
                            request.setDomainId(1);
                            request.setDocURL(finalBaseURL.toString());
                            request.setDocHeaders(headersFinal);
                            request.setDocContent(
                                    new FlexBuffer(contentBuffer.getData(), 0, contentBuffer.getLength()));
                            //LOG.info("Dispatching parse request");
                            ParseResult result = dispatcher.dispatchRequest(request);
                            LOG.info("TID[" + Thread.currentThread().getId() + "]ReqID[" + i + "]" + " Success:"
                                    + ((result != null) ? result.getParseSuccessful() : false) + " LinkCount:"
                                    + ((result != null) ? result.getExtractedLinks().size() : 0));
                        }
                        LOG.info("Thread:" + Thread.currentThread().getId() + " Exiting");
                        threadWaitSem.release();
                    }

                });
                threads[threadIdx].start();
            }

            LOG.info("Waiting for threads to die");
            threadWaitSem.acquireUninterruptibly();
            LOG.info("All Threads dead.");

        } finally {
            eventLoop.stop();
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    } catch (InterruptedException e) {
        // interrupted during the startup sleep; exit
    }
}

From source file:org.commoncrawl.service.parser.client.ParserNode.java

public ParseResult dispatchRequest(final ParseRequest request) throws IOException {
    final AtomicReference<ParseResult> result = new AtomicReference<ParseResult>();

    if (_online.get()) {
        //      LOG.info("Dispatching Parse Request for URL:" + request.getDocURL() 
        //          + " to Node:" + _nodeName);  
        final Semaphore requestSemaphore = new Semaphore(0);

        _eventLoop.queueAsyncCallback(new org.commoncrawl.async.Callback() {

            @Override
            public void execute() {
                try {
                    _asyncStub.parseDocument(request, new Callback<ParseRequest, ParseResult>() {

                        @Override
                        public void requestComplete(AsyncRequest<ParseRequest, ParseResult> request) {
                            try {
                                //                  LOG.info("Parse Request for URL:" + request.getInput().getDocURL() 
                                //                      + " recvd responseStatus:" + request.getStatus() 
                                //                      + " from Node:" + _nodeName); 

                                if (request.getStatus() == Status.Success) {
                                    result.set(request.getOutput());
                                }
                            } finally {
                                //                  LOG.info("Releasing Request Semaphore for URL:" + request.getInput().getDocURL());
                                requestSemaphore.release();
                            }
                        }
                    });
                } catch (Exception e) {
                    LOG.error(CCStringUtils.stringifyException(e));
                    LOG.info("Releasing Request Semaphore for URL:" + request.getDocURL());
                    requestSemaphore.release();
                }
            }
        });

        //      LOG.info("Waiting on ParseReq Semaphore for URL:"+ request.getDocURL());
        requestSemaphore.acquireUninterruptibly();
        //      LOG.info("ParseReq Semaphore signlaed for URL:"+ request.getDocURL());
    }
    return result.get();
}
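
dispatchRequest() above is a general recipe for turning a callback-style API into a blocking call: the result crosses threads in an AtomicReference while a zero-permit semaphore parks the caller until the callback has run, and the semaphore is released on every path (success, failure, or dispatch error) so the caller cannot hang. Stripped of the RPC specifics, the shape is roughly the following (a sketch; the generic helper is illustrative):

import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;

public class AsyncToSyncSketch {

    // invoke an async API and block until its callback delivers a value
    static <T> T awaitResult(Consumer<Consumer<T>> asyncCall) {
        final AtomicReference<T> result = new AtomicReference<T>();
        final Semaphore done = new Semaphore(0);
        asyncCall.accept(value -> {
            result.set(value);
            done.release(); // release on every path, or the caller hangs
        });
        done.acquireUninterruptibly(); // block until the callback has run
        return result.get(); // null if the callback delivered no result
    }

    public static void main(String[] args) {
        String reply = awaitResult(cb -> new Thread(() -> cb.accept("parsed")).start());
        System.out.println(reply);
    }
}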

From source file:org.commoncrawl.service.queryserver.master.S3Helper.java

public static ArcFileItem retrieveArcFileItem(ArchiveInfo archiveInfo, EventLoop eventLoop) throws IOException {

    // the default bucket id 
    String bucketId = "commoncrawl-crawl-002";

    //ok, see if we need to switch buckets 
    if (archiveInfo.getCrawlNumber() == 1) {
        bucketId = "commoncrawl";
    }

    S3Downloader downloader = new S3Downloader(bucketId, "", "", false);

    // now activate the segment log ... 
    final Semaphore downloadCompleteSemaphore = new Semaphore(0);
    final StreamingArcFileReader arcFileReader = new StreamingArcFileReader(false);
    //arcFileReader.setArcFileHasHeaderItemFlag(false);

    // create a buffer list we will append incoming content into ... 
    final LinkedList<ByteBuffer> bufferList = new LinkedList<ByteBuffer>();

    downloader.initialize(new S3Downloader.Callback() {

        @Override
        public boolean contentAvailable(int itemId, String itemKey, NIOBufferList contentBuffer) {
            LOG.info("ContentQuery contentAvailable called for Item:" + itemKey + " totalBytesAvailable:"
                    + contentBuffer.available());

            try {
                while (contentBuffer.available() != 0) {
                    bufferList.add(contentBuffer.read());
                }
                return true;
            } catch (IOException e) {
                LOG.error(CCStringUtils.stringifyException(e));
                return false;
            }
        }

        @Override
        public void downloadComplete(int itemId, String itemKey) {
            LOG.info("S3 Download Complete for item:" + itemKey);
            downloadCompleteSemaphore.release();
        }

        @Override
        public void downloadFailed(int itemId, String itemKey, String errorCode) {
            LOG.info("S3 Download Failed for item:" + itemKey);
            downloadCompleteSemaphore.release();
        }

        @Override
        public boolean downloadStarting(int itemId, String itemKey, int contentLength) {
            LOG.info("ContentQuery DownloadStarting for Item:" + itemKey + " contentLength:" + contentLength);
            return true;
        }

    }, eventLoop);

    LOG.info("Starting request for Item:"
            + hdfsNameToS3ArcFileName(archiveInfo.getArcfileDate(), archiveInfo.getArcfileIndex()) + " Offset:"
            + archiveInfo.getArcfileOffset());

    int sizeToRetrieve = (archiveInfo.getCompressedSize() != 0) ? archiveInfo.getCompressedSize() : 30000;
    sizeToRetrieve += 10;

    downloader.fetchPartialItem(
            hdfsNameToS3ArcFileName(archiveInfo.getArcfileDate(), archiveInfo.getArcfileIndex()),
            archiveInfo.getArcfileOffset() - 10, sizeToRetrieve);
    downloadCompleteSemaphore.acquireUninterruptibly();

    if (bufferList.size() == 0) {
        return null;
    }

    ByteBuffer firstBuffer = bufferList.getFirst();
    if (firstBuffer != null) {
        int offsetToGZIPHeader = scanForGZIPHeader(firstBuffer.duplicate());
        if (offsetToGZIPHeader != -1) {
            firstBuffer.position(offsetToGZIPHeader);
            LOG.info("*** Offset to GZIP Header:" + offsetToGZIPHeader);
        } else {
            LOG.error("*** Failed to find GZIP Header offset");
        }
    }

    // now try to decode content if possible
    for (ByteBuffer buffer : bufferList) {
        LOG.info("Adding Buffer of Size:" + buffer.remaining() + " Position:" + buffer.position() + " Limit:"
                + buffer.limit());
        arcFileReader.available(buffer);
    }

    ArcFileItem item = arcFileReader.getNextItem();

    if (item != null) {
        LOG.info("Request Returned item:" + item.getUri());
        LOG.info("Uncompressed Size:" + item.getContent().getCount());
    }
    return item;
}

From source file:org.commoncrawl.service.queryserver.query.DomainListQuery.java

@Override
protected long executeRemote(final FileSystem fileSystem, final Configuration conf, EventLoop eventLoop,
        SlaveDatabaseIndex instanceIndex, File tempFirDir,
        QueryProgressCallback<DomainListQueryInfo, Text, SubDomainMetadata> progressCallback)
        throws IOException {

    int shardsProcessed = 0;

    // ok create a semaphore for the number of shards we are going to query ...
    final Semaphore semaphore = new Semaphore(-(getCommonQueryInfo().getRelevantShardIds().size() - 1));
    // and create a record count array 
    final long recordCounts[] = new long[getCommonQueryInfo().getRelevantShardIds().size()];
    final IOException exceptions[] = new IOException[getCommonQueryInfo().getRelevantShardIds().size()];

    int threadIdx = 0;
    // ok dispatch queries for each shard we are responsible for ... 
    for (int shardId : getCommonQueryInfo().getRelevantShardIds()) {

        final int currentShardId = shardId;
        final int currentThreadIdx = threadIdx++;

        Thread subQueryThread = new Thread(new Runnable() {

            @Override
            public void run() {
                Path shardOutputPath = getHDFSQueryResultsFilePathForShard(currentShardId);

                LOG.info("Execute Remote for Query:" + getQueryId() + " for shardId:" + currentShardId
                        + "  Creating spill file @:" + shardOutputPath);

                try {
                    // create SequenceFile Spill Writer ... 
                    SequenceFileSpillWriter<Text, SubDomainMetadata> spillWriter = new SequenceFileSpillWriter<Text, SubDomainMetadata>(
                            fileSystem, conf, shardOutputPath, Text.class, SubDomainMetadata.class, null, true);
                    try {
                        LOG.info("Execute Remote for Query:" + getQueryId()
                                + " calling executeDomainListQuery on index");
                        // scan index for matching patterns ... spill into writer ...
                        recordCounts[currentThreadIdx] += _slaveDatabaseIndex.queryDomainsGivenPattern(
                                getQueryData().getSearchPattern(), currentShardId, spillWriter);
                        LOG.info("Execute Remote for Query:" + getQueryId()
                                + " executeDomainListQuery returned:" + recordCounts[currentThreadIdx]);
                    } finally {
                        spillWriter.close();
                        // increment semaphore count 
                        semaphore.release();
                    }
                } catch (IOException e) {
                    LOG.error("Execute Remote for Query:" + getQueryId()
                            + " executeDomainListQuery failed with error:"
                            + CCStringUtils.stringifyException(e));
                    exceptions[currentThreadIdx] = e;
                }
            }
        });
        subQueryThread.start();
    }

    // ok block until all queries are complete
    LOG.info("Query:" + getQueryId() + " Waiting on Worker Threads");
    semaphore.acquireUninterruptibly();
    LOG.info("Query:" + getQueryId() + " All Threads Compelted");

    for (IOException e : exceptions) {
        if (e != null) {
            LOG.error(
                    "Query:" + getQueryId() + " Failed with Exception:" + CCStringUtils.stringifyException(e));
            throw e;
        }
    }
    long cumulativeRecordCount = 0L;
    for (long recordCount : recordCounts)
        cumulativeRecordCount += recordCount;
    return cumulativeRecordCount;
}

From source file:org.commoncrawl.util.MapReduceJobStatsWriter.java

/** close and flush the log file **/
public void close(final Callback optionalAsyncCallback) {

    if (_eventLoop != null) {
        // allocate a blocking semaphore in case async callback was not specified 
        final Semaphore blockingCallSemaphore = new Semaphore(0);

        // perform shutdown in worker thread ... 
        _eventLoop.setTimer(new Timer(0, false, new Timer.Callback() {

            @Override
            public void timerFired(Timer timer) {

                try {
                    try {
                        if (_writer != null) {
                            _writer.close();
                        }
                    } catch (IOException e) {
                        LOG.error(CCStringUtils.stringifyException(e));
                        _lastLogWriteException = e;
                    } finally {
                        _writer = null;

                        try {
                            if (_outputStream != null) {
                                _outputStream.flush();
                                _outputStream.close();
                            }
                        } catch (IOException e) {
                            LOG.error(CCStringUtils.stringifyException(e));
                            _lastLogWriteException = e;
                        } finally {
                            _outputStream = null;
                        }
                    }

                    // now figure out if everything went smoothly or not 
                    if (_entryCount != 0 && _lastLogWriteException == null) {
                        // ok so far so good... time to copy the local log file to hdfs ... 
                        Path hdfsPath = new Path(Environment.HDFS_LOGCOLLECTOR_BASEDIR,
                                _logFamily + "/" + _groupingKey + "/" + Long.toString(_uniqueKey));

                        try {

                            // delete the remote file if it exists
                            _remoteFileSystem.delete(hdfsPath, false);
                            // ensure parent path 
                            _remoteFileSystem.mkdirs(hdfsPath.getParent());
                            // now if the local file exists and has data 
                            if (_tempFileName.exists() && _tempFileName.length() != 0) {
                                // copy the file to hdfs 
                                _remoteFileSystem.copyFromLocalFile(new Path(_tempFileName.getAbsolutePath()),
                                        hdfsPath);
                            }
                        } catch (IOException e) {
                            LOG.error(CCStringUtils.stringifyException(e));
                            _lastLogWriteException = e;
                        }
                    }
                } finally {
                    // always delete the temp file ... 
                    _tempFileName.delete();

                    // release semaphore 
                    blockingCallSemaphore.release();

                    // if callback was specified , call it now 
                    if (optionalAsyncCallback != null) {
                        optionalAsyncCallback.execute();
                    }

                    // stop the event loop ... 
                    _eventLoop.stop();
                    _eventLoop = null;
                }
            }
        }));

        // now if callback was not specified... wait for blocking semaphore to signal ... 
        if (optionalAsyncCallback == null) {
            blockingCallSemaphore.acquireUninterruptibly();
        }
    }
}
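
This last example shows a dual-mode shutdown: the timer body always releases the semaphore, but only a caller that passed no callback acquires it, so one code path serves both a blocking and a non-blocking close(). Reduced to its essentials, the pattern looks roughly like this (a sketch with illustrative names):

import java.util.concurrent.Semaphore;

public class OptionallyBlockingClose {

    public void close(final Runnable optionalCallback) {
        final Semaphore done = new Semaphore(0);

        new Thread(() -> {
            try {
                // ... flush state and release resources ...
            } finally {
                done.release();             // always signal completion
                if (optionalCallback != null) {
                    optionalCallback.run(); // async mode: notify and return
                }
            }
        }).start();

        if (optionalCallback == null) {
            done.acquireUninterruptibly(); // sync mode: block until done
        }
    }
}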