Example usage for java.lang.Math.ceil

List of usage examples for java.lang.Math.ceil


On this page you can find example usages of java.lang.Math.ceil.


public static double ceil(double a) 

Source Link


Returns the smallest (closest to negative infinity) double value that is greater than or equal to the argument and is equal to a mathematical integer.


From source file:com.linkedin.pinot.perf.MultiValueReaderWriterBenchmark.java

public static void main(String[] args) throws Exception {
    List<String> lines = IOUtils.readLines(new FileReader(new File(args[0])));
    int totalDocs = lines.size();
    int max = Integer.MIN_VALUE;
    int maxNumberOfMultiValues = Integer.MIN_VALUE;
    int totalNumValues = 0;
    int data[][] = new int[totalDocs][];
    for (int i = 0; i < lines.size(); i++) {
        String line = lines.get(i);
        String[] split = line.split(",");
        totalNumValues = totalNumValues + split.length;
        if (split.length > maxNumberOfMultiValues) {
            maxNumberOfMultiValues = split.length;
        }/*from w  w w  .ja v  a  2s. com*/
        data[i] = new int[split.length];
        for (int j = 0; j < split.length; j++) {
            String token = split[j];
            int val = Integer.parseInt(token);
            data[i][j] = val;
            if (val > max) {
                max = val;
    int maxBitsNeeded = (int) Math.ceil(Math.log(max) / Math.log(2));
    int size = 2048;
    int[] offsets = new int[size];
    int bitMapSize = 0;
    File outputFile = new File("output.mv.fwd");

    FixedBitSkipListSCMVWriter fixedBitSkipListSCMVWriter = new FixedBitSkipListSCMVWriter(outputFile,
            totalDocs, totalNumValues, maxBitsNeeded);

    for (int i = 0; i < totalDocs; i++) {
        fixedBitSkipListSCMVWriter.setIntArray(i, data[i]);
        if (i % size == size - 1) {
            MutableRoaringBitmap rr1 = MutableRoaringBitmap.bitmapOf(offsets);
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            DataOutputStream dos = new DataOutputStream(bos);
            //System.out.println("Chunk " + i / size + " bitmap size:" + bos.size());
            bitMapSize += bos.size();
        } else if (i == totalDocs - 1) {
            MutableRoaringBitmap rr1 = MutableRoaringBitmap.bitmapOf(Arrays.copyOf(offsets, i % size));
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            DataOutputStream dos = new DataOutputStream(bos);
            //System.out.println("Chunk " + i / size + " bitmap size:" + bos.size());
            bitMapSize += bos.size();
    System.out.println("Output file size:" + outputFile.length());
    System.out.println("totalNumberOfDoc\t\t\t:" + totalDocs);
    System.out.println("totalNumberOfValues\t\t\t:" + totalNumValues);
    System.out.println("chunk size\t\t\t\t:" + size);
    System.out.println("Num chunks\t\t\t\t:" + totalDocs / size);
    int numChunks = totalDocs / size + 1;
    int totalBits = (totalNumValues * maxBitsNeeded);
    int dataSizeinBytes = (totalBits + 7) / 8;

    System.out.println("Raw data size with fixed bit encoding\t:" + dataSizeinBytes);
    System.out.println("\nPer encoding size");
    System.out.println("size (offset + length)\t\t\t:" + ((totalDocs * (4 + 4)) + dataSizeinBytes));
    System.out.println("size (offset only)\t\t\t:" + ((totalDocs * (4)) + dataSizeinBytes));
    System.out.println("bitMapSize\t\t\t\t:" + bitMapSize);
    System.out.println("size (with bitmap)\t\t\t:" + (bitMapSize + (numChunks * 4) + dataSizeinBytes));

    System.out.println("Custom Bitset\t\t\t\t:" + (totalNumValues + 7) / 8);
    System.out.println("size (with custom bitset)\t\t\t:"
            + (((totalNumValues + 7) / 8) + (numChunks * 4) + dataSizeinBytes));


From source file:loanbroker.normalizer.NormalizerOurJsonBank.java

public static void main(String[] args) {
    try {/*from  w w  w .  j  a  v a2s  . c o  m*/
        ConnectionFactory factory = new ConnectionFactory();
        Connection connection = factory.newConnection();
        Channel channel = connection.createChannel();

        channel.exchangeDeclare(EXCHANGE_NAME, "fanout");
        String queueName = channel.queueDeclare().getQueue();
        channel.queueBind(queueName, EXCHANGE_NAME, "");
        //channel.queueDeclare(RPC_QUEUE_NAME,false, false, false, null);
        System.out.println(" [*] Waiting for messages. To exit press CTRL+C");

        QueueingConsumer consumer = new QueueingConsumer(channel);
        channel.basicConsume(queueName, true, consumer);

        Channel channelOutput = connection.createChannel();
        channelOutput.exchangeDeclare(ExchangeName.GLOBAL, "direct");
        String queueNameProducer = channelOutput.queueDeclare().getQueue();
        channelOutput.queueBind(queueNameProducer, ExchangeName.GLOBAL, RoutingKeys.NormalizerToAggregator);

        LoanResponse loanResponse;
        while (true) {
            QueueingConsumer.Delivery delivery = consumer.nextDelivery();
            System.out.println("CorrelationId: " + delivery.getProperties().getCorrelationId());

            String message = new String(delivery.getBody());

            JSONObject jsonObj = new JSONObject(message);

            loanResponse = new LoanResponse(jsonObj.getInt("ssn"), Math.ceil(jsonObj.getDouble("interestRate")),
                    "Our Json Bank", delivery.getProperties().getCorrelationId());
            System.out.println("renter: " + loanResponse.getInterestRate());
            System.out.println("ssn: " + loanResponse.getSsn());
            System.out.println("bank : " + loanResponse.getBank());
            System.out.println("JSON:" + loanResponse);
            System.out.println("TOstring:" + jsonObj.toString());
            Gson g = new Gson();
            String fm = g.toJson(loanResponse);
            channelOutput.basicPublish(ExchangeName.GLOBAL, RoutingKeys.NormalizerToAggregator, null,


    } catch (IOException | TimeoutException | InterruptedException e) {

From source file:friendsandfollowers.DBFollowersIDs.java

public static void main(String[] args) throws ClassNotFoundException, SQLException, JSONException,
        FileNotFoundException, UnsupportedEncodingException {

    // Check arguments that passed in
    if ((args == null) || (args.length == 0)) {
        System.err.println("2 Parameters are required plus one optional " + "parameter to launch a Job.");
        System.err.println("First: String 'OUTPUT: /output/path/'");
        System.err.println("Second: (int) Number of ids to fetch. " + "Provide number which increment by 5000 "
                + "(5000, 10000, 15000 etc) " + "or -1 to fetch all ids.");
        System.err.println("Third (optional): 'screen_name / user_id_str'");
        System.err.println("If 3rd argument not provided then provide" + " Twitter users through database.");
        System.exit(-1);/* w ww  .  ja va  2  s . c om*/

    MysqlDB DB = new MysqlDB();
    AppOAuth AppOAuths = new AppOAuth();
    Misc helpers = new Misc();
    String endpoint = "/followers/ids";

    String OutputDirPath = null;
    try {
        OutputDirPath = StringEscapeUtils.escapeJava(args[0]);
    } catch (Exception e) {
        System.err.println("Argument" + args[0] + " must be an String.");

    int IDS_TO_FETCH_INT = -1;
    try {
        IDS_TO_FETCH_INT = Integer.parseInt(args[1]);
    } catch (NumberFormatException e) {
        System.err.println("Argument" + args[1] + " must be an integer.");

    int IDS_TO_FETCH = 0;
    if (IDS_TO_FETCH_INT > 5000) {

        float IDS_TO_FETCH_F = (float) IDS_TO_FETCH_INT / 5000;
        IDS_TO_FETCH = (int) Math.ceil(IDS_TO_FETCH_F);
    } else if ((IDS_TO_FETCH_INT <= 5000) && (IDS_TO_FETCH_INT > 0)) {
        IDS_TO_FETCH = 1;

    String targetedUser = "";
    if (args.length == 3) {
        try {
            targetedUser = StringEscapeUtils.escapeJava(args[2]);
        } catch (Exception e) {
            System.err.println("Argument" + args[2] + " must be an String.");

    try {

        TwitterFactory tf = AppOAuths.loadOAuthUser(endpoint);
        Twitter twitter = tf.getInstance();

        int RemainingCalls = AppOAuths.RemainingCalls;
        int RemainingCallsCounter = 0;
        System.out.println("First Time Remianing Calls: " + RemainingCalls);

        String Screen_name = AppOAuths.screen_name;
        System.out.println("First Time Loaded OAuth Screen_name: " + Screen_name);

        IDs ids;
        System.out.println("Listing followers ids.");

        // if targetedUser not provided by argument, then look into database.
        if (StringUtils.isEmpty(targetedUser)) {

            String selectQuery = "SELECT * FROM `followers_parent` WHERE " + "`targeteduser` != '' AND "
                    + "`nextcursor` != '0' AND " + "`nextcursor` != '2'";

            ResultSet results = DB.selectQ(selectQuery);

            int numRows = DB.numRows(results);
            if (numRows < 1) {
                System.err.println("No User in database to get followersIDS");

            OUTERMOST: while (results.next()) {

                int followers_parent_id = results.getInt("id");
                targetedUser = results.getString("targeteduser");
                long cursor = results.getLong("nextcursor");
                System.out.println("Targeted User: " + targetedUser);

                int idsLoopCounter = 0;
                int totalIDs = 0;

                // put idsJSON in a file
                PrintWriter writer = new PrintWriter(OutputDirPath + "/" + targetedUser, "UTF-8");

                // call different functions for screen_name and id_str
                Boolean chckedNumaric = helpers.isNumeric(targetedUser);

                do {
                    ids = null;
                    try {

                        if (chckedNumaric) {

                            long LongValueTargetedUser = Long.valueOf(targetedUser).longValue();

                            ids = twitter.getFollowersIDs(LongValueTargetedUser, cursor);
                        } else {
                            ids = twitter.getFollowersIDs(targetedUser, cursor);

                    } catch (TwitterException te) {

                        // do not throw if user has protected tweets, 
                        // or if they deleted their account
                        if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED
                                || te.getStatusCode() == HttpResponseCode.NOT_FOUND) {

                            System.out.println(targetedUser + " is protected or account is deleted");
                        } else {
                            System.out.println("Followers Get Exception: " + te.getMessage());

                        // If rate limit reached then switch Auth user
                        if (RemainingCallsCounter >= RemainingCalls) {

                            // load auth user
                            tf = AppOAuths.loadOAuthUser(endpoint);
                            twitter = tf.getInstance();

                                    "New User Loaded OAuth" + " Screen_name: " + AppOAuths.screen_name);

                            RemainingCalls = AppOAuths.RemainingCalls;
                            RemainingCallsCounter = 0;

                            System.out.println("New Remianing Calls: " + RemainingCalls);

                        // update cursor in "followers_parent"
                        String fieldValues = "`nextcursor` = 2";
                        String where = "id = " + followers_parent_id;
                        DB.Update("`followers_parent`", fieldValues, where);

                        // If error then switch to next user
                        continue OUTERMOST;

                    if (ids.getIDs().length > 0) {

                        totalIDs += ids.getIDs().length;
                        System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length);

                        JSONObject responseDetailsJson = new JSONObject();
                        JSONArray jsonArray = new JSONArray();
                        for (long id : ids.getIDs()) {
                        Object idsJSON = responseDetailsJson.put("ids", jsonArray);


                    // If rate limit reached then switch Auth user
                    if (RemainingCallsCounter >= RemainingCalls) {

                        // load auth user
                        tf = AppOAuths.loadOAuthUser(endpoint);
                        twitter = tf.getInstance();

                        System.out.println("New User Loaded OAuth " + "Screen_name: " + AppOAuths.screen_name);

                        RemainingCalls = AppOAuths.RemainingCalls;
                        RemainingCallsCounter = 0;

                        System.out.println("New Remianing Calls: " + RemainingCalls);

                    if (IDS_TO_FETCH_INT != -1) {
                        if (idsLoopCounter == IDS_TO_FETCH) {

                } while ((cursor = ids.getNextCursor()) != 0);
                System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs);

                // update cursor in "followers_parent"
                String fieldValues = "`nextcursor` = " + cursor;
                String where = "id = " + followers_parent_id;
                DB.Update("`followers_parent`", fieldValues, where);

            } // loop through every result found in db
        } else {

            // Second Argument Set, so we are here.
            System.out.println("screen_name / user_id_str passed by argument");

            int idsLoopCounter = 0;
            int totalIDs = 0;

            // put idsJSON in a file
            PrintWriter writer = new PrintWriter(
                    OutputDirPath + "/" + targetedUser + "_ids_" + helpers.getUnixTimeStamp(), "UTF-8");

            // call different functions for screen_name and id_str
            Boolean chckedNumaric = helpers.isNumeric(targetedUser);
            long cursor = -1;

            do {
                ids = null;
                try {

                    if (chckedNumaric) {

                        long LongValueTargetedUser = Long.valueOf(targetedUser).longValue();

                        ids = twitter.getFollowersIDs(LongValueTargetedUser, cursor);
                    } else {
                        ids = twitter.getFollowersIDs(targetedUser, cursor);

                } catch (TwitterException te) {

                    // do not throw if user has protected tweets, or if they deleted their account
                    if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED
                            || te.getStatusCode() == HttpResponseCode.NOT_FOUND) {

                        System.out.println(targetedUser + " is protected or account is deleted");
                    } else {
                        System.out.println("Followers Get Exception: " + te.getMessage());

                if (ids.getIDs().length > 0) {

                    totalIDs += ids.getIDs().length;
                    System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length);

                    JSONObject responseDetailsJson = new JSONObject();
                    JSONArray jsonArray = new JSONArray();

                    for (long id : ids.getIDs()) {

                    Object idsJSON = responseDetailsJson.put("ids", jsonArray);

                // If rate limit reach then switch Auth user
                if (RemainingCallsCounter >= RemainingCalls) {

                    // load auth user
                    tf = AppOAuths.loadOAuthUser(endpoint);
                    twitter = tf.getInstance();

                    System.out.println("New User Loaded OAuth Screen_name: " + AppOAuths.screen_name);

                    RemainingCalls = AppOAuths.RemainingCalls;
                    RemainingCallsCounter = 0;

                    System.out.println("New Remianing Calls: " + RemainingCalls);

                if (IDS_TO_FETCH_INT != -1) {
                    if (idsLoopCounter == IDS_TO_FETCH) {

            } while ((cursor = ids.getNextCursor()) != 0);

            System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs);

    } catch (TwitterException te) {
        // te.printStackTrace();
        System.err.println("Failed to get followers' ids: " + te.getMessage());
    System.out.println("!!!! DONE !!!!");

From source file:friendsandfollowers.DBFriendsIDs.java

public static void main(String[] args) throws ClassNotFoundException, SQLException, JSONException,
        FileNotFoundException, UnsupportedEncodingException {

    // Check arguments that passed in
    if ((args == null) || (args.length == 0)) {
        System.err.println("2 Parameters are required plus one optional " + "parameter to launch a Job.");
        System.err.println("First: String 'OUTPUT: /output/path/'");
        System.err.println("Second: (int) Number of ids to fetch. " + "Provide number which increment by 5000 "
                + "(5000, 10000, 15000 etc) " + "or -1 to fetch all ids.");
        System.err.println("Third (optional): 'screen_name / user_id_str'");
        System.err.println("If 3rd argument not provided then provide" + " Twitter users through database.");
        System.exit(-1);// w  ww.  j a  v a2s . c  o m

    MysqlDB DB = new MysqlDB();
    AppOAuth AppOAuths = new AppOAuth();
    Misc helpers = new Misc();
    String endpoint = "/friends/ids";

    String OutputDirPath = null;
    try {
        OutputDirPath = StringEscapeUtils.escapeJava(args[0]);
    } catch (Exception e) {
        System.err.println("Argument" + args[0] + " must be an String.");

    int IDS_TO_FETCH_INT = -1;
    try {
        IDS_TO_FETCH_INT = Integer.parseInt(args[1]);
    } catch (NumberFormatException e) {
        System.err.println("Argument" + args[1] + " must be an integer.");

    int IDS_TO_FETCH = 0;
    if (IDS_TO_FETCH_INT > 5000) {

        float IDS_TO_FETCH_F = (float) IDS_TO_FETCH_INT / 5000;
        IDS_TO_FETCH = (int) Math.ceil(IDS_TO_FETCH_F);
    } else if ((IDS_TO_FETCH_INT <= 5000) && (IDS_TO_FETCH_INT > 0)) {
        IDS_TO_FETCH = 1;

    String targetedUser = "";
    if (args.length == 3) {
        try {
            targetedUser = StringEscapeUtils.escapeJava(args[2]);
        } catch (Exception e) {
            System.err.println("Argument" + args[2] + " must be an String.");

    try {

        TwitterFactory tf = AppOAuths.loadOAuthUser(endpoint);
        Twitter twitter = tf.getInstance();

        int RemainingCalls = AppOAuths.RemainingCalls;
        int RemainingCallsCounter = 0;
        System.out.println("First Time Remianing Calls: " + RemainingCalls);

        String Screen_name = AppOAuths.screen_name;
        System.out.println("First Time Loaded OAuth Screen_name: " + Screen_name);

        IDs ids;
        System.out.println("Listing friends ids.");

        // if targetedUser not provided by argument, then look into database.
        if (StringUtils.isEmpty(targetedUser)) {

            String selectQuery = "SELECT * FROM `followings_parent` WHERE " + "`targeteduser` != '' AND "
                    + "`nextcursor` != '0' AND " + "`nextcursor` != '2'";

            ResultSet results = DB.selectQ(selectQuery);

            int numRows = DB.numRows(results);
            if (numRows < 1) {
                System.err.println("No User in database to get friendsIDS");

            OUTERMOST: while (results.next()) {

                int following_parent_id = results.getInt("id");
                targetedUser = results.getString("targeteduser");
                long cursor = results.getLong("nextcursor");
                System.out.println("Targeted User: " + targetedUser);

                int idsLoopCounter = 0;
                int totalIDs = 0;

                // put idsJSON in a file
                PrintWriter writer = new PrintWriter(OutputDirPath + "/" + targetedUser, "UTF-8");

                // call different functions for screen_name and id_str
                Boolean chckedNumaric = helpers.isNumeric(targetedUser);

                do {
                    ids = null;
                    try {

                        if (chckedNumaric) {

                            long LongValueTargetedUser = Long.valueOf(targetedUser).longValue();

                            ids = twitter.getFriendsIDs(LongValueTargetedUser, cursor);
                        } else {
                            ids = twitter.getFriendsIDs(targetedUser, cursor);

                    } catch (TwitterException te) {

                        // do not throw if user has protected tweets, 
                        // or if they deleted their account
                        if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED
                                || te.getStatusCode() == HttpResponseCode.NOT_FOUND) {

                            System.out.println(targetedUser + " is protected or account is deleted");
                        } else {
                            System.out.println("Friends Get Exception: " + te.getMessage());

                        // If rate limit reached then switch Auth user
                        if (RemainingCallsCounter >= RemainingCalls) {

                            // load auth user
                            tf = AppOAuths.loadOAuthUser(endpoint);
                            twitter = tf.getInstance();

                                    "New User Loaded OAuth" + " Screen_name: " + AppOAuths.screen_name);

                            RemainingCalls = AppOAuths.RemainingCalls;
                            RemainingCallsCounter = 0;

                            System.out.println("New Remianing Calls: " + RemainingCalls);

                        // update cursor in "followings_parent"
                        String fieldValues = "`nextcursor` = 2";
                        String where = "id = " + following_parent_id;
                        DB.Update("`followings_parent`", fieldValues, where);

                        // If error then switch to next user
                        continue OUTERMOST;

                    if (ids.getIDs().length > 0) {

                        totalIDs += ids.getIDs().length;
                        System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length);

                        JSONObject responseDetailsJson = new JSONObject();
                        JSONArray jsonArray = new JSONArray();
                        for (long id : ids.getIDs()) {
                        Object idsJSON = responseDetailsJson.put("ids", jsonArray);


                    // If rate limit reached then switch Auth user.
                    if (RemainingCallsCounter >= RemainingCalls) {

                        // load auth user
                        tf = AppOAuths.loadOAuthUser(endpoint);
                        twitter = tf.getInstance();

                        System.out.println("New User Loaded OAuth " + "Screen_name: " + AppOAuths.screen_name);

                        RemainingCalls = AppOAuths.RemainingCalls;
                        RemainingCallsCounter = 0;

                        System.out.println("New Remianing Calls: " + RemainingCalls);

                    if (IDS_TO_FETCH_INT != -1) {
                        if (idsLoopCounter == IDS_TO_FETCH) {

                } while ((cursor = ids.getNextCursor()) != 0);
                System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs);

                // update cursor in "followings_parent"
                String fieldValues = "`nextcursor` = " + cursor;
                String where = "id = " + following_parent_id;
                DB.Update("`followings_parent`", fieldValues, where);

            } // loop through every result found in db
        } else {

            // Second Argument Sets, so we are here.
            System.out.println("screen_name / user_id_str " + "passed by argument");

            int idsLoopCounter = 0;
            int totalIDs = 0;

            // put idsJSON in a file
            PrintWriter writer = new PrintWriter(
                    OutputDirPath + "/" + targetedUser + "_ids_" + helpers.getUnixTimeStamp(), "UTF-8");

            // call different functions for screen_name and id_str
            Boolean chckedNumaric = helpers.isNumeric(targetedUser);
            long cursor = -1;

            do {
                ids = null;
                try {

                    if (chckedNumaric) {

                        long LongValueTargetedUser = Long.valueOf(targetedUser).longValue();

                        ids = twitter.getFriendsIDs(LongValueTargetedUser, cursor);
                    } else {
                        ids = twitter.getFriendsIDs(targetedUser, cursor);

                } catch (TwitterException te) {

                    // do not throw if user has protected tweets, 
                    // or if they deleted their account
                    if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED
                            || te.getStatusCode() == HttpResponseCode.NOT_FOUND) {

                        System.out.println(targetedUser + " is protected or account is deleted");
                    } else {
                        System.out.println("Friends Get Exception: " + te.getMessage());

                if (ids.getIDs().length > 0) {

                    totalIDs += ids.getIDs().length;
                    System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length);

                    JSONObject responseDetailsJson = new JSONObject();
                    JSONArray jsonArray = new JSONArray();
                    for (long id : ids.getIDs()) {
                    Object idsJSON = responseDetailsJson.put("ids", jsonArray);



                // If rate limit reach then switch Auth user
                if (RemainingCallsCounter >= RemainingCalls) {

                    // load auth user
                    tf = AppOAuths.loadOAuthUser(endpoint);
                    twitter = tf.getInstance();

                    System.out.println("New User Loaded OAuth Screen_name: " + AppOAuths.screen_name);

                    RemainingCalls = AppOAuths.RemainingCalls;
                    RemainingCallsCounter = 0;

                    System.out.println("New Remianing Calls: " + RemainingCalls);

                if (IDS_TO_FETCH_INT != -1) {
                    if (idsLoopCounter == IDS_TO_FETCH) {

            } while ((cursor = ids.getNextCursor()) != 0);
            System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs);


    } catch (TwitterException te) {
        // te.printStackTrace();
        System.err.println("Failed to get friends' ids: " + te.getMessage());
    System.out.println("!!!! DONE !!!!");

From source file:friendsandfollowers.FilesThreaderFriendsIDsParser.java

public static void main(String[] args) throws ClassNotFoundException, SQLException, JSONException,
        FileNotFoundException, UnsupportedEncodingException {

    // Check how many arguments were passed in
    if ((args == null) || (args.length < 5)) {
        System.err.println("5 Parameters are required  plus one optional " + "parameter to launch a Job.");
        System.err.println("First: String 'INPUT: DB or /input/path/'");
        System.err.println("Second: String 'OUTPUT: /output/path/'");
        System.err.println("Third: (int) Total Number Of Jobs");
        System.err.println("Fourth: (int) This Job Number");
        System.err.println("Fifth: (int) Number of seconds to pause");
        System.err.println("Sixth: (int) Number of ids to fetch" + "Provide number which increment by 5000 "
                + "(5000, 10000, 15000 etc) " + "or -1 to fetch all ids.");
        System.err.println("Example: fileToRun /input/path/ " + "/output/path/ 10 1 3 75000");
        System.exit(-1);/* w w w. j  a  va  2s .  c om*/

    // TODO documentation for command line
    AppOAuth AppOAuths = new AppOAuth();
    Misc helpers = new Misc();
    String endpoint = "/friends/ids";

    String inputPath = null;
    try {
        inputPath = StringEscapeUtils.escapeJava(args[0]);
    } catch (Exception e) {
        System.err.println("Argument " + args[0] + " must be an String.");

    String outputPath = null;
    try {
        outputPath = StringEscapeUtils.escapeJava(args[1]);
    } catch (Exception e) {
        System.err.println("Argument " + args[1] + " must be an String.");

    int TOTAL_JOBS = 0;
    try {
        TOTAL_JOBS = Integer.parseInt(args[2]);
    } catch (NumberFormatException e) {
        System.err.println("Argument " + args[2] + " must be an integer.");

    int JOB_NO = 0;
    try {
        JOB_NO = Integer.parseInt(args[3]);
    } catch (NumberFormatException e) {
        System.err.println("Argument " + args[3] + " must be an integer.");

    int secondsToPause = 0;
    try {
        secondsToPause = Integer.parseInt(args[4]);
    } catch (NumberFormatException e) {
        System.err.println("Argument" + args[4] + " must be an integer.");

    int IDS_TO_FETCH_INT = -1;
    if (args.length == 6) {
        try {
            IDS_TO_FETCH_INT = Integer.parseInt(args[5]);
        } catch (NumberFormatException e) {
            System.err.println("Argument" + args[5] + " must be an integer.");

    int IDS_TO_FETCH = 0;
    if (IDS_TO_FETCH_INT > 5000) {

        float IDS_TO_FETCH_F = (float) IDS_TO_FETCH_INT / 5000;
        IDS_TO_FETCH = (int) Math.ceil(IDS_TO_FETCH_F);
    } else if ((IDS_TO_FETCH_INT <= 5000) && (IDS_TO_FETCH_INT > 0)) {
        IDS_TO_FETCH = 1;

    secondsToPause = (TOTAL_JOBS * secondsToPause) - (JOB_NO * secondsToPause);
    System.out.println("secondsToPause: " + secondsToPause);

    try {

        int TotalWorkLoad = 0;
        ArrayList<String> allFiles = null;
        try {
            final File folder = new File(inputPath);
            allFiles = helpers.listFilesForSingleFolder(folder);
            TotalWorkLoad = allFiles.size();
        } catch (Exception e) {

            System.err.println("Input folder is not exists: " + e.getMessage());

        System.out.println("Total Workload is: " + TotalWorkLoad);

        if (TotalWorkLoad < 1) {
            System.err.println("No screen names file exists in: " + inputPath);

        if (TOTAL_JOBS > TotalWorkLoad) {
            System.err.println("Number of jobs are more than total work"
                    + " load. Please reduce 'Number of jobs' to launch.");

        float TotalWorkLoadf = TotalWorkLoad;
        float TOTAL_JOBSf = TOTAL_JOBS;
        float res = (TotalWorkLoadf / TOTAL_JOBSf);

        int chunkSize = (int) Math.ceil(res);
        int offSet = JOB_NO * chunkSize;
        int chunkSizeToGet = (JOB_NO + 1) * chunkSize;

        System.out.println("My Share is " + chunkSize);

        // Load OAuh User
        TwitterFactory tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO);
        Twitter twitter = tf.getInstance();

        int RemainingCalls = AppOAuths.RemainingCalls;
        int RemainingCallsCounter = 0;
        System.out.println("First Time OAuth Remianing Calls: " + RemainingCalls);

        String Screen_name = AppOAuths.screen_name;
        System.out.println("First Time Loaded OAuth Screen_name: " + Screen_name);

        IDs ids;
        System.out.println("Going to get friends ids.");

        // to write output in a file

        if (JOB_NO + 1 == TOTAL_JOBS) {
            chunkSizeToGet = TotalWorkLoad;

        List<String> myFilesShare = allFiles.subList(offSet, chunkSizeToGet);

        for (String myFile : myFilesShare) {
            System.out.println("Going to parse file: " + myFile);

            try (BufferedReader br = new BufferedReader(new FileReader(inputPath + "/" + myFile))) {
                String line;
                OUTERMOST: while ((line = br.readLine()) != null) {
                    // process the line.

                    System.out.println("Going to get friends ids of Screen-name / user_id: " + line);

                    String targetedUser = line.trim(); // tmp
                    long cursor = -1;
                    int idsLoopCounter = 0;
                    int totalIDs = 0;

                    PrintWriter writer = new PrintWriter(outputPath + "/" + targetedUser, "UTF-8");

                    // call different functions for screen_name and id_str
                    Boolean chckedNumaric = helpers.isNumeric(targetedUser);

                    do {
                        ids = null;
                        try {

                            if (chckedNumaric) {

                                long LongValueTargetedUser = Long.valueOf(targetedUser).longValue();

                                ids = twitter.getFriendsIDs(LongValueTargetedUser, cursor);
                            } else {
                                ids = twitter.getFriendsIDs(targetedUser, cursor);

                        } catch (TwitterException te) {

                            // do not throw if user has protected tweets, or
                            // if they deleted their account
                            if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED
                                    || te.getStatusCode() == HttpResponseCode.NOT_FOUND) {

                                System.out.println(targetedUser + " is protected or account is deleted");
                            } else {
                                System.out.println("Friends Get Exception: " + te.getMessage());

                            // If rate limit reached then switch Auth user
                            if (RemainingCallsCounter >= RemainingCalls) {

                                // load auth user
                                tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO);
                                twitter = tf.getInstance();

                                        "New Loaded OAuth User " + " Screen_name: " + AppOAuths.screen_name);

                                RemainingCalls = AppOAuths.RemainingCalls;
                                RemainingCallsCounter = 0;

                                System.out.println("New OAuth Remianing Calls: " + RemainingCalls);

                            // Remove file if ids not found
                            if (totalIDs == 0) {

                                System.out.println("No ids fetched so removing " + "file " + targetedUser);

                                File fileToDelete = new File(outputPath + "/" + targetedUser);

                            // If error then switch to next user
                            continue OUTERMOST;

                        if (ids.getIDs().length > 0) {

                            totalIDs += ids.getIDs().length;
                            System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length);

                            JSONObject responseDetailsJson = new JSONObject();
                            JSONArray jsonArray = new JSONArray();
                            for (long id : ids.getIDs()) {
                            Object idsJSON = responseDetailsJson.put("ids", jsonArray);



                        // If rate limit reached then switch Auth user
                        if (RemainingCallsCounter >= RemainingCalls) {

                            // load auth user
                            tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO);
                            twitter = tf.getInstance();

                            System.out.println("New Loaded OAuth User Screen_name: " + AppOAuths.screen_name);

                            RemainingCalls = AppOAuths.RemainingCalls;
                            RemainingCallsCounter = 0;

                            System.out.println("New OAuth Remianing Calls: " + RemainingCalls);

                        if (IDS_TO_FETCH_INT != -1) {
                            if (idsLoopCounter == IDS_TO_FETCH) {

                    } while ((cursor = ids.getNextCursor()) != 0);

                    System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs);

                    // Remove file if ids not found
                    if (totalIDs == 0) {

                        System.out.println("No ids fetched so removing " + "file " + targetedUser);

                        File fileToDelete = new File(outputPath + "/" + targetedUser);

                } // while get records from single file
            } // read my single file
            catch (IOException e) {
                System.err.println("Failed to read lines from " + myFile);

            // to write output in a file
        } // all my files share

    } catch (TwitterException te) {
        // te.printStackTrace();
        System.err.println("Failed to get friends' ids: " + te.getMessage());
    System.out.println("!!!! DONE !!!!");

    // Close System.out for this thread which will
    // flush and close this thread.

From source file:friendsandfollowers.FilesThreaderFollowersIDsParser.java

/**
 * Command-line entry point that reads user identifiers (one per line) from the
 * files of an input folder and requests each user's follower ids through
 * {@code twitter.getFollowersIDs}, following the cursor returned by each call.
 *
 * <p>Arguments, as listed by the usage text below: input path, output path,
 * total number of jobs, this job's number, seconds to pause, and an optional
 * number of ids to fetch (defaults to {@code -1}).
 *
 * <p>NOTE(review): this listing looks like an incomplete excerpt. Closing
 * braces and a few statements (for example the call that the
 * "New Loaded OAuth User " argument line belongs to) are not present, so it
 * does not compile as shown. Restore the method from the original source
 * before changing any logic.
 *
 * @param args command-line arguments; fewer than five prints the usage text
 */
public static void main(String[] args) throws ClassNotFoundException, SQLException, JSONException,
        FileNotFoundException, UnsupportedEncodingException {

    // Check how many arguments were passed in
    if ((args == null) || (args.length < 5)) {
        System.err.println("5 Parameters are required  plus one optional " + "parameter to launch a Job.");
        System.err.println("First: String 'INPUT: DB or /input/path/'");
        System.err.println("Second: String 'OUTPUT: /output/path/'");
        System.err.println("Third: (int) Total Number Of Jobs");
        System.err.println("Fourth: (int) This Job Number");
        System.err.println("Fifth: (int) Number of seconds to pause");
        System.err.println("Sixth: (int) Number of ids to fetch" + "Provide number which increment by 5000 "
                + "(5000, 10000, 15000 etc) " + "or -1 to fetch all ids.");
        System.err.println("Example: fileToRun /input/path/ " + "/output/path/ 10 1 3 75000");
        System.exit(-1);

    // TODO documentation for command line
    AppOAuth AppOAuths = new AppOAuth();
    Misc helpers = new Misc();
    String endpoint = "/followers/ids";

    // Positional arguments: each one is parsed in its own try block and a
    // message is printed to System.err when parsing fails.
    String inputPath = null;
    try {
        inputPath = StringEscapeUtils.escapeJava(args[0]);
    } catch (Exception e) {
        System.err.println("Argument " + args[0] + " must be an String.");

    String outputPath = null;
    try {
        outputPath = StringEscapeUtils.escapeJava(args[1]);
    } catch (Exception e) {
        System.err.println("Argument " + args[1] + " must be an String.");

    int TOTAL_JOBS = 0;
    try {
        TOTAL_JOBS = Integer.parseInt(args[2]);
    } catch (NumberFormatException e) {
        System.err.println("Argument " + args[2] + " must be an integer.");

    int JOB_NO = 0;
    try {
        JOB_NO = Integer.parseInt(args[3]);
    } catch (NumberFormatException e) {
        System.err.println("Argument " + args[3] + " must be an integer.");

    int secondsToPause = 0;
    try {
        secondsToPause = Integer.parseInt(args[4]);
    } catch (NumberFormatException e) {
        System.err.println("Argument" + args[4] + " must be an integer.");

    // Optional sixth argument; stays -1 when it is not supplied.
    int IDS_TO_FETCH_INT = -1;
    if (args.length == 6) {
        try {
            IDS_TO_FETCH_INT = Integer.parseInt(args[5]);
        } catch (NumberFormatException e) {
            System.err.println("Argument" + args[5] + " must be an integer.");

    // Turn the requested id count into a loop-iteration limit (compared with
    // idsLoopCounter further down): values above 5000 become
    // ceil(value / 5000), values in 1..5000 become 1, anything else stays 0.
    int IDS_TO_FETCH = 0;
    if (IDS_TO_FETCH_INT > 5000) {

        float IDS_TO_FETCH_F = (float) IDS_TO_FETCH_INT / 5000;
        IDS_TO_FETCH = (int) Math.ceil(IDS_TO_FETCH_F);
    } else if ((IDS_TO_FETCH_INT <= 5000) && (IDS_TO_FETCH_INT > 0)) {
        IDS_TO_FETCH = 1;

    // Equivalent to (TOTAL_JOBS - JOB_NO) * secondsToPause.
    secondsToPause = (TOTAL_JOBS * secondsToPause) - (JOB_NO * secondsToPause);
    System.out.println("secondsToPause: " + secondsToPause);

    try {

        // The workload is the list of files found in the input folder.
        int TotalWorkLoad = 0;
        ArrayList<String> allFiles = null;
        try {
            final File folder = new File(inputPath);
            allFiles = helpers.listFilesForSingleFolder(folder);
            TotalWorkLoad = allFiles.size();
        } catch (Exception e) {

            System.err.println("Input folder is not exists: " + e.getMessage());

        System.out.println("Total Workload is: " + TotalWorkLoad);

        if (TotalWorkLoad < 1) {
            System.err.println("No screen names file exists in: " + inputPath);

        if (TOTAL_JOBS > TotalWorkLoad) {
            System.err.println("Number of jobs are more than total work"
                    + " load. Please reduce 'Number of jobs' to launch.");

        // Floating-point division so that Math.ceil can round the per-job
        // share up.
        // NOTE(review): if TOTAL_JOBS is 0 this divides by zero in float
        // arithmetic - confirm that case is rejected before this point.
        float TotalWorkLoadf = TotalWorkLoad;
        float TOTAL_JOBSf = TOTAL_JOBS;
        float res = (TotalWorkLoadf / TOTAL_JOBSf);

        // This job handles allFiles[offSet, chunkSizeToGet).
        // NOTE(review): only the last job's upper bound is clamped to
        // TotalWorkLoad (below). Because the share is rounded up, an earlier
        // job's range can extend past the list (e.g. 5 files and 4 jobs gives
        // job 2 the range [4, 6)) - confirm subList is never reached with
        // such bounds.
        int chunkSize = (int) Math.ceil(res);
        int offSet = JOB_NO * chunkSize;
        int chunkSizeToGet = (JOB_NO + 1) * chunkSize;

        System.out.println("My Share is " + chunkSize);

        // Load OAuth User
        TwitterFactory tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO);
        Twitter twitter = tf.getInstance();

        // RemainingCalls is read from the loaded OAuth user; the counter is
        // compared against it to decide when to load another user.
        int RemainingCalls = AppOAuths.RemainingCalls;
        int RemainingCallsCounter = 0;
        System.out.println("First Time OAuth Remianing Calls: " + RemainingCalls);

        String Screen_name = AppOAuths.screen_name;
        System.out.println("First Time Loaded OAuth Screen_name: " + Screen_name);

        IDs ids;
        System.out.println("Going to get followers ids.");

        // to write output in a file

        // The last job takes everything up to the end of the list.
        if (JOB_NO + 1 == TOTAL_JOBS) {
            chunkSizeToGet = TotalWorkLoad;

        List<String> myFilesShare = allFiles.subList(offSet, chunkSizeToGet);

        for (String myFile : myFilesShare) {
            System.out.println("Going to parse file: " + myFile);

            try (BufferedReader br = new BufferedReader(new FileReader(inputPath + "/" + myFile))) {
                String line;
                // Labelled so that a failed request can skip to the next line.
                OUTERMOST: while ((line = br.readLine()) != null) {
                    // process the line.

                    System.out.println("Going to get followers ids of Screen-name / user_id: " + line);

                    String targetedUser = line.trim(); // tmp
                    long cursor = -1;
                    int idsLoopCounter = 0;
                    int totalIDs = 0;

                    // One output file per user, named after the trimmed line.
                    // NOTE(review): no close() of this writer is visible in
                    // this excerpt - confirm it is closed on every path,
                    // including the "continue OUTERMOST" path.
                    PrintWriter writer = new PrintWriter(outputPath + "/" + targetedUser, "UTF-8");

                    // call different functions for screen_name and id_str
                    Boolean chckedNumaric = helpers.isNumeric(targetedUser);

                    // Request loop: repeats until getNextCursor() returns 0.
                    do {
                        ids = null;
                        try {

                            if (chckedNumaric) {

                                long LongValueTargetedUser = Long.valueOf(targetedUser).longValue();

                                ids = twitter.getFollowersIDs(LongValueTargetedUser, cursor);
                            } else {
                                ids = twitter.getFollowersIDs(targetedUser, cursor);

                        } catch (TwitterException te) {

                            // do not throw if user has protected tweets, or
                            // if they deleted their account
                            if (te.getStatusCode() == HttpResponseCode.UNAUTHORIZED
                                    || te.getStatusCode() == HttpResponseCode.NOT_FOUND) {

                                System.out.println(targetedUser + " is protected or account is deleted");
                            } else {
                                System.out.println("Followers Get Exception: " + te.getMessage());

                            // If rate limit reached then switch Auth user
                            if (RemainingCallsCounter >= RemainingCalls) {

                                // load auth user
                                tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO);
                                twitter = tf.getInstance();

                                        "New Loaded OAuth User " + " Screen_name: " + AppOAuths.screen_name);

                                RemainingCalls = AppOAuths.RemainingCalls;
                                RemainingCallsCounter = 0;

                                System.out.println("New OAuth Remianing Calls: " + RemainingCalls);

                            // Remove file if ids not found
                            if (totalIDs == 0) {

                                System.out.println("No ids fetched so removing " + "file " + targetedUser);

                                // NOTE(review): the delete call itself is not
                                // visible in this excerpt.
                                File fileToDelete = new File(outputPath + "/" + targetedUser);

                            // If error then switch to next user
                            continue OUTERMOST;

                        if (ids.getIDs().length > 0) {

                            totalIDs += ids.getIDs().length;
                            System.out.println(idsLoopCounter + ": IDS length: " + ids.getIDs().length);

                            // The fetched ids are collected into a JSON
                            // object under the key "ids".
                            JSONObject responseDetailsJson = new JSONObject();
                            JSONArray jsonArray = new JSONArray();
                            for (long id : ids.getIDs()) {
                            Object idsJSON = responseDetailsJson.put("ids", jsonArray);



                        // If rate limit reached then switch Auth user
                        if (RemainingCallsCounter >= RemainingCalls) {

                            // load auth user
                            tf = AppOAuths.loadOAuthUser(endpoint, TOTAL_JOBS, JOB_NO);
                            twitter = tf.getInstance();

                            System.out.println("New Loaded OAuth User Screen_name: " + AppOAuths.screen_name);

                            RemainingCalls = AppOAuths.RemainingCalls;
                            RemainingCallsCounter = 0;

                            System.out.println("New OAuth Remianing Calls: " + RemainingCalls);

                        // Limit check, applied only when a limit was given;
                        // the action taken on reaching it is not visible in
                        // this excerpt.
                        if (IDS_TO_FETCH_INT != -1) {
                            if (idsLoopCounter == IDS_TO_FETCH) {

                    } while ((cursor = ids.getNextCursor()) != 0);

                    System.out.println("Total ids dumped of " + targetedUser + " are: " + totalIDs);

                    // Remove file if ids not found
                    if (totalIDs == 0) {

                        System.out.println("No ids fetched so removing " + "file " + targetedUser);

                        File fileToDelete = new File(outputPath + "/" + targetedUser);

                } // while get records from single file
            } // read my single file
            catch (IOException e) {
                System.err.println("Failed to read lines from " + myFile);

            // to write output in a file
        } // all my files share

    } catch (TwitterException te) {
        // te.printStackTrace();
        System.err.println("Failed to get followers' ids: " + te.getMessage());
    System.out.println("!!!! DONE !!!!");

    // Close System.out for this thread which will
    // flush and close this thread.

From source file:cc.twittertools.search.api.RunQueriesBaselineThrift.java

/**
 * Command-line entry point that reads a topics file, runs every topic as a
 * query through {@code TrecSearchThriftClient.search}, drops retweets, and
 * prints one formatted result line per remaining hit to standard output.
 *
 * <p>NOTE(review): this listing looks like an incomplete excerpt. Some
 * {@code options.addOption(} openers, closing braces and other statements are
 * not present, so it does not compile as shown.
 *
 * @param args command-line arguments parsed with {@code GnuParser}
 */
public static void main(String[] args) throws Exception {
    Options options = new Options();

            .withDescription("file containing topics in TREC format").create(QUERIES_OPTION));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of results to return")
    options.addOption(
            OptionBuilder.withArgName("string").hasArg().withDescription("group id").create(GROUP_OPTION));
            OptionBuilder.withArgName("string").hasArg().withDescription("access token").create(TOKEN_OPTION));
    options.addOption(new Option(VERBOSE_OPTION, "print out complete document"));

    // NOTE(review): cmdline stays null if parsing throws; no exit or return
    // is visible in the catch block here - confirm it is not dereferenced in
    // that case.
    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());

    // Host, port and queries options must all be present; otherwise the help
    // text is printed.
    if (!cmdline.hasOption(HOST_OPTION) || !cmdline.hasOption(PORT_OPTION)
            || !cmdline.hasOption(QUERIES_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(RunQueriesThrift.class.getName(), options);

    String queryFile = cmdline.getOptionValue(QUERIES_OPTION);
    if (!new File(queryFile).exists()) {
        System.err.println("Error: " + queryFile + " doesn't exist!");

    String runtag = cmdline.hasOption(RUNTAG_OPTION) ? cmdline.getOptionValue(RUNTAG_OPTION) : DEFAULT_RUNTAG;

    TrecTopicSet topicsFile = TrecTopicSet.fromFile(new File(queryFile));

    // Defaults to 1000 results unless the option supplies another value.
    int numResults = 1000;
    try {
        if (cmdline.hasOption(NUM_RESULTS_OPTION)) {
            numResults = Integer.parseInt(cmdline.getOptionValue(NUM_RESULTS_OPTION));
    } catch (NumberFormatException e) {
        System.err.println("Invalid " + NUM_RESULTS_OPTION + ": " + cmdline.getOptionValue(NUM_RESULTS_OPTION));

    String group = cmdline.hasOption(GROUP_OPTION) ? cmdline.getOptionValue(GROUP_OPTION) : null;
    String token = cmdline.hasOption(TOKEN_OPTION) ? cmdline.getOptionValue(TOKEN_OPTION) : null;

    boolean verbose = cmdline.hasOption(VERBOSE_OPTION);

    // Auto-flushing UTF-8 wrapper around System.out.
    PrintStream out = new PrintStream(System.out, true, "UTF-8");

    TrecSearchThriftClient client = new TrecSearchThriftClient(cmdline.getOptionValue(HOST_OPTION),
            Integer.parseInt(cmdline.getOptionValue(PORT_OPTION)), group, token);

    for (cc.twittertools.search.TrecTopic query : topicsFile) {
        List<TResult> results = client.search(query.getQuery(), query.getQueryTweetTime(), numResults);

        // The TreeSet orders results by TResultComparable's ordering.
        SortedSet<TResultComparable> sortedResults = new TreeSet<TResultComparable>();
        for (TResult result : results) {
            // Throw away retweets.
            if (result.getRetweeted_status_id() == 0) {
                sortedResults.add(new TResultComparable(result));

        // NOTE(review): no increment of i or dupliCount is visible in this
        // excerpt - confirm against the original source.
        int i = 1;
        int dupliCount = 0;
        double rsvPrev = 0;
        for (TResultComparable sortedResult : sortedResults) {
            TResult result = sortedResult.getTResult();
            double rsvCurr = result.rsv;
            // A score within 1e-7 of the previous one is treated as a tie and
            // lowered by 0.000001 / numResults * dupliCount; otherwise the
            // tie counter is reset.
            if (Math.abs(rsvCurr - rsvPrev) > 0.0000001) {
                dupliCount = 0;
            } else {
                rsvCurr = rsvCurr - 0.000001 / numResults * dupliCount;
            // The score is printed with 6 + ceil(log10(numResults)) decimal
            // places (9 for the default of 1000 results).
            out.println(String.format("%s Q0 %d %d %." + (int) (6 + Math.ceil(Math.log10(numResults))) + "f %s",
                    query.getId(), result.id, i, rsvCurr, runtag));
            if (verbose) {
                // Line breaks in the result text are collapsed to spaces.
                out.println("# " + result.toString().replaceAll("[\\n\\r]+", " "));
            rsvPrev = result.rsv;


From source file:de.tudarmstadt.ukp.csniper.resbuild.EvaluationItemFixer.java

/**
 * Selects EvaluationItem rows whose coveredText matches the conditions of the
 * query below, cleans each text (bracket placeholders replaced, trimmed),
 * writes the cleaned and failed items to two log files, and prepares an UPDATE
 * statement that sets the cleaned text per id.
 *
 * <p>NOTE(review): this listing looks like an incomplete excerpt. Closing
 * braces, the bodies of the {@code finally} blocks and possibly other
 * statements are not present, so it does not compile as shown.
 *
 * @param args not read by the visible code
 */
public static void main(String[] args) {

    // id -> cleaned text, and id -> error message for items that failed.
    Map<Integer, String> items = new HashMap<Integer, String>();
    Map<Integer, String> failed = new HashMap<Integer, String>();

    // fetch coveredTexts of dubious items and clean it
    PreparedStatement select = null;
    try {
        // NOTE(review): the first condition passes coveredText as the first
        // LOCATE argument, while the LRB/RRB conditions pass it second -
        // confirm the intended argument order.
        StringBuilder selectQuery = new StringBuilder();
        selectQuery.append("SELECT * FROM EvaluationItem ");
        selectQuery.append("WHERE LOCATE(coveredText, '  ') > 0 ");
        selectQuery.append("OR LOCATE('" + LRB + "', coveredText) > 0 ");
        selectQuery.append("OR LOCATE('" + RRB + "', coveredText) > 0 ");
        selectQuery.append("OR LEFT(coveredText, 1) = ' ' ");
        selectQuery.append("OR RIGHT(coveredText, 1) = ' ' ");

        select = connection.prepareStatement(selectQuery.toString());
        log.info("Running query [" + selectQuery.toString() + "].");
        ResultSet rs = select.executeQuery();

        while (rs.next()) {
            int id = rs.getInt("id");
            String coveredText = rs.getString("coveredText");

            try {
                // special handling of double whitespace: in this case, re-fetch the text
                if (coveredText.contains("  ")) {
                    coveredText = retrieveCoveredText(rs.getString("collectionId"), rs.getString("documentId"),
                            rs.getInt("beginOffset"), rs.getInt("endOffset"));

                // replace bracket placeholders and trim the text
                coveredText = StringUtils.replace(coveredText, LRB, "(");
                coveredText = StringUtils.replace(coveredText, RRB, ")");
                coveredText = coveredText.trim();

                items.put(id, coveredText);
            } catch (IllegalArgumentException e) {
                // The item is recorded as failed instead of aborting the run.
                failed.put(id, e.getMessage());
    } catch (SQLException e) {
        log.error("Exception while selecting: " + e.getMessage());
    } finally {

    // write logs
    // NOTE(review): the writers are not opened with try-with-resources and no
    // close() is visible in this excerpt - confirm they are closed.
    BufferedWriter bwf = null;
    BufferedWriter bws = null;
    try {
        bwf = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(LOG_FAILED)), "UTF-8"));
        for (Entry<Integer, String> e : failed.entrySet()) {
            bwf.write(e.getKey() + " - " + e.getValue() + "\n");

        bws = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(new File(LOG_SUCCESSFUL)), "UTF-8"));
        for (Entry<Integer, String> e : items.entrySet()) {
            bws.write(e.getKey() + " - " + e.getValue() + "\n");
    } catch (IOException e) {
        log.error("Got an IOException while writing the log files.");
    } finally {

    log.info("Texts for [" + items.size() + "] items need to be cleaned up.");

    // update the dubious items with the cleaned coveredText
    PreparedStatement update = null;
    try {
        String updateQuery = "UPDATE EvaluationItem SET coveredText = ? WHERE id = ?";

        update = connection.prepareStatement(updateQuery);
        // NOTE(review): neither the execution of the update nor an increment
        // of i is visible in this excerpt.
        int i = 0;
        for (Entry<Integer, String> e : items.entrySet()) {
            int id = e.getKey();
            String coveredText = e.getValue();

            // update item in database
            update.setString(1, coveredText);
            update.setInt(2, id);
            log.debug("Updating " + id + " with [" + coveredText + "]");

            // show percentage of updated items
            // part = ceil(size / 100); it is at least 1 inside this loop
            // because the map is non-empty here, so the modulo is safe.
            int part = (int) Math.ceil((double) items.size() / 100);
            if (i % part == 0) {
                log.info(i / part + "% finished (" + i + "/" + items.size() + ").");
    } catch (SQLException e) {
        log.error("Exception while updating: " + e.getMessage());
    } finally {


From source file:com.bah.applefox.main.Ingest.java

/**
 * Command-line entry point: loads settings from the properties file named by
 * the first argument, packs them into a 25-element string array, and
 * dispatches on the lower-cased second argument (pr, imageload, ingest, load,
 * ftsample, imagesample).
 *
 * <p>NOTE(review): this listing looks like an incomplete excerpt. Closing
 * braces, argument continuations of some constructor calls and other
 * statements are not present, so it does not compile as shown.
 *
 * @param args expected as {@code <properties file> <command>}
 */
public static void main(String[] args) throws Exception {

    // NOTE(review): the --help branch prints "Not enough arguments" ahead of
    // the usage text.
    if (args.length == 1 && args[0].equals("--help")) {
        System.out.println("Not enough arguments");
        System.out.println("Arguments should be in the format <properties file> <command>");
        System.out.println("Valid commands:");
        System.out.println("\tpr: Calculates Page Rank");
        System.out.println("\timageload: Loads Images from URLs");
        System.out.println("\tload: Loads Full Text Data");
        System.out.println("\tingest: Ingests URLs from given seed");
        System.out.println("\tftsample: Creates a Full Text Index Sample HashMap");
        System.out.println("\timagesample: Creates an Image Hash and Image Tag Sample HashMap");
    }
    if (args.length > 2) {
        System.out.println("2 Arguments expected, " + args.length + " given.");

    if (args.length < 2) {
        System.out.println("Not enough arguments");
        System.out.println("Arguments should be in the format <properties file> <command>");
        System.out.println("Valid commands:");
        System.out.println("\tpr: Calculates Page Rank");
        System.out.println("\timageload: Loads Images from URLs");
        System.out.println("\tload: Loads Full Text Data");
        System.out.println("\tingest: Ingests URLs from given seed");
        System.out.println("\tftsample: Creates a Full Text Index Sample HashMap");
        System.out.println("\timagesample: Creates an Image Hash and Image Tag Sample HashMap");
    injector = Guice.createInjector(new IngesterModule());

    // The properties object to read from the configuration file
    Properties properties = new Properties();

    try {
        // Load configuration file from the command line
        // NOTE(review): the FileInputStream is not closed in the visible code.
        properties.load(new FileInputStream(args[0]));
    } catch (Exception e) {
        log.error("ABORT: File not found or could not read from file ->" + e.getMessage());
        log.error("Enter the location of the configuration file");

    // Initialize variables from configuration file
    // Numeric settings are parsed directly; a missing or malformed value makes
    // Integer.parseInt / Double.parseDouble throw.

    // Accumulo Variables
    INSTANCE_NAME = properties.getProperty("INSTANCE_NAME");
    ZK_SERVERS = properties.getProperty("ZK_SERVERS");
    USERNAME = properties.getProperty("USERNAME");
    PASSWORD = properties.getProperty("PASSWORD");
    SPLIT_SIZE = properties.getProperty("SPLIT_SIZE");
    NUM_ITERATIONS = Integer.parseInt(properties.getProperty("NUM_ITERATIONS"));
    NUM_NODES = Integer.parseInt(properties.getProperty("NUM_NODES"));

    // General Search Variables
    MAX_NGRAMS = Integer.parseInt(properties.getProperty("MAX_NGRAMS"));
    GENERAL_STOP = properties.getProperty("GENERAL_STOP");

    // Full Text Variables
    FT_DATA_TABLE = properties.getProperty("FT_DATA_TABLE");
    FT_SAMPLE = properties.getProperty("FT_SAMPLE");
    FT_CHECKED_TABLE = properties.getProperty("FT_CHECKED_TABLE");
    FT_DIVS_FILE = properties.getProperty("FT_DIVS_FILE");
    FT_SPLIT_SIZE = properties.getProperty("FT_SPLIT_SIZE");

    // Web Crawler Variables
    URL_TABLE = properties.getProperty("URL_TABLE");
    SEED = properties.getProperty("SEED");
    USER_AGENT = properties.getProperty("USER_AGENT");
    URL_SPLIT_SIZE = properties.getProperty("URL_SPLIT_SIZE");

    // Page Rank Variables
    PR_TABLE_PREFIX = properties.getProperty("PR_TABLE_PREFIX");
    PR_URL_MAP_TABLE_PREFIX = properties.getProperty("PR_URL_MAP_TABLE_PREFIX");
    PR_OUT_LINKS_COUNT_TABLE = properties.getProperty("PR_OUT_LINKS_COUNT_TABLE");
    PR_FILE = properties.getProperty("PR_FILE");
    PR_DAMPENING_FACTOR = Double.parseDouble(properties.getProperty("PR_DAMPENING_FACTOR"));
    PR_ITERATIONS = Integer.parseInt(properties.getProperty("PR_ITERATIONS"));
    PR_SPLIT_SIZE = properties.getProperty("PR_SPLIT_SIZE");

    // Image Variables
    IMG_HASH_TABLE = properties.getProperty("IMG_HASH_TABLE");
    IMG_CHECKED_TABLE = properties.getProperty("IMG_CHECKED_TABLE");
    IMG_TAG_TABLE = properties.getProperty("IMG_TAG_TABLE");
    IMG_HASH_SAMPLE_TABLE = properties.getProperty("IMG_HASH_SAMPLE_TABLE");
    IMG_TAG_SAMPLE_TABLE = properties.getProperty("IMG_TAG_SAMPLE_TABLE");
    IMG_SPLIT_SIZE = properties.getProperty("IMG_SPLIT_SIZE");

    // Future Use:
    // Work Directory in HDFS
    WORK_DIR = properties.getProperty("WORK_DIR");

    // Initialize variable from command line
    RUN = args[1].toLowerCase();

    // Set the instance information for AccumuloUtils

    // Settings are handed to the jobs below positionally through this array.
    String[] temp = new String[25];

    // Accumulo Variables
    temp[0] = INSTANCE_NAME;
    temp[1] = ZK_SERVERS;
    temp[2] = USERNAME;
    temp[3] = PASSWORD;

    // Number of Map Tasks
    // Computed as ceil(1.75 * NUM_NODES * 2), i.e. 3.5 per node rounded up.
    temp[4] = Integer.toString((int) Math.ceil(1.75 * NUM_NODES * 2));

    // Web Crawler Variables
    temp[5] = URL_TABLE;
    temp[6] = USER_AGENT;

    // Future Use
    temp[7] = WORK_DIR;

    // General Search
    temp[8] = GENERAL_STOP;
    temp[9] = Integer.toString(MAX_NGRAMS);

    // Full Text Variables
    temp[10] = FT_DATA_TABLE;
    temp[11] = FT_CHECKED_TABLE;

    // Page Rank Variables
    temp[12] = PR_URL_MAP_TABLE_PREFIX;
    temp[13] = PR_TABLE_PREFIX;
    temp[14] = Double.toString(PR_DAMPENING_FACTOR);
    temp[15] = PR_OUT_LINKS_COUNT_TABLE;
    temp[16] = PR_FILE;

    // Image Variables
    temp[17] = IMG_HASH_TABLE;
    temp[18] = IMG_CHECKED_TABLE;
    temp[19] = IMG_TAG_TABLE;

    temp[20] = FT_DIVS_FILE;

    // Table Split Sizes
    temp[21] = FT_SPLIT_SIZE;
    temp[22] = IMG_SPLIT_SIZE;
    temp[23] = URL_SPLIT_SIZE;
    temp[24] = PR_SPLIT_SIZE;

    // Dispatch on the command given as the second argument.
    if (RUN.equals("pr")) {
        // Run PR_ITERATIONS number of iterations for page ranking
        // NOTE(review): URL_SPLIT_SIZE is passed here although PR_SPLIT_SIZE
        // is also loaded - confirm which one is intended.
        PageRank.createPageRank(temp, PR_ITERATIONS, URL_SPLIT_SIZE);
    } else if (RUN.equals("imageload")) {
        // Load image index
        ToolRunner.run(new ImageLoader(), temp);
    } else if (RUN.equals("ingest")) {
        // Ingest
        // Set table split size
        // Write the seed value to the table
        // NOTE(review): adding the mutation to the writer and closing the
        // writer are not visible in this excerpt.
        BatchWriter w;
        Value v = new Value();
        Mutation m = new Mutation(SEED);
        m.put("0", "0", v);
        w = AccumuloUtils.connectBatchWrite(URL_TABLE);

        for (int i = 0; i < NUM_ITERATIONS; i++) {
            // Run the ToolRunner for NUM_ITERATIONS iterations
            ToolRunner.run(CachedConfiguration.getInstance(), injector.getInstance(Ingester.class), temp);
    } else if (RUN.equals("load")) {
        // Parse the URLs and add to the data table
        BatchWriter w = AccumuloUtils.connectBatchWrite(FT_CHECKED_TABLE);

        w = AccumuloUtils.connectBatchWrite(FT_DATA_TABLE);
        ToolRunner.run(CachedConfiguration.getInstance(), injector.getInstance(Loader.class), temp);
    } else if (RUN.equals("ftsample")) {
        // Create a sample table for full text index
        FTAccumuloSampler ftSampler = new FTAccumuloSampler(FT_SAMPLE, FT_DATA_TABLE, FT_CHECKED_TABLE);

    } else if (RUN.equals("imagesample")) {
        // Create a sample table for images
        ImageAccumuloSampler imgHashSampler = new ImageAccumuloSampler(IMG_HASH_SAMPLE_TABLE, IMG_HASH_TABLE,

        ImageAccumuloSampler imgTagSampler = new ImageAccumuloSampler(IMG_TAG_SAMPLE_TABLE, IMG_TAG_TABLE,
    } else {
        // Unknown command: list the accepted ones.
        System.out.println("Invalid argument " + RUN + ".");
        System.out.println("Valid Arguments:");
        System.out.println("\tpr: Calculates Page Rank");
        System.out.println("\timageload: Loads Images from URLs");
        System.out.println("\tload: Loads Full Text Data");
        System.out.println("\tingest: Ingests URLs from given seed");
        System.out.println("\tftsample: Creates a Full Text Index Sample HashMap");
        System.out.println("\timagesample: Creates an Image Hash and Image Tag Sample HashMap");


From source file:Main.java

/**
 * Returns a pseudo-random integer computed as {@code Math.random() * 360}
 * rounded up with {@link Math#ceil(double)}.
 *
 * <p>Because {@code Math.random()} yields values in {@code [0.0, 1.0)}, the
 * result lies in 0..360; 0 is produced only when {@code Math.random()}
 * returns exactly 0.0.
 *
 * @return the rounded-up value as an {@code int}
 */
public static int randomOrient() {
    return (int) Math.ceil(Math.random() * 360);