mas.MAS_VLDB.java Source code

Java tutorial

Introduction

Here is the source code for mas.MAS_VLDB.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package mas;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Command-line utilities that page through the Microsoft Academic feed at
 * {@code api.datamarket.azure.com/MRC/MicrosoftAcademic/v2} and dump each entity both as
 * tab-separated text and as the raw JSON pages, plus a few helpers for post-processing the
 * dumped citation file.
 *
 * <p>All crawlers share one paging loop ({@code crawlPages}): request a page at a given
 * {@code $skip} offset, convert every record to one output row, append the row batch and the raw
 * JSON to their files, advance the offset by {@link #PAGE_SIZE} and pause briefly. A page that
 * cannot be fetched or parsed is logged and skipped. An interrupt stops the crawl and leaves the
 * thread's interrupt flag set.
 *
 * @author Aale
 */
public class MAS_VLDB {

    // NOTE(review): these public fields are left non-final because code outside this file may
    // reassign them; confirm before turning them into constants.
    /** Column separator used in every row written by the crawlers. */
    public static String SEPERATOR = "\t";
    /** Row terminator used in every row written by the crawlers. */
    public static String NEWLINE = "\n";

    //    static List<Integer> conferences_20 = Arrays.asList(22, 23, 52, 120, 165, 2, 4, 24, 26, 29, 71,
    //            104, 115, 368, 384, 3, 5, 46, 49, 63);
    static List<Integer> conferences_30 = Arrays.asList(9, 84, 98, 100, 123, 7, 8, 10, 13, 19, 12, 166, 35, 150,
            165, 130, 187, 188, 189, 260, 20, 258, 296, 340, 413, 34, 45, 76, 102, 103);

    /** File that receives the current offset of the citation and paper crawls (overwritten each page). */
    public static String paper_last = "data/paper.last";

    /** Amount added to the {@code $skip} offset after every request (presumably the service's page size). */
    private static final int PAGE_SIZE = 100;
    /** Pause between two successful page requests for the bulk entity crawls. */
    private static final long DEFAULT_PAGE_DELAY_MS = 300L;
    /** Pause after a failed page request for the bulk entity crawls. */
    private static final long DEFAULT_RETRY_DELAY_MS = 5000L;
    /** Upper bound on requests made by {@link #getConferenceName(int)} before it gives up. */
    private static final int MAX_LOOKUP_ATTEMPTS = 5;
    /** Compiled once; matches a single whitespace character (including tab and newline). */
    private static final Pattern WHITESPACE = Pattern.compile("\\s");
    private static final Logger LOGGER = Logger.getLogger(MAS_VLDB.class.getName());

    /** Converts one JSON record into one output row. */
    private interface RowFormatter {

        /**
         * @param record one element of the response's result array
         * @param out buffer the finished row (including its terminator) is appended to
         */
        void append(JSONObject record, StringBuilder out);
    }

    /**
     * Reads the list of top paper ids and writes the filtered citation file.
     *
     * <p>The remaining pipeline stages are not run from here; call them explicitly when needed:
     * {@link #getConferenceName(int)} for each id returned by {@code readConfList},
     * {@link #extractPapers(int, int)} per conference, {@link #extractCitation} per batch of
     * paper ids, and the bulk crawls {@link #extractAffilition(int)}, {@link #extractAuthor(int)},
     * {@link #extractConference(int)}, {@link #extractJournal(int)},
     * {@link #extractPaper_Author(int)} and {@link #extractPaper_Category(int)}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String paper_file = "C:\\Users\\Aale\\Box Sync\\MotiProjects\\Universal-DB\\data\\citation\\top_papers_with_id.txt";
        String citation_out_file = "C:\\Users\\Aale\\Box Sync\\MotiProjects\\Universal-DB\\data\\citation\\citation.txt";
        // The raw JSON pages of the citation crawl were written next to it, to
        // "C:\\Users\\Aale\\Box Sync\\MotiProjects\\Universal-DB\\data\\citation\\citation_json.txt".
        String citation_filtered_out_file = "C:\\Users\\Aale\\Box Sync\\MotiProjects\\Universal-DB\\data\\citation\\citation_filtered.txt";

        List<Integer> paper_ids = getPapersList(paper_file);
        filterCitations(paper_ids, citation_out_file, citation_filtered_out_file);
    }

    /**
     * Looks up the display name of a conference.
     *
     * @param id conference id used in the {@code ID eq <id>} filter
     * @return the normalized {@code ShortName} of the first result, or its normalized
     *         {@code FullName} when the short name is empty; {@code null} when the service
     *         returns no result, when the thread is interrupted, or when the response could not
     *         be fetched or parsed within {@link #MAX_LOOKUP_ATTEMPTS} attempts
     */
    public static String getConferenceName(int id) {
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Conference?$filter=ID%20eq%20" + id
                + "&$format=json";
        for (int attempt = 1; attempt <= MAX_LOOKUP_ATTEMPTS; attempt++) {
            JSONArray results = null;
            final String json = getData2(url, 0);
            if (json == null) {
                System.out.println("json is null. retry. conference id=" + id);
            } else {
                try {
                    results = parseResults(json);
                } catch (ParseException ex) {
                    System.out.println(ex.getMessage() + " Cause: " + ex.getCause());
                    LOGGER.log(Level.SEVERE, null, ex);
                }
            }
            if (results != null) {
                if (results.isEmpty()) {
                    System.out.println("results is Empty, break.");
                    return null;
                }
                Object first = results.get(0);
                if (!(first instanceof JSONObject)) {
                    return null;
                }
                JSONObject confObj = (JSONObject) first;
                String shortName = text(confObj, "ShortName");
                return shortName.isEmpty() ? text(confObj, "FullName") : shortName;
            }
            // Back off before the next attempt; stop immediately when interrupted.
            if (attempt < MAX_LOOKUP_ATTEMPTS && !pause(5000L)) {
                return null;
            }
        }
        return null;
    }

    /**
     * Crawls {@code Paper_Ref} rows (source id, destination id) matching a filter.
     *
     * @param start initial {@code $skip} offset
     * @param papers_filter already URL-encoded {@code $filter} expression, for example
     *        {@code (DstID%20eq%201%20or%20DstID%20eq%202)}
     * @param csv_file file that receives one {@code SrcID<TAB>DstID} row per citation
     * @param json_file file that receives the raw JSON pages
     */
    public static void extractCitation(int start, String papers_filter, String csv_file, String json_file) {
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Paper_Ref?$select=SrcID,DstID&$filter="
                + papers_filter + "&$format=json";
        crawlPages("Citation", url, start, csv_file, json_file, true, 300L, 1000L, 5000L, new RowFormatter() {
            @Override
            public void append(JSONObject record, StringBuilder out) {
                appendRow(out, record.get("SrcID"), record.get("DstID"));
            }
        });
    }

    /**
     * Crawls the papers of one conference published in or after 2000 into
     * {@code data/papers_200_conf.csv} (id, conference id, title).
     *
     * @param start initial {@code $skip} offset
     * @param conf conference id used in the {@code ConfID eq <conf>} filter
     */
    public static void extractPapers(int start, int conf) {
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Paper?$select=ID,Title,Year,ConfID,JourID&$filter=Year%20ge%202000%20and%20ConfID%20eq%20"
                + conf + "&$format=json";
        crawlPages("Paper", url, start, "data/papers_200_conf.csv", "data/papers_200_conf_dump.json", true, 250L,
                10000L, 1000L, new RowFormatter() {
            @Override
            public void append(JSONObject record, StringBuilder out) {
                appendRow(out, record.get("ID"), record.get("ConfID"), text(record, "Title"));
            }
        });
    }

    /**
     * Crawls all affiliations into {@code data/affilitions.csv}.
     *
     * @param start initial {@code $skip} offset
     */
    public static void extractAffilition(int start) {
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Affiliation?$format=json";
        crawlEntity("Affilition", "affilitions", url, start, new RowFormatter() {
            @Override
            public void append(JSONObject record, StringBuilder out) {
                appendRow(out, record.get("ID"), text(record, "OfficialName"), text(record, "DisplayName"),
                        text(record, "NativeName"), record.get("ParentID"), text(record, "Homepage"),
                        text(record, "ShortName"), record.get("Type"));
            }
        });
    }

    /**
     * Crawls all authors into {@code data/authors.csv}.
     *
     * @param start initial {@code $skip} offset
     */
    public static void extractAuthor(int start) {
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Author?$select=ID,Name,NativeName,Affiliation,AffiliationID,Version"
                + "&$format=json";
        crawlEntity("Author", "authors", url, start, new RowFormatter() {
            @Override
            public void append(JSONObject record, StringBuilder out) {
                appendRow(out, record.get("ID"), text(record, "Name"), text(record, "NativeName"),
                        text(record, "Affiliation"), record.get("AffiliationID"), record.get("Version"));
            }
        });
    }

    /**
     * Crawls all conferences into {@code data/conferences.csv}.
     *
     * @param start initial {@code $skip} offset
     */
    public static void extractConference(int start) {
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Conference?$format=json";
        crawlEntity("Conference", "conferences", url, start, venueFormatter());
    }

    /**
     * Crawls all journals into {@code data/journals.csv}.
     *
     * @param start initial {@code $skip} offset
     */
    public static void extractJournal(int start) {
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Journal?$format=json";
        crawlEntity("Journals", "journals", url, start, venueFormatter());
    }

    /**
     * Crawls all paper/author links into {@code data/paper_authors.csv}.
     *
     * @param start initial {@code $skip} offset
     */
    public static void extractPaper_Author(int start) {
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Paper_Author?$format=json";
        crawlEntity("Paper_Author", "paper_authors", url, start, new RowFormatter() {
            @Override
            public void append(JSONObject record, StringBuilder out) {
                // Every other key read from this feed is PascalCase, so the previous lower-case
                // "authorID" lookup was presumably a typo - TODO confirm against the service schema.
                // The old spelling is kept as a fallback so no value is lost either way.
                Object authorID = record.get("AuthorID");
                if (authorID == null) {
                    authorID = record.get("authorID");
                }
                appendRow(out, record.get("PaperID"), record.get("SeqID"), authorID, text(record, "Name"),
                        text(record, "Affiliation"), record.get("AffiliationID"));
            }
        });
    }

    /**
     * Crawls all paper/category links into {@code data/paper_categories.csv}.
     *
     * @param start initial {@code $skip} offset
     */
    public static void extractPaper_Category(int start) {
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Paper_Category?$format=json";
        crawlEntity("Paper_Category", "paper_categories", url, start, new RowFormatter() {
            @Override
            public void append(JSONObject record, StringBuilder out) {
                appendRow(out, record.get("CPaperID"), record.get("DomainID"), record.get("SubDomainID"));
            }
        });
    }

    /**
     * Fetches one page of a feed URL.
     *
     * @param url_org feed URL that already contains a query string; {@code &$skip=<start>} is appended
     * @param start value of the {@code $skip} query option
     * @return the response body, or {@code null} when the request fails with an {@link IOException}
     */
    public static String getData2(String url_org, int start) {
        try {
            String complete_url = url_org + "&$skip=" + start;
            // Read the raw body instead of parsing it as HTML: running JSON through the HTML parser
            // and Document.text() drops anything that looks like a tag, decodes entities and
            // collapses whitespace, which can corrupt or invalidate the JSON.
            return Jsoup.connect(complete_url).timeout(25000).ignoreContentType(true).execute().body();
        } catch (IOException ex) {
            System.out.println(ex.getMessage() + " Cause: " + ex.getCause());
            LOGGER.log(Level.SEVERE, null, ex);
        }
        return null;
    }

    /**
     * Older, {@link URLConnection}-based variant of {@link #getData2(String, int)}.
     *
     * @param url_org feed URL that already contains a query string; {@code &$skip=<start>} is appended
     * @param start value of the {@code $skip} query option
     * @return the response with its line terminators removed, or {@code null} when the URL is
     *         malformed or the request fails
     */
    public static String getData_old(String url_org, int start) {
        try {
            String complete_url = url_org + "&$skip=" + start;
            URLConnection yc = new URL(complete_url).openConnection();
            yc.setConnectTimeout(25 * 1000);
            yc.setReadTimeout(25 * 1000);
            // try-with-resources closes the stream even when reading fails.
            // NOTE(review): UTF-8 is assumed because the payload is JSON - confirm against the
            // Content-Type the service sends.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(yc.getInputStream(), StandardCharsets.UTF_8))) {
                StringBuilder result = new StringBuilder();
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    result.append(inputLine);
                }
                return result.toString();
            }
        } catch (MalformedURLException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
        return null;
    }

    /**
     * Queries the older {@code academic.research.microsoft.com/json.svc/search} endpoint with a
     * fixed set of search properties and prints the response to standard output line by line.
     *
     * @param startIdx value sent as the {@code StartIdx} property
     * @param endIdx value sent as the {@code EndIdx} property
     */
    // for old version
    public static void getData(int startIdx, int endIdx) {
        Properties prop = new Properties();
        prop.setProperty("AppId", "1df63064-efad-4bbd-a797-1131499b7728");
        prop.setProperty("ResultObjects", "publication");
        prop.setProperty("DomainID", "22");
        prop.setProperty("SubDomainID", "2");
        prop.setProperty("YearStart", "2001");
        prop.setProperty("YearEnd", "2010");
        prop.setProperty("StartIdx", startIdx + "");
        prop.setProperty("EndIdx", endIdx + "");

        String url_str = generateURL("http://academic.research.microsoft.com/json.svc/search", prop);
        try {
            URLConnection yc = new URL(url_str).openConnection();
            // NOTE(review): UTF-8 is assumed for the response - confirm against the service.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(yc.getInputStream(), StandardCharsets.UTF_8))) {
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    System.out.println(inputLine);
                }
            }
        } catch (MalformedURLException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Appends the properties to a base URL as {@code ?key=value&key=value}. Keys and values are
     * inserted verbatim (no URL-encoding); with no properties the base URL is returned unchanged.
     */
    private static String generateURL(String url_org, Properties prop) {
        StringBuilder url = new StringBuilder(url_org);
        char delimiter = '?';
        for (Object k : prop.keySet()) {
            String key = (String) k;
            url.append(delimiter).append(key).append('=').append(prop.getProperty(key));
            delimiter = '&';
        }
        return url.toString();
    }

    /**
     * Replaces every whitespace character (tab and newline included) with a single space so the
     * value cannot break the tab-separated output; {@code null} becomes the empty string.
     */
    private static String normalized(String str) {
        if (str == null || str.length() == 0) {
            return "";
        }
        return WHITESPACE.matcher(str).replaceAll(" ");
    }

    /** Reads the paper ids stored in the first tab-separated column of a file. */
    private static List<Integer> getPapersList(String file_path) {
        return readLeadingIds(file_path);
    }

    /** Reads the conference ids stored in the first tab-separated column of a file. */
    private static List<Integer> readConfList(String filename) {
        return readLeadingIds(filename);
    }

    /**
     * Copies to {@code outfile} every {@code src<TAB>dest} line of {@code infile} whose source id
     * is one of {@code paper_ids} and differs from its destination id. Lines that do not consist
     * of exactly two integer columns are skipped.
     */
    private static void filterCitations(List<Integer> paper_ids, String infile, String outfile) {
        final List<String> lines = IOUtils.readFileLineByLine(infile, false);
        // Hash lookup instead of List.contains, which rescanned the whole id list for every line.
        final Set<Integer> wanted = new HashSet<>(paper_ids);
        StringBuilder sb = new StringBuilder();
        for (String l : lines) {
            String[] split = l.split("\\t");
            if (split.length != 2) {
                continue;
            }
            try {
                Integer src = Integer.valueOf(split[0].trim());
                Integer dest = Integer.valueOf(split[1].trim());
                if (wanted.contains(src) && !src.equals(dest)) {
                    sb.append(l).append("\n");
                }
            } catch (NumberFormatException ex) {
                LOGGER.log(Level.WARNING, "Skipping citation line with a non-numeric id: {0}", l);
            }
        }
        IOUtils.writeDataIntoFile(sb.toString(), outfile);
    }

    /**
     * Reads the integer in the first tab-separated column of every line of a file. Blank lines
     * are ignored; lines whose first column is not an integer are logged and skipped.
     */
    private static List<Integer> readLeadingIds(String path) {
        final List<String> lines = IOUtils.readFileLineByLine(path, false);
        List<Integer> ids = new ArrayList<>();
        for (String l : lines) {
            String[] split = l.split("\\t");
            if (split.length == 0) {
                continue;
            }
            String first = split[0].trim();
            if (first.isEmpty()) {
                continue;
            }
            try {
                ids.add(Integer.valueOf(first));
            } catch (NumberFormatException ex) {
                LOGGER.log(Level.WARNING, "Skipping line with a non-numeric id: {0}", l);
            }
        }
        return ids;
    }

    /** Row layout shared by conferences and journals: id, short name, full name, homepage. */
    private static RowFormatter venueFormatter() {
        return new RowFormatter() {
            @Override
            public void append(JSONObject record, StringBuilder out) {
                appendRow(out, record.get("ID"), text(record, "ShortName"), text(record, "FullName"),
                        text(record, "Homepage"));
            }
        };
    }

    /** Runs a bulk crawl that writes to {@code data/<file_prefix>.csv} and {@code data/<file_prefix>_dump.json}. */
    private static void crawlEntity(String label, String file_prefix, String url, int start,
            RowFormatter formatter) {
        crawlPages(label, url, start, "data/" + file_prefix + ".csv", "data/" + file_prefix + "_dump.json", false,
                DEFAULT_PAGE_DELAY_MS, DEFAULT_RETRY_DELAY_MS, DEFAULT_RETRY_DELAY_MS, formatter);
    }

    /**
     * Pages through a feed URL until an empty result page is returned or the thread is interrupted.
     *
     * @param label prefix of the progress message printed for every page
     * @param url feed URL without the {@code $skip} option
     * @param start initial {@code $skip} offset
     * @param csvPath file the rows of each page are written to
     * @param jsonPath file the raw JSON of each page is written to
     * @param checkpoint whether the current offset is written to {@link #paper_last} before each request
     * @param pageDelayMs pause after a successfully processed page
     * @param fetchRetryDelayMs pause after a page that could not be fetched
     * @param parseRetryDelayMs pause after a page that could not be parsed
     * @param formatter converts each record of a page into one row
     */
    private static void crawlPages(String label, String url, int start, String csvPath, String jsonPath,
            boolean checkpoint, long pageDelayMs, long fetchRetryDelayMs, long parseRetryDelayMs,
            RowFormatter formatter) {
        int offset = start;
        while (true) {
            if (checkpoint) {
                IOUtils.writeDataIntoFile(offset + "", paper_last, false);
            }
            final String json = getData2(url, offset);
            if (json == null) {
                // Fetch failed: skip this page rather than crash on a null body.
                System.out.println("json is null. skip. old start=" + offset);
                offset += PAGE_SIZE;
                if (!pause(fetchRetryDelayMs)) {
                    return;
                }
                continue;
            }
            JSONArray results = null;
            try {
                results = parseResults(json);
            } catch (ParseException ex) {
                System.out.println(ex.getMessage() + " Cause: " + ex.getCause());
                LOGGER.log(Level.SEVERE, null, ex);
            }
            if (results == null) {
                // Unparseable page: move on and back off instead of re-requesting it in a tight loop.
                offset += PAGE_SIZE;
                if (!pause(parseRetryDelayMs)) {
                    return;
                }
                continue;
            }
            if (results.isEmpty()) {
                System.out.println("results is Empty, break.");
                return;
            }
            System.out.println(label + ": start = " + offset + " results# = " + results.size());
            StringBuilder rows = new StringBuilder();
            for (Object record : results) {
                if (record instanceof JSONObject) {
                    formatter.append((JSONObject) record, rows);
                }
            }
            IOUtils.writeDataIntoFile(json + "\n", jsonPath);
            IOUtils.writeDataIntoFile(rows.toString(), csvPath);
            offset += PAGE_SIZE;
            if (!pause(pageDelayMs)) {
                return;
            }
        }
    }

    /**
     * Extracts the {@code d.results} array from a response.
     *
     * @return the array, or {@code null} (after logging a warning) when the response does not
     *         have the {@code {"d": {"results": [...]}}} shape
     * @throws ParseException when the text is not valid JSON
     */
    private static JSONArray parseResults(String json) throws ParseException {
        Object root = new JSONParser().parse(json);
        Object d = (root instanceof JSONObject) ? ((JSONObject) root).get("d") : null;
        Object results = (d instanceof JSONObject) ? ((JSONObject) d).get("results") : null;
        if (results instanceof JSONArray) {
            return (JSONArray) results;
        }
        LOGGER.log(Level.WARNING, "Response does not contain a d.results array");
        return null;
    }

    /** Returns the normalized string form of a record field; a missing or null field yields "". */
    private static String text(JSONObject record, String key) {
        Object value = record.get(key);
        return value == null ? "" : normalized(value.toString());
    }

    /** Appends the fields separated by {@link #SEPERATOR} and terminated by {@link #NEWLINE}; null prints as "null". */
    private static void appendRow(StringBuilder out, Object... fields) {
        for (int i = 0; i < fields.length; i++) {
            if (i > 0) {
                out.append(SEPERATOR);
            }
            out.append(fields[i]);
        }
        out.append(NEWLINE);
    }

    /**
     * Sleeps for the given time.
     *
     * @return {@code false} when the sleep was interrupted; the interrupt flag is restored so
     *         callers further up can see it
     */
    private static boolean pause(long millis) {
        try {
            Thread.sleep(millis);
            return true;
        } catch (InterruptedException ex) {
            Thread.currentThread().interrupt();
            LOGGER.log(Level.SEVERE, null, ex);
            return false;
        }
    }

}