Example usage for org.apache.hadoop.fs Path Path

List of usage examples for org.apache.hadoop.fs Path Path

Introduction

On this page you can find example usage for the org.apache.hadoop.fs.Path constructor.

Prototype

public Path(URI aUri) 

Document

Construct a path from a URI
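
Below is a minimal, self-contained sketch of the constructor; the HDFS host, port, and file paths are placeholder values, not ones taken from the examples. Most of the examples that follow use the overloaded Path(String) constructor, which accepts the same kind of path strings.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathConstructorExample {
    public static void main(String[] args) throws Exception {
        // Construct a Path directly from a URI (scheme, authority, and path).
        Path fromUri = new Path(new URI("hdfs://namenode:9000/user/hduser/input.txt"));

        // The overloaded Path(String) constructor used by most of the examples on this page.
        Path fromString = new Path("/user/hduser/input.txt");

        // A Path can resolve the FileSystem it belongs to from a Configuration.
        Configuration conf = new Configuration();
        FileSystem fs = fromUri.getFileSystem(conf);
        System.out.println("exists: " + fs.exists(fromUri) + ", uri: " + fromString.toUri());
    }
}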

Usage

From source file:SleepJob.java

License:Apache License

public JobConf setupJobConf(int numMapper, int numReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount) {
    JobConf job = new JobConf(getConf(), SleepJob.class);
    job.setNumMapTasks(numMapper);
    job.setNumReduceTasks(numReducer);
    job.setMapperClass(SleepJob.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(SleepJob.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setInputFormat(SleepInputFormat.class);
    job.setPartitionerClass(SleepJob.class);
    job.setSpeculativeExecution(false);
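    // The literal path "ignored" is a placeholder: SleepInputFormat synthesizes its own splits and never reads this input.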
    FileInputFormat.addInputPath(job, new Path("ignored"));
    job.setLong("sleep.job.map.sleep.time", mapSleepTime);
    job.setLong("sleep.job.reduce.sleep.time", reduceSleepTime);
    job.setInt("sleep.job.map.sleep.count", mapSleepCount);
    job.setInt("sleep.job.reduce.sleep.count", reduceSleepCount);
    return job;
}

From source file:PostgresToSeq.java

License:Apache License

public static void main(String args[]) throws Exception {
    if (args.length != 2) {
        System.err.println("Arguments: [input postgres table] [output sequence file]");
        return;
    }
    String tableName = args[0];
    String outputDirName = args[1];
    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Writer writer = new SequenceFile.Writer(fs, configuration, new Path(outputDirName + "/chunk-0"), Text.class,
            Text.class);
    Connection c = null;
    Statement stmt = null;
    try {
        Class.forName("org.postgresql.Driver");
        c = DriverManager.getConnection("jdbc:postgresql://192.168.50.170:5432/uzeni", "postgres", "dbwpsdkdl");
        c.setAutoCommit(false);
        System.out.println("Opened database successfully");
        stmt = c.createStatement();
        ResultSet rs = stmt.executeQuery("SELECT * FROM " + tableName);
        int count = 0;
        Text key = new Text();
        Text value = new Text();

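        // Each row becomes one sequence-file entry: key = "/<rep>/<seq>", value = the message body.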
        while (rs.next()) {
            String seq = rs.getString("seq");
            String rep = rs.getString("rep");
            String body = rs.getString("body");
            String category = rep;
            String id = seq;
            String message = body;
            key.set("/" + category + "/" + id);
            value.set(message);
            writer.append(key, value);
            count++;
        }
        rs.close();
        stmt.close();
        c.close();
        writer.close();
        System.out.println("Wrote " + count + " entries.");
    } catch (Exception e) {
        System.err.println(e.getClass().getName() + ": " + e.getMessage());
        System.exit(1);
    }
}

From source file:ApplicationMaster.java

License:Apache License

private void renameScriptFile(final Path renamedScriptPath) throws IOException, InterruptedException {
    appSubmitterUgi.doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() throws IOException {
            FileSystem fs = renamedScriptPath.getFileSystem(conf);
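            // Rename the uploaded script so it carries the .sh/.bat suffix expected at launch time.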
            fs.rename(new Path(scriptPath), renamedScriptPath);
            return null;
        }
    });
    LOG.info("User " + appSubmitterUgi.getUserName() + " added suffix(.sh/.bat) to script file as "
            + renamedScriptPath);
}

From source file:JavaCustomReceiver.java

License:Apache License

/** Create a socket connection and receive data until receiver is stopped */
private void receive() {
    Socket socket = null;
    String userInput = null;

    try {
        // connect to the server
        socket = new Socket(host, port);

        //   BufferedReader reader = new BufferedReader(new InputStreamReader(socket.getInputStream()));

        //      Path pt=new Path("hdfs://192.168.0.1:9000/equinox-sanjose.20120119-netflow.txt");
        //      FileSystem fs = FileSystem.get(new Configuration());
        //      BufferedReader in=new BufferedReader(new InputStreamReader(fs.open(pt)));
        Path pt = new Path("hdfs://192.168.0.1:9000/user/hduser/equinox-sanjose.20120119-netflow.txt");

        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/local/hadoop/conf/core-site.xml"));
        conf.addResource(new Path("/usr/local/hadoop/conf/hdfs-site.xml"));
        //      FileSystem fs = FileSystem.get(conf);
        FileSystem fs = pt.getFileSystem(conf);
        System.out.println(fs.getHomeDirectory());
        BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(pt)));

        //      BufferedReader in = new BufferedReader(
        //            new FileReader(
        //                  "/home/hduser/spark_scratchPad/equinox-sanjose.20120119-netflow.txt"));
        //      
        // Until stopped or connection broken continue reading
        while (!isStopped() && (userInput = in.readLine()) != null) {
            System.out.println("Received data '" + userInput + "'");
            store(userInput);
        }
        in.close();
        socket.close();

        // Restart in an attempt to connect again when server is active again
        restart("Trying to connect again");
    } catch (ConnectException ce) {
        // restart if could not connect to server
        restart("Could not connect", ce);
    } catch (Throwable t) {
        restart("Error receiving data", t);
    }
}

From source file:ClassifierHD.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length < 6) {
        System.out.println(
                "Arguments: [model] [label index] [dictionnary] [document frequency] [postgres table] [hdfs dir] [job_id]");
        return;
    }
    String modelPath = args[0];
    String labelIndexPath = args[1];
    String dictionaryPath = args[2];
    String documentFrequencyPath = args[3];
    String tablename = args[4];
    String inputDir = args[5];

    Configuration configuration = new Configuration();

    // model is a matrix (wordId, labelId) => probability score
    NaiveBayesModel model = NaiveBayesModel.materialize(new Path(modelPath), configuration);

    StandardNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(model);

    // labels is a map label => classId
    Map<Integer, String> labels = BayesUtils.readLabelIndex(configuration, new Path(labelIndexPath));
    Map<String, Integer> dictionary = readDictionnary(configuration, new Path(dictionaryPath));
    Map<Integer, Long> documentFrequency = readDocumentFrequency(configuration,
            new Path(documentFrequencyPath));

    // analyzer used to extract words from the message text
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);

    int labelCount = labels.size();
    int documentCount = documentFrequency.get(-1).intValue();

    System.out.println("Number of labels: " + labelCount);
    System.out.println("Number of documents in training set: " + documentCount);

    Connection conn = null;
    PreparedStatement pstmt = null;

    try {
        Class.forName("org.postgresql.Driver");
        conn = DriverManager.getConnection("jdbc:postgresql://192.168.50.170:5432/uzeni", "postgres",
                "dbwpsdkdl");
        conn.setAutoCommit(false);
        String sql = "INSERT INTO " + tablename
                + " (id,gtime,wtime,target,num,link,body,rep) VALUES (?,?,?,?,?,?,?,?);";
        pstmt = conn.prepareStatement(sql);

        FileSystem fs = FileSystem.get(configuration);
        FileStatus[] status = fs.listStatus(new Path(inputDir));
        BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(fs.create(new Path(inputDir + "/rep.list"), true)));

        for (int i = 0; i < status.length; i++) {
            if (status[i].getPath().getName().equals("rep.list")) {
                continue;
            }
            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(status[i].getPath())));
            int lv_HEAD = 1;
            int lv_cnt = 0;
            String lv_gtime = null;
            String lv_wtime = null;
            String lv_target = null;
            BigDecimal lv_num = null;
            String lv_link = null;
            String[] lv_args;
            String lv_line;
            StringBuilder lv_txt = new StringBuilder();
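            // The first line of each file is a CSV header (gtime,wtime,target,num,link); the remaining lines form the message body.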
            while ((lv_line = br.readLine()) != null) {
                if (lv_cnt < lv_HEAD) {
                    lv_args = lv_line.split(",");
                    lv_gtime = lv_args[0];
                    lv_wtime = lv_args[1];
                    lv_target = lv_args[2];
                    lv_num = new BigDecimal(lv_args[3]);
                    lv_link = lv_args[4];
                } else {
                    lv_txt.append(lv_line + '\n');
                }
                lv_cnt++;
            }
            br.close();

            String id = status[i].getPath().getName();
            String message = lv_txt.toString();

            Multiset<String> words = ConcurrentHashMultiset.create();

            TokenStream ts = analyzer.tokenStream("text", new StringReader(message));
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            int wordCount = 0;
            while (ts.incrementToken()) {
                if (termAtt.length() > 0) {
                    String word = ts.getAttribute(CharTermAttribute.class).toString();
                    Integer wordId = dictionary.get(word);
                    if (wordId != null) {
                        words.add(word);
                        wordCount++;
                    }
                }
            }

            ts.end();
            ts.close();

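            // Build a TF-IDF weighted feature vector (cardinality fixed at 10000) from the dictionary word counts.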
            Vector vector = new RandomAccessSparseVector(10000);
            TFIDF tfidf = new TFIDF();
            for (Multiset.Entry<String> entry : words.entrySet()) {
                String word = entry.getElement();
                int count = entry.getCount();
                Integer wordId = dictionary.get(word);
                Long freq = documentFrequency.get(wordId);
                double tfIdfValue = tfidf.calculate(count, freq.intValue(), wordCount, documentCount);
                vector.setQuick(wordId, tfIdfValue);
            }
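            // Classify the vector and keep the label with the highest score.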
            Vector resultVector = classifier.classifyFull(vector);
            double bestScore = -Double.MAX_VALUE;
            int bestCategoryId = -1;
            for (Element element : resultVector.all()) {
                int categoryId = element.index();
                double score = element.get();
                if (score > bestScore) {
                    bestScore = score;
                    bestCategoryId = categoryId;
                }
            }
            //System.out.println(message);
            //System.out.println(" => "+ lv_gtime + lv_wtime + lv_link + id + ":" + labels.get(bestCategoryId));
            pstmt.setString(1, id);
            pstmt.setString(2, lv_gtime);
            pstmt.setString(3, lv_wtime);
            pstmt.setString(4, lv_target);
            pstmt.setBigDecimal(5, lv_num);
            pstmt.setString(6, lv_link);
            pstmt.setString(7, message.substring(1, Math.min(50, message.length())));
            pstmt.setString(8, labels.get(bestCategoryId));
            pstmt.addBatch();
            bw.write(id + "\t" + labels.get(bestCategoryId) + "\n");
        }
        pstmt.executeBatch();
        //pstmt.clearParameters();
        pstmt.close();
        conn.commit();
        conn.close();
        bw.close();
    } catch (Exception e) {
        System.err.println(e.getClass().getName() + ": " + e.getMessage());
        System.exit(1);
    }
    analyzer.close();
}

From source file:ColumnStorageBasicTest.java

License:Open Source License

public void testLoadNaviFromHead() {
    try {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(prefix);

        fs.delete(path, true);
        createAllSingleProject(fs);

        ColumnProject cp = new ColumnProject(conf);
        cp.loadColmnInfoFromHeadInfo(fs, path);

        checkAllColumnInfo(cp.infos());
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}

From source file:ColumnStorageBasicTest.java

License:Open Source License

public void testGetFileNameByFieldIndex() {
    try {
        String navigator = prefix;
        Path path = new Path(navigator);
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        ColumnProject cp = new ColumnProject(conf);
        cp.loadColmnInfoFromHeadInfo(fs, path);

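        // A null index list or an unknown field index should yield no file names.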
        ArrayList<String> fileList = null;
        fileList = cp.getFileNameByIndex(null);
        if (fileList != null) {
            fail("should null");
        }

        ArrayList<Short> idxs = new ArrayList<Short>(10);
        idxs.add((short) 10);
        fileList = cp.getFileNameByIndex(idxs);
        if (fileList != null) {
            fail("should null");
        }

        idxs.clear();
        idxs.add((short) 0);
        fileList = cp.getFileNameByIndex(idxs);
        if (fileList == null) {
            fail("should not null");
        }
        if (fileList.size() != 1) {
            fail("error fileList:" + fileList.size());
        }
        if (!fileList.get(0).equals(byteFileName)) {
            fail("error file name:" + fileList.get(0));
        }

        idxs.clear();
        idxs.add((short) 0);
        idxs.add((short) 5);
        fileList = cp.getFileNameByIndex(idxs);
        if (fileList == null) {
            fail("should not null");
        }
        if (fileList.size() != 2) {
            fail("error fileList:" + fileList.size());
        }
        if (!fileList.get(0).equals(byteFileName)) {
            fail("error file name1:" + fileList.get(0));
        }
        if (!fileList.get(1).equals(doubleFileName)) {
            fail("error file name2:" + fileList.get(1));
        }

        idxs.clear();
        idxs.add((short) 0);
        idxs.add((short) 5);
        idxs.add((short) 10);
        fileList = cp.getFileNameByIndex(idxs);
        if (fileList != null) {
            fail("should null");
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}

From source file:ColumnStorageBasicTest.java

License:Open Source License

public void testConstructorNullParamter() {
    try {
        String navigator = prefix + "navigator";
        Path path = new Path(navigator);
        Configuration conf = new Configuration();

        ArrayList<Short> idxs = new ArrayList<Short>(10);

        try {
            ColumnStorageClient client = new ColumnStorageClient(null, idxs, conf);
            fail("error should get exception");
        } catch (SEException.InvalidParameterException e) {

        }

        try {
            ColumnStorageClient client = new ColumnStorageClient(path, null, conf);
            fail("error should get exception");
        } catch (SEException.InvalidParameterException e) {

        }

        try {
            ColumnStorageClient client = new ColumnStorageClient(path, idxs, null);
            fail("error should get exception");
        } catch (SEException.InvalidParameterException e) {

        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}

From source file:ColumnStorageBasicTest.java

License:Open Source License

public void testConstructorFieldNoExist() {
    try {
        Configuration conf = new Configuration();
        Path path = new Path(prefix);

        FileSystem fs = FileSystem.get(conf);
        fs.delete(path, true);

        createAllSingleProject(fs);
        createMultiProject(fs);

        ArrayList<Short> idxs = new ArrayList<Short>(10);
        idxs.add((short) 10);
        ColumnStorageClient client = new ColumnStorageClient(path, idxs, conf);

        fail("should get exception");
    } catch (SEException.InvalidParameterException e) {

    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}

From source file:ColumnStorageBasicTest.java

License:Open Source License

public void testConstructorFieldInSameFile() {
    try {
        Configuration conf = new Configuration();
        Path path = new Path(prefix);
        FileSystem fs = FileSystem.get(conf);

        ArrayList<Short> idxs = new ArrayList<Short>(10);
        idxs.add((short) 7);
        idxs.add((short) 9);

        ColumnStorageClient client = new ColumnStorageClient(path, idxs, conf);

        if (client.cp == null) {
            fail("cp null");
        }

        if (client.list.size() != 1) {
            fail("error list size:" + client.list.size());
        }

        if (!client.list.get(0).equals(multiFileNameString)) {
            fail("error filename:" + client.list.get(0));
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}