List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(URI aUri)
From source file:SleepJob.java
License:Apache License
public JobConf setupJobConf(int numMapper, int numReducer, long mapSleepTime, int mapSleepCount, long reduceSleepTime, int reduceSleepCount) { JobConf job = new JobConf(getConf(), SleepJob.class); job.setNumMapTasks(numMapper);//from w w w . j a v a 2 s . c o m job.setNumReduceTasks(numReducer); job.setMapperClass(SleepJob.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(NullWritable.class); job.setReducerClass(SleepJob.class); job.setOutputFormat(NullOutputFormat.class); job.setInputFormat(SleepInputFormat.class); job.setPartitionerClass(SleepJob.class); job.setSpeculativeExecution(false); FileInputFormat.addInputPath(job, new Path("ignored")); job.setLong("sleep.job.map.sleep.time", mapSleepTime); job.setLong("sleep.job.reduce.sleep.time", reduceSleepTime); job.setInt("sleep.job.map.sleep.count", mapSleepCount); job.setInt("sleep.job.reduce.sleep.count", reduceSleepCount); return job; }
From source file:PostgresToSeq.java
License:Apache License
public static void main(String args[]) throws Exception { if (args.length != 2) { System.err.println("Arguments: [input postgres table] [output sequence file]"); return;//w w w . jav a2 s . c om } String inputFileName = args[0]; String outputDirName = args[1]; Configuration configuration = new Configuration(); FileSystem fs = FileSystem.get(configuration); Writer writer = new SequenceFile.Writer(fs, configuration, new Path(outputDirName + "/chunk-0"), Text.class, Text.class); Connection c = null; Statement stmt = null; try { Class.forName("org.postgresql.Driver"); c = DriverManager.getConnection("jdbc:postgresql://192.168.50.170:5432/uzeni", "postgres", "dbwpsdkdl"); c.setAutoCommit(false); System.out.println("Opened database successfully"); stmt = c.createStatement(); ResultSet rs = stmt.executeQuery("SELECT * FROM " + inputFileName); int count = 0; Text key = new Text(); Text value = new Text(); while (rs.next()) { String seq = rs.getString("seq"); String rep = rs.getString("rep"); String body = rs.getString("body"); String category = rep; String id = seq; String message = body; key.set("/" + category + "/" + id); value.set(message); writer.append(key, value); count++; } rs.close(); stmt.close(); c.close(); writer.close(); System.out.println("Wrote " + count + " entries."); } catch (Exception e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); System.exit(0); } }
From source file:ApplicationMaster.java
License:Apache License
private void renameScriptFile(final Path renamedScriptPath) throws IOException, InterruptedException { appSubmitterUgi.doAs(new PrivilegedExceptionAction<Void>() { @Override//from w w w. j a v a 2s. c o m public Void run() throws IOException { FileSystem fs = renamedScriptPath.getFileSystem(conf); fs.rename(new Path(scriptPath), renamedScriptPath); return null; } }); LOG.info("User " + appSubmitterUgi.getUserName() + " added suffix(.sh/.bat) to script file as " + renamedScriptPath); }
From source file:JavaCustomReceiver.java
License:Apache License
/** Create a socket connection and receive data until receiver is stopped */ private void receive() { Socket socket = null;// w w w . java 2s . c om String userInput = null; try { // connect to the server socket = new Socket(host, port); // BufferedReader reader = new BufferedReader(new InputStreamReader(socket.getInputStream())); // Path pt=new Path("hdfs://192.168.0.1:9000/equinox-sanjose.20120119-netflow.txt"); // FileSystem fs = FileSystem.get(new Configuration()); // BufferedReader in=new BufferedReader(new InputStreamReader(fs.open(pt))); Path pt = new Path("hdfs://192.168.0.1:9000/user/hduser/equinox-sanjose.20120119-netflow.txt"); Configuration conf = new Configuration(); conf.addResource(new Path("/usr/local/hadoop/conf/core-site.xml")); conf.addResource(new Path("/usr/local/hadoop/conf/hdfs-site.xml")); // FileSystem fs = FileSystem.get(conf); FileSystem fs = pt.getFileSystem(conf); System.out.println(fs.getHomeDirectory()); BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(pt))); // BufferedReader in = new BufferedReader( // new FileReader( // "/home/hduser/spark_scratchPad/equinox-sanjose.20120119-netflow.txt")); // // Until stopped or connection broken continue reading while (!isStopped() && (userInput = in.readLine()) != null) { System.out.println("Received data '" + userInput + "'"); store(userInput); } in.close(); socket.close(); // Restart in an attempt to connect again when server is active again restart("Trying to connect again"); } catch (ConnectException ce) { // restart if could not connect to server restart("Could not connect", ce); } catch (Throwable t) { restart("Error receiving data", t); } }
From source file:ClassifierHD.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length < 5) { System.out.println(/*from w w w. j av a2 s.c o m*/ "Arguments: [model] [label index] [dictionnary] [document frequency] [postgres table] [hdfs dir] [job_id]"); return; } String modelPath = args[0]; String labelIndexPath = args[1]; String dictionaryPath = args[2]; String documentFrequencyPath = args[3]; String tablename = args[4]; String inputDir = args[5]; Configuration configuration = new Configuration(); // model is a matrix (wordId, labelId) => probability score NaiveBayesModel model = NaiveBayesModel.materialize(new Path(modelPath), configuration); StandardNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(model); // labels is a map label => classId Map<Integer, String> labels = BayesUtils.readLabelIndex(configuration, new Path(labelIndexPath)); Map<String, Integer> dictionary = readDictionnary(configuration, new Path(dictionaryPath)); Map<Integer, Long> documentFrequency = readDocumentFrequency(configuration, new Path(documentFrequencyPath)); // analyzer used to extract word from tweet Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); int labelCount = labels.size(); int documentCount = documentFrequency.get(-1).intValue(); System.out.println("Number of labels: " + labelCount); System.out.println("Number of documents in training set: " + documentCount); Connection conn = null; PreparedStatement pstmt = null; try { Class.forName("org.postgresql.Driver"); conn = DriverManager.getConnection("jdbc:postgresql://192.168.50.170:5432/uzeni", "postgres", "dbwpsdkdl"); conn.setAutoCommit(false); String sql = "INSERT INTO " + tablename + " (id,gtime,wtime,target,num,link,body,rep) VALUES (?,?,?,?,?,?,?,?);"; pstmt = conn.prepareStatement(sql); FileSystem fs = FileSystem.get(configuration); FileStatus[] status = fs.listStatus(new Path(inputDir)); BufferedWriter bw = new BufferedWriter( new OutputStreamWriter(fs.create(new Path(inputDir + "/rep.list"), true))); for 
(int i = 0; i < status.length; i++) { BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(status[i].getPath()))); if (new String(status[i].getPath().getName()).equals("rep.list")) { continue; } int lv_HEAD = 1; int lv_cnt = 0; String lv_gtime = null; String lv_wtime = null; String lv_target = null; BigDecimal lv_num = null; String lv_link = null; String[] lv_args; String lv_line; StringBuilder lv_txt = new StringBuilder(); while ((lv_line = br.readLine()) != null) { if (lv_cnt < lv_HEAD) { lv_args = lv_line.split(","); lv_gtime = lv_args[0]; lv_wtime = lv_args[1]; lv_target = lv_args[2]; lv_num = new BigDecimal(lv_args[3]); lv_link = lv_args[4]; } else { lv_txt.append(lv_line + '\n'); } lv_cnt++; } br.close(); String id = status[i].getPath().getName(); String message = lv_txt.toString(); Multiset<String> words = ConcurrentHashMultiset.create(); TokenStream ts = analyzer.tokenStream("text", new StringReader(message)); CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); ts.reset(); int wordCount = 0; while (ts.incrementToken()) { if (termAtt.length() > 0) { String word = ts.getAttribute(CharTermAttribute.class).toString(); Integer wordId = dictionary.get(word); if (wordId != null) { words.add(word); wordCount++; } } } ts.end(); ts.close(); Vector vector = new RandomAccessSparseVector(10000); TFIDF tfidf = new TFIDF(); for (Multiset.Entry<String> entry : words.entrySet()) { String word = entry.getElement(); int count = entry.getCount(); Integer wordId = dictionary.get(word); Long freq = documentFrequency.get(wordId); double tfIdfValue = tfidf.calculate(count, freq.intValue(), wordCount, documentCount); vector.setQuick(wordId, tfIdfValue); } Vector resultVector = classifier.classifyFull(vector); double bestScore = -Double.MAX_VALUE; int bestCategoryId = -1; for (Element element : resultVector.all()) { int categoryId = element.index(); double score = element.get(); if (score > bestScore) { bestScore = score; bestCategoryId = 
categoryId; } } //System.out.println(message); //System.out.println(" => "+ lv_gtime + lv_wtime + lv_link + id + ":" + labels.get(bestCategoryId)); pstmt.setString(1, id); pstmt.setString(2, lv_gtime); pstmt.setString(3, lv_wtime); pstmt.setString(4, lv_target); pstmt.setBigDecimal(5, lv_num); pstmt.setString(6, lv_link); pstmt.setString(7, message.substring(1, Math.min(50, message.length()))); pstmt.setString(8, labels.get(bestCategoryId)); pstmt.addBatch(); bw.write(id + "\t" + labels.get(bestCategoryId) + "\n"); } pstmt.executeBatch(); //pstmt.clearParameters(); pstmt.close(); conn.commit(); conn.close(); bw.close(); } catch (Exception e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); System.exit(0); } analyzer.close(); }
From source file:ColumnStorageBasicTest.java
License:Open Source License
public void testLoadNaviFromHead() { try {//from w w w .jav a2 s .co m Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); Path path = new Path(prefix); fs.delete(path, true); createAllSingleProject(fs); ColumnProject cp = new ColumnProject(conf); cp.loadColmnInfoFromHeadInfo(fs, path); checkAllColumnInfo(cp.infos()); } catch (Exception e) { e.printStackTrace(); fail("get exception:" + e.getMessage()); } }
From source file:ColumnStorageBasicTest.java
License:Open Source License
public void testGetFileNameByFieldIndex() { try {//from ww w. j av a 2 s . co m String navigator = prefix; Path path = new Path(navigator); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); ColumnProject cp = new ColumnProject(conf); cp.loadColmnInfoFromHeadInfo(fs, path); ArrayList<String> fileList = null; fileList = cp.getFileNameByIndex(null); if (fileList != null) { fail("should null"); } ArrayList<Short> idxs = new ArrayList<Short>(10); idxs.add((short) 10); fileList = cp.getFileNameByIndex(idxs); if (fileList != null) { fail("should null"); } idxs.clear(); idxs.add((short) 0); fileList = cp.getFileNameByIndex(idxs); if (fileList == null) { fail("should not null"); } if (fileList.size() != 1) { fail("error fileList:" + fileList.size()); } if (!fileList.get(0).equals(byteFileName)) { fail("error file name:" + fileList.get(0)); } idxs.clear(); idxs.add((short) 0); idxs.add((short) 5); fileList = cp.getFileNameByIndex(idxs); if (fileList == null) { fail("should not null"); } if (fileList.size() != 2) { fail("error fileList:" + fileList.size()); } if (!fileList.get(0).equals(byteFileName)) { fail("error file name1:" + fileList.get(0)); } if (!fileList.get(1).equals(doubleFileName)) { fail("error file name2:" + fileList.get(1)); } idxs.clear(); idxs.add((short) 0); idxs.add((short) 5); idxs.add((short) 10); fileList = cp.getFileNameByIndex(idxs); if (fileList != null) { fail("should null"); } } catch (Exception e) { e.printStackTrace(); fail("get exception:" + e.getMessage()); } }
From source file:ColumnStorageBasicTest.java
License:Open Source License
public void testConstructorNullParamter() { try {//from w ww .j a v a 2s .co m String navigator = prefix + "navigator"; Path path = new Path(navigator); Configuration conf = new Configuration(); ArrayList<Short> idxs = new ArrayList<Short>(10); try { ColumnStorageClient client = new ColumnStorageClient(null, idxs, conf); fail("error should get exception"); } catch (SEException.InvalidParameterException e) { } try { ColumnStorageClient client = new ColumnStorageClient(path, null, conf); fail("error should get exception"); } catch (SEException.InvalidParameterException e) { } try { ColumnStorageClient client = new ColumnStorageClient(path, idxs, null); fail("error should get exception"); } catch (SEException.InvalidParameterException e) { } } catch (Exception e) { e.printStackTrace(); fail("get exception:" + e.getMessage()); } }
From source file:ColumnStorageBasicTest.java
License:Open Source License
public void testConstructorFieldNoExist() { try {//from ww w .j a va 2s .c o m Configuration conf = new Configuration(); Path path = new Path(prefix); FileSystem fs = FileSystem.get(conf); fs.delete(path, true); createAllSingleProject(fs); createMultiProject(fs); ArrayList<Short> idxs = new ArrayList<Short>(10); idxs.add((short) 10); ColumnStorageClient client = new ColumnStorageClient(path, idxs, conf); fail("should get exception"); } catch (SEException.InvalidParameterException e) { } catch (Exception e) { e.printStackTrace(); fail("get exception:" + e.getMessage()); } }
From source file:ColumnStorageBasicTest.java
License:Open Source License
public void testConstructorFieldInSameFile() { try {// w w w . jav a2s .c o m Configuration conf = new Configuration(); Path path = new Path(prefix); FileSystem fs = FileSystem.get(conf); ArrayList<Short> idxs = new ArrayList<Short>(10); idxs.add((short) 7); idxs.add((short) 9); ColumnStorageClient client = new ColumnStorageClient(path, idxs, conf); if (client.cp == null) { fail("cp null"); } if (client.list.size() != 1) { fail("error list size:" + client.list.size()); } if (!client.list.get(0).equals(multiFileNameString)) { fail("error filename:" + client.list.get(0)); } } catch (Exception e) { e.printStackTrace(); fail("get exception:" + e.getMessage()); } }