List of usage examples for org.apache.hadoop.fs FileSystem getWorkingDirectory
public abstract Path getWorkingDirectory();
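Before the examples, a minimal sketch of typical usage (not taken from any of the source files below; the relative file name is illustrative): getWorkingDirectory() returns the Path, usually fully qualified, against which relative paths on that FileSystem are resolved.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WorkingDirectoryExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // The working directory is the base against which relative paths are resolved.
        Path wd = fs.getWorkingDirectory();
        System.out.println("working directory: " + wd);

        // A relative path such as "data/input.txt" (illustrative name) resolves under it.
        Path resolved = fs.makeQualified(new Path("data/input.txt"));
        System.out.println("resolved: " + resolved);
    }
}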
From source file:MRDriver.java
License:Apache License
public int run(String args[]) throws Exception {
    FileSystem fs = null;
    Path samplesMapPath = null;
    float epsilon = Float.parseFloat(args[0]);
    double delta = Double.parseDouble(args[1]);
    int minFreqPercent = Integer.parseInt(args[2]);
    int d = Integer.parseInt(args[3]);
    int datasetSize = Integer.parseInt(args[4]);
    int numSamples = Integer.parseInt(args[5]);
    double phi = Double.parseDouble(args[6]);
    Random rand;

    /************************ Job 1 (local FIM) Configuration ************************/
    JobConf conf = new JobConf(getConf());

    /*
     * Compute the number of required "votes" for an itemset to be
     * declared frequent.
     */
    // The +1 at the end is needed to ensure reqApproxNum > numSamples / 2.
    int reqApproxNum = (int) Math
            .floor((numSamples * (1 - phi)) - Math.sqrt(numSamples * (1 - phi) * 2 * Math.log(1 / delta))) + 1;
    int sampleSize = (int) Math.ceil((2 / Math.pow(epsilon, 2)) * (d + Math.log(1 / phi)));
    //System.out.println("reducersNum: " + numSamples + " reqApproxNum: " + reqApproxNum);

    conf.setInt("PARMM.reducersNum", numSamples);
    conf.setInt("PARMM.datasetSize", datasetSize);
    conf.setInt("PARMM.minFreqPercent", minFreqPercent);
    conf.setInt("PARMM.sampleSize", sampleSize);
    conf.setFloat("PARMM.epsilon", epsilon);

    // Set the number of reducers equal to the number of samples, to
    // maximize parallelism. Required by our Partitioner.
    conf.setNumReduceTasks(numSamples);

    // XXX: why do we disable the speculative execution? MR
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    conf.setInt("mapred.task.timeout", MR_TIMEOUT_MILLI);

    /*
     * Enable compression of map output.
     *
     * We do it for this job and not for the aggregation one because
     * each mapper there only prints out one record for each itemset,
     * so there isn't much to compress, I'd say. MR
     *
     * In Amazon MapReduce, compression of the map output seems to
     * happen by default and the Snappy codec is used, which is
     * extremely fast.
     */
    conf.setBoolean("mapred.compress.map.output", true);
    //conf.setMapOutputCompressorClass(com.hadoop.compression.lzo.LzoCodec.class);

    conf.setJarByClass(MRDriver.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setInputFormat(SequenceFileInputFormat.class);
    // We write the collections found in a reducer as a SequenceFile.
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(conf, new Path(args[9]));

    // Set the mapper class based on the command line option.
    switch (Integer.parseInt(args[7])) {
    case 1:
        System.out.println("running partition mapper...");
        SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
        conf.setMapperClass(PartitionMapper.class);
        break;
    case 2:
        System.out.println("running binomial mapper...");
        SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
        conf.setMapperClass(BinomialSamplerMapper.class);
        break;
    case 3:
        System.out.println("running coin mapper...");
        SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
        conf.setMapperClass(CoinFlipSamplerMapper.class);
        break; // prevent fall-through into case 4
    case 4:
        System.out.println("running sampler mapper...");
        SequenceFileInputFormat.addInputPath(conf, new Path(args[8]));
        conf.setMapperClass(InputSamplerMapper.class);

        // create a random sample of size T*m
        rand = new Random();
        long sampling_start_time = System.nanoTime();
        int[] samples = new int[numSamples * sampleSize];
        for (int i = 0; i < numSamples * sampleSize; i++) {
            samples[i] = rand.nextInt(datasetSize);
        }

        // for each key in the sample, create a list of all T samples to which this key belongs
        Hashtable<LongWritable, ArrayList<IntWritable>> hashTable = new Hashtable<LongWritable, ArrayList<IntWritable>>();
        for (int i = 0; i < numSamples * sampleSize; i++) {
            ArrayList<IntWritable> sampleIDs = null;
            LongWritable key = new LongWritable(samples[i]);
            if (hashTable.containsKey(key))
                sampleIDs = hashTable.get(key);
            else
                sampleIDs = new ArrayList<IntWritable>();
            sampleIDs.add(new IntWritable(i % numSamples));
            hashTable.put(key, sampleIDs);
        }

        /*
         * Convert the Hashtable to a MapWritable which we will
         * write to HDFS and distribute to all Mappers using
         * DistributedCache.
         */
        MapWritable map = new MapWritable();
        for (LongWritable key : hashTable.keySet()) {
            ArrayList<IntWritable> sampleIDs = hashTable.get(key);
            IntArrayWritable sampleIDsIAW = new IntArrayWritable();
            sampleIDsIAW.set(sampleIDs.toArray(new IntWritable[sampleIDs.size()]));
            map.put(key, sampleIDsIAW);
        }

        fs = FileSystem.get(URI.create("samplesMap.ser"), conf);
        samplesMapPath = new Path("samplesMap.ser");
        FSDataOutputStream out = fs.create(samplesMapPath, true);
        map.write(out);
        out.sync();
        out.close();
        DistributedCache.addCacheFile(new URI(fs.getWorkingDirectory() + "/samplesMap.ser#samplesMap.ser"),
                conf);

        // stop the sampling timer
        long sampling_end_time = System.nanoTime();
        long sampling_runtime = (sampling_end_time - sampling_start_time) / 1000000;
        System.out.println("sampling runtime (milliseconds): " + sampling_runtime);
        break; // end switch case
    case 5:
        System.out.println("running random integer partition mapper...");
        conf.setInputFormat(WholeSplitInputFormat.class);
        Path inputFilePath = new Path(args[8]);
        WholeSplitInputFormat.addInputPath(conf, inputFilePath);
        conf.setMapperClass(RandIntPartSamplerMapper.class);

        // Compute number of map tasks.
        fs = inputFilePath.getFileSystem(conf);
        FileStatus inputFileStatus = fs.getFileStatus(inputFilePath);
        long len = inputFileStatus.getLen();
        long blockSize = inputFileStatus.getBlockSize();
        conf.setLong("mapred.min.split.size", blockSize);
        conf.setLong("mapred.max.split.size", blockSize);
        int mapTasksNum = ((int) (len / blockSize)) + 1;
        conf.setNumMapTasks(mapTasksNum);
        //System.out.println("len: " + len + " blockSize: "
        //        + blockSize + " mapTasksNum: " + mapTasksNum);

        // Extract random integer partition of total sample
        // size into up to mapTasksNum partitions.
        // XXX I'm not sure this is a correct way to do it.
        rand = new Random();
        IntWritable[][] toSampleArr = new IntWritable[mapTasksNum][numSamples];
        for (int j = 0; j < numSamples; j++) {
            IntWritable[] tempToSampleArr = new IntWritable[mapTasksNum];
            int sum = 0;
            int i;
            for (i = 0; i < mapTasksNum - 1; i++) {
                int size = rand.nextInt(sampleSize - sum);
                tempToSampleArr[i] = new IntWritable(size);
                sum += size;
                if (sum > numSamples * sampleSize) {
                    System.out.println("Something went wrong generating the sample Sizes");
                    System.exit(1);
                }
                if (sum == sampleSize) {
                    break;
                }
            }
            if (i == mapTasksNum - 1) {
                tempToSampleArr[i] = new IntWritable(sampleSize - sum);
            } else {
                for (; i < mapTasksNum; i++) {
                    tempToSampleArr[i] = new IntWritable(0);
                }
            }
            Collections.shuffle(Arrays.asList(tempToSampleArr));
            for (i = 0; i < mapTasksNum; i++) {
                toSampleArr[i][j] = tempToSampleArr[i];
            }
        }
        for (int i = 0; i < mapTasksNum; i++) {
            DefaultStringifier.storeArray(conf, toSampleArr[i], "PARMM.toSampleArr_" + i);
        }
        break;
    default:
        System.err.println("Wrong Mapper ID. Can only be in [1,5]");
        System.exit(1);
        break;
    }

    /*
     * We don't use the default hash partitioner because we want to
     * maximize the parallelism. That's why we also fix the number
     * of reducers.
     */
    conf.setPartitionerClass(FIMPartitioner.class);

    conf.setReducerClass(FIMReducer.class);

    /************************ Job 2 (aggregation) Configuration ************************/
    JobConf confAggr = new JobConf(getConf());

    confAggr.setInt("PARMM.reducersNum", numSamples);
    confAggr.setInt("PARMM.reqApproxNum", reqApproxNum);
    confAggr.setInt("PARMM.sampleSize", sampleSize);
    confAggr.setFloat("PARMM.epsilon", epsilon);

    // XXX: Why do we disable speculative execution? MR
    confAggr.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    confAggr.setInt("mapred.task.timeout", MR_TIMEOUT_MILLI);

    confAggr.setJarByClass(MRDriver.class);
    confAggr.setMapOutputKeyClass(Text.class);
    confAggr.setMapOutputValueClass(DoubleWritable.class);
    confAggr.setOutputKeyClass(Text.class);
    confAggr.setOutputValueClass(Text.class);
    confAggr.setMapperClass(AggregateMapper.class);
    confAggr.setReducerClass(AggregateReducer.class);

    confAggr.setInputFormat(CombineSequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(confAggr, new Path(args[9]));
    FileOutputFormat.setOutputPath(confAggr, new Path(args[10]));

    long FIMjob_start_time = System.currentTimeMillis();
    RunningJob FIMjob = JobClient.runJob(conf);
    long FIMjob_end_time = System.currentTimeMillis();

    RunningJob aggregateJob = JobClient.runJob(confAggr);
    long aggrJob_end_time = System.currentTimeMillis();

    long FIMjob_runtime = FIMjob_end_time - FIMjob_start_time;
    long aggrJob_runtime = aggrJob_end_time - FIMjob_end_time;

    if (args[7].equals("4")) {
        // Remove samplesMap file
        fs.delete(samplesMapPath, false);
    }

    Counters counters = FIMjob.getCounters();
    Counters.Group FIMMapperStartTimesCounters = counters.getGroup("FIMMapperStart");
    long[] FIMMapperStartTimes = new long[FIMMapperStartTimesCounters.size()];
    int i = 0;
    for (Counters.Counter counter : FIMMapperStartTimesCounters) {
        FIMMapperStartTimes[i++] = counter.getCounter();
    }

    Counters.Group FIMMapperEndTimesCounters = counters.getGroup("FIMMapperEnd");
    long[] FIMMapperEndTimes = new long[FIMMapperEndTimesCounters.size()];
    i = 0;
    for (Counters.Counter counter : FIMMapperEndTimesCounters) {
        FIMMapperEndTimes[i++] = counter.getCounter();
    }

    Counters.Group FIMReducerStartTimesCounters = counters.getGroup("FIMReducerStart");
    long[] FIMReducerStartTimes = new long[FIMReducerStartTimesCounters.size()];
    i = 0;
    for (Counters.Counter counter : FIMReducerStartTimesCounters) {
        FIMReducerStartTimes[i++] = counter.getCounter();
    }

    Counters.Group FIMReducerEndTimesCounters = counters.getGroup("FIMReducerEnd");
    long[] FIMReducerEndTimes = new long[FIMReducerEndTimesCounters.size()];
    i = 0;
    for (Counters.Counter counter : FIMReducerEndTimesCounters) {
        FIMReducerEndTimes[i++] = counter.getCounter();
    }

    Counters countersAggr = aggregateJob.getCounters();
    Counters.Group AggregateMapperStartTimesCounters = countersAggr.getGroup("AggregateMapperStart");
    long[] AggregateMapperStartTimes = new long[AggregateMapperStartTimesCounters.size()];
    i = 0;
    for (Counters.Counter counter : AggregateMapperStartTimesCounters) {
        AggregateMapperStartTimes[i++] = counter.getCounter();
    }

    Counters.Group AggregateMapperEndTimesCounters = countersAggr.getGroup("AggregateMapperEnd");
    long[] AggregateMapperEndTimes = new long[AggregateMapperEndTimesCounters.size()];
    i = 0;
    for (Counters.Counter counter : AggregateMapperEndTimesCounters) {
        AggregateMapperEndTimes[i++] = counter.getCounter();
    }

    Counters.Group AggregateReducerStartTimesCounters = countersAggr.getGroup("AggregateReducerStart");
    long[] AggregateReducerStartTimes = new long[AggregateReducerStartTimesCounters.size()];
    i = 0;
    for (Counters.Counter counter : AggregateReducerStartTimesCounters) {
        AggregateReducerStartTimes[i++] = counter.getCounter();
    }

    Counters.Group AggregateReducerEndTimesCounters = countersAggr.getGroup("AggregateReducerEnd");
    long[] AggregateReducerEndTimes = new long[AggregateReducerEndTimesCounters.size()];
    i = 0;
    for (Counters.Counter counter : AggregateReducerEndTimesCounters) {
        AggregateReducerEndTimes[i++] = counter.getCounter();
    }
    long FIMMapperStartMin = FIMMapperStartTimes[0];
    for (long l : FIMMapperStartTimes) {
        if (l < FIMMapperStartMin) {
            FIMMapperStartMin = l;
        }
    }
    long FIMMapperEndMax = FIMMapperEndTimes[0];
    for (long l : FIMMapperEndTimes) {
        if (l > FIMMapperEndMax) {
            FIMMapperEndMax = l;
        }
    }
    System.out.println("FIM job setup time (milliseconds): " + (FIMMapperStartMin - FIMjob_start_time));
    System.out.println("FIMMapper total runtime (milliseconds): " + (FIMMapperEndMax - FIMMapperStartMin));

    long[] FIMMapperRunTimes = new long[FIMMapperStartTimes.length];
    long FIMMapperRunTimesSum = 0;
    for (int l = 0; l < FIMMapperStartTimes.length; l++) {
        FIMMapperRunTimes[l] = FIMMapperEndTimes[l] - FIMMapperStartTimes[l];
        FIMMapperRunTimesSum += FIMMapperRunTimes[l];
    }
    System.out.println("FIMMapper average task runtime (milliseconds): "
            + FIMMapperRunTimesSum / FIMMapperStartTimes.length);
    long FIMMapperRunTimesMin = FIMMapperRunTimes[0];
    long FIMMapperRunTimesMax = FIMMapperRunTimes[0];
    for (long l : FIMMapperRunTimes) {
        if (l < FIMMapperRunTimesMin) {
            FIMMapperRunTimesMin = l;
        }
        if (l > FIMMapperRunTimesMax) {
            FIMMapperRunTimesMax = l;
        }
    }
    System.out.println("FIMMapper minimum task runtime (milliseconds): " + FIMMapperRunTimesMin);
    System.out.println("FIMMapper maximum task runtime (milliseconds): " + FIMMapperRunTimesMax);

    long FIMReducerStartMin = FIMReducerStartTimes[0];
    for (long l : FIMReducerStartTimes) {
        if (l < FIMReducerStartMin) {
            FIMReducerStartMin = l;
        }
    }
    long FIMReducerEndMax = FIMReducerEndTimes[0];
    for (long l : FIMReducerEndTimes) {
        if (l > FIMReducerEndMax) {
            FIMReducerEndMax = l;
        }
    }
    System.out
            .println("FIM job shuffle phase runtime (milliseconds): " + (FIMReducerStartMin - FIMMapperEndMax));
    System.out.println("FIMReducer total runtime (milliseconds): " + (FIMReducerEndMax - FIMReducerStartMin));

    long[] FIMReducerRunTimes = new long[FIMReducerStartTimes.length];
    long FIMReducerRunTimesSum = 0;
    for (int l = 0; l < FIMReducerStartTimes.length; l++) {
        FIMReducerRunTimes[l] = FIMReducerEndTimes[l] - FIMReducerStartTimes[l];
        FIMReducerRunTimesSum += FIMReducerRunTimes[l];
    }
    System.out.println("FIMReducer average task runtime (milliseconds): "
            + FIMReducerRunTimesSum / FIMReducerStartTimes.length);
    long FIMReducerRunTimesMin = FIMReducerRunTimes[0];
    long FIMReducerRunTimesMax = FIMReducerRunTimes[0];
    for (long l : FIMReducerRunTimes) {
        if (l < FIMReducerRunTimesMin) {
            FIMReducerRunTimesMin = l;
        }
        if (l > FIMReducerRunTimesMax) {
            FIMReducerRunTimesMax = l;
        }
    }
    System.out.println("FIMReducer minimum task runtime (milliseconds): " + FIMReducerRunTimesMin);
    System.out.println("FIMReducer maximum task runtime (milliseconds): " + FIMReducerRunTimesMax);
    System.out.println("FIM job cooldown time (milliseconds): " + (FIMjob_end_time - FIMReducerEndMax));

    long AggregateMapperStartMin = AggregateMapperStartTimes[0];
    for (long l : AggregateMapperStartTimes) {
        if (l < AggregateMapperStartMin) {
            AggregateMapperStartMin = l;
        }
    }
    long AggregateMapperEndMax = AggregateMapperEndTimes[0];
    for (long l : AggregateMapperEndTimes) {
        if (l > AggregateMapperEndMax) {
            AggregateMapperEndMax = l;
        }
    }
    System.out.println(
            "Aggregation job setup time (milliseconds): " + (AggregateMapperStartMin - FIMjob_end_time));
    System.out.println("AggregateMapper total runtime (milliseconds): "
            + (AggregateMapperEndMax - AggregateMapperStartMin));

    long[] AggregateMapperRunTimes = new long[AggregateMapperStartTimes.length];
    long AggregateMapperRunTimesSum = 0;
    for (int l = 0; l < AggregateMapperStartTimes.length; l++) {
        AggregateMapperRunTimes[l] = AggregateMapperEndTimes[l] - AggregateMapperStartTimes[l];
        AggregateMapperRunTimesSum += AggregateMapperRunTimes[l];
    }
    System.out.println("AggregateMapper average task runtime (milliseconds): "
            + AggregateMapperRunTimesSum / AggregateMapperStartTimes.length);
    long AggregateMapperRunTimesMin = AggregateMapperRunTimes[0];
    long AggregateMapperRunTimesMax = AggregateMapperRunTimes[0];
    for (long l : AggregateMapperRunTimes) {
        if (l < AggregateMapperRunTimesMin) {
            AggregateMapperRunTimesMin = l;
        }
        if (l > AggregateMapperRunTimesMax) {
            AggregateMapperRunTimesMax = l;
        }
    }
    System.out.println("AggregateMapper minimum task runtime (milliseconds): " + AggregateMapperRunTimesMin);
    System.out.println("AggregateMapper maximum task runtime (milliseconds): " + AggregateMapperRunTimesMax);

    long AggregateReducerStartMin = AggregateReducerStartTimes[0];
    for (long l : AggregateReducerStartTimes) {
        if (l < AggregateReducerStartMin) {
            AggregateReducerStartMin = l;
        }
    }
    long AggregateReducerEndMax = AggregateReducerEndTimes[0];
    for (long l : AggregateReducerEndTimes) {
        if (l > AggregateReducerEndMax) {
            AggregateReducerEndMax = l;
        }
    }
    System.out.println("Aggregate job round shuffle phase runtime (milliseconds): "
            + (AggregateReducerStartMin - AggregateMapperEndMax));
    System.out.println("AggregateReducer total runtime (milliseconds): "
            + (AggregateReducerEndMax - AggregateReducerStartMin));

    long[] AggregateReducerRunTimes = new long[AggregateReducerStartTimes.length];
    long AggregateReducerRunTimesSum = 0;
    for (int l = 0; l < AggregateReducerStartTimes.length; l++) {
        AggregateReducerRunTimes[l] = AggregateReducerEndTimes[l] - AggregateReducerStartTimes[l];
        AggregateReducerRunTimesSum += AggregateReducerRunTimes[l];
    }
    System.out.println("AggregateReducer average task runtime (milliseconds): "
            + AggregateReducerRunTimesSum / AggregateReducerStartTimes.length);
    long AggregateReducerRunTimesMin = AggregateReducerRunTimes[0];
    long AggregateReducerRunTimesMax = AggregateReducerRunTimes[0];
    for (long l : AggregateReducerRunTimes) {
        if (l < AggregateReducerRunTimesMin) {
            AggregateReducerRunTimesMin = l;
        }
        if (l > AggregateReducerRunTimesMax) {
            AggregateReducerRunTimesMax = l;
        }
    }
    System.out.println("AggregateReducer minimum task runtime (milliseconds): " + AggregateReducerRunTimesMin);
    System.out.println("AggregateReducer maximum task runtime (milliseconds): " + AggregateReducerRunTimesMax);
    System.out.println(
            "Aggregation job cooldown time (milliseconds): " + (aggrJob_end_time - AggregateReducerEndMax));

    System.out.println(
            "total runtime (all inclusive) (milliseconds): " + (aggrJob_end_time - FIMjob_start_time));
    System.out.println("total runtime (no FIM job setup, no aggregation job cooldown) (milliseconds): "
            + (AggregateReducerEndMax - FIMMapperStartMin));
    System.out.println("total runtime (no setups, no cooldowns) (milliseconds): "
            + (FIMReducerEndMax - FIMMapperStartMin + AggregateReducerEndMax - AggregateMapperStartMin));
    System.out.println("FIM job runtime (including setup and cooldown) (milliseconds): " + FIMjob_runtime);
    System.out.println("FIM job runtime (no setup, no cooldown) (milliseconds): "
            + (FIMReducerEndMax - FIMMapperStartMin));
    System.out.println(
            "Aggregation job runtime (including setup and cooldown) (milliseconds): " + aggrJob_runtime);
    System.out.println("Aggregation job runtime (no setup, no cooldown) (milliseconds): "
            + (AggregateReducerEndMax - AggregateMapperStartMin));

    return 0;
}
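The getWorkingDirectory() call in the example above sits in the case-4 branch: a MapWritable is written to a relative path (which lands in the working directory), and the absolute URI handed to DistributedCache is built from getWorkingDirectory(). A condensed sketch of just that step, keeping the same file name; the helper method name is made up for illustration:

import java.net.URI;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.mapred.JobConf;

public class CacheFileSketch {
    // Writes a MapWritable under the working directory and registers it
    // with the DistributedCache under a "#samplesMap.ser" symlink fragment.
    static void distributeSamplesMap(JobConf conf, MapWritable map) throws Exception {
        FileSystem fs = FileSystem.get(URI.create("samplesMap.ser"), conf);
        // Relative path: resolved against fs.getWorkingDirectory().
        Path samplesMapPath = new Path("samplesMap.ser");
        try (FSDataOutputStream out = fs.create(samplesMapPath, true)) {
            map.write(out);
        }
        // The cache URI must be absolute, hence the getWorkingDirectory() prefix.
        DistributedCache.addCacheFile(
                new URI(fs.getWorkingDirectory() + "/samplesMap.ser#samplesMap.ser"), conf);
    }
}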
From source file:JaqlShell.java
License:Apache License
/**
 * @param dir
 * @param numNodes
 * @throws Exception
 */
public void init(String dir, int numNodes) throws Exception {
    String vInfo = VersionInfo.getVersion();
    System.setProperty("test.build.data", dir);
    m_conf = new Configuration();

    // setup conf according to the Hadoop version
    if (vInfo.indexOf("0.20") < 0) {
        throw new Exception("Unsupported Hadoop version: " + vInfo);
    }

    // setup the mini dfs cluster
    m_fs = new MiniDFSCluster(m_conf, numNodes, true, (String[]) null);
    FileSystem filesystem = m_fs.getFileSystem();
    m_conf.set("fs.default.name", filesystem.getUri().toString());
    Path parentdir = filesystem.getHomeDirectory();
    filesystem.mkdirs(parentdir);
    //FSUtils.setVersion(filesystem, parentdir);

    // setup hbase cluster (only if OS is not windows)
    //    if(!System.getProperty("os.name").toLowerCase().contains("win")) {
    //      m_conf.set(HConstants.HBASE_DIR, parentdir.toString());
    //      Path hdfsTestDir = filesystem.makeQualified(new Path(m_conf.get(HConstants.HBASE_DIR)));
    //
    //      // prime the hdfs for hbase information...
    //      HRegion root = HRegion.createHRegion(HRegionInfo.ROOT_REGIONINFO, hdfsTestDir, (HBaseConfiguration)m_conf);
    //      HRegion meta = HRegion.createHRegion(HRegionInfo.FIRST_META_REGIONINFO, hdfsTestDir, (HBaseConfiguration)m_conf);
    //      HRegion.addRegionToMETA(root, meta);
    //
    //      // ... and close the root and meta
    //      if (meta != null) {
    //        meta.close();
    //        meta.getLog().closeAndDelete();
    //      }
    //      if (root != null) {
    //        root.close();
    //        root.getLog().closeAndDelete();
    //      }
    //
    //      try
    //      {
    //        this.zooKeeperCluster = new MiniZooKeeperCluster();
    //        File testDir = new File(dir);
    //        int clientPort = this.zooKeeperCluster.startup(testDir);
    //        m_conf.set("hbase.zookeeper.property.clientPort", Integer.toString(clientPort));
    //      } catch(Exception e) {
    //        LOG.error("Unable to startup zookeeper");
    //        throw new IOException(e);
    //      }
    //      try {
    //        // start the mini cluster
    //        m_base = new MiniHBaseCluster((HBaseConfiguration)m_conf, numNodes);
    //      } catch(Exception e) {
    //        LOG.error("Unable to startup hbase");
    //        throw new IOException(e);
    //      }
    //      try {
    //        // opening the META table ensures that cluster is running
    //        new HTable((HBaseConfiguration)m_conf, HConstants.META_TABLE_NAME);
    //
    //        //setupOverride(conf);
    //      }
    //      catch (Exception e)
    //      {
    //        LOG.warn("Could not verify that hbase is up", e);
    //      }
    //      setupOverride();
    //    }

    m_mr = startMRCluster(numNodes, m_fs.getFileSystem().getName(), m_conf);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // make the home directory if it does not exist
    Path hd = fs.getWorkingDirectory();
    if (!fs.exists(hd))
        fs.mkdirs(hd);

    // make the $USER/_temporary directory if it does not exist
    Path tmpPath = new Path(hd, "_temporary");
    if (!fs.exists(tmpPath))
        fs.mkdirs(tmpPath);

    //    if (m_base != null)
    //    {
    //      try {
    //        m_admin = new HBaseAdmin((HBaseConfiguration) m_conf);
    //        HTableDescriptor[] tables = m_admin.listTables();
    //        if (tables != null)
    //        {
    //          for (int i = 0; i < tables.length; i++)
    //          {
    //            m_admin.enableTable(tables[i].getName());
    //          }
    //        }
    //      } catch(Exception e) {
    //        LOG.warn("failed to enable hbase tables");
    //      }
    //    }
}
From source file:JaqlShell.java
License:Apache License
/**
 * @throws Exception
 */
public void init() throws Exception {
    // do nothing in the case of cluster
    //m_conf = new HBaseConfiguration();
    //m_admin = new HBaseAdmin(m_conf);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // make the home directory if it does not exist
    Path hd = fs.getWorkingDirectory();
    if (!fs.exists(hd))
        fs.mkdirs(hd);
}
From source file:classTest.fileTest.java
public static void main(String args[]) throws IOException {

    hbaseDB connHB = new hbaseDB("/Users/andresbenitez/Documents/app/ABTViewer3/srvConf.properties", "HBConf2");

    FileSystem hdfs = org.apache.hadoop.fs.FileSystem.get(connHB.getHcfg());

    JOptionPane.showMessageDialog(null, hdfs.getHomeDirectory().toString());
    JOptionPane.showMessageDialog(null, hdfs.getWorkingDirectory());

    hdfs.setWorkingDirectory(new Path("hdfs://hortonserver.com:8020/user/guest/"));

    System.out.println(hdfs.getWorkingDirectory().toString());

    String dirName = "TestDirectory";
    Path destPath = new Path(
            "hdfs://hortonserver.e-contact.cl:8020/user/guest/20160413_000118_00011008887674_98458726_TTR42-1460516478.154581.WAV");
    Path sr1 = new Path("hdfs://hortonserver.com:8020/user/guest/Test");

    //hdfs.mkdirs(sr1);

    //FileSystem lhdfs = LocalFileSystem.get(hbconf);
    //System.out.println(lhdfs.getWorkingDirectory().toString());
    //System.out.println(hdfs.getWorkingDirectory().toString());

    //Path sourcePath = new Path("/Users/andresbenitez/Documents/Apps/test.txt");
    //Path destPath = new Path("/Users/andresbenitez/Documents/Apps/test4.txt");

    //hdfs.copyFromLocalFile(sourcePath, destPath);

    //hdfs.copyToLocalFile(false, new Path("hdfs://sandbox.hortonworks.com:8020/user/guest/installupload.log"), new Path("/Users/andresbenitez/Documents/instaldown3.log"), true);

    //hdfs.copyToLocalFile(false, new Path("/Users/andresbenitez/Documents/instaldown.log"), new Path("hdfs://sandbox.hortonworks.com:8020/user/guest/installupload.log"), false);

    //File f=new File("http://srv-gui-g.e-contact.cl/e-recorder/audio/20160413/08/01_20160413_084721_90010990790034__1460548041.4646.wav");

    URL url = new URL(
            "http://grabacionesclaro.e-contact.cl/2011/2016041300/20160413_000118_00011008887674_98458726_TTR42-1460516478.154581.WAV");

    File filePaso = new File("/Users/andresbenitez/Documents/paso/JOJOJO.WAV");

    File f2 = new File(
            "/grabacionesclaro.e-contact.cl/2011/2016041300/20160413_000118_00011008887674_98458726_TTR42-1460516478.154581.WAV");

    org.apache.commons.io.FileUtils.copyURLToFile(url, filePaso);

    //org.apache.commons.io.FileUtils.copyFile(f2, filePaso);

    //hdfs.copyToLocalFile(false, new Path("/Users/andresbenitez/Documents/paso/JOJOJO.mp3"), destPath);

    //hdfs.copyFromLocalFile(false, new Path("/Users/andresbenitez/Documents/paso/JOJOJO.WAV"), destPath);
}
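The example above also calls setWorkingDirectory() before reading the working directory back. A minimal, hedged round-trip sketch (the scratch directory name is illustrative and stays on the same file system so the call succeeds):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SetWorkingDirectorySketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());

        // Remember the current working directory so it can be restored.
        Path original = fs.getWorkingDirectory();

        // Relative paths now resolve under <home>/scratch (illustrative name).
        fs.setWorkingDirectory(new Path(fs.getHomeDirectory(), "scratch"));
        System.out.println(fs.getWorkingDirectory());

        // Restore the previous working directory.
        fs.setWorkingDirectory(original);
    }
}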
From source file:co.cask.cdap.data.tools.HBaseTableExporter.java
License:Apache License
/**
 * Sets up the actual MapReduce job.
 * @param tx The transaction which needs to be passed to the Scan instance. This transaction is used by
 * coprocessors to filter out the data corresponding to the invalid transactions.
 * @param tableName Name of the table which needs to be exported as HFiles.
 * @return the configured job
 * @throws IOException
 */
public Job createSubmittableJob(Transaction tx, String tableName) throws IOException {

    Job job = Job.getInstance(hConf, "HBaseTableExporter");

    job.setJarByClass(HBaseTableExporter.class);
    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    // Set the transaction attribute for the scan.
    scan.setAttribute(TxConstants.TX_OPERATION_ATTRIBUTE_KEY, new TransactionCodec().encode(tx));
    job.setNumReduceTasks(0);

    TableMapReduceUtil.initTableMapperJob(tableName, scan, KeyValueImporter.class, null, null, job);

    FileSystem fs = FileSystem.get(hConf);
    Random rand = new Random();
    Path root = new Path(fs.getWorkingDirectory(), "hbasetableexporter");
    fs.mkdirs(root);
    while (true) {
        bulkloadDir = new Path(root, "" + rand.nextLong());
        if (!fs.exists(bulkloadDir)) {
            break;
        }
    }

    HFileOutputFormat2.setOutputPath(job, bulkloadDir);
    HTable hTable = new HTable(hConf, tableName);
    HFileOutputFormat2.configureIncrementalLoad(job, hTable);
    return job;
}
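The pattern worth noting above: the bulk-load output goes to a freshly chosen, previously non-existing directory anchored at the working directory. A trimmed, standalone sketch of that part (the method and directory names are illustrative, not from the CDAP source):

import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ScratchDirSketch {
    // Returns a directory under <workingDir>/<scratchRoot> that does not yet exist.
    static Path uniqueScratchDir(Configuration conf, String scratchRoot) throws Exception {
        FileSystem fs = FileSystem.get(conf);
        Path root = new Path(fs.getWorkingDirectory(), scratchRoot);
        fs.mkdirs(root);
        Random rand = new Random();
        Path candidate;
        do {
            candidate = new Path(root, Long.toString(rand.nextLong()));
        } while (fs.exists(candidate));
        return candidate;
    }
}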
From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceProfileTest.java
License:Apache License
/**
 * convert with relative path.
 * @throws Exception if failed
 */
@Test
public void convert_relpath() throws Exception {
    Map<String, String> attributes = new HashMap<>();
    attributes.put(KEY_PATH, "relative");
    DirectDataSourceProfile profile = new DirectDataSourceProfile("testing", HadoopDataSource.class, "context",
            attributes);
    Configuration conf = new Configuration();
    HadoopDataSourceProfile result = HadoopDataSourceProfile.convert(profile, conf);

    FileSystem defaultFs = FileSystem.get(conf);
    Path path = defaultFs.makeQualified(new Path(defaultFs.getWorkingDirectory(), "relative"));
    assertThat(result.getFileSystem().getCanonicalServiceName(), is(defaultFs.getCanonicalServiceName()));
    assertThat(result.getFileSystemPath(), is(path));
}
From source file:com.asakusafw.testdriver.FlowPartTestDriver.java
License:Apache License
private String computeInputPath(FileSystem fs, String tableName) {
    Location location = LegacyUtil.createInputLocation(driverContext, tableName);
    String path = new Path(fs.getWorkingDirectory(), location.toPath('/')).toString();
    return resolvePath(path);
}
From source file:com.asakusafw.testdriver.FlowPartTestDriver.java
License:Apache License
private String computeOutputPath(FileSystem fs, String tableName) {
    Location location = LegacyUtil.createOutputLocation(driverContext, tableName);
    String path = new Path(fs.getWorkingDirectory(), location.toPath('/')).toString();
    return resolvePath(path);
}
From source file:com.asakusafw.windgate.hadoopfs.HadoopFsProfile.java
License:Apache License
private static Path extractBasePath(Configuration configuration, ResourceProfile profile) throws IOException {
    assert configuration != null;
    assert profile != null;
    String result = extract(profile, KEY_BASE_PATH, false);
    try {
        if (result == null || result.isEmpty()) {
            FileSystem fileSystem = FileSystem.get(configuration);
            return fileSystem.getWorkingDirectory();
        }
        URI uri = URI.create(result);
        FileSystem fileSystem = FileSystem.get(uri, configuration);
        return fileSystem.makeQualified(new Path(uri));
    } catch (IOException e) {
        WGLOG.error(e, "E00002", profile.getName(), KEY_BASE_PATH, result == null ? "(default)" : result);
        throw new IOException(MessageFormat.format("Failed to initialize the file system: {1} (resource={0})",
                profile.getName(), KEY_BASE_PATH, result == null ? "(default)" : result), e);
    }
}
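A reduced sketch of the fallback in extractBasePath above, with WindGate's ResourceProfile replaced by a plain string argument (that simplification and the method name are assumptions for illustration): when no base path is configured, the working directory of the default file system is used.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BasePathSketch {
    // Resolves a configured base path, defaulting to the working directory
    // of the default file system when the value is absent or empty.
    static Path resolveBasePath(Configuration conf, String configured) throws Exception {
        if (configured == null || configured.isEmpty()) {
            return FileSystem.get(conf).getWorkingDirectory();
        }
        URI uri = URI.create(configured);
        FileSystem fs = FileSystem.get(uri, conf);
        return fs.makeQualified(new Path(uri));
    }
}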
From source file:com.bizosys.unstructured.StopwordAndSynonymAnalyzer.java
License:Apache License
public void load() throws IOException {

    InputStream stopwordStream = null;
    InputStream synonumStream = null;

    Configuration hsearchConf = HSearchConfig.getInstance().getConfiguration();
    String filenameSynonum = hsearchConf.get("synonyms.file.location", "synonyms.txt");
    String filenameStopword = hsearchConf.get("stopword.file.location", "stopwords.txt");

    isLowerCaseEnabled = hsearchConf.getBoolean("lucene.analysis.lowercasefilter", true);
    isAccentFilterEnabled = hsearchConf.getBoolean("lucene.analysis.accentfilter", true);
    isSnoballStemEnabled = hsearchConf.getBoolean("lucene.analysis.snowballfilter", true);
    isStopFilterEnabled = hsearchConf.getBoolean("lucene.analysis.stopfilter", true);

    if (null != stopwords)
        return;

    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    FileSystem fs = FileSystem.get(conf);

    if (null != fs) {

        /**
         * STOPWORD
         */
        Path stopPath = new Path(filenameStopword);
        if (fs.exists(stopPath)) {
            if (DEBUG_ENABLED)
                IdSearchLog.l.debug("Loading Stopword file from HDFS :" + stopPath.toString());
            stopwordStream = fs.open(stopPath);
        } else {
            IdSearchLog.l.fatal("Stopword file not available in HDFS :" + stopPath.toString());
        }

        /**
         * SYNONUM
         */
        Path synPath = new Path(filenameSynonum);
        if (fs.exists(synPath)) {
            synonumStream = fs.open(synPath);
            if (DEBUG_ENABLED)
                IdSearchLog.l.debug("Loading synonym file from HDFS :" + filenameSynonum.toString());
        } else {
            IdSearchLog.l.fatal("Synonym file not available in HDFS :" + filenameSynonum.toString());
            IdSearchLog.l.fatal("Working Directory :" + fs.getWorkingDirectory().getName());
        }
    }

    ClassLoader classLoader = null;
    if (null == stopwordStream || null == synonumStream) {
        classLoader = Thread.currentThread().getContextClassLoader();
    }

    if (null == stopwordStream) {
        URL stopUrl = classLoader.getResource(filenameStopword);
        if (null != stopUrl) {
            String stopFile = stopUrl.getPath();
            if (null != stopFile) {
                File stopwordFile = new File(stopFile);
                if (stopwordFile.exists() && stopwordFile.canRead()) {
                    stopwordStream = new FileInputStream(stopwordFile);
                    if (DEBUG_ENABLED)
                        IdSearchLog.l
                                .debug("Loading Stopword file from Local :" + stopwordFile.getAbsolutePath());
                } else {
                    IdSearchLog.l.fatal("Stopword file not available at :" + stopwordFile.getAbsolutePath());
                    IdSearchLog.l.fatal("Working Directory :" + fs.getHomeDirectory().getName());
                }
            } else {
                if (DEBUG_ENABLED)
                    IdSearchLog.l.debug("Ignoring Stopwords > " + filenameStopword);
            }
        }
    }

    if (null == synonumStream) {
        URL synUrl = classLoader.getResource(filenameSynonum);
        if (null != synUrl) {
            String synFileName = synUrl.getPath();
            if (null != synFileName) {
                File synFile = new File(synFileName);
                if (synFile.exists() && synFile.canRead()) {
                    synonumStream = new FileInputStream(synFile);
                    if (DEBUG_ENABLED)
                        IdSearchLog.l.debug("Loading Synonum file from Local :" + synFile.getAbsolutePath());
                } else {
                    if (DEBUG_ENABLED)
                        IdSearchLog.l.debug("Synonum file not available at :" + synFile.getAbsolutePath());
                }
            } else {
                if (DEBUG_ENABLED)
                    IdSearchLog.l.debug("Ignoring Synonyms > " + filenameSynonum);
            }
        }
    }

    load(stopwordStream, synonumStream);
}