List of usage examples for org.apache.hadoop.fs.Path.toString()
@Override
public String toString()
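Before the project-specific examples, here is a minimal, self-contained sketch of what Path.toString() returns: the textual form of the path, including scheme and authority when the Path was built from a full URI. This is why the snippets below use it wherever an API expects a plain String (configuration values, java.io.File constructors, log messages). The file and cluster names below are made up for illustration.

import org.apache.hadoop.fs.Path;

public class PathToStringDemo {
    public static void main(String[] args) {
        // A relative path keeps its relative textual form.
        Path relative = new Path("data/input.txt");
        System.out.println(relative.toString());      // data/input.txt

        // A fully qualified path keeps scheme and authority.
        Path qualified = new Path("hdfs://namenode:8020/user/alice/data");
        System.out.println(qualified.toString());     // hdfs://namenode:8020/user/alice/data

        // Child paths are concatenated onto the parent.
        Path child = new Path(qualified, "part-00000");
        System.out.println(child.toString());         // hdfs://namenode:8020/user/alice/data/part-00000
    }
}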
From source file:com.chinamobile.bcbsp.util.BSPJob.java
License:Apache License
/**
 * Set the working directory.
 * @param dir the path on the HDFS.
 */
public void setWorkingDirectory(Path dir) throws IOException {
    ensureState(JobState.DEFINE);
    dir = new BSPHdfsImpl().hdfsgetWorkingDirectory(dir);
    conf.set(Constants.USER_BC_BSP_JOB_WORKING_DIR, dir.toString());
}
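Because the working directory is persisted in the job Configuration as a plain string, reading it back is just the reverse conversion. A hedged sketch of that read-back follows; the getter is illustrative rather than taken from BSPJob, and the default value is made up.

// Hypothetical counterpart: recover the Path from the string stored by setWorkingDirectory.
public Path getWorkingDirectory() {
    // Key reuses the constant above; the fallback directory is a made-up default.
    String dir = conf.get(Constants.USER_BC_BSP_JOB_WORKING_DIR, "/user/bcbsp/working");
    return new Path(dir);
}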
From source file:com.chinamobile.bcbsp.workermanager.WorkerManager.java
License:Apache License
/**
 * Localization of job.
 * @param sip StaffInProgress
 * @param directive {@link Directive}
 */
private void localizeJob(StaffInProgress sip, Directive directive) throws IOException {
    Staff staff = sip.getStaff();
    conf.addResource(staff.getJobFile());
    BSPJob defaultJobConf = new BSPJob((BSPConfiguration) conf);
    Path localJobFile = defaultJobConf.getLocalPath(
        Constants.BC_BSP_LOCAL_SUBDIR_WORKERMANAGER + "/" + staff.getStaffID() + "/" + "job.xml");
    Path localJarFile = null;
    // systemFS.copyToLocalFile(new Path(staff.getJobFile()), localJobFile);
    bspsystemFS.copyToLocalFile(new BSPHdfsImpl().newPath(staff.getJobFile()), localJobFile);
    BSPConfiguration confBsp = new BSPConfiguration();
    confBsp.addResource(localJobFile);
    LOG.info("debug: conf.get(Constants.USER_BC_BSP_JOB_TYPE) " + confBsp.get(Constants.USER_BC_BSP_JOB_TYPE));
    BSPJob jobConf = new BSPJob(confBsp, staff.getJobID().toString());
    LOG.info("debug: conf.get(Constants.USER_BC_BSP_JOB_TYPE) " + confBsp.get(Constants.USER_BC_BSP_JOB_TYPE));
    LOG.info("debug: job type is " + jobConf.getJobType());
    if (Constants.USER_BC_BSP_JOB_TYPE_C.equals(jobConf.getJobType())) {
        LOG.info("debug: in LocalizeJob job.exe");
        localJarFile = defaultJobConf.getLocalPath(
            Constants.BC_BSP_LOCAL_SUBDIR_WORKERMANAGER + "/" + staff.getStaffID() + "/" + "jobC");
    } else {
        LOG.info("debug: in LocalizeJob job.jar");
        localJarFile = defaultJobConf.getLocalPath(
            Constants.BC_BSP_LOCAL_SUBDIR_WORKERMANAGER + "/" + staff.getStaffID() + "/" + "job.jar");
    }
    Path jarFile = null;
    LOG.info("debug: job type is " + jobConf.getJobType());
    if (Constants.USER_BC_BSP_JOB_TYPE_C.equals(jobConf.getJobType())) {
        LOG.info("debug: in LocalizeJob before jobConf.getJobExe() = " + jobConf.getJobExe());
        if (jobConf.getJobExe() != null) {
            jarFile = new Path(jobConf.getJobExe());
        }
        LOG.info("jarFile is " + jarFile);
        jobConf.setJobExe(localJarFile.toString());
    } else {
        if (jobConf.getJar() != null) {
            jarFile = new Path(jobConf.getJar());
        }
        jobConf.setJar(localJarFile.toString());
    }
    if (jarFile != null) {
        LOG.info("jarFile != null");
        bspsystemFS.copyToLocalFile(jarFile, localJarFile);
        File workDir = new File(new File(localJobFile.toString()).getParent(), "work");
        if (!workDir.mkdirs()) {
            if (!workDir.isDirectory()) {
                throw new IOException("Mkdirs failed to create " + workDir.toString());
            }
        }
        if (!Constants.USER_BC_BSP_JOB_TYPE_C.equals(jobConf.getJobType())) {
            RunJar.unJar(new File(localJarFile.toString()), workDir);
            // Add the user program jar to the system's classpath.
            ClassLoaderUtil.addClassPath(localJarFile.toString());
        }
    }
    RunningJob rjob = addStaffToJob(staff.getJobID(), localJobFile, sip, directive, jobConf);
    LOG.info("debug: after addStaffToJob(staff.getJobID(), localJobFile, sip, directive, jobConf)");
    rjob.localized = true;
    sip.setFaultSSStep(directive.getFaultSSStep());
    LOG.info("debug: before launchStaffForJob(sip, jobConf)");
    launchStaffForJob(sip, jobConf);
}
From source file:com.chriscx.mapred.Map.java
private void parseSkipFile(Path patternsFile) {
    try {
        BufferedReader fis = new BufferedReader(new FileReader(patternsFile.toString()));
        String pattern = null;
        while ((pattern = fis.readLine()) != null) {
            patternsToSkip.add(pattern);
        }
    } catch (IOException ioe) {
        System.err.println("Caught exception while parsing the cached file '" + patternsFile + "' : "
            + StringUtils.stringifyException(ioe));
    }
}
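For context, a helper like this is usually fed the local copies that the DistributedCache materialized on each node, which is why Path.toString() can be handed straight to a FileReader. A hedged sketch of that wiring follows; the configure() method below is illustrative and not taken from com.chriscx.mapred.Map.

// Hypothetical old-API setup that hands cached pattern files to parseSkipFile.
// Assumes the driver registered the files with DistributedCache.addCacheFile(...).
@Override
public void configure(JobConf job) {
    try {
        Path[] cachedFiles = DistributedCache.getLocalCacheFiles(job);  // local copies on this node
        if (cachedFiles != null) {
            for (Path cached : cachedFiles) {
                parseSkipFile(cached);  // Path.toString() inside yields a plain local file name
            }
        }
    } catch (IOException ioe) {
        System.err.println("Caught exception while getting cached files: "
            + StringUtils.stringifyException(ioe));
    }
}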
From source file:com.ci.backports.hadoop.hbase.ZHFileOutputFormat.java
License:Apache License
/**
 * Configure a MapReduce Job to perform an incremental load into the given table. This
 * <ul>
 *   <li>Inspects the table to configure a total order partitioner</li>
 *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 *   <li>Sets the number of reduce tasks to match the current number of regions</li>
 *   <li>Sets the output key/value class to match ZHFileOutputFormat's requirements</li>
 *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 *       ZPutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setPartitionerClass(TotalOrderPartitioner.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(ZHFileOutputFormat.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(ZPutSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type: " + job.getMapOutputValueClass());
    }

    LOG.info("Looking up current regions for table " + table);
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions to match current region count");
    job.setNumReduceTasks(startKeys.size());

    Path partitionsPath = new Path(job.getWorkingDirectory(), "partitions_" + System.currentTimeMillis());
    LOG.info("Writing partition information to " + partitionsPath);
    FileSystem fs = partitionsPath.getFileSystem(conf);
    writePartitions(conf, partitionsPath, startKeys);
    partitionsPath.makeQualified(fs);

    URI cacheUri;
    try {
        cacheUri = new URI(partitionsPath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    DistributedCache.addCacheFile(cacheUri, conf);
    DistributedCache.createSymlink(conf);

    LOG.info("Incremental table output configured.");
}
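A hedged sketch of how a driver might call configureIncrementalLoad, following the javadoc requirement to set the map output value class first. The job name, mapper class, table name, and output path below are made-up placeholders, and the driver wiring is an assumption rather than code from this project.

// Hypothetical driver for a bulk-load job using ZHFileOutputFormat.
Configuration conf = HBaseConfiguration.create();
Job job = new Job(conf, "bulk-load-example");              // job name is made up
job.setMapperClass(MyKeyValueMapper.class);                // hypothetical mapper emitting KeyValue
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(KeyValue.class);                // must be KeyValue or Put, per the javadoc above
HTable table = new HTable(conf, "example_table");          // hypothetical table name
ZHFileOutputFormat.configureIncrementalLoad(job, table);
FileOutputFormat.setOutputPath(job, new Path("/tmp/hfiles-out"));  // placeholder output directory
job.waitForCompletion(true);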
From source file:com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java
License:Apache License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Path ignoreOutputPath = new Path(outputPath + "_ignore");
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", false);

    if (fs.exists(ignoreOutputPath)) {
        LOG.info("Deleted " + ignoreOutputPath.toString() + " success.");
        fs.delete(ignoreOutputPath, true);
    }

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers =
            new TreeMap<byte[], WriterLength>(Bytes.BYTES_COMPARATOR);
        private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }
            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();
            if (ignore(kv)) {
                byte[] readBuf = rowKey;
                dos.write(readBuf, 0, readBuf.length);
                dos.write(Bytes.toBytes("\n"));
                return;
            }
            WriterLength wl = this.writers.get(family);
            // If this is a new column family, verify that the directory exists
            if (wl == null) {
                Path path = null;
                path = new Path(outputdir, Bytes.toString(family));
                fs.mkdirs(path);
            }
            // If any of the HFiles for the column families has reached maxsize,
            // we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }
            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }
            // create a new HLog writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }
            // we now have the proper HLog writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;
            // Copy the row so we know when a row transition.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                        + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /*
         * Create a new StoreFile.Writer.
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null
                ? HConstants.DEFAULT_BLOCKSIZE
                : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs, blockSize)
                .withOutputDir(familydir)
                .withCompression(AbstractHFileWriter.compressionByName(compression))
                .withBloomType(bloomType)
                .withComparator(KeyValue.COMPARATOR)
                .withDataBlockEncoder(encoder)
                .withChecksumType(HStore.getChecksumType(conf))
                .withBytesPerChecksum(HStore.getBytesPerChecksum(conf))
                .build();
            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            dos.flush();
            dos.close();
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
From source file:com.cloudera.cdk.data.hcatalog.TestHCatalogDatasetRepository.java
License:Apache License
@Test
public void testExternalTable() throws IOException {
    Path testDirectory = new Path(Files.createTempDir().getAbsolutePath());
    File tableDir = new File(testDirectory.toString(), TABLE_NAME);
    Assert.assertFalse("Data directory should not exist before test", tableDir.exists());
    repo = new HCatalogDatasetRepository(fileSystem, testDirectory);
    Dataset ds = repo.create(TABLE_NAME, new DatasetDescriptor.Builder().schema(USER_SCHEMA_URL).get());
    Assert.assertTrue("Data directory should exist after dataset creation", tableDir.exists());
    writeTestUsers(ds, 10);
    checkTestUsers(ds, 10);
    Assert.assertTrue("Data directory should exist after writing", tableDir.exists());
    repo.delete(TABLE_NAME);
    Assert.assertFalse("Data directory should not exist after dropping", tableDir.exists());
}
From source file:com.cloudera.cdk.morphline.hadoop.core.DownloadHdfsFileTest.java
License:Apache License
private Command createMorphline(String file, Path inputFile, File cwd) {
    return createMorphline("test-morphlines/downloadHdfsFile",
        ConfigFactory.parseMap(ImmutableMap.of("inputFile", inputFile.toString(), "outputDir", cwd.toString())));
}
From source file:com.cloudera.cdk.morphline.hadoop.rcfile.ReadRCFileTest.java
License:Apache License
@Test
public void testRCFileRowWise() throws Exception {
    morphline = createMorphline("test-morphlines/rcFileMorphlineRow");
    String rcFileName = "testRCFileRowWise.rc";
    List<Record> expected = setupRCFile(rcFileName, NUM_RECORDS, NUM_COLUMNS, true);
    Path inputFile = dfs.makeQualified(new Path(testDirectory, rcFileName));
    Record input = new Record();
    input.put(Fields.ATTACHMENT_NAME, inputFile.toString());
    input.put(Fields.ATTACHMENT_BODY, readPath(inputFile));
    startSession();
    assertEquals(1, collector.getNumStartEvents());
    assertTrue(morphline.process(input));
    assertTrue(areFieldsEqual(expected, collector.getRecords(), NUM_COLUMNS, NUM_RECORDS, true));
}
From source file:com.cloudera.cdk.morphline.hadoop.rcfile.ReadRCFileTest.java
License:Apache License
@Test
public void testRCFileColumnWise() throws Exception {
    morphline = createMorphline("test-morphlines/rcFileMorphlineColumn");
    String rcFileName = "testRCFileColumnWise.rc";
    List<Record> expected = setupRCFile(rcFileName, NUM_RECORDS, NUM_COLUMNS, false);
    Path inputFile = dfs.makeQualified(new Path(testDirectory, rcFileName));
    Record input = new Record();
    input.put(Fields.ATTACHMENT_NAME, inputFile.toString());
    input.put(Fields.ATTACHMENT_BODY, readPath(inputFile));
    startSession();
    assertEquals(1, collector.getNumStartEvents());
    assertTrue(morphline.process(input));
    assertTrue(areFieldsEqual(expected, collector.getRecords(), NUM_COLUMNS, NUM_RECORDS, false));
}
From source file:com.cloudera.circus.test.TestXTest.java
License:Open Source License
@Test
@TestHadoop
public void testHadoopMapReduce() throws Exception {
    JobConf conf = getHadoopConf();
    FileSystem fs = FileSystem.get(conf);
    JobClient jobClient = new JobClient(conf);
    try {
        Path inputDir = new Path(getHadoopTestDir(), "input");
        Path outputDir = new Path(getHadoopTestDir(), "output");
        fs.mkdirs(inputDir);
        Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
        writer.write("a\n");
        writer.write("b\n");
        writer.write("c\n");
        writer.close();
        JobConf jobConf = getHadoopConf();
        jobConf.setInt("mapred.map.tasks", 1);
        jobConf.setInt("mapred.map.max.attempts", 1);
        jobConf.setInt("mapred.reduce.max.attempts", 1);
        jobConf.set("mapred.input.dir", inputDir.toString());
        jobConf.set("mapred.output.dir", outputDir.toString());
        final RunningJob runningJob = jobClient.submitJob(jobConf);
        waitFor(60 * 1000, true, new Predicate() {
            @Override
            public boolean evaluate() throws Exception {
                return runningJob.isComplete();
            }
        });
        Assert.assertTrue(runningJob.isSuccessful());
        Assert.assertTrue(fs.exists(new Path(outputDir, "part-00000")));
        BufferedReader reader = new BufferedReader(
            new InputStreamReader(fs.open(new Path(outputDir, "part-00000"))));
        Assert.assertTrue(reader.readLine().trim().endsWith("a"));
        Assert.assertTrue(reader.readLine().trim().endsWith("b"));
        Assert.assertTrue(reader.readLine().trim().endsWith("c"));
        Assert.assertNull(reader.readLine());
        reader.close();
    } finally {
        fs.close();
        jobClient.close();
    }
}