Example usage for org.apache.hadoop.fs Path toString

List of usage examples for org.apache.hadoop.fs Path toString

Introduction

On this page you can find example usage for org.apache.hadoop.fs.Path.toString().

Prototype

@Override
public String toString()
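
Path.toString() returns the path as a string (including scheme and authority when the Path was built from a full URI), which is why the examples below routinely pass it to Configuration.set(), java.io.File, and URI construction. A minimal sketch, assuming the usual org.apache.hadoop.fs.Path, org.apache.hadoop.conf.Configuration, and java.io.File imports (the "my.app.input.dir" key is hypothetical):

Path relative = new Path("data/input");
Path qualified = new Path("hdfs://namenode:8020/user/alice/data");

// toString() preserves whatever the Path was constructed from.
System.out.println(relative.toString());   // data/input
System.out.println(qualified.toString());  // hdfs://namenode:8020/user/alice/data

// Typical uses seen in the examples below: store the path in a
// Configuration, or hand it to java.io.File / URI constructors.
Configuration conf = new Configuration();
conf.set("my.app.input.dir", qualified.toString());
File localView = new File(relative.toString());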


Usage

From source file:com.chinamobile.bcbsp.util.BSPJob.java

License:Apache License

/**
 * Set the working directory.
 * @param dir
 *        The working directory path on HDFS.
 */
public void setWorkingDirectory(Path dir) throws IOException {
    ensureState(JobState.DEFINE);
    dir = new BSPHdfsImpl().hdfsgetWorkingDirectory(dir);
    conf.set(Constants.USER_BC_BSP_JOB_WORKING_DIR, dir.toString());
}

From source file:com.chinamobile.bcbsp.workermanager.WorkerManager.java

License:Apache License

/**
 * Localization of a job.
 * @param sip StaffInProgress
 * @param directive {@link Directive}
 */
private void localizeJob(StaffInProgress sip, Directive directive) throws IOException {
    Staff staff = sip.getStaff();
    conf.addResource(staff.getJobFile());
    BSPJob defaultJobConf = new BSPJob((BSPConfiguration) conf);
    Path localJobFile = defaultJobConf.getLocalPath(
            Constants.BC_BSP_LOCAL_SUBDIR_WORKERMANAGER + "/" + staff.getStaffID() + "/" + "job.xml");
    Path localJarFile = null;
    // systemFS.copyToLocalFile(new Path(staff.getJobFile()), localJobFile);
    bspsystemFS.copyToLocalFile(new BSPHdfsImpl().newPath(staff.getJobFile()), localJobFile);
    BSPConfiguration confBsp = new BSPConfiguration();
    confBsp.addResource(localJobFile);
    LOG.info("debug: conf.get(Constants.USER_BC_BSP_JOB_TYPE) " + confBsp.get(Constants.USER_BC_BSP_JOB_TYPE));
    BSPJob jobConf = new BSPJob(confBsp, staff.getJobID().toString());
    LOG.info("debug: conf.get(Constants.USER_BC_BSP_JOB_TYPE) " + confBsp.get(Constants.USER_BC_BSP_JOB_TYPE));
    LOG.info("debug: job type is " + jobConf.getJobType());
    if (Constants.USER_BC_BSP_JOB_TYPE_C.equals(jobConf.getJobType())) {
        LOG.info("debug: in LocalizeJob job.exe");
        localJarFile = defaultJobConf.getLocalPath(
                Constants.BC_BSP_LOCAL_SUBDIR_WORKERMANAGER + "/" + staff.getStaffID() + "/" + "jobC");
    } else {
        LOG.info("debug: in in LocalizeJob  job.jar");
        localJarFile = defaultJobConf.getLocalPath(
                Constants.BC_BSP_LOCAL_SUBDIR_WORKERMANAGER + "/" + staff.getStaffID() + "/" + "job.jar");
    }
    Path jarFile = null;
    LOG.info("debug: job type is" + jobConf.getJobType());
    if (Constants.USER_BC_BSP_JOB_TYPE_C.equals(jobConf.getJobType())) {
        LOG.info("debug: in LocalizeJob bofore jobConf.getJobExe =" + jobConf.getJobExe());
        if (jobConf.getJobExe() != null) {
            jarFile = new Path(jobConf.getJobExe());
        }
        LOG.info("jarFile is" + jarFile);
        jobConf.setJobExe(localJarFile.toString());
    } else {
        if (jobConf.getJar() != null) {
            jarFile = new Path(jobConf.getJar());
        }
        jobConf.setJar(localJarFile.toString());
    }
    if (jarFile != null) {
        LOG.info("jarFile != null");
        bspsystemFS.copyToLocalFile(jarFile, localJarFile);
        File workDir = new File(new File(localJobFile.toString()).getParent(), "work");
        if (!workDir.mkdirs()) {
            if (!workDir.isDirectory()) {
                throw new IOException("Mkdirs failed to create " + workDir.toString());
            }
        }
        if (!Constants.USER_BC_BSP_JOB_TYPE_C.equals(jobConf.getJobType())) {
            RunJar.unJar(new File(localJarFile.toString()), workDir);
            /** Add the user program jar to the system's classpath. */
            ClassLoaderUtil.addClassPath(localJarFile.toString());
        }
    }
    RunningJob rjob = addStaffToJob(staff.getJobID(), localJobFile, sip, directive, jobConf);
    LOG.info("debug:after addStaffToJob(staff.getJobID(), " + "localJobFile, sip, directive, jobConf); ");
    rjob.localized = true;
    sip.setFaultSSStep(directive.getFaultSSStep());
    LOG.info("debug:before launchStaffForJob(sip, jobConf);");
    launchStaffForJob(sip, jobConf);
}

From source file:com.chriscx.mapred.Map.java

private void parseSkipFile(Path patternsFile) {
    try {
        BufferedReader fis = new BufferedReader(new FileReader(patternsFile.toString()));
        String pattern = null;
        while ((pattern = fis.readLine()) != null) {
            patternsToSkip.add(pattern);
        }
    } catch (IOException ioe) {
        System.err.println("Caught exception while parsing the cached file '" + patternsFile + "' : "
                + StringUtils.stringifyException(ioe));
    }
}
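
In the classic WordCount-with-skip-patterns tutorial that this mapper resembles, the Path handed to parseSkipFile comes from the DistributedCache. A minimal, hedged sketch of such a configure() method (old mapred API assumed; the "wordcount.skip.patterns" flag name is illustrative):

public void configure(JobConf job) {
    if (job.getBoolean("wordcount.skip.patterns", false)) {
        Path[] patternsFiles = new Path[0];
        try {
            // Files registered with DistributedCache.addCacheFile(...) show up
            // here as local paths; parseSkipFile reads them via Path.toString().
            patternsFiles = DistributedCache.getLocalCacheFiles(job);
        } catch (IOException ioe) {
            System.err.println("Caught exception while getting cached files: "
                    + StringUtils.stringifyException(ioe));
        }
        for (Path patternsFile : patternsFiles) {
            parseSkipFile(patternsFile);
        }
    }
}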

From source file:com.ci.backports.hadoop.hbase.ZHFileOutputFormat.java

License:Apache License

/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 *   <li>Inspects the table to configure a total order partitioner</li>
 *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 *   <li>Sets the number of reduce tasks to match the current number of regions</li>
 *   <li>Sets the output key/value class to match ZHFileOutputFormat's requirements</li>
 *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 *     ZPutSortReducer)</li>
 * </ul> 
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setPartitionerClass(TotalOrderPartitioner.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(ZHFileOutputFormat.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(ZPutSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    LOG.info("Looking up current regions for table " + table);
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    Path partitionsPath = new Path(job.getWorkingDirectory(), "partitions_" + System.currentTimeMillis());
    LOG.info("Writing partition information to " + partitionsPath);

    FileSystem fs = partitionsPath.getFileSystem(conf);
    writePartitions(conf, partitionsPath, startKeys);
    // makeQualified returns a new Path instance, so keep the qualified result.
    partitionsPath = partitionsPath.makeQualified(fs);
    URI cacheUri;
    try {
        cacheUri = new URI(partitionsPath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    DistributedCache.addCacheFile(cacheUri, conf);
    DistributedCache.createSymlink(conf);

    LOG.info("Incremental table output configured.");
}
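
A job driver would typically call this helper after wiring up the mapper. The sketch below is a hedged illustration only: the class names MyDriver/MyMapper, the table name "my_table", and the input/output paths are hypothetical, and it assumes the older org.apache.hadoop.hbase.client.HTable and org.apache.hadoop.mapreduce.Job APIs this backport targets.

Configuration conf = HBaseConfiguration.create();
Job job = new Job(conf, "zhfile-bulk-load-prepare");
job.setJarByClass(MyDriver.class);
job.setMapperClass(MyMapper.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(KeyValue.class);
FileInputFormat.addInputPath(job, new Path("/input/data"));
FileOutputFormat.setOutputPath(job, new Path("/output/hfiles"));

// configureIncrementalLoad sets the partitioner, reducer, and reduce-task count,
// and registers the partitions file (via Path.toString()) in the DistributedCache.
HTable table = new HTable(conf, "my_table");
ZHFileOutputFormat.configureIncrementalLoad(job, table);

System.exit(job.waitForCompletion(true) ? 0 : 1);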

From source file:com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java

License:Apache License

public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Path ignoreOutputPath = new Path(outputPath + "_ignore");

    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default
    // compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    if (fs.exists(ignoreOutputPath)) {
        fs.delete(ignoreOutputPath, true);
        LOG.info("Deleted " + ignoreOutputPath.toString() + " successfully.");
    }

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
        // Map of families to writers and how much has been output on the
        // writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();

            if (ignore(kv)) {
                byte[] readBuf = rowKey;
                dos.write(readBuf, 0, readBuf.length);
                dos.write(Bytes.toBytes("\n"));
                return;
            }
            WriterLength wl = this.writers.get(family);

            // If this is a new column family, verify that the directory
            // exists
            if (wl == null) {
                Path path = null;
                path = new Path(outputdir, Bytes.toString(family));
                fs.mkdirs(path);
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new HLog writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            // we now have the proper HLog writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we know when a row transition happens.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /*
         * Create a new StoreFile.Writer.
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs, blockSize)
                    .withOutputDir(familydir)
                    .withCompression(AbstractHFileWriter.compressionByName(compression))
                    .withBloomType(bloomType).withComparator(KeyValue.COMPARATOR).withDataBlockEncoder(encoder)
                    .withChecksumType(HStore.getChecksumType(conf))
                    .withBytesPerChecksum(HStore.getBytesPerChecksum(conf)).build();

            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            dos.flush();
            dos.close();
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}

From source file:com.cloudera.cdk.data.hcatalog.TestHCatalogDatasetRepository.java

License:Apache License

@Test
public void testExternalTable() throws IOException {
    Path testDirectory = new Path(Files.createTempDir().getAbsolutePath());
    File tableDir = new File(testDirectory.toString(), TABLE_NAME);
    Assert.assertFalse("Data directory should not exist before test", tableDir.exists());
    repo = new HCatalogDatasetRepository(fileSystem, testDirectory);

    Dataset ds = repo.create(TABLE_NAME, new DatasetDescriptor.Builder().schema(USER_SCHEMA_URL).get());
    Assert.assertTrue("Data directory should exist after dataset creation", tableDir.exists());

    writeTestUsers(ds, 10);
    checkTestUsers(ds, 10);
    Assert.assertTrue("Data directory should exist after writing", tableDir.exists());

    repo.delete(TABLE_NAME);
    Assert.assertFalse("Data directory should not exist after dropping", tableDir.exists());
}

From source file:com.cloudera.cdk.morphline.hadoop.core.DownloadHdfsFileTest.java

License:Apache License

private Command createMorphline(String file, Path inputFile, File cwd) {
    return createMorphline("test-morphlines/downloadHdfsFile", ConfigFactory
            .parseMap(ImmutableMap.of("inputFile", inputFile.toString(), "outputDir", cwd.toString())));
}

From source file:com.cloudera.cdk.morphline.hadoop.rcfile.ReadRCFileTest.java

License:Apache License

@Test
public void testRCFileRowWise() throws Exception {
    morphline = createMorphline("test-morphlines/rcFileMorphlineRow");
    String rcFileName = "testRCFileRowWise.rc";
    List<Record> expected = setupRCFile(rcFileName, NUM_RECORDS, NUM_COLUMNS, true);
    Path inputFile = dfs.makeQualified(new Path(testDirectory, rcFileName));
    Record input = new Record();
    input.put(Fields.ATTACHMENT_NAME, inputFile.toString());
    input.put(Fields.ATTACHMENT_BODY, readPath(inputFile));
    startSession();
    assertEquals(1, collector.getNumStartEvents());
    assertTrue(morphline.process(input));
    assertTrue(areFieldsEqual(expected, collector.getRecords(), NUM_COLUMNS, NUM_RECORDS, true));
}

From source file:com.cloudera.cdk.morphline.hadoop.rcfile.ReadRCFileTest.java

License:Apache License

@Test
public void testRCFileColumnWise() throws Exception {
    morphline = createMorphline("test-morphlines/rcFileMorphlineColumn");
    String rcFileName = "testRCFileColumnWise.rc";
    List<Record> expected = setupRCFile(rcFileName, NUM_RECORDS, NUM_COLUMNS, false);
    Path inputFile = dfs.makeQualified(new Path(testDirectory, rcFileName));
    Record input = new Record();
    input.put(Fields.ATTACHMENT_NAME, inputFile.toString());
    input.put(Fields.ATTACHMENT_BODY, readPath(inputFile));
    startSession();
    assertEquals(1, collector.getNumStartEvents());
    assertTrue(morphline.process(input));
    assertTrue(areFieldsEqual(expected, collector.getRecords(), NUM_COLUMNS, NUM_RECORDS, false));
}

From source file:com.cloudera.circus.test.TestXTest.java

License:Open Source License

@Test
@TestHadoop
public void testHadoopMapReduce() throws Exception {
    JobConf conf = getHadoopConf();
    FileSystem fs = FileSystem.get(conf);
    JobClient jobClient = new JobClient(conf);
    try {
        Path inputDir = new Path(getHadoopTestDir(), "input");
        Path outputDir = new Path(getHadoopTestDir(), "output");

        fs.mkdirs(inputDir);
        Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
        writer.write("a\n");
        writer.write("b\n");
        writer.write("c\n");
        writer.close();

        JobConf jobConf = getHadoopConf();
        jobConf.setInt("mapred.map.tasks", 1);
        jobConf.setInt("mapred.map.max.attempts", 1);
        jobConf.setInt("mapred.reduce.max.attempts", 1);
        jobConf.set("mapred.input.dir", inputDir.toString());
        jobConf.set("mapred.output.dir", outputDir.toString());
        final RunningJob runningJob = jobClient.submitJob(jobConf);
        waitFor(60 * 1000, true, new Predicate() {
            @Override
            public boolean evaluate() throws Exception {
                return runningJob.isComplete();
            }
        });
        Assert.assertTrue(runningJob.isSuccessful());
        Assert.assertTrue(fs.exists(new Path(outputDir, "part-00000")));
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(new Path(outputDir, "part-00000"))));
        Assert.assertTrue(reader.readLine().trim().endsWith("a"));
        Assert.assertTrue(reader.readLine().trim().endsWith("b"));
        Assert.assertTrue(reader.readLine().trim().endsWith("c"));
        Assert.assertNull(reader.readLine());
        reader.close();
    } finally {
        fs.close();
        jobClient.close();
    }
}