List of usage examples for org.apache.hadoop.mapred.JobConf.getWorkingDirectory()
public Path getWorkingDirectory()
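Before the examples, a minimal sketch of the pattern nearly all of them share: getWorkingDirectory() returns the fully-qualified working directory of the job's default FileSystem, and relative Paths are resolved against it via the Path(parent, child) constructor. The class name and sample paths below are illustrative only, and running it assumes a Hadoop client configuration on the classpath.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class WorkingDirectoryExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Fully-qualified working directory of the default FileSystem,
        // e.g. file:/home/<user> locally or hdfs://namenode/user/<user>.
        Path workingDir = conf.getWorkingDirectory();
        // new Path(parent, child) resolves a relative child against the
        // parent; an absolute child path is kept as-is.
        Path resolved = new Path(workingDir, new Path("data/input"));
        System.out.println(resolved);
    }
}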
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
static void setWorkOutputPath(JobConf conf, Path outputDir) {
    outputDir = new Path(conf.getWorkingDirectory(), outputDir);
    conf.set("mapred.work.output.dir", outputDir.toString());
}
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
/**
 * Set the array of {@link Path}s as the list of inputs
 * for the map-reduce job.
 *
 * @param conf Configuration of the job.
 * @param inputPaths the {@link Path}s of the input directories/files
 * for the map-reduce job.
 */
public static void setInputPaths(JobConf conf, Path... inputPaths) {
    Path path = new Path(conf.getWorkingDirectory(), inputPaths[0]);
    StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString()));
    for (int i = 1; i < inputPaths.length; i++) {
        str.append(StringUtils.COMMA_STR);
        path = new Path(conf.getWorkingDirectory(), inputPaths[i]);
        str.append(StringUtils.escapeString(path.toString()));
    }
    conf.set("mapred.input.dir", str.toString());
}
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
/**
 * Add a {@link Path} to the list of inputs for the map-reduce job.
 *
 * @param conf The configuration of the job
 * @param path {@link Path} to be added to the list of inputs for
 * the map-reduce job.
 */
public static void addInputPath(JobConf conf, Path path) {
    path = new Path(conf.getWorkingDirectory(), path);
    String dirStr = StringUtils.escapeString(path.toString());
    String dirs = conf.get("mapred.input.dir");
    conf.set("mapred.input.dir", dirs == null ? dirStr : dirs + StringUtils.COMMA_STR + dirStr);
}
From source file:infinidb.hadoop.db.IDBFileInputFormat.java
License:Apache License
@Override
public RecordReader<NullWritable, NullWritable> getRecordReader(InputSplit arg0, JobConf arg1, Reporter arg2)
        throws IOException {
    final String filename = ((FileSplit) arg0).getPath().toString();
    final JobConf job = arg1;

    return new RecordReader<NullWritable, NullWritable>() {
        private boolean unread = true;

        @Override
        public void close() throws IOException {
        }

        @Override
        public NullWritable createKey() {
            return NullWritable.get();
        }

        @Override
        public NullWritable createValue() {
            return NullWritable.get();
        }

        @Override
        public long getPos() throws IOException {
            return 0;
        }

        @Override
        public float getProgress() throws IOException {
            return unread ? 0 : 1;
        }

        /* spawn a cpimport process for each input file */
        @Override
        public boolean next(NullWritable arg0, NullWritable arg1) throws IOException {
            InfiniDBConfiguration dbConf = new InfiniDBConfiguration(job);
            String schemaName = dbConf.getOutputSchemaName();
            String tableName = filename.substring(filename.lastIndexOf('/') + 1, filename.length());
            tableName = tableName.substring(0, tableName.lastIndexOf('.'));
            String output = job.get("mapred.output.dir");
            if (unread) {
                try {
                    StringBuilder loadCmdStr = new StringBuilder();
                    loadCmdStr.append(dbConf.getInfiniDBHome());
                    loadCmdStr.append("/bin/");
                    loadCmdStr.append("infinidoop_load.sh ");
                    loadCmdStr.append(filename);
                    loadCmdStr.append(" ");
                    loadCmdStr.append(schemaName);
                    loadCmdStr.append(" ");
                    loadCmdStr.append(tableName);
                    Process lChldProc = Runtime.getRuntime().exec(loadCmdStr.toString());

                    // Wait for the child to exit
                    lChldProc.waitFor();
                    BufferedReader lChldProcOutStream = new BufferedReader(
                            new InputStreamReader(lChldProc.getInputStream()));
                    BufferedReader stdError = new BufferedReader(
                            new InputStreamReader(lChldProc.getErrorStream()));

                    String lChldProcOutPutStr = null;
                    StringBuffer outpath = new StringBuffer();
                    outpath.append(job.getWorkingDirectory());
                    outpath.append("/");
                    outpath.append(output);
                    outpath.append("/");
                    outpath.append(tableName);
                    outpath.append(".log");

                    Path pt = new Path(outpath.toString());
                    FileSystem fs = FileSystem.get(new Configuration());
                    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(pt, false)));

                    // catch output
                    while ((lChldProcOutPutStr = lChldProcOutStream.readLine()) != null) {
                        br.write(lChldProcOutPutStr);
                        br.newLine();
                    }
                    // catch error
                    while ((lChldProcOutPutStr = stdError.readLine()) != null) {
                        br.write(lChldProcOutPutStr);
                        br.newLine();
                    }
                    //br.write(outpath.toString());
                    //br.newLine();
                    //br.write(loadCmdStr.toString());
                    //br.newLine();
                    //br.write(filename);
                    br.close();
                    lChldProcOutStream.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
                unread = false;
                return true;
            } else {
                return false;
            }
        }
    };
}
From source file:io.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);
    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
From source file:microbench.TeraSortOnHDFSDataLocal.java
License:Apache License
public static void setInputPaths(JobConf conf, Path... inputPaths) {
    Path path = new Path(conf.getWorkingDirectory(), inputPaths[0]);
    StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString()));
    for (int i = 1; i < inputPaths.length; i++) {
        str.append(StringUtils.COMMA_STR);
        path = new Path(conf.getWorkingDirectory(), inputPaths[i]);
        str.append(StringUtils.escapeString(path.toString()));
    }
    conf.set(MAPRED_INPUT_DIR, str.toString());
}
From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);
    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
From source file:org.sf.xrime.preprocessing.pajek.PajekFormat2LabeledAdjBiSetVertex.java
License:Apache License
public void toBinaryData() {
    try {
        JobConf jobConf = new JobConf(new Configuration(), PajekFormat2LabeledAdjBiSetVertex.class);
        Path filePath = new Path(dstPath + "/part00000");
        Path path = new Path(jobConf.getWorkingDirectory(), filePath);
        FileSystem fs = path.getFileSystem(jobConf);

        CompressionCodec codec = null;
        CompressionType compressionType = CompressionType.NONE;
        if (jobConf.getBoolean("mapred.output.compress", false)) {
            // find the kind of compression to do
            String val = jobConf.get("mapred.output.compression.type", CompressionType.RECORD.toString());
            compressionType = CompressionType.valueOf(val);

            // find the right codec
            Class<? extends CompressionCodec> codecClass = DefaultCodec.class;
            String name = jobConf.get("mapred.output.compression.codec");
            if (name != null) {
                try {
                    codecClass = jobConf.getClassByName(name).asSubclass(CompressionCodec.class);
                } catch (ClassNotFoundException e) {
                    throw new IllegalArgumentException("Compression codec " + name + " was not found.", e);
                }
            }
            codec = ReflectionUtils.newInstance(codecClass, jobConf);
        }

        Set<String> keySet = vertexes.keySet();
        Iterator<String> iter = keySet.iterator();
        LabeledAdjBiSetVertex currentAdjVertex = new LabeledAdjBiSetVertex();

        SequenceFile.Writer out = SequenceFile.createWriter(fs, jobConf, path, Text.class,
                LabeledAdjBiSetVertex.class, compressionType, codec, null);
        while (iter.hasNext()) {
            currentAdjVertex = vertexes.get(iter.next());
            out.append(new Text(currentAdjVertex.getId()), currentAdjVertex);
        }
        out.close();
    } catch (IOException e) {
        // The original swallowed this exception silently; surface it at minimum.
        e.printStackTrace();
    }
}
From source file:org.sf.xrime.preprocessing.pajek.PajekFormat2WeightedLabeledAdjVertex.java
License:Apache License
public void toBinaryData() {
    try {
        JobConf jobConf = new JobConf(new Configuration(), PajekFormat2WeightedLabeledAdjVertex.class);
        Path filePath = new Path(dstPath + "/part00000");
        Path path = new Path(jobConf.getWorkingDirectory(), filePath);
        FileSystem fs = path.getFileSystem(jobConf);

        CompressionCodec codec = null;
        CompressionType compressionType = CompressionType.NONE;
        if (jobConf.getBoolean("mapred.output.compress", false)) {
            // find the kind of compression to do
            String val = jobConf.get("mapred.output.compression.type", CompressionType.RECORD.toString());
            compressionType = CompressionType.valueOf(val);

            // find the right codec
            Class<? extends CompressionCodec> codecClass = DefaultCodec.class;
            String name = jobConf.get("mapred.output.compression.codec");
            if (name != null) {
                try {
                    codecClass = jobConf.getClassByName(name).asSubclass(CompressionCodec.class);
                } catch (ClassNotFoundException e) {
                    throw new IllegalArgumentException("Compression codec " + name + " was not found.", e);
                }
            }
            codec = ReflectionUtils.newInstance(codecClass, jobConf);
        }

        Set<String> keySet = vertexes.keySet();
        Iterator<String> iter = keySet.iterator();
        LabeledAdjVertex currentAdjVertex = new LabeledAdjVertex();

        SequenceFile.Writer out = SequenceFile.createWriter(fs, jobConf, path, Text.class,
                LabeledAdjVertex.class, compressionType, codec, null);
        while (iter.hasNext()) {
            currentAdjVertex = vertexes.get(iter.next());
            out.append(new Text(currentAdjVertex.getId()), currentAdjVertex);
        }
        out.close();
    } catch (IOException e) {
        // The original swallowed this exception silently; surface it at minimum.
        e.printStackTrace();
    }
}