Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
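
A minimal sketch of the call pattern documented above: a class that receives a JobContext (an InputFormat, OutputFormat, or OutputCommitter) reads job settings through getConfiguration(). The property name "my.example.key" below is a hypothetical placeholder used only for illustration, not a real Hadoop key.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;

public class ConfigurationPeek {
    // Reads a job property via the JobContext; "my.example.key" is a
    // hypothetical key used only to demonstrate the call pattern.
    static String readExampleKey(JobContext context) {
        Configuration conf = context.getConfiguration();
        return conf.get("my.example.key", "default-value");
    }
}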

Usage

From source file:DupleInputFormat.java

License:Apache License

/** 
 * Generate the list of files and make them into FileSplits.
 * @param job the job context
 * @return the list of input splits
 * @throws IOException if the input files cannot be listed or inspected
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    // number of times each path has already appeared in the files list
    ArrayList<Integer> times = new ArrayList<Integer>();
    ArrayList<Path> paths = new ArrayList<Path>();

    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            FileSystem fs = path.getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);

            int index;
            if ((index = paths.indexOf(path)) != -1) {
                // duplicate path: bump its occurrence counter
                times.set(index, times.get(index) + 1);
            } else {
                // first occurrence: counter starts at 0
                times.add(0);
                paths.add(path);
                index = times.size() - 1;
            }

            // files are not splittable: one split covers the whole file
            splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(), times.get(index)));

        } else {
            // create an empty hosts array for zero-length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    //LOG.debug("Total # of splits: " + splits.size());
    return splits;
}
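
For context, a custom InputFormat like the one above is typically wired into a job from the driver. A minimal sketch, assuming DupleInputFormat extends FileInputFormat (the driver class name, job name, and input path are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class DupleDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "duple-example");
        // Assumes DupleInputFormat extends FileInputFormat (not shown above).
        job.setInputFormatClass(DupleInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // ... configure mapper, reducer, and output as usual, then submit.
    }
}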

From source file:andromache.hadoop.CassandraInputFormat.java

License:Apache License

public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    validateConfiguration(conf);

    // canonical ranges and nodes holding replicas
    List<TokenRange> masterRangeNodes = getRangeMap(conf);

    keyspace = CassandraConfigHelper.getInputKeyspace(conf);

    cfNames = CassandraConfigHelper.getInputColumnFamilies(conf);

    // TODO: [IS] make sure this partitioner matches what is set on each participating keyspace
    partitioner = CassandraConfigHelper.getInputPartitioner(conf);
    logger.debug("partitioner is " + partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel

    ExecutorService executor = Executors.newCachedThreadPool();
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        List<Future<List<CassandraSplit>>> splitfutures = new ArrayList<Future<List<CassandraSplit>>>();
        KeyRange jobKeyRange = CassandraConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null && jobKeyRange.start_token != null) {
            assert partitioner
                    .preservesOrder() : "ConfigHelper.setInputKeyRange(..) can only be used with an order-preserving partitioner";
            assert jobKeyRange.start_key == null : "only start_token supported";
            assert jobKeyRange.end_key == null : "only end_token supported";
            jobRange = new Range<Token>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                    partitioner.getTokenFactory().fromString(jobKeyRange.end_token), partitioner);
        }

        for (TokenRange range : masterRangeNodes) {
            if (jobRange == null) {
                // for each range, pick a live owner and ask it to compute bite-sized splits

                splitfutures.add(executor.submit(new SplitCallable(range, conf)));
            } else {
                Range<Token> dhtRange = new Range<Token>(
                        partitioner.getTokenFactory().fromString(range.start_token),
                        partitioner.getTokenFactory().fromString(range.end_token), partitioner);

                if (dhtRange.intersects(jobRange)) {
                    for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
                        range.start_token = partitioner.getTokenFactory().toString(intersection.left);
                        range.end_token = partitioner.getTokenFactory().toString(intersection.right);
                        // for each range, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(range, conf)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<CassandraSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    assert splits.size() > 0;
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}

From source file:andromache.hadoop.CassandraOutputFormat.java

License:Apache License

/**
 * Check for validity of the output-specification for the job.
 *
 * @param context information about the job
 * @throws java.io.IOException when output should not be attempted
 */
@Override
public void checkOutputSpecs(JobContext context) {
    Configuration conf = context.getConfiguration();
    if (CassandraConfigHelper.getOutputPartitioner(conf) == null) {
        throw new UnsupportedOperationException(
                "You must set the output partitioner to the one used by your Cassandra cluster");
    }
    if (CassandraConfigHelper.getOutputInitialAddress(conf) == null) {
        throw new UnsupportedOperationException("You must set the initial output address to a Cassandra node");
    }
}
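
The checks above imply that the driver must set a partitioner and an initial node address before the job is submitted. A minimal sketch, assuming CassandraConfigHelper exposes setters that mirror the getters used in checkOutputSpecs (the setter names and values are assumptions, not verified against the andromache source):

import org.apache.hadoop.conf.Configuration;

public class CassandraOutputSetup {
    static void configure(Configuration conf) {
        // Assumed setter names mirroring the getters checked above.
        CassandraConfigHelper.setOutputPartitioner(conf, "org.apache.cassandra.dht.Murmur3Partitioner");
        CassandraConfigHelper.setOutputInitialAddress(conf, "127.0.0.1");
    }
}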

From source file:at.illecker.hama.rootbeer.examples.util.RandomGraphGenerator.java

License:Apache License

static long getNumberOfRows(JobContext job) {
    return job.getConfiguration().getLong("hama.num.vertices", 0);
}
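
The write side of this lookup is symmetric: the driver stores the vertex count under the same key before launching the job. A minimal sketch (the helper method is illustrative; only the configuration key comes from the source above):

import org.apache.hadoop.mapreduce.Job;

public class GraphJobSetup {
    static void setNumberOfRows(Job job, long numVertices) {
        // Same key that getNumberOfRows() reads back, defaulting to 0.
        job.getConfiguration().setLong("hama.num.vertices", numVertices);
    }
}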

From source file:be.uantwerpen.adrem.hadoop.util.SplitByKTextInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = newArrayList();
    int numberOfSplits = getNumberOfSplits(job);
    for (FileStatus status : listStatus(job)) {
        splits.addAll(getSplitsForFile(status, job.getConfiguration(), numberOfSplits));
    }
    return splits;
}

From source file:be.uantwerpen.adrem.hadoop.util.SplitByKTextInputFormat.java

License:Apache License

/**
 * Get the number of splits.
 * 
 * @param job
 *          the job
 * @return the number of splits to be created on this file
 */
public static int getNumberOfSplits(JobContext job) {
    return job.getConfiguration().getInt(NUMBER_OF_CHUNKS, 1);
}
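
Again, the setter side is symmetric. A minimal sketch, assuming the NUMBER_OF_CHUNKS constant is publicly accessible on SplitByKTextInputFormat (the helper method below is illustrative):

import org.apache.hadoop.mapreduce.Job;

public class SplitSetup {
    static void setNumberOfSplits(Job job, int k) {
        // NUMBER_OF_CHUNKS is the key read back by getNumberOfSplits().
        job.getConfiguration().setInt(SplitByKTextInputFormat.NUMBER_OF_CHUNKS, k);
    }
}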

From source file:boa.aggregators.MLAggregator.java

License:Apache License

public void saveModel(Object model) {
     FSDataOutputStream out = null;
     FileSystem fileSystem = null;
     Path filePath = null;
     try {
         JobContext context = (JobContext) getContext();
         Configuration configuration = context.getConfiguration();
         int boaJobId = configuration.getInt("boa.hadoop.jobid", 0);
         JobConf job = new JobConf(configuration);
         Path outputPath = FileOutputFormat.getOutputPath(job);
         fileSystem = outputPath.getFileSystem(context.getConfiguration());

         fileSystem.mkdirs(new Path("/boa", new Path("" + boaJobId)));
         filePath = new Path("/boa",
                 new Path("" + boaJobId, new Path(("" + getKey()).split("\\[")[0] + "ML.model")));

         if (fileSystem.exists(filePath))
             return;

         out = fileSystem.create(filePath);
         ByteArrayOutputStream byteOutStream = new ByteArrayOutputStream();
         ObjectOutputStream objectOut = new ObjectOutputStream(byteOutStream);
         objectOut.writeObject(model);
         objectOut.close();

         byte[] serializedObject = byteOutStream.toByteArray();
         out.write(serializedObject, 0, serializedObject.length);

         this.collect(filePath.toString());

     } catch (Exception e) {
         e.printStackTrace();
     } finally {
         try {
             if (out != null)
                 out.close();
         } catch (final Exception e) {
             e.printStackTrace();
         }
     }
 }
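
For completeness, a model written by saveModel() can be read back by reversing its steps: open the path on the same FileSystem and deserialize with an ObjectInputStream. A minimal sketch (the method name is illustrative; the path is whatever saveModel() produced):

import java.io.ObjectInputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ModelLoader {
    static Object loadModel(Configuration conf, Path modelPath) throws Exception {
        FileSystem fs = modelPath.getFileSystem(conf);
        try (ObjectInputStream in = new ObjectInputStream(fs.open(modelPath))) {
            return in.readObject();
        }
    }
}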

From source file:boa.io.BoaOutputCommitter.java

License:Apache License

@Override
public void commitJob(JobContext context) throws java.io.IOException {
    super.commitJob(context);

    int boaJobId = context.getConfiguration().getInt("boa.hadoop.jobid", 0);
    storeOutput(context, boaJobId);
    updateStatus(null, boaJobId);
}

From source file:boa.io.BoaOutputCommitter.java

License:Apache License

@Override
public void abortJob(JobContext context, JobStatus.State runState) throws java.io.IOException {
    super.abortJob(context, runState);

    final JobClient jobClient = new JobClient(new JobConf(context.getConfiguration()));
    final RunningJob job = jobClient.getJob(
            (org.apache.hadoop.mapred.JobID) JobID.forName(context.getConfiguration().get("mapred.job.id")));
    String diag = "";
    for (final TaskCompletionEvent event : job.getTaskCompletionEvents(0))
        switch (event.getTaskStatus()) {
        case SUCCEEDED:
            break;
        case FAILED:
        case KILLED:
        case OBSOLETE:
        case TIPFAILED:
            diag += "Diagnostics for: " + event.getTaskTrackerHttp() + "\n";
            for (final String s : job.getTaskDiagnostics(event.getTaskAttemptId()))
                diag += s + "\n";
            diag += "\n";
            break;
        }
    updateStatus(diag, context.getConfiguration().getInt("boa.hadoop.jobid", 0));
}

From source file:boa.io.BoaOutputCommitter.java

License:Apache License

private void storeOutput(final JobContext context, final int jobId) {
    if (jobId == 0)
        return;

    Connection con = null;
    FileSystem fileSystem = null;
    FSDataInputStream in = null;
    FSDataOutputStream out = null;

    try {
        fileSystem = outputPath.getFileSystem(context.getConfiguration());

        con = DriverManager.getConnection(url, user, password);

        PreparedStatement ps = null;
        try {
            ps = con.prepareStatement("INSERT INTO boa_output (id, length) VALUES (" + jobId + ", 0)");
            ps.executeUpdate();
        } catch (final Exception e) {
            // ignored: the row may already exist from a previous run
        } finally {
            try {
                if (ps != null)
                    ps.close();
            } catch (final Exception e) {
                e.printStackTrace();
            }
        }

        fileSystem.mkdirs(new Path("/boa", new Path("" + jobId)));
        out = fileSystem.create(new Path("/boa", new Path("" + jobId, new Path("output.txt"))));

        int partNum = 0;

        final byte[] b = new byte[64 * 1024 * 1024];
        long length = 0;
        boolean hasWebResult = false;

        while (true) {
            final Path path = new Path(outputPath, "part-r-" + String.format("%05d", partNum++));
            if (!fileSystem.exists(path))
                break;

            if (in != null)
                try {
                    in.close();
                } catch (final Exception e) {
                    e.printStackTrace();
                }
            in = fileSystem.open(path);

            int numBytes = 0;

            while ((numBytes = in.read(b)) > 0) {
                if (!hasWebResult) {
                    hasWebResult = true;

                    try {
                        ps = con.prepareStatement("UPDATE boa_output SET web_result=? WHERE id=" + jobId);
                        int webSize = 64 * 1024 - 1;
                        ps.setString(1, new String(b, 0, Math.min(numBytes, webSize)));
                        ps.executeUpdate();
                    } finally {
                        try {
                            if (ps != null)
                                ps.close();
                        } catch (final Exception e) {
                            e.printStackTrace();
                        }
                    }
                }
                out.write(b, 0, numBytes);
                length += numBytes;

                this.context.progress();
            }
        }

        try {
            ps = con.prepareStatement("UPDATE boa_output SET length=? WHERE id=" + jobId);
            ps.setLong(1, length);
            ps.executeUpdate();
        } finally {
            try {
                if (ps != null)
                    ps.close();
            } catch (final Exception e) {
                e.printStackTrace();
            }
        }
    } catch (final Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (con != null)
                con.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
        try {
            if (in != null)
                in.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
        try {
            if (out != null)
                out.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
        try {
            if (fileSystem != null)
                fileSystem.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
    }
}