List of usage examples for org.apache.hadoop.mapreduce.JobContext#getConfiguration()

Method signature:

public Configuration getConfiguration();
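Before the project-specific examples below, here is a minimal sketch of the typical pattern: getConfiguration() exposes the job's Configuration, from which custom job properties can be read, for instance inside a custom InputFormat. The class name and the example.max.splits property are hypothetical and used only for illustration.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class ExampleInputFormat extends TextInputFormat {
    // hypothetical property name, for illustration only
    public static final String MAX_SPLITS = "example.max.splits";

    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        // read a custom job setting from the JobContext's Configuration
        Configuration conf = job.getConfiguration();
        int maxSplits = conf.getInt(MAX_SPLITS, Integer.MAX_VALUE);

        // delegate split computation to TextInputFormat, then cap the number of splits
        List<InputSplit> splits = super.getSplits(job);
        if (splits.size() > maxSplits) {
            return new ArrayList<InputSplit>(splits.subList(0, maxSplits));
        }
        return splits;
    }
}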
From source file:DupleInputFormat.java
License:Apache License
/**
 * Generate the list of files and make them into FileSplits.
 * @param job the job context
 * @throws IOException
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    // number of times each file appears in the files list
    ArrayList<Integer> times = new ArrayList<Integer>();
    ArrayList<Path> paths = new ArrayList<Path>();
    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            FileSystem fs = path.getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
            int index;
            if ((index = paths.indexOf(path)) != -1) {
                times.set(index, times.get(index) + 1);
            } else {
                times.add(0);
                paths.add(path);
                index = times.size() - 1;
            }
            // not splittable
            splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(), times.get(index)));
        } else {
            // create an empty hosts array for zero-length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    //LOG.debug("Total # of splits: " + splits.size());
    return splits;
}
From source file:andromache.hadoop.CassandraInputFormat.java
License:Apache License
public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    validateConfiguration(conf);

    // canonical ranges and nodes holding replicas
    List<TokenRange> masterRangeNodes = getRangeMap(conf);

    keyspace = CassandraConfigHelper.getInputKeyspace(context.getConfiguration());
    cfNames = CassandraConfigHelper.getInputColumnFamilies(context.getConfiguration());

    // TODO: [IS] make sure this partitioner matches what is set on each participating keyspace
    partitioner = CassandraConfigHelper.getInputPartitioner(context.getConfiguration());
    logger.debug("partitioner is " + partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = Executors.newCachedThreadPool();
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        List<Future<List<CassandraSplit>>> splitfutures = new ArrayList<Future<List<CassandraSplit>>>();
        KeyRange jobKeyRange = CassandraConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null && jobKeyRange.start_token != null) {
            assert partitioner.preservesOrder()
                    : "ConfigHelper.setInputKeyRange(..) can only be used with an order-preserving partitioner";
            assert jobKeyRange.start_key == null : "only start_token supported";
            assert jobKeyRange.end_key == null : "only end_token supported";
            jobRange = new Range<Token>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                    partitioner.getTokenFactory().fromString(jobKeyRange.end_token), partitioner);
        }

        for (TokenRange range : masterRangeNodes) {
            if (jobRange == null) {
                // for each range, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, conf)));
            } else {
                Range<Token> dhtRange = new Range<Token>(
                        partitioner.getTokenFactory().fromString(range.start_token),
                        partitioner.getTokenFactory().fromString(range.end_token), partitioner);

                if (dhtRange.intersects(jobRange)) {
                    for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
                        range.start_token = partitioner.getTokenFactory().toString(intersection.left);
                        range.end_token = partitioner.getTokenFactory().toString(intersection.right);
                        // for each range, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(range, conf)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<CassandraSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    assert splits.size() > 0;
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}
From source file:andromache.hadoop.CassandraOutputFormat.java
License:Apache License
/**
 * Check for validity of the output-specification for the job.
 *
 * @param context information about the job
 * @throws java.io.IOException when output should not be attempted
 */
@Override
public void checkOutputSpecs(JobContext context) {
    Configuration conf = context.getConfiguration();
    if (CassandraConfigHelper.getOutputPartitioner(conf) == null) {
        throw new UnsupportedOperationException(
                "You must set the output partitioner to the one used by your Cassandra cluster");
    }
    if (CassandraConfigHelper.getOutputInitialAddress(conf) == null) {
        throw new UnsupportedOperationException("You must set the initial output address to a Cassandra node");
    }
}
From source file:at.illecker.hama.rootbeer.examples.util.RandomGraphGenerator.java
License:Apache License
static long getNumberOfRows(JobContext job) {
    return job.getConfiguration().getLong("hama.num.vertices", 0);
}
From source file:be.uantwerpen.adrem.hadoop.util.SplitByKTextInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = newArrayList();
    int numberOfSplits = getNumberOfSplits(job);
    for (FileStatus status : listStatus(job)) {
        splits.addAll(getSplitsForFile(status, job.getConfiguration(), numberOfSplits));
    }
    return splits;
}
From source file:be.uantwerpen.adrem.hadoop.util.SplitByKTextInputFormat.java
License:Apache License
/**
 * Get the number of splits.
 *
 * @param job the job
 * @return the number of splits to be created for this file
 */
public static int getNumberOfSplits(JobContext job) {
    return job.getConfiguration().getInt(NUMBER_OF_CHUNKS, 1);
}
From source file:boa.aggregators.MLAggregator.java
License:Apache License
public void saveModel(Object model) {
    FSDataOutputStream out = null;
    FileSystem fileSystem = null;
    Path filePath = null;

    try {
        JobContext context = (JobContext) getContext();
        Configuration configuration = context.getConfiguration();
        int boaJobId = configuration.getInt("boa.hadoop.jobid", 0);
        JobConf job = new JobConf(configuration);
        Path outputPath = FileOutputFormat.getOutputPath(job);

        fileSystem = outputPath.getFileSystem(context.getConfiguration());

        fileSystem.mkdirs(new Path("/boa", new Path("" + boaJobId)));
        filePath = new Path("/boa",
                new Path("" + boaJobId, new Path(("" + getKey()).split("\\[")[0] + "ML.model")));

        if (fileSystem.exists(filePath))
            return;

        out = fileSystem.create(filePath);

        // serialize the model and write the bytes to the output file
        ByteArrayOutputStream byteOutStream = new ByteArrayOutputStream();
        ObjectOutputStream objectOut = new ObjectOutputStream(byteOutStream);
        objectOut.writeObject(model);
        objectOut.close();
        byte[] serializedObject = byteOutStream.toByteArray();
        out.write(serializedObject, 0, serializedObject.length);

        this.collect(filePath.toString());
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (out != null)
                out.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
    }
}
From source file:boa.io.BoaOutputCommitter.java
License:Apache License
@Override
public void commitJob(JobContext context) throws java.io.IOException {
    super.commitJob(context);

    int boaJobId = context.getConfiguration().getInt("boa.hadoop.jobid", 0);
    storeOutput(context, boaJobId);
    updateStatus(null, boaJobId);
}
From source file:boa.io.BoaOutputCommitter.java
License:Apache License
@Override
public void abortJob(JobContext context, JobStatus.State runState) throws java.io.IOException {
    super.abortJob(context, runState);

    final JobClient jobClient = new JobClient(new JobConf(context.getConfiguration()));
    final RunningJob job = jobClient.getJob(
            (org.apache.hadoop.mapred.JobID) JobID.forName(context.getConfiguration().get("mapred.job.id")));

    String diag = "";
    for (final TaskCompletionEvent event : job.getTaskCompletionEvents(0))
        switch (event.getTaskStatus()) {
        case SUCCEEDED:
            break;
        case FAILED:
        case KILLED:
        case OBSOLETE:
        case TIPFAILED:
            diag += "Diagnostics for: " + event.getTaskTrackerHttp() + "\n";
            for (final String s : job.getTaskDiagnostics(event.getTaskAttemptId()))
                diag += s + "\n";
            diag += "\n";
            break;
        }

    updateStatus(diag, context.getConfiguration().getInt("boa.hadoop.jobid", 0));
}
From source file:boa.io.BoaOutputCommitter.java
License:Apache License
private void storeOutput(final JobContext context, final int jobId) {
    if (jobId == 0)
        return;

    Connection con = null;
    FileSystem fileSystem = null;
    FSDataInputStream in = null;
    FSDataOutputStream out = null;

    try {
        fileSystem = outputPath.getFileSystem(context.getConfiguration());

        con = DriverManager.getConnection(url, user, password);

        PreparedStatement ps = null;
        try {
            ps = con.prepareStatement("INSERT INTO boa_output (id, length) VALUES (" + jobId + ", 0)");
            ps.executeUpdate();
        } catch (final Exception e) {
        } finally {
            try {
                if (ps != null)
                    ps.close();
            } catch (final Exception e) {
                e.printStackTrace();
            }
        }

        fileSystem.mkdirs(new Path("/boa", new Path("" + jobId)));
        out = fileSystem.create(new Path("/boa", new Path("" + jobId, new Path("output.txt"))));

        int partNum = 0;
        final byte[] b = new byte[64 * 1024 * 1024];
        long length = 0;
        boolean hasWebResult = false;

        while (true) {
            final Path path = new Path(outputPath, "part-r-" + String.format("%05d", partNum++));
            if (!fileSystem.exists(path))
                break;

            if (in != null)
                try {
                    in.close();
                } catch (final Exception e) {
                    e.printStackTrace();
                }
            in = fileSystem.open(path);

            int numBytes = 0;
            while ((numBytes = in.read(b)) > 0) {
                if (!hasWebResult) {
                    hasWebResult = true;

                    try {
                        ps = con.prepareStatement("UPDATE boa_output SET web_result=? WHERE id=" + jobId);
                        int webSize = 64 * 1024 - 1;
                        ps.setString(1, new String(b, 0, numBytes < webSize ? numBytes : webSize));
                        ps.executeUpdate();
                    } finally {
                        try {
                            if (ps != null)
                                ps.close();
                        } catch (final Exception e) {
                            e.printStackTrace();
                        }
                    }
                }
                out.write(b, 0, numBytes);
                length += numBytes;
                this.context.progress();
            }
        }

        try {
            ps = con.prepareStatement("UPDATE boa_output SET length=? WHERE id=" + jobId);
            ps.setLong(1, length);
            ps.executeUpdate();
        } finally {
            try {
                if (ps != null)
                    ps.close();
            } catch (final Exception e) {
                e.printStackTrace();
            }
        }
    } catch (final Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (con != null)
                con.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
        try {
            if (in != null)
                in.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
        try {
            if (out != null)
                out.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
        try {
            if (fileSystem != null)
                fileSystem.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
    }
}