Example usage for org.apache.hadoop.fs Path getParent

Introduction

On this page you can find example usages of the getParent() method of org.apache.hadoop.fs.Path.

Prototype

public Path getParent()

Source Link

Document

Returns the parent of a path or null if at root.

Usage

From source file:edu.umn.cs.spatialHadoop.mapreduce.SpatialInputFormat3.java

License:Open Source License

/**
 * Collects into {@code result} the files under {@code dir} that should be read.
 * <p>
 * When {@code dir} has no global index, or no filter is given, every file is
 * added and sub-directories are visited recursively. Otherwise the filter is
 * asked to select partitions from the global index and only the files of the
 * selected partitions are added.
 *
 * @param fs the file system that contains {@code dir}
 * @param dir the directory (or wildcard pattern) to list
 * @param result the list that receives the selected files
 * @param filter the filter used to prune partitions; may be {@code null}
 * @throws IOException if listing the directory fails
 */
protected void listStatus(final FileSystem fs, Path dir, final List<FileStatus> result, BlockFilter filter)
        throws IOException {
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, dir);
    if (gindex == null || filter == null) {
        // No global index which means we cannot use the filter function
        FileStatus[] listStatus;
        if (OperationsParams.isWildcard(dir)) {
            // Wild card
            listStatus = fs.globStatus(dir);
        } else {
            listStatus = fs.listStatus(dir, SpatialSite.NonHiddenFileFilter);
        }
        if (listStatus == null) {
            // Some FileSystem implementations return null when nothing matches
            return;
        }
        // Add all files under this directory
        for (FileStatus status : listStatus) {
            if (status.isDir()) {
                // Recursively go in subdir
                listStatus(fs, status.getPath(), result, filter);
            } else {
                // A file, just add it
                result.add(status);
            }
        }
    } else {
        // Partition file names are resolved against the directory holding the index
        final Path indexDir = OperationsParams.isWildcard(dir) ? dir.getParent() : dir;
        // Use the global index to limit files
        filter.selectCells(gindex, new ResultCollector<Partition>() {
            @Override
            public void collect(Partition partition) {
                Path cellPath = new Path(indexDir, partition.filename);
                try {
                    if (!fs.exists(cellPath)) {
                        // Skip the missing file instead of asking for its status,
                        // which would only raise a FileNotFoundException
                        LOG.warn("Matched file not found: " + cellPath);
                        return;
                    }
                    result.add(fs.getFileStatus(cellPath));
                } catch (IOException e) {
                    LOG.warn("Could not get the status of matched file " + cellPath, e);
                }
            }
        });
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java

License:Open Source License

/**
 * Creates a full spatio-temporal hierarchy (daily, monthly and yearly
 * indexes) for a source folder.
 * <p>
 * The input and output paths are taken from {@code params}. The optional
 * "time" parameter restricts which entries of the input directory are listed
 * and the "dataset" parameter names the dataset to index.
 *
 * @param params the operation parameters
 * @throws IOException if accessing the file system fails
 * @throws ParseException declared for the index merging steps
 * @throws InterruptedException declared for the parallel steps
 */
public static void directoryIndexer(final OperationsParams params)
        throws IOException, ParseException, InterruptedException {
    Path inputDir = params.getInputPath();
    FileSystem sourceFs = inputDir.getFileSystem(params);
    final Path sourceDir = inputDir.makeQualified(sourceFs);
    Path destDir = params.getOutputPath();
    final FileSystem destFs = destDir.getFileSystem(params);

    String timeSpec = params.get("time");
    TimeRange timeRange = timeSpec == null ? null : new TimeRange(timeSpec);

    // Step 1: create the daily indexes that do not exist yet
    final Path dailyIndexDir = new Path(destDir, "daily");
    FileStatus[] matchingDays;
    if (timeRange == null) {
        matchingDays = sourceFs.listStatus(inputDir);
    } else {
        matchingDays = sourceFs.listStatus(inputDir, timeRange);
    }
    final Vector<Path> sourceFiles = new Vector<Path>();
    for (FileStatus day : matchingDays) {
        FileStatus[] tilesOfDay = sourceFs.listStatus(day.getPath());
        for (FileStatus tile : tilesOfDay) {
            sourceFiles.add(tile.getPath());
        }
    }
    // Randomize the order so that the work is spread evenly across threads
    Collections.shuffle(sourceFiles);
    final String datasetName = params.get("dataset");
    Parallel.forEach(sourceFiles.size(), new RunnableRange<Object>() {
        @Override
        public Object run(int from, int to) {
            LOG.info("Worker [" + from + "," + to + ") started");
            for (int i = from; i < to; i++) {
                Path sourceFile = sourceFiles.get(i);
                try {
                    // The index keeps the same relative layout as the source data
                    Path destFilePath = new Path(dailyIndexDir, makeRelative(sourceDir, sourceFile));
                    if (destFs.exists(destFilePath)) {
                        // Already indexed
                        continue;
                    }
                    LOG.info("Worker [" + from + "," + to + ") indexing: " + sourceFile.getName());
                    // Pick a temporary file name that is not in use
                    Path tmpFile = new Path((int) (Math.random() * 1000000) + ".tmp");
                    while (destFs.exists(tmpFile)) {
                        tmpFile = new Path((int) (Math.random() * 1000000) + ".tmp");
                    }
                    tmpFile = tmpFile.makeQualified(destFs);
                    if (datasetName == null) {
                        throw new RuntimeException(
                                "Please provide the name of dataset you would like to index");
                    }
                    // Build into the temporary file, then move it to its final place
                    AggregateQuadTree.build(params, sourceFile, datasetName, tmpFile);
                    synchronized (destFs) {
                        Path parentDir = destFilePath.getParent();
                        if (!destFs.exists(parentDir)) {
                            destFs.mkdirs(parentDir);
                        }
                    }
                    destFs.rename(tmpFile, destFilePath);
                } catch (IOException e) {
                    throw new RuntimeException("Error building an index for " + sourceFile, e);
                }
            }
            LOG.info("Worker [" + from + "," + to + ") finished");
            return null;
        }

    });
    LOG.info("Done generating daily indexes");

    // Step 2: merge daily indexes into monthly indexes
    final SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy.MM.dd");
    final SimpleDateFormat monthFormat = new SimpleDateFormat("yyyy.MM");
    Path monthlyIndexDir = new Path(destDir, "monthly");
    mergeIndexes(destFs, dailyIndexDir, monthlyIndexDir, dayFormat, monthFormat, params);
    LOG.info("Done generating monthly indexes");

    // Step 3: merge monthly indexes into yearly indexes
    final SimpleDateFormat yearFormat = new SimpleDateFormat("yyyy");
    Path yearlyIndexDir = new Path(destDir, "yearly");
    mergeIndexes(destFs, monthlyIndexDir, yearlyIndexDir, monthFormat, yearFormat, params);
    LOG.info("Done generating yearly indexes");
}

From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java

License:Open Source License

/**
 * Merges a set of indexes into larger indexes
 * @param fs/*  www  . ja v  a2s  .  co  m*/
 * @param srcIndexDir
 * @param dstIndexDir
 * @param srcFormat
 * @param dstFormat
 * @param params
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 */
private static void mergeIndexes(final FileSystem fs, Path srcIndexDir, Path dstIndexDir,
        SimpleDateFormat srcFormat, SimpleDateFormat dstFormat, final OperationsParams params)
        throws IOException, ParseException, InterruptedException {
    TimeRange timeRange = params.get("time") != null ? new TimeRange(params.get("time")) : null;
    final FileStatus[] sourceIndexes = timeRange == null ? fs.listStatus(srcIndexDir)
            : fs.listStatus(srcIndexDir, timeRange);
    Arrays.sort(sourceIndexes); // Alphabetical sort acts as sort-by-date here

    // Scan the source indexes and merge each consecutive run belonging to the
    // same unit
    int i1 = 0;
    while (i1 < sourceIndexes.length) {
        final String indexToCreate = dstFormat.format(srcFormat.parse(sourceIndexes[i1].getPath().getName()));
        int i2 = i1 + 1;
        // Keep scanning as long as the source index belongs to the same dest index
        while (i2 < sourceIndexes.length && dstFormat
                .format(srcFormat.parse(sourceIndexes[i2].getPath().getName())).equals(indexToCreate))
            i2++;

        // Merge all source indexes in the range [i1, i2) into one dest index

        // Copy i1, i2 to other variables as final to be accessible from threads
        final int firstIndex = i1;
        final int lastIndex = i2;

        final Path destIndex = new Path(dstIndexDir, indexToCreate);

        // For each tile, merge all values in all source indexes
        /*A regular expression to catch the tile identifier of a MODIS grid cell*/
        final Pattern MODISTileID = Pattern.compile("^.*(h\\d\\dv\\d\\d).*$");
        final FileStatus[] tilesInFirstDay = fs.listStatus(sourceIndexes[i1].getPath());
        // Shuffle the array for better load balancing across threads
        Random rand = new Random();
        for (int i = 0; i < tilesInFirstDay.length - 1; i++) {
            // Swap the entry at i with any following entry
            int j = i + rand.nextInt(tilesInFirstDay.length - i - 1);
            FileStatus temp = tilesInFirstDay[i];
            tilesInFirstDay[i] = tilesInFirstDay[j];
            tilesInFirstDay[j] = temp;
        }
        Parallel.forEach(tilesInFirstDay.length, new RunnableRange<Object>() {
            @Override
            public Object run(int i_file1, int i_file2) {
                for (int i_file = i_file1; i_file < i_file2; i_file++) {
                    try {
                        FileStatus tileInFirstDay = tilesInFirstDay[i_file];

                        // Extract tile ID
                        Matcher matcher = MODISTileID.matcher(tileInFirstDay.getPath().getName());
                        if (!matcher.matches()) {
                            LOG.warn("Cannot extract tile id from file " + tileInFirstDay.getPath());
                            continue;
                        }

                        final String tileID = matcher.group(1);
                        Path destIndexFile = new Path(destIndex, tileID);

                        PathFilter tileFilter = new PathFilter() {
                            @Override
                            public boolean accept(Path path) {
                                return path.getName().contains(tileID);
                            }
                        };

                        // Find matching tiles in all source indexes to merge
                        Vector<Path> filesToMerge = new Vector<Path>(lastIndex - firstIndex);
                        filesToMerge.add(tileInFirstDay.getPath());
                        for (int iDailyIndex = firstIndex + 1; iDailyIndex < lastIndex; iDailyIndex++) {
                            FileStatus[] matchedTileFile = fs.listStatus(sourceIndexes[iDailyIndex].getPath(),
                                    tileFilter);
                            if (matchedTileFile.length == 0)
                                LOG.warn("Could not find tile " + tileID + " in dir "
                                        + sourceIndexes[iDailyIndex].getPath());
                            else if (matchedTileFile.length == 1)
                                filesToMerge.add(matchedTileFile[0].getPath());
                        }

                        if (fs.exists(destIndexFile)) {
                            // Destination file already exists
                            // Check the date of the destination and source files to see
                            // whether it needs to be updated or not
                            long destTimestamp = fs.getFileStatus(destIndexFile).getModificationTime();
                            boolean needsUpdate = false;
                            for (Path fileToMerge : filesToMerge) {
                                long sourceTimestamp = fs.getFileStatus(fileToMerge).getModificationTime();
                                if (sourceTimestamp > destTimestamp) {
                                    needsUpdate = true;
                                    break;
                                }
                            }
                            if (!needsUpdate)
                                continue;
                            else
                                LOG.info("Updating file " + destIndexFile.getName());
                        }

                        // Do the merge
                        Path tmpFile;
                        do {
                            tmpFile = new Path((int) (Math.random() * 1000000) + ".tmp");
                        } while (fs.exists(tmpFile));
                        tmpFile = tmpFile.makeQualified(fs);
                        LOG.info("Merging tile " + tileID + " into file " + destIndexFile);
                        AggregateQuadTree.merge(params, filesToMerge.toArray(new Path[filesToMerge.size()]),
                                tmpFile);
                        synchronized (fs) {
                            Path destDir = destIndexFile.getParent();
                            if (!fs.exists(destDir))
                                fs.mkdirs(destDir);
                        }
                        fs.rename(tmpFile, destIndexFile);
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
                return null;
            }
        });
        i1 = i2;
    }
}

From source file:edu.umn.cs.spatialHadoop.nasa.StockQuadTree.java

License:Open Source License

/**
 * Makes a path relative to another path by removing all common ancestors.
 *
 * @param parent the ancestor path to strip
 * @param descendant a path equal to or located under {@code parent}
 * @return the components of {@code descendant} below {@code parent} as a
 *         relative path, or {@code "."} if both paths are equal
 * @throws RuntimeException if {@code descendant} is not under {@code parent}
 */
private static Path makeRelative(Path parent, Path descendant) {
    final int baseDepth = parent.depth();
    // Names are collected leaf first, so popping yields them top-down
    Stack<String> names = new Stack<String>();
    Path cursor = descendant;
    for (; cursor.depth() > baseDepth; cursor = cursor.getParent()) {
        names.push(cursor.getName());
    }
    // After climbing to the same depth, both paths must be identical
    if (!cursor.equals(parent)) {
        throw new RuntimeException("descendant not a child of parent");
    }
    if (names.isEmpty()) {
        return new Path(".");
    }
    Path relative = new Path(names.pop());
    while (!names.isEmpty()) {
        relative = new Path(relative, names.pop());
    }
    return relative;
}

From source file:edu.umn.cs.spatialHadoop.nasa.HDFPlot.java

License:Open Source License

/**
 * Plots a heat map for the given input files.
 * <p>
 * Fills in defaults for the "shape" and "mbr" parameters, limits directory
 * inputs to {@code *.hdf} files, translates the "recover" parameter into the
 * "recoverholes" flag (generating a water mask for "write" when none is
 * configured), and finally runs a multilevel or single level plot depending
 * on the "pyramid" flag.
 *
 * @param inFiles the input paths; entries not ending in ".hdf" are replaced
 *        in place by a {@code *.hdf} pattern under that path
 * @param outFile the output path of the plot
 * @param params the operation parameters; updated by this method
 * @return the job returned by the plot operation
 */
public static Job plotHeatMap(Path[] inFiles, Path outFile, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    final String shapeName = params.get("shape");
    if (shapeName == null) {
        // No shape given, fall back to the default one
        params.setClass("shape", NASARectangle.class, Shape.class);
    } else if (!(params.getShape("shape") instanceof NASAShape)) {
        System.err.println("The specified shape " + shapeName + " in not an instance of NASAShape");
        System.exit(1);
    }

    // Default the MBR to the query rectangle, or to the whole world
    if (params.get("mbr") == null) {
        String defaultMbr = params.get("rect", "-180,-90,180,90");
        params.set("mbr", defaultMbr);
    }

    // Restrict to HDF files if working on a directory
    for (int i = 0; i < inFiles.length; i++) {
        boolean isHdfFile = inFiles[i].getName().toLowerCase().endsWith(".hdf");
        if (!isHdfFile) {
            inFiles[i] = new Path(inFiles[i], "*.hdf");
        }
    }

    final String recover = params.get("recover", "none").toLowerCase();
    final boolean recoverOnRead = recover.equals("read");
    final boolean recoverOnWrite = recover.equals("write");
    if (recoverOnRead || recoverOnWrite || recover.equals("none")) {
        // Holes are recovered while reading only in "read" mode
        params.setBoolean("recoverholes", recoverOnRead);
    }
    if (recoverOnWrite && params.get(PREPROCESSED_WATERMARK) == null) {
        // Recovering upon writing the final image needs a water mask
        OperationsParams maskParams = new OperationsParams(params);
        maskParams.setBoolean("background", false);
        Path wmImage = new Path(outFile.getParent(), outFile.getName() + "_WaterMask");
        generateWaterMask(wmImage, maskParams);
        params.set(PREPROCESSED_WATERMARK, wmImage.toString());
    }

    if (params.getBoolean("pyramid", false)) {
        return MultilevelPlot.plot(inFiles, outFile, HDFRasterizer.class, params);
    }
    return SingleLevelPlot.plot(inFiles, outFile, HDFRasterizer.class, params);
}

From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java

License:Open Source License

/**
 * Lists the contents of a directory as a JSON document.
 * <p>
 * The directory is taken from the "path" request parameter and defaults to
 * the root when the parameter is missing or empty. The response contains the
 * base directory, the parent directory (if any), one entry per non-hidden
 * file with directories listed first, and, when present, the names of the
 * "_master*" and "_data.png" files of the directory.
 * <p>
 * NOTE(review): values are written into the JSON without escaping; names that
 * contain quotes or backslashes would produce an invalid document.
 *
 * @param request the HTTP request
 * @param response the HTTP response that receives the JSON document, or a
 *        plain-text stack trace with status 500 on failure
 */
private void handleListFiles(HttpServletRequest request, HttpServletResponse response) {
    try {
        String pathStr = request.getParameter("path");
        if (pathStr == null || pathStr.isEmpty()) {
            // A missing parameter must not cause a NullPointerException below
            pathStr = "/";
        }
        Path path = new Path(pathStr);
        FileSystem fs = path.getFileSystem(commonParams);
        FileStatus[] fileStatuses = fs.listStatus(path, SpatialSite.NonHiddenFileFilter);
        // Directories first, then case-insensitive order by name
        Arrays.sort(fileStatuses, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus o1, FileStatus o2) {
                if (o1.isDirectory() && o2.isFile())
                    return -1;
                if (o1.isFile() && o2.isDirectory())
                    return 1;
                return o1.getPath().getName().toLowerCase().compareTo(o2.getPath().getName().toLowerCase());
            }
        });
        response.setContentType("application/json;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_OK);
        PrintWriter out = response.getWriter();
        out.print("{\"FileStatuses\":{");
        // The base directory is reported without a trailing slash
        if (pathStr.endsWith("/")) {
            pathStr = pathStr.substring(0, pathStr.length() - 1);
        }
        out.printf("\"BaseDir\":\"%s\",", pathStr);
        if (path.getParent() != null)
            out.printf("\"ParentDir\":\"%s\",", path.getParent());
        out.print("\"FileStatus\":[");
        for (int i = 0; i < fileStatuses.length; i++) {
            FileStatus fileStatus = fileStatuses[i];
            if (i != 0)
                out.print(',');
            String filename = fileStatus.getPath().getName();
            int idot = filename.lastIndexOf('.');
            String extension = idot == -1 ? "" : filename.substring(idot + 1);
            out.printf(
                    "{\"accessTime\":%d,\"blockSize\":%d,\"childrenNum\":%d,\"fileId\":%d,"
                            + "\"group\":\"%s\",\"length\":%d,\"modificationTime\":%d,"
                            + "\"owner\":\"%s\",\"pathSuffix\":\"%s\",\"permission\":\"%s\","
                            + "\"replication\":%d,\"storagePolicy\":%d,\"type\":\"%s\",\"extension\":\"%s\"}",
                    fileStatus.getAccessTime(), fileStatus.getBlockSize(), 0, 0, fileStatus.getGroup(),
                    fileStatus.getLen(), fileStatus.getModificationTime(), fileStatus.getOwner(),
                    fileStatus.getPath().getName(), fileStatus.getPermission(), fileStatus.getReplication(), 0,
                    fileStatus.isDirectory() ? "DIRECTORY" : "FILE", extension.toLowerCase());
        }
        out.print("]}");
        // Check if there is an image or master file
        FileStatus[] metaFiles = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("_master") || path.getName().equals("_data.png");
            }
        });
        for (FileStatus metaFile : metaFiles) {
            String metaFileName = metaFile.getPath().getName();
            if (metaFileName.startsWith("_master")) {
                out.printf(",\"MasterPath\":\"%s\"", metaFileName);
                // The shape is detected from the first listed file; there may be
                // none when the directory holds only hidden files
                if (fileStatuses.length > 0) {
                    String shape = OperationsParams.detectShape(fileStatuses[0].getPath(), commonParams);
                    if (shape != null)
                        out.printf(",\"Shape\":\"%s\"", shape);
                }
            } else if (metaFileName.equals("_data.png"))
                out.printf(",\"ImagePath\":\"%s\"", metaFileName);
        }
        out.print("}");

        out.close();
    } catch (Exception e) {
        System.out.println("error happened");
        e.printStackTrace();
        // Set the status and content type before writing the body so that they
        // still take effect if the response has not been committed yet
        response.setContentType("text/plain;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
        try {
            e.printStackTrace(response.getWriter());
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }
}

From source file:edu.umn.cs.sthadoop.operations.STJoin.java

License:Open Source License

/**
 * Runs the spatio-temporal join from the command line.
 * <p>
 * The time interval is first widened by the time distance, then a range
 * query is run on every input to collect candidate records, the candidates
 * are hashed into buckets and finally the join refinement step is run.
 * <p>
 * Example arguments:
 * <pre>
 * /home/louai/nyc-taxi/yellowIndex
 * /home/louai/nyc-taxi/humanIndex
 * /home/louai/nyc-taxi/resultSTJoin
 * shape:edu.umn.cs.sthadoop.core.STPoint
 * rect:-74.98451232910156,35.04014587402344,-73.97936248779295,41.49399566650391
 * interval:2015-01-01,2015-01-02
 * timeDistance:1,day
 * spaceDistance:2
 * -overwrite
 * -no-local
 * </pre>
 *
 * @param args the input paths, the output path and the operation parameters
 * @throws Exception if one of the steps fails
 */
public static void main(String[] args) throws Exception {
    OperationsParams params = new OperationsParams(new GenericOptionsParser(args));
    Path[] allFiles = params.getPaths();
    if (allFiles.length < 2) {
        System.err.println("This operation requires at least two input files");
        printUsage();
        System.exit(1);
    }
    if (allFiles.length == 2 && !params.checkInput()) {
        // One of the input files does not exist
        printUsage();
        System.exit(1);
    }
    if (allFiles.length > 2 && !params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }

    if (params.get("timedistance") == null) {
        System.err.println("time distance is missing");
        printUsage();
        System.exit(1);
    }

    if (params.get("spacedistance") == null) {
        System.err.println("space distance is missing");
        printUsage();
        System.exit(1);
    }

    if (params.get("interval") == null) {
        // The interval is split below; fail with a message instead of an NPE
        System.err.println("time interval is missing");
        printUsage();
        System.exit(1);
    }

    Path[] inputPaths = allFiles.length == 2 ? allFiles : params.getInputPaths();
    Path outputPath = allFiles.length == 2 ? null : params.getOutputPath();
    if (outputPath == null) {
        // All intermediate directories are derived from the output path, so
        // the join cannot run without one
        System.err.println("This operation requires an output path");
        printUsage();
        System.exit(1);
    }

    // modify the query range with new time interval to consider in join
    String[] value = params.get("timedistance").split(",");
    String[] date = params.get("interval").split(",");
    int interval = Integer.parseInt(value[0]);
    String start = addtimeSpaceToInterval(date[0], -interval);
    String end = addtimeSpaceToInterval(date[1], interval);
    params.set("interval", start + "," + end);

    // NOTE(review): the intermediate directory names are appended to the
    // parent path without a separator, so they become siblings of the parent
    // directory (e.g. "/a/bcandidatebuckets"). Kept as is because existing
    // runs rely on these locations; confirm whether this is intended.
    final String parentPrefix = outputPath.getParent().toString();

    // Query from the dataset.
    for (Path input : inputPaths) {
        String[] queryArgs = new String[7];
        queryArgs[0] = input.toString();
        queryArgs[1] = parentPrefix + "candidatebuckets/" + input.getName();
        queryArgs[2] = "shape:" + params.get("shape");
        queryArgs[3] = "rect:" + params.get("rect");
        queryArgs[4] = "interval:" + params.get("interval");
        queryArgs[5] = "-overwrite";
        queryArgs[6] = "-no-local";
        for (String x : queryArgs)
            System.out.println(x);
        STRangeQuery.main(queryArgs);
        System.out.println(
                "done with the STQuery from: " + input.toString() + "\n" + "candidate:" + queryArgs[1]);

    }
    // invoke the map-hash and reduce-join .
    FileSystem fs = outputPath.getFileSystem(params);
    Path inputstjoin;
    if (fs.exists(new Path(parentPrefix + "candidatebuckets/"))) {
        inputstjoin = new Path(parentPrefix + "candidatebuckets");
    } else {
        inputstjoin = new Path(parentPrefix + "/candidatebuckets");
    }
    Path hashedbucket = new Path(parentPrefix + "hashedbucket");
    long t1 = System.currentTimeMillis();
    // join hash step
    String[] hashArgs = new String[7];
    hashArgs[0] = inputstjoin.toString();
    hashArgs[1] = hashedbucket.toString();
    hashArgs[2] = "shape:" + params.get("shape");
    hashArgs[3] = "rect:" + params.get("rect");
    hashArgs[4] = "interval:" + params.get("interval");
    hashArgs[5] = "-overwrite";
    hashArgs[6] = "-no-local";
    for (String x : hashArgs)
        System.out.println(x);
    STHash.main(hashArgs);
    // Join refinement Step
    stJoin(hashedbucket, outputPath, params);
    long t2 = System.currentTimeMillis();
    System.out.println("Total join time: " + (t2 - t1) + " millis");
}

From source file:eu.stratosphere.hadoopcompatibility.FileOutputCommitterWrapper.java

License:Apache License

/**
 * Commits the output of a task: moves the files of the temporary task output
 * directory to the job output directory and then deletes the temporary
 * directory. Does nothing if the task has no temporary output directory or
 * if that directory does not exist.
 *
 * @param conf the job configuration
 * @param taskAttemptID the attempt whose output is committed
 * @throws IOException if accessing the file system fails
 */
public void commitTask(JobConf conf, TaskAttemptID taskAttemptID) throws IOException {
    final Path taskOutputPath = getTempTaskOutputPath(conf, taskAttemptID);
    if (taskOutputPath == null) {
        return;
    }
    final FileSystem fs = taskOutputPath.getFileSystem(conf);
    if (!fs.exists(taskOutputPath)) {
        return;
    }

    // The job output directory is two levels above the task output directory
    final Path jobOutputPath = taskOutputPath.getParent().getParent();
    // Move the task outputs to their final place
    moveTaskOutputs(conf, taskAttemptID, fs, jobOutputPath, taskOutputPath);
    // Delete the temporary task-specific output directory
    final boolean deleted = fs.delete(taskOutputPath, true);
    if (!deleted) {
        LOG.info("Failed to delete the temporary output" + " directory of task: " + taskAttemptID
                + " - " + taskOutputPath);
    }
    LOG.info("Saved output of task '" + taskAttemptID + "' to " + jobOutputPath);
}

From source file:fr.ens.biologie.genomique.eoulsan.MainHadoop.java

License:LGPL

/**
 * Creates the log handler that writes to the given log file, creating the
 * parent directory of the file first if it does not exist.
 *
 * @param logFile the URI of the log file
 * @return a stream handler that writes to the log file
 * @throws IOException if the parent directory or the file cannot be created
 * @throws NullPointerException if {@code logFile} is null
 */
@Override
protected Handler getLogHandler(final URI logFile) throws IOException {

    if (logFile == null) {
        throw new NullPointerException("The log file is null");
    }

    final Path loggerPath = new Path(logFile);
    final FileSystem loggerFs = loggerPath.getFileSystem(this.conf);
    final Path parentPath = loggerPath.getParent();

    // Create parent directory if necessary
    final boolean parentReady = loggerFs.exists(parentPath) || loggerFs.mkdirs(parentPath);
    if (!parentReady) {
        throw new IOException("Unable to create directory " + parentPath + " for log file:" + logFile);
    }

    return new StreamHandler(loggerFs.create(loggerPath), Globals.LOG_FORMATTER);
}

From source file:fr.ens.biologie.genomique.eoulsan.modules.expression.hadoop.ExpressionHadoopModule.java

License:LGPL

/**
 * Creates the pretreatment job for the expression estimation step.
 * <p>
 * The job reads the alignments file and writes its output next to it, under
 * the name of the input file without its extension followed by
 * {@code TSAM_EXTENSION}.
 *
 * @param parentConf the configuration copied to create the job configuration
 * @param context the task context
 * @param alignmentsData the alignments to process
 * @param genomeDescriptionData the genome description data
 * @return the configured job
 */
private static Job createJobPairedEnd(final Configuration parentConf, final TaskContext context,
        final Data alignmentsData, final Data genomeDescriptionData) throws IOException, BadBioEntryException {

    final DataFile inputDataFile = alignmentsData.getDataFile();
    final Path inputPath = new Path(inputDataFile.getSource());

    // The configuration must be complete before the job is created from it
    final Configuration jobConf = new Configuration(parentConf);
    jobConf.set(CommonHadoop.COUNTER_GROUP_KEY, COUNTER_GROUP);
    jobConf.set(GENOME_DESC_PATH_KEY, genomeDescriptionData.getDataFilename());

    final String jobName = "Pretreatment for the expression estimation step ("
            + alignmentsData.getName() + ", " + inputDataFile.getSource() + ")";
    final Job job = Job.getInstance(jobConf, jobName);
    job.setJarByClass(ExpressionHadoopModule.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);

    // Mapper and reducer
    job.setMapperClass(PreTreatmentExpressionMapper.class);
    job.setReducerClass(PreTreatmentExpressionReducer.class);

    // Output key and value types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // The output is written next to the input file
    final String outputName = StringUtils.filenameWithoutExtension(inputPath.getName()) + TSAM_EXTENSION;
    FileOutputFormat.setOutputPath(job, new Path(inputPath.getParent(), outputName));

    return job;
}