Example usage for org.apache.hadoop.fs Path toUri

List of usage examples for org.apache.hadoop.fs Path toUri

Introduction

On this page you can find example usage for org.apache.hadoop.fs Path toUri.

Prototype

public URI toUri() 

Document

Convert this Path to a URI.
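
A minimal sketch of what toUri() yields, assuming a made-up namenode address and file name (not taken from the sources below):

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathToUriDemo {
    public static void main(String[] args) {
        // Fully qualified HDFS path; hostname, port, and file name are placeholders.
        Path p = new Path("hdfs://namenode:8020/user/alice/data/part-00000");

        URI uri = p.toUri();
        System.out.println(uri);             // hdfs://namenode:8020/user/alice/data/part-00000
        System.out.println(uri.getScheme()); // hdfs
        System.out.println(uri.getPath());   // /user/alice/data/part-00000

        // A relative Path yields a URI with no scheme or authority.
        Path rel = new Path("data/part-00000");
        System.out.println(rel.toUri().getPath()); // data/part-00000
    }
}

As the examples below show, toUri().getPath() is the usual way to get the plain path string without the scheme and authority.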

Usage

From source file: com.trendmicro.hdfs.webdav.HDFSResource.java

License: Apache License

@Override
public void move(final DavResource resource) throws DavException {
    final HDFSResource dfsResource = (HDFSResource) resource;
    final Path destPath = dfsResource.getPath();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Moving '" + path.toUri().getPath() + "' to '" + destPath.toUri().getPath() + "'");
    }
    try {
        user.doAs(new PrivilegedExceptionAction<Void>() {
            public Void run() throws Exception {
                FileSystem.get(conf).rename(path, destPath);
                return null;
            }
        });
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }
}

From source file: com.trendmicro.hdfs.webdav.HDFSResource.java

License: Apache License

@Override
public void removeMember(final DavResource resource) throws DavException {
    final HDFSResource dfsResource = (HDFSResource) resource;
    final Path destPath = dfsResource.getPath();
    try {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Deleting '" + destPath.toUri().getPath() + "'");
        }
        boolean success = user.doAs(new PrivilegedExceptionAction<Boolean>() {
            public Boolean run() throws Exception {
                return FileSystem.get(conf).delete(destPath, true);
            }
        });
        if (!success) {
            throw new DavException(DavServletResponse.SC_INTERNAL_SERVER_ERROR);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }
}

From source file: com.tripadvisor.hadoop.BackupHdfs.java

License: Apache License

public static void main(String[] args) throws IOException {
    Path baseDir = null;
    String localPath = null;
    String preservePath = null;
    String sIgnoreTablesFilename = null;
    String sNoPreserveFilename = null;
    String sDateString = null;
    long size = 0;

    // UNIX dates for right now
    long now = new java.util.Date().getTime() / 1000;
    long maxDate = now;

    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("--hdfs-path")) {
            baseDir = new Path(args[++i]);
            continue;
        }
        if (args[i].equals("--local-path")) {
            localPath = args[++i];
            continue;
        }
        if (args[i].equals("--preserve-path")) {
            preservePath = args[++i];
            continue;
        }
        if (args[i].equals("--no-preserve")) {
            sNoPreserveFilename = args[++i];
            continue;
        }
        if (args[i].equals("--ignore-tables")) {
            sIgnoreTablesFilename = args[++i];
            continue;
        }
        if (args[i].equals("--sleep")) {
            try {
                m_nSleepSeconds = Integer.parseInt(args[++i]);
            } catch (Exception e) {
                System.err.println("ERROR: " + e.toString() + "\n");
                usage();
            }
            continue;
        }
        if (args[i].equals("--dry-run")) {
            m_bDryRun = true;
            continue;
        }
        if (args[i].equals("--date")) {
            sDateString = args[++i];
            continue;
        }
        if (args[i].equals("--max-date")) {
            maxDate = Long.parseLong(args[++i]);
            continue;
        }
        if (args[i].equals("--max-bytes")) {
            size = Long.parseLong(args[++i]);
            continue;
        }

        System.err.println("ERROR: unknown arg " + args[i]);
        usage();
    }

    if (baseDir == null || localPath == null || preservePath == null || sDateString == null) {
        usage();
    }

    long minDate;

    if ("yesterday".equals(sDateString)) {
        // figure out yesterday's dates
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_YEAR, -1); // add(), not roll(), so the year also decrements correctly on Jan 1

        // yesterday midnight
        cal.set(Calendar.HOUR_OF_DAY, 0);
        cal.set(Calendar.MINUTE, 0);
        cal.set(Calendar.SECOND, 0);
        cal.set(Calendar.MILLISECOND, 0);

        minDate = cal.getTimeInMillis() / 1000;

        // yesterday end of day
        cal.set(Calendar.HOUR_OF_DAY, 23);
        cal.set(Calendar.MINUTE, 59);
        cal.set(Calendar.SECOND, 59);
        cal.set(Calendar.MILLISECOND, 999);

        maxDate = cal.getTimeInMillis() / 1000;
    } else if ("last-week".equals(sDateString)) {
        minDate = maxDate - (7 * 24 * 60 * 60);
    } else if ("last-day".equals(sDateString)) {
        minDate = maxDate - (24 * 60 * 60);
    } else {
        // UNIX date since epoch of last backup
        minDate = Long.parseLong(sDateString);
    }

    long tmpDate = 0;
    BackupHdfs bak = new BackupHdfs();

    // initialize the list of tables to ignore
    if (sIgnoreTablesFilename != null) {
        bak.initializeTablesToIgnore(sIgnoreTablesFilename);
    }

    // initialize list of files to not preserve
    if (sNoPreserveFilename != null) {
        bak.initializeNoPreserve(sNoPreserveFilename);
    }

    ArrayList<Path> pathList = new ArrayList<Path>(2000);
    HashMap<Path, Long> hmTimestamps = new HashMap<Path, Long>();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // If the HDFS path is a dir continue
    if (fs.getFileStatus(baseDir).isDir()) {
        Calendar cal = Calendar.getInstance();

        System.err.println("");
        cal.setTimeInMillis(minDate * 1000);
        System.err.println("min date = " + cal.getTime().toString());

        cal.setTimeInMillis(maxDate * 1000);
        System.err.println("max date = " + cal.getTime().toString());

        System.err.println("");
        System.err.println("Searching filesystem: " + baseDir.toUri().getPath());

        bak.checkDir(fs, minDate, maxDate, baseDir, pathList, hmTimestamps);

        System.err.println("");
        System.err.println("Skipped " + m_nIgnoredTables + " files due to ignored tables");

        System.err.println("");
        System.err.println("Number of files to backup = " + pathList.size());

        System.err.println("Total bytes to backup = " + prettyPrintBytes(m_nTotalBytes));

        System.err.println("");
        System.err.println("sorting list of files...");
        Collections.sort(pathList, new DateComparator(hmTimestamps));
        System.err.println("done");

        System.err.println("");
        System.err.println("starting backup...");
        tmpDate = bak.backupFiles(localPath, preservePath, fs, pathList, size);

        bak.closeFiles();

        System.err.println("");
        System.err.println("backup completed...");
    }

    if (tmpDate == 0) {
        // If the size limit was not reached, print out the date for right now
        System.out.println(maxDate);
    } else {
        // Print out date for last file backed up
        System.err.println("Size limit reached.");
        System.out.println(tmpDate);
    }

    System.exit(0);
}

From source file: com.tripadvisor.hadoop.BackupHdfs.java

License: Apache License

/**
 * Method to move files from HDFS to the local filesystem.
 *
 * @param localPath    path on the machine's local filesystem
 * @param preservePath path under which existing local copies are preserved
 * @param fs           FileSystem object from HDFS
 * @param pathList     list of paths for files that might need to be backed up
 * @param size         max size in bytes to be backed up
 *
 * @return date of the last file backed up if the size limit was reached,
 *         else zero
 **/
public long backupFiles(String localPath, String preservePath, FileSystem fs, ArrayList<Path> pathList,
        long size) {
    Path fsPath;
    long tmpSize = 0;
    long tmpDate = 0;

    // Start iterating over all paths
    for (Path hdfsPath : pathList) {
        try {
            long nFileSize = fs.getContentSummary(hdfsPath).getLength();
            tmpSize = tmpSize + nFileSize;

            if ((tmpSize <= size) || (size == 0)) {
                FileStatus stat = fs.getFileStatus(hdfsPath);

                System.err.println("File " + hdfsPath.toUri().getPath() + " " + nFileSize + " bytes, "
                        + "perms: " + stat.getOwner() + "/" + stat.getGroup() + ", "
                        + stat.getPermission().toString());

                tmpDate = stat.getModificationTime() / 1000;

                String sFsPath = localPath + hdfsPath.toUri().getPath();
                fsPath = new Path(sFsPath);

                File f = new File(sFsPath);

                // COMMENTED OUT: until a few backup cycles run
                // and the mtime gets in fact set on all copied
                // files.
                //
                // ignore it if the file exists and has the same mtime
                // if (f.exists() && f.isFile() && f.lastModified() == stat.getModificationTime())
                // {
                // System.out.println("no need to backup " + f.toString() + ", mtime matches hdfs");
                // continue;
                // }

                if (false == m_bDryRun) {
                    // check if we need to back up the local file
                    // (not directory), if it already exists.
                    if (f.exists() && f.isFile()) {
                        // ignore files with substrings in the
                        // no-preserve file
                        if (true == doPreserveFile(sFsPath)) {
                            // move it to the backup path
                            String sNewPath = preservePath + hdfsPath.toUri().getPath();
                            File newFile = new File(sNewPath);

                            // create directory structure for new file?
                            if (false == newFile.getParentFile().exists()) {
                                if (false == newFile.getParentFile().mkdirs()) {
                                    System.err
                                            .println("Failed to mkdirs " + newFile.getParentFile().toString());
                                    System.exit(1);
                                }
                            }

                            // rename existing file to new location
                            if (false == f.renameTo(newFile)) {
                                System.err.println(
                                        "Failed to renameTo " + f.toString() + " to " + newFile.toString());
                                System.exit(1);
                            }

                            System.out.println("preserved " + f.toString() + " into " + newFile.toString());
                        } else {
                            System.out.println("skipped preservation of " + f.toString());
                        }
                    }

                    // copy from hdfs to local filesystem
                    fs.copyToLocalFile(hdfsPath, fsPath);

                    // set the mtime to match hdfs file
                    f.setLastModified(stat.getModificationTime());

                    // compare checksums on both files
                    compareChecksums(fs, hdfsPath, sFsPath);
                }

                // don't print the progress after every file -- go
                // by at least 1% increments
                long nPercentDone = (long) (100 * tmpSize / m_nTotalBytes);
                if (nPercentDone > m_nLastPercentBytesDone) {
                    System.out.println("progress: copied " + prettyPrintBytes(tmpSize) + ", " + nPercentDone
                            + "% done" + ", tstamp=" + tmpDate);

                    m_nLastPercentBytesDone = nPercentDone;
                }

                if (m_nSleepSeconds > 0) {
                    try {
                        Thread.sleep(1000 * m_nSleepSeconds);
                    } catch (Exception e2) {
                        // ignore
                    }
                }
            } else {
                return tmpDate;
            }
        } catch (IOException e) {
            System.err.println("FATAL ERROR: Something wrong with the file");
            System.err.println(e);
            System.out.println(tmpDate);
            System.exit(1);

            return 0;
        }
    }

    return 0;
}
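
Both backup classes build local destinations by appending hdfsPath.toUri().getPath() to a local root, which mirrors the HDFS directory layout on disk. A minimal sketch of that pattern, with a hypothetical root and path that are not from the original source:

import org.apache.hadoop.fs.Path;

public class MirrorPathDemo {
    public static void main(String[] args) {
        // Hypothetical local backup root and HDFS file.
        String localRoot = "/backup";
        Path hdfsPath = new Path("hdfs://namenode:8020/user/hive/warehouse/t1/part-00000");

        // toUri().getPath() drops the hdfs:// scheme and authority, leaving only the
        // absolute path, so it can be appended directly to the local root.
        String localCopy = localRoot + hdfsPath.toUri().getPath();
        System.out.println(localCopy); // /backup/user/hive/warehouse/t1/part-00000
    }
}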

From source file: com.tripadvisor.hadoop.BackupHdfs.java

License: Apache License

/**
 * Method to walk the HDFS filesystem depth-first and find all files.
 *
 * @param fs           FileSystem object from HDFS
 * @param minDate      oldest date for files to be backed up
 * @param maxDate      newest date for files to be backed up
 * @param p            path in HDFS to look for files
 * @param pathList     will be filled with all files found under p
 * @param hmTimestamps hashmap of timestamps for later sorting
 **/
public void checkDir(FileSystem fs, long minDate, long maxDate, Path p, ArrayList<Path> pathList,
        HashMap<Path, Long> hmTimestamps) {
    long tmpDate;
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName())
                    || sPath.startsWith("/mapred") || "ops".equals(p.getName())
                    || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            // dump the mkdir and chmod commands for this
            // directory -- skip root directory only
            {
                FileStatus stat = fs.getFileStatus(p);

                if (!sPath.equals("/")) {
                    m_wrMkdirs.println("hadoop fs -mkdir " + sPath);
                }

                m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

                short perm = stat.getPermission().toShort();
                m_wrChmods.println("hadoop fs -chmod " + Long.toOctalString(perm) + " " + sPath);
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, minDate, maxDate, fStat[i].getPath(), pathList, hmTimestamps);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // another database to regular hive tables to
            // partitioned hive tables.  We use table names to
            // both exclude some from the backup, and for the rest
            // to dump out the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                m_nIgnoredTables++;

                if (m_nIgnoredTables < 5) {
                    System.out.println("Skipping ignore-table file: " + sPath);
                } else if (m_nIgnoredTables == 5) {
                    System.out.println("(...not showing other skipped tables...)");
                }
                return;
            }

            FileStatus stat = fs.getFileStatus(p);

            tmpDate = stat.getModificationTime() / 1000;

            // store the chmods/chowns for all files
            m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

            m_wrChmods.println("hadoop fs -chmod " + stat.getPermission().toShort() + " " + sPath);

            // check dates.  is it too young?
            if (tmpDate < minDate) {
                return;
            }

            // is the file too recent?
            if (tmpDate > maxDate) {
                //System.out.println("file too recent: " + sPath);
                return;
            }

            // file timestamp is ok
            pathList.add(p);

            hmTimestamps.put(p, new Long(tmpDate));

            // store info about total bytes needed to back up
            m_nTotalBytes += fs.getContentSummary(p).getLength();
        }
    } catch (IOException e) {
        System.err.println("ERROR: could not open " + p + ": " + e);

        // System.exit(1) ;
    }
}

From source file: com.tripadvisor.hadoop.VerifyHdfsBackup.java

License: Apache License

public static void main(String[] args) throws IOException {
    Path baseDir = null;
    String sLocalPathRoot = null;
    String sIgnoreTablesFilename = null;
    String sMaxDateString = null;
    String sFromFilename = null;

    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("--hdfs-path")) {
            baseDir = new Path(args[++i]);
            continue;
        }
        if (args[i].equals("--local-path")) {
            sLocalPathRoot = args[++i];
            continue;
        }
        if (args[i].equals("--ignore-tables")) {
            sIgnoreTablesFilename = args[++i];
            continue;
        }
        if (args[i].equals("--max-date")) {
            sMaxDateString = args[++i];
            continue;
        }
        if (args[i].equals("--from-file")) {
            sFromFilename = args[++i];
            continue;
        }

        System.err.println("ERROR: unknown arg " + args[i]);
        usage();
    }

    if (baseDir == null || sLocalPathRoot == null) {
        usage();
    }

    // UNIX date for right now
    long maxDate = new java.util.Date().getTime() / 1000;

    if (sMaxDateString != null) {
        // UNIX date since epoch of last backup
        maxDate = Long.parseLong(sMaxDateString);
    }

    VerifyHdfsBackup bak = new VerifyHdfsBackup();

    // initialize the list of tables to ignore
    if (sIgnoreTablesFilename != null) {
        bak.initializeTablesToIgnore(sIgnoreTablesFilename);
    }

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    if (sFromFilename != null) {
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(sFromFilename));
            String sFile;
            while ((sFile = in.readLine()) != null) {
                bak.checkDir(fs, new Path(sFile), sLocalPathRoot, maxDate);
            }
        } catch (Exception e) {
            System.out.println("ERROR: Failed to read from-file " + sFromFilename + ": " + e);
        } finally {
            try {
                in.close();
            } catch (Exception e2) {
            }
        }
    } else {
        // If the HDFS path is a dir continue
        if (fs.getFileStatus(baseDir).isDir()) {
            System.out.println("Searching filesystem: " + baseDir.toUri().getPath());

            bak.checkDir(fs, baseDir, sLocalPathRoot, maxDate);
        }
    }

    System.exit(0);
}

From source file: com.tripadvisor.hadoop.VerifyHdfsBackup.java

License: Apache License

/**
 * Method to walk the HDFS filesystem depth-first and find all files.
 *
 * @param fs             FileSystem object from HDFS
 * @param p              path in HDFS to look for files
 * @param sLocalPathRoot root of the local backup copy to verify against
 * @param maxDate        newest date for files to be backed up
 **/
public void checkDir(FileSystem fs, Path p, String sLocalPathRoot, long maxDate) {
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName())
                    || sPath.startsWith("/mapred") || "ops".equals(p.getName())
                    || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, fStat[i].getPath(), sLocalPathRoot, maxDate);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // tripmonster to regular hive tables to partitioned
            // hive tables.  We use table names to both exclude
            // some from the backup, and for the rest to dump out
            // the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                return;
            }

            // check the file
            FileStatus stat = fs.getFileStatus(p);

            // ignore files that are too new
            if ((stat.getModificationTime() / 1000) > maxDate) {
                System.out.println("IGNORING: " + sPath + " too new");
                return;
            }

            // warn about files that have a mis-matching block
            // size.  The checksum check will fail for them
            // anyways, so just catch it here.
            if (stat.getBlockSize() != N_BLOCK_SIZE) {
                System.out.println("ERROR: non-default block size (" + (stat.getBlockSize() / (1024 * 1024))
                        + "M) would fail checksum: " + sPath);
                return;
            }

            // get HDFS checksum
            FileChecksum ck = fs.getFileChecksum(p);
            String sCk, sCkShort;
            if (ck == null) {
                sCk = sCkShort = "<null>";
            } else {
                sCk = ck.toString();
                sCkShort = sCk.replaceAll("^.*:", "");
            }

            System.out.println(sPath + " len=" + stat.getLen() + " " + stat.getOwner() + "/" + stat.getGroup()
                    + " checksum=" + sCk);

            // find the local file
            String sFsPath = sLocalPathRoot + p.toUri().getPath();
            File fLocal = new File(sFsPath);
            if (!fLocal.exists()) {
                Calendar cal = Calendar.getInstance();
                cal.setTimeInMillis(stat.getModificationTime());

                System.out.println("ERROR: file does not exist: " + sFsPath + " hdfs-last-mtime="
                        + cal.getTime().toString());
                return;
            }
            if (!fLocal.isFile()) {
                System.out.println("ERROR: path is not a file: " + sFsPath);
                return;
            }
            if (stat.getLen() != fLocal.length()) {
                System.out.println("ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen()
                        + " fslen=" + fLocal.length());
                return;
            }

            // get local fs checksum
            FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
            if (ckLocal == null) {
                System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
                return;
            }

            // compare checksums as a string, to strip the
            // algorithm name from the beginning
            String sCkLocal = ckLocal.toString();
            String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

            if (false == sCkShort.equals(sCkLocalShort)) {
                System.out.println(
                        "ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk + "\nlocal= " + sCkLocal);
                return;
            }
        }
    } catch (IOException e) {
        System.out.println("ERROR: could not open " + p + ": " + e);

        // System.exit(1) ;
    }
}

From source file: com.turn.sorcerer.dependency.impl.HDFSPathDependency.java

License: Open Source License

@Override
public boolean check(int iterNo) {

    Collection<String> paths = paths(iterNo);

    // zero paths is considered invalid
    if (paths == null || paths.size() == 0) {
        return false;
    }

    if (fs == null) {
        try {
            fs = FileSystem.get(new Configuration());
        } catch (IOException e) {
            logger.error("Filesystem unreachable!", e);
            return false;
        }
    }

    for (String p : paths) {
        logger.debug("Checking path {}", p);

        if (p == null) {
            return false;
        }

        Path path = new Path(p);

        try {
            if (!fs.exists(path)) {
                logger.debug("Dependency Check Failed - Path does not exist: {}", path.toUri());
                return false;
            }
        } catch (IOException e) {
            logger.error("Could not check existence of path {}", path);
            return false;
        }
    }

    return true;
}

From source file: com.turn.sorcerer.status.impl.HDFSStatusStorage.java

License: Open Source License

@Override
public void commitStatus(String identifier, int jobId, Status status, DateTime time, boolean overwrite)
        throws IOException {
    Preconditions.checkNotNull(jobId);

    if (fs == null) {
        try {
            fs = FileSystem.get(new Configuration());
        } catch (IOException e) {
            logger.error("Filesystem unreachable!", e);
            return;
        }
    }

    Path directoryPath = new Path(getStatusPath(identifier, jobId));

    // Clear old status
    if (overwrite && fs.exists(directoryPath)) {
        fs.delete(directoryPath, true);
    }

    // Create directory if doesn't exist
    if (fs.exists(directoryPath) == false) {
        fs.mkdirs(directoryPath);
    }

    // Commit new status
    Path path = new Path(directoryPath, status.getString());

    fs.createNewFile(path);
    fs.setTimes(path, time.toInstant().getMillis(), time.toInstant().getMillis());
    logger.debug("Created new status file: " + path.toUri());
}

From source file: com.twitter.algebra.AlgebraCommon.java

License: Apache License

/**
 * Convert an in-memory representation of a matrix to the distributed MapDir
 * format so that it can be used in distributed jobs.
 *
 * @param origMatrix the in-memory matrix to convert
 * @param outPath    parent directory that will contain the matrix files
 * @param tmpPath    temporary path for the resulting DistributedRowMatrix
 * @param label      prefix for the output directory name
 * @return a DistributedRowMatrix backed by the files written under outPath
 * @throws Exception
 */
public static DistributedRowMatrix toMapDir(Matrix origMatrix, Path outPath, Path tmpPath, String label)
        throws Exception {
    Configuration conf = new Configuration();
    Path outputDir = new Path(outPath, label + origMatrix.numRows() + "x" + origMatrix.numCols());
    FileSystem fs = FileSystem.get(outputDir.toUri(), conf);
    if (!fs.exists(outputDir)) {
        Path mapDir = new Path(outputDir, "matrix-k-0");
        Path outputFile = new Path(mapDir, "data");
        @SuppressWarnings("deprecation")
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputFile, IntWritable.class,
                VectorWritable.class);
        VectorWritable vectorw = new VectorWritable();
        IntWritable intw = new IntWritable();
        try {
            for (int r = 0; r < origMatrix.numRows(); r++) {
                Vector vector = origMatrix.viewRow(r);
                vectorw.set(vector);
                intw.set(r);
                writer.append(intw, vectorw);
            }
        } finally {
            writer.close();
        }
        MapFile.fix(fs, mapDir, IntWritable.class, VectorWritable.class, false, conf);
    } else {
        log.warn("----------- Skip matrix " + outputDir + " - already exists");
    }
    DistributedRowMatrix dMatrix = new DistributedRowMatrix(outputDir, tmpPath, origMatrix.numRows(),
            origMatrix.numCols());
    dMatrix.setConf(conf);
    return dMatrix;
}
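
Note the call FileSystem.get(outputDir.toUri(), conf) above: passing the path's URI selects the filesystem implementation registered for that URI's scheme instead of the cluster default from fs.defaultFS. A minimal sketch of the same idea, using a made-up local path (FsForPathDemo is illustrative, not part of the original source):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FsForPathDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Fully qualified local path; on a cluster this could just as well be an
        // hdfs:// path, and the same call would then return a DistributedFileSystem.
        Path outputDir = new Path("file:///tmp/matrix-demo");

        // The filesystem is chosen from the URI scheme, not from fs.defaultFS.
        FileSystem fs = FileSystem.get(outputDir.toUri(), conf);
        System.out.println(fs.getUri()); // file:///

        // Plain FileSystem.get(conf) returns whatever fs.defaultFS points at.
        FileSystem defaultFs = FileSystem.get(conf);
        System.out.println(defaultFs.getUri());
    }
}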