Example usage for org.apache.hadoop.fs FileSystem create

List of usage examples for org.apache.hadoop.fs FileSystem create

Introduction

This page collects example usages of org.apache.hadoop.fs FileSystem create.

Prototype

public FSDataOutputStream create(Path f, boolean overwrite) throws IOException

Document

Create an FSDataOutputStream at the indicated Path.
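
A minimal sketch of the call pattern before the full examples below, assuming a default Configuration; the class name CreateExample and the path /tmp/example.txt are placeholders. The FileSystem is resolved from the Path, the stream is created, written, and closed.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.txt"); // hypothetical path
        // Resolve the FileSystem that owns this path (local FS, HDFS, ...).
        FileSystem fs = path.getFileSystem(conf);
        // overwrite=true replaces an existing file; with overwrite=false the
        // call throws an IOException if the file already exists.
        FSDataOutputStream out = fs.create(path, true);
        try {
            out.writeBytes("hello\n");
        } finally {
            out.close();
        }
    }
}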

Usage

From source file:com.marklogic.contentpump.OutputArchive.java

License:Apache License

private void newOutputStream() throws IOException {
    // use the constructor filename for the first zip,
    // then add filecount to subsequent archives, if any.
    int count = fileCount.getAndIncrement();
    currPath = newPackagePath(basePath, count, 6);
    if (outputStream != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("closing output archive: " + currPath);
        }
        outputStream.flush();
        outputStream.close();
    }
    currentFileBytes = 0;
    currentEntries = 0;

    Path zpath = new Path(currPath);
    FileSystem fs = zpath.getFileSystem(conf);
    if (fs.exists(zpath)) {
        throw new IOException(zpath + " already exists.");
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Creating output archive: " + zpath);
        LOG.debug("Default charset: " + Charset.defaultCharset());
    }
    // if fs instanceof DistributedFileSystem, use hadoop api; otherwise,
    // use java api
    if (fs instanceof DistributedFileSystem) {
        FSDataOutputStream fsout = fs.create(zpath, false);
        outputStream = new ZipOutputStream(fsout);
    } else {
        File f = new File(zpath.toUri().getPath());
        if (!f.exists()) {
            f.getParentFile().mkdirs();
            f.createNewFile();
        }
        FileOutputStream fos = new FileOutputStream(f, false);
        outputStream = new ZipOutputStream(fos);
    }

}
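
A note on the design: fs.create(zpath, false) passes overwrite=false, so the create itself fails with an IOException if the path appears between the fs.exists() check and the create. The java.io fallback creates parent directories explicitly, since FileOutputStream, unlike FileSystem.create, does not create missing parents.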

From source file:com.marklogic.contentpump.SingleDocumentWriter.java

License:Apache License

@Override
public void write(DocumentURI uri, MarkLogicDocument content) throws IOException, InterruptedException {
    OutputStream os = null;
    try {
        String childPath = URIUtil.getPathFromURI(uri);
        Path path;
        if (childPath.charAt(0) == '/') {
            // concatenate outputPath with path to form the path
            path = new Path(dir.toString() + childPath);
        } else {
            path = new Path(dir, childPath);
        }
        FileSystem fs = path.getFileSystem(conf);
        if (fs instanceof DistributedFileSystem) {
            os = fs.create(path, false);
        } else {
            File f = new File(path.toUri().getPath());
            if (!f.exists()) {
                f.getParentFile().mkdirs();
                f.createNewFile();
            }
            os = new FileOutputStream(f, false);
        }

        ContentType type = content.getContentType();
        if (ContentType.BINARY.equals(type)) {
            if (content.isStreamable()) {
                InputStream is = null;
                try {
                    is = content.getContentAsByteStream();
                    long size = content.getContentSize();
                    long bufSize = Math.min(size, 512 << 10);
                    byte[] buf = new byte[(int) bufSize];
                    for (long toRead = size, read = 0; toRead > 0; toRead -= read) {
                        read = is.read(buf, 0, (int) bufSize);
                        if (read > 0) {
                            os.write(buf, 0, (int) read);
                        } else {
                            LOG.error("Premature EOF: uri=" + uri + ",toRead=" + toRead);
                            break;
                        }
                    }
                } finally {
                    if (is != null) {
                        is.close();
                    }
                }
            } else {
                os.write(content.getContentAsByteArray());
            }
        } else if (ContentType.TEXT.equals(type) || ContentType.XML.equals(type)
                || ContentType.JSON.equals(type)) {
            if (encoding.equals("UTF-8")) {
                Text t = content.getContentAsText();
                os.write(t.getBytes(), 0, t.getLength());
            } else {
                String t = content.getContentAsString();
                os.write(t.getBytes(encoding));
            }
            if (LOG.isTraceEnabled()) {
                Text t = content.getContentAsText();
                LOG.trace(t);
                byte[] bytes = content.getContentAsByteArray();
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < bytes.length; i++) {
                    sb.append(Byte.toString(bytes[i]));
                    sb.append(" ");
                }
                LOG.trace(sb);
            }
        } else {
            LOG.error("Skipping " + uri + ".  Unsupported content type: " + type.name());
        }
    } catch (Exception e) {
        LOG.error("Error saving: " + uri, e);
    } finally {
        if (os != null) {
            os.close();
        }
    }
}
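
For streamable binary content, this writer copies through a reusable buffer capped at 512 KB (512 << 10 bytes) and logs an error and aborts on a premature EOF; non-streamable binary content is written in a single call from a byte array. Text, XML, and JSON are written as UTF-8 bytes directly, or re-encoded via String.getBytes(encoding) for any other output encoding.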

From source file:com.marklogic.mapreduce.examples.BinaryReader.java

License:Apache License

@Override
public void write(DocumentURI uri, BytesWritable content) throws IOException, InterruptedException {
    String pathStr = dir.getName() + uri.getUri();
    Path path = new Path(pathStr);
    FileSystem fs = path.getFileSystem(conf);
    FSDataOutputStream out = fs.create(path, false);
    System.out.println("writing to: " + path);
    out.write(content.getBytes(), 0, content.getLength());
    out.flush();
    out.close();
}

From source file:com.metamx.druid.indexer.Utils.java

License:Open Source License

public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting)
        throws IOException {
    OutputStream retVal;
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());

    if (fs.exists(outputPath)) {
        if (deleteExisting) {
            fs.delete(outputPath, false);
        } else {
            throw new ISE("outputPath[%s] must not exist.", outputPath);
        }
    }

    if (!FileOutputFormat.getCompressOutput(job)) {
        retVal = fs.create(outputPath, false);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        outputPath = new Path(outputPath.toString() + codec.getDefaultExtension());

        retVal = codec.createOutputStream(fs.create(outputPath, false));
    }

    return retVal;
}
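
When compressed output is enabled, the codec's default extension (for example .gz for GzipCodec) is appended to the output path and the raw stream from fs.create is wrapped by codec.createOutputStream, so callers receive a plain OutputStream either way.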

From source file:com.ml.ira.algos.LogisticModelParameters.java

License:Apache License

/**
 * Saves the model to the given path.
 */
public void saveTo(Path path) throws IOException {
    Closeables.close(lr, false);
    targetCategories = getCsvRecordFactory().getTargetCategories();
    FileSystem ofs = path.getFileSystem(new Configuration());
    FSDataOutputStream out = ofs.create(path, true);
    write(out);
    out.flush();
    out.close(); // close the stream, not the (possibly shared) FileSystem instance
}
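
Unlike most examples on this page, saveTo passes overwrite=true, so an existing model file at the given path is replaced rather than triggering an IOException.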

From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringReduce.java

@Override
protected void cleanup(Context context) throws IOException {
    String uri = "/user/hduser/k_mean.txt";
    Path path = new Path(uri);

    // Write the latest values of the clusters' centroids to the 'k_mean.txt' file
    try {
        FileSystem fs = FileSystem.get(URI.create(uri), context.getConfiguration());
        if (fs.exists(path))
            fs.delete(path, true);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        for (int i = 0; i < number_of_clusters; i++) {
            for (int j = 0; j < feature_size; j++)
                br.write(((Float) ((FloatWritable) cetroids_of_all_clusters.get(i).get()[j]).get()).toString()
                        + ",");
            br.write("\n");
        }
        br.close();
    } catch (Exception e) {
        System.out.println("File k_mean.txt not found");
    }

    // Write the centroid values for the current iteration to a file under '/user/hduser/K-means/...'

    uri = "/user/hduser/K-means/means-" + current_iteration_num + ".txt";
    path = new Path(uri);

    try {
        FileSystem fs = FileSystem.get(context.getConfiguration());
        if (current_iteration_num == 0)
            fs.delete(new Path("/user/hduser/K-means"), true);
        OutputStreamWriter osw = new OutputStreamWriter(fs.create(path, true));
        BufferedWriter br = new BufferedWriter(osw);
        for (int i = 0; i < number_of_clusters; i++) {
            for (int j = 0; j < feature_size; j++)
                br.write((Float) ((FloatWritable) cetroids_of_all_clusters.get(i).get()[j]).get() + ",");
            br.write("\n");
        }
        br.close();
    } catch (Exception e) {
        System.out.println("File not found");
    }
}

From source file:com.ML_Hadoop.MultipleLinearRegression.MultipleLinearRegressionReduce.java

@Override
protected void cleanup(Context context) throws IOException {
    String uri = "/user/hduser/theta.txt";
    Path path = new Path(uri);

    try {
        FileSystem fs = FileSystem.get(URI.create(uri), context.getConfiguration());
        if (fs.exists(path))
            fs.delete(path, true);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        for (int i = 0; i < theta.length; i++)
            br.write(theta[i].toString() + ",");
        br.write("\n");
        br.close();
    } catch (Exception e) {
        System.out.println("File not found");
    }

    uri = "/user/hduser/LinearReg/theta-" + iteration + ".txt";
    path = new Path(uri);

    try {
        FileSystem fs = FileSystem.get(context.getConfiguration());
        if (iteration == 0)
            fs.delete(new Path("/user/hduser/LinearReg"), true);
        OutputStreamWriter osw = new OutputStreamWriter(fs.create(path, true));
        BufferedWriter br = new BufferedWriter(osw);
        br.write(prediction_error + ", ");
        for (int i = 0; i < theta.length; i++)
            br.write(theta[i].toString() + ", ");
        br.write("\n");
        br.close();
    } catch (Exception e) {
        System.out.println("File not found");
    }
}

From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierReduce_Continuous_Features.java

@Override
protected void cleanup(Context context) throws IOException {
    String uri = "/user/hduser/naive_bayes_continuous.txt";
    Path path = new Path(uri);

    try {
        FileSystem fs = FileSystem.get(URI.create(uri), context.getConfiguration());
        if (fs.exists(path))
            fs.delete(path, true);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        br.write("class_id,     mu(mean),     std");
        br.write("-------------------------------\n");
        for (int i = 0; i < number_of_classes; i++) {
            br.write("-------- Class-" + i + "-------\n");
            for (int j = 0; j < number_of_features; j++) {
                br.write(((FloatWritable) probablity_info_output.get(i)[j].get(new Text("class_id_mu")))
                        + ",  ");
                br.write(((FloatWritable) probablity_info_output.get(i)[j].get(new Text("class_id_std")))
                        + "\n");
            }
            br.write("\n");
        }
        br.close();
    } catch (Exception e) {
        System.out.println("File /user/hduser/naive_bayes_continuous.txt cannot be found");
    }

}

From source file:com.mongodb.hadoop.splitter.BSONSplitter.java

License:Apache License

public void writeSplits() throws IOException {
    if (!getConf().getBoolean("bson.split.write_splits", true)) {
        LOG.info("bson.split.write_splits is set to false - skipping writing splits to disk.");
        return;
    } else {
        LOG.info("Writing splits to disk.");
    }

    if (splitsList == null) {
        LOG.info("No splits found, skipping write of splits file.");
        return; // without this, the loop below would throw a NullPointerException
    }

    Path outputPath = new Path(inputPath.getParent(), "." + inputPath.getName() + ".splits");

    FileSystem pathFileSystem = outputPath.getFileSystem(getConf());
    FSDataOutputStream fsDataOut = null;
    try {
        fsDataOut = pathFileSystem.create(outputPath, false);
        for (FileSplit inputSplit : splitsList) {
            BSONObject splitObj = BasicDBObjectBuilder.start().add("s", inputSplit.getStart())
                    .add("l", inputSplit.getLength()).get();
            byte[] encodedObj = bsonEnc.encode(splitObj);
            fsDataOut.write(encodedObj, 0, encodedObj.length);
        }
    } catch (IOException e) {
        LOG.error("Could not create splits file: " + e.getMessage());
        throw e;
    } finally {
        if (fsDataOut != null) {
            fsDataOut.close();
        }
    }
}
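
Here the splits file is created with overwrite=false, so writeSplits fails with an IOException (logged and rethrown in the catch block) if a splits file for the same input already exists, rather than silently replacing it.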

From source file:com.moz.fiji.mapreduce.util.SerializeLoggerAspect.java

License:Apache License

/**
 * Logic to serialize collected profiling content to a file on HDFS. The files are stored
 * in the current working directory for this context, in a folder specified by STATS_DIR. The per
 * task file is named by the task attempt id.
 * We obtain the profiling stats collected by the LogTimerAspect in FijiSchema. The format of the
 * file is as follows: Job Name, Job ID, Task Attempt, Function Signature,
 * Aggregate Time (nanoseconds), Number of Invocations, Time per call (nanoseconds)'\n'
 *
 * @param context The {@link TaskInputOutputContext} for this job.
 * @throws IOException If the writes to HDFS fail.
 */
private void serializeToFile(TaskInputOutputContext context) throws IOException {
    Path parentPath = new Path(context.getWorkingDirectory(), STATS_DIR);
    FileSystem fs = parentPath.getFileSystem(context.getConfiguration());
    fs.mkdirs(parentPath);
    Path path = new Path(parentPath, context.getTaskAttemptID().toString());
    OutputStreamWriter out = new OutputStreamWriter(fs.create(path, true), "UTF-8");
    try {
        out.write("Job Name, Job ID, Task Attempt, Function Signature, Aggregate Time (nanoseconds), "
                + "Number of Invocations, Time per call (nanoseconds)\n");

        ConcurrentHashMap<String, LoggingInfo> signatureTimeMap = mLogTimerAspect.getSignatureTimeMap();
        for (Map.Entry<String, LoggingInfo> entrySet : signatureTimeMap.entrySet()) {
            writeProfileInformation(out, context, entrySet.getKey(), entrySet.getValue());
        }

        signatureTimeMap = mMRLogTimerAspect.getSignatureTimeMap();
        for (Map.Entry<String, LoggingInfo> entrySet : signatureTimeMap.entrySet()) {
            writeProfileInformation(out, context, entrySet.getKey(), entrySet.getValue());
        }
    } finally {
        out.close();
    }
}