Example usage for org.apache.hadoop.io SequenceFile createWriter

List of usage examples for org.apache.hadoop.io SequenceFile createWriter

Introduction

On this page you can find example usages of org.apache.hadoop.io.SequenceFile.createWriter.

Prototype

@Deprecated
public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass)
        throws IOException 

Source Link

Document

Construct the preferred type of SequenceFile Writer.

Usage

From source file:ComRoughSetApproInputSampler.java

License:Apache License

/**
 * Write a partition file for the given job, using the Sampler provided.
 * Queries the sampler for a sample keyset, sorts by the output key
 * comparator, selects the keys for each rank, and writes to the destination
 * returned from {@link TotalOrderPartitioner#getPartitionFile}.
 * An existing file at the destination is deleted first.
 *
 * @param job     job supplying the configuration, input format class, sort
 *                comparator, map output key class and number of reduce tasks
 * @param sampler sampler queried for the sample keys
 * @throws IOException            if the partition file cannot be deleted or written
 * @throws ClassNotFoundException declared for the job/sampler lookups used here
 * @throws InterruptedException   declared for the sampler call
 */
@SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = job.getConfiguration();
    final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    int numPartitions = job.getNumReduceTasks();
    K[] samples = (K[]) sampler.getSample(inf, job);
    LOG.info("Using " + samples.length + " samples");
    RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
    Arrays.sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
    FileSystem fs = dst.getFileSystem(conf);
    if (fs.exists(dst)) {
        fs.delete(dst, false);
    }
    // try-with-resources: the writer is closed even when an append fails,
    // instead of leaking the open output stream.
    try (SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(),
            NullWritable.class)) {
        NullWritable nullValue = NullWritable.get();
        float stepSize = samples.length / (float) numPartitions;
        int last = -1;
        // numPartitions - 1 split points are written, one per partition boundary.
        for (int i = 1; i < numPartitions; ++i) {
            int k = Math.round(stepSize * i);
            // Skip forward past samples equal to the previously written key
            // so the same split point is not emitted twice.
            // NOTE(review): k is not bounds-checked; with few or heavily
            // duplicated samples it looks like it can run past the end of
            // the array - confirm against callers before relying on this.
            while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
                ++k;
            }
            writer.append(samples[k], nullValue);
            last = k;
        }
    }
}

From source file:ac.keio.sslab.nlp.lda.RowIdJob.java

License:Apache License

@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {
    // Register the input/output options before the arguments are parsed.
    addInputOption();
    addOutputOption();

    // A null parse result means the arguments were not usable; report failure.
    if (parseArguments(args) == null) {
        return -1;
    }

    final Configuration conf = getConf();
    final FileSystem fs = FileSystem.get(conf);

    final Path outputDir = getOutputPath();
    final Path indexPath = new Path(outputDir, "docIndex");
    final Path matrixPath = new Path(outputDir, "matrix");

    try (SequenceFile.Writer indexWriter =
                 SequenceFile.createWriter(fs, conf, indexPath, IntWritable.class, Text.class);
         SequenceFile.Writer matrixWriter =
                 SequenceFile.createWriter(fs, conf, matrixPath, IntWritable.class, VectorWritable.class)) {
        final IntWritable rowId = new IntWritable();
        int rowCount = 0;
        int columnCount = 0;

        final SequenceFileDirIterable<Text, VectorWritable> input =
                new SequenceFileDirIterable<Text, VectorWritable>(
                        getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(), null, true, conf);
        for (Pair<Text, VectorWritable> entry : input) {
            final VectorWritable vector = entry.getSecond();
            // Each input record gets the next sequential row id: the index file
            // maps id -> original key, the matrix file maps id -> vector.
            rowId.set(rowCount);
            indexWriter.append(rowId, entry.getFirst());
            matrixWriter.append(rowId, vector);
            rowCount++;
            // The column count reported below is the size of the last vector seen.
            columnCount = vector.get().size();
        }

        log.info("Wrote out matrix with {} rows and {} columns to {}", rowCount, columnCount, matrixPath);
    }
    return 0;
}

From source file:bigsatgps.BigDataHandler.java

License:Open Source License

/**
 *
 * @param infile/*from ww  w  .  j a v  a 2  s. c  o m*/
 * @return
 * @throws Exception
 */
public String ImageToSequence(String infile) throws Exception {
    String log4jConfPath = "lib/log4j.properties";
    PropertyConfigurator.configure(log4jConfPath);
    confHadoop = new Configuration();
    confHadoop.addResource(new Path("/hadoop/projects/hadoop-1.0.4/conf/core-site.xml"));
    confHadoop.addResource(new Path("/hadoop/projects/hadoop-1.0.4/conf/hdfs-site.xml"));
    FileSystem fs = FileSystem.get(confHadoop);
    Path inPath = new Path(infile);
    String outfile = infile.substring(0, infile.indexOf(".")) + ".seq";
    Path outPath = new Path(outfile);
    System.out.println();
    System.out.println("Successfully created the sequencefile " + outfile);
    FSDataInputStream in = null;
    Text key = new Text();
    BytesWritable value = new BytesWritable();
    SequenceFile.Writer writer = null;
    try {
        in = fs.open(inPath);
        byte buffer[] = new byte[in.available()];
        in.read(buffer);
        writer = SequenceFile.createWriter(fs, confHadoop, outPath, key.getClass(), value.getClass());
        writer.append(new Text(inPath.getName()), new BytesWritable(buffer));
        IOUtils.closeStream(writer);
        return outfile;
    } catch (IOException e) {
        System.err.println("Exception MESSAGES = " + e.getMessage());
        IOUtils.closeStream(writer);
        return null;
    }
}

From source file:boa.datagen.SeqProjectCombiner.java

License:Apache License

/**
 * Combines all {@code projects-*.seq} files under {@code tmprepcache/2015-07}
 * into a single {@code repcache/2015-07/projects.seq}, keeping one record per key.
 *
 * <p>Pass 1 decides which file supplies each key: a later file replaces an
 * earlier one until a record is seen whose project has at least one code
 * repository and whose first repository has revisions; from then on the key
 * is pinned to that file. Pass 2 copies each record from the file chosen
 * for its key.
 *
 * @param args unused
 * @throws IOException if listing, opening or closing files fails
 */
public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.default.name", "hdfs://boa-njt/");
    FileSystem fileSystem = FileSystem.get(conf);
    try {
        String base = conf.get("fs.default.name", "");

        // key -> name of the file whose record should be kept
        HashMap<String, String> sources = new HashMap<String, String>();
        // keys already pinned to a file
        HashSet<String> marks = new HashSet<String>();
        FileStatus[] files = fileSystem.listStatus(new Path(base + "tmprepcache/2015-07"));

        // Pass 1: choose the source file for every key.
        for (int i = 0; i < files.length; i++) {
            FileStatus file = files[i];
            String name = file.getPath().getName();
            if (name.startsWith("projects-") && name.endsWith(".seq")) {
                System.out.println("Reading file " + i + " in " + files.length + ": " + name);
                try (SequenceFile.Reader r = new SequenceFile.Reader(fileSystem, file.getPath(), conf)) {
                    final Text key = new Text();
                    final BytesWritable value = new BytesWritable();
                    try {
                        while (r.next(key, value)) {
                            String s = key.toString();
                            if (marks.contains(s)) {
                                continue;
                            }
                            Project p = Project
                                    .parseFrom(CodedInputStream.newInstance(value.getBytes(), 0, value.getLength()));
                            if (p.getCodeRepositoriesCount() > 0
                                    && p.getCodeRepositories(0).getRevisionsCount() > 0) {
                                marks.add(s);
                            }
                            sources.put(s, name);
                        }
                    } catch (Exception e) {
                        // A broken file is reported and skipped; the rest are still processed.
                        System.err.println(name);
                        e.printStackTrace();
                    }
                }
            }
        }

        // Pass 2: copy each record from the file selected for its key.
        try (SequenceFile.Writer w = SequenceFile.createWriter(fileSystem, conf,
                new Path(base + "repcache/2015-07/projects.seq"), Text.class, BytesWritable.class)) {
            for (int i = 0; i < files.length; i++) {
                FileStatus file = files[i];
                String name = file.getPath().getName();
                if (name.startsWith("projects-") && name.endsWith(".seq")) {
                    System.out.println("Reading file " + i + " in " + files.length + ": " + name);
                    try (SequenceFile.Reader r = new SequenceFile.Reader(fileSystem, file.getPath(), conf)) {
                        final Text key = new Text();
                        final BytesWritable value = new BytesWritable();
                        try {
                            while (r.next(key, value)) {
                                String s = key.toString();
                                // name.equals(...) is null-safe: a key that pass 1 never
                                // recorded (e.g. its file failed mid-read) is skipped
                                // instead of aborting the rest of this file with an NPE.
                                if (name.equals(sources.get(s))) {
                                    w.append(key, value);
                                }
                            }
                        } catch (Exception e) {
                            System.err.println(name);
                            e.printStackTrace();
                        }
                    }
                }
            }
        }
    } finally {
        // Release the file system even when one of the passes fails.
        fileSystem.close();
    }
}

From source file:boa.datagen.SeqSortMerge.java

License:Apache License

/**
 * Repeatedly merges directories under {@code /tmprepcache/2015-07-sorted/}
 * until fewer than two entries remain. Each round writes the chosen
 * candidates' {@code part-00000} files into one new {@code part-00000}
 * inside a directory named after the current time, then deletes the
 * candidate directories.
 *
 * @param args unused
 * @throws IOException if any file system operation fails
 */
public static void main(String[] args) throws IOException {
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);

    final String inPath = "/tmprepcache/2015-07-sorted/";
    FileStatus[] files;
    // Re-list the directory each round; stop once there is nothing left to merge.
    while ((files = fs.listStatus(new Path(inPath))).length >= 2) {
        Path mergedDir = new Path(inPath + System.currentTimeMillis());
        fs.mkdirs(mergedDir);
        Path mergedFile = new Path(inPath + mergedDir.getName() + "/part-00000");
        SequenceFile.Writer merged = SequenceFile.createWriter(fs, conf, mergedFile, Text.class,
                BytesWritable.class);

        FileStatus[] candidates = getCandidates(files);
        System.out.println("Merging " + candidates.length + " from " + files.length);

        final int count = candidates.length;
        SequenceFile.Reader[] readers = new SequenceFile.Reader[count];
        for (int i = 0; i < count; i++) {
            Path part = new Path(inPath + candidates[i].getPath().getName() + "/part-00000");
            readers[i] = new SequenceFile.Reader(fs, part, conf);
        }

        Text[] keys = new Text[count];
        BytesWritable[] values = new BytesWritable[count];
        read(readers, keys, values);

        // Always emit the record selected by min(keys), then refill that slot
        // from its reader. An empty selected key ends the round - presumably
        // the helpers use it to signal that every reader is exhausted
        // (read/min are defined elsewhere; confirm there).
        int index = min(keys);
        while (!keys[index].toString().isEmpty()) {
            merged.append(keys[index], values[index]);
            read(readers[index], keys[index], values[index]);
            index = min(keys);
        }

        for (SequenceFile.Reader reader : readers) {
            reader.close();
        }
        merged.close();

        // The inputs are now contained in the merged file; remove their directories.
        for (FileStatus candidate : candidates) {
            fs.delete(new Path(inPath + candidate.getPath().getName()), true);
        }
    }
}

From source file:cn.com.warlock.SequenceFilesTest.java

License:Apache License

/**
 * Writes 100 (IntWritable, Text) records to a SequenceFile on HDFS using the
 * option-based {@code createWriter} API, with the compression type selected
 * by a string code ("1" none, "2" record, "3" block, anything else none).
 *
 * @param args unused
 * @throws IOException if the file cannot be created or written
 */
public static void main(String[] args) throws IOException {
    String hdfsUri = "hdfs://hlg-2p238-fandongsheng:8020";
    String pathStr = "/tmp/example/seq1";
    String compressType = "1";

    // When running on Windows, hadoop.home.dir may have to be set first, e.g.:
    // System.setProperty("hadoop.home.dir", "E:\\tools");

    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", hdfsUri);
    Path target = new Path(pathStr);

    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        SequenceFile.Writer.Option pathOpt = SequenceFile.Writer.file(target);
        SequenceFile.Writer.Option keyClassOpt = SequenceFile.Writer.keyClass(IntWritable.class);
        SequenceFile.Writer.Option valueClassOpt = SequenceFile.Writer.valueClass(Text.class);

        // Map the string code to a compression type and the message announcing it.
        final String announcement;
        final CompressionType compression;
        switch (compressType) {
        case "1":
            announcement = "compress none";
            compression = CompressionType.NONE;
            break;
        case "2":
            announcement = "compress record";
            compression = CompressionType.RECORD;
            break;
        case "3":
            announcement = "compress block";
            compression = CompressionType.BLOCK;
            break;
        default:
            announcement = "Default : compress none";
            compression = CompressionType.NONE;
            break;
        }
        System.out.println(announcement);
        SequenceFile.Writer.Option compressionOpt = SequenceFile.Writer.compression(compression);

        writer = SequenceFile.createWriter(conf, pathOpt, keyClassOpt, valueClassOpt, compressionOpt);

        // Keys count down from 100 to 1 while the values cycle through DATA.
        for (int remaining = 100; remaining > 0; remaining--) {
            int i = 100 - remaining;
            key.set(remaining);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}

From source file:co.nubetech.hiho.common.HihoTestCase.java

License:Apache License

/**
 * Writes every entry of {@code inputData} to a SequenceFile at
 * {@code filePath/nameOfFile}. The key and value classes of the file are
 * taken from the first entry the map's key set returns. An empty map writes
 * nothing. Failures while writing are printed and not rethrown.
 *
 * @param inputData  entries to write
 * @param filePath   directory of the file to create
 * @param nameOfFile name of the file to create inside {@code filePath}
 * @throws Exception if the file system cannot be obtained
 */
public void createSequenceFileInHdfs(HashMap inputData, String filePath, String nameOfFile) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = getFileSystem();
    Path inputFile = new Path(filePath + "/" + nameOfFile);
    SequenceFile.Writer writer = null;
    try {
        // Without a first entry there is nothing to derive the key/value
        // classes from; return instead of failing with NoSuchElementException.
        if (inputData.isEmpty()) {
            logger.debug("no entries to write to " + inputFile);
            return;
        }
        Object firstKey = inputData.keySet().iterator().next();
        Object firstValue = inputData.get(firstKey);
        writer = SequenceFile.createWriter(fs, conf, inputFile, firstKey.getClass(), firstValue.getClass());
        logger.debug("key class is: " + firstKey.getClass());
        logger.debug("val class is: " + firstValue.getClass());
        // Look each value up by its key rather than walking two iterators
        // in parallel and relying on them staying in step.
        for (Object key : inputData.keySet()) {
            writer.append(key, inputData.get(key));
        }
    } catch (Exception e) {
        // Kept as best-effort, as before: report the failure and carry on.
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(writer);
    }
}

From source file:co.nubetech.hiho.testdata.SequenceFileForCustomObject.java

License:Apache License

/**
 * Writes ten {@code Student} records to the SequenceFile {@code inputnew.seq},
 * keyed by each student's id. The field values come from the class-level
 * arrays {@code id}, {@code name}, {@code address}, {@code mobileNo} and
 * {@code percentage}. Failures while writing are printed, not rethrown.
 *
 * @param args unused
 * @throws IOException if the file system cannot be obtained
 */
public static void main(String[] args) throws IOException {
    final String target = "inputnew.seq";
    final Configuration conf = new Configuration();
    final FileSystem fs = FileSystem.get(URI.create(target), conf);
    final Path seqPath = new Path(target);
    SequenceFile.Writer out = null;
    // One Student instance is reused and refilled for every record.
    final Student current = new Student();
    try {
        out = SequenceFile.createWriter(fs, conf, seqPath, IntWritable.class, Student.class);
        int row = 0;
        while (row < 10) {
            current.setId(id[row]);
            current.setName(name[row]);
            current.setAddress(address[row]);
            current.setMobileNumber(mobileNo[row]);
            current.setPercentage(percentage[row]);
            out.append(current.getId(), current);
            row++;
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(out);
    }

}

From source file:co.nubetech.hiho.testdata.SequenceFileWriteDemo.java

License:Apache License

/**
 * Writes two (IntWritable, Text) records to the SequenceFile
 * {@code input2.seq}, printing the writer position, key and value before
 * each append. Failures while writing are printed, not rethrown.
 *
 * @param args unused
 * @throws IOException if the file system cannot be obtained
 */
public static void main(String[] args) throws IOException {
    final String target = "input2.seq";
    final Configuration conf = new Configuration();
    final FileSystem fs = FileSystem.get(URI.create(target), conf);
    final Path seqPath = new Path(target);
    final IntWritable key = new IntWritable();
    final Text value = new Text();
    SequenceFile.Writer out = null;
    try {
        out = SequenceFile.createWriter(fs, conf, seqPath, key.getClass(), value.getClass());
        // Keys count down 2, 1 while the values walk through DATA from its start.
        for (int remaining = 2; remaining > 0; remaining--) {
            final int i = 2 - remaining;
            key.set(remaining);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", out.getLength(), key, value);
            out.append(key, value);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(out);
    }
}

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License:Apache License

/**
 * Write out a SequenceFile that can be read by TotalOrderPartitioner
 * that contains the split points in startKeys.
 *
 * @param conf configuration used to resolve the file system and create the writer
 * @param partitionsPath output path for SequenceFile
 * @param startKeys the region start keys
 * @throws IOException if the file cannot be written
 * @throws IllegalArgumentException if {@code startKeys} is empty or its
 *         smallest key is not the empty byte array
 */
private static void writePartitions(Configuration conf, Path partitionsPath,
        List<ImmutableBytesWritable> startKeys) throws IOException {
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }

    // The split points are the sorted start keys minus the first one: no key
    // sorts below the first region's (empty) start key, so keeping it would
    // leave the reducer with index 0 without any data.
    TreeSet<ImmutableBytesWritable> splitPoints = new TreeSet<ImmutableBytesWritable>(startKeys);

    ImmutableBytesWritable lowest = splitPoints.first();
    if (!lowest.equals(HConstants.EMPTY_BYTE_ARRAY)) {
        String found = Bytes.toStringBinary(lowest.get());
        throw new IllegalArgumentException(
                "First region of table should have empty start key. Instead has: " + found);
    }
    splitPoints.remove(lowest);

    // Emit the remaining keys in sorted order, each with a null value.
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, partitionsPath,
            ImmutableBytesWritable.class, NullWritable.class);
    try {
        for (ImmutableBytesWritable splitPoint : splitPoints) {
            out.append(splitPoint, NullWritable.get());
        }
    } finally {
        out.close();
    }
}