Example usage for org.apache.hadoop.fs Path suffix

List of usage examples for org.apache.hadoop.fs Path suffix

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.Path#suffix(String), collected from open-source projects.

Prototype

public Path suffix(String suffix) 

Document

Adds a suffix to the final name in the path.
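
Before the project examples below, here is a minimal, self-contained sketch of that behavior (assuming only that hadoop-common is on the classpath; the path string and class name are illustrative). suffix is pure path manipulation, so nothing needs to exist on any filesystem.

import org.apache.hadoop.fs.Path;

public class PathSuffixExample {
    public static void main(String[] args) {
        // suffix(String) appends the argument to the last component of the path
        // and returns a new Path; the original Path object is immutable.
        Path table = new Path("hdfs://namenode/warehouse/table1");
        Path typeFile = table.suffix(".type");

        System.out.println(typeFile); // hdfs://namenode/warehouse/table1.type
        System.out.println(table);    // hdfs://namenode/warehouse/table1 (unchanged)
    }
}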

Usage

From source file:org.apache.mahout.text.TestSequenceFilesFromDirectory.java

License:Apache License

private static void checkMRResultFilesRecursive(Configuration configuration, Path outputDir, String[][] data,
        String prefix) throws IOException {
    FileSystem fs = FileSystem.get(configuration);

    // output exists?
    FileStatus[] fileStatuses = fs.listStatus(outputDir.suffix("/part-m-00000"), PathFilters.logsCRCFilter());
    assertEquals(1, fileStatuses.length); // only one
    assertEquals("part-m-00000", fileStatuses[0].getPath().getName());
    Map<String, String> fileToData = Maps.newHashMap();
    String currentPath = prefix;

    for (String[] aData : data) {
        currentPath += Path.SEPARATOR + aData[0];
        fileToData.put(currentPath + Path.SEPARATOR + "file.txt", aData[1]);
    }

    // read a chunk to check content
    SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<Text, Text>(fileStatuses[0].getPath(),
            true, configuration);
    try {
        while (iterator.hasNext()) {
            Pair<Text, Text> record = iterator.next();
            System.out.printf("MR-Recur > Trying to check: %s\n", record.getFirst().toString().trim());
            String retrievedData = fileToData.get(record.getFirst().toString().trim());
            assertNotNull(retrievedData);
            assertEquals(retrievedData, record.getSecond().toString().trim());
        }
    } finally {
        Closeables.close(iterator, true);
    }
}

From source file:org.apache.mrql.Evaluator.java

License:Apache License

/** dump MRQL data into a sequence file */
public void dump(String file, Tree type, MRData data) throws Exception {
    Path path = new Path(file);
    FileSystem fs = path.getFileSystem(Plan.conf);
    PrintStream ftp = new PrintStream(fs.create(path.suffix(".type")));
    ftp.print("2@" + type.toString() + "\n");
    ftp.close();
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, Plan.conf, path, MRContainer.class,
            MRContainer.class);
    if (data instanceof MR_dataset)
        data = Plan.collect(((MR_dataset) data).dataset());
    if (data instanceof Bag) {
        Bag s = (Bag) data;
        long i = 0;
        for (MRData e : s) {
            counter_key.set(i++);
            value_container.set(e);
            writer.append(counter_container, value_container);
        }
    } else {
        counter_key.set(0);
        value_container.set(data);
        writer.append(counter_container, value_container);
    }
    writer.close();
}

From source file:org.apache.mrql.Evaluator.java

License:Apache License

/** for data dumped to a file, return the MRQL type of the data */
public Tree get_type(String file) {
    try {
        Path path = new Path(file);
        FileSystem fs = path.getFileSystem(Plan.conf);
        BufferedReader ftp = new BufferedReader(new InputStreamReader(fs.open(path.suffix(".type"))));
        String[] s = ftp.readLine().split("@");
        ftp.close();
        if (s.length != 2)
            return null;
        if (!s[0].equals("2"))
            throw new Error("The binary file has been created in java mode and cannot be read in hadoop mode");
        return Tree.parse(s[1]);
    } catch (Exception e) {
        return null;
    }
}

From source file:org.apache.tajo.storage.TestCompressionStorages.java

License:Apache License

private void storageCompressionTest(StoreType storeType, Class<? extends CompressionCodec> codec)
        throws IOException {
    Schema schema = new Schema();
    schema.addColumn("id", Type.INT4);
    schema.addColumn("age", Type.FLOAT4);
    schema.addColumn("name", Type.TEXT);

    TableMeta meta = CatalogUtil.newTableMeta(storeType);
    meta.putOption("compression.codec", codec.getCanonicalName());
    meta.putOption("compression.type", SequenceFile.CompressionType.BLOCK.name());
    meta.putOption("rcfile.serde", TextSerializerDeserializer.class.getName());
    meta.putOption("sequencefile.serde", TextSerializerDeserializer.class.getName());

    String fileName = "Compression_" + codec.getSimpleName();
    Path tablePath = new Path(testDir, fileName);
    Appender appender = ((FileStorageManager) StorageManager.getFileStorageManager(conf)).getAppender(meta,
            schema, tablePath);
    appender.enableStats();

    appender.init();

    String extension = "";
    if (appender instanceof CSVFile.CSVAppender) {
        extension = ((CSVFile.CSVAppender) appender).getExtension();
    } else if (appender instanceof DelimitedTextFile.DelimitedTextFileAppender) {
        extension = ((DelimitedTextFile.DelimitedTextFileAppender) appender).getExtension();
    }

    int tupleNum = 100000;
    VTuple vTuple;

    for (int i = 0; i < tupleNum; i++) {
        vTuple = new VTuple(3);
        vTuple.put(0, DatumFactory.createInt4(i + 1));
        vTuple.put(1, DatumFactory.createFloat4((float) i));
        vTuple.put(2, DatumFactory.createText(String.valueOf(i)));
        appender.addTuple(vTuple);
    }
    appender.close();

    TableStats stat = appender.getStats();
    assertEquals(tupleNum, stat.getNumRows().longValue());
    tablePath = tablePath.suffix(extension);
    FileStatus status = fs.getFileStatus(tablePath);
    long fileLen = status.getLen();
    FileFragment[] tablets = new FileFragment[1];
    tablets[0] = new FileFragment(fileName, tablePath, 0, fileLen);

    Scanner scanner = StorageManager.getFileStorageManager(conf).getScanner(meta, schema, tablets[0], schema);

    if (StoreType.CSV == storeType) {
        if (SplittableCompressionCodec.class.isAssignableFrom(codec)) {
            assertTrue(scanner.isSplittable());
        } else {
            assertFalse(scanner.isSplittable());
        }
    }
    scanner.init();

    if (storeType == StoreType.SEQUENCEFILE) {
        assertTrue(scanner instanceof SequenceFileScanner);
        Writable key = ((SequenceFileScanner) scanner).getKey();
        assertEquals(key.getClass().getCanonicalName(), LongWritable.class.getCanonicalName());
    }

    int tupleCnt = 0;
    Tuple tuple;
    while ((tuple = scanner.next()) != null) {
        tupleCnt++;
    }
    scanner.close();
    assertEquals(tupleNum, tupleCnt);
    assertNotSame(appender.getStats().getNumBytes().longValue(),
            scanner.getInputStats().getNumBytes().longValue());
    assertEquals(appender.getStats().getNumRows().longValue(),
            scanner.getInputStats().getNumRows().longValue());
}

From source file:org.apache.tajo.storage.TestLineReader.java

License:Apache License

@Test
public void testLineDelimitedReader() throws IOException {
    TajoConf conf = new TajoConf();
    Path testDir = CommonTestingUtil.getTestDir(TEST_PATH);
    FileSystem fs = testDir.getFileSystem(conf);

    Schema schema = new Schema();
    schema.addColumn("id", Type.INT4);
    schema.addColumn("age", Type.INT8);
    schema.addColumn("comment", Type.TEXT);
    schema.addColumn("comment2", Type.TEXT);

    TableMeta meta = CatalogUtil.newTableMeta(StoreType.TEXTFILE);
    meta.putOption("compression.codec", DeflateCodec.class.getCanonicalName());

    Path tablePath = new Path(testDir, "line1." + DeflateCodec.class.getSimpleName());
    FileAppender appender = (FileAppender) StorageManager.getFileStorageManager(conf).getAppender(null, null,
            meta, schema, tablePath);
    appender.enableStats();
    appender.init();
    int tupleNum = 10000;
    VTuple vTuple;

    long splitOffset = 0;
    for (int i = 0; i < tupleNum; i++) {
        vTuple = new VTuple(4);
        vTuple.put(0, DatumFactory.createInt4(i + 1));
        vTuple.put(1, DatumFactory.createInt8(25L));
        vTuple.put(2, DatumFactory.createText("emiya muljomdao"));
        vTuple.put(3, NullDatum.get());
        appender.addTuple(vTuple);

        if (i == (tupleNum / 2)) {
            splitOffset = appender.getOffset();
        }
    }
    String extension = ((DelimitedTextFile.DelimitedTextFileAppender) appender).getExtension();
    appender.close();

    tablePath = tablePath.suffix(extension);
    FileFragment fragment = new FileFragment("table", tablePath, 0, splitOffset);
    DelimitedLineReader reader = new DelimitedLineReader(conf, fragment); // if file is compressed, will read to EOF
    assertTrue(reader.isCompressed());
    assertFalse(reader.isReadable());
    reader.init();
    assertTrue(reader.isReadable());

    int i = 0;
    while (reader.isReadable()) {
        ByteBuf buf = reader.readLine();
        if (buf == null)
            break;
        i++;
    }

    IOUtils.cleanup(null, reader, fs);
    assertEquals(tupleNum, i);

}

From source file:org.apache.tajo.storage.v2.TestCSVCompression.java

License:Apache License

public void testSplitCompressionData() throws IOException {

    Schema schema = new Schema();
    schema.addColumn("id", TajoDataTypes.Type.INT4);
    schema.addColumn("age", TajoDataTypes.Type.INT8);

    TableMeta meta = CatalogUtil.newTableMeta(CatalogProtos.StoreType.CSV);
    meta.putOption("compression.codec", BZip2Codec.class.getCanonicalName());

    Path tablePath = new Path(testDir, "SplitCompression");
    Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath);
    appender.enableStats();

    appender.init();

    String extension = "";
    if (appender instanceof CSVFile.CSVAppender) {
        extension = ((CSVFile.CSVAppender) appender).getExtension();
    }

    int tupleNum = 100000;
    VTuple vTuple;

    for (int i = 0; i < tupleNum; i++) {
        vTuple = new VTuple(2);
        vTuple.put(0, DatumFactory.createInt4(i + 1));
        vTuple.put(1, DatumFactory.createInt8(25L));
        appender.addTuple(vTuple);
    }
    appender.close();

    TableStats stat = appender.getStats();
    assertEquals(tupleNum, stat.getNumRows().longValue());
    tablePath = tablePath.suffix(extension);

    FileStatus status = fs.getFileStatus(tablePath);
    long fileLen = status.getLen();
    long randomNum = (long) (Math.random() * fileLen) + 1;

    FileFragment[] tablets = new FileFragment[2];
    tablets[0] = new FileFragment("SplitCompression", tablePath, 0, randomNum);
    tablets[1] = new FileFragment("SplitCompression", tablePath, randomNum, (fileLen - randomNum));

    Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, tablets[0],
            schema);
    scanner.init();
    int tupleCnt = 0;
    Tuple tuple;
    while ((tuple = scanner.next()) != null) {
        tupleCnt++;
    }
    scanner.close();

    scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, tablets[1], schema);
    scanner.init();
    while ((tuple = scanner.next()) != null) {
        tupleCnt++;
    }

    scanner.close();
    assertEquals(tupleNum, tupleCnt);
}

From source file:org.apache.tajo.storage.v2.TestCSVCompression.java

License:Apache License

private void storageCompressionTest(CatalogProtos.StoreType storeType, Class<? extends CompressionCodec> codec)
        throws IOException {
    Schema schema = new Schema();
    schema.addColumn("id", TajoDataTypes.Type.INT4);
    schema.addColumn("age", TajoDataTypes.Type.INT8);

    TableMeta meta = CatalogUtil.newTableMeta(storeType);
    meta.putOption("compression.codec", codec.getCanonicalName());

    String fileName = "Compression_" + codec.getSimpleName();
    Path tablePath = new Path(testDir, fileName);
    Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath);
    appender.enableStats();

    appender.init();

    String extension = "";
    if (appender instanceof CSVFile.CSVAppender) {
        extension = ((CSVFile.CSVAppender) appender).getExtension();
    }

    int tupleNum = 10000;
    VTuple vTuple;

    for (int i = 0; i < tupleNum; i++) {
        vTuple = new VTuple(2);
        vTuple.put(0, DatumFactory.createInt4(i + 1));
        vTuple.put(1, DatumFactory.createInt8(25L));
        appender.addTuple(vTuple);
    }
    appender.close();

    TableStats stat = appender.getStats();
    assertEquals(tupleNum, stat.getNumRows().longValue());
    tablePath = tablePath.suffix(extension);
    FileStatus status = fs.getFileStatus(tablePath);
    long fileLen = status.getLen();
    FileFragment[] tablets = new FileFragment[1];
    tablets[0] = new FileFragment(fileName, tablePath, 0, fileLen);

    Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, tablets[0],
            schema);
    scanner.init();
    int tupleCnt = 0;
    while (scanner.next() != null) {
        tupleCnt++;
    }
    scanner.close();
    assertEquals(tupleNum, tupleCnt);
}

From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput.java

License:Apache License

public static MapOutput createDiskMapOutput(InputAttemptIdentifier attemptIdentifier, MergeManager merger,
        long size, Configuration conf, int fetcher, boolean primaryMapOutput, TezTaskOutputFiles mapOutputFile)
        throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    Path outputpath = mapOutputFile.getInputFileForWrite(attemptIdentifier.getInputIdentifier().getInputIndex(),
            size);
    // The fetcher id is appended to the output path to form the tmp path so that files are not clobbered;
    // otherwise fetches for the same task but from different attempts would clobber each other.
    Path tmpOutputPath = outputpath.suffix(String.valueOf(fetcher));
    long offset = 0;

    MapOutput mapOutput = new MapOutput(Type.DISK, attemptIdentifier, merger, size, outputpath, offset,
            primaryMapOutput, fs, tmpOutputPath);
    mapOutput.disk = mapOutput.localFS.create(tmpOutputPath);

    return mapOutput;
}

From source file:org.seqdoop.hadoop_bam.cli.plugins.View.java

License:Open Source License

@Override
protected int run(CmdLineParser parser) {

    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("view :: PATH not given.");
        return 3;
    }

    Utils.toStringency(parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()),
            "view");

    final String path = args.get(0);
    final List<String> regions = args.subList(1, args.size());

    final boolean headerOnly = parser.getBoolean(headerOnlyOpt);

    final SAMFileReader reader;

    try {
        final Path p = new Path(path);

        SeekableStream idx;
        try {
            idx = WrapSeekable.openPath(getConf(), p.suffix(".bai"));
        } catch (Exception e) {
            idx = null;
        }

        final SeekableStream sam = WrapSeekable.openPath(getConf(), p);

        reader = idx == null ? new SAMFileReader(sam, false) : new SAMFileReader(sam, idx, false);
    } catch (Exception e) {
        System.err.printf("view :: Could not open '%s': %s\n", path, e.getMessage());
        return 4;
    }

    reader.setValidationStringency(ValidationStringency.SILENT);

    final SAMFileHeader header;

    try {
        header = reader.getFileHeader();
    } catch (SAMFormatException e) {
        System.err.printf("view :: Could not parse '%s': %s\n", path, e.getMessage());
        return 4;
    }

    final String fmt = (String) parser.getOptionValue(formatOpt);

    final SAMFormat format = fmt == null ? SAMFormat.SAM : SAMFormat.valueOf(fmt.toUpperCase(Locale.ENGLISH));

    final SAMFileWriterImpl writer;
    switch (format) {
    case BAM:
        // BAM output inside view no longer supported since Picard made the class private
        System.err.println("BAM output inside view no longer supported");
        return 1;
    case SAM:
        writer = new SAMTextWriter(System.out);
        break;
    default:
        writer = null;
        assert false;
    }

    writer.setSortOrder(header.getSortOrder(), true);
    writer.setHeader(header);

    if (regions.isEmpty() || headerOnly) {
        if (!headerOnly)
            if (!writeIterator(writer, reader.iterator(), path))
                return 4;

        writer.close();
        return 0;
    }

    if (!reader.isBinary()) {
        System.err.println("view :: Cannot output regions from SAM file");
        return 4;
    }

    if (!reader.hasIndex()) {
        System.err.println("view :: Cannot output regions from BAM file lacking an index");
        return 4;
    }

    reader.enableIndexCaching(true);

    boolean errors = false;

    for (final String region : regions) {
        final StringTokenizer st = new StringTokenizer(region, ":-");
        final String refStr = st.nextToken();
        final int beg, end;

        if (st.hasMoreTokens()) {
            beg = parseCoordinate(st.nextToken());
            end = st.hasMoreTokens() ? parseCoordinate(st.nextToken()) : -1;

            if (beg < 0 || end < 0) {
                errors = true;
                continue;
            }
            if (end < beg) {
                System.err.printf("view :: Invalid range, cannot end before start: '%d-%d'\n", beg, end);
                errors = true;
                continue;
            }
        } else
            beg = end = 0;

        SAMSequenceRecord ref = header.getSequence(refStr);
        if (ref == null)
            try {
                ref = header.getSequence(Integer.parseInt(refStr));
            } catch (NumberFormatException e) {
            }

        if (ref == null) {
            System.err.printf("view :: Not a valid sequence name or index: '%s'\n", refStr);
            errors = true;
            continue;
        }

        final SAMRecordIterator it = reader.queryOverlapping(ref.getSequenceName(), beg, end);

        if (!writeIterator(writer, it, path))
            return 4;
    }
    writer.close();
    return errors ? 5 : 0;
}

From source file:org.springframework.data.hadoop.store.strategy.naming.CodecFileNamingStrategy.java

License:Apache License

@Override
public Path resolve(Path path) {
    CodecInfo c = getCodecInfo();
    String suffix = c != null ? "." + c.getDefaultSuffix() : "";
    if (path != null) {
        return path.suffix(suffix);
    } else if (StringUtils.hasText(suffix)) {
        return new Path(suffix);
    } else {
        return path;
    }
}