Example usage for org.apache.hadoop.fs FileSystem exists

List of usage examples for org.apache.hadoop.fs FileSystem exists

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem#exists(Path).

Prototype

public boolean exists(Path f) throws IOException 

Document

Check if a path exists.
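
Before the project examples below, here is a minimal, self-contained sketch of the call itself; the path literal and class name are placeholders, not taken from any of the quoted projects.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemExistsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path(args.length > 0 ? args[0] : "/tmp/example.txt");
        // Resolve the FileSystem from the path so the correct scheme (hdfs, file, s3n, ...) is used.
        FileSystem fs = path.getFileSystem(conf);

        if (fs.exists(path)) {
            System.out.println(path + " exists");
        } else {
            System.out.println(path + " does not exist");
        }
    }
}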

Usage

From source file:com.inmobi.databus.purge.DataPurgerServiceTest.java

License:Apache License

private void createTestPurgefiles(FileSystem fs, Cluster cluster, Calendar date) throws Exception {
    for (String streamname : cluster.getSourceStreams()) {
        String[] files = new String[NUM_OF_FILES];
        String datapath = Cluster.getDateAsYYYYMMDDHHMNPath(date.getTime());
        String commitpath = cluster.getLocalFinalDestDirRoot() + File.separator + streamname + File.separator
                + datapath;
        String mergecommitpath = cluster.getFinalDestDirRoot() + File.separator + streamname + File.separator
                + datapath;
        String trashpath = cluster.getTrashPath() + File.separator + CalendarHelper.getDateAsString(date)
                + File.separator;
        fs.mkdirs(new Path(commitpath));

        for (int j = 0; j < NUM_OF_FILES; ++j) {
            files[j] = cluster.getName() + "-"
                    + TestLocalStreamService.getDateAsYYYYMMDDHHmm(new Date()) + "_" + idFormat.format(j);
            {
                Path path = new Path(commitpath + File.separator + files[j]);
                // LOG.info("Creating streams_local File " + path.getName());
                FSDataOutputStream streamout = fs.create(path);
                streamout.writeBytes("Creating Test data for teststream " + path.toString());
                streamout.close();
                Assert.assertTrue(fs.exists(path));
            }
            {
                Path path = new Path(mergecommitpath + File.separator + files[j]);
                // LOG.info("Creating streams File " + path.getName());
                FSDataOutputStream streamout = fs.create(path);
                streamout.writeBytes("Creating Test data for teststream " + path.toString());
                streamout.close();
                Assert.assertTrue(fs.exists(path));
            }

            {
                Path path = new Path(trashpath + File.separator + String.valueOf(date.get(Calendar.HOUR_OF_DAY))
                        + File.separator + files[j]);
                // LOG.info("Creating trash File " + path.toString());
                FSDataOutputStream streamout = fs.create(path);
                streamout.writeBytes("Creating Test trash data for teststream " + path.getName());
                streamout.close();
                Assert.assertTrue(fs.exists(path));
            }
        }
    }

}

From source file:com.inmobi.grid.fs.s4fs.NativeS4FileSystem.java

License:Apache License

/**
 * fs.default.name in conf points to the HDFS store that holds the s3n credential
 * file under /user/<name>, named <bucket>.crd;
 * the .crd file contains access:secret on a single line.
 */
@Override
public void initialize(URI uri, Configuration conf) throws IOException {

    this.uri = uri;

    if (new Path(conf.get("fs.default.name")).toUri().getScheme().equals("s4")) {
        // setting fs.default.name to s4 is currently not supported;
        // without this check, the code below would call back into this FileSystem recursively.
        return;
    }

    FileSystem fs = FileSystem.get(conf);
    Path nnWorkingDir = fs.getHomeDirectory();

    if (!fs.exists(nnWorkingDir)) {
        throw new IOException("Users home directory does not exist: " + fs.getWorkingDirectory());
    }

    String scheme = uri.getScheme();
    String bucket = uri.getAuthority();

    Path credFile = new Path(nnWorkingDir, bucket + ".crd");
    if (!fs.exists(credFile)) {
        throw new IOException(credFile.toString() + " does not exist");
    }

    StringBuilder sb = new StringBuilder(getCredentialFromFile(fs, credFile)).append("@").append(bucket);
    String bucketWithAccess = uri.toString().replaceFirst(scheme, "s3n");
    bucketWithAccess = bucketWithAccess.replaceFirst(bucket, sb.toString());

    super.initialize(new Path(bucketWithAccess).toUri(), conf);

}
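
The helper getCredentialFromFile used above is not part of this excerpt. Below is a minimal sketch of what such a helper might look like, assuming (as the Javadoc says) that the .crd file holds access:secret on a single line; the class and method names are illustrative, not the project's actual code.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class CredentialFileSketch {
    // Hypothetical helper: returns the "access:secret" string from the first line of the .crd file.
    static String getCredentialFromFile(FileSystem fs, Path credFile) throws IOException {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(credFile), StandardCharsets.UTF_8))) {
            String line = reader.readLine();
            if (line == null || !line.contains(":")) {
                throw new IOException("Malformed credential file: " + credFile);
            }
            return line.trim();
        }
    }
}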

From source file:com.kadwa.hadoop.DistExec.java

License:Open Source License

/**
 * Sanity check for srcPath
 */
private static void checkSrcPath(JobConf jobConf, List<Path> srcPaths) throws IOException {
    List<IOException> rslt = new ArrayList<IOException>();

    Path[] ps = new Path[srcPaths.size()];
    ps = srcPaths.toArray(ps);
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), ps, jobConf);

    for (Path p : srcPaths) {
        FileSystem fs = p.getFileSystem(jobConf);
        if (!fs.exists(p)) {
            rslt.add(new IOException("Input source " + p + " does not exist."));
        }
    }
    if (!rslt.isEmpty()) {
        throw new InvalidInputException(rslt);
    }
}

From source file:com.kadwa.hadoop.DistExec.java

License:Open Source License

/**
 * Initialize ExecFilesMapper specific job-configuration.
 *
 * @param conf    : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args    Arguments
 * @return true if it is necessary to launch a job.
 */
private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());
    jobConf.set(EXEC_CMD_LABEL, args.execCmd);

    //set boolean values
    jobConf.setBoolean(Options.REDIRECT_ERROR_TO_OUT.propertyname,
            args.flags.contains(Options.REDIRECT_ERROR_TO_OUT));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path stagingArea;
    try {
        stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }

    Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(FileSystem.get(jobDirectory.toUri(), conf), jobDirectory, mapredSysPerms);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_" + NAME + "_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_" + NAME + "_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_" + NAME + "_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_" + NAME + "_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists);
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_" + NAME + "_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_" + NAME + "_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("sourcePathsCount=" + srcCount);
    LOG.info("filesToExecCount=" + fileCount);
    LOG.info("bytesToExecCount=" + StringUtils.humanReadableInt(byteCount));
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(fileCount, jobConf);
    return fileCount > 0;
}
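
A design note on the example above: it calls dstfs.exists(args.dst) up front and later calls dstfs.getFileStatus(args.dst) inside a catch of FileNotFoundException. Both idioms answer the existence question; the catch-based form below (a sketch with illustrative names, not taken from DistExec) avoids a separate exists() round trip when the FileStatus is needed anyway.

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class ExistsIdiomSketch {
    // Returns the FileStatus if the path exists, or null if it does not.
    static FileStatus statusOrNull(FileSystem fs, Path path) throws IOException {
        try {
            return fs.getFileStatus(path);
        } catch (FileNotFoundException notFound) {
            return null;
        }
    }
}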

From source file:com.knewton.mapreduce.SSTableRecordReader.java

License:Apache License

/**
 * Copies the minimum set of table files required for the table reader to work to local disk.
 *
 * @param split The table to work on.
 */
@VisibleForTesting
void copyTablesToLocal(FileSystem remoteFS, FileSystem localFS, Path dataTablePath, TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();
    String hdfsDataTablePathStr = dataTablePath.toUri().getPath();
    String localDataTablePathStr = dataTablePath.toUri().getHost() + File.separator
            + dataTablePath.toUri().getPath();
    // Make path relative due to EMR permissions
    if (localDataTablePathStr.startsWith("/")) {
        String mapTaskId = conf.get("mapreduce.task.attempt.id");
        String mapTempDir = conf.get("mapreduce.cluster.temp.dir");
        String taskWorkDir = mapTempDir + File.separator + mapTaskId;
        LOG.info("Appending {} to {}", taskWorkDir, localDataTablePathStr);
        localDataTablePathStr = taskWorkDir + localDataTablePathStr;
    }
    Path localDataTablePath = new Path(localDataTablePathStr);
    LOG.info("Copying hdfs file from {} to local disk at {}.", dataTablePath.toUri(),
            localDataTablePath.toUri());
    copyToLocalFile(remoteFS, localFS, dataTablePath, localDataTablePath);
    boolean isCompressed = conf.getBoolean(PropertyConstants.COMPRESSION_ENABLED.txt, false);
    if (isCompressed) {
        decompress(localDataTablePath, context);
    }
    components.add(Component.DATA);
    desc = Descriptor.fromFilename(localDataTablePathStr);
    Descriptor hdfsDesc = Descriptor.fromFilename(hdfsDataTablePathStr);
    String indexPathStr = hdfsDesc.filenameFor(Component.PRIMARY_INDEX);
    components.add(Component.PRIMARY_INDEX);
    Path localIdxPath = new Path(desc.filenameFor(Component.PRIMARY_INDEX));
    LOG.info("Copying hdfs file from {} to local disk at {}.", indexPathStr, localIdxPath);
    copyToLocalFile(remoteFS, localFS, new Path(indexPathStr), localIdxPath);
    if (isCompressed) {
        decompress(localIdxPath, context);
    }
    String compressionTablePathStr = hdfsDesc.filenameFor(Component.COMPRESSION_INFO.name());
    Path compressionTablePath = new Path(compressionTablePathStr);
    if (remoteFS.exists(compressionTablePath)) {
        Path localCompressionPath = new Path(desc.filenameFor(Component.COMPRESSION_INFO.name()));
        LOG.info("Copying hdfs file from {} to local disk at {}.", compressionTablePath.toUri(),
                localCompressionPath);
        copyToLocalFile(remoteFS, localFS, compressionTablePath, localCompressionPath);
        if (isCompressed) {
            decompress(localCompressionPath, context);
        }
        components.add(Component.COMPRESSION_INFO);
    }
}

From source file:com.knewton.mapreduce.SSTableRecordReader.java

License:Apache License

/**
 * Copies a remote path to the local filesystem while reporting progress to Hadoop.
 * Does not support directories.
 */
@VisibleForTesting
void copyToLocalFile(FileSystem remoteFS, FileSystem localFS, Path remote, Path local) throws IOException {
    // don't support transferring from remote directories
    FileStatus remoteStat = remoteFS.getFileStatus(remote);
    Preconditions.checkArgument(!remoteStat.isDirectory(), String.format("Path %s is directory!", remote));
    // if local is a dir, copy to inside that dir, like 'cp /path/file /tmp/' would do
    if (localFS.exists(local)) {
        FileStatus localStat = localFS.getFileStatus(local);
        if (localStat.isDirectory()) {
            local = new Path(local, remote.getName());
        }
    }
    long remoteFileSize = remoteStat.getLen();
    // do actual copy
    InputStream in = null;
    OutputStream out = null;
    try {
        long startTime = System.currentTimeMillis();
        long lastLogTime = 0;
        long bytesCopied = 0;
        in = remoteFS.open(remote);
        out = localFS.create(local, true);
        int buffSize = this.conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY,
                CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_DEFAULT);
        byte[] buf = new byte[buffSize];
        int bytesRead = in.read(buf);
        while (bytesRead >= 0) {
            long now = System.currentTimeMillis();
            // log transfer rate once per min, starting 1 min after transfer began
            if (now - lastLogTime > 60000L && now - startTime > 60000L) {
                double elapsedSec = (now - startTime) / 1000D;
                double bytesPerSec = bytesCopied / elapsedSec;
                LOG.info("Transferred {} of {} bytes at {} bytes per second", bytesCopied, remoteFileSize,
                        bytesPerSec);
                lastLogTime = now;
            }
            this.ctx.progress();
            out.write(buf, 0, bytesRead);
            bytesCopied += bytesRead;
            bytesRead = in.read(buf);
        }
        // try to close these outside of finally so we receive exception on failure
        out.close();
        out = null;
        in.close();
        in = null;
    } finally {
        // make sure everything's closed
        IOUtils.closeStream(out);
        IOUtils.closeStream(in);
    }
}
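
For context on the hand-rolled copy loop above: Hadoop's FileSystem already provides copyToLocalFile(Path, Path), but that call gives no opportunity to invoke the task context's progress() during a long transfer, which appears to be why this reader copies the bytes itself. A minimal sketch of the built-in call, with placeholder names:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class BuiltInCopySketch {
    static void copyDown(Configuration conf, Path remote, Path local) throws IOException {
        FileSystem remoteFS = remote.getFileSystem(conf);
        // Copies the remote file to the local filesystem in one blocking call; no progress reporting.
        remoteFS.copyToLocalFile(remote, local);
    }
}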

From source file:com.knewton.mapreduce.SSTableRecordReaderTest.java

License:Apache License

/**
 * Tests that tables can be copied locally correctly, including the compression info table.
 */
@Test
public void testCopyTablesToLocalWithCompressionInfo() throws Exception {
    TaskAttemptContext context = getTaskAttemptContext(true, true, true);
    ssTableColumnRecordReader.initialize(inputSplit, context);

    doCallRealMethod().when(ssTableColumnRecordReader).copyTablesToLocal(any(FileSystem.class),
            any(FileSystem.class), any(Path.class), any(TaskAttemptContext.class));

    FileSystem remoteFS = mock(FileSystem.class);
    FileSystem localFS = mock(FileSystem.class);

    byte[] data = new byte[] { 0xA };
    FSDataInputStream fsIn = new FSDataInputStream(new MemoryDataInputStream(data));
    FSDataOutputStream fsOut = mock(FSDataOutputStream.class);

    when(remoteFS.open(any(Path.class))).thenReturn(fsIn);
    when(localFS.create(any(Path.class), anyBoolean())).thenReturn(fsOut);

    Path dataTablePath = inputSplit.getPath();
    FileStatus fileStatus = mock(FileStatus.class);
    when(fileStatus.getLen()).thenReturn(10L);
    when(fileStatus.isDirectory()).thenReturn(false);
    when(remoteFS.getFileStatus(any(Path.class))).thenReturn(fileStatus);

    String str = ssTableColumnRecordReader.getDescriptor().filenameFor(Component.COMPRESSION_INFO);
    when(remoteFS.exists(new Path(str))).thenReturn(true);

    ssTableColumnRecordReader.copyTablesToLocal(remoteFS, localFS, dataTablePath, context);
    verify(remoteFS).getFileStatus(dataTablePath);
    ssTableColumnRecordReader.close();
    verify(fsOut).write(any(byte[].class), eq(0), eq(data.length));
    assertEquals(3, ssTableColumnRecordReader.getComponentSize());
}
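
One detail in the stub above: when(remoteFS.exists(new Path(str))).thenReturn(true) works because org.apache.hadoop.fs.Path implements equals(), so Mockito matches the argument by equality. An equivalent, more explicit form using the eq() matcher might look like the sketch below (not from the source; with Mockito 2+ the matcher lives in org.mockito.ArgumentMatchers, older versions use org.mockito.Matchers).

import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.when;

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class ExistsStubSketch {
    static void stubCompressionInfoExists(FileSystem remoteFS, String compressionInfoPath) throws IOException {
        // Explicit eq() matcher; equivalent to passing the Path instance directly.
        when(remoteFS.exists(eq(new Path(compressionInfoPath)))).thenReturn(true);
    }
}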

From source file:com.kse.bigdata.main.Driver.java

License:Apache License

public static void main(String[] args) throws Exception {
    /**********************************************************************************
     **    Merge the source files into one.                                          **
     **    Should change the directories of each file before executing the program   **
    ***********************************************************************************/
    //        String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    //        String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    //        File resultFile = new File(resultFileDirectory);
    //        if(!resultFile.exists())
    //            new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop Operation.
     * Before starting, check the length of the sequence we want to predict.
     **********************************************************************************/

    Configuration conf = new Configuration();

    //Enable MapReduce intermediate compression as Snappy
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    //Enable Profiling
    //conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;

    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {

        /*
         * Mandatory command
         */
        //Extract input path string from command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        //Extract output path string from command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        //Extract test data path string from command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional command
         */
        //Extract a number of neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        //Whether job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        //Extract the number of sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        //Whether job uses mean or median
        //[Default : mean]
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";
    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    //        Test Sequence
    //        String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    //        Sequence testSeq = new Sequence(testSeqString);
    //        LinkedList<Sequence> testSequences = new LinkedList<>();
    //        testSequences.add(testSeq);

    for (Sequence seq : testSequences) {

        /*
         ********************  Hadoop Launch ***********************
         */

        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        //          TODO: find a better way to implement the combiner class;
        //          the current implementation does not help the job.
        //          job.setCombinerClass(Combiner.class);

        //Set the number of reduce tasks to 1 to keep the 100 nearest neighbors in a sorted set.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * if job finishes, get result of the job and store it in results(list).
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();

        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * if all jobs finish, store results of jobs to output/result.txt file.
     */
    String finalOutputPath = "output/result.csv";
    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        //CSV File Header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");

            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }

}
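
A small note on the guard above: FileSystem.create(Path) overwrites an existing file by default, so the explicit exists()-then-delete step mainly matters when the target could be a directory or when a recursive removal is wanted. A minimal sketch of that guarded form, with illustrative names:

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class OverwriteGuardSketch {
    static FSDataOutputStream openFresh(FileSystem fs, Path file) throws IOException {
        if (fs.exists(file)) {
            fs.delete(file, true); // recursive delete, in case a directory occupies the path
        }
        // overwrite=true is the default for create(Path); stated explicitly here for clarity
        return fs.create(file, true);
    }
}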

From source file:com.kylinolap.common.persistence.HBaseResourceStore.java

License:Apache License

private Path writeLargeCellToHdfs(String resPath, byte[] largeColumn, HTableInterface table)
        throws IOException {
    Path redirectPath = bigCellHDFSPath(resPath);
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    FileSystem fileSystem = FileSystem.get(hconf);

    if (fileSystem.exists(redirectPath)) {
        fileSystem.delete(redirectPath, true);
    }

    FSDataOutputStream out = fileSystem.create(redirectPath);

    try {
        out.write(largeColumn);
    } finally {
        IOUtils.closeQuietly(out);
    }

    return redirectPath;
}

From source file:com.kylinolap.common.persistence.HBaseResourceStoreTest.java

License:Apache License

@Test
public void testHBaseStoreWithLargeCell() throws Exception {
    String path = "/cube/_test_large_cell.json";
    String largeContent = "THIS_IS_A_LARGE_CELL";
    StringEntity content = new StringEntity(largeContent);
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    int origSize = config.getHBaseKeyValueSize();
    ResourceStore store = ResourceStore.getStore(KylinConfig.getInstanceFromEnv());

    try {
        config.setProperty("kylin.hbase.client.keyvalue.maxsize", String.valueOf(largeContent.length() - 1));

        store.deleteResource(path);

        store.putResource(path, content, StringEntity.serializer);
        assertTrue(store.exists(path));
        StringEntity t = store.getResource(path, StringEntity.class, StringEntity.serializer);
        assertEquals(content, t);

        Path redirectPath = ((HBaseResourceStore) store).bigCellHDFSPath(path);
        Configuration hconf = HadoopUtil.getCurrentConfiguration();
        FileSystem fileSystem = FileSystem.get(hconf);
        assertTrue(fileSystem.exists(redirectPath));

        FSDataInputStream in = fileSystem.open(redirectPath);
        assertEquals(largeContent, in.readUTF());
        in.close();

        store.deleteResource(path);
    } finally {
        config.setProperty("kylin.hbase.client.keyvalue.maxsize", "" + origSize);
        store.deleteResource(path);
    }
}