Usage examples for org.apache.hadoop.fs.FileUtil.stat2Paths
public static Path[] stat2Paths(FileStatus[] stats)
From source file:gr.ntua.h2rdf.client.ResultSetOpenRDFBindings.java
License:Open Source License
public ResultSetOpenRDFBindings(String out, H2RDFConf hconf, HashMap<Integer, String> varIds) { //System.out.println(out); this.varIds = varIds; this.hconf = hconf; Configuration conf = hconf.getConf();// new Configuration(); //System.out.println(conf.get("fs.default.name")); try {//from ww w . ja v a 2 s .c o m try { //Configuration c = HBaseConfiguration.create(); this.table = new HTable(conf, hconf.getTable() + "_Index"); fs = FileSystem.get(new URI(conf.get("fs.default.name")), conf, hconf.getUser()); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (URISyntaxException e) { // TODO Auto-generated catch block e.printStackTrace(); } if (out.startsWith("output/")) { Path p = new Path(out); o = p; if (fs.isFile(p)) {//file outfiles = new Path[1]; outfiles[0] = p; filesNo = 1; nextfile = 1; outfile = new SequenceFile.Reader(fs, p, conf); //outfile = fs.open(p); } else if (fs.exists(p)) {//MapReduce folder Path[] outf = FileUtil.stat2Paths(fs.listStatus(p)); int paths = 0; outfiles = new Path[outf.length]; for (Path f : outf) { if (f.getName().startsWith("part")) { outfiles[paths] = f; paths++; } } filesNo = paths; nextfile = 1; outfile = new SequenceFile.Reader(fs, outfiles[0], conf); } } else { o = null; filesNo = 1; nextfile = 1; //outfile = new SequenceFile.Reader(fs, p, conf); //outfile = new ByteArrayInputStream(out.getBytes()); } lineFinished = true; } catch (IOException e) { e.printStackTrace(); } }
From source file:gr.ntua.h2rdf.inputFormat.HFileRecordReaderBitmapBufferedNoScan.java
License:Open Source License
/** * Initializes the reader./* w w w.j av a 2s. c o m*/ * * @param inputsplit The split to work with. * @param context The current task context. * @throws IOException When setting up the reader fails. * @throws InterruptedException When the job is aborted. * @see org.apache.hadoop.mapreduce.RecordReader#initialize( * org.apache.hadoop.mapreduce.InputSplit, * org.apache.hadoop.mapreduce.TaskAttemptContext) */ @Override public void initialize(InputSplit inputsplit, TaskAttemptContext context) throws IOException, InterruptedException { tsplit = (TableColumnSplit) inputsplit; EWAHCompressedBitmap regionBitmap = tsplit.getRegionBitmap(); BitmapIter = regionBitmap.iterator(); //System.out.println("contains: "+regionBitmap.cardinality()+" size:"+regionBitmap.sizeInBytes()); table = new HTable(HBconf, tsplit.getTable()); //table.flushCommits(); startr = tsplit.getStartRow(); stopr = tsplit.getStopRow(); System.out.println("start: " + Bytes.toStringBinary(startr)); System.out.println("stop: " + Bytes.toStringBinary(stopr)); FileSystem fs = FileSystem.get(HBconf); String dir = ""; if (startr.length == 1) { byte[] st = new byte[stopr.length - 1]; for (int i = 0; i < st.length; i++) { st[i] = stopr[i]; } dir = "/hbase/" + tsplit.getTable() + "/" + table.getRegionLocation(stopr).getRegionInfo().getEncodedName() + "/A"; } else { dir = "/hbase/" + tsplit.getTable() + "/" + table.getRegionLocation(startr).getRegionInfo().getEncodedName() + "/A"; } Path regionDir = new Path(dir); Path file = null; Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(regionDir)); for (Path hfile : hfiles) { file = new Path(dir + "/" + hfile.getName()); } System.out.println(dir + "/" + file.getName()); reader = HFile.createReader(fs, file, new CacheConfig(HBconf)); //reader = new Reader(fs, file, null, false); // Load up the index. reader.loadFileInfo(); // Get a scanner that caches and that does not use pread. 
scanner = reader.getScanner(false, true); if (!BitmapIter.hasNext()) { more = false; return; } nextBitmapRegion(); /*if(!scanner.isSeeked()){ //System.out.println(table.getRegionLocation(startr).getRegionInfo().getRegionId()); scanner.seekTo(); }*/ first = 1; while (scanner.next()) { kv = scanner.getKeyValue(); if (Bytes.compareTo(kv.getRow(), startr) >= 0 && Bytes.compareTo(kv.getRow(), stopr) <= 0) { System.out.println("curkey: " + Bytes.toStringBinary(kv.getRow())); more = true; break; } if (Bytes.compareTo(kv.getRow(), stopr) > 0) { if (BitmapIter.hasNext()) { nextBitmapRegion(); } else { more = false; break; } } } }
From source file:gr.ntua.h2rdf.inputFormat.HFileRecordReaderBitmapCrossRegion.java
License:Open Source License
private void nextBitmapRegion() { regions++;//from w w w . jav a 2s . com //System.out.println("next region: "+regions); if (!BitmapIter.hasNext()) { System.out.println("Bitmap end"); more = false; return; } int curBitmapInt = BitmapIter.next(); processed++; //System.out.println(curBitmapInt); byte[] curBitmapByteInt = Bytes.toBytes(curBitmapInt); //System.out.println("curInt: "+Bytes.toStringBinary(curBitmapByteInt)); startr = new byte[8 + 1]; stopr = new byte[8 + 1]; startr[0] = (byte) 1; stopr[0] = (byte) 1; for (int j = 1; j < curBitmapByteInt.length + 1; j++) { startr[j] = curBitmapByteInt[j - 1]; stopr[j] = curBitmapByteInt[j - 1]; } for (int j = 1 + curBitmapByteInt.length; j < startr.length; j++) { startr[j] = (byte) 0; stopr[j] = (byte) 255; } //System.out.println("Next Bitmap start: "+Bytes.toStringBinary(startr)); KeyValue rowKey = KeyValue.createFirstOnRow(startr); try { table.flushCommits(); if (lastRowKey == null) {//read the first HFile of the bitmap FileSystem fs = FileSystem.get(HBconf); String dir = ""; dir = "/hbase/" + tsplit.getTable() + "/" + table.getRegionLocation(startr).getRegionInfo().getEncodedName() + "/A"; Path regionDir = new Path(dir); Path file = null; Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(regionDir)); for (Path hfile : hfiles) { file = new Path(dir + "/" + hfile.getName()); } //System.out.println(dir); System.out.println("First HFile: " + dir + "/" + file.getName()); reader = HFile.createReader(fs, file, new CacheConfig(HBconf)); //reader = new Reader(fs, file, null, false); // Load up the index. lastRowKey = table.getRegionLocation(startr).getRegionInfo().getEndKey(); reader.loadFileInfo(); // Get a scanner that caches and that does not use pread. 
scanner = reader.getScanner(false, true); scanner.seekBefore(rowKey.getKey()); first = 1; while (scanner.next()) { kv = scanner.getKeyValue(); //System.out.println("first key in Hfile: "+Bytes.toStringBinary(kv.getRow())); if (Bytes.compareTo(kv.getRow(), startr) >= 0 && Bytes.compareTo(kv.getRow(), stopr) <= 0) { //System.out.println("curkey: "+Bytes.toStringBinary(kv.getRow())); more = true; break; } if (Bytes.compareTo(kv.getRow(), stopr) > 0) { if (BitmapIter.hasNext()) { nextBitmapRegion(); } else { System.out.println("Bitmap end"); more = false; break; } } } } else { if (Bytes.compareTo(lastRowKey, startr) > 0) {//same region scanner.seekBefore(rowKey.getKey()); //System.out.println("Same region"); first = 1; while (scanner.next()) { kv = scanner.getKeyValue(); //System.out.println("next key: "+Bytes.toStringBinary(kv.getRow())); if (Bytes.compareTo(kv.getRow(), startr) >= 0 && Bytes.compareTo(kv.getRow(), stopr) <= 0) { //System.out.println("curkey: "+Bytes.toStringBinary(kv.getRow())); more = true; break; } if (Bytes.compareTo(kv.getRow(), stopr) > 0) { if (BitmapIter.hasNext()) { nextBitmapRegion(); } else { System.out.println("Bitmap end"); more = false; break; } } } } else {//open new HFile FileSystem fs = FileSystem.get(HBconf); String dir = ""; dir = "/hbase/" + tsplit.getTable() + "/" + table.getRegionLocation(startr).getRegionInfo().getEncodedName() + "/A"; Path regionDir = new Path(dir); Path file = null; Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(regionDir)); for (Path hfile : hfiles) { file = new Path(dir + "/" + hfile.getName()); } System.out.println("New Hfile" + dir + "/" + file.getName()); reader = HFile.createReader(fs, file, new CacheConfig(HBconf)); //reader = new Reader(fs, file, null, false); // Load up the index. lastRowKey = table.getRegionLocation(startr).getRegionInfo().getEndKey(); reader.loadFileInfo(); // Get a scanner that caches and that does not use pread. 
scanner = reader.getScanner(false, true); scanner.seekBefore(rowKey.getKey()); first = 1; while (scanner.next()) { kv = scanner.getKeyValue(); //System.out.println("first key in Hfile: "+Bytes.toStringBinary(kv.getRow())); if (Bytes.compareTo(kv.getRow(), startr) >= 0 && Bytes.compareTo(kv.getRow(), stopr) <= 0) { //System.out.println("curkey: "+Bytes.toStringBinary(kv.getRow())); more = true; break; } if (Bytes.compareTo(kv.getRow(), stopr) > 0) { if (BitmapIter.hasNext()) { nextBitmapRegion(); } else { System.out.println("Bitmap end"); more = false; break; } } } } } } catch (IOException e) { e.printStackTrace(); } }
From source file:gr.ntua.h2rdf.inputFormat.HFileRecordReaderBitmapCrossRegion.java
License:Open Source License
private boolean nextHfile() { FileSystem fs;//from w w w . j a va 2 s . com try { fs = FileSystem.get(HBconf); String dir = ""; dir = "/hbase/" + tsplit.getTable() + "/" + table.getRegionLocation(lastRowKey).getRegionInfo().getEncodedName() + "/A"; Path regionDir = new Path(dir); Path file = null; Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(regionDir)); for (Path hfile : hfiles) { file = new Path(dir + "/" + hfile.getName()); } //System.out.println("Last row key"+Bytes.toStringBinary(lastRowKey)); System.out.println("HFile: " + dir + "/" + file.getName()); reader = HFile.createReader(fs, file, new CacheConfig(HBconf)); //reader = new Reader(fs, file, null, false); // Load up the index. lastRowKey = table.getRegionLocation(lastRowKey).getRegionInfo().getEndKey(); reader.loadFileInfo(); // Get a scanner that caches and that does not use pread. scanner = reader.getScanner(false, true); scanner.seekTo(); first = 1; if (scanner.next()) { kv = scanner.getKeyValue(); //System.out.println("next key after change: "+Bytes.toStringBinary(kv.getRow())); if (Bytes.compareTo(kv.getRow(), startr) >= 0 && Bytes.compareTo(kv.getRow(), stopr) <= 0) { byte[] indexKey = kv.getRow(); byte[] indexKey1 = new byte[8]; for (int i = 0; i < indexKey1.length; i++) { indexKey1[i] = indexKey[i + 1]; } value.set(tsplit.getFname() + "!" + Bytes.toLong(indexKey1) + "$$" + Bytes.toString(kv.getValue())); //System.out.println("used key: "+Bytes.toStringBinary(kv.getRow())); return true; } if (Bytes.compareTo(kv.getRow(), stopr) > 0) { if (BitmapIter.hasNext()) { nextBitmapRegion(); if (!scanner.isSeeked()) {//bug System.out.println("Not seeked"); return nextHfile(); } if (more) { byte[] indexKey = kv.getRow(); byte[] indexKey1 = new byte[8]; for (int i = 0; i < indexKey1.length; i++) { indexKey1[i] = indexKey[i + 1]; } value.set(tsplit.getFname() + "!" 
+ Bytes.toLong(indexKey1) + "$$" + Bytes.toString(kv.getValue())); //System.out.println("used key after change: "+Bytes.toStringBinary(kv.getRow())); } first = 2; return more; } else { System.out.println("Bitmap end"); return false; } } else { System.out.println("Bug"); return false; } } } catch (IOException e) { e.printStackTrace(); } System.out.println("Bug"); return false; }
From source file:gr.ntua.h2rdf.inputFormat.HFileRecordReaderBufferedNoScan.java
License:Open Source License
/** * Initializes the reader./* w w w. j av a2 s . co m*/ * * @param inputsplit The split to work with. * @param context The current task context. * @throws IOException When setting up the reader fails. * @throws InterruptedException When the job is aborted. * @see org.apache.hadoop.mapreduce.RecordReader#initialize( * org.apache.hadoop.mapreduce.InputSplit, * org.apache.hadoop.mapreduce.TaskAttemptContext) */ @Override public void initialize(InputSplit inputsplit, TaskAttemptContext context) throws IOException, InterruptedException { tsplit = (TableColumnSplit) inputsplit; HTable table = new HTable(HBconf, tsplit.getTable()); //HTable table = new HTable( HBconf, "H2RDF" ); //table.flushCommits(); byte[] startr = tsplit.getStartRow(); stopr = tsplit.getStopRow(); System.out.println("start: " + Bytes.toStringBinary(startr)); System.out.println("stop: " + Bytes.toStringBinary(stopr)); FileSystem fs = FileSystem.get(HBconf); String dir = ""; if (startr.length == 1) { byte[] st = new byte[stopr.length - 1]; for (int i = 0; i < st.length; i++) { st[i] = stopr[i]; } dir = "/hbase/" + tsplit.getTable() + "/" + table.getRegionLocation(stopr).getRegionInfo().getEncodedName() + "/A"; } else { dir = "/hbase/" + tsplit.getTable() + "/" + table.getRegionLocation(startr).getRegionInfo().getEncodedName() + "/A"; } Path regionDir = new Path(dir); Path file = null; Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(regionDir)); for (Path hfile : hfiles) { file = new Path(dir + "/" + hfile.getName()); } System.out.println(dir + "/" + file.getName()); reader = HFile.createReader(fs, file, new CacheConfig(HBconf)); //reader = new Reader(fs, file, null, false); // Load up the index. reader.loadFileInfo(); // Get a scanner that caches and that does not use pread. 
scanner = reader.getScanner(false, true); KeyValue rowKey = KeyValue.createFirstOnRow(startr); scanner.seekBefore(rowKey.getKey()); id = table.getRegionLocation(startr).getRegionInfo().getRegionId(); if (!scanner.isSeeked()) { //System.out.println(table.getRegionLocation(startr).getRegionInfo().getRegionId()); scanner.seekTo(); } while (scanner.next()) { kv = scanner.getKeyValue(); if (Bytes.compareTo(kv.getRow(), startr) >= 0) { more = true; break; } } if (!tsplit.getFname().startsWith("T")) { /*FSDataInputStream v = fs.open(new Path(context.getConfiguration().get("nikos.inputfile"))); BufferedReader read = new BufferedReader(new InputStreamReader(v)); read.readLine(); read.readLine(); v.close();*/ String vars = tsplit.getVars(); System.out.println(vars); StringTokenizer vtok = new StringTokenizer(vars); varsno = 0; while (vtok.hasMoreTokens()) { vtok.nextToken(); varsno++; } if (varsno == 1) { StringTokenizer vtok2 = new StringTokenizer(vars); v1 = vtok2.nextToken(); } else if (varsno == 2) { StringTokenizer vtok2 = new StringTokenizer(vars); v1 = vtok2.nextToken(); v2 = vtok2.nextToken(); } progress = (float) 0.2; } else { varsno = 15; } }
From source file:gr.ntua.h2rdf.inputFormat.HFileRecordReaderNoScan.java
License:Open Source License
/** * Initializes the reader./*w w w .j a v a2s . co m*/ * * @param inputsplit The split to work with. * @param context The current task context. * @throws IOException When setting up the reader fails. * @throws InterruptedException When the job is aborted. * @see org.apache.hadoop.mapreduce.RecordReader#initialize( * org.apache.hadoop.mapreduce.InputSplit, * org.apache.hadoop.mapreduce.TaskAttemptContext) */ @Override public void initialize(InputSplit inputsplit, TaskAttemptContext context) throws IOException, InterruptedException { tsplit = (TableColumnSplit) inputsplit; HTable table = new HTable(HBconf, tsplit.getTable()); byte[] rowid = tsplit.getStartRow(); byte[] startr = new byte[19]; stopr = new byte[19]; for (int i = 0; i < rowid.length; i++) { startr[i] = rowid[i]; stopr[i] = rowid[i]; } if (rowid.length == 18) { startr[18] = (byte) 0; stopr[18] = (byte) MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS; } if (rowid.length == 10) { for (int i = 10; i < startr.length - 1; i++) { startr[i] = (byte) 0; stopr[i] = (byte) 255; } startr[startr.length - 1] = (byte) 0; stopr[startr.length - 1] = (byte) MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS; } FileSystem fs = FileSystem.get(HBconf); String dir = "/hbase/" + tsplit.getTable() + "/" + table.getRegionLocation(startr).getRegionInfo().getEncodedName() + "/A"; Path regionDir = new Path(dir); Path file = null; Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(regionDir)); for (Path hfile : hfiles) { file = new Path(dir + "/" + hfile.getName()); } reader = HFile.createReader(fs, file, new CacheConfig(HBconf)); //reader = new Reader(fs, file, null, false); // Load up the index. reader.loadFileInfo(); // Get a scanner that caches and that does not use pread. 
scanner = reader.getScanner(false, true); KeyValue rowKey = KeyValue.createFirstOnRow(startr); scanner.seekBefore(rowKey.getKey()); while (scanner.next()) { kv = scanner.getKeyValue(); if (Bytes.compareTo(kv.getRow(), startr) >= 0) { more = true; return; } } more = false; }
From source file:gr.ntua.h2rdf.inputFormat.TableInputFormatBase.java
License:Open Source License
private void splitSubclass(Scan scan, String tname, String vars, String fname) { byte[] rowid = scan.getStartRow(); String col = ""; if (scan.hasFamilies()) { col = Bytes.toString(scan.getFamilies()[0]); }/* w w w .ja v a 2s .c o m*/ //System.out.println(Bytes.toString(scan.getFamilies()[0])); byte[] startRow = new byte[rowlength + 2]; byte[] stopRow = new byte[rowlength + 2]; if (scan.getFamilies()[0].length <= 1) {//rowid.length==rowlength) { byte[] objid = new byte[totsize]; for (int i = 0; i < totsize; i++) { objid[i] = rowid[i + totsize + 1]; } byte[] classrowStart = new byte[rowlength + 2]; byte[] classrowStop = new byte[rowlength + 2]; classrowStart[0] = (byte) 3; //pos for (int i1 = 0; i1 < totsize; i1++) { classrowStart[i1 + 1] = SUBCLASS[i1]; } for (int i1 = 0; i1 < totsize; i1++) { classrowStart[i1 + totsize + 1] = objid[i1]; } for (int i1 = 0; i1 < classrowStart.length - 1; i1++) { classrowStop[i1] = classrowStart[i1]; } classrowStart[classrowStart.length - 2] = (byte) 0; classrowStart[classrowStart.length - 1] = (byte) 0; classrowStop[classrowStop.length - 2] = (byte) 255; classrowStop[classrowStop.length - 1] = (byte) 255; byte[] bid, a; a = Bytes.toBytes("A"); bid = new byte[a.length]; for (int i = 0; i < a.length; i++) { bid[i] = a[i]; } Scan scan1 = new Scan(); scan1.setStartRow(classrowStart); scan1.setStopRow(classrowStop); scan1.setCaching(254); scan1.addFamily(bid); try { ResultScanner resultScanner = table.getScanner(scan1); Result result = null; while ((result = resultScanner.next()) != null) { System.out.println("Subclasses: " + result.size()); Iterator<KeyValue> it = result.list().iterator(); while (it.hasNext()) { KeyValue kv = it.next(); byte[] qq = kv.getQualifier(); for (int ik = 0; ik < totsize + 1; ik++) { startRow[ik] = rowid[ik]; stopRow[ik] = rowid[ik]; } for (int ik = 0; ik < totsize; ik++) { startRow[ik + totsize + 1] = qq[ik]; stopRow[ik + totsize + 1] = qq[ik]; } /*for (int i = 0; i < startRow.length-1; i++) { if(i>=9 && 
i<startRow.length-1){ startRow[i]=qq[i-9]; stopRow[i]=qq[i-9]; } else{ startRow[i]=rowid[i]; stopRow[i]=rowid[i]; } }*/ startRow[rowlength] = (byte) 0; startRow[rowlength + 1] = (byte) 0; stopRow[rowlength] = (byte) MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS; stopRow[rowlength + 1] = (byte) MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS; //System.out.println(Bytes.toStringBinary(startRow)); //addSplit(tname, vars, fname, startRow, stopRow, col); scan.setStartRow(startRow); scan.setStopRow(stopRow); splitSubclass(scan, tname, vars, fname); } } } catch (IOException e) { e.printStackTrace(); } /*Get get = new Get(classrow); try { Result result = table.get(get); System.out.println("Subclasses: "+result.size()); if(result.size()!=0){ KeyValue[] vv = result.raw(); for (int j = 0; j < vv.length; j++) { byte[] qq = vv[j].getQualifier(); for (int i = 0; i < startRow.length-1; i++) { if(i>=9 && i<startRow.length-1){ startRow[i]=qq[i-9]; stopRow[i]=qq[i-9]; } else{ startRow[i]=rowid[i]; stopRow[i]=rowid[i]; } } startRow[17] =(byte)0; stopRow[17] =(byte)MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS; addSplit(tname, vars, fname, startRow, stopRow, scan.getInputColumns()); } } } catch (IOException e) { e.printStackTrace(); }*/ for (int ik = 0; ik < rowid.length; ik++) { startRow[ik] = rowid[ik]; stopRow[ik] = rowid[ik]; } startRow[rowlength] = (byte) 0; startRow[rowlength + 1] = (byte) 0; stopRow[rowlength] = (byte) MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS; stopRow[rowlength + 1] = (byte) MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS; addSplit(tname, vars, fname, startRow, stopRow, col); } else if (scan.hasFamilies()) { if (rowid.length == totsize + 1) { for (int ik = 0; ik < rowid.length; ik++) { startRow[ik] = rowid[ik]; stopRow[ik] = rowid[ik]; } for (int ik = totsize + 1; ik < startRow.length - 2; ik++) { startRow[ik] = (byte) 0; stopRow[ik] = (byte) 255; } startRow[rowlength] = (byte) 0; startRow[rowlength + 1] = (byte) 0; stopRow[rowlength] = (byte) 
MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS; stopRow[rowlength + 1] = (byte) MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS; addSplit(tname, vars, fname, startRow, stopRow, col); } else { byte[] stop = scan.getStopRow(); if (stop.length <= 1) stop = rowid; System.out.println(Bytes.toStringBinary(stop)); startRow[0] = rowid[0]; stopRow[0] = stop[0]; for (int i = 1; i < 1 + 2 * totsize; i++) { startRow[i] = rowid[i]; stopRow[i] = stop[i]; } startRow[rowlength] = (byte) 0; startRow[rowlength + 1] = (byte) 0; stopRow[rowlength] = (byte) MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS; stopRow[rowlength + 1] = (byte) MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS; System.out.println(Bytes.toStringBinary(startRow)); System.out.println(Bytes.toStringBinary(stopRow)); addSplit(tname, vars, fname, startRow, stopRow, col); } } else if (rowid.length == 1) {//reverse index scan System.out.println("Reverse index scan"); String bitmapDir = conf.get("nikos.inputfile"); FileSystem fs; try { fs = FileSystem.get(conf); Path[] bitmapFiles = FileUtil.stat2Paths(fs.listStatus(new Path(bitmapDir))); ewahBitmap = new EWAHCompressedBitmap(); for (Path bitmapFile : bitmapFiles) { //System.out.println(bitmapFile); EWAHCompressedBitmap ewahBitmapTemp = new EWAHCompressedBitmap(); ewahBitmapTemp.deserialize(fs.open(bitmapFile)); //System.out.println("bitmap size in bytes: "+ewahBitmapTemp.sizeInBytes()); //System.out.println("cardinality: "+ewahBitmapTemp.cardinality()); ewahBitmap = ewahBitmap.or(ewahBitmapTemp); //System.out.println("Total bitmap size in bytes: "+ewahBitmap.sizeInBytes()); //System.out.println("Total cardinality: "+ewahBitmap.cardinality()); } //System.out.println("Total bitmap size in bytes: "+ewahBitmap.sizeInBytes()); //System.out.println("Total cardinality: "+ewahBitmap.cardinality()); int card = ewahBitmap.cardinality() / max_tasks + 1; Iterator<Integer> bitmapIt = ewahBitmap.iterator(); int size = 0, numSplits = 0, firstOfRegion = 0; String[] slaves = { "clone22", "clone29" 
}; EWAHCompressedBitmap ewahBitmapTemp = new EWAHCompressedBitmap(); while (bitmapIt.hasNext()) { if (size == 0) { firstOfRegion = bitmapIt.next(); ewahBitmapTemp.set(firstOfRegion); } else { ewahBitmapTemp.set(bitmapIt.next()); } size++; if (size >= card) { numSplits++; String regionlocation = slaves[numSplits % 2]; byte[] nextValueByteInt = Bytes.toBytes(firstOfRegion); //byte[] nextValueByte = Bytes.toBytes(new Long(nextValue*Integer.MAX_VALUE)) ; byte[] nextKey = new byte[totsize + 1]; nextKey[0] = (byte) 1; for (int j = 1; j < nextValueByteInt.length; j++) { nextKey[j] = nextValueByteInt[j - 1]; } for (int j = 1 + nextValueByteInt.length; j < nextKey.length; j++) { nextKey[j] = (byte) 0; } //String regionlocation = fs.getFileBlockLocations(fs.getFileStatus(bitmapFile), new Long(0), fs.getFileStatus(bitmapFile).getLen())[0].getHosts()[0]; //System.out.println("regionlocation: "+regionlocation); //System.out.println("Adding split "+numSplits); //System.out.println("Cardinality "+ewahBitmapTemp.cardinality()); InputSplit split = new TableColumnSplit(tname, vars, fname, nextKey.clone(), scan.getStopRow().clone(), col, regionlocation, ewahBitmapTemp); splits.add(split); size = 0; ewahBitmapTemp = new EWAHCompressedBitmap(); } } if (size != 0) { numSplits++; String regionlocation = slaves[numSplits % 2]; byte[] nextValueByteInt = Bytes.toBytes(firstOfRegion); //byte[] nextValueByte = Bytes.toBytes(new Long(nextValue*Integer.MAX_VALUE)) ; byte[] nextKey = new byte[totsize + 1]; nextKey[0] = (byte) 1; for (int j = 1; j < nextValueByteInt.length; j++) { nextKey[j] = nextValueByteInt[j - 1]; } for (int j = 1 + nextValueByteInt.length; j < nextKey.length; j++) { nextKey[j] = (byte) 0; } //String regionlocation = fs.getFileBlockLocations(fs.getFileStatus(bitmapFile), new Long(0), fs.getFileStatus(bitmapFile).getLen())[0].getHosts()[0]; //System.out.println("regionlocation: "+regionlocation); //System.out.println("Adding split "+numSplits); 
//System.out.println("Cardinality "+ewahBitmapTemp.cardinality()); InputSplit split = new TableColumnSplit(tname, vars, fname, nextKey.clone(), scan.getStopRow().clone(), col, regionlocation, ewahBitmapTemp); splits.add(split); } } catch (IOException e) { e.printStackTrace(); } //addSplitIndexTranslate(tname, vars, fname, scan.getStartRow(), scan.getStopRow(), col); } }
From source file:gr.ntua.h2rdf.loadTriples.SortIds.java
License:Apache License
public static void loadHFiles(String[] args) throws Exception { Configuration conf = new Configuration(); HBaseAdmin hadmin = new HBaseAdmin(conf); Path hfofDir = new Path(args[1]); FileSystem fs = hfofDir.getFileSystem(conf); //if (!fs.exists(hfofDir)) { // throw new FileNotFoundException("HFileOutputFormat dir " + // hfofDir + " not found"); //}/* w ww . j a v a2 s. c o m*/ FileStatus[] familyDirStatuses = fs.listStatus(hfofDir); //if (familyDirStatuses == null) { // throw new FileNotFoundException("No families found in " + hfofDir); //} int length = 0; byte[][] splits = new byte[18000][]; for (FileStatus stat : familyDirStatuses) { if (!stat.isDir()) { continue; } Path familyDir = stat.getPath(); // Skip _logs, etc if (familyDir.getName().startsWith("_")) continue; //byte[] family = familyDir.getName().getBytes(); Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir)); for (Path hfile : hfiles) { if (hfile.getName().startsWith("_")) continue; HFile.Reader hfr = HFile.createReader(fs, hfile, new CacheConfig(conf)); //HFile.Reader hfr = new HFile.Reader(fs, hfile, null, false); final byte[] first; try { hfr.loadFileInfo(); first = hfr.getFirstRowKey(); } finally { hfr.close(); } splits[length] = first.clone(); length++; } } //System.out.println(length); byte[][] splits1 = new byte[length][]; for (int i = 0; i < splits1.length; i++) { splits1[i] = splits[i]; } Arrays.sort(splits1, Bytes.BYTES_COMPARATOR); //HTableDescriptor desc = new HTableDescriptor("H2RDF"); HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_Index"); HColumnDescriptor family = new HColumnDescriptor("1"); family.setCompressionType(Algorithm.GZ); desc.addFamily(family); HColumnDescriptor family2 = new HColumnDescriptor("2"); family2.setCompressionType(Algorithm.GZ); desc.addFamily(family2); //for (int i = 0; i < splits.length; i++) { // System.out.println(Bytes.toStringBinary(splits[i])); //} conf.setInt("zookeeper.session.timeout", 600000); if (hadmin.tableExists(TABLE_NAME + 
"_Index")) { //hadmin.disableTable(TABLE_NAME); //hadmin.deleteTable(TABLE_NAME); } else { hadmin.createTable(desc, splits1); } //hadmin.createTable(desc); String[] args1 = new String[2]; args1[0] = args[1]; args1[1] = TABLE_NAME + "_Index"; //args1[1]="new2"; ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args1); }
From source file:gr.ntua.h2rdf.loadTriples.TranslateAndImport.java
License:Apache License
private void loadHFiles() throws Exception { Configuration conf = HBaseConfiguration.create(); conf.addResource("hbase-default.xml"); conf.addResource("hbase-site.xml"); HBaseAdmin hadmin = new HBaseAdmin(conf); Path hfofDir = new Path("out/I"); FileSystem fs = hfofDir.getFileSystem(conf); //if (!fs.exists(hfofDir)) { // throw new FileNotFoundException("HFileOutputFormat dir " + // hfofDir + " not found"); //}//from w ww . java 2 s . c o m // FileStatus[] familyDirStatuses = fs.listStatus(hfofDir); //if (familyDirStatuses == null) { // throw new FileNotFoundException("No families found in " + hfofDir); //} int length = 0; byte[][] splits = new byte[18000][]; Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(hfofDir)); for (Path hfile : hfiles) { if (hfile.getName().startsWith("_")) continue; HFile.Reader hfr = HFile.createReader(fs, hfile, new CacheConfig(conf)); //HFile.Reader hfr = new HFile.Reader(fs, hfile, null, false); final byte[] first; try { hfr.loadFileInfo(); first = hfr.getFirstRowKey(); } finally { hfr.close(); } //System.out.println("out/I/"+hfile.getName()+" \t "+Bytes.toStringBinary(first)); splits[length] = first.clone(); length++; } byte[][] splits1 = new byte[length][]; for (int i = 0; i < splits1.length; i++) { splits1[i] = splits[i]; } Arrays.sort(splits1, Bytes.BYTES_COMPARATOR); HTableDescriptor desc = new HTableDescriptor(TABLE_NAME); HColumnDescriptor family = new HColumnDescriptor("I"); family.setCompressionType(Algorithm.SNAPPY); desc.addFamily(family); family = new HColumnDescriptor("S"); family.setCompressionType(Algorithm.SNAPPY); desc.addFamily(family); family = new HColumnDescriptor("T"); family.setCompressionType(Algorithm.SNAPPY); desc.addFamily(family); //family= new HColumnDescriptor("C"); //desc.addFamily(family); //for (int i = 0; i < splits.length; i++) { // System.out.println(Bytes.toStringBinary(splits[i])); //} conf.setInt("zookeeper.session.timeout", 600000); if (hadmin.tableExists(TABLE_NAME)) { 
//hadmin.disableTable(TABLE_NAME); //hadmin.deleteTable(TABLE_NAME); } else { hadmin.createTable(desc, splits1); } //hadmin.createTable(desc); String[] args1 = new String[2]; args1[0] = "out"; args1[1] = TABLE_NAME; //args1[1]="new2"; ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args1); }
From source file:hadoop.TestingDriver.java
License:Open Source License
public static Configuration addPathToDC(Configuration conf, String path) throws IOException { FileSystem fs = FileSystem.get(conf); FileStatus[] fstatus = fs.globStatus(new Path(path)); Path[] listedPaths = FileUtil.stat2Paths(fstatus); for (Path p : listedPaths) { System.out.println(" Add File to DC " + p.toUri().toString()); DistributedCache.addCacheFile(p.toUri(), conf); }//from w ww .j a v a 2s . c o m return conf; }