List of usage examples for org.apache.hadoop.fs.FileSystem#getLocal
public static LocalFileSystem getLocal(Configuration conf) throws IOException
From source file:ivory.core.data.dictionary.PrefixEncodedLexicographicallySortedDictionaryTest.java
License:Apache License
@Test public void test1() throws IOException { // This is the ground truth mapping between term and term id. Map<String, Integer> data = Maps.newLinkedHashMap(); data.put("a", 0); data.put("aa", 1); data.put("aaa", 2); data.put("aaaa", 3); data.put("aaaaaa", 4); data.put("aab", 5); data.put("aabb", 6); data.put("aaabcb", 7); data.put("aad", 8); data.put("abd", 9); data.put("abde", 10); PrefixEncodedLexicographicallySortedDictionary m = new PrefixEncodedLexicographicallySortedDictionary(8); // Add entries, in order. for (String key : data.keySet()) { m.add(key);/* w ww .j a v a 2s . co m*/ } // Verify size. assertEquals(data.size(), m.size()); // Verify bidirectional mapping. for (Map.Entry<String, Integer> entry : data.entrySet()) { assertEquals((int) entry.getValue(), m.getId(entry.getKey())); assertEquals(entry.getKey(), m.getTerm(entry.getValue())); } Iterator<String> iter1 = m.iterator(); Iterator<String> iter2 = data.keySet().iterator(); for (int i = 0; i < m.size(); i++) { assertTrue(iter1.hasNext()); assertTrue(iter2.hasNext()); assertEquals(iter2.next(), iter1.next()); } assertFalse(iter1.hasNext()); assertFalse(iter2.hasNext()); assertEquals(0.6923077, m.getCompresssionRatio(), 10e-6); FileSystem fs = FileSystem.getLocal(new Configuration()); m.store("tmp.dat", fs); PrefixEncodedLexicographicallySortedDictionary n = PrefixEncodedLexicographicallySortedDictionary .load(new Path("tmp.dat"), fs); // Verify size. assertEquals(data.size(), n.size()); // Verify bidirectional mapping. for (Map.Entry<String, Integer> entry : data.entrySet()) { assertEquals((int) entry.getValue(), n.getId(entry.getKey())); assertEquals(entry.getKey(), n.getTerm(entry.getValue())); } iter1 = m.iterator(); iter2 = data.keySet().iterator(); for (int i = 0; i < m.size(); i++) { assertEquals(iter2.next(), iter1.next()); } assertFalse(iter1.hasNext()); assertFalse(iter2.hasNext()); fs.delete(new Path("tmp.dat"), true); }
From source file:ivory.core.data.dictionary.PrefixEncodedLexicographicallySortedDictionaryTest.java
License:Apache License
/**
 * Loads a dictionary from the plain-text file {@code etc/dictionary-test.txt}, spot-checks the
 * ids of a handful of terms and the compression ratio, then stores the dictionary to
 * {@code tmp.dat} on the local file system, loads it back and repeats the id checks on the
 * reloaded instance.
 *
 * @throws IOException if reading the input file or storing, loading or deleting the temporary
 *     file fails
 */
@Test
public void test2() throws IOException {
  // Sample terms and the ids they are expected to map to (parallel arrays).
  String[] terms = {
    "a", "a1", "aardvark", "affair", "airwolf", "anntaylor", "augustus", "azzuz"
  };
  int[] ids = {0, 1, 248, 2291, 3273, 6845, 11187, 12339};

  FileSystem fs = FileSystem.getLocal(new Configuration());
  PrefixEncodedLexicographicallySortedDictionary m =
      PrefixEncodedLexicographicallySortedDictionary.loadFromPlainTextFile(
          new Path("etc/dictionary-test.txt"), fs, 8);

  for (int i = 0; i < terms.length; i++) {
    assertEquals("id of " + terms[i], ids[i], m.getId(terms[i]));
  }
  assertEquals(0.5631129, m.getCompresssionRatio(), 10e-6);

  Path tmp = new Path("tmp.dat");
  try {
    m.store("tmp.dat", fs);
    PrefixEncodedLexicographicallySortedDictionary n =
        PrefixEncodedLexicographicallySortedDictionary.load(tmp, fs);

    for (int i = 0; i < terms.length; i++) {
      assertEquals("id of " + terms[i] + " after reload", ids[i], n.getId(terms[i]));
    }
  } finally {
    // Remove the temporary file even when an assertion above fails.
    fs.delete(tmp, true);
  }
}
From source file:ivory.core.data.index.IntPostingsForwardIndex.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 1) { System.out.println("usage: [indexPath]"); System.exit(-1);//from w w w .ja v a 2 s . c om } long startingMemoryUse = MemoryUsageUtils.getUsedMemory(); Configuration conf = new Configuration(); IntPostingsForwardIndex index = new IntPostingsForwardIndex(args[0], FileSystem.getLocal(conf)); long endingMemoryUse = MemoryUsageUtils.getUsedMemory(); System.out.println("Memory usage: " + (endingMemoryUse - startingMemoryUse) + " bytes\n"); String term = null; BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in)); System.out.print("Look up postings of termid > "); while ((term = stdin.readLine()) != null) { int termid = Integer.parseInt(term); System.out.println(termid + ": " + index.getPostingsList(termid)); System.out.print("Look up postings of termid > "); } }
From source file:ivory.core.data.stat.PrefixEncodedGlobalStats.java
License:Apache License
public static void main(String[] args) throws Exception { //String indexPath = "/umd-lin/telsayed/indexes/medline04"; String indexPath = "c:/Research/ivory-workspace"; Configuration conf = new Configuration(); FileSystem fileSys = FileSystem.getLocal(conf); RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fileSys); Path termsFilePath = new Path(env.getIndexTermsData()); Path dfByTermFilePath = new Path(env.getDfByTermData()); Path cfByTermFilePath = new Path(env.getCfByTermData()); System.out.println("PrefixEncodedGlobalStats"); PrefixEncodedGlobalStats globalStatsMap = new PrefixEncodedGlobalStats(termsFilePath); System.out.println("PrefixEncodedGlobalStats1"); globalStatsMap.loadDFStats(dfByTermFilePath); System.out.println("PrefixEncodedGlobalStats2"); globalStatsMap.loadCFStats(cfByTermFilePath); System.out.println("PrefixEncodedGlobalStats3"); //String[] firstKeys = termIDMap.getDictionary().getFirstKeys(100); int nTerms = globalStatsMap.length(); System.out.println("nTerms: " + nTerms); /*for(int i = 0; i < nTerms; i++){ /*from ww w . ja v a 2 s. 
c o m*/ PairOfIntLong p = globalStatsMap.getStats(i); System.out.println(i+"\t"+p.getLeftElement() +"\t"+ p.getRightElement()); //if(i%10000 == 0) System.out.println(i+" terms so far ("+p+")."); }*/ String term; term = "0046"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "00565"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "01338"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "01hz"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "03x"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "0278x"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "0081"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "0183"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "0244"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "032"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); //for(int i = 1; i<=200; i++){ // term = termIDMap.getTerm(i); // System.out.println(i+"\t"+term+"\t"+termIDMap.getID(term)); //} }
From source file:ivory.core.data.stat.PrefixEncodedGlobalStatsWithIndex.java
License:Apache License
public static void main(String[] args) throws Exception { //String indexPath = "/umd-lin/telsayed/indexes/medline04"; String indexPath = "c:/Research/ivory-workspace"; Configuration conf = new Configuration(); FileSystem fileSys = FileSystem.getLocal(conf); RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fileSys); Path termsFilePath = new Path(env.getIndexTermsData()); Path dfByTermFilePath = new Path(env.getDfByTermData()); Path cfByTermFilePath = new Path(env.getCfByTermData()); Path idToTermFilePath = new Path(env.getIndexTermIdMappingData()); System.out.println("PrefixEncodedGlobalStats"); PrefixEncodedGlobalStatsWithIndex globalStatsMap = new PrefixEncodedGlobalStatsWithIndex(termsFilePath); System.out.println("PrefixEncodedGlobalStats1"); globalStatsMap.loadDFStats(dfByTermFilePath, idToTermFilePath, 0.2f, true); System.out.println("PrefixEncodedGlobalStats2"); globalStatsMap.loadCFStats(cfByTermFilePath, idToTermFilePath, 0.2f, false); System.out.println("PrefixEncodedGlobalStats3"); //String[] firstKeys = termIDMap.getDictionary().getFirstKeys(100); int nTerms = globalStatsMap.length(); System.out.println("nTerms: " + nTerms); /*for(int i = 0; i < nTerms; i++){ //from ww w . j ava2s. 
c om PairOfIntLong p = globalStatsMap.getStats(i); System.out.println(i+"\t"+p.getLeftElement() +"\t"+ p.getRightElement()); //if(i%10000 == 0) System.out.println(i+" terms so far ("+p+")."); }*/ String term; term = "0046"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "00565"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "01338"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "01hz"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "03x"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "0278x"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "0081"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "0183"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "0244"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); term = "032"; System.out.println(term + "\t" + globalStatsMap.getDF(term)); //for(int i = 1; i<=200; i++){ // term = termIDMap.getTerm(i); // System.out.println(i+"\t"+term+"\t"+termIDMap.getID(term)); //} }
From source file:ivory.data.PrefixEncodedTermSetTest.java
License:Apache License
@Test public void test1() throws IOException { PrefixEncodedTermSet m = new PrefixEncodedTermSet(8); m.add("a");//from ww w . ja va 2s . c o m m.add("aa"); m.add("aaa"); m.add("aaaa"); m.add("aaaaaa"); m.add("aab"); m.add("aabb"); m.add("aaabcb"); m.add("aad"); m.add("abd"); m.add("abde"); assertEquals(0, m.getIndex("a")); assertEquals(1, m.getIndex("aa")); assertEquals(2, m.getIndex("aaa")); assertEquals(3, m.getIndex("aaaa")); assertEquals(4, m.getIndex("aaaaaa")); assertEquals(5, m.getIndex("aab")); assertEquals(6, m.getIndex("aabb")); assertEquals(7, m.getIndex("aaabcb")); assertEquals(8, m.getIndex("aad")); assertEquals(9, m.getIndex("abd")); assertEquals(10, m.getIndex("abde")); Set<String> set = m.getKeySet(); Iterator<String> iter = set.iterator(); assertEquals(iter.next(), "a"); assertEquals(iter.next(), "aa"); assertEquals(iter.next(), "aaa"); assertEquals(iter.next(), "aaaa"); assertEquals(iter.next(), "aaaaaa"); assertEquals(iter.next(), "aab"); assertEquals(iter.next(), "aabb"); assertEquals(iter.next(), "aaabcb"); assertEquals(iter.next(), "aad"); assertEquals(iter.next(), "abd"); assertEquals(iter.next(), "abde"); assertEquals(0.6923077, m.getCompresssionRatio(), 10e-6); FileSystem fs = FileSystem.getLocal(new Configuration()); m.store("tmp.dat", fs); PrefixEncodedTermSet n = PrefixEncodedTermSet.load("tmp.dat", fs); assertEquals(0, n.getIndex("a")); assertEquals(1, n.getIndex("aa")); assertEquals(2, n.getIndex("aaa")); assertEquals(3, n.getIndex("aaaa")); assertEquals(4, n.getIndex("aaaaaa")); assertEquals(5, n.getIndex("aab")); assertEquals(6, n.getIndex("aabb")); assertEquals(7, n.getIndex("aaabcb")); assertEquals(8, n.getIndex("aad")); assertEquals(9, n.getIndex("abd")); assertEquals(10, n.getIndex("abde")); set = n.getKeySet(); iter = set.iterator(); assertEquals(iter.next(), "a"); assertEquals(iter.next(), "aa"); assertEquals(iter.next(), "aaa"); assertEquals(iter.next(), "aaaa"); assertEquals(iter.next(), "aaaaaa"); assertEquals(iter.next(), 
"aab"); assertEquals(iter.next(), "aabb"); assertEquals(iter.next(), "aaabcb"); assertEquals(iter.next(), "aad"); assertEquals(iter.next(), "abd"); assertEquals(iter.next(), "abde"); fs.delete(new Path("tmp.dat"), true); }
From source file:ivory.data.PrefixEncodedTermSetTest.java
License:Apache License
/**
 * Loads a term set from the plain-text file {@code etc/dictionary-test.txt}, spot-checks the
 * indexes of a handful of terms and the compression ratio, then stores the set to
 * {@code tmp.dat} on the local file system, loads it back and repeats the index checks on the
 * reloaded instance.
 *
 * @throws IOException if reading the input file or storing, loading or deleting the temporary
 *     file fails
 */
@Test
public void test2() throws IOException {
  // Sample terms and the indexes they are expected to map to (parallel arrays).
  String[] terms = {
    "a", "a1", "aardvark", "affair", "airwolf", "anntaylor", "augustus", "azzuz"
  };
  int[] indexes = {0, 1, 248, 2291, 3273, 6845, 11187, 12339};

  FileSystem fs = FileSystem.getLocal(new Configuration());
  PrefixEncodedTermSet m =
      PrefixEncodedTermSet.loadFromPlainTextFile("etc/dictionary-test.txt", fs, 8);

  for (int i = 0; i < terms.length; i++) {
    assertEquals("index of " + terms[i], indexes[i], m.getIndex(terms[i]));
  }
  assertEquals(0.5631129, m.getCompresssionRatio(), 10e-6);

  try {
    m.store("tmp.dat", fs);
    PrefixEncodedTermSet n = PrefixEncodedTermSet.load("tmp.dat", fs);

    for (int i = 0; i < terms.length; i++) {
      assertEquals("index of " + terms[i] + " after reload", indexes[i], n.getIndex(terms[i]));
    }
  } finally {
    // Remove the temporary file even when an assertion above fails.
    fs.delete(new Path("tmp.dat"), true);
  }
}
From source file:ivory.ltr.ExtractFeatures.java
License:Apache License
public static void main(String[] args) throws SAXException, ParserConfigurationException, NotBoundException, Exception { Configuration conf = new Configuration(); FileSystem fs = FileSystem.getLocal(conf); ExtractFeatures extractor = new ExtractFeatures(args, fs); extractor.extract();//w w w .j a v a 2s . c om }
From source file:ivory.server.RunLocalRetrievalServer.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length < 2) { System.out.println("usage: [config-file] [port]"); System.exit(-1);// w ww . j a v a2 s . c o m } String configFile = args[0]; FileSystem fs = FileSystem.getLocal(new Configuration()); Document d = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(fs.open(new Path(configFile))); sLogger.info("Reading configuration..."); NodeList servers = d.getElementsByTagName("server"); if (servers.getLength() > 1) throw new Exception( "Error: multiple servers specified. Cannot launch more than one server on local machine!"); String sid = null; for (int i = 0; i < servers.getLength(); i++) { // get server id Node node = servers.item(i); sid = XMLTools.getAttributeValue(node, "id", null); } if (sid == null) { throw new Exception("Must specify a query id attribute for every server!"); } int port = Integer.parseInt(args[1]); RetrievalServer server = new RetrievalServer(); server.initialize(sid, configFile, fs); server.start(port); while (true) ; }
From source file:ivory.smrf.retrieval.RunQueryLocal.java
License:Apache License
/**
 * Creates a query runner backed by the local file system by constructing a
 * {@code BatchQueryRunner} from the given arguments.
 *
 * <p>A failure while constructing the runner is propagated to the caller. The previous version
 * printed the stack trace and returned normally, leaving {@code runner} unassigned.
 *
 * @param args passed through unchanged to the {@code BatchQueryRunner} constructor
 * @throws Exception if the local file system cannot be obtained or the runner cannot be
 *     constructed
 */
public RunQueryLocal(String[] args)
    throws SAXException, IOException, ParserConfigurationException, Exception, NotBoundException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);

  LOG.info("initilaize runquery ...");
  runner = new BatchQueryRunner(args, fs);
}