Java tutorial
/** * This software is licensed to you under the Apache License, Version 2.0 (the * "Apache License"). * * LinkedIn's contributions are made under the Apache License. If you contribute * to the Software, the contributions will be deemed to have been made under the * Apache License, unless you expressly indicate otherwise. Please do not make any * contributions that would be inconsistent with the Apache License. * * You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, this software * distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache * License for the specific language governing permissions and limitations for the * software governed under the Apache License. * * 2012 LinkedIn Corp. All Rights Reserved. */ package com.browseengine.bobo.test; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.io.LineNumberReader; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.Random; import java.util.Map.Entry; import org.apache.lucene.LucenePackage; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermEnum; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.SimpleFSDirectory; import com.browseengine.bobo.api.BoboBrowser; import com.browseengine.bobo.api.BoboIndexReader; import com.browseengine.bobo.api.BrowseException; import com.browseengine.bobo.api.BrowseFacet; import com.browseengine.bobo.api.BrowseRequest; import com.browseengine.bobo.api.BrowseResult; import com.browseengine.bobo.api.BrowseSelection; import com.browseengine.bobo.api.FacetAccessible; import com.browseengine.bobo.api.FacetSpec; import com.browseengine.bobo.api.FacetSpec.FacetSortSpec; import com.browseengine.bobo.facets.FacetHandler; import com.browseengine.bobo.facets.data.PredefinedTermListFactory; import com.browseengine.bobo.facets.impl.DefaultFacetCountCollector; import com.browseengine.bobo.facets.impl.MultiValueFacetHandler; import com.browseengine.bobo.facets.impl.SimpleFacetHandler; public class LucenePerf { public static String[] words = { "manager", "university", "in", "business", "management", "a", "development", "consultant", "director", "10", "services", "on", "senior", "marketing", "project", "sales", "technology", "systems", "as", "software", "new", "professional", "owner", "experience", "inc", "team", "company" }; public static final ArrayList<String> wordlist = new ArrayList<String>(); static final Random rand = new Random(987129); public static int inNumItr = 10; /** * @param args * @throws IOException * @throws CorruptIndexException * @throws InterruptedException * @throws BrowseException */ public static void main(String[] args) throws CorruptIndexException, IOException, InterruptedException, BrowseException { System.out.println(LucenePackage.get()); System.out.println(Arrays.toString(args)); String filename = "/Users/xgu/lucene29test/caches/people-search-index"; if (args.length > 0) filename = "/Users/xgu/lucene29test/caches/people-search-index"; System.out.println(filename); File file = new File(filename); FSDirectory directory = new SimpleFSDirectory(file); // FSDirectory directory = FSDirectory.getDirectory(file); System.out.println(directory.getClass().getName()); IndexReader reader = IndexReader.open(directory, true); loadFile(); // TermEnum termEnum = reader.terms(new Term("b", "")); // while(termEnum.next()) // { // Term t = termEnum.term(); // wordlist.add(t.text()); // } // words = wordlist.toArray(new String[1]); System.out.println("load the words " + words.length); final Collection<FacetHandler<?>> facetHandlers = new ArrayList<FacetHandler<?>>(); facetHandlers.add(new MultiValueFacetHandler("ccid", new PredefinedTermListFactory<Integer>(Integer.class, "0000000000"))); // facetHandlers.add(new MultiValueFacetHandler("pcid", new PredefinedTermListFactory<Integer>(Integer.class,"0000000000"))); facetHandlers.add(new SimpleFacetHandler("industry", new PredefinedTermListFactory<Integer>(Integer.class, "0000000000"))); facetHandlers.add(new SimpleFacetHandler("geo_region")); facetHandlers.add(new MultiValueFacetHandler("education_id", new PredefinedTermListFactory<Integer>(Integer.class, "0000000000"))); long tgetinstance = System.currentTimeMillis(); final BoboIndexReader boboReader = BoboIndexReader.getInstance(reader, facetHandlers, null); System.out.println("getInstanceTime: " + (System.currentTimeMillis() - tgetinstance)); //warming for (int x = 0; x < 30; x++) { doSearch(5, boboReader, facetHandlers); } long start = System.currentTimeMillis(); int numThread = 2; System.out.println(numThread + " threads"); int numItr = 1000; long ttime = 0; for (int x = 1; x < numItr + 1; x++) { long time = doSearch(numThread, boboReader, facetHandlers); ttime += time; if (x % 20 == 0) { System.out.println("total time: " + ttime); System.out.println( "average time: " + ((float) ttime / (float) x / (float) numThread / (float) inNumItr)); } } System.out.println("total time: " + ttime); System.out.println( "number of iterations: " + numItr + "\t\tnumThread: " + numThread + "\t\tinner itr: " + inNumItr); System.out.println( "average time: " + ((float) ttime / (float) numItr / (float) numThread / (float) inNumItr)); System.out.println(LucenePackage.get()); } private static long doSearch(int numThread, final BoboIndexReader boboReader, final Collection<FacetHandler<?>> facetHandlers) throws IOException, CorruptIndexException, InterruptedException { Thread[] threads = new Thread[numThread]; final long[] times = new long[numThread]; for (int x = 0; x < threads.length; x++) { final int y = x; threads[x] = new Thread() { public void run() { try { times[y] = oneRun(boboReader, facetHandlers); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (BrowseException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }; } for (int x = 0; x < threads.length; x++) { threads[x].setDaemon(true); threads[x].start(); } long sum = 0; for (int x = 0; x < threads.length; x++) { threads[x].join(); sum += times[x]; } return sum; } /* * [00000001371(3156), 00000001025(2951), 00000001035(2688), 00000001009(2429), 00000157234(2318), 00000001028(1871), * 00000001063(1711), 00000002114(1371), 00000001033(1340), 00000001384(1187), 00000001292(1016), 00000001694(993), * 00000001483(980), 00000001062(884), 00000001115(883), 00000001441(854), 00000001052(695), 00000001093(681), * 00000001714(665), 00000001128(641), 00000224605(619), 00000001053(616), 00000002271(613), 00000001288(609), * 00000001038(607), 00000001060(585), 00000001043(573), 00000157240(555), 00000001044(549), 00000001663(546), * 00000001231(544), 00000001123(526), 00000001505(497), 00000001120(487), 00000001070(484), 00000001217(480), * 00000001073(478), 00000001006(452), 00000001068(437), 00000001207(432), 00000001066(415), 00000001116(415), * 00000001271(415), 00000001015(407), 00000011448(401), 00000001040(399), 00000001235(393), 00000001058(391), * 00000001482(382), -00000000001(0)] */ private static long oneRun(BoboIndexReader boboReader, Collection<FacetHandler<?>> facetHandlers) throws IOException, BrowseException { long tt = 0; long hitscount = 0; for (int x = 0; x < inNumItr; x++) { long t0 = System.currentTimeMillis(); BoboBrowser browser = new BoboBrowser(boboReader); BrowseRequest req = new BrowseRequest(); req.setCount(500); FacetSpec spec = new FacetSpec(); spec.setMaxCount(50); spec.setOrderBy(FacetSortSpec.OrderHitsDesc); // req.setFacetSpec("ccid", spec); // req.setFacetSpec("pcid", spec); // req.setFacetSpec("education_id", spec); req.setFacetSpec("geo_region", spec); // req.setFacetSpec("industry", spec); String qstr = words[nextInt()]; // qstr = "project manager"; String[] terms = qstr.split(" "); BooleanQuery q = new BooleanQuery(); for (String s : terms) { q.add(new TermQuery(new Term("b", s)), Occur.MUST); } // q.add(new MatchAllDocsQuery(), Occur.MUST); req.setQuery(q);//new TermQuery(new Term("b",qstr))); BrowseSelection sel = new BrowseSelection("ccid"); sel.addValue("0000001384"); // req.addSelection(sel ); BrowseSelection seli = new BrowseSelection("industry"); seli.addValue("0000000052"); // req.addSelection(seli ); long tf0 = 0; long tf1 = 0; BrowseResult bres = browser.browse(req); for (Entry<String, FacetAccessible> entry : bres.getFacetMap().entrySet()) { // System.out.println(entry.getKey()); FacetAccessible fa = entry.getValue(); tf0 = System.currentTimeMillis(); List<BrowseFacet> facets = fa.getFacets(); tf1 = System.currentTimeMillis(); System.out.println(tf1 - tf0 + "\tfacet " + entry.getKey() + " get time\tsize: " + facets.size()); // System.out.println(Arrays.toString(facets.toArray())); fa.close(); } browser.close(); // System.out.println(t2 - t0 +"\ttotal time\t\t\t hits: "+ bres.getNumHits()); hitscount += bres.getNumHits(); long t2 = System.currentTimeMillis(); tt += (t2 - t0); browser.close(); // System.out.println(t2 - t0 -(tf1-tf0)+"\tsearch time\t"); } if (hitscount > 80000) System.out.println("avg hits count: " + hitscount / inNumItr); try { Thread.sleep(50); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } return tt; } public static void loadFile() { File file = new File("/Users/xgu/lucene29test/keywords"); try { FileInputStream fis = new FileInputStream("/Users/xgu/lucene29test/bbb"); InputStreamReader isr = new InputStreamReader(fis); LineNumberReader reader = new LineNumberReader(isr); String line; while ((line = reader.readLine()) != null) { wordlist.add(line); } words = wordlist.toArray(new String[1]); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } public static int ii = 28000; public static synchronized int nextInt() { int ret = ii; ii = (ret + 1) % words.length; return ret; } }