List of usage examples for org.apache.lucene.benchmark.byTask.feeds.DocData.getID()
public int getID()
From source file: com.tamingtext.qa.WikipediaWexIndexer.java
License: Apache License
public int index(File wikipediaWEX, int numDocs, int batchSize) throws Exception { int result = 0; if (wikipediaWEX != null && wikipediaWEX.isFile()) { WexWikiContentSource contentSource = new WexWikiContentSource(); Properties properties = new Properties(); // fileName = config.get("docs.file", null); String filePath = wikipediaWEX.getAbsolutePath(); properties.setProperty("docs.file", filePath); properties.setProperty("doc.maker.forever", "false"); contentSource.setConfig(new Config(properties)); contentSource.resetInputs();// w w w.j a va 2s.c o m // docMaker.openFile(); List<SolrInputDocument> docs = new ArrayList<SolrInputDocument>(1000); int i = 0; SolrInputDocument sDoc = null; long start = System.currentTimeMillis(); try { DocData docData = new DocData(); while ((docData = contentSource.getNextDocData(docData)) != null && i < numDocs) { int mod = i % batchSize; sDoc = new SolrInputDocument(); docs.add(sDoc); sDoc.addField("file", filePath + "_" + i); sDoc.addField("docid", String.valueOf(docData.getID())); sDoc.addField("body", docData.getBody()); sDoc.addField("doctitle", docData.getTitle()); sDoc.addField("name_s", docData.getName()); String[] categories = docData.getProps().getProperty("category").split(";;"); for (String c : categories) { sDoc.addField("category", c); } if (mod == batchSize - 1) { log.info("Sending: " + docs.size() + " docs" + " total sent for this file: " + i); server.add(docs); docs.clear(); } i++; } } catch (NoMoreDataException e) { } long finish = System.currentTimeMillis(); if (log.isInfoEnabled()) { log.info("Indexing took " + (finish - start) + " ms"); } if (docs.size() > 0) { server.add(docs); } result = i + docs.size(); server.commit(); server.optimize(); } else { System.out.println("Can't find file: " + wikipediaWEX); } return result; }