Example usage for org.apache.lucene.benchmark.byTask.feeds DocData getID

List of usage examples for org.apache.lucene.benchmark.byTask.feeds DocData getID

Introduction

On this page you can find example usage for the method org.apache.lucene.benchmark.byTask.feeds.DocData.getID().

Prototype

public int getID() 

Source Link

Usage

From source file: com.tamingtext.qa.WikipediaWexIndexer.java

License:Apache License

/**
 * Reads documents from a Wikipedia WEX dump file and sends them to the Solr
 * server in batches, then commits and optimizes the index.
 *
 * <p>Each document is indexed with the fields {@code file}, {@code docid},
 * {@code body}, {@code doctitle}, {@code name_s} and zero or more
 * {@code category} values (the {@code category} property split on
 * {@code ";;"}). Documents without a {@code category} property are indexed
 * without that field.
 *
 * @param wikipediaWEX the WEX dump file to read; if {@code null} or not a
 *                     regular file, nothing is indexed and a message is
 *                     printed to standard output
 * @param numDocs      maximum number of documents to index
 * @param batchSize    number of documents to accumulate before sending them
 *                     to the server; values below 1 cause every document to
 *                     be sent individually
 * @return the number of documents handed to the server
 * @throws Exception if reading the content source or talking to the server fails
 */
public int index(File wikipediaWEX, int numDocs, int batchSize) throws Exception {
    if (wikipediaWEX == null || !wikipediaWEX.isFile()) {
        System.out.println("Can't find file: " + wikipediaWEX);
        return 0;
    }

    String filePath = wikipediaWEX.getAbsolutePath();
    Properties properties = new Properties();
    properties.setProperty("docs.file", filePath);
    properties.setProperty("doc.maker.forever", "false");

    WexWikiContentSource contentSource = new WexWikiContentSource();
    contentSource.setConfig(new Config(properties));
    contentSource.resetInputs();

    List<SolrInputDocument> docs = new ArrayList<SolrInputDocument>(1000);
    int i = 0;
    long start = System.currentTimeMillis();
    try {
        DocData docData = new DocData();

        // Check the limit first so no extra document is read (and thrown
        // away) once numDocs has been reached.
        while (i < numDocs && (docData = contentSource.getNextDocData(docData)) != null) {
            SolrInputDocument sDoc = new SolrInputDocument();
            docs.add(sDoc);
            sDoc.addField("file", filePath + "_" + i);

            sDoc.addField("docid", String.valueOf(docData.getID()));
            sDoc.addField("body", docData.getBody());
            sDoc.addField("doctitle", docData.getTitle());
            sDoc.addField("name_s", docData.getName());

            // Not every document is guaranteed to carry properties or a
            // "category" entry; skip the field instead of failing with an NPE.
            Properties props = docData.getProps();
            String categoryList = (props == null) ? null : props.getProperty("category");
            if (categoryList != null) {
                for (String c : categoryList.split(";;")) {
                    sDoc.addField("category", c);
                }
            }

            // The pending list is cleared after every flush, so for a positive
            // batchSize this fires on exactly the same documents as
            // (i % batchSize == batchSize - 1), without dividing by batchSize.
            if (docs.size() >= batchSize) {
                log.info("Sending: " + docs.size() + " docs" + " total sent for this file: " + i);
                server.add(docs);
                docs.clear();
            }
            i++;
        }
    } catch (NoMoreDataException ignored) {
        // Deliberately ignored: treated as the normal end-of-input signal
        // (the source is configured with doc.maker.forever=false).
    }
    long finish = System.currentTimeMillis();
    if (log.isInfoEnabled()) {
        log.info("Indexing took " + (finish - start) + " ms");
    }

    // Flush the final, partially filled batch.
    if (!docs.isEmpty()) {
        server.add(docs);
    }
    server.commit();
    server.optimize();

    // i already counts every document added, including those in the final
    // partial batch, so it must not be increased by docs.size() again.
    return i;
}