Example usage for org.apache.lucene.index IndexWriterConfig setOpenMode

List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriterConfig setOpenMode.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode) 

Source Link

Document

Specifies OpenMode of the index.

Usage

From source file:index.IndexEx.java

/**
 * Builds a fresh Lucene index in the {@code index} directory from the documents found under
 * the directory given as the first command-line argument.
 *
 * <p>Exits with status 1 when no document directory is supplied or when it is not readable.
 * I/O failures during indexing are reported on standard output and not propagated.
 *
 * @param args {@code args[0]} is the directory containing the documents to index
 */
public static void main(String[] args) {
    String indexPath = "index";
    // The documents path used to be hard-coded to null, which made Paths.get(...) throw a
    // NullPointerException before anything could be indexed; take it from the command line.
    String docsPath = (args != null && args.length > 0) ? args[0] : null;
    boolean create = true;
    if (docsPath == null) {
        System.err.println("Usage: IndexEx <docs-path>");
        System.exit(1);
        return;
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
        return;
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");
        // try-with-resources releases the directory, analyzer and writer (and its write
        // lock) even when indexing fails part-way through.
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
                Analyzer analyzer = new StandardAnalyzer()) {
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            // CREATE discards previously indexed documents; CREATE_OR_APPEND keeps them.
            iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
            try (IndexWriter writer = new IndexWriter(dir, iwc)) {
                indexDocs(writer, docDir);
            }
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:index.IndexNews.java

/**
 * Opens the news index in CREATE mode and echoes every line of the news dump to standard
 * output, prefixed with {@code "-   "}.
 *
 * <p>No documents are added to the index yet, so closing the writer commits an empty index
 * that replaces whatever was stored in the index directory before.
 *
 * @param args ignored
 * @throws FileNotFoundException if the news dump cannot be opened
 * @throws IOException if reading the dump or writing the index fails
 */
public static void main(String[] args) throws FileNotFoundException, IOException {
    String indexPath = "/Users/smita/Documents/ES/index/news/";
    boolean create = true;
    String path = "/Users/smita/Documents/data/newsSpace.txt";

    Date start = new Date();
    // The input is opened first so that a missing data file fails before the index is
    // touched. Every resource is closed automatically, including on failure.
    try (FileInputStream inputStream = new FileInputStream(path);
            Scanner sc = new Scanner(inputStream, "UTF-8");
            Directory dir = FSDirectory.open(Paths.get(indexPath));
            Analyzer analyzer = new StandardAnalyzer()) {
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        // CREATE discards previously indexed documents; CREATE_OR_APPEND keeps them.
        iwc.setOpenMode(create
                ? IndexWriterConfig.OpenMode.CREATE
                : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            // TODO: build a Document from each line and add it through `writer`.
            // The previous, unused extraction of the first tab-separated column was removed:
            // it threw ArrayIndexOutOfBoundsException for lines made only of tab characters.
            while (sc.hasNextLine()) {
                System.out.println("-   " + sc.nextLine());
            }

            // Scanner suppresses I/O errors while reading; surface them explicitly.
            if (sc.ioException() != null) {
                throw sc.ioException();
            }
        }
    }

    Date end = new Date();
    System.out.println(end.getTime() - start.getTime() + " total milliseconds");
}

From source file:index.IndexOmimtsv.java

License:Apache License

/**
 * Indexes the OMIM TSV export into a freshly created Lucene index.
 *
 * <p>The actual document extraction is delegated to {@code indexDocs}. I/O failures are
 * reported on standard output and not propagated.
 *
 * @param args ignored
 */
public static void main(String[] args) {

    Date start = new Date();
    final Path docDir = Paths.get("F:/Ecole(Telecom)/cours telecom/Projet_GMD/bases/omimtsv.tsv");
    // Single source of truth for the index location: the progress message used to name a
    // different directory than the one that was actually opened.
    final String indexPath = "F:/Ecole(Telecom)/cours telecom/Projet_GMD/indexs/indexOmimtsv";

    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        // try-with-resources releases the directory, analyzer and writer even when
        // indexing fails part-way through.
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
                Analyzer analyzer = new StandardAnalyzer()) {
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            iwc.setOpenMode(OpenMode.CREATE);

            // Optional: when indexing many documents, a larger RAM buffer speeds things up
            // (iwc.setRAMBufferSizeMB(256.0)); raise the JVM max heap accordingly
            // (e.g. -Xmx512m or -Xmx1g).

            try (IndexWriter writer = new IndexWriter(dir, iwc)) {
                indexDocs(writer, docDir);

                // Optional: writer.forceMerge(1) maximizes search performance but is very
                // costly, so it is only worth it for an index that is relatively static.
            }
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:index.IndexWikiAbstract.java

/**
 * Indexes the long-abstracts dump: one Lucene document with the stored text fields
 * {@code title} and {@code abs} per parseable line of the input file.
 *
 * <p>The first line of the file is skipped. Lines that cannot be parsed are reported on
 * standard error and skipped; I/O failures abort the run.
 *
 * @param args ignored
 * @throws FileNotFoundException if the dump cannot be opened
 * @throws IOException if reading the dump or writing the index fails
 */
public static void main(String[] args) throws FileNotFoundException, IOException {
    String indexPath = "/Users/smita/Documents/ES/index/abstract/";
    boolean create = true;
    String path = "/Users/smita/Documents/data/dbpedia/long_abstracts_en.nt";

    Date start = new Date();
    // The input is opened first so that a missing data file fails before the index is
    // touched. Every resource is closed automatically, including on failure.
    try (FileInputStream inputStream = new FileInputStream(path);
            Scanner sc = new Scanner(inputStream, "UTF-8");
            Directory dir = FSDirectory.open(Paths.get(indexPath));
            Analyzer analyzer = new StandardAnalyzer()) {
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        // CREATE discards previously indexed documents; CREATE_OR_APPEND keeps them.
        iwc.setOpenMode(create
                ? IndexWriterConfig.OpenMode.CREATE
                : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            boolean adding = writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE;

            // Skip the first line, guarding against an empty file.
            if (sc.hasNextLine()) {
                sc.nextLine();
            }

            int linecount = 0;
            while (sc.hasNextLine()) {
                linecount++;
                String[] titleAndAbstract = parseAbstractLine(sc.nextLine());
                if (titleAndAbstract == null) {
                    System.err.println("skipping malformed line " + linecount);
                    continue;
                }

                // Index the line as a document.
                Document doc = new Document();
                doc.add(new TextField("title", titleAndAbstract[0], Field.Store.YES));
                doc.add(new TextField("abs", titleAndAbstract[1], Field.Store.YES));
                if (adding) {
                    System.out.println("adding " + linecount);
                    // Failures here now propagate instead of being silently swallowed.
                    writer.addDocument(doc);
                } else {
                    // Updating existing documents is not implemented; nothing is written.
                    System.out.println("updating ");
                }
            }

            // Scanner suppresses I/O errors while reading; surface them explicitly.
            if (sc.ioException() != null) {
                throw sc.ioException();
            }
        }
    }

    Date end = new Date();
    System.out.println(end.getTime() - start.getTime() + " total milliseconds");
}

/**
 * Splits one line of the dump into a title and an abstract.
 *
 * <p>The title is the first space-separated token with underscores replaced by spaces, its
 * first 29 characters and its last character removed. The abstract is everything after the
 * second token, minus the last 6 characters of the line.
 * NOTE(review): the offsets presumably strip {@code <http://dbpedia.org/resource/ ... >} and
 * a trailing {@code "@en .}; the abstract then still starts with the opening quote — confirm
 * against the data file.
 *
 * @param line one raw input line
 * @return {@code {title, abstract}}, or {@code null} when the line is too short to parse
 */
private static String[] parseAbstractLine(String line) {
    String[] tokens = line.split(" ");
    if (tokens.length < 2) {
        return null;
    }
    String subject = tokens[0];
    // The abstract starts after "<subject> <predicate> ".
    int abstractStart = subject.length() + tokens[1].length() + 2;
    if (subject.length() < 30 || line.length() < abstractStart + 6) {
        return null;
    }
    String abs = line.substring(abstractStart, line.length() - 6);
    String title = subject.replace('_', ' ').substring(29, subject.length() - 1);
    return new String[] { title, abs };
}

From source file:index.RedirectIndex.java

/**
 * Opens the redirect index in CREATE mode and echoes every line of the redirects dump
 * (except the first) to standard output.
 *
 * <p>No documents are added to the index yet, so closing the writer commits an empty index
 * that replaces whatever was stored in the index directory before.
 *
 * @param args ignored
 * @throws FileNotFoundException if the redirects dump cannot be opened
 * @throws IOException if reading the dump or writing the index fails
 */
public static void main(String[] args) throws FileNotFoundException, IOException {
    String indexPath = "/Users/smita/Documents/ES/index/redirect/";
    boolean create = true;
    String path = "/Users/smita/Documents/data/dbpedia/redirects_en.ttl";

    Date start = new Date();
    // The input is opened first so that a missing data file fails before the index is
    // touched. Every resource is closed automatically, including on failure.
    try (FileInputStream inputStream = new FileInputStream(path);
            Scanner sc = new Scanner(inputStream, "UTF-8");
            Directory dir = FSDirectory.open(Paths.get(indexPath));
            Analyzer analyzer = new StandardAnalyzer()) {
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        // CREATE discards previously indexed documents; CREATE_OR_APPEND keeps them.
        iwc.setOpenMode(create
                ? IndexWriterConfig.OpenMode.CREATE
                : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            // Skip the first line, guarding against an empty file.
            if (sc.hasNextLine()) {
                sc.nextLine();
            }

            // TODO: build a Document (redirect label -> target label) from each line and add
            // it through `writer`. The previous, unused extraction of the first and third
            // space-separated tokens was removed: it threw ArrayIndexOutOfBoundsException
            // for any line with fewer than three tokens.
            while (sc.hasNextLine()) {
                System.out.println(sc.nextLine());
            }

            // Scanner suppresses I/O errors while reading; surface them explicitly.
            if (sc.ioException() != null) {
                throw sc.ioException();
            }
        }
    }

    Date end = new Date();
    System.out.println(end.getTime() - start.getTime() + " total milliseconds");
}

From source file:indexer.AMIIndexer.java

/**
 * Creates a fresh index in {@code indexDir} (discarding any existing one) and fills it by
 * calling {@code indexAnnotation()}.
 *
 * @throws Exception if opening the index or indexing the annotations fails
 */
public void process() throws Exception {
    System.out.println("Indexing AMI annotations...");

    IndexWriterConfig iwcfg = new IndexWriterConfig(analyzer);
    iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // The writer is managed through a local so try-with-resources closes it (releasing the
    // index write lock) even when indexAnnotation() throws; the field is still assigned
    // because indexAnnotation() is expected to write through it.
    try (IndexWriter openedWriter = new IndexWriter(FSDirectory.open(indexDir.toPath()), iwcfg)) {
        writer = openedWriter;
        indexAnnotation();
    }
}

From source file:indexer.files.LuceneWriter.java

/**
 * Opens the index at {@code pathToIndex} for writing and stores the resulting writer in
 * {@code indexWriter}. The index is always opened in CREATE mode, so an existing index in
 * that directory is overwritten.
 *
 * @return {@code true} when the writer was opened; {@code false} when any step failed (the
 *         failure is printed to standard output)
 */
public boolean openIndex() {
    try {
        // Open the directory that holds the index.
        Directory dir = FSDirectory.open(new File(pathToIndex));

        // Load the stop words from the stop-word file (read as UTF-8) and split them into
        // individual entries; the set is built with the ignore-case flag enabled.
        String stopWordsText = FileUtils.readFileToString(new File(this.pathToStopWords), "UTF-8");
        CharArraySet stopWords = new CharArraySet(Version.LUCENE_43,
                Arrays.asList(StringUtils.split(stopWordsText)), true);

        // Pick the analyzer and state the Lucene version in use.
        SpanishAnalyzer analyzer = new SpanishAnalyzer(Version.LUCENE_43, stopWords);

        // Configure the writer: always overwrite the index found in the directory.
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer);
        iwc.setOpenMode(OpenMode.CREATE);

        indexWriter = new IndexWriter(dir, iwc);
        return true;
    } catch (Exception e) {
        System.out.println("Ocurrio un problema abriendo el documento para escritura: " + e.getClass() + " :: "
                + e.getMessage());
        return false;
    }
}

From source file:indexer.IndexSplitter.java

/**
 * Creates the splitter and opens its two target indexes, both in CREATE mode.
 *
 * <p>The index locations are read from the properties {@code splitindex.pure} and
 * {@code splitindex.mixed}.
 *
 * @param propFile properties file handed to the superclass constructor
 * @throws Exception if the superclass constructor or opening either index fails
 */
public IndexSplitter(String propFile) throws Exception {
    super(propFile);

    // Resolve both locations up front, before any index is opened.
    File pureLocation = new File(prop.getProperty("splitindex.pure"));
    File mixedLocation = new File(prop.getProperty("splitindex.mixed"));

    // Each writer gets its own configuration object.
    IndexWriterConfig pureConfig = new IndexWriterConfig(Version.LUCENE_4_9, analyzer)
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriterConfig mixedConfig = new IndexWriterConfig(Version.LUCENE_4_9, analyzer)
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    pureIndexWriter = new IndexWriter(FSDirectory.open(pureLocation), pureConfig);
    mixedIndexWriter = new IndexWriter(FSDirectory.open(mixedLocation), mixedConfig);
}

From source file:indexer.LuceneIndexer.java

/**
 * Indexing the files. This method checks for the directories and then 
 * finishes out after the indexing is complete.
 * @param global This is for reference to the global class variables 
 * and methods./*  ww w  . j av  a2  s  .  c  om*/
 * @param createIndex If true a new index will be created from scratch
 * and the old index will be destroyed.
 * @param indexPanel If true it will also print the console printout lines 
 * to the main panel.
 */
public static void IndexFiles(Global global, Boolean createIndex) {
    String dataDir = global.dataDir;
    String indexDir = global.indexDir;

    //Verifies that the data directory exists
    if (dataDir == null) {
        System.err.println("Data Directory Is not accessable, Unable to Index files.");
    }

    //Verifies that the data directory is readable and writeable
    final Path docDir = Paths.get(dataDir);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
    }

    startTime = new Date();
    try {
        System.out.println("Indexing to directory '" + indexDir + "'...");

        //Setups the analyzer
        Analyzer analyzer;
        try (Directory dir = FSDirectory.open(Paths.get(indexDir))) {

            analyzer = new StandardAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            if (createIndex) {
                // Create a new index in the directory, removing any
                // previously indexed documents:
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            } else {
                // Add new documents to an existing index:
                iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            }
            iwc.setRAMBufferSizeMB(global.RAM_BUFFER_SIZE);
            iwc.setMaxBufferedDocs(global.MAX_BUFFERED_DOCS);

            LogDocMergePolicy ldmp = new LogDocMergePolicy();
            ldmp.setMergeFactor(global.MERGE_FACTOR);
            iwc.setMergePolicy(ldmp);

            try (IndexWriter writer = new IndexWriter(dir, iwc)) {
                hm.clear();
                indexDocs(writer, docDir, global);

                //This is a costly operation, we scheduled the time to apply it
                if (global.merge) {
                    System.out.println("Starting Merge");
                    writer.forceMerge(1);
                    global.merge = false;
                }
                writer.close();
            }
            finishTime = new Date();
            long millis = finishTime.getTime() - startTime.getTime();
            totalTime = String.format("%02dhr %02dmin %02dsec", TimeUnit.MILLISECONDS.toHours(millis),
                    TimeUnit.MILLISECONDS.toMinutes(millis)
                            - TimeUnit.HOURS.toMinutes(TimeUnit.MILLISECONDS.toHours(millis)), // The change is in this line
                    TimeUnit.MILLISECONDS.toSeconds(millis)
                            - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(millis)));
            System.out.println("");
            System.out.println("");
            System.out.println("Start Time:          " + global.sdf.format(startTime.getTime()));
            System.out.println("Building List Time:  " + listBuildTime);
            System.out.println("Indexing Time:       " + indexingTime);
            System.out.println("Total Time:          " + totalTime);
            System.out.println("Number of Documents: " + amountOfDocuments);
            System.out.println("Finish Time:         " + global.sdf.format(finishTime.getTime()));
            System.out.println("");
        }
        analyzer.close();
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        log.fatal(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:indexer.PaperIndexer.java

/**
 * Creates two fresh indexes under {@code indexDir} ({@code docs/} and {@code para/}),
 * discarding any existing ones, and fills them by calling {@code indexAll()}.
 *
 * @throws Exception if opening either index or the indexing itself fails
 */
void processAll() throws Exception {
    System.out.println("Indexing TREC collection...");

    File indexDirDocs = new File(indexDir + "/docs/");
    File indexDirPara = new File(indexDir + "/para/");

    // Each writer needs its own configuration object.
    IndexWriterConfig docsConfig = new IndexWriterConfig(analyzer);
    docsConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriterConfig paraConfig = new IndexWriterConfig(analyzer);
    paraConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // Both writers are managed through locals so try-with-resources closes them even when
    // indexAll() throws or the second writer cannot be opened; the fields are still
    // assigned because indexAll() is expected to write through them.
    try (IndexWriter docsWriter = new IndexWriter(FSDirectory.open(indexDirDocs.toPath()), docsConfig);
            IndexWriter paragraphWriter = new IndexWriter(FSDirectory.open(indexDirPara.toPath()), paraConfig)) {
        writer = docsWriter;
        paraWriter = paragraphWriter;
        indexAll();
    }
}