Example usage for org.deeplearning4j.datasets.iterator BaseDatasetIterator BaseDatasetIterator

List of usage examples for org.deeplearning4j.datasets.iterator BaseDatasetIterator BaseDatasetIterator

Introduction

On this page you can find example usage for org.deeplearning4j.datasets.iterator BaseDatasetIterator BaseDatasetIterator.

Prototype

public BaseDatasetIterator(int batch, int numExamples, BaseDataFetcher fetcher) 

Source Link

Usage

From source file:Dl4j.Doc2VecWithAutoEncoder.java

/**
 * Builds an in-memory Lucene index over a small toy document collection, streams the
 * indexed documents through a {@link BaseDatasetIterator} (batch size 1, up to 50
 * examples), and trains a small two-layer RBM auto-encoder on the resulting vectors.
 *
 * @param args args[0] is the path to a properties file (keys seen here: "index",
 *             "depth", "qid", "qrel", "output"); a hard-coded developer-local default
 *             path is used when no argument is supplied
 * @throws FileNotFoundException if the properties file cannot be opened
 * @throws IOException on properties or index I/O failure
 */
public static void main(String[] args) throws FileNotFoundException, IOException {

    if (args.length < 1) {
        // Fall back to a developer-local default config when none is supplied.
        args = new String[1];
        args[0] = "/home/procheta/NetBeansProjects/Dl4jTest/src/dl4jtest/init.properties";
    }
    String[] docs = { "The cat sat on the mat", "The dog sat on the mat", "The chicken ate the corn",
            "The corn was sweet", "The milk was sweet", "The dog sat on the mat", "The cat drank the milk",
            "The dog ate the bone" };

    try {
        Properties prop = new Properties();
        prop.load(new FileReader(args[0]));
        LuceneDocFetcher luceneDocFetcher;

        // Test loading a simple collection of docs: build an in-memory index.
        RAMDirectory ramdir = new RAMDirectory();

        IndexWriterConfig iwcfg = new IndexWriterConfig(new EnglishAnalyzer());
        iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(ramdir, iwcfg);
        for (String doc : docs) {
            try {
                Document lDoc = new Document();
                // Term vectors are stored so the fetcher can later rebuild per-doc vectors.
                lDoc.add(new Field(LuceneDocFetcher.CONTENET_FIELD_NAME, doc, Field.Store.NO,
                        Field.Index.ANALYZED, Field.TermVector.YES));
                writer.addDocument(lDoc);
            } catch (Exception e) {
                // FIX: the original swallowed indexing failures silently; at minimum
                // report them so missing documents are diagnosable.
                e.printStackTrace();
            }
        }
        writer.close();
        // NOTE(review): the on-disk index from the properties file is what is actually
        // read below; the RAMDirectory built above is only exercised, not consumed.
        Path path = Paths.get(prop.getProperty("index"));
        Directory dir = FSDirectory.open(path);

        Doc2VecWithAutoEncoder dva = new Doc2VecWithAutoEncoder();
        System.out.println(prop.getProperty("depth"));
        dva.getDocIds(prop.getProperty("qid"), prop.getProperty("qrel"));
        // Pass the index reader to the vectorizer (doc ids/labels gathered above).
        luceneDocFetcher = new LuceneDocFetcher(dir, dva.docIds, dva.labels);

        DataSetIterator iter = new BaseDatasetIterator(1, 50, luceneDocFetcher);
        while (iter.hasNext()) {
            DataSet v = iter.next();
            System.out.println(v.getFeatures());
        }

        // Test auto-encoding.
        final int vocabSize = luceneDocFetcher.getDimension();
        int iterations = 2;
        // FIX: iterations / 5 truncates to 0 for small iteration counts, making the
        // score listener report never (or divide by zero in some DL4J versions);
        // clamp the reporting frequency to at least 1.
        int listenerFreq = Math.max(1, iterations / 5);

        // Two stacked RBMs: vocabSize -> 5 -> 10, trained with RMSE cross-entropy.
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .iterations(iterations).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .list(2)
                .layer(0,
                        new RBM.Builder().nIn(vocabSize).nOut(5)
                                .lossFunction(LossFunctions.LossFunction.RMSE_XENT).build())
                .layer(1,
                        new RBM.Builder().nIn(5).nOut(10).lossFunction(LossFunctions.LossFunction.RMSE_XENT)
                                .build())
                .build();

        MultiLayerNetwork model = new MultiLayerNetwork(conf);
        model.init();

        model.setListeners(Arrays.asList((IterationListener) new ScoreIterationListener(listenerFreq)));
        model.fit(iter);

        System.out.println("Output layer: ");
        iter.reset();
        while (iter.hasNext()) {
            // Drain the iterator; per-example model output printing is disabled.
            DataSet v = iter.next();
        }
        // Reset before a final pass so the full collection is serialized.
        iter.reset();
        dva.saveModel(iter, prop.getProperty("output"), model);
    } catch (Exception ex) {
        ex.printStackTrace();
    }

}

From source file:vectorizer.Doc2Vec.java

/**
 * Wraps the given fetcher in a {@link BaseDatasetIterator} that yields one
 * example per batch and covers the fetcher's entire document collection.
 *
 * @param fetcher source of indexed document vectors
 * @return a batch-size-1 iterator over all of the fetcher's examples
 */
private DataSetIterator getDataSetIterator(LuceneDocFetcher fetcher) {
    return new BaseDatasetIterator(1, fetcher.totalExamples(), fetcher);
}