21 January 2016

Basic steps:

  • Open input lmdb
  • Read all key strings from this lmdb, and save into a vector
  • Write the vector into a file

P.S. This source file is meant to be placed in caffe-rc/tools so that it builds together with the other Caffe tools.


code

#include <gflags/gflags.h>
#include <glog/logging.h>
#include <lmdb.h>
#include <sys/stat.h>

#include <algorithm>
#include <fstream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"
#include "caffe/util/rng.hpp"


using namespace caffe;  // NOLINT(build/namespaces)
using std::cout;
using std::cerr;
using std::endl;
using std::ends;
using std::vector;
using std::pair;
using std::make_pair;
using std::string;
using std::ifstream;

//============================== MAIN function =================================
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);

#ifndef GFLAGS_GFLAGS_H_
    namespace gflags = google;
#endif

    gflags::SetUsageMessage("Load key string from input lmdb\n"
        "Usage:\n"
        "    load_keystring_from_lmdb [FLAGS] db_name db_keylist\n"
        "");
    
    //jin
    //gflags::ParseCommandLineFlags(&argc, &argv, true);
    FLAGS_log_dir="./log";
    
    if (argc != 3) {
        gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/load_keystring_from_lmdb ");
        return 1;
    }

//========================= 1. Open lmdb 
    const char *src_db1 = argv[1];
    
    MDB_env *mdb_env1; 
    MDB_dbi mdb_dbi1;  
    MDB_val mdb_key1, mdb_value1;  
    MDB_txn *mdb_txn1;  
    MDB_cursor *mdb_cursor1;  
   
    CHECK_EQ(mdb_env_create(&mdb_env1), MDB_SUCCESS) << "mdb_env_create failed";          
    CHECK_EQ(mdb_env_set_mapsize(mdb_env1, 1099511627776), MDB_SUCCESS)  // 1TB
        << "mdb_env_set_mapsize failed";        
    CHECK_EQ(mdb_env_open(mdb_env1, src_db1, MDB_RDONLY|MDB_NOTLS, 0664), MDB_SUCCESS) << "mdb_env_open failed";       
    CHECK_EQ(mdb_txn_begin(mdb_env1, NULL, MDB_RDONLY, &mdb_txn1), MDB_SUCCESS) << "mdb_txn_begin failed";      
    CHECK_EQ(mdb_open(mdb_txn1, NULL, 0, &mdb_dbi1), MDB_SUCCESS) << "mdb_open failed";        
    LOG(INFO) << "Opening lmdb in " << src_db1 << endl;        
          
	CHECK_EQ(mdb_cursor_open(mdb_txn1, mdb_dbi1, &mdb_cursor1), MDB_SUCCESS)
			<< "mdb_cursor_open failed";
	CHECK_EQ(mdb_cursor_get(mdb_cursor1, &mdb_key1, &mdb_value1, MDB_FIRST), MDB_SUCCESS) << "mdb_cursor_get failed";

    
//========================= 2. read all key strings from lmdb, save into a vector
    //to save all key string into file DB_KEYLISTFILE
    vector<string> keystr_vec;
    
    int count = 0;     
    
    string keystr((char *)mdb_key1.mv_data, mdb_key1.mv_size);
    keystr_vec.push_back(keystr);
    
    count++;
   
    while(mdb_cursor_get(mdb_cursor1, &mdb_key1, &mdb_value1, MDB_NEXT) == MDB_SUCCESS)
    {
        
        string keystr((char *)mdb_key1.mv_data, mdb_key1.mv_size);
        
        keystr_vec.push_back(keystr);
        count++;
        
        if(count % 10000 == 0)
        {
            cout << count << "  " << ends;
        }
                
    }
    
    cout << endl << src_db1 << " contains count = " << count << " key/value pairs. "<< endl;
//========================= 3. save vector into a list
    std::ofstream outfile(argv[2]); 
    if(!outfile)
    {
        cerr << "Failed to open file " << argv[7] << endl;
        return -1;
    }
    
    int keystr_vec_size = keystr_vec.size();
    outfile << keystr_vec_size << endl;
    
    for(int i = 0; i < keystr_vec_size; ++i)
    {
        outfile << keystr_vec[i] << endl;
    }   
 
  return 0;
}


blog comments powered by Disqus