This commit is contained in:
Federico Ponchio 2004-06-22 15:32:09 +00:00
parent f33b962262
commit 098b2acac0
1 changed files with 203 additions and 201 deletions

View File

@ -24,6 +24,9 @@
History History
$Log: not supported by cvs2svn $ $Log: not supported by cvs2svn $
Revision 1.2 2004/06/22 10:27:16 ponchio
*** empty log message ***
Revision 1.1 2004/06/22 00:39:56 ponchio Revision 1.1 2004/06/22 00:39:56 ponchio
Created Created
@ -33,12 +36,12 @@ Created
#ifndef VFILE_H #ifndef VFILE_H
#define VFILE_H #define VFILE_H
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

//#include <hash_map>
#include <iostream>
#include <list>
#include <map>
#include <string>
/**Vector structure on file with simulated mmapping. /**Vector structure on file with simulated mmapping.
* a priority queue of buffers is used * a priority queue of buffers is used
@ -47,162 +50,161 @@ Created
* use an Iterator? * use an Iterator?
*/ */
namespace nxs {
/**
 * Vector of T stored in a file and accessed through a bounded cache of
 * fixed-size chunks ("simulated mmapping").
 *
 * Elements are addressed by index; element i lives at byte offset
 * i * sizeof(T) in the file.  The file is read and written one chunk
 * (chunk_size elements) at a time.  At most queue_size chunks (but always at
 * least one) are kept in memory; when room is needed the chunk that was
 * loaded longest ago is written back and dropped (a cache hit does not
 * refresh a chunk's position).
 *
 * Errors follow the file's convention: they are reported with assert(), so
 * they are only detected in builds without NDEBUG.
 *
 * NOTE(review): no copy constructor / assignment operator is declared, so a
 * compiler-generated copy would share the FILE* and the chunk buffers with
 * the original.  Do not copy VFile objects.
 */
template <class T> class VFile {
 public:
  /** One cached chunk of the file. */
  struct Buffer {
    unsigned int key;  // chunk number: the chunk starts at element key * chunk_size
    T *data;           // array of chunk_size elements allocated with new[]
  };

 private:
  typedef typename std::list<Buffer>::iterator iterator;

  FILE *fp;                                // NULL while no file is open
  std::list<Buffer> buffers;               // cached chunks, most recently loaded first
  std::map<unsigned int, iterator> index;  // chunk number -> entry in buffers  //TODO move to hash_map
  unsigned int chunk_size;  // default buffer size (expressed in number of T)
  unsigned int queue_size;  // maximum number of cached chunks
  unsigned int n_elements;  // size of the vector
  T scratch;  // returned by operator[] when a chunk cannot be read (NDEBUG builds)

  /** Byte offset in the file of element number `element`. */
  static long ByteOffset(unsigned int element) {
    return static_cast<long>(static_cast<size_t>(element) * sizeof(T));
  }

  /** Byte offset in the file of the first element of chunk `chunk`. */
  long ChunkOffset(unsigned int chunk) const {
    return static_cast<long>(static_cast<size_t>(chunk) * chunk_size * sizeof(T));
  }

  /** Number of elements of chunk `chunk` that lie inside the vector
   *  (chunk_size for a full chunk, less for the last one, 0 past the end). */
  unsigned int ValidCount(unsigned int chunk) const {
    const size_t first = static_cast<size_t>(chunk) * chunk_size;
    if(first >= n_elements) return 0;
    const size_t left = n_elements - first;
    return left < chunk_size ? static_cast<unsigned int>(left) : chunk_size;
  }

 public:
  VFile(): fp(NULL), chunk_size(0), queue_size(0), n_elements(0), scratch() {}
  ~VFile() { if(fp) Close(); }

  /** Creates (or truncates) `filename` and starts with an empty vector.
   *  A file that is already open in this object is closed first.
   *  @param _chunk_size number of elements per chunk, must be > 0
   *  @param _queue_size maximum number of chunks kept in memory
   *  @return false if the file could not be opened. */
  bool Create(const std::string &filename,
              unsigned int _chunk_size = 4096/sizeof(T),
              unsigned int _queue_size = 1000) {
    assert(_chunk_size > 0);
    if(fp) Close();

    chunk_size = _chunk_size;
    queue_size = _queue_size;
    n_elements = 0;

    fp = fopen(filename.c_str(), "wb+");
    return fp != NULL;
  }

  /** Opens an existing file for reading and writing; the number of elements
   *  is the file length divided by sizeof(T).  The file may hold fewer
   *  elements than one chunk.
   *  A file that is already open in this object is closed first.
   *  @param _chunk_size number of elements per chunk, must be > 0
   *  @param _queue_size maximum number of chunks kept in memory
   *  @return false if the file could not be opened or measured. */
  bool Load(const std:: string &filename,
            unsigned int _chunk_size = 4096/sizeof(T),
            unsigned int _queue_size = 1000) {
    assert(_chunk_size > 0);
    if(fp) Close();

    chunk_size = _chunk_size;
    queue_size = _queue_size;
    n_elements = 0;

    fp = fopen(filename.c_str(), "rb+");
    if(!fp) return false;

    long bytes = -1;
    if(fseek(fp, 0, SEEK_END) == 0)
      bytes = ftell(fp);
    if(bytes < 0) {
      fclose(fp);
      fp = NULL;
      return false;
    }
    n_elements = static_cast<unsigned int>(static_cast<size_t>(bytes) / sizeof(T));
    return true;
  }

  /** Writes back every cached chunk and closes the file.
   *  Does nothing when no file is open. */
  void Close() {
    if(!fp) return;
    Flush();
    fclose(fp);
    fp = NULL;
  }

  /** Writes every cached chunk to the file, empties the cache and flushes the
   *  stdio stream.  References obtained from operator[] are invalid
   *  afterwards. */
  void Flush() {
    for(iterator i = buffers.begin(); i != buffers.end(); ++i)
      FlushBuffer(*i);
    buffers.clear();
    index.clear();
    // Push pending stdio data to the descriptor: Resize() truncates through
    // the descriptor and must not be undone by a later stdio write-back.
    if(fp) fflush(fp);
  }

  /** Writes the in-range part of `buffer` to its place in the file and frees
   *  buffer.data.  The buffer is NOT removed from the cache by this call. */
  void FlushBuffer(Buffer buffer) {
    // Only the elements that belong to the vector are written, so the file
    // never grows past n_elements * sizeof(T).
    const unsigned int count = ValidCount(buffer.key);
    if(count > 0) {
      if(fseek(fp, ChunkOffset(buffer.key), SEEK_SET) != 0 ||
         count != fwrite(buffer.data, sizeof(T), count, fp)) {
        assert(0 && "Could not write");
      }
    }
    delete []buffer.data;
  }

  /** Sets the number of elements to `elem`.
   *  Growing extends the file with zero bytes; shrinking (or keeping the
   *  size) flushes the cache and truncates the file. */
  void Resize(unsigned int elem) {
    assert(fp);
    if(elem > n_elements) {
      // Writing the last byte of the new range makes the file long enough.
      const unsigned char zero = 0;
      if(fseek(fp, ByteOffset(elem) - 1, SEEK_SET) != 0 ||
         1 != fwrite(&zero, sizeof(unsigned char), 1, fp)) {
        assert(0 && "Could not resize");
      }
    } else {
      //TODO optimize: we do not need flush for buffers over elem.
      Flush();
      if(ftruncate(fileno(fp), ByteOffset(elem)) != 0) {
        assert(0 && "Could not resize");
      }
    }
    n_elements = elem;
  }

  /** Returns element n, loading its chunk into the cache if needed.
   *  Remember that the returned reference is valid only until the next call
   *  of operator[], Flush, Resize or Close.
   *  @param n element index, must be < Size() */
  T &operator[](unsigned int n) {
    assert(fp);
    assert(n < n_elements);

    const unsigned int chunk = n / chunk_size;
    const unsigned int offset = n % chunk_size;

    typename std::map<unsigned int, iterator>::iterator hit = index.find(chunk);
    if(hit != index.end())
      return hit->second->data[offset];

    // Make room so that at most queue_size chunks are cached after loading.
    while(!buffers.empty() && buffers.size() >= queue_size) {
      Buffer &oldest = buffers.back();
      index.erase(oldest.key);
      FlushBuffer(oldest);
      buffers.pop_back();
    }

    Buffer buffer;
    buffer.key = chunk;
    // Zero-filled, so the unused tail of the last chunk matches the zero
    // bytes the file gets when it is extended by Resize().
    buffer.data = new T[chunk_size]();

    if(fseek(fp, ChunkOffset(chunk), SEEK_SET) != 0) {
      delete []buffer.data;
      assert(0 && "failed to fseek");
      return scratch;
    }

    const unsigned int data_size = ValidCount(chunk);
    if(data_size != fread(buffer.data, sizeof(T), data_size, fp)) {
      if(feof(fp)) {
        assert(0 && "end of file");
      } else {
        assert(0 && "failed reading!");
      }
      delete []buffer.data;
      return scratch;
    }

    buffers.push_front(buffer);
    index[chunk] = buffers.begin();
    return buffer.data[offset];
  }

  /** Number of elements in the vector. */
  unsigned int Size() const { return n_elements; }
  /** Number of elements per chunk. */
  unsigned int ChunkSize() const { return chunk_size; }
  /** Maximum number of chunks kept in memory. */
  unsigned int QueueSize() const { return queue_size; }

 protected:
  /** Moves the file position to the start of chunk `chunk`.
   *  Use this for directly writing on the file...
   *  be careful to flush (unless you never read or flushed). */
  void SetPosition(unsigned int chunk) {
    fseek(fp, ChunkOffset(chunk), SEEK_SET);
  }
};
}//namespace
#endif #endif