This commit is contained in:
Federico Ponchio 2004-06-22 15:32:09 +00:00
parent f33b962262
commit 098b2acac0
1 changed files with 203 additions and 201 deletions

View File

@ -24,6 +24,9 @@
History
$Log: not supported by cvs2svn $
Revision 1.2 2004/06/22 10:27:16 ponchio
*** empty log message ***
Revision 1.1 2004/06/22 00:39:56 ponchio
Created
@ -33,12 +36,12 @@ Created
#ifndef VFILE_H
#define VFILE_H
#include <unistd.h>
#include <errno.h>
//#include <hash_map>
#include <map>
#include <list>
#include <string>
#include <iostream>
/** Vector structure stored on file with simulated memory mapping.
 * A priority queue of buffers is used
@ -47,162 +50,161 @@ Created
* use an Iterator?
*/
// NOTE(review): this text looks like a rendered diff in which removed and
// added lines are interleaved without +/- markers (for example both the
// `close()` and `Close()` headers appear and share one body). The comments
// below describe the lines as written; they do not assume that the
// interleaved text compiles as-is.
namespace nxs {
/** Vector of T stored in a stdio file and accessed through in-memory buffers.
 * Buffers that have been loaded are kept in the `buffers` list and are looked
 * up by key through `index`.
 */
template <class T> class VFile {
public:
/** One in-memory buffer of file data. */
struct Buffer {
// Set from the `chunk` value computed by the element accessor; used to
// compute the file position when the buffer is flushed.
unsigned int key;
// Element count used by the flushBuffer/getElement lines; the
// FlushBuffer/operator[] lines use chunk_size instead.
unsigned int size;
// Allocated with new T[] by the element accessor, released with delete[]
// when the buffer is flushed.
T *data;
};
// Iterator into the buffer list; this is the type stored in `index`.
typedef std::list<Buffer>::iterator iterator;
private:
// Underlying stdio stream; NULL/0 while no file is open (see the constructor
// and Close).
FILE *fp;
std::map<unsigned int, iterator> index; //TODO move to hash_map
// Buffers currently held in memory: new ones are pushed at the front and the
// one at the back is the one evicted by the element accessor.
std::list<Buffer> buffers;
unsigned int chunk_size; //default buffer size
unsigned int chunk_bits; //log2(chunk_size);
// Same iterator typedef spelled with `typename` (std::list<Buffer> depends on T).
typedef typename std::list<Buffer>::iterator iterator;
// Maps a buffer key to the position of that buffer inside `buffers`.
std::map<unsigned int, iterator> index; //TODO move to hash_map
unsigned int chunk_size; //default buffer size (expressed in number of T)
// Threshold on the number of buffers: the element accessor evicts the back
// buffer once buffers.size() exceeds this value.
unsigned int queue_size;
unsigned int n_elements; //size of the vector
public:
// Starts with no file attached.
VFile(): fp(NULL) {}
// The destructor closes the file (which flushes the buffers first) if one is
// still open.
~VFile() { if(fp) close(); }
/** Opens `filename` with mode "wb+" and initializes an empty vector
 * (n_elements = 0).
 * The `_chunk_bits` lines derive chunk_size as 1<<_chunk_bits; the
 * `_chunk_size` lines store the given number of T per buffer directly and
 * assert that it is greater than zero.
 * @param filename path handed to fopen
 * @param _queue_size value stored in queue_size
 * @return false if fopen fails, true otherwise
 */
bool create(const std::string &filename,
unsigned int _chunk_bits = 12,
~VFile() { if(fp) Close(); }
bool Create(const std::string &filename,
unsigned int _chunk_size = 4096/sizeof(T),
unsigned int _queue_size = 1000) {
fp = fopen(filename.c_str(), "wb+");
if(!fp)
return false;
chunk_bits = _chunk_bits;
chunk_size = 1<<_chunk_bits;
assert(_chunk_size > 0);
chunk_size = _chunk_size;
queue_size = _queue_size;
n_elements = 0;
fp = fopen(filename.c_str(), "wb+");
if(!fp) return false;
return true;
}
/** Opens an existing file with mode "rb+" and derives n_elements from the
 * file length divided by sizeof(T).
 * The chunk and queue parameters are stored the same way as in Create.
 * @return false if fopen fails, true otherwise
 */
bool load(const std:: string &filename,
unsigned int _chunk_bits = 12,
bool Load(const std:: string &filename,
unsigned int _chunk_size = 4096/sizeof(T),
unsigned int _queue_size = 1000) {
assert(_chunk_size > 0);
chunk_size = _chunk_size;
queue_size = _queue_size;
fp = fopen(filename.c_str(), "rb+");
if(!fp) return false;
//let's find its length
fseek(fp, -1, SEEK_END);
chunk_bits = _chunk_bits;
chunk_size = 1<<_chunk_bits;
queue_size = _queue_size;
// seek to the end so that ftell reports the file length in bytes
fseek(fp, 0, SEEK_END);
n_elements = ftell(fp)/ sizeof(T);
// the file is expected to hold at least one full chunk
assert(n_elements >= chunk_size);
return true;
}
/** Flushes every buffer, closes the stream and resets fp to 0. */
void close() {
flush();
void Close() {
Flush();
fclose(fp);
fp = 0;
}
/** Writes every buffer back to the file (which also frees its data), then
 * empties both `buffers` and `index`.
 */
void flush() {
void Flush() {
iterator i;
for(i = buffers.begin(); i != buffers.end(); i++)
flushBuffer(*i);
FlushBuffer(*i);
buffers.clear();
index.clear();
}
/** Seeks to the buffer's position in the file, writes its data and frees it.
 * The Buffer is taken by value, so only the struct is copied: `data` still
 * points to the same allocation, which is deleted here.
 * The flushBuffer lines seek to key * sizeof(T) and write buffer.size
 * elements; the FlushBuffer lines seek to key * chunk_size * sizeof(T) and
 * always write chunk_size elements.
 */
void flushBuffer(Buffer buffer) {
fseek(fp, buffer.key * sizeof(T), SEEK_SET);
if(buffer.size != fwrite(buffer.data, sizeof(T), buffer.size, fp)) {
cerr << "Could not write!" << endl;
exit(0);
void FlushBuffer(Buffer buffer) {
fseek(fp, buffer.key * chunk_size * sizeof(T), SEEK_SET);
if(chunk_size != fwrite(buffer.data, sizeof(T), chunk_size, fp)) {
assert(0 && "Could not write");
}
delete []buffer.data;
}
/** Changes the number of elements of the vector.
 * The resize lines only grow: n_elements is doubled while it is below
 * 256000000 and then increased in steps of 256000000 until it reaches elem.
 * The Resize lines set n_elements to exactly elem: when growing they seek to
 * the last byte of the new size and write there; otherwise they flush all
 * buffers and truncate the file with ftruncate.
 */
void resize(unsigned int elem) {
if(elem < n_elements)
return;
if(n_elements < chunk_size)
n_elements = chunk_size;
while(elem > n_elements && n_elements < 256000000)
n_elements *= 2;
while(elem > n_elements)
n_elements += 256000000;
if(-1 == fseek(fp, n_elements * sizeof(T), SEEK_SET)) {
cerr << "Could not resize!" << endl;
exit(-1);
void Resize(unsigned int elem) {
if(elem > n_elements) {
if(-1 == fseek(fp, elem*sizeof(T) -1, SEEK_SET)) {
assert(0 && "Could not resize");
}
fwrite(&elem, sizeof(unsigned int), 1, fp);
// NOTE(review): `a` has no initializer, so the byte written below has an
// unspecified value; presumably only the file length matters here - confirm.
unsigned char a;
fwrite(&a, sizeof(unsigned char), 1, fp);
} else {
//TODO optimize: we do not need flush for buffers over elem.
Flush();
int fd = fileno(fp);
// NOTE(review): the return value of ftruncate is not checked.
ftruncate(fd, elem*sizeof(T));
}
n_elements = elem;
}
/** Returns element n, loading the chunk that contains it when it is not
 * already buffered.
 * Remember that the returned pointer/reference is valid only until the next
 * call of getElement, setElement or operator[], since such a call may evict
 * and free the buffer it points into.
 */
T *getElement(unsigned int n) {
T &operator[](unsigned int n) {
if(n > n_elements) {
cerr << "Overflow!" << endl;
return NULL;
}
// chunk_bits lines: `chunk` is n rounded down to a multiple of the chunk
// size, i.e. the index of the first element of the chunk.
unsigned int chunk = (n >> chunk_bits) << chunk_bits;
unsigned int offset = n - chunk;
// NOTE(review): this check also accepts n == n_elements - confirm intended.
assert(n <= n_elements);
// chunk_size lines: `chunk` is the chunk number and `offset` the position
// of n inside that chunk, counted in elements.
unsigned int chunk = n/chunk_size;
unsigned int offset = n - chunk*chunk_size;
// NOTE(review): offset is counted in elements, while this bound is
// chunk_size*sizeof(T) - confirm the intended bound.
assert(offset < chunk_size*sizeof(T));
// already buffered: return the cached element
if(index.count(chunk))
return (*(index[chunk])).data + offset;
return *((*(index[chunk])).data + offset);
// too many buffers: write the one at the back of the list to the file,
// then remove it from both `index` and `buffers`
if(buffers.size() > queue_size) {
Buffer &buffer= buffers.back();
flushBuffer(buffer);
FlushBuffer(buffer);
index.erase(buffer.key);
buffers.pop_back();
}
// load the requested chunk into a fresh buffer
Buffer buffer;
buffer.key = chunk;
// NOTE(review): `chunks` is not declared in the visible lines.
buffer.data = new T[chunk_size * chunks];
buffer.size = chunks * chunk_size;
if(fseek(fp, chunk * sizeof(T), SEEK_SET)) {
cerr << "failed to fseek" << endl;
return NULL;
// NOTE(review): this allocates chunk_size * sizeof(T) elements, while the
// read below and FlushBuffer transfer at most chunk_size elements.
buffer.data = new T[chunk_size * sizeof(T)];
if(fseek(fp, chunk * chunk_size * sizeof(T), SEEK_SET)) {
assert(0 && "failed to fseek");
// NOTE(review): on this path the new buffer is returned without having
// been added to `buffers`.
return *(buffer.data);
}
if(buffer.size != fread(buffer.data, sizeof(T), buffer.size, fp)) {
if(!ferror(fp)) {
cerr << "end of file" << endl;
// the last chunk may be partial: read only the elements below n_elements
// (the second expression is n_elements - chunk*chunk_size in unsigned
// arithmetic)
unsigned int data_size = chunk_size;
if(data_size + chunk * chunk_size > n_elements)
data_size = -chunk * chunk_size + n_elements;
if(data_size != fread(buffer.data, sizeof(T), data_size, fp)) {
if(feof(fp)) {
assert(0 && "end of file");
} else {
cerr << "failed reading!: " << errno << endl;
assert(0 && "failed reading!");
}
return NULL;
// NOTE(review): as above, the buffer is returned without having been added
// to `buffers`.
return (*buffer.data);
}
// the newest buffer goes to the front of the list; its list position is
// recorded in `index` under its key
buffers.push_front(buffer);
index[chunk] = buffers.begin();
return buffer.data + offset;
return *(buffer.data + offset);
}
/** Use this for writing directly to the file...
 * Be careful to flush (unless you never read or flushed).
 */
// Accessors for the element count, the number of T per buffer and the
// buffer-count threshold.
unsigned int Size() { return n_elements; }
unsigned int ChunkSize() { return chunk_size; }
unsigned int QueueSize() { return queue_size; }
// Stores t at position i through the pointer returned by getElement.
void setElement(unsigned int i, T &t) {
*getElement(i) = t;
protected:
// Moves the file position to the start of chunk number `chunk`
// (byte offset chunk * chunk_size * sizeof(T)).
void SetPosition(unsigned int chunk) {
fseek(fp, chunk * chunk_size * sizeof(T), SEEK_SET);
}
// Moves the file position to byte offset chunk * sizeof(T).
void setPosition(unsigned int chunk) {
fseek(fp, chunk * sizeof(T), SEEK_SET);
}
// Accessors for the element count, the chunk size and the buffer-count
// threshold.
unsigned int size() { return n_elements; }
unsigned int chunkSize() { return chunk_size; }
unsigned int queueSize() { return queue_size; }
};
}//namespace
#endif