Skip to content

Commit 94a11e2

Browse files
author
Akhtar
committed
finished csv2heapfile
1 parent b28883a commit 94a11e2

File tree

2 files changed

+154
-7
lines changed

2 files changed

+154
-7
lines changed

csv2heapfile.cc

Lines changed: 78 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,86 @@
66
#include "library.h"
77

88
int main(int argc, const char * argv[]){
9-
if (argc<4){
10-
std::cout << "csv2heapfile <csv_file> <heapfile> <page_size>";
11-
return -1;
9+
if (argc < 4) {
10+
std::cout << "USAGE: ./csv2headfile <csv_file> <heapfile> <page_size>";
11+
return 1;
1212
}
13-
int size_page,new_page, record_no, page_no;
1413

15-
std::string filename(argv[1]);
16-
std::string heapfile(argv[2]);
17-
size_page = std::stoi(argv[3]);
14+
int is_output = 1;
15+
int size_page, size_record, total_records;
16+
Page page;
17+
18+
//opening a csv file using ifstream
19+
std::ifstream csv_file;
20+
csv_file.open(argv[1]);
21+
22+
//if file fails to open, then return an error
23+
if (!csv_file){
24+
std::cout <<"Is not able to open the file" << argv[1] << " in line 18 \n";
25+
return 1;
26+
}
27+
28+
//Initializing the heapfile and page
29+
Heapfile *heap = new Heapfile();
30+
size_page = atoi(argv[3]);
31+
FILE *heap_file = fopen(argv[2], "w+b");
32+
if (!(heap_file)){
33+
std::cout << "ERROR: not able to find the file "<<argv[2]<<" in line 32\n";
34+
return 1;
35+
}
36+
size_record = NUM_ATTRIBUTES * ATTRIBUTE_SIZE;
37+
init_fixed_len_page(&page, size_page,size_record);
38+
init_heapfile(heap, size_page, heap_file);
39+
40+
//Timing
41+
struct timeb t;
42+
ftime(&t);
43+
long start_time_in_ms = (t.time * 1000) + t.millitm;
44+
45+
while(csv_file){
46+
Record *record;
47+
std::string line;
48+
int indexSlot;
49+
50+
csv_file >> line;
51+
if (line.size()==0){
52+
continue;
53+
}
54+
55+
//remove all commas from the line
56+
*record = new Record;
57+
line.erase(std::remove(line.begin(), line.end(), ","), line.end());
58+
59+
fixed_len_read((char*)line.c_str(), size_record, r);
60+
indexSlot = add_fixed_len_page(&page, r);
61+
62+
//IIf the page is full, write to the disk and recalculate the slot index
63+
if (indexSlot == -1){
64+
PageID id;
65+
id = alloc_page(&page, heap, id);
66+
write_page(&page, heap,id);
67+
init_fixed_len_page(&page, size_page, size_record);
68+
indexSlot = add_fixed_len_page(&page,r);
69+
}
70+
71+
write_fixed_len_page(&page, indexSlot, r);
72+
total_records=+1;
73+
}
1874

75+
//If there is some record, write it to the last file
76+
if (page.used_slot > 0){
77+
PageID id = alloc_page(heap);
78+
write_page(&page,heap, id);
79+
}
80+
81+
//calculating the end of time
82+
ftime(&t);
83+
long total_run_time = ((t.time * 1000) + t.millitm) - start_time_in_ms;
84+
85+
csv_file.close();
86+
std::cout << "RUN TIME Total: " << total_run_time << " milliseconds \n";
87+
std::cout << "TOTAL Records: " << total_records << "\n";
88+
89+
return 0;
1990

2091
}

library.cc

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,4 +82,80 @@ void read_fixed_len_page(Page *page, int slot, Record *r){
8282
*/
8383
int fixed_len_page_capacity(Page *page){
    // Number of whole slots of `slot_size` bytes that fit in `page_size`
    // bytes. A non-positive slot size cannot hold any record, and returning 0
    // here also avoids a division by zero.
    if (page->slot_size <= 0) {
        return 0;
    }
    return page->page_size / page->slot_size;
}
86+
87+
/**
88+
* Initalize a heapfile to use the file and page size given.
89+
*/
90+
void init_heapfile(Heapfile *heapfile, int page_size, FILE *file){
91+
heapfile -> page_size = page_size;
92+
headfile -> file_ptr = file;
93+
94+
//We know the first 8 bytes is the offset to the next page
95+
int offset_dir = 0;
96+
fwrite(&offset_dir, sizeof(int), 1, file);
97+
98+
// Filling the rest of the directory with emptines
99+
int temp = ((page_size - sizeof(int)) / sizeof(DirectoryEntry));
100+
for (int i = 1; i <= temp; ++i){
101+
fwrite(&i, sizeof(int), 1, file);
102+
fwrite(&page_size, sizeof(int),1,file);
103+
}
104+
fflush(file);
105+
}
106+
107+
/**
108+
* Deserializes `size` bytes from the buffer, `buf`, and
109+
* stores the record in `record`.
110+
*/
111+
void fixed_len_read(void *buf, int size, Record *record){
112+
int i = 0;
113+
while(i< size/ATTRIBUTE_SIZE){
114+
char *attr = new char(ATTRIBUTE_SIZE+1);
115+
strncpy(attr, buf + (i*ATTRIBUTE_SIZE), ATTRIBUTE_SIZE);
116+
117+
attr[ATTRIBUTE_SIZE] - '\0';
118+
119+
if (strlen(attr) > 0){
120+
record -> push_back(attr);
121+
}
122+
++i;
123+
}
124+
}
125+
126+
/**
127+
* Write a record into a given slot.
128+
*/
129+
void write_fixed_len_page(Page *page, int slot, Record *r){
130+
int temp = !r->empty() && page->data->at(slot).empty();
131+
if (temp){
132+
page->used_slots++;
133+
}
134+
page->data->at(slot) = *r;
135+
}
136+
137+
/**
138+
* Write a page from memory to disk
139+
*/
140+
void write_page(Page *page, Heapfile *heapfile, PageID pid){
141+
fseek(heapfile->file_ptr, pid *heapfeal->page_size, SEEK_SET);
142+
char *temp = new char[heapfile->page_size];
143+
temp[0] = '\0';
144+
int i = 0;
145+
while(i<fixed_len_page_capacity(page)){
146+
fixed_len_write(&(page->data)->at(i), temp);
147+
++i;
148+
}
149+
fwrite(temp, heapfile->page_size,1 , heapfile->file_ptr);
150+
delete buf;
151+
152+
int dir_no = get_directory_number(pid, heapfile->page_size);
153+
go_to_directory_by_directory_number(dir_no, heapfile->file_ptr);
154+
if(search_directory(heapfile, pid)){
155+
throw;
156+
}
157+
158+
int space = heapfile->page_size - (page->used_slots * NUM_ATTRIBUTES * ATTRIBUTE_SIZE);
159+
fwrite(&space, sizeof(int), 1, heapfile->file_ptr);
160+
fflush(heapfile->file_ptr);
85161
}

0 commit comments

Comments
 (0)