Skip to content

Commit f98b342

Browse files
committed
changed
1 parent 8aa6a58 commit f98b342

File tree

3 files changed

+57
-97
lines changed

3 files changed

+57
-97
lines changed

myPage

-488 KB
Binary file not shown.

write_fixed_len_pages.cc

Lines changed: 57 additions & 97 deletions
Original file line number | Diff line number | Diff line change
@@ -1,124 +1,84 @@
1-
#include <algorithm>
2-
#include <stdlib.h>
31
#include <iostream>
42
#include <fstream>
5-
#include <string.h>
3+
#include <sstream>
64
#include <sys/timeb.h>
5+
#include <cstring>
76
#include "library.h"
87

9-
int main(int argc, char** argv) {
8+
int main(int argc, const char * argv[]) {
109
if (argc < 4) {
11-
std::cout << "Error, usage must be:\n";
12-
std::cout << "./write_fixed_len_pages <csv_file> <page_file> <page_size>\n";
13-
return 1;
14-
}
15-
16-
bool show_output = true;
17-
if (argc == 5 && strcmp(argv[4], "--no-output") == 0) {
18-
show_output = false;
19-
}
20-
21-
std::ifstream csv_file;
22-
csv_file.open(argv[1]);
23-
if (!csv_file) {
24-
std::cout << "Error, could not find file " << argv[1] << "\n";
10+
std::cout << "Usage: write_fixed_len_pages <csv_file> <page_file> <page_size>";
2511
return 1;
2612
}
13+
std::string csv_filename(argv[1]);
14+
std::string page_filename(argv[2]);
15+
int page_size = std::stoi(argv[3]); // 4096 for example
2716

17+
// Open the page file for writing
2818
std::ofstream page_file;
29-
page_file.open(argv[2]);
30-
if (!page_file) {
31-
std::cout << "Error, could not find file " << argv[2] << "\n";
32-
return 1;
33-
}
34-
35-
int page_size = atoi(argv[3]);
36-
int record_size = NUM_ATTRIBUTES * ATTRIBUTE_SIZE;
19+
page_file.open(page_filename, std::ios::out | std::ios::binary);
3720

21+
// Read the CSV file line-by-line:
22+
std::ifstream csv_file(csv_filename);
23+
std::string line;
3824
Page page;
39-
init_fixed_len_page(&page, page_size, record_size);
40-
41-
int total_records = 0;
42-
int total_pages = 0;
43-
44-
char* buf;
25+
int should_create_new_page = 1;
4526

27+
// start timer
4628
struct timeb t;
4729
ftime(&t);
48-
long start_time_in_ms = (t.time * 1000) + t.millitm;
49-
50-
while (csv_file) {
51-
std::string line;
52-
csv_file >> line;
53-
54-
if (line.size() == 0) {
55-
// ignore empty lines
56-
continue;
30+
unsigned long start_ms = t.time * 1000 + t.millitm;
31+
32+
// for output
33+
int number_of_records = 0;
34+
int number_of_pages = 0;
35+
36+
while (std::getline(csv_file, line)) {
37+
std::stringstream linestr(line);
38+
std::string cell;
39+
40+
// Read cells into a Record
41+
Record record;
42+
while (std::getline(linestr, cell, ',')) {
43+
char *temp = (char *) malloc(11);
44+
std::strncpy(temp, cell.c_str(), 11);
45+
record.push_back(temp);
5746
}
5847

59-
// remove all commas from the line
60-
line.erase(std::remove(line.begin(), line.end(), ','), line.end());
61-
62-
Record *r = new Record;
63-
64-
// turn 'line' from string to char*, and read the values into r
65-
fixed_len_read((char*)line.c_str(), record_size, r);
66-
67-
int slot_index = add_fixed_len_page(&page, r);
68-
69-
if (slot_index == -1) { // page is full
70-
71-
total_pages++;
72-
73-
int buf_size = page.page_size * record_size;
74-
buf = new char[buf_size];
75-
76-
std::vector<Record> *page_data = page.data;
77-
for (int i = 0; i < fixed_len_page_capacity(&page); i++) {
78-
fixed_len_write(&page_data->at(i), buf);
79-
}
80-
81-
// flush page to file
82-
page_file << buf << std::flush;
83-
84-
// allocate empty page
85-
init_fixed_len_page(&page, page_size, record_size);
86-
// recalculate slot index
87-
slot_index = add_fixed_len_page(&page, r);
48+
// First run, the page will not be initialized
49+
if (should_create_new_page) {
50+
init_fixed_len_page(&page, page_size, fixed_len_sizeof(&record));
51+
number_of_pages += 1;
8852
}
89-
90-
write_fixed_len_page(&page, slot_index, r);
91-
92-
total_records++;
93-
}
94-
// write last page to file if it has records
95-
if (page.used_slots > 0) {
96-
total_pages++;
97-
98-
// write page to page_file
99-
int buf_size = page.page_size * record_size;
100-
buf = new char[buf_size];
101-
std::vector<Record> *page_data = page.data;
102-
for (int i = 0; i < fixed_len_page_capacity(&page); i++) {
103-
if (!page_data->at(i).empty()) {
104-
fixed_len_write(&(page_data->at(i)), buf);
105-
}
53+
should_create_new_page = add_fixed_len_page(&page, &record) == -1;
54+
number_of_records += 1;
55+
56+
// if -1, init a new page and add this record to it
57+
if (should_create_new_page) {
58+
// Write page.data to page_file
59+
page_file.write((const char *) page.data, page.page_size);
60+
61+
init_fixed_len_page(&page, page_size, fixed_len_sizeof(&record));
62+
add_fixed_len_page(&page, &record);
63+
should_create_new_page = 0;
64+
number_of_pages += 1;
10665
}
66+
}
10767

108-
// flush page to file
109-
page_file << buf << std::flush;
68+
if (!should_create_new_page) {
69+
// Write page.data to page_file
70+
page_file.write((const char *) page.data, page.page_size);
11071
}
11172

73+
page_file.close();
74+
75+
// stop timer
11276
ftime(&t);
113-
long total_run_time = ((t.time * 1000) + t.millitm) - start_time_in_ms;
77+
unsigned long stop_ms = t.time * 1000 + t.millitm;
11478

115-
csv_file.close();
116-
page_file.close();
79+
std::cout << "NUMBER OF RECORDS: " << number_of_records << "\n";
80+
std::cout << "NUMBER OF PAGES: " << number_of_pages << "\n";
81+
std::cout << "TIME: " << stop_ms - start_ms << " milliseconds\n";
11782

118-
if (show_output) {
119-
std::cout << "NUMBER OF RECORDS: " << total_records << "\n";
120-
std::cout << "NUMBER OF PAGES: " << total_pages << "\n";
121-
std::cout << "TOTAL TIME: " << total_run_time << " milliseconds\n";
122-
}
12383
return 0;
12484
}
Binary file not shown.

0 commit comments

Comments (0)