|
1 | | -#include <algorithm> |
2 | | -#include <stdlib.h> |
3 | 1 | #include <iostream> |
4 | 2 | #include <fstream> |
5 | | -#include <string.h> |
| 3 | +#include <sstream> |
6 | 4 | #include <sys/timeb.h> |
| 5 | +#include <cstring> |
7 | 6 | #include "library.h" |
8 | 7 |
|
9 | | -int main(int argc, char** argv) { |
| 8 | +int main(int argc, const char * argv[]) { |
10 | 9 | if (argc < 4) { |
11 | | - std::cout << "Error, usage must be:\n"; |
12 | | - std::cout << "./write_fixed_len_pages <csv_file> <page_file> <page_size>\n"; |
13 | | - return 1; |
14 | | - } |
15 | | - |
16 | | - bool show_output = true; |
17 | | - if (argc == 5 && strcmp(argv[4], "--no-output") == 0) { |
18 | | - show_output = false; |
19 | | - } |
20 | | - |
21 | | - std::ifstream csv_file; |
22 | | - csv_file.open(argv[1]); |
23 | | - if (!csv_file) { |
24 | | - std::cout << "Error, could not find file " << argv[1] << "\n"; |
| 10 | + std::cout << "Usage: write_fixed_len_pages <csv_file> <page_file> <page_size>"; |
25 | 11 | return 1; |
26 | 12 | } |
| 13 | + std::string csv_filename(argv[1]); |
| 14 | + std::string page_filename(argv[2]); |
| 15 | + int page_size = std::stoi(argv[3]); // 4096 for example |
27 | 16 |
|
| 17 | + // Open the page file for writing |
28 | 18 | std::ofstream page_file; |
29 | | - page_file.open(argv[2]); |
30 | | - if (!page_file) { |
31 | | - std::cout << "Error, could not find file " << argv[2] << "\n"; |
32 | | - return 1; |
33 | | - } |
34 | | - |
35 | | - int page_size = atoi(argv[3]); |
36 | | - int record_size = NUM_ATTRIBUTES * ATTRIBUTE_SIZE; |
| 19 | + page_file.open(page_filename, std::ios::out | std::ios::binary); |
37 | 20 |
|
| 21 | + // Read the CSV file line-by-line: |
| 22 | + std::ifstream csv_file(csv_filename); |
| 23 | + std::string line; |
38 | 24 | Page page; |
39 | | - init_fixed_len_page(&page, page_size, record_size); |
40 | | - |
41 | | - int total_records = 0; |
42 | | - int total_pages = 0; |
43 | | - |
44 | | - char* buf; |
| 25 | + int should_create_new_page = 1; |
45 | 26 |
|
| 27 | + // start timer |
46 | 28 | struct timeb t; |
47 | 29 | ftime(&t); |
48 | | - long start_time_in_ms = (t.time * 1000) + t.millitm; |
49 | | - |
50 | | - while (csv_file) { |
51 | | - std::string line; |
52 | | - csv_file >> line; |
53 | | - |
54 | | - if (line.size() == 0) { |
55 | | - // ignore empty lines |
56 | | - continue; |
| 30 | + unsigned long start_ms = t.time * 1000 + t.millitm; |
| 31 | + |
| 32 | + // for output |
| 33 | + int number_of_records = 0; |
| 34 | + int number_of_pages = 0; |
| 35 | + |
| 36 | + while (std::getline(csv_file, line)) { |
| 37 | + std::stringstream linestr(line); |
| 38 | + std::string cell; |
| 39 | + |
| 40 | + // Read cells into a Record |
| 41 | + Record record; |
| 42 | + while (std::getline(linestr, cell, ',')) { |
| 43 | + char *temp = (char *) malloc(11); |
| 44 | + std::strncpy(temp, cell.c_str(), 11); |
| 45 | + record.push_back(temp); |
57 | 46 | } |
58 | 47 |
|
59 | | - // remove all commas from the line |
60 | | - line.erase(std::remove(line.begin(), line.end(), ','), line.end()); |
61 | | - |
62 | | - Record *r = new Record; |
63 | | - |
64 | | - // turn 'line' from string to char*, and read the values into r |
65 | | - fixed_len_read((char*)line.c_str(), record_size, r); |
66 | | - |
67 | | - int slot_index = add_fixed_len_page(&page, r); |
68 | | - |
69 | | - if (slot_index == -1) { // page is full |
70 | | - |
71 | | - total_pages++; |
72 | | - |
73 | | - int buf_size = page.page_size * record_size; |
74 | | - buf = new char[buf_size]; |
75 | | - |
76 | | - std::vector<Record> *page_data = page.data; |
77 | | - for (int i = 0; i < fixed_len_page_capacity(&page); i++) { |
78 | | - fixed_len_write(&page_data->at(i), buf); |
79 | | - } |
80 | | - |
81 | | - // flush page to file |
82 | | - page_file << buf << std::flush; |
83 | | - |
84 | | - // allocate empty page |
85 | | - init_fixed_len_page(&page, page_size, record_size); |
86 | | - // recalculate slot index |
87 | | - slot_index = add_fixed_len_page(&page, r); |
| 48 | + // First run, the page will not be initialized |
| 49 | + if (should_create_new_page) { |
| 50 | + init_fixed_len_page(&page, page_size, fixed_len_sizeof(&record)); |
| 51 | + number_of_pages += 1; |
88 | 52 | } |
89 | | - |
90 | | - write_fixed_len_page(&page, slot_index, r); |
91 | | - |
92 | | - total_records++; |
93 | | - } |
94 | | - // write last page to file if it has records |
95 | | - if (page.used_slots > 0) { |
96 | | - total_pages++; |
97 | | - |
98 | | - // write page to page_file |
99 | | - int buf_size = page.page_size * record_size; |
100 | | - buf = new char[buf_size]; |
101 | | - std::vector<Record> *page_data = page.data; |
102 | | - for (int i = 0; i < fixed_len_page_capacity(&page); i++) { |
103 | | - if (!page_data->at(i).empty()) { |
104 | | - fixed_len_write(&(page_data->at(i)), buf); |
105 | | - } |
| 53 | + should_create_new_page = add_fixed_len_page(&page, &record) == -1; |
| 54 | + number_of_records += 1; |
| 55 | + |
| 56 | + // if -1, init a new page and add this record to it |
| 57 | + if (should_create_new_page) { |
| 58 | + // Write page.data to page_file |
| 59 | + page_file.write((const char *) page.data, page.page_size); |
| 60 | + |
| 61 | + init_fixed_len_page(&page, page_size, fixed_len_sizeof(&record)); |
| 62 | + add_fixed_len_page(&page, &record); |
| 63 | + should_create_new_page = 0; |
| 64 | + number_of_pages += 1; |
106 | 65 | } |
| 66 | + } |
107 | 67 |
|
108 | | - // flush page to file |
109 | | - page_file << buf << std::flush; |
| 68 | + if (!should_create_new_page) { |
| 69 | + // Write page.data to page_file |
| 70 | + page_file.write((const char *) page.data, page.page_size); |
110 | 71 | } |
111 | 72 |
|
| 73 | + page_file.close(); |
| 74 | + |
| 75 | + // stop timer |
112 | 76 | ftime(&t); |
113 | | - long total_run_time = ((t.time * 1000) + t.millitm) - start_time_in_ms; |
| 77 | + unsigned long stop_ms = t.time * 1000 + t.millitm; |
114 | 78 |
|
115 | | - csv_file.close(); |
116 | | - page_file.close(); |
| 79 | + std::cout << "NUMBER OF RECORDS: " << number_of_records << "\n"; |
| 80 | + std::cout << "NUMBER OF PAGES: " << number_of_pages << "\n"; |
| 81 | + std::cout << "TIME: " << stop_ms - start_ms << " milliseconds\n"; |
117 | 82 |
|
118 | | - if (show_output) { |
119 | | - std::cout << "NUMBER OF RECORDS: " << total_records << "\n"; |
120 | | - std::cout << "NUMBER OF PAGES: " << total_pages << "\n"; |
121 | | - std::cout << "TOTAL TIME: " << total_run_time << " milliseconds\n"; |
122 | | - } |
123 | 83 | return 0; |
124 | 84 | } |
0 commit comments