Skip to content

Commit

Permalink
Use std::u8string where appropriate
Browse files Browse the repository at this point in the history
  • Loading branch information
fmang committed Mar 3, 2023
1 parent 89dc000 commit 1d13c25
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 122 deletions.
14 changes: 7 additions & 7 deletions src/base64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,23 @@

#include <cstring>

// Base64 alphabet used by the encoder: 64 symbols, looked up by 6-bit value, plus the string
// literal's terminating NUL (hence the array size of 65).
// Diff view: the `char` declaration appears to be the line removed by this commit, and the
// `char8_t` declaration right after it is its replacement.
static const char base64_table[65] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
static const char8_t base64_table[65] =
u8"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

std::string ot::encode_base64(ot::byte_string_view src)
std::u8string ot::encode_base64(ot::byte_string_view src)
{
size_t len = src.size();
size_t num_blocks = (len + 2) / 3; // Count of 3-byte blocks, rounded up.
size_t olen = num_blocks * 4; // Each 3-byte block becomes 4 base64 bytes.
if (olen < len)
throw std::overflow_error("failed to encode excessively long base64 block");

std::string out;
std::u8string out;
out.resize(olen);

const uint8_t* in = src.data();
const uint8_t* end = in + len;
char* pos = out.data();
char8_t* pos = out.data();
while (end - in >= 3) {
*pos++ = base64_table[in[0] >> 2];
*pos++ = base64_table[((in[0] & 0x03) << 4) | (in[1] >> 4)];
Expand All @@ -53,10 +53,10 @@ std::string ot::encode_base64(ot::byte_string_view src)
return out;
}

ot::byte_string ot::decode_base64(std::string_view src)
ot::byte_string ot::decode_base64(std::u8string_view src)
{
// Remove the padding and rely on the string length instead.
while (src.back() == '=')
while (src.back() == u8'=')
src.remove_suffix(1);

size_t olen = src.size() / 4 * 3; // Whole blocks;
Expand Down
93 changes: 50 additions & 43 deletions src/cli.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ ot::options ot::parse_options(int argc, char** argv, FILE* comments_input)
options opt;
const char* equal;
ot::status rc;
std::list<std::string> local_to_add; // opt.to_add before UTF-8 conversion.
std::list<std::string> local_to_delete; // opt.to_delete before UTF-8 conversion.
bool set_all = false;
std::optional<std::string> set_cover;
opt = {};
Expand All @@ -90,16 +92,16 @@ ot::options ot::parse_options(int argc, char** argv, FILE* comments_input)
opt.overwrite = true;
break;
case 'd':
opt.to_delete.emplace_back(optarg);
local_to_delete.emplace_back(optarg);
break;
case 'a':
case 's':
equal = strchr(optarg, '=');
if (equal == nullptr)
throw status {st::bad_arguments, "Comment does not contain an equal sign: "s + optarg + "."};
if (c == 's')
opt.to_delete.emplace_back(optarg, equal - optarg);
opt.to_add.emplace_back(optarg);
local_to_delete.emplace_back(optarg, equal - optarg);
local_to_add.emplace_back(optarg);
break;
case 'S':
opt.delete_all = true;
Expand Down Expand Up @@ -151,14 +153,22 @@ ot::options ot::parse_options(int argc, char** argv, FILE* comments_input)
throw status { st::bad_arguments, "Cannot use standard input more than once." };

// Convert arguments to UTF-8.
if (!opt.raw) {
for (std::list<std::string>* args : { &opt.to_add, &opt.to_delete }) {
try {
for (std::string& arg : *args)
arg = to_utf8(arg);
} catch (const ot::status& rc) {
throw status {st::bad_arguments, "Could not encode argument into UTF-8: " + rc.message};
}
if (opt.raw) {
// Cast the user data without any encoding conversion.
auto cast_to_utf8 = [](std::string_view in)
{ return std::u8string(reinterpret_cast<const char8_t*>(in.data()), in.size()); };
std::transform(local_to_add.begin(), local_to_add.end(),
std::back_inserter(opt.to_add), cast_to_utf8);
std::transform(local_to_delete.begin(), local_to_delete.end(),
std::back_inserter(opt.to_delete), cast_to_utf8);
} else {
try {
std::transform(local_to_add.begin(), local_to_add.end(),
std::back_inserter(opt.to_add), encode_utf8);
std::transform(local_to_delete.begin(), local_to_delete.end(),
std::back_inserter(opt.to_delete), encode_utf8);
} catch (const ot::status& rc) {
throw status {st::bad_arguments, "Could not encode argument into UTF-8: " + rc.message};
}
}

Expand Down Expand Up @@ -188,35 +198,35 @@ ot::options ot::parse_options(int argc, char** argv, FILE* comments_input)

if (set_cover) {
byte_string picture_data = ot::slurp_binary_file(set_cover->c_str());
opt.to_delete.push_back("METADATA_BLOCK_PICTURE");
opt.to_delete.push_back(u8"METADATA_BLOCK_PICTURE"s);
opt.to_add.push_back(ot::make_cover(picture_data));
}

if (set_all) {
// Read comments from stdin and prepend them to opt.to_add.
std::list<std::string> comments = read_comments(comments_input, opt.raw);
std::list<std::u8string> comments = read_comments(comments_input, opt.raw);
opt.to_add.splice(opt.to_add.begin(), std::move(comments));
}
return opt;
}

/** Format a UTF-8 string by adding a tabulation (\t) after every line feed (\n), so that the
* extra lines of a multiline value are marked as continuation lines.
*
* \param source The value to format; it is taken by const reference and left untouched.
* \return A copy of source in which each line feed is followed by one tabulation.
*
* Diff view: where two near-identical lines follow each other below, the std::string / char
* variant appears to be the line removed by this commit and the std::u8string / char8_t variant
* is its replacement. */
static std::string format_value(const std::string& source)
static std::u8string format_value(const std::u8string& source)
{
// Count the line feeds first: this tells us both whether any work is needed and how much
// larger the formatted copy will be.
auto newline_count = std::count(source.begin(), source.end(), '\n');
auto newline_count = std::count(source.begin(), source.end(), u8'\n');

// General case: the value fits on a single line. Return a plain copy of the string, made by
// its copy constructor, which is the most efficient copy we could hope for.
if (newline_count == 0)
return source;

std::string formatted;
std::u8string formatted;
// One extra character is inserted per line feed, so the final size is known up front.
formatted.reserve(source.size() + newline_count);
for (auto c : source) {
formatted.push_back(c);
// Follow each line feed with a tabulation to mark the continuation line.
if (c == '\n')
formatted.push_back('\t');
formatted.push_back(u8'\t');
}
return formatted;
}
Expand All @@ -227,11 +237,10 @@ static std::string format_value(const std::string& source)
* To disambiguate between a newline embedded in a comment and a newline representing the start of
* the next tag, continuation lines always have a single TAB (^I) character added to the beginning.
*/
void ot::print_comments(const std::list<std::string>& comments, FILE* output, bool raw)
void ot::print_comments(const std::list<std::u8string>& comments, FILE* output, bool raw)
{
std::string local;
bool has_control = false;
for (const std::string& source_comment : comments) {
for (const std::u8string& source_comment : comments) {
if (!has_control) { // Don’t bother analyzing comments if the flag is already up.
for (unsigned char c : source_comment) {
if (c < 0x20 && c != '\n') {
Expand All @@ -241,46 +250,43 @@ void ot::print_comments(const std::list<std::string>& comments, FILE* output, bo
}
}

std::string utf8_comment = format_value(source_comment);
const std::string* comment;
std::u8string utf8_comment = format_value(source_comment);
// Convert the comment from UTF-8 to the system encoding if relevant.
if (raw) {
comment = &utf8_comment;
fwrite(utf8_comment.data(), 1, utf8_comment.size(), output);
} else {
try {
local = from_utf8(utf8_comment);
comment = &local;
std::string local = decode_utf8(utf8_comment);
fwrite(local.data(), 1, local.size(), output);
} catch (ot::status& rc) {
rc.message += " See --raw.";
throw;
}
}

fwrite(comment->data(), 1, comment->size(), output);
putc('\n', output);
}
if (has_control)
fputs("warning: Some tags contain control characters.\n", stderr);
}

std::list<std::string> ot::read_comments(FILE* input, bool raw)
std::list<std::u8string> ot::read_comments(FILE* input, bool raw)
{
std::list<std::string> comments;
std::list<std::u8string> comments;
comments.clear();
char* source_line = nullptr;
size_t buflen = 0;
ssize_t nread;
std::string* previous_comment = nullptr;
std::u8string* previous_comment = nullptr;
while ((nread = getline(&source_line, &buflen, input)) != -1) {
if (nread > 0 && source_line[nread - 1] == '\n')
--nread; // Chomp.

std::string line;
std::u8string line;
if (raw) {
line = std::string(source_line, nread);
line = std::u8string(reinterpret_cast<char8_t*>(source_line), nread);
} else {
try {
line = to_utf8(std::string_view(source_line, nread));
line = encode_utf8(std::string_view(source_line, nread));
} catch (const ot::status& rc) {
free(source_line);
throw ot::status {ot::st::badly_encoded, "UTF-8 conversion error: " + rc.message};
Expand All @@ -290,10 +296,10 @@ std::list<std::string> ot::read_comments(FILE* input, bool raw)
if (line.empty()) {
// Ignore empty lines.
previous_comment = nullptr;
} else if (line[0] == '#') {
} else if (line[0] == u8'#') {
// Ignore comments.
previous_comment = nullptr;
} else if (line[0] == '\t') {
} else if (line[0] == u8'\t') {
// Continuation line: append the current line to the previous tag.
if (previous_comment == nullptr) {
ot::status rc = {ot::st::error, "Unexpected continuation line: " + std::string(source_line, nread)};
Expand All @@ -303,7 +309,7 @@ std::list<std::string> ot::read_comments(FILE* input, bool raw)
line[0] = '\n';
previous_comment->append(line);
}
} else if (line.find('=') == std::string::npos) {
} else if (line.find(u8'=') == decltype(line)::npos) {
ot::status rc = {ot::st::error, "Malformed tag: " + std::string(source_line, nread)};
free(source_line);
throw rc;
Expand All @@ -315,19 +321,20 @@ std::list<std::string> ot::read_comments(FILE* input, bool raw)
return comments;
}

void ot::delete_comments(std::list<std::string>& comments, const std::string& selector)
void ot::delete_comments(std::list<std::u8string>& comments, const std::u8string& selector)
{
auto name = selector.data();
auto equal = selector.find('=');
auto value = (equal == std::string::npos ? nullptr : name + equal + 1);
auto equal = selector.find(u8'=');
auto value = (equal == std::u8string::npos ? nullptr : name + equal + 1);
auto name_len = value ? equal : selector.size();
auto value_len = value ? selector.size() - equal - 1 : 0;
auto it = comments.begin(), end = comments.end();
while (it != end) {
auto current = it++;
/** \todo Avoid using strncasecmp because it assumes the system locale is UTF-8. */
bool name_match = current->size() > name_len + 1 &&
(*current)[name_len] == '=' &&
strncasecmp(current->data(), name, name_len) == 0;
strncasecmp((const char*) current->data(), (const char*) name, name_len) == 0;
if (!name_match)
continue;
bool value_match = value == nullptr ||
Expand All @@ -343,11 +350,11 @@ static void edit_tags(ot::opus_tags& tags, const ot::options& opt)
{
if (opt.delete_all) {
tags.comments.clear();
} else for (const std::string& name : opt.to_delete) {
ot::delete_comments(tags.comments, name.c_str());
} else for (const std::u8string& name : opt.to_delete) {
ot::delete_comments(tags.comments, name);
}

for (const std::string& comment : opt.to_add)
for (const std::u8string& comment : opt.to_add)
tags.comments.emplace_back(comment);
}

Expand Down
24 changes: 12 additions & 12 deletions src/opus.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ ot::opus_tags ot::parse_tags(const ogg_packet& packet)
if (packet.bytes < 0)
throw status {st::int_overflow, "Overflowing comment header length"};
size_t size = static_cast<size_t>(packet.bytes);
const char* data = reinterpret_cast<char*>(packet.packet);
const uint8_t* data = reinterpret_cast<uint8_t*>(packet.packet);
size_t pos = 0;
opus_tags my_tags;

// Magic number
if (8 > size)
throw status {st::cut_magic_number, "Comment header too short for the magic number"};
if (memcmp(data, "OpusTags", 8) != 0)
if (memcmp(data, u8"OpusTags", 8) != 0)
throw status {st::bad_magic_number, "Comment header did not start with OpusTags"};

// Vendor
Expand All @@ -48,7 +48,7 @@ ot::opus_tags ot::parse_tags(const ogg_packet& packet)
size_t vendor_length = le32toh(*((uint32_t*) (data + pos)));
if (pos + 4 + vendor_length > size)
throw status {st::cut_vendor_data, "Vendor string did not fit the comment header"};
my_tags.vendor = std::string(data + pos + 4, vendor_length);
my_tags.vendor = std::u8string(reinterpret_cast<const char8_t*>(&data[pos + 4]), vendor_length);
pos += 4 + my_tags.vendor.size();

// Comment count
Expand All @@ -66,21 +66,21 @@ ot::opus_tags ot::parse_tags(const ogg_packet& packet)
if (pos + 4 + comment_length > size)
throw status {st::cut_comment_data,
"Comment string did not fit the comment header"};
const char *comment_value = data + pos + 4;
auto comment_value = reinterpret_cast<const char8_t*>(&data[pos + 4]);
my_tags.comments.emplace_back(comment_value, comment_length);
pos += 4 + comment_length;
}

// Extra data
my_tags.extra_data = std::string(data + pos, size - pos);
my_tags.extra_data = byte_string(data + pos, size - pos);

return my_tags;
}

ot::dynamic_ogg_packet ot::render_tags(const opus_tags& tags)
{
size_t size = 8 + 4 + tags.vendor.size() + 4;
for (const std::string& comment : tags.comments)
for (const std::u8string& comment : tags.comments)
size += 4 + comment.size();
size += tags.extra_data.size();

Expand All @@ -100,7 +100,7 @@ ot::dynamic_ogg_packet ot::render_tags(const opus_tags& tags)
n = htole32(tags.comments.size());
memcpy(data, &n, 4);
data += 4;
for (const std::string& comment : tags.comments) {
for (const std::u8string& comment : tags.comments) {
n = htole32(comment.size());
memcpy(data, &n, 4);
memcpy(data+4, comment.data(), comment.size());
Expand Down Expand Up @@ -166,8 +166,8 @@ ot::byte_string ot::picture::serialize() const
*/
std::optional<ot::picture> ot::extract_cover(const ot::opus_tags& tags)
{
static const std::string_view prefix = "METADATA_BLOCK_PICTURE="sv;
auto is_cover = [](const std::string& tag) { return tag.starts_with(prefix); };
static const std::u8string_view prefix = u8"METADATA_BLOCK_PICTURE="sv;
auto is_cover = [](const std::u8string& tag) { return tag.starts_with(prefix); };
auto cover_tag = std::find_if(tags.comments.begin(), tags.comments.end(), is_cover);
if (cover_tag == tags.comments.end())
return {}; // No cover art.
Expand All @@ -177,7 +177,7 @@ std::optional<ot::picture> ot::extract_cover(const ot::opus_tags& tags)
fputs("warning: Found multiple covers; only the first will be extracted."
" Please report your use case if you need a finer selection.\n", stderr);

std::string_view cover_value = *cover_tag;
std::u8string_view cover_value = *cover_tag;
cover_value.remove_prefix(prefix.size());
return picture(decode_base64(cover_value));
}
Expand All @@ -202,10 +202,10 @@ static ot::byte_string_view detect_mime_type(ot::byte_string_view data)
return "application/octet-stream"_bsv;
}

/** Build a METADATA_BLOCK_PICTURE tag from raw picture data.
*
* The MIME type is obtained from detect_mime_type(), the picture is serialized, and the
* serialized bytes are base64-encoded and appended to the "METADATA_BLOCK_PICTURE=" prefix.
*
* \param picture_data The bytes of the picture to embed.
* \return The complete tag, in NAME=value form.
*
* Diff view: where two near-identical lines follow each other below, the std::string / plain
* literal variant appears to be the line removed by this commit and the std::u8string / u8
* literal variant is its replacement. */
std::string ot::make_cover(ot::byte_string_view picture_data)
std::u8string ot::make_cover(ot::byte_string_view picture_data)
{
picture pic;
pic.mime_type = detect_mime_type(picture_data);
pic.picture_data = picture_data;
// The tag value is the base64 form of the serialized picture block.
return "METADATA_BLOCK_PICTURE=" + encode_base64(pic.serialize());
return u8"METADATA_BLOCK_PICTURE=" + encode_base64(pic.serialize());
}
Loading

0 comments on commit 1d13c25

Please sign in to comment.