Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AVRO-2397: Implement Alias Support for C++ #522

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ test-output
/lang/java/compiler/nb-configuration.xml
/lang/java/compiler/nbproject/
**/.vscode/**/*
/lang/c++/CMakeResult
9 changes: 2 additions & 7 deletions lang/c++/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,7 @@ if (WIN32 AND NOT CYGWIN AND NOT MSYS)
add_definitions (/EHa)
add_definitions (
-DNOMINMAX
-DBOOST_REGEX_DYN_LINK
-DBOOST_FILESYSTEM_DYN_LINK
-DBOOST_SYSTEM_DYN_LINK
-DBOOST_IOSTREAMS_DYN_LINK
-DBOOST_PROGRAM_OPTIONS_DYN_LINK
-DBOOST_ALL_NO_LIB)
)
else()
# Replease c++11 with c++17 below in case C++ 17 should be used
add_definitions(-std=c++11 -fPIC)
Expand All @@ -68,7 +63,7 @@ endif ()


find_package (Boost 1.38 REQUIRED
COMPONENTS filesystem iostreams program_options regex system)
COMPONENTS zlib filesystem iostreams program_options regex system)

find_package(Snappy)
if (SNAPPY_FOUND)
Expand Down
9 changes: 6 additions & 3 deletions lang/c++/api/Node.hh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ typedef std::shared_ptr<Node> NodePtr;
class AVRO_DECL Name {
std::string ns_;
std::string simpleName_;
std::vector<std::string> aliases_;
public:
Name() { }
Name(const std::string& fullname);
Expand All @@ -52,6 +53,8 @@ public:
void ns(const std::string& n) { ns_ = n; }
void simpleName(const std::string& n) { simpleName_ = n; }
void fullname(const std::string& n);
void addAlias(const std::string& alias);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It appears that you editor inserts hard-tab instead of spaces. We use four spaces for indentation.

bool hasAlias();

bool operator < (const Name& n) const;
void check() const;
Expand Down Expand Up @@ -147,8 +150,8 @@ class AVRO_DECL Node : private boost::noncopyable
doAddName(name);
}
virtual size_t names() const = 0;
virtual const std::string &nameAt(int index) const = 0;
virtual bool nameIndex(const std::string &name, size_t &index) const = 0;
virtual const Name &nameAt(int index) const = 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change will break existing client code that assigns a std::string with nameAt. One way to make it backward compatible is to have an implicit conversion from Name to std::string (as a member function in Name class.

virtual bool nameIndex(const Name &name, size_t &index) const = 0;

void setFixedSize(int size) {
checkLock();
Expand Down Expand Up @@ -187,7 +190,7 @@ class AVRO_DECL Node : private boost::noncopyable
virtual void doSetDoc(const std::string &name) = 0;

virtual void doAddLeaf(const NodePtr &newLeaf) = 0;
virtual void doAddName(const std::string &name) = 0;
virtual void doAddName(const Name &name) = 0;
virtual void doSetFixedSize(int size) = 0;

private:
Expand Down
12 changes: 6 additions & 6 deletions lang/c++/api/NodeConcepts.hh
Original file line number Diff line number Diff line change
Expand Up @@ -178,23 +178,23 @@ struct MultiAttribute
template<typename T>
struct NameIndexConcept {

bool lookup(const std::string &name, size_t &index) const {
bool lookup(const Name &name, size_t &index) const {
throw Exception("Name index does not exist");
return 0;
}

bool add(const::std::string &name, size_t index) {
bool add(const Name &name, size_t index) {
throw Exception("Name index does not exist");
return false;
}
};

template<>
struct NameIndexConcept < MultiAttribute<std::string> >
struct NameIndexConcept < MultiAttribute<Name> >
{
typedef std::map<std::string, size_t> IndexMap;
typedef std::map<Name, size_t> IndexMap;

bool lookup(const std::string &name, size_t &index) const {
bool lookup(const Name &name, size_t &index) const {
IndexMap::const_iterator iter = map_.find(name);
if(iter == map_.end()) {
return false;
Expand All @@ -203,7 +203,7 @@ struct NameIndexConcept < MultiAttribute<std::string> >
return true;
}

bool add(const::std::string &name, size_t index) {
bool add(const Name &name, size_t index) {
bool added = false;
IndexMap::iterator lb = map_.lower_bound(name);
if(lb == map_.end() || map_.key_comp()(name, lb->first)) {
Expand Down
10 changes: 5 additions & 5 deletions lang/c++/api/NodeImpl.hh
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ class NodeImpl : public Node
return leafAttributes_.get(index);
}

void doAddName(const std::string &name) {
void doAddName(const Name &name) {
if (! nameIndex_.add(name, leafNameAttributes_.size())) {
throw Exception(boost::format("Cannot add duplicate name: %1%") % name);
}
Expand All @@ -139,11 +139,11 @@ class NodeImpl : public Node
return leafNameAttributes_.size();
}

const std::string &nameAt(int index) const {
const Name &nameAt(int index) const {
return leafNameAttributes_.get(index);
}

bool nameIndex(const std::string &name, size_t &index) const {
bool nameIndex(const Name &name, size_t &index) const {
return nameIndex_.lookup(name, index);
}

Expand Down Expand Up @@ -218,8 +218,8 @@ typedef concepts::NoAttribute<NodePtr> NoLeaves;
typedef concepts::SingleAttribute<NodePtr> SingleLeaf;
typedef concepts::MultiAttribute<NodePtr> MultiLeaves;

typedef concepts::NoAttribute<std::string> NoLeafNames;
typedef concepts::MultiAttribute<std::string> LeafNames;
typedef concepts::NoAttribute<Name> NoLeafNames;
typedef concepts::MultiAttribute<Name> LeafNames;

typedef concepts::NoAttribute<int> NoSize;
typedef concepts::SingleAttribute<int> HasSize;
Expand Down
54 changes: 47 additions & 7 deletions lang/c++/impl/Compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,25 @@ static bool isFullName(const string &s)
return s.find('.') != string::npos;
}

static void addAliases(Name& nm, const Array * aliasesPtr)
{
if (aliasesPtr)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use K&R convention of having { at the end of the previous line rather than on a line of its own. The only exception is standalone function. Even functions defined within a class definition use the end-of-line convention.

{
for (const Entity& item : *aliasesPtr)
{
string itemVal = item.stringValue();
bool missingNs = nm.ns().empty();
if(missingNs || isFullName(itemVal))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a coding style we use { and } even when we have just one statement. Also there should a space after if.

nm.addAlias(itemVal);
else
{
itemVal += ("." + nm.ns());
nm.addAlias(itemVal);
}
}
}
}

static Name getName(const string &name, const string &ns)
{
return (isFullName(name)) ? Name(name) : Name(name, ns);
Expand Down Expand Up @@ -166,10 +185,10 @@ const string getDocField(const Entity& e, const Object& m)
}

struct Field {
const string name;
const Name name;
const NodePtr schema;
const GenericDatum defaultValue;
Field(const string& n, const NodePtr& v, GenericDatum dv) :
Field(const Name& n, const NodePtr& v, GenericDatum dv) :
name(n), schema(v), defaultValue(dv) { }
};

Expand Down Expand Up @@ -295,7 +314,17 @@ static GenericDatum makeGenericDatum(NodePtr n,
static Field makeField(const Entity& e, SymbolTable& st, const string& ns)
{
const Object& m = e.objectValue();
const string& n = getStringField(e, m, "name");
Name n = getStringField(e, m, "name");

const Array *aliasesPtr = nullptr;

if (containsField(m, "aliases"))
{
const Array& aliases = getArrayField(e, m, "aliases");
for (const Entity& alias : aliases)
n.addAlias(alias.stringValue());
}

Object::const_iterator it = findField(e, m, "type");
map<string, Entity>::const_iterator it2 = m.find("default");
NodePtr node = makeNode(it->second, st, ns);
Expand All @@ -312,7 +341,7 @@ static NodePtr makeRecordNode(const Entity& e, const Name& name,
const string* doc, const Object& m,
SymbolTable& st, const string& ns) {
const Array& v = getArrayField(e, m, "fields");
concepts::MultiAttribute<string> fieldNames;
concepts::MultiAttribute<Name> fieldNames;
concepts::MultiAttribute<NodePtr> fieldValues;
vector<GenericDatum> defaultValues;

Expand Down Expand Up @@ -375,7 +404,7 @@ static NodePtr makeEnumNode(const Entity& e,
const Name& name, const Object& m)
{
const Array& v = getArrayField(e, m, "symbols");
concepts::MultiAttribute<string> symbols;
concepts::MultiAttribute<Name> symbols;
for (Array::const_iterator it = v.begin(); it != v.end(); ++it) {
if (it->type() != json::etString) {
throw Exception(boost::format("Enum symbol not a string: %1%") %
Expand Down Expand Up @@ -434,9 +463,17 @@ static NodePtr makeMapNode(const Entity& e, const Object& m,
static Name getName(const Entity& e, const Object& m, const string& ns)
{
const string& name = getStringField(e, m, "name");
const Array *aliasesPtr = nullptr;

if (containsField(m, "aliases"))
{
aliasesPtr = &getArrayField(e, m, "aliases");
}

if (isFullName(name)) {
return Name(name);
Name nm(name);
addAliases(nm, aliasesPtr);
return nm;
} else {
Object::const_iterator it = m.find("namespace");
if (it != m.end()) {
Expand All @@ -447,9 +484,12 @@ static Name getName(const Entity& e, const Object& m, const string& ns)
it->second.toString());
}
Name result = Name(name, it->second.stringValue());
addAliases(result, aliasesPtr);
return result;
}
return Name(name, ns);
Name nm(name, ns);
addAliases(nm, aliasesPtr);
return nm;
}
}

Expand Down
35 changes: 34 additions & 1 deletion lang/c++/impl/Node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,16 @@ void Name::fullname(const string& name)
check();
}

void Name::addAlias(const std::string& alias)
{
aliases_.emplace_back(alias);
}

bool Name::hasAlias()
{
return aliases_.size() > 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think !aliases_.empty() is more compact a wee bit faster.

}

bool Name::operator < (const Name& n) const
{
return (ns_ < n.ns_) ? true :
Expand Down Expand Up @@ -79,7 +89,30 @@ void Name::check() const

bool Name::operator == (const Name& n) const
{
return ns_ == n.ns_ && simpleName_ == n.simpleName_;
if (ns_ == n.ns_ && simpleName_ == n.simpleName_)
return true;

const std::string& myFullName = fullname();
const std::string& theirFullName = n.fullname();

for (const auto& theirAlias : n.aliases_)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have not used auto so far in our code base. I think it was because of some warning in some compilers that its meaning has changed or something like this. Is it possible to avoid using auto in these places before we figure out a consistent convention on use of auto?

{
if (myFullName == theirAlias)
return true;
}

for (const auto &myAlias : aliases_)
{
if (myAlias == theirFullName)
return true;
for (const auto& theirAlias : n.aliases_)
{
if (myAlias == theirAlias)
return true;
}
}

return false;
}

void Node::setLogicalType(LogicalType logicalType) {
Expand Down
16 changes: 8 additions & 8 deletions lang/c++/impl/parsing/ResolvingDecoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ class ResolvingGrammarGenerator : public ValidatingGrammarGenerator {
const NodePtr& reader, map<NodePair, ProductionPtr> &m,
map<NodePtr, ProductionPtr> &m2);

static vector<pair<string, size_t> > fields(const NodePtr& n) {
vector<pair<string, size_t> > result;
static vector<pair<Name, size_t> > fields(const NodePtr& n) {
vector<pair<Name, size_t> > result;
size_t c = n->names();
for (size_t i = 0; i < c; ++i) {
result.push_back(make_pair(n->nameAt(i), i));
Expand Down Expand Up @@ -191,8 +191,8 @@ ProductionPtr ResolvingGrammarGenerator::resolveRecords(
{
ProductionPtr result = make_shared<Production>();

vector<pair<string, size_t> > wf = fields(writer);
vector<pair<string, size_t> > rf = fields(reader);
vector<pair<Name, size_t> > wf = fields(writer);
vector<pair<Name, size_t> > rf = fields(reader);
vector<size_t> fieldOrder;
fieldOrder.reserve(reader->names());

Expand All @@ -202,11 +202,11 @@ ProductionPtr ResolvingGrammarGenerator::resolveRecords(
* If no matching field is found for reader, arrange to skip the writer
* field.
*/
for (vector<pair<string, size_t> >::const_iterator it = wf.begin();
for (vector<pair<Name, size_t> >::const_iterator it = wf.begin();
it != wf.end(); ++it) {
vector<pair<string, size_t> >::iterator it2 =
vector<pair<Name, size_t> >::iterator it2 =
find_if(rf.begin(), rf.end(),
equalsFirst<string, size_t>(it->first));
equalsFirst<Name, size_t>(it->first));
if (it2 != rf.end()) {
ProductionPtr p = doGenerate2(writer->leafAt(it->second),
reader->leafAt(it2->second), m, m2);
Expand All @@ -229,7 +229,7 @@ ProductionPtr ResolvingGrammarGenerator::resolveRecords(
* Examine the reader fields left out, (i.e. those didn't have corresponding
* writer field).
*/
for (vector<pair<string, size_t> >::const_iterator it = rf.begin();
for (vector<pair<Name, size_t> >::const_iterator it = rf.begin();
it != rf.end(); ++it) {

NodePtr s = reader->leafAt(it->second);
Expand Down
6 changes: 4 additions & 2 deletions lang/c++/jsonschemas/bigrecord_r
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
{
"type": "record",
"name": "RootRecord",
"name": "RootRecordAliased",
"aliases": ["RootRecord"],
"fields": [
{
"name": "mylong",
"name": "mylongAliased",
"aliases": ["mylong"],
"type": "long"
},
{
Expand Down
Loading