Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OKL][Feature] @max_inner_dims attribute #531

Merged
merged 15 commits into from
Dec 6, 2021
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/occa/internal/lang/builtins/attributes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <occa/internal/lang/builtins/attributes/implicitArg.hpp>
#include <occa/internal/lang/builtins/attributes/inner.hpp>
#include <occa/internal/lang/builtins/attributes/kernel.hpp>
#include <occa/internal/lang/builtins/attributes/maxInnerDims.hpp>
#include <occa/internal/lang/builtins/attributes/outer.hpp>
#include <occa/internal/lang/builtins/attributes/restrict.hpp>
#include <occa/internal/lang/builtins/attributes/shared.hpp>
Expand Down
56 changes: 56 additions & 0 deletions src/occa/internal/lang/builtins/attributes/maxInnerDims.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#include <occa/internal/lang/expr.hpp>
#include <occa/internal/lang/parser.hpp>
#include <occa/internal/lang/statement.hpp>
#include <occa/internal/lang/variable.hpp>
#include <occa/internal/lang/builtins/attributes/maxInnerDims.hpp>

namespace occa {
namespace lang {
namespace attributes {
maxInnerDims::maxInnerDims() {}

const std::string& maxInnerDims::name() const {
static const std::string name_{"max_inner_dims"};
return name_;
}

bool maxInnerDims::forStatementType(const int sType) const {
return (sType & statementType::for_);
}

bool maxInnerDims::isValid(const attributeToken_t &attr) const {
if (attr.kwargs.size()) {
attr.printError("[@max_inner_dims] does not take kwargs");
return false;
}
const auto argCount{attr.args.size()};
if (1 > argCount) {
attr.printError("[@max_inner_dims] expects at least one argument");
return false;
}
if(3 < argCount) {
attr.printError("[@max_inner_dims] takes at most 3 arguments");
return false;
}

for(auto&& arg : attr.args) {
exprNode *expr = arg.expr;
bool error = !(expr && expr->canEvaluate());
if(!error){
primitive value = expr->evaluate();
error = !value.isInteger();
if(!error) {
int intValue = value;
error = (intValue < 0);
}
}
if(error) {
attr.printError("[@max_inner_dims] arguments must be postive!");
return false;
}
}
return true;
}
}
}
}
23 changes: 23 additions & 0 deletions src/occa/internal/lang/builtins/attributes/maxInnerDims.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#ifndef OCCA_INTERNAL_LANG_BUILTINS_ATTRIBUTES_MAX_INNER_DIMS_HEADER
#define OCCA_INTERNAL_LANG_BUILTINS_ATTRIBUTES_MAX_INNER_DIMS_HEADER

#include <occa/internal/lang/attribute.hpp>

namespace occa {
namespace lang {
namespace attributes {
class maxInnerDims : public attribute_t {
public:
maxInnerDims();

virtual const std::string& name() const;

virtual bool forStatementType(const int sType) const;

virtual bool isValid(const attributeToken_t &attr) const;
};
}
}
}

#endif
6 changes: 3 additions & 3 deletions src/occa/internal/lang/expr/dpcppAtomicNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ namespace occa
void dpcppAtomicNode::print(printer &pout) const
{

pout << "sycl::ONEAPI::atomic_ref<";
pout << "sycl::ext::oneapi::atomic_ref<";

// Currently CUDA only supports atomics on fundamental types:
// assume that we can safefuly ignore the pointer types for now
// and simply print the typename.
pout << atomic_type.name() << ",";
pout << "sycl::ONEAPI::memory_order::relaxed,";
pout << "sycl::ext::oneapi::memory_order::relaxed,";

// The SYCL standard states,
//
Expand All @@ -49,7 +49,7 @@ namespace occa
// Currently OCCA does not address system-wide atomics;
// therefore, assume for now that we can always safely
// use `memory_scope::device`.
pout << "sycl::ONEAPI::memory_scope::device,";
pout << "sycl::ext::oneapi::memory_scope::device,";

if(atomic_type.hasAttribute("shared"))
{
Expand Down
13 changes: 10 additions & 3 deletions src/occa/internal/lang/modes/cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ namespace occa {
return name;
}

std::string cudaParser::launchBoundsAttribute(const int innerDims[3]) {
const int innerTotal{innerDims[0]*innerDims[1]*innerDims[2]};
const std::string lbAttr = "__launch_bounds__(" + std::to_string(innerTotal) + ")";
return lbAttr;
}

void cudaParser::updateConstToConstant() {
root.children
.forEachDeclaration([&](variableDeclaration &decl) {
Expand Down Expand Up @@ -147,10 +153,11 @@ namespace occa {
root.children
.forEachKernelStatement([&](functionDeclStatement &kernelSmnt) {
// Set kernel qualifiers
vartype_t &vartype = kernelSmnt.function().returnType;
vartype.qualifiers.addFirst(vartype.origin(),
vartype_t &vartype = kernelSmnt.function().returnType;

vartype.qualifiers.addFirst(vartype.origin(),
global);
vartype.qualifiers.addFirst(vartype.origin(),
vartype.qualifiers.addFirst(vartype.origin(),
externC);
});
}
Expand Down
14 changes: 8 additions & 6 deletions src/occa/internal/lang/modes/cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,19 @@ namespace occa {

cudaParser(const occa::json &settings_ = occa::json());

virtual void onClear();
virtual void onClear() override;

virtual void beforePreprocessing();
virtual void beforePreprocessing() override;

virtual void beforeKernelSplit();
virtual void beforeKernelSplit() override;

virtual void afterKernelSplit();
virtual void afterKernelSplit() override;

virtual std::string getOuterIterator(const int loopIndex);
virtual std::string getOuterIterator(const int loopIndex) override;

virtual std::string getInnerIterator(const int loopIndex);
virtual std::string getInnerIterator(const int loopIndex) override;

virtual std::string launchBoundsAttribute(const int innerDims[3]) override;

void updateConstToConstant();

Expand Down
15 changes: 14 additions & 1 deletion src/occa/internal/lang/modes/dpcpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <occa/internal/lang/builtins/attributes.hpp>
#include <occa/internal/lang/builtins/types.hpp>
#include <occa/internal/lang/expr.hpp>
// #include <stringstream>

namespace occa
{
Expand Down Expand Up @@ -76,6 +77,19 @@ namespace occa
return "item_.get_local_id(" + occa::toString(dpcppDimensionOrder(loopIndex)) + ")";
}

std::string dpcppParser::launchBoundsAttribute(const int innerDims[3])
{
std::stringstream ss;
ss << "[[sycl::reqd_work_group_size("
<< innerDims[2]
<< ","
<< innerDims[1]
<< ","
<< innerDims[0]
<< ")]]\n";
return ss.str();
}

// @note: As of SYCL 2020 this will need to change from `CL/sycl.hpp` to `sycl.hpp`
void dpcppParser::setupHeaders()
{
Expand Down Expand Up @@ -246,7 +260,6 @@ namespace occa
{
functionDeclStatement &funcDeclSmnt = (functionDeclStatement &)*smnt;

// Only add __device__ to non-kernel functions
if (funcDeclSmnt.hasAttribute("kernel"))
{
return;
Expand Down
8 changes: 5 additions & 3 deletions src/occa/internal/lang/modes/dpcpp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ namespace occa

virtual std::string getOuterIterator(const int loopIndex) override;
virtual std::string getInnerIterator(const int loopIndex) override;
virtual std::string launchBoundsAttribute(const int innerDims[3]) override;

void addExtensions();
void addBarriers();
Expand All @@ -41,10 +42,11 @@ namespace occa
void setSharedQualifiers();
void setKernelQualifiers(function_t &function);
void migrateLocalDecls(functionDeclStatement &kernelSmnt);
void setLaunchBounds();

void setupAtomics();
static bool transformAtomicBlockStatement(blockStatement &blockSmnt);
static bool transformAtomicBasicExpressionStatement(expressionStatement &exprSmnt);
void setupAtomics();
static bool transformAtomicBlockStatement(blockStatement &blockSmnt);
static bool transformAtomicBasicExpressionStatement(expressionStatement &exprSmnt);

private:
inline int dpcppDimensionOrder(const int index) { return 2 - index; }
Expand Down
5 changes: 5 additions & 0 deletions src/occa/internal/lang/modes/metal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ namespace occa {
return name;
}

// Needs to be implemented: do nothing for now.
std::string metalParser::launchBoundsAttribute(const int innerDims[3]) {
return "";
}

void metalParser::setSharedQualifiers() {
statementArray::from(root)
.nestedForEachDeclaration([&](variableDeclaration &decl) {
Expand Down
14 changes: 8 additions & 6 deletions src/occa/internal/lang/modes/metal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,18 @@ namespace occa {

metalParser(const occa::json &settings_ = occa::json());

virtual void onClear();
virtual void beforePreprocessing();
virtual void onClear() override;
virtual void beforePreprocessing() override;

virtual void beforeKernelSplit();
virtual void beforeKernelSplit() override;

virtual void afterKernelSplit();
virtual void afterKernelSplit() override;

virtual std::string getOuterIterator(const int loopIndex);
virtual std::string getOuterIterator(const int loopIndex) override;

virtual std::string getInnerIterator(const int loopIndex);
virtual std::string getInnerIterator(const int loopIndex) override;

virtual std::string launchBoundsAttribute(const int innerDims[3]) override;

void setSharedQualifiers();

Expand Down
1 change: 1 addition & 0 deletions src/occa/internal/lang/modes/okl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,7 @@ namespace occa {
parser.addAttribute<attributes::kernel>();
parser.addAttribute<attributes::outer>();
parser.addAttribute<attributes::shared>();
parser.addAttribute<attributes::maxInnerDims>();
}

void setOklLoopIndices(functionDeclStatement &kernelSmnt) {
Expand Down
13 changes: 13 additions & 0 deletions src/occa/internal/lang/modes/opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,18 @@ namespace occa {
return name;
}

std::string openclParser::launchBoundsAttribute(const int innerDims[3]) {
std::stringstream ss;
ss << "__attribute__((reqd_work_group_size("
<< innerDims[0]
<< ","
<< innerDims[1]
<< ","
<< innerDims[2]
<< ")))\n";
return ss.str();
}

void openclParser::addExtensions() {
if (!settings.has("extensions")) {
return;
Expand Down Expand Up @@ -299,6 +311,7 @@ namespace occa {
}

void openclParser::setKernelQualifiers(function_t &function) {

function.returnType.add(0, kernel);

const int argCount = (int) function.args.size();
Expand Down
14 changes: 8 additions & 6 deletions src/occa/internal/lang/modes/opencl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,18 @@ namespace occa {

openclParser(const occa::json &settings_ = occa::json());

virtual void onClear();
virtual void beforePreprocessing();
virtual void onClear() override;
virtual void beforePreprocessing() override;

virtual void beforeKernelSplit();
virtual void beforeKernelSplit() override;

virtual void afterKernelSplit();
virtual void afterKernelSplit() override;

virtual std::string getOuterIterator(const int loopIndex);
virtual std::string getOuterIterator(const int loopIndex) override;

virtual std::string getInnerIterator(const int loopIndex);
virtual std::string getInnerIterator(const int loopIndex) override;

virtual std::string launchBoundsAttribute(const int innerDims[3]) override;

void addExtensions();

Expand Down
51 changes: 50 additions & 1 deletion src/occa/internal/lang/modes/withLauncher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,55 @@ namespace occa {
forStatement &newForSmnt = (forStatement&) forSmnt.clone();
newKernelSmnt.set(newForSmnt);

bool addLaunchBoundsAttribute{true};
int kernelInnerDims[3] = {1,1,1};
if (newForSmnt.hasAttribute("max_inner_dims")) {
attributeToken_t& attr = newForSmnt.attributes["max_inner_dims"];

for(size_t i=0; i < attr.args.size(); ++i) {
exprNode* expr = attr.args[i].expr;
primitive value = expr->evaluate();
kernelInnerDims[i] = value;
}
} else {
//Programmer hasn't specified launch bounds.
//If they are known at compile time, set them.
forStatement *innerSmnt = getInnerMostInnerLoop(newForSmnt);
statementArray path = oklForStatement::getOklLoopPath(*innerSmnt);

int innerIndex;
const int pathCount = (int) path.length();
for (int i = 0; i < pathCount; ++i) {
forStatement &pathSmnt = *((forStatement*) path[i]);
oklForStatement oklForSmnt(pathSmnt);

if(pathSmnt.hasAttribute("inner")) {
innerIndex = oklForSmnt.oklLoopIndex();
if(oklForSmnt.getIterationCount()->canEvaluate()) {
kernelInnerDims[innerIndex] = (int) oklForSmnt.getIterationCount()->evaluate();
} else {
std::string s = oklForSmnt.getIterationCount()->toString();
if(s.find("_occa_tiled_") != std::string::npos) {
size_t tile_size = s.find_first_of("123456789");
OCCA_ERROR("@tile size is undefined!",tile_size != std::string::npos);
kernelInnerDims[innerIndex] = std::stoi(s.substr(tile_size));
} else {
//loop bounds are unknown at compile time
addLaunchBoundsAttribute=false;
break;
}
}
}
}
}

if(addLaunchBoundsAttribute) {
std::string lbAttr = launchBoundsAttribute(kernelInnerDims);
qualifier_t& boundQualifier = *(new qualifier_t(lbAttr,qualifierType::custom));
function_t& function = newKernelSmnt.function();
function.returnType.add(1, boundQualifier);
}

const int argc = (int) newFunction.args.size();
for (int i = 0; i < argc; ++i) {
newForSmnt.replaceVariable(
Expand Down Expand Up @@ -568,4 +617,4 @@ namespace occa {
}
}
}
}
}
1 change: 1 addition & 0 deletions src/occa/internal/lang/modes/withLauncher.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ namespace occa {

virtual std::string getOuterIterator(const int loopIndex) = 0;
virtual std::string getInnerIterator(const int loopIndex) = 0;
virtual std::string launchBoundsAttribute(const int innerDims[3]) = 0;
};
}
}
Expand Down
Loading