Skip to content

Commit

Permalink
runtime_mode: Added buffer sharing and memory optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
christophe0606 committed Feb 22, 2024
1 parent 889c253 commit 119d95c
Show file tree
Hide file tree
Showing 14 changed files with 2,563 additions and 595 deletions.
122 changes: 122 additions & 0 deletions Examples/runtime_mode/AppNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,38 @@ class GenericRuntimeNode:public NodeBase
RuntimeEdge &mDst;
};

/*
  Base class for runtime-mode nodes with two inputs and one output.

  The node keeps a reference to its description (ndesc) and to the
  three edges it is connected to. Sample counts are read from the
  description: input 0 and input 1 for the sources, output 0 for
  the destination.

  IN1 / IN2 / OUT are the sample datatypes: they are used for the
  element size passed to the edges and for the type of the returned
  buffer pointers.
*/
template<typename IN1,typename IN2,typename OUT>
class GenericRuntimeNode21:public NodeBase
{
public:
    /* The description and the edges are stored by reference */
    explicit GenericRuntimeNode21(const arm_cmsis_stream::Node &n,
                                  RuntimeEdge &src1,
                                  RuntimeEdge &src2,
                                  RuntimeEdge &dst)
    : ndesc(n)
    , mSrc1(src1)
    , mSrc2(src2)
    , mDst(dst)
    {
    }

    /* Number of samples declared for input 0 in the node description */
    std::size_t nb_input_samples1() const
    {
        return ndesc.inputs()->Get(0)->nb();
    }

    /* Number of samples declared for input 1 in the node description */
    std::size_t nb_input_samples2() const
    {
        return ndesc.inputs()->Get(1)->nb();
    }

    /* Number of samples declared for output 0 in the node description */
    std::size_t nb_output_samples() const
    {
        return ndesc.outputs()->Get(0)->nb();
    }

protected:
    /*
      Buffer accessors.
      Each call forwards the IO list of the description, the element
      size, the IO index (0 or 1) and nb to the corresponding edge.
    */
    OUT * getWriteBuffer(const int nb=0 )
    {
        return (OUT*)mDst.getWriteBuffer(*(ndesc.outputs()),sizeof(OUT),0,nb);
    }

    IN1 * getReadBuffer1(const int nb=0 )
    {
        return (IN1*)mSrc1.getReadBuffer(*(ndesc.inputs()),sizeof(IN1),0,nb);
    }

    IN2 * getReadBuffer2(const int nb=0 )
    {
        return (IN2*)mSrc2.getReadBuffer(*(ndesc.inputs()),sizeof(IN2),1,nb);
    }

    /*
      Overflow / underflow queries.
      Same arguments as the buffer accessors, forwarded to the
      willOverflowWith / willUnderflowWith functions of the edges.
    */
    bool willOverflow(const int nb=0 ) const
    {
        return mDst.willOverflowWith(*(ndesc.outputs()),sizeof(OUT),0,nb);
    }

    bool willUnderflow1(const int nb=0 ) const
    {
        return mSrc1.willUnderflowWith(*(ndesc.inputs()),sizeof(IN1),0,nb);
    }

    bool willUnderflow2(const int nb=0 ) const
    {
        return mSrc2.willUnderflowWith(*(ndesc.inputs()),sizeof(IN2),1,nb);
    }

private:
    const arm_cmsis_stream::Node &ndesc; /* node description */
    RuntimeEdge &mSrc1;                  /* edge connected to input 0 */
    RuntimeEdge &mSrc2;                  /* edge connected to input 1 */
    RuntimeEdge &mDst;                   /* edge connected to output 0 */
};

template<typename OUT>
class GenericRuntimeSource:public NodeBase
{
Expand All @@ -177,6 +209,8 @@ class GenericRuntimeSource:public NodeBase
RuntimeEdge &mDst;
};



template<typename IN>
class Sink<IN,RUNTIME>: public GenericRuntimeSink<IN>
{
Expand Down Expand Up @@ -576,5 +610,93 @@ class ProcessingNode<IN,RUNTIME,
const uint32_t mInc;
};

/*
  Primary template for the adder node.
  Only declared here: the implementation visible below is the
  specialization where the three sizes are RUNTIME.
*/
template<typename IN1, int inputSize1,
         typename IN2, int inputSize2,
         typename OUT, int outputSize>
class AdderNode;

/*
  Runtime-mode adder: two inputs and one output sharing the same
  datatype IN. The output is the sample by sample sum of the two
  inputs.
*/
template<typename IN>
class AdderNode<IN,RUNTIME,
                IN,RUNTIME,
                IN,RUNTIME>:
public GenericRuntimeNode21<IN,IN,IN>
{
public:
    using ADD = AdderNode<IN,RUNTIME,
                          IN,RUNTIME,
                          IN,RUNTIME>;

    /* Constructor needs the node description and the input and output FIFOs */
    AdderNode(const arm_cmsis_stream::Node &n,
              RuntimeEdge &src1,
              RuntimeEdge &src2,
              RuntimeEdge &dst):GenericRuntimeNode21<IN,IN,IN>(n,src1,src2,dst){}

    /*
      Static entry point calling run() on a node received
      as a NodeBase pointer.
      obj must point to an ADD object: the downcast is not checked.
    */
    static int runNode(NodeBase* obj)
    {
        /* NodeBase is a public base of ADD so static_cast is
           the right cast for this downcast */
        ADD *n = static_cast<ADD *>(obj);
        return(n->run());
    }

    /*
      Static entry point calling prepareForRunning() on a node
      received as a NodeBase pointer.
      obj must point to an ADD object: the downcast is not checked.
    */
    static int prepareForRunningNode(NodeBase* obj)
    {
        ADD *n = static_cast<ADD *>(obj);
        return(n->prepareForRunning());
    }

    /*
      Create a node from its description.
      The FIFOs are looked up in ctx.fifos using the ids of
      input 0, input 1 and output 0 of the description.
      The node is allocated with new and returned as a NodeBase
      pointer. NOTE(review): nothing here releases it, so the
      caller is presumably responsible for deleting it - to confirm.
    */
    static NodeBase* mkNode(const runtime_context &ctx,
                            const arm_cmsis_stream::Node *ndesc)
    {
        auto inputs = ndesc->inputs();
        RuntimeEdge &ia = *ctx.fifos[inputs->Get(0)->id()];
        RuntimeEdge &ib = *ctx.fifos[inputs->Get(1)->id()];

        auto outputs = ndesc->outputs();
        RuntimeEdge &o = *ctx.fifos[outputs->Get(0)->id()];

        ADD *node=new ADD(*ndesc,ia,ib,o);
        return(static_cast<NodeBase*>(node));
    }

    /* In asynchronous mode, node execution will be
       skipped in case of underflow on one of the inputs
       or overflow in the output.
    */
    int prepareForRunning() final
    {
        if (this->willOverflow() ||
            this->willUnderflow1() ||
            this->willUnderflow2())
        {
            return(CG_SKIP_EXECUTION_ID_CODE); // Skip execution
        }

        return(0);
    }

    /*
       Node processing
       The two inputs are added sample by sample and the
       result is written to the output.

       The number of processed samples is the one of the first
       input. NOTE(review): this assumes the second input and the
       output have at least as many samples - to confirm against
       the graph description.
    */
    int run() final
    {
        //printf("AdderNode\n");

        const IN *a=this->getReadBuffer1();
        const IN *b=this->getReadBuffer2();
        IN *c=this->getWriteBuffer();

        /* Read once, and same type as the index to avoid
           a signed / unsigned comparison */
        const std::size_t nbSamples = this->nb_input_samples1();
        for(std::size_t i=0;i<nbSamples;i++)
        {
            c[i] = a[i]+b[i];
        }
        return(0);
    }

};


#endif
3 changes: 2 additions & 1 deletion Examples/runtime_mode/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
# Disable inclusion of CMSIS-DSP headers so that we don't have
# to recompile CMSIS-DSP for such a simple example
conf.CMSISDSP = False
conf.CAPI = True
conf.asynchronous = False

conf.nodeIdentification = True

conf.memoryOptimization =True
conf.memoryOptimization=True

export_graph(the_graph,"graph.yml")
export_config(conf,"config.yml")
Expand Down
69 changes: 42 additions & 27 deletions Examples/runtime_mode/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
# Include definition of the nodes
from nodes import *

NBA = 5
NBB = 7
NB_SOURCE = 8
NB_PROCESSING = 4
NB_OF_SRC = 16

# Define the datatype we are using for all the IOs in this
# example
Expand All @@ -17,48 +18,62 @@
# source in the C code will generate 5 samples)
# "source" is the name of the C variable that will identify
# this node
src=Source("source",floatType,NBA)
src.identified=False

the_sources=[]
for i in range(NB_OF_SRC):
src=Source(f"source{i}",floatType,NB_SOURCE)
src.identified=False
the_sources.append(src)




# Instantiate a Processing node using a float data type for
# both the input and output. The number of samples consumed
# on the input and produced on the output each time the node
# is executed in the C code is given by the size constant
# passed to the constructor below.
# "processing" is the name of the C variable that will identify
# this node
processinga=ProcessingNode("processinga",floatType,NBB,NBB,v=10)
processinga.identified=False
processing=ProcessingNode("processing",floatType,NB_PROCESSING,NB_PROCESSING,v=10)
processing.identified=True

# Instantiate the Sink nodes with a float datatype. The number
# of samples consumed each time a sink is executed in the C code
# is given by the size constant passed to the constructor below.
# "sinka" and "sinkb" are the names of the C variables that will
# identify these nodes
sinka=Sink("sinka",floatType,NBA)
sinka=Sink("sinka",floatType,NB_SOURCE)
sinka.identified=False
sinkb=Sink("sinkb",floatType,NBA)
sinkb=Sink("sinkb",floatType,NB_SOURCE)
sinkb.identified=False

# Create a Graph object
the_graph = Graph()

# Connect the source to the processing node
the_graph.connect(src.o,processinga.i)
# Connect the processing node to the sink
the_graph.connect(processinga.o,sinka.i)

def crazy(nb,r):
    # Chain `nb` new processing nodes after node `r`.
    # Each new node is named processing1 ... processing<nb>, is not
    # identified, and has its input connected to the output of the
    # previous node in the chain. The last node of the chain is
    # returned (or `r` itself when no node was created).
    tail = r
    for index in range(1, nb + 1):
        stage = ProcessingNode(f"processing{index}",floatType,NBB,NBB)
        stage.identified = False
        the_graph.connect(tail.o,stage.i)
        tail = stage
    return tail


processingb = crazy(10,processinga)

processinga.identified=True
processingb.identified=True
NB=0
def recurse(s):
    # Build a binary tree of adders summing the outputs of the
    # nodes in the list `s` and return the node at the root of
    # the tree (its output `o` is the sum).
    #
    # The list is split in two halves, each half is reduced
    # recursively, and the two results are connected to a new
    # AdderNode named adder<NB>, where NB is the global adder counter.
    #
    # A list with a single node needs no adder: the node itself is
    # returned. This makes the function work for any non-empty list
    # and not only for lengths that are a power of two (the halving
    # would otherwise never reach a terminating case).
    global NB
    if len(s) == 0:
        raise ValueError("recurse needs at least one node")
    if len(s) == 1:
        return s[0]

    nb = len(s) >> 1
    oa = recurse(s[:nb])
    ob = recurse(s[nb:])

    # The counter is incremented after the two halves have been
    # built so that the adders of the sub-trees get the lower numbers
    NB = NB + 1
    adder = AdderNode(f"adder{NB}",floatType,NB_SOURCE)
    the_graph.connect(oa.o,adder.ia)
    the_graph.connect(ob.o,adder.ib)
    return adder

the_graph.connect(processingb.o,sinkb.i)
res = recurse(the_sources)

the_graph.connect(res.o,processing.i)
the_graph.connect(processing.o,sinka.i)
the_graph.connect(processing.o,sinkb.i)


Loading

0 comments on commit 119d95c

Please sign in to comment.