Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -277,3 +277,9 @@ Change main.c to use OUTPUTTYPE instead of float
### Removed
- Remove the link to the precompiled LLVM 12 in the `testRunner` for Snitch and in the CI.
- Remove the sourcing of the cursed PULP SDK script.

## rv32imf_xpulpv2 ISA support for Siracusa platform

### Changed
- The ISA for the Siracusa platform has been updated from `rv32imc_zfinx_xpulpv2` to `rv32imf_xpulpv2`.
- All floating-point comparison tasks in `deeploytest.c` are now offloaded to Cluster 0 for execution.
288 changes: 174 additions & 114 deletions DeeployTest/Platforms/Siracusa/src/deeploytest.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
* Copyright (C) 2020 ETH Zurich and University of Bologna.
*
* Author: Moritz Scherer, ETH Zurich
*
* Author: Run Wang, ETH Zurich
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
Expand All @@ -25,119 +26,178 @@
* limitations under the License.
*/

#include "CycleCounter.h"
#include "Network.h"
#include "dory_mem.h"
#include "pmsis.h"
#include "testinputs.h"
#include "testoutputs.h"

#define MAINSTACKSIZE 8000
#define SLAVESTACKSIZE 3800

struct pi_device cluster_dev;

void main(void) {
#ifndef CI
printf("HELLO WORLD:\r\n");
#endif
struct pi_cluster_conf conf;

pi_cluster_conf_init(&conf);
conf.id = 0;
pi_open_from_conf(&cluster_dev, &conf);
if (pi_cluster_open(&cluster_dev))
return;

mem_init();
#ifndef NOFLASH
open_fs();
#endif

printf("Intializing\r\n");

struct pi_cluster_task cluster_task;

pi_cluster_task(&cluster_task, InitNetwork, NULL);
cluster_task.stack_size = MAINSTACKSIZE;
cluster_task.slave_stack_size = SLAVESTACKSIZE;
pi_cluster_send_task_to_cl(&cluster_dev, &cluster_task);

#ifndef CI
printf("Initialized\r\n");
#endif
for (int buf = 0; buf < DeeployNetwork_num_inputs; buf++) {
if (DeeployNetwork_inputs[buf] >= 0x10000000) {
memcpy(DeeployNetwork_inputs[buf], testInputVector[buf],
DeeployNetwork_inputs_bytes[buf]);
}
}

#ifndef CI
printf("Input copied\r\n");
#endif
// RunNetwork(0, 1);
pi_cluster_task(&cluster_task, RunNetwork, NULL);
cluster_task.stack_size = MAINSTACKSIZE;
cluster_task.slave_stack_size = SLAVESTACKSIZE;
ResetTimer();
StartTimer();
pi_cluster_send_task_to_cl(&cluster_dev, &cluster_task);
StopTimer();

#ifndef CI
printf("Output:\r\n");
#endif

int32_t tot_err, tot_tested;
tot_err = 0;
tot_tested = 0;
OUTPUTTYPE *compbuf;
OUTPUTTYPE diff, expected, actual;
for (int buf = 0; buf < DeeployNetwork_num_outputs; buf++)
{
tot_tested += DeeployNetwork_outputs_bytes[buf] / sizeof(OUTPUTTYPE);
if (DeeployNetwork_outputs[buf] < 0x1000000)
{
compbuf = pi_l2_malloc(DeeployNetwork_outputs_bytes[buf]);
ram_read(compbuf, DeeployNetwork_outputs[buf],
DeeployNetwork_outputs_bytes[buf]);
}
else
{
compbuf = DeeployNetwork_outputs[buf];
}
for (int i = 0; i < DeeployNetwork_outputs_bytes[buf] / sizeof(OUTPUTTYPE); i++)
{
expected = ((OUTPUTTYPE *)testOutputVector[buf])[i];
actual = ((OUTPUTTYPE *)compbuf)[i];
diff = expected - actual;
if (ISOUTPUTFLOAT)
{
if ((diff < -1e-4) || (diff > 1e-4) || (isnan(diff)))
{
tot_err += 1;
printf("Expected: %10.6f ", expected);
printf("Actual: %10.6f ", actual);
printf("Diff: %10.6f at Index %12u in Output %u\r\n", diff, i, buf);
}
}
else
{
if (diff)
{
tot_err += 1;
printf("Expected: %4d ", expected);
printf("Actual: %4d ", actual);
printf("Diff: %4d at Index %12u in Output %u\r\n", diff, i, buf);
}
}
}
if (DeeployNetwork_outputs[buf] < 0x1000000)
{
pi_l2_free(compbuf, DeeployNetwork_outputs_bytes[buf]);
}
}
#include "CycleCounter.h"
#include "Network.h"
#include "dory_mem.h"
#include "pmsis.h"
#include "testinputs.h"
#include "testoutputs.h"

#define MAINSTACKSIZE 8000
#define SLAVESTACKSIZE 3800

struct pi_device cluster_dev;

/**
 * Argument bundle passed to CompareFloatOnCluster() through the cluster
 * task's single void * argument.
 */
typedef struct {
  void *expected;       // reference values, read as float[num_elements]
  void *actual;         // values under test, read as float[num_elements]
  int num_elements;     // number of floats to compare
  int output_buf_index; // output buffer index, used only in the error report
  uint32_t *err_count;  // out: number of mismatching elements; the type
                        // matches the uint32_t counter the caller passes in
} FloatCompareArgs;

/**
 * Compare two float buffers element by element and report mismatches.
 *
 * Only the core whose pi_core_id() is 0 does any work; every other core
 * returns immediately. An element counts as an error when the difference
 * (expected - actual) lies outside [-1e-4, 1e-4] or is NaN. Each error is
 * printed, and the total for this buffer is stored in *args->err_count
 * (0 when there are no mismatches or no elements).
 *
 * @param args Pointer to a FloatCompareArgs structure.
 */
void CompareFloatOnCluster(void *args)
{
  if (pi_core_id() != 0)
  {
    return;
  }

  const FloatCompareArgs *compare_args = (const FloatCompareArgs *)args;
  const float *expected = (const float *)compare_args->expected;
  const float *actual = (const float *)compare_args->actual;
  const int num_elements = compare_args->num_elements;

  // Single-precision literal: keeps the comparisons below in float instead of
  // promoting every operand to double. For a float diff the outcome is
  // identical to comparing against the double constant 1e-4, because 1e-4f is
  // the float nearest to 1e-4.
  const float tolerance = 1e-4f;

  uint32_t local_err_count = 0;

  for (int i = 0; i < num_elements; i++)
  {
    const float expected_val = expected[i];
    const float actual_val = actual[i];
    const float diff = expected_val - actual_val;

    if ((diff < -tolerance) || (diff > tolerance) || isnan(diff))
    {
      local_err_count += 1;

      printf("Expected: %10.6f ", expected_val);
      printf("Actual: %10.6f ", actual_val);
      // %u expects unsigned arguments; the indices are non-negative ints.
      printf("Diff: %10.6f at Index %12u in Output %u\r\n", diff,
             (unsigned)i, (unsigned)compare_args->output_buf_index);
    }
  }

  *compare_args->err_count = local_err_count;
}

void main(void)
{
#ifndef CI
printf("HELLO WORLD:\r\n");
#endif
struct pi_cluster_conf conf;

pi_cluster_conf_init(&conf);
conf.id = 0;
pi_open_from_conf(&cluster_dev, &conf);
if (pi_cluster_open(&cluster_dev))
return;

mem_init();
#ifndef NOFLASH
open_fs();
#endif

printf("Intializing\r\n");

struct pi_cluster_task cluster_task;

pi_cluster_task(&cluster_task, InitNetwork, NULL);
cluster_task.stack_size = MAINSTACKSIZE;
cluster_task.slave_stack_size = SLAVESTACKSIZE;
pi_cluster_send_task_to_cl(&cluster_dev, &cluster_task);

#ifndef CI
printf("Initialized\r\n");
#endif
for (int buf = 0; buf < DeeployNetwork_num_inputs; buf++)
{
if (DeeployNetwork_inputs[buf] >= 0x10000000)
{
memcpy(DeeployNetwork_inputs[buf], testInputVector[buf],
DeeployNetwork_inputs_bytes[buf]);
}
}

#ifndef CI
printf("Input copied\r\n");
#endif
// RunNetwork(0, 1);
pi_cluster_task(&cluster_task, RunNetwork, NULL);
cluster_task.stack_size = MAINSTACKSIZE;
cluster_task.slave_stack_size = SLAVESTACKSIZE;
ResetTimer();
StartTimer();
pi_cluster_send_task_to_cl(&cluster_dev, &cluster_task);
StopTimer();

#ifndef CI
printf("Output:\r\n");
#endif

uint32_t tot_err, tot_tested;
tot_err = 0;
tot_tested = 0;
void *compbuf;
FloatCompareArgs float_compare_args;
uint32_t float_error_count = 0;

for (int buf = 0; buf < DeeployNetwork_num_outputs; buf++)
{
tot_tested += DeeployNetwork_outputs_bytes[buf] / sizeof(OUTPUTTYPE);

if (DeeployNetwork_outputs[buf] < 0x1000000)
{
compbuf = pi_l2_malloc(DeeployNetwork_outputs_bytes[buf]);
ram_read(compbuf, DeeployNetwork_outputs[buf],
DeeployNetwork_outputs_bytes[buf]);
}
else
{
compbuf = DeeployNetwork_outputs[buf];
}

if (ISOUTPUTFLOAT)
{
float_error_count = 0;
float_compare_args.expected = testOutputVector[buf];
float_compare_args.actual = compbuf;
float_compare_args.num_elements = DeeployNetwork_outputs_bytes[buf] / sizeof(float);
float_compare_args.output_buf_index = buf;
float_compare_args.err_count = &float_error_count;

pi_cluster_task(&cluster_task, CompareFloatOnCluster, &float_compare_args);
cluster_task.stack_size = MAINSTACKSIZE;
cluster_task.slave_stack_size = SLAVESTACKSIZE;
pi_cluster_send_task_to_cl(&cluster_dev, &cluster_task);

tot_err += float_error_count;
}
else
{

for (int i = 0; i < DeeployNetwork_outputs_bytes[buf] / sizeof(OUTPUTTYPE); i++)
{
OUTPUTTYPE expected = ((OUTPUTTYPE *)testOutputVector[buf])[i];
OUTPUTTYPE actual = ((OUTPUTTYPE *)compbuf)[i];
OUTPUTTYPE diff = expected - actual;

if (diff)
{
tot_err += 1;
printf("Expected: %4d ", expected);
printf("Actual: %4d ", actual);
printf("Diff: %4d at Index %12u in Output %u\r\n", diff, i, buf);
}
}
}
if (DeeployNetwork_outputs[buf] < 0x1000000)
{
pi_l2_free(compbuf, DeeployNetwork_outputs_bytes[buf]);
}
}

printf("Runtime: %u cycles\r\n", getCycles());
printf("Errors: %u out of %u \r\n", tot_err, tot_tested);
Expand Down
2 changes: 1 addition & 1 deletion cmake/pulp/toolchain_llvm.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ set(CMAKE_ASM_COMPILER ${TOOLCHAIN_PREFIX}/clang)
set(CMAKE_OBJCOPY ${TOOLCHAIN_PREFIX}/${LLVM_TAG}-objcopy)
set(CMAKE_OBJDUMP ${TOOLCHAIN_PREFIX}/${LLVM_TAG}-objdump)

set(ISA rv32imc_zfinx_xpulpv2)
set(ISA rv32imf_xpulpv2)
set(PE 8)
set(FC 1)

Expand Down
Loading