Skip to content

fatal: Ruby functional read failed for address 0x968c0 #29

Open
@Taya666

Description

@Taya666

Hi cirosantilli,
I am trying to run a multi-thread program in gem5 SE mode with ruby(MESI_Three_LEVEL) and SimpleTimingCPU. But there is a weird bug. I have no idea what is going on here. My program is simple:

    /*
    * Ramdon swaps between entries in array
    */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <pthread.h>
#include "tx_util.h"

typedef int u32;
#define NODES       1024
#define TX_PER_TREHAD 1000
#define SWAP_TIMES 128
struct test_node {
	u32 key;
	u32 val;
};


static struct test_node* nodes;

static void init(void)
{
    int i;
    nodes = (struct test_node*)malloc(sizeof(struct test_node)*NODES);
    for (i = 0; i < NODES; i++) {
        nodes[i].key = i;
        nodes[i].val = i;
    }
}
#define valid_pos(_pos) (_pos >= 0 && _pos < NODES)
#define random(_a, _b) (_a + (int)(_b * rand()/(RAND_MAX + 1.0))) 

static void ramdon_swap(int pos_a, int pos_b)
{
    //swap the key
    nodes[pos_a].key ^= nodes[pos_b].key;
    nodes[pos_b].key ^= nodes[pos_a].key;
    nodes[pos_a].key ^= nodes[pos_b].key;
    //swap the val
    nodes[pos_a].val ^= nodes[pos_b].val;
    nodes[pos_b].val ^= nodes[pos_a].val;
    nodes[pos_a].val ^= nodes[pos_b].val;
}
void work(void* arg){
    int id = *(int*)arg;
    //printf("work start threadid:%d\n",id);
    int i,j, pos_a, pos_b;
    srand((int)time(0));
    pos_a = rand()%NODES;
    pos_b = rand()%NODES;
    for(j=0;j<TX_PER_TREHAD;j++){
                for (i = 0; i < SWAP_TIMES; ++i) {
                    ramdon_swap(pos_a, pos_b);
                    //nodes[0].key=1;
                }
    }//tx_per_thread
    //printf("ThreadID:%d total_abort:%d, total_retry:%d\n",id,total_abort,total_retry);
}
int main(int argc,char* argv[])
{
    if(argc<2){
        printf("Usage %s [thread_num]\n",argv[0]);
        exit(0);
    }
    int thread_num = atoi(argv[1]),i;
    pthread_t t[thread_num];
    int tid[thread_num];
    printf("thread num:%d SWAP_TIMES:%d\n",thread_num,SWAP_TIMES);
    init();
    printf("after init\n");
    for(int i=0;i<thread_num;i++){
        tid[i] = i;
        pthread_create(&t[i],NULL,(void*)&work,(void*)&tid[i]);
    }

    for(int i=0;i<thread_num;i++){
        pthread_join(t[i],NULL);
    }
    printf("!! END !!!\n");
    return 0;
} 

Gem5 reports that "fatal: Ruby functional read failed for address 0x968c0". I search for what the bug is. I found a bug report in "https://gem5.atlassian.net/browse/GEM5-676" that is posted by you. So I try to eliminate the pthread_join function. And my program becomes:

    /*
    * Ramdon swaps between entries in array
    */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <pthread.h>
#include "tx_util.h"

typedef int u32;
#define NODES       1024
#define TX_PER_TREHAD 1000
#define SWAP_TIMES 128
struct test_node {
	u32 key;
	u32 val;
};
int * all_finish;
static struct test_node* nodes;

static void init(void)
{
    int i;
    nodes = (struct test_node*)malloc(sizeof(struct test_node)*NODES);
    for (i = 0; i < NODES; i++) {
        nodes[i].key = i;
        nodes[i].val = i;
    }
}
#define valid_pos(_pos) (_pos >= 0 && _pos < NODES)
#define random(_a, _b) (_a + (int)(_b * rand()/(RAND_MAX + 1.0))) 

static void ramdon_swap(int pos_a, int pos_b)
{
    //swap the key
    nodes[pos_a].key ^= nodes[pos_b].key;
    nodes[pos_b].key ^= nodes[pos_a].key;
    nodes[pos_a].key ^= nodes[pos_b].key;
    //swap the val
    nodes[pos_a].val ^= nodes[pos_b].val;
    nodes[pos_b].val ^= nodes[pos_a].val;
    nodes[pos_a].val ^= nodes[pos_b].val;
}
void work(void* arg){
    int id = *(int*)arg;
       // printf("work start threadid:%d\n",id);
    int i,j, pos_a, pos_b;
    srand((int)time(0));
    pos_a = rand()%NODES;
    pos_b = rand()%NODES;
    for(j=0;j<TX_PER_TREHAD;j++){
                for (i = 0; i < SWAP_TIMES; ++i) {
                    ramdon_swap(pos_a, pos_b);
                    //nodes[0].key=1;
                }
    }//tx_per_thread
    //printf("ThreadID:%d total_abort:%d, total_retry:%d\n",id,total_abort,total_retry);
    all_finish[id]=1;
}
int main(int argc,char* argv[])
{
    if(argc<2){
        printf("Usage %s [thread_num]\n",argv[0]);
        exit(0);
    }
    int thread_num = atoi(argv[1]),i;
    pthread_t t[thread_num];
    int tid[thread_num];

    int finish_flag=0;
    all_finish = (int*)malloc(sizeof(int)*thread_num);

    printf("thread num:%d SWAP_TIMES:%d\n",thread_num,SWAP_TIMES);
    init();
    printf("after init\n");
    for(int i=0;i<thread_num;i++){
        tid[i] = i;
        pthread_create(&t[i],NULL,(void*)&work,(void*)&tid[i]);
    }

    /*for(int i=0;i<thread_num;i++){
        pthread_join(t[i],NULL);
    }*/
    while(finish_flag!=1){
        for(int i=0;i<thread_num;i++){
            if(all_finish[i]==1)
                finish_flag=1;
            else
                finish_flag=0;
        }
    }
    printf("!! END !!!\n");
    return 0;
} 

It works by removing the pthread_join function!
Then I remove the "printf" inside the "work" function and run it again.
This time the gem5 reports "fatal: Ruby functional read failed for address 0x968c0" again. It is so magical for me. It seems like the bug is caused by some syscall.
Do you have any idea on how to fix it?

Thanks!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions