/**
 * A 'Box' holds three (lower, upper) integer pairs, one pair per
 * spatial dimension.
 *
 * The pairs live in one flat array of 6 ints. The bracket operator
 * hands back a pointer to the pair of the requested dimension, so a
 * Box can be read and written with 2-dimensional array notation:
 *     int xmin = box[0][0];   int xmax = box[0][1];
 *     int ymin = box[1][0];   int ymax = box[1][1];
 *     int zmin = box[2][0];   int zmax = box[2][1];
 *
 * Box is kept as a plain aggregate so that it can be brace-initialized
 * with six ints.
 */
struct Box {
  /* Flat storage: { xmin, xmax, ymin, ymax, zmin, zmax } */
  int ranges[6];

  /* Pointer to the (lower, upper) pair of dimension 'xyz' (0, 1 or 2). */
  int* operator[](int xyz) { return ranges + 2 * xyz; }

  /* Read-only variant of the accessor above. */
  const int* operator[](int xyz) const { return ranges + 2 * xyz; }
};
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +namespace miniFE { + +/** Class for traversing a 3-dimensional 'box' of indices. + + //One way to traverse a 'box[3][2]' is to use a triply-nested for-loop: + for(int z=box[2][0]; z= box_[0][1]) { + x = box_[0][0]; + ++y; + if (y >= box_[1][1]) { + y = box_[1][0]; + ++z; + if (z >= box_[2][1]) { + z = box_[2][1]; + y = box_[1][1]; + x = box_[0][1]; + } + } + } + return *this; + } + + BoxIterator operator++(int) + { + BoxIterator temp = *this; + ++(*this); + return temp; + } + + bool operator==(const BoxIterator& rhs) const + { + return x == rhs.x && y == rhs.y && z == rhs.z; + } + + bool operator!=(const BoxIterator& rhs) const + { + return !(this->operator==(rhs)); + } + + int x; + int y; + int z; + +private: + BoxIterator(const Box& box, bool at_end = false) + : x(box[0][0]), + y(box[1][0]), + z(box[2][0]), + box_() + { + box_[0][0] = box[0][0]; box_[0][1] = box[0][1]; + box_[1][0] = box[1][0]; box_[1][1] = box[1][1]; + box_[2][0] = box[2][0]; box_[2][1] = box[2][1]; + if (at_end) { + x = box[0][1]; + y = box[1][1]; + z = box[2][1]; + } + } + + Box box_; +};//class BoxTraverser + +}//namespace miniFE + +#endif + diff --git a/mkl/basic/BoxPartition.cpp b/mkl/basic/BoxPartition.cpp new file mode 100644 index 0000000..2a4e5a7 --- /dev/null +++ b/mkl/basic/BoxPartition.cpp @@ -0,0 +1,477 @@ + +#include +#include + +#include +#include + +/*--------------------------------------------------------------------*/ + +static int box_map_local_entry( const Box& box , + const int ghost , + int local_x , + int local_y , + int local_z ) +{ + const int nx = 2 * ghost + box[0][1] - box[0][0] ; + const int 
ny = 2 * ghost + box[1][1] - box[1][0] ; + const int nz = 2 * ghost + box[2][1] - box[2][0] ; + int result = -1 ; + + local_x += ghost ; + local_y += ghost ; + local_z += ghost ; + + if ( 0 <= local_x && local_x < nx && + 0 <= local_y && local_y < ny && + 0 <= local_z && local_z < nz ) { + + result = local_z * ny * nx + local_y * nx + local_x ; + } + return result ; +} + +int box_map_local( const Box& box_local, + const int ghost , + const int box_local_map[] , + const int local_x , + const int local_y , + const int local_z ) +{ + int result = box_map_local_entry(box_local,ghost,local_x,local_y,local_z); + + if ( 0 <= result ) { + result = box_local_map[ result ]; + } + + return result ; +} + +/*--------------------------------------------------------------------*/ +/* Recursively split a box into into (up-ip) sub-boxes */ + +void box_partition( int ip , int up , int axis , + const Box& box, + Box* p_box ) +{ + const int np = up - ip ; + if ( 1 == np ) { + p_box[ip][0][0] = box[0][0] ; p_box[ip][0][1] = box[0][1] ; + p_box[ip][1][0] = box[1][0] ; p_box[ip][1][1] = box[1][1] ; + p_box[ip][2][0] = box[2][0] ; p_box[ip][2][1] = box[2][1] ; + } + else { + const int n = box[ axis ][1] - box[ axis ][0] ; + const int np_low = np / 2 ; /* Rounded down */ + const int np_upp = np - np_low ; + + const int n_upp = (int) (((double) n) * ( ((double)np_upp) / ((double)np))); + const int n_low = n - n_upp ; + const int next_axis = ( axis + 2 ) % 3 ; + + if ( np_low ) { /* P = [ip,ip+np_low) */ + Box dbox ; + dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; + dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; + dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ; + + dbox[ axis ][1] = dbox[ axis ][0] + n_low ; + + box_partition( ip, ip + np_low, next_axis, dbox, p_box ); + } + + if ( np_upp ) { /* P = [ip+np_low,ip+np_low+np_upp) */ + Box dbox; + dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; + dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; + dbox[2][0] = box[2][0] ; dbox[2][1] 
/* Make sure the int buffer '*a' can hold at least 'newLen' entries.
 *
 *   a        [in,out] buffer pointer; may point to NULL for "no buffer yet"
 *   allocLen [in,out] capacity of '*a' in ints; updated only after a
 *                     successful (re)allocation
 *   newLen   [in]     number of entries the caller needs
 *
 * The capacity is rounded up to a power of two with a minimum of 32, so
 * growing one entry at a time costs only a logarithmic number of
 * reallocations. Existing contents are preserved. The buffer never shrinks.
 * Aborts when the requested size cannot be represented or allocated.
 */
static void resize_int( int ** a , int * allocLen , int newLen )
{
  /* Largest int value, derived without requiring an extra header. */
  const int int_max = (int)( ~0u >> 1 );

  int k = 32;
  while ( k < newLen ) {
    /* Doubling past int_max would overflow (and then never terminate). */
    if ( k > int_max / 2 ) { abort(); }
    k <<= 1 ;
  }

  /* An existing buffer that is already large enough is left untouched. */
  if ( NULL != *a && k <= *allocLen ) { return ; }

  /* Guard the byte count against wrapping around in size_t. */
  if ( (size_t) k > ((size_t) -1) / sizeof(int) ) { abort(); }

  /* realloc(NULL,n) acts like malloc(n), so one call covers both the
     first allocation and growth. The result goes into a temporary so the
     old buffer is not lost if the allocation fails. */
  int * const grown = (int*) realloc( *a , sizeof(int) * (size_t) k );
  if ( NULL == grown ) { abort(); }

  *a = grown ;
  *allocLen = k ;
}
ghost ; + + for ( i = 0 ; i < id_length ; ++i ) { local_id[i] = -1 ; } + + iSend = 0 ; + iLocal = 0 ; + + /* The vector space is partitioned by processors */ + + for ( i = 0 ; i < np ; ++i ) { + const int ip = ( i + my_p ) % np ; + recv_pc[i] = iLocal ; + send_pc[i] = iSend ; + + if ( ! box_disjoint( my_use_box , pbox[ip] ) ) { + const int p_ix = pbox[ip][0][0] ; + const int p_iy = pbox[ip][1][0] ; + const int p_iz = pbox[ip][2][0] ; + const int p_ex = pbox[ip][0][1] ; + const int p_ey = pbox[ip][1][1] ; + const int p_ez = pbox[ip][2][1] ; + + int local_x , local_y , local_z ; + + /* Run the span of global cells that my processor uses */ + + for ( local_z = -ghost ; local_z < my_nz + ghost ; ++local_z ) { + for ( local_y = -ghost ; local_y < my_ny + ghost ; ++local_y ) { + for ( local_x = -ghost ; local_x < my_nx + ghost ; ++local_x ) { + + const int global_z = local_z + my_iz ; + const int global_y = local_y + my_iy ; + const int global_x = local_x + my_ix ; + + const int entry = + box_map_local_entry(my_box,ghost,local_x,local_y,local_z); + + if ( entry < 0 ) { abort(); } + + if ( p_iz <= global_z && global_z < p_ez && + p_iy <= global_y && global_y < p_ey && + p_ix <= global_x && global_x < p_ex ) { + + /* This ordinal is owned by processor 'ip' */ + + local_id[ entry ] = iLocal++ ; + +#if defined(DEBUG_PRINT) +if ( my_p != ip ) { + fprintf(stdout," (%d,%d,%d) : P%d recv at local %d from P%d\n", + global_x,global_y,global_z,my_p,local_id[entry],ip); + fflush(stdout); +} +#endif + } + + /* If in my ownership and used by the other processor */ + if ( my_p != ip && + /* In my ownership: */ + ( 0 <= local_z && local_z < my_nz && + 0 <= local_y && local_y < my_ny && + 0 <= local_x && local_x < my_nx ) && + /* In other processors usage: */ + ( p_iz - ghost <= global_z && global_z < p_ez + ghost && + p_iy - ghost <= global_y && global_y < p_ey + ghost && + p_ix - ghost <= global_x && global_x < p_ex + ghost ) ) { + + resize_int( & send_id , & send_id_size , (iSend + 1) 
); + send_id[ iSend ] = local_id[ entry ] ; + ++iSend ; + +#if defined(DEBUG_PRINT) +{ + fprintf(stdout," (%d,%d,%d) : P%d send at local %d to P%d\n", + global_x,global_y,global_z,my_p,local_id[entry],ip); + fflush(stdout); +} +#endif + } + } + } + } + } + } + recv_pc[np] = iLocal ; + send_pc[np] = iSend ; + + *map_local_id = local_id ; + *map_recv_pc = recv_pc ; + *map_send_pc = send_pc ; + *map_send_id = send_id ; +} + +void box_partition_rcb( const int np , + const int my_p , + const Box& root_box, + const int ghost , + Box** pbox, + int ** map_local_id , + int ** map_recv_pc , + int ** map_send_pc , + int ** map_send_id ) +{ + *pbox = new Box[ np ]; + + box_partition( 0 , np , 2 , root_box , *pbox ); + + box_partition_maps( np , my_p , *pbox , ghost , + map_local_id , map_recv_pc , + map_send_pc , map_send_id ); +} + +/*--------------------------------------------------------------------*/ + +#ifdef UNIT_TEST + +static int box_contain( const Box& a , const Box& b ) +{ + return a[0][0] <= b[0][0] && b[0][1] <= a[0][1] && + a[1][0] <= b[1][0] && b[1][1] <= a[1][1] && + a[2][0] <= b[2][0] && b[2][1] <= a[2][1] ; +} + +static void box_print( FILE * fp , const Box& a ) +{ + fprintf(fp,"{ [ %d , %d ) , [ %d , %d ) , [ %d , %d ) }", + a[0][0] , a[0][1] , + a[1][0] , a[1][1] , + a[2][0] , a[2][1] ); +} + +static void test_box( const Box& box , const int np ) +{ + const int ncell_box = box[0][1] * box[1][1] * box[2][1] ; + int ncell_total = 0 ; + int ncell_min = ncell_box ; + int ncell_max = 0 ; + std::vector pbox(np); + int i , j ; + + box_partition( 0 , np , 2 , box , &pbox[0] ); + + for ( i = 0 ; i < np ; ++i ) { + const int ncell = ( pbox[i][0][1] - pbox[i][0][0] ) * + ( pbox[i][1][1] - pbox[i][1][0] ) * + ( pbox[i][2][1] - pbox[i][2][0] ); + + if ( ! box_contain( box , pbox[i] ) ) { + fprintf(stdout," OUT OF BOUNDS pbox[%d/%d] = ",i,np); + box_print(stdout,pbox[i]); + fprintf(stdout,"\n"); + abort(); + } + + for ( j = i + 1 ; j < np ; ++j ) { + if ( ! 
box_disjoint( pbox[i] , pbox[j] ) ) { + fprintf(stdout," NOT DISJOINT pbox[%d/%d] = ",i,np); + box_print(stdout, pbox[i]); + fprintf(stdout,"\n"); + fprintf(stdout," pbox[%d/%d] = ",j,np); + box_print(stdout, pbox[j]); + fprintf(stdout,"\n"); + abort(); + } + } + ncell_total += ncell ; + + if ( ncell_max < ncell ) { ncell_max = ncell ; } + if ( ncell < ncell_min ) { ncell_min = ncell ; } + } + + if ( ncell_total != ncell_box ) { + fprintf(stdout," WRONG CELL COUNT NP = %d\n",np); + abort(); + } + fprintf(stdout,"NP = %d, total = %d, avg = %d, min = %d, max = %d\n", + np,ncell_box,ncell_box/np,ncell_min,ncell_max); +} + +/*--------------------------------------------------------------------*/ + +static void test_maps( const Box& root_box , const int np ) +{ + const int ghost = 1 ; + const int nx_global = root_box[0][1] - root_box[0][0] ; + const int ny_global = root_box[1][1] - root_box[1][0] ; + int ieq , i , j ; + std::vector pbox(np); + int **local_values ; + int **map_local_id ; + int **map_recv_pc ; + int **map_send_pc ; + int **map_send_id ; + + box_partition( 0 , np , 2 , root_box , &pbox[0] ); + + local_values = (int **) malloc( sizeof(int*) * np ); + map_local_id = (int **) malloc( sizeof(int*) * np ); + map_recv_pc = (int **) malloc( sizeof(int*) * np ); + map_send_pc = (int **) malloc( sizeof(int*) * np ); + map_send_id = (int **) malloc( sizeof(int*) * np ); + + /* Set each local value to the global equation number */ + + for ( ieq = i = 0 ; i < np ; ++i ) { + const Box& mybox = pbox[i] ; + const int nx = mybox[0][1] - mybox[0][0] ; + const int ny = mybox[1][1] - mybox[1][0] ; + const int nz = mybox[2][1] - mybox[2][0] ; + int ix , iy , iz ; + + /* Generate the partition maps for this rank */ + box_partition_maps( np , i , &pbox[0] , ghost , + & map_local_id[i] , & map_recv_pc[i] , + & map_send_pc[i] , & map_send_id[i] ); + + local_values[i] = (int *) malloc( sizeof(int) * map_recv_pc[i][np] ); + + for ( iz = -ghost ; iz < nz + ghost ; ++iz ) { + for ( 
iy = -ghost ; iy < ny + ghost ; ++iy ) { + for ( ix = -ghost ; ix < nx + ghost ; ++ix ) { + const int ieq = box_map_local(mybox,ghost,map_local_id[i],ix,iy,iz); + + if ( 0 <= ieq ) { + const int ix_global = ix + mybox[0][0] ; + const int iy_global = iy + mybox[1][0] ; + const int iz_global = iz + mybox[2][0] ; + + if ( root_box[0][0] <= ix_global && ix_global < root_box[0][1] && + root_box[1][0] <= iy_global && iy_global < root_box[1][1] && + root_box[2][0] <= iz_global && iz_global < root_box[2][1] ) { + + local_values[i][ ieq ] = ix_global + + iy_global * nx_global + + iz_global * nx_global * ny_global ; + } + else { + local_values[i][ ieq ] = -1 ; + } + } + } + } + } + } + + /* Pair-wise compare the local values */ + /* i == receiving processor rank */ + /* ip == sending processor rank */ + /* j == receiving processor data entry for message from 'ip' */ + /* jp == sending processor data entry for message to 'i' */ + + for ( i = 0 ; i < np ; ++i ) { + for ( j = 1 ; j < np ; ++j ) { + const int ip = ( i + j ) % np ; + const int jp = ( i + np - ip ) % np ; + const int nrecv = map_recv_pc[i] [j+1] - map_recv_pc[i] [j] ; + const int nsend = map_send_pc[ip][jp+1] - map_send_pc[ip][jp] ; + int k ; + if ( nrecv != nsend ) { + fprintf(stderr,"P%d recv %d from P%d\n",i,nrecv,ip); + fprintf(stderr,"P%d send %d to P%d\n",ip,nsend,i); + abort(); + } + for ( k = 0 ; k < nrecv ; ++k ) { + const int irecv = map_recv_pc[i][j] + k ; + const int isend = map_send_pc[ip][jp] + k ; + const int val_irecv = local_values[i][irecv] ; + const int val_isend = local_values[ip][ map_send_id[ip][isend] ] ; + if ( val_irecv != val_isend ) { + fprintf(stderr,"P%d recv[%d] = %d , from P%d\n",i,k,val_irecv,ip); + fprintf(stderr,"P%d send[%d] = %d , to P%d\n",ip,k,val_isend,i); + abort(); + } + } + } + } + + for ( i = 0 ; i < np ; ++i ) { + free( map_local_id[i] ); + free( map_recv_pc[i] ); + free( map_send_pc[i] ); + free( map_send_id[i] ); + free( local_values[i] ); + } + free( map_send_id ); + 
free( map_send_pc ); + free( map_recv_pc ); + free( map_local_id ); + free( local_values ); +} + +/*--------------------------------------------------------------------*/ + +int main( int argc , char * argv[] ) +{ + int np_max = 256 ; + Box box = { 0 , 64 , 0 , 64 , 0 , 64 }; + int np = 0 ; + + switch( argc ) { + case 3: + sscanf(argv[1],"%d",&np); + sscanf(argv[2],"%dx%dx%d",& box[0][1] , & box[1][1] , & box[2][1] ); + if ( 0 < np ) { test_box( box , np ); } + if ( 0 < np ) { test_maps( box , np ); } + break ; + default: + for ( np = 1 ; np <= np_max ; ++np ) { + test_box( box , np ); + test_maps( box , np ); + } + break ; + } + return 0 ; +} + +#endif + + diff --git a/mkl/basic/BoxPartition.hpp b/mkl/basic/BoxPartition.hpp new file mode 100644 index 0000000..4359a16 --- /dev/null +++ b/mkl/basic/BoxPartition.hpp @@ -0,0 +1,76 @@ +#ifndef _BoxPartition_hpp_ +#define _BoxPartition_hpp_ + +#include + +/** \brief Recursively split a box into (up-ip) sub-boxes + */ +void box_partition( int ip , int up , int axis , + const Box& box , + Box* p_box ); + +/** \brief Partition a { [ix,jx) X [iy,jy) X [iz,jz) } box. + * + * Use recursive coordinate bisection to partition a box + * into np disjoint sub-boxes. Allocate (via malloc) and + * populate the sub-boxes, mapping the local (x,y,z) to + * a local ordinal, and mappings for the send-recv messages + * to update the ghost cells. 
+ * + * usage: + * + * my_nx = pbox[my_p][0][1] - pbox[my_p][0][0] ; + * my_ny = pbox[my_p][1][1] - pbox[my_p][1][0] ; + * my_nz = pbox[my_p][2][1] - pbox[my_p][2][0] ; + * + * for ( x = -ghost ; x < my_nx + ghost ; ++x ) { + * for ( y = -ghost ; y < my_ny + ghost ; ++y ) { + * for ( z = -ghost ; z < my_nz + ghost ; ++z ) { + * const int x_global = x + pbox[my_p][0][0] ; + * const int y_global = y + pbox[my_p][1][0] ; + * const int z_global = z + pbox[my_p][2][0] ; + * + * const int local_ordinal = + * box_map_local( pbox[my_p], ghost, map_local_id, x, y, z ); + * + * if ( 0 <= local_ordinal ) { + * } + * } + * + * for ( i = 1 ; i < np ; ++i ) { + * const int recv_processor = ( my_p + i ) % np ; + * const int recv_ordinal_begin = map_recv_pc[i]; + * const int recv_ordinal_end = map_recv_pc[i+1]; + * } + * + * for ( i = 1 ; i < np ; ++i ) { + * const int send_processor = ( my_p + i ) % np ; + * const int send_map_begin = map_send_pc[i]; + * const int send_map_end = map_send_pc[i+1]; + * for ( j = send_map_begin ; j < send_map_end ; ++j ) { + * send_ordinal = map_send_id[j] ; + * } + * } + */ +void box_partition_rcb( + const int np /**< [in] Number of partitions */ , + const int my_p /**< [in] My partition rank */ , + const Box& root_box /**< [in] 3D Box to partition */ , + const int ghost /**< [in] Ghost cell boundary */ , + Box* pbox /**< [out] Partition's 3D boxes */ , + int ** map_local_id /**< [out] Map local cells */ , + int ** map_recv_pc /**< [out] Receive spans per processor */ , + int ** map_send_pc /**< [out] Send prefix counts per processor */ , + int ** map_send_id /**< [out] Send message ordinals */ ); + +/* \brief Map a local (x,y,z) to a local ordinal. 
+ */ +int box_map_local( const Box& box_local , + const int ghost , + const int map_local_id[] , + const int local_x , + const int local_y , + const int local_z ); + +#endif + diff --git a/mkl/basic/CSRMatrix.hpp b/mkl/basic/CSRMatrix.hpp new file mode 100644 index 0000000..9cfeaee --- /dev/null +++ b/mkl/basic/CSRMatrix.hpp @@ -0,0 +1,139 @@ +#ifndef _CSRMatrix_hpp_ +#define _CSRMatrix_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template +struct +CSRMatrix { + CSRMatrix(ComputeNode& comp_node) + : has_local_indices(false), + rows(), row_offsets(), row_offsets_external(), + packed_cols(), packed_coefs(), + num_cols(0), + compute_node(comp_node) +#ifdef HAVE_MPI + ,external_index(), external_local_index(), elements_to_send(), + neighbors(), recv_length(), send_length(), send_buffer(), request() +#endif + { + } + + ~CSRMatrix() + {} + + typedef Scalar ScalarType; + typedef LocalOrdinal LocalOrdinalType; + typedef GlobalOrdinal GlobalOrdinalType; + typedef ComputeNode ComputeNodeType; + + bool has_local_indices; + std::vector rows; + std::vector row_offsets; + std::vector row_offsets_external; + std::vector packed_cols; + std::vector packed_coefs; + LocalOrdinal num_cols; + ComputeNode& compute_node; + +#ifdef HAVE_MPI + std::vector external_index; + std::vector external_local_index; + std::vector elements_to_send; + std::vector neighbors; + std::vector recv_length; + std::vector send_length; + std::vector send_buffer; + std::vector request; +#endif + + size_t num_nonzeros() const + { + return row_offsets[row_offsets.size()-1]; + } + + void reserve_space(unsigned nrows, unsigned ncols_per_row) + { + rows.resize(nrows); + row_offsets.resize(nrows+1); + packed_cols.reserve(nrows * ncols_per_row); + packed_coefs.reserve(nrows * ncols_per_row); + } + + void get_row_pointers(GlobalOrdinalType row, size_t& row_length, + GlobalOrdinalType*& cols, + ScalarType*& coefs) + { + ptrdiff_t local_row = -1; + //first see if we can get the local-row index using fast direct lookup: + if (rows.size() >= 1) { + ptrdiff_t idx = row - rows[0]; + if (idx < rows.size() && rows[idx] == row) { + local_row = idx; + } + } + + //if we didn't get the local-row index using direct lookup, try a + //more expensive 
/* Allow this header to be used on its own: when the including code has
   not chosen a kernel qualifier, the functions are plain host functions. */
#ifndef KERNEL_PREFIX
#define KERNEL_PREFIX
#endif

/** Reduction functor for the dot product of two vectors.
 *
 *  The caller sets 'x', 'y' and 'n'. 'generate(i)' yields the i-th
 *  product x[i]*y[i], 'reduce' combines two partial sums, and 'identity'
 *  is the starting value of the sum. 'result' holds the reduced value and
 *  starts at the identity.
 */
template<typename Scalar>
struct DotOp {
  typedef Scalar ReductionType;

  const Scalar* x;   // first operand, read by generate()
  const Scalar* y;   // second operand, read by generate()

  size_t n;          // vector length; not read by this struct itself

  ReductionType result;

  /* Start from a well-defined empty state rather than leaving the
     pointers and the length uninitialized. */
  inline DotOp()
    : x(0), y(0), n(0), result(identity())
  {
  }

  /* Neutral element of the sum. */
  static inline KERNEL_PREFIX ReductionType identity()
  {
    return 0.0;
  }

  /* Combine two partial sums. */
  inline KERNEL_PREFIX ReductionType reduce(ReductionType u, ReductionType v) const
  {
    return u+v;
  }

  /* i-th term of the dot product. 'i' must index valid entries of both
     x and y; no bounds check is performed. */
  inline KERNEL_PREFIX Scalar generate(int i) const
  {
    return x[i]*y[i];
  }
};
+#define _ELLMatrix_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template +struct +ELLMatrix { + ELLMatrix(ComputeNode& comp_node) + : has_local_indices(false), + rows(), + cols(), coefs(), + num_cols(0), + num_cols_per_row(0), + compute_node(comp_node) +#ifdef HAVE_MPI + ,external_index(), external_local_index(), elements_to_send(), + neighbors(), recv_length(), send_length(), send_buffer(), request() +#endif + { + } + + ~ELLMatrix() + {} + + typedef Scalar ScalarType; + typedef LocalOrdinal LocalOrdinalType; + typedef GlobalOrdinal GlobalOrdinalType; + typedef ComputeNode ComputeNodeType; + + bool has_local_indices; + std::vector rows; + std::vector cols; + std::vector coefs; + LocalOrdinal num_cols; + LocalOrdinal num_cols_per_row; + ComputeNode& compute_node; + +#ifdef HAVE_MPI + std::vector external_index; + std::vector external_local_index; + std::vector elements_to_send; + std::vector neighbors; + std::vector recv_length; + std::vector send_length; + std::vector send_buffer; + std::vector request; +#endif + + size_t num_nonzeros() const + { + return rows.size()*num_cols_per_row; + } + + void reserve_space(unsigned nrows, unsigned ncols_per_row) + { + rows.resize(nrows); + cols.resize(nrows * ncols_per_row); + coefs.resize(nrows * ncols_per_row); + num_cols_per_row = ncols_per_row; + } + + void get_row_pointers(GlobalOrdinalType row, size_t& row_length, + GlobalOrdinalType*& cols_ptr, + ScalarType*& coefs_ptr) + { + ptrdiff_t local_row = -1; + //first see if we can get the local-row index using fast direct lookup: + if (rows.size() >= 1) { + ptrdiff_t idx = row - rows[0]; + if (idx < rows.size() && rows[idx] == row) { + local_row = idx; + } + } + + //if we didn't get the local-row index using direct lookup, try a + //more expensive binary-search: + if (local_row == -1) { + typename std::vector::iterator row_iter = 
+ std::lower_bound(rows.begin(), rows.end(), row); + + //if we still haven't found row, it's not local so jump out: + if (row_iter == rows.end() || *row_iter != row) { + row_length = 0; + return; + } + + local_row = row_iter - rows.begin(); + } + + cols_ptr = &cols[local_row*num_cols_per_row]; + coefs_ptr = &coefs[local_row*num_cols_per_row]; + + int idx = num_cols_per_row-1; + while(idx>=0) { + if (cols_ptr[idx] != 0) break; + --idx; + } + row_length = idx+1; + } +}; + +}//namespace miniFE + +#endif + diff --git a/mkl/basic/FEComputeElem.hpp b/mkl/basic/FEComputeElem.hpp new file mode 100644 index 0000000..03aa8a2 --- /dev/null +++ b/mkl/basic/FEComputeElem.hpp @@ -0,0 +1,29 @@ +#ifndef FECOMPUTEELEM_HPP_ +#define FECOMPUTEELEM_HPP_ + +#include + +#ifndef KERNEL_PREFIX +#define KERNEL_PREFIX +#endif + +template +struct FEComputeElem { + Scalar* elem_node_coords; + Scalar* elem_diffusion_matrix; + Scalar* elem_source_vector; + +inline KERNEL_PREFIX void operator()(int i) +{ + unsigned nnodes = miniFE::Hex8::numNodesPerElem; + unsigned dim = miniFE::Hex8::spatialDim; + Scalar* coords = elem_node_coords+i*nnodes*dim; + Scalar* diffusionMat = elem_diffusion_matrix+i*nnodes*nnodes; + Scalar* sourceVec = elem_source_vector+i*nnodes; + + miniFE::Hex8::diffusionMatrix(coords, diffusionMat); + miniFE::Hex8::sourceVector(coords, sourceVec); +} +}; + +#endif diff --git a/mkl/basic/FusedMatvecDotOp.hpp b/mkl/basic/FusedMatvecDotOp.hpp new file mode 100644 index 0000000..e4b59e4 --- /dev/null +++ b/mkl/basic/FusedMatvecDotOp.hpp @@ -0,0 +1,59 @@ +#ifndef FUSEDMATVECDOTOP_HPP_ +#define FUSEDMATVECDOTOP_HPP_ + +#ifndef KERNEL_PREFIX +#define KERNEL_PREFIX +#endif + +template +struct FusedMatvecDotOp { + + typedef typename VectorType::GlobalOrdinalType GlobalOrdinalType; + typedef typename VectorType::LocalOrdinalType LocalOrdinalType; + typedef typename VectorType::ScalarType ScalarType; + typedef ScalarType ReductionType; + + size_t n; + + const LocalOrdinalType* Arowoffsets; + 
const GlobalOrdinalType* Acols; + const ScalarType* Acoefs; + + const ScalarType* x; + ScalarType* y; + ScalarType beta; + + ReductionType result; + + inline FusedMatvecDotOp() { + result = identity(); + } + + static inline KERNEL_PREFIX ReductionType identity() + { + return 0.0; + } + + inline KERNEL_PREFIX ReductionType reduce(ReductionType u, ReductionType v) const + { + return u+v; + } + + inline KERNEL_PREFIX ScalarType generate(int row) + { + //we count on the caller (ComputeNode) to pass in 'row' + //in range 0..n-1 + + ScalarType sum = beta*y[row]; + + for(LocalOrdinalType i=Arowoffsets[row]; i +#include + +template +struct GetNodesCoords { + const miniFE::simple_mesh_description* mesh; + GlobalOrdinal* elemIDs; + GlobalOrdinal* node_ordinals; + Scalar* elem_node_coords; + +inline void operator()(int i) +{ + unsigned nnodes = miniFE::Hex8::numNodesPerElem; + GlobalOrdinal elemID = elemIDs[i]; + GlobalOrdinal* node_ords = node_ordinals+i*nnodes; + Scalar* node_coords = elem_node_coords+i*nnodes*miniFE::Hex8::spatialDim; + get_elem_nodes_and_coords(*mesh, elemID, node_ords, node_coords); +} +}; + +#endif diff --git a/mkl/basic/Hex8_box_utils.hpp b/mkl/basic/Hex8_box_utils.hpp new file mode 100644 index 0000000..c1662ec --- /dev/null +++ b/mkl/basic/Hex8_box_utils.hpp @@ -0,0 +1,174 @@ +#ifndef _Hex8_box_utils_hpp_ +#define _Hex8_box_utils_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. 
+// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include + +#include +#include +#include +#include + +namespace miniFE { + + +template +void get_hex8_node_ids(int nx, int ny, + GlobalOrdinal node0, + GlobalOrdinal* elem_node_ids) +{ +//Given box dimensions nx and ny, and a starting node +//(local-node-0 for a hex8), compute the other nodes +//of the hex8 using the exodus ordering convention. + elem_node_ids[0] = node0; + elem_node_ids[1] = node0 + 1; + elem_node_ids[2] = node0 + nx + 1; + elem_node_ids[3] = node0 + nx; + elem_node_ids[4] = node0 + nx*ny; + elem_node_ids[5] = node0 + 1 + nx*ny; + elem_node_ids[6] = node0 + nx + nx*ny + 1; + elem_node_ids[7] = node0 + nx + nx*ny; +} + +template +void get_hex8_node_coords_3d(Scalar x, Scalar y, Scalar z, + Scalar hx, Scalar hy, Scalar hz, + Scalar* elem_node_coords) +{ + //Input: x,y,z are the coordinates of local-node 0 for a Hex8. + //'hx', 'hy', 'hz' are the lengths of the sides of the element + //in each direction. 
+ + elem_node_coords[0] = x; + elem_node_coords[1] = y; + elem_node_coords[2] = z; + + elem_node_coords[3] = x + hx; + elem_node_coords[4] = y; + elem_node_coords[5] = z; + + elem_node_coords[6] = x + hx; + elem_node_coords[7] = y + hy; + elem_node_coords[8] = z; + + elem_node_coords[9] = x; + elem_node_coords[10] = y + hy; + elem_node_coords[11] = z; + + elem_node_coords[12] = x; + elem_node_coords[13] = y; + elem_node_coords[14] = z + hz; + + elem_node_coords[15] = x + hx; + elem_node_coords[16] = y; + elem_node_coords[17] = z + hz; + + elem_node_coords[18] = x + hx; + elem_node_coords[19] = y + hy; + elem_node_coords[20] = z + hz; + + elem_node_coords[21] = x; + elem_node_coords[22] = y + hy; + elem_node_coords[23] = z + hz; +} + +template +void +get_elem_nodes_and_coords(const simple_mesh_description& mesh, + GlobalOrdinal elemID, + GlobalOrdinal* node_ords, Scalar* node_coords) +{ + int global_nodes_x = mesh.global_box[0][1]+1; + int global_nodes_y = mesh.global_box[1][1]+1; + int global_nodes_z = mesh.global_box[2][1]+1; + + if (elemID < 0) { + //I don't think this can happen, but check for the sake of paranoia... 
+ throw std::runtime_error("get_elem_nodes_and_coords ERROR, negative elemID"); + } + + int elem_int_x, elem_int_y, elem_int_z; + get_int_coords(elemID, global_nodes_x-1, global_nodes_y-1, global_nodes_z-1, + elem_int_x, elem_int_y, elem_int_z); + GlobalOrdinal nodeID = get_id(global_nodes_x, global_nodes_y, global_nodes_z, elem_int_x, elem_int_y, elem_int_z); + +#ifdef MINIFE_DEBUG + std::cout<<"\nelemID: "<(nodeID, global_nodes_x,global_nodes_y,global_nodes_z, + ix,iy,iz); + Scalar hx = 1.0/global_elems_x; + Scalar hy = 1.0/global_elems_y; + Scalar hz = 1.0/global_elems_z; + get_hex8_node_coords_3d(ix, iy, iz, hx, hy, hz, node_coords); +#ifdef MINIFE_DEBUG + int offset = 0; + for(int i=0; i +void +get_elem_nodes_and_coords(const simple_mesh_description& mesh, + GlobalOrdinal elemID, + ElemData& elem_data) +{ + get_elem_nodes_and_coords(mesh, elemID, elem_data.elem_node_ids, elem_data.elem_node_coords); +} + +}//namespace miniFE + +#endif + diff --git a/mkl/basic/Lock.hpp b/mkl/basic/Lock.hpp new file mode 100644 index 0000000..16be86f --- /dev/null +++ b/mkl/basic/Lock.hpp @@ -0,0 +1,103 @@ +#ifndef _Lock_hpp_ +#define _Lock_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifdef MINIFE_HAVE_TBB + +#include +#include + +namespace miniFE { + +static tbb::atomic miniFE_num_matrix_conflicts; +static tbb::atomic miniFE_num_vector_conflicts; + +//We have two lock classes, LockM and LockV. The only reason for +//this is so that they can separately track the number of conflicts +//for matrix accesses versus vector accesses (by incrementing the +//above counters). +//The LockingMatrix class uses LockM, LockingVector uses LockV. + +template +class LockM { +public: + // Constructors/destructors + LockM(tbb::atomic& row) + : locked_row_(row) + { + if (++locked_row_ != 1) { + unsigned counter = 0; + while(locked_row_ != 1) { + ++counter; + } + ++miniFE_num_matrix_conflicts; + } + } + ~LockM() + { --locked_row_; } + +private: + tbb::atomic& locked_row_; + LockM(const LockM&); + LockM& operator=(const LockM&); +}; + +template +class LockV { +public: + // Constructors/destructors + LockV(tbb::atomic& row) + : locked_row_(row) + { + if (++locked_row_ != 1) { + unsigned counter = 0; + while(locked_row_ != 1) { + ++counter; + } + ++miniFE_num_vector_conflicts; + } + } + ~LockV() + { --locked_row_; } + +private: + tbb::atomic& locked_row_; + LockV(const LockV&); + LockV& operator=(const LockV&); +}; + +}//namespace miniFE + +#else +#error "ERROR, this file shouldn't be compiled if MINIFE_HAVE_TBB isn't defined." 
+#endif + +#endif + diff --git a/mkl/basic/LockingMatrix.hpp b/mkl/basic/LockingMatrix.hpp new file mode 100644 index 0000000..c278274 --- /dev/null +++ b/mkl/basic/LockingMatrix.hpp @@ -0,0 +1,74 @@ +#ifndef _LockingMatrix_hpp_ +#define _LockingMatrix_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include + +#include + +namespace miniFE { + +template +class LockingMatrix { +public: + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + LockingMatrix(MatrixType& A) : A_(A), myFirstRow_(0), myLastRow_(0), numMyRows_(0), row_locks_() + { + if (A_.rows.size() > 0) { + myFirstRow_ = A_.rows[0]; + myLastRow_ = A_.rows[A_.rows.size()-1]; + } + numMyRows_ = myLastRow_-myFirstRow_+1; + row_locks_.resize(numMyRows_); + } + + void sum_in(GlobalOrdinal row, size_t row_len, const GlobalOrdinal* col_indices, const Scalar* values) + { + int local_row = row - myFirstRow_; + if (local_row >= 0 && local_row < numMyRows_) { + LockM lock(row_locks_[local_row]); + sum_into_row(row, row_len, col_indices, values, A_); + } + } + +private: + MatrixType& A_; + GlobalOrdinal myFirstRow_; + GlobalOrdinal myLastRow_; + size_t numMyRows_; + std::vector > row_locks_; +}; + +}//namespace miniFE + +#endif + diff --git a/mkl/basic/LockingVector.hpp b/mkl/basic/LockingVector.hpp new file mode 100644 index 0000000..60f7598 --- /dev/null +++ b/mkl/basic/LockingVector.hpp @@ -0,0 +1,77 @@ +#ifndef _LockingVector_hpp_ +#define _LockingVector_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. 
+// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include + +#include + +namespace miniFE { + +template +class LockingVector { +public: + typedef typename VectorType::GlobalOrdinalType GlobalOrdinal; + typedef typename VectorType::ScalarType Scalar; + + LockingVector(VectorType& x) : x_(x), myFirstRow_(0), myLastRow_(0), numMyRows_(0), row_locks_() + { + if (x_.local_size > 0) { + myFirstRow_ = x_.startIndex; + myLastRow_ = myFirstRow_ + x_.local_size - 1; + } + numMyRows_ = myLastRow_-myFirstRow_+1; + row_locks_.resize(numMyRows_); + } + + void sum_in(size_t num_indices, const GlobalOrdinal* indices, const Scalar* values) + { + for(int i=0; i= 0 && local_row < numMyRows_) { + LockV lock(row_locks_[local_row]); + sum_into_vector(1, &row, &values[i], x_); + } + } + } + +private: + VectorType& x_; + GlobalOrdinal myFirstRow_; + GlobalOrdinal myLastRow_; + size_t numMyRows_; + std::vector > row_locks_; +}; + +}//namespace miniFE + +#endif + diff --git a/mkl/basic/MatrixCopyOp.hpp b/mkl/basic/MatrixCopyOp.hpp new file mode 100644 index 0000000..f6c300a --- /dev/null +++ b/mkl/basic/MatrixCopyOp.hpp @@ -0,0 +1,33 @@ +#ifndef _MatrixCopyOp_hpp_ +#define _MatrixCopyOp_hpp_ + +template +struct MatrixCopyOp { + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; + typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; + typedef typename MatrixType::ScalarType 
ScalarType; + + const GlobalOrdinalType* src_rows; + const LocalOrdinalType* src_rowoffsets; + const GlobalOrdinalType* src_cols; + const ScalarType* src_coefs; + + GlobalOrdinalType* dest_rows; + LocalOrdinalType* dest_rowoffsets; + GlobalOrdinalType* dest_cols; + ScalarType* dest_coefs; + int n; + + inline void operator()(int i) + { + dest_rows[i] = src_rows[i]; + dest_rowoffsets[i] = src_rowoffsets[i]; + for(int j=src_rowoffsets[i]; j +#include +#include + +#include +#include + +#include + +template +void sort_if_needed(GlobalOrdinal* list, + GlobalOrdinal list_len) +{ + bool need_to_sort = false; + for(GlobalOrdinal i=list_len-1; i>=1; --i) { + if (list[i] < list[i-1]) { + need_to_sort = true; + break; + } + } + + if (need_to_sort) { + std::sort(list,list+list_len); + } +} + +template +struct MatrixInitOp { +}; + +template<> +struct MatrixInitOp > { + MatrixInitOp(const std::vector& rows_vec, + const std::vector& row_offsets_vec, + const std::vector& row_coords_vec, + int global_nx, int global_ny, int global_nz, + MINIFE_GLOBAL_ORDINAL global_n_rows, + const miniFE::simple_mesh_description& input_mesh, + miniFE::CSRMatrix& matrix) + : rows(&rows_vec[0]), + row_offsets(&row_offsets_vec[0]), + row_coords(&row_coords_vec[0]), + global_nodes_x(global_nx), + global_nodes_y(global_ny), + global_nodes_z(global_nz), + global_nrows(global_n_rows), + mesh(&input_mesh), + dest_rows(&matrix.rows[0]), + dest_rowoffsets(&matrix.row_offsets[0]), + dest_cols(&matrix.packed_cols[0]), + dest_coefs(&matrix.packed_coefs[0]), + n(matrix.rows.size()) + { + matrix.packed_cols.resize(row_offsets_vec[n]); + matrix.packed_coefs.resize(row_offsets_vec[n]); + dest_rowoffsets[n] = row_offsets_vec[n]; + } + + typedef MINIFE_GLOBAL_ORDINAL GlobalOrdinalType; + typedef MINIFE_LOCAL_ORDINAL LocalOrdinalType; + typedef MINIFE_SCALAR ScalarType; + + const GlobalOrdinalType* rows; + const LocalOrdinalType* row_offsets; + const int* row_coords; + + int global_nodes_x; + int global_nodes_y; + int 
global_nodes_z; + + GlobalOrdinalType global_nrows; + + GlobalOrdinalType* dest_rows; + LocalOrdinalType* dest_rowoffsets; + GlobalOrdinalType* dest_cols; + ScalarType* dest_coefs; + int n; + + const miniFE::simple_mesh_description* mesh; + + inline void operator()(int i) + { + dest_rows[i] = rows[i]; + int offset = row_offsets[i]; + dest_rowoffsets[i] = offset; + int ix = row_coords[i*3]; + int iy = row_coords[i*3+1]; + int iz = row_coords[i*3+2]; + GlobalOrdinalType nnz = 0; + for(int sz=-1; sz<=1; ++sz) + for(int sy=-1; sy<=1; ++sy) + for(int sx=-1; sx<=1; ++sx) { + GlobalOrdinalType col_id = + miniFE::get_id(global_nodes_x, global_nodes_y, global_nodes_z, + ix+sx, iy+sy, iz+sz); + if (col_id >= 0 && col_id < global_nrows) { + GlobalOrdinalType col = mesh->map_id_to_row(col_id); + dest_cols[offset+nnz] = col; + dest_coefs[offset+nnz] = 0; + ++nnz; + } + } + + sort_if_needed(&dest_cols[offset], nnz); + } +}; + +template<> +struct MatrixInitOp > { + MatrixInitOp(const std::vector& rows_vec, + const std::vector& /*row_offsets_vec*/, + const std::vector& row_coords_vec, + int global_nx, int global_ny, int global_nz, + MINIFE_GLOBAL_ORDINAL global_n_rows, + const miniFE::simple_mesh_description& input_mesh, + miniFE::ELLMatrix& matrix) + : rows(&rows_vec[0]), + row_coords(&row_coords_vec[0]), + global_nodes_x(global_nx), + global_nodes_y(global_ny), + global_nodes_z(global_nz), + global_nrows(global_n_rows), + mesh(&input_mesh), + dest_rows(&matrix.rows[0]), + dest_cols(&matrix.cols[0]), + dest_coefs(&matrix.coefs[0]), + n(matrix.rows.size()), + ncols_per_row(matrix.num_cols_per_row) + { + } + + typedef MINIFE_GLOBAL_ORDINAL GlobalOrdinalType; + typedef MINIFE_LOCAL_ORDINAL LocalOrdinalType; + typedef MINIFE_SCALAR ScalarType; + + const GlobalOrdinalType* rows; + const int* row_coords; + + int global_nodes_x; + int global_nodes_y; + int global_nodes_z; + + GlobalOrdinalType global_nrows; + + GlobalOrdinalType* dest_rows; + GlobalOrdinalType* dest_cols; + ScalarType* 
dest_coefs; + int n; + int ncols_per_row; + + const miniFE::simple_mesh_description* mesh; + + inline void operator()(int i) + { + dest_rows[i] = rows[i]; + int offset = i*ncols_per_row; + int ix = row_coords[i*3]; + int iy = row_coords[i*3+1]; + int iz = row_coords[i*3+2]; + GlobalOrdinalType nnz = 0; + for(int sz=-1; sz<=1; ++sz) + for(int sy=-1; sy<=1; ++sy) + for(int sx=-1; sx<=1; ++sx) { + GlobalOrdinalType col_id = + miniFE::get_id(global_nodes_x, global_nodes_y, global_nodes_z, + ix+sx, iy+sy, iz+sz); + if (col_id >= 0 && col_id < global_nrows) { + GlobalOrdinalType col = mesh->map_id_to_row(col_id); + dest_cols[offset+nnz] = col; + dest_coefs[offset+nnz] = 0; + ++nnz; + } + } + + sort_if_needed(&dest_cols[offset], nnz); + } +}; + +#endif + diff --git a/mkl/basic/MatvecOp.hpp b/mkl/basic/MatvecOp.hpp new file mode 100644 index 0000000..9c5c8e4 --- /dev/null +++ b/mkl/basic/MatvecOp.hpp @@ -0,0 +1,99 @@ +#ifndef _MatvecOp_hpp_ +#define _MatvecOp_hpp_ + +#ifndef KERNEL_PREFIX +#define KERNEL_PREFIX +#endif + +#include +#include +#include + +template +struct MatvecOp { +}; + +template<> +struct MatvecOp > { + MatvecOp(miniFE::CSRMatrix& A) + : n(A.rows.size()), + Arowoffsets(&A.row_offsets[0]), + Acols(&A.packed_cols[0]), + Acoefs(&A.packed_coefs[0]) + { + } + + size_t n; + + typedef MINIFE_GLOBAL_ORDINAL GlobalOrdinalType; + typedef MINIFE_LOCAL_ORDINAL LocalOrdinalType; + typedef MINIFE_SCALAR ScalarType; + + const LocalOrdinalType* Arowoffsets; + const GlobalOrdinalType* Acols; + const ScalarType* Acoefs; + + const ScalarType* x; + ScalarType* y; + ScalarType beta; + + inline KERNEL_PREFIX void operator()(int row) + { + //we count on the caller (ComputeNode) to pass in 'row' + //in range 0..n-1 + + ScalarType sum = beta*y[row]; + + for(LocalOrdinalType i=Arowoffsets[row]; i +struct MatvecOp > { + MatvecOp(miniFE::ELLMatrix& A) + : n(A.rows.size()), + Acols(&A.cols[0]), + Acoefs(&A.coefs[0]), + ncols_per_row(A.num_cols_per_row) + { + } + + size_t n; + + 
typedef MINIFE_GLOBAL_ORDINAL GlobalOrdinalType; + typedef MINIFE_LOCAL_ORDINAL LocalOrdinalType; + typedef MINIFE_SCALAR ScalarType; + + const GlobalOrdinalType* Acols; + const ScalarType* Acoefs; + int ncols_per_row; + + const ScalarType* x; + ScalarType* y; + ScalarType beta; + + inline KERNEL_PREFIX void operator()(int row) + { + //we count on the caller (ComputeNode) to pass in 'row' + //in range 0..n-1 + + ScalarType sum = beta*y[row]; + + for(LocalOrdinalType i=0; i +struct MemInitOp { + Scalar* ptr; + size_t n; + inline void operator()(size_t i) + { + ptr[i] = 0; + } +}; + +#endif diff --git a/mkl/basic/NoOpMemoryModel.hpp b/mkl/basic/NoOpMemoryModel.hpp new file mode 100644 index 0000000..92d1eb1 --- /dev/null +++ b/mkl/basic/NoOpMemoryModel.hpp @@ -0,0 +1,27 @@ +#ifndef _NoOpMemoryModel_hpp_ +#define _NoOpMemoryModel_hpp_ + +class NoOpMemoryModel { + public: + NoOpMemoryModel(){} + virtual ~NoOpMemoryModel(){} + + template + T* get_buffer(const T* host_ptr, size_t buf_size) + { return const_cast(host_ptr); } + + template + void destroy_buffer(T*& device_ptr) + { } + + template + void copy_to_buffer(const T* host_ptr, size_t buf_size, T* device_ptr) + { } + + template + void copy_from_buffer(T* host_ptr, size_t buf_size, const T* device_ptr) + { } +}; + +#endif + diff --git a/mkl/basic/SerialComputeNode.hpp b/mkl/basic/SerialComputeNode.hpp new file mode 100644 index 0000000..1f45ed8 --- /dev/null +++ b/mkl/basic/SerialComputeNode.hpp @@ -0,0 +1,25 @@ +#ifndef SERIALCOMPUTENODE_HPP_ +#define SERIALCOMPUTENODE_HPP_ + +#include + +class SerialComputeNode : public NoOpMemoryModel { + public: + template + void parallel_for(unsigned int length, WDP wd) { + for(int i=0; i + void parallel_reduce(unsigned int length, WDP &wd) { + wd.result = wd.identity(); + for(int i=0; i +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef MINIFE_HAVE_TBB +#include +#endif + 
+#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template +void init_matrix(MatrixType& M, + const std::vector& rows, + const std::vector& row_offsets, + const std::vector& row_coords, + int global_nodes_x, + int global_nodes_y, + int global_nodes_z, + typename MatrixType::GlobalOrdinalType global_nrows, + const simple_mesh_description& mesh) +{ + MatrixInitOp mat_init(rows, row_offsets, row_coords, + global_nodes_x, global_nodes_y, global_nodes_z, + global_nrows, mesh, M); + +#ifdef MINIFE_HAVE_CUDA +//if on cuda, don't do this with parallel_for... + for(size_t i=0; i +void sort_with_companions(ptrdiff_t len, T* array, U* companions) +{ + ptrdiff_t i, j, index; + U companion; + + for (i=1; i < len; i++) { + index = array[i]; + companion = companions[i]; + j = i; + while ((j > 0) && (array[j-1] > index)) + { + array[j] = array[j-1]; + companions[j] = companions[j-1]; + j = j - 1; + } + array[j] = index; + companions[j] = companion; + } +} + +template +void write_matrix(const std::string& filename, + MatrixType& mat) +{ + typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; + typedef typename MatrixType::ScalarType ScalarType; + + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + std::ostringstream osstr; + osstr << filename << "." << numprocs << "." 
<< myproc; + std::string full_name = osstr.str(); + std::ofstream ofs(full_name.c_str()); + + size_t nrows = mat.rows.size(); + size_t nnz = mat.num_nonzeros(); + + for(int p=0; p +void +sum_into_row(int row_len, + GlobalOrdinal* row_indices, + Scalar* row_coefs, + int num_inputs, + const GlobalOrdinal* input_indices, + const Scalar* input_coefs) +{ + for(size_t i=0; i +void +sum_into_row(typename MatrixType::GlobalOrdinalType row, + size_t num_indices, + const typename MatrixType::GlobalOrdinalType* col_inds, + const typename MatrixType::ScalarType* coefs, + MatrixType& mat) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + size_t row_len = 0; + GlobalOrdinal* mat_row_cols = NULL; + Scalar* mat_row_coefs = NULL; + + mat.get_row_pointers(row, row_len, mat_row_cols, mat_row_coefs); + if (row_len == 0) return; + + sum_into_row(row_len, mat_row_cols, mat_row_coefs, num_indices, col_inds, coefs); +} + +template +void +sum_in_symm_elem_matrix(size_t num, + const typename MatrixType::GlobalOrdinalType* indices, + const typename MatrixType::ScalarType* coefs, + MatrixType& mat) +{ + typedef typename MatrixType::ScalarType Scalar; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + +//indices is length num (which should be nodes-per-elem) +//coefs is the upper triangle of the element diffusion matrix +//which should be length num*(num+1)/2 +//std::cout< +void +sum_in_elem_matrix(size_t num, + const typename MatrixType::GlobalOrdinalType* indices, + const typename MatrixType::ScalarType* coefs, + MatrixType& mat) +{ + size_t offset = 0; + + for(size_t i=0; i +void +sum_into_global_linear_system(ElemData& elem_data, + MatrixType& A, VectorType& b) +{ + sum_in_symm_elem_matrix(elem_data.nodes_per_elem, elem_data.elem_node_ids, + elem_data.elem_diffusion_matrix, A); + sum_into_vector(elem_data.nodes_per_elem, elem_data.elem_node_ids, + elem_data.elem_source_vector, b); +} + +#ifdef 
MINIFE_HAVE_TBB +template +void +sum_in_elem_matrix(size_t num, + const typename MatrixType::GlobalOrdinalType* indices, + const typename MatrixType::ScalarType* coefs, + LockingMatrix& mat) +{ + size_t offset = 0; + + for(size_t i=0; i +void +sum_into_global_linear_system(ElemData& elem_data, + LockingMatrix& A, LockingVector& b) +{ + sum_in_elem_matrix(elem_data.nodes_per_elem, elem_data.elem_node_ids, + elem_data.elem_diffusion_matrix, A); + sum_into_vector(elem_data.nodes_per_elem, elem_data.elem_node_ids, + elem_data.elem_source_vector, b); +} +#endif + +template +void +add_to_diagonal(typename MatrixType::ScalarType value, MatrixType& mat) +{ + for(size_t i=0; i +double +parallel_memory_overhead_MB(const MatrixType& A) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + double mem_MB = 0; + +#ifdef HAVE_MPI + double invMB = 1.0/(1024*1024); + mem_MB = invMB*A.external_index.size()*sizeof(GlobalOrdinal); + mem_MB += invMB*A.external_local_index.size()*sizeof(GlobalOrdinal); + mem_MB += invMB*A.elements_to_send.size()*sizeof(GlobalOrdinal); + mem_MB += invMB*A.neighbors.size()*sizeof(int); + mem_MB += invMB*A.recv_length.size()*sizeof(LocalOrdinal); + mem_MB += invMB*A.send_length.size()*sizeof(LocalOrdinal); + + double tmp = mem_MB; + MPI_Allreduce(&tmp, &mem_MB, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); +#endif + + return mem_MB; +} + +template +void rearrange_matrix_local_external(MatrixType& A) +{ + //This function will rearrange A so that local entries are contiguous at the front + //of A's memory, and external entries are contiguous at the back of A's memory. + // + //A.row_offsets will describe where the local entries occur, and + //A.row_offsets_external will describe where the external entries occur. 
+ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + size_t nrows = A.rows.size(); + std::vector tmp_row_offsets(nrows*2); + std::vector tmp_row_offsets_external(nrows*2); + + LocalOrdinal num_local_nz = 0; + LocalOrdinal num_extern_nz = 0; + + //First sort within each row of A, so that local entries come + //before external entries within each row. + //tmp_row_offsets describe the locations of the local entries, and + //tmp_row_offsets_external describe the locations of the external entries. + // + for(size_t i=0; i ext_cols(num_extern_nz); + std::vector ext_coefs(num_extern_nz); + std::vector ext_offsets(nrows+1); + LocalOrdinal offset = 0; + for(size_t i=0; i +void +zero_row_and_put_1_on_diagonal(MatrixType& A, typename MatrixType::GlobalOrdinalType row) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + size_t row_len = 0; + GlobalOrdinal* cols = NULL; + Scalar* coefs = NULL; + A.get_row_pointers(row, row_len, cols, coefs); + + for(size_t i=0; i +void +impose_dirichlet(typename MatrixType::ScalarType prescribed_value, + MatrixType& A, + VectorType& b, + int global_nx, + int global_ny, + int global_nz, + const std::set& bc_rows) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + GlobalOrdinal first_local_row = A.rows.size()>0 ? A.rows[0] : 0; + GlobalOrdinal last_local_row = A.rows.size()>0 ? 
A.rows[A.rows.size()-1] : -1; + + typename std::set::const_iterator + bc_iter = bc_rows.begin(), bc_end = bc_rows.end(); + for(; bc_iter!=bc_end; ++bc_iter) { + GlobalOrdinal row = *bc_iter; + if (row >= first_local_row && row <= last_local_row) { + size_t local_row = row - first_local_row; + b.coefs[local_row] = prescribed_value; + zero_row_and_put_1_on_diagonal(A, row); + } + } + + for(size_t i=0; i +typename TypeTraits::magnitude_type +matvec_and_dot(MatrixType& A, + VectorType& x, + VectorType& y) +{ + timer_type t0 = mytimer(); + exchange_externals(A, x); + exchtime += mytimer()-t0; + + typedef typename TypeTraits::magnitude_type magnitude; + typedef typename MatrixType::ScalarType ScalarType; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; + typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; + typedef typename MatrixType::ComputeNodeType ComputeNodeType; + + ComputeNodeType& comp_node = A.compute_node; + + FusedMatvecDotOp mvdotop; + + mvdotop.n = A.rows.size(); + mvdotop.Arowoffsets = comp_node.get_buffer(&A.row_offsets[0], A.row_offsets.size()); + mvdotop.Acols = comp_node.get_buffer(&A.packed_cols[0], A.packed_cols.size()); + mvdotop.Acoefs = comp_node.get_buffer(&A.packed_coefs[0], A.packed_coefs.size()); + mvdotop.x = comp_node.get_buffer(&x.coefs[0], x.coefs.size()); + mvdotop.y = comp_node.get_buffer(&y.coefs[0], y.coefs.size()); + mvdotop.beta = 0; + + comp_node.parallel_reduce(mvdotop.n, mvdotop); + +#ifdef HAVE_MPI + magnitude local_dot = mvdotop.result, global_dot = 0; + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + MPI_Allreduce(&local_dot, &global_dot, 1, mpi_dtype, MPI_SUM, MPI_COMM_WORLD); + return global_dot; +#else + return mvdotop.result; +#endif +} + +//------------------------------------------------------------------------ +//Compute matrix vector product y = A*x where: +// +// A - input matrix +// x - input vector +// y - result vector +// +template +struct matvec_std { +void operator()(MatrixType& 
A, + VectorType& x, + VectorType& y) +{ + exchange_externals(A, x); + + typedef typename MatrixType::ScalarType ScalarType; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; + typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; + typedef typename MatrixType::ComputeNodeType ComputeNodeType; + + ComputeNodeType& comp_node = A.compute_node; + + MatvecOp mvop(A); + + mvop.x = comp_node.get_buffer(&x.coefs[0], x.coefs.size()); + mvop.y = comp_node.get_buffer(&y.coefs[0], y.coefs.size()); + mvop.beta = 0; + + comp_node.parallel_for(mvop.n, mvop); +} +}; + +template +void matvec(MatrixType& A, VectorType& x, VectorType& y) +{ + matvec_std mv; + mv(A, x, y); +} + +template +struct matvec_overlap { +void operator()(MatrixType& A, + VectorType& x, + VectorType& y) +{ +#ifdef HAVE_MPI + begin_exchange_externals(A, x); +#endif + + typedef typename MatrixType::ScalarType ScalarType; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; + typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; + typedef typename MatrixType::ComputeNodeType ComputeNodeType; + + ComputeNodeType& comp_node = A.compute_node; + + MatvecOp mvop(A); + + mvop.x = comp_node.get_buffer(&x.coefs[0], x.coefs.size()); + mvop.y = comp_node.get_buffer(&y.coefs[0], y.coefs.size()); + mvop.beta = 0; + + comp_node.parallel_for(mvop.n, mvop); + +#ifdef HAVE_MPI + finish_exchange_externals(A.neighbors.size()); + + mvop.Arowoffsets = comp_node.get_buffer(&A.row_offsets_external[0], A.row_offsets_external.size()); + mvop.beta = 1; + + comp_node.parallel_for(A.rows.size(), mvop); +#endif +} +}; + +}//namespace miniFE + +#endif + diff --git a/mkl/basic/SumInLinSys.hpp b/mkl/basic/SumInLinSys.hpp new file mode 100644 index 0000000..d5f6471 --- /dev/null +++ b/mkl/basic/SumInLinSys.hpp @@ -0,0 +1,33 @@ +#ifndef _SUMINLINSYS_HPP_ +#define _SUMINLINSYS_HPP_ + +#include +#include +#include + +template +struct SumInLinSys { + GlobalOrdinal* node_ordinals; + Scalar* 
elem_diffusion_matrix; + Scalar* elem_source_vector; + miniFE::LockingMatrix* A; + miniFE::LockingVector* b; + +inline void operator()(int i) +{ + size_t nnodes = miniFE::Hex8::numNodesPerElem; + GlobalOrdinal* node_ords = node_ordinals+i*nnodes; + Scalar* diffusionMat = elem_diffusion_matrix+i*nnodes*nnodes; + Scalar* sourceVec = elem_source_vector+i*nnodes; + for(size_t ii=0; iisum_in(row, nnodes, node_ords, + &(diffusionMat[ii*nnodes])); + b->sum_in(1, &row, &(sourceVec[ii])); + } +} + +}; + +#endif diff --git a/mkl/basic/TBBNode.cpp b/mkl/basic/TBBNode.cpp new file mode 100644 index 0000000..20078fd --- /dev/null +++ b/mkl/basic/TBBNode.cpp @@ -0,0 +1,8 @@ +#ifdef MINIFE_HAVE_TBB + +#include "TBBNode.hpp" + +tbb::task_scheduler_init TBBNode::tsi_(tbb::task_scheduler_init::deferred); + +#endif + diff --git a/mkl/basic/TBBNode.hpp b/mkl/basic/TBBNode.hpp new file mode 100644 index 0000000..6b1fe89 --- /dev/null +++ b/mkl/basic/TBBNode.hpp @@ -0,0 +1,76 @@ +#ifndef TBBNODE_HPP_ +#define TBBNODE_HPP_ + +#include +#include +#include +#include +#include + +#include + +#include // debug + +template +struct BlockedRangeWDP { + mutable WDPin wd; + BlockedRangeWDP(WDPin &in) : wd(in) {} + inline void operator()(tbb::blocked_range &rng) const + { + for(int i=rng.begin(); i +struct BlockedRangeWDPReducer { + WDPin wd; + BlockedRangeWDPReducer(WDPin &in) : wd(in) {} + BlockedRangeWDPReducer(BlockedRangeWDPReducer &in, tbb::split) : wd(in.wd) + { + wd.result = wd.identity(); + } + void operator()(tbb::blocked_range &rng) + { + for(int i=rng.begin(); i &other ) { + wd.result = wd.reduce( wd.result, other.wd.result ); + } +}; + +class TBBNode : public NoOpMemoryModel { + public: + + TBBNode(int numThreads=0) { + if (numThreads >= 1) { + tsi_.initialize(numThreads); + } + else { + tsi_.initialize(tbb::task_scheduler_init::automatic); + } + } + + ~TBBNode() {} + + template + void parallel_for(int length, WDP wd) { + BlockedRangeWDP tbb_wd(wd); + 
tbb::parallel_for(tbb::blocked_range(0,length), tbb_wd, tbb::auto_partitioner()); + } + + template + void parallel_reduce(int length, WDP &wd) { + BlockedRangeWDPReducer tbb_wd(wd); + tbb::parallel_reduce(tbb::blocked_range(0,length), tbb_wd, tbb::auto_partitioner()); + wd.result = tbb_wd.wd.result; // have to put result from final tbb_wd into orginal wd + } + + private: + static tbb::task_scheduler_init tsi_; +}; + +#endif diff --git a/mkl/basic/TPINode.hpp b/mkl/basic/TPINode.hpp new file mode 100644 index 0000000..66ec84f --- /dev/null +++ b/mkl/basic/TPINode.hpp @@ -0,0 +1,113 @@ +#ifndef TPINODE_HPP_ +#define TPINODE_HPP_ + +#include + +#include + +#include // debug + +inline +void tpi_work_span(TPI_Work* work, int n, + size_t& ibeg, size_t& iend) +{ + const int chunk = ( n + work->count - 1 ) / work->count ; + + iend = chunk * ( work->rank + 1 ); + ibeg = chunk * ( work->rank ); + + if ( n < iend ) { iend = n; } +} + +template +void tpi_execute(TPI_Work * work) +{ + const WDP* const_wdp = static_cast(work->info); + WDP* wdp = const_cast(const_wdp); + size_t n = wdp->n; + size_t ibeg = 0, iend = n; + tpi_work_span(work, n, ibeg, iend); + for(size_t i=ibeg; i +void tpi_reduction_work(TPI_Work * work) +{ + const WDP* wdp = static_cast(work->info); + size_t n = wdp->n; + size_t ibeg = 0, iend = n; + tpi_work_span(work, n, ibeg, iend); + + typedef typename WDP::ReductionType ReductionType; + ReductionType tmpres = wdp->result, tmpi; + + for(size_t i=ibeg; igenerate(i); + tmpres = wdp->reduce(tmpres, tmpi); + } + *(static_cast(work->reduce)) = tmpres; +} + +template +void tpi_reduction_join(TPI_Work * work, const void* src) +{ + typedef typename WDP::ReductionType ReductionType; + + const WDP* wdp = static_cast(work->info); + + ReductionType& work_reduce = *(static_cast(work->reduce)); + + work_reduce = wdp->reduce(work_reduce, *(static_cast(src)) ); +} + +template +void tpi_reduction_init(TPI_Work * work) +{ + typedef typename WDP::ReductionType ReductionType; + + 
const WDP* wdp = static_cast(work->info); + + *(static_cast(work->reduce)) = wdp->identity(); +} + +class TPINode : public NoOpMemoryModel { + public: + + TPINode(int numThreads=0) + : numThreads_(numThreads) + { + if (numThreads >= 1) { + TPI_Init(numThreads); + } + } + + ~TPINode() + { + if (numThreads_ >= 1) { + TPI_Finalize(); + } + } + + template + void parallel_for(int length, WDP & wd ) { + TPI_Run_threads(tpi_execute, &wd, 0 ); + } + + template + void parallel_reduce(int length, WDP & wd ) { + typedef typename WDP::ReductionType ReductionType; + ReductionType result = 0; + TPI_Run_threads_reduce(tpi_reduction_work, &wd, + tpi_reduction_join, + tpi_reduction_init, sizeof(result), &result); + wd.result = result; + } + + private: + int numThreads_; +}; + +#endif + diff --git a/mkl/basic/TypeTraits.hpp b/mkl/basic/TypeTraits.hpp new file mode 100644 index 0000000..3ac472c --- /dev/null +++ b/mkl/basic/TypeTraits.hpp @@ -0,0 +1,137 @@ +#ifndef _TypeTraits_hpp_ +#define _TypeTraits_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include + +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template struct TypeTraits {}; + +template<> +struct TypeTraits { + typedef float magnitude_type; + + static const char* name() {return "float";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_FLOAT;} +#endif +}; + +template<> +struct TypeTraits { + typedef double magnitude_type; + + static const char* name() {return "double";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_DOUBLE;} +#endif +}; + +template<> +struct TypeTraits { + typedef int magnitude_type; + + static const char* name() {return "int";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_INT;} +#endif +}; + +template<> +struct TypeTraits { + typedef long int magnitude_type; + + static const char* name() {return "long int";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_LONG;} +#endif +}; + +#ifndef MINIFE_NO_LONG_LONG + +template<> +struct TypeTraits { + typedef long long magnitude_type; + + static const char* name() {return "long long";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_LONG_LONG;} +#endif +}; + +#endif + +template<> +struct TypeTraits { + typedef unsigned magnitude_type; + + static const char* name() {return "unsigned";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_UNSIGNED;} +#endif +}; + +template<> +struct TypeTraits > { + typedef float magnitude_type; + + static const char* name() {return "std::complex";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_COMPLEX;} +#endif +}; + +template<> +struct TypeTraits > { + typedef 
double magnitude_type; + + static const char* name() {return "std::complex";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_DOUBLE_COMPLEX;} +#endif +}; + +}//namespace miniFE + +#endif + diff --git a/mkl/basic/Vector.hpp b/mkl/basic/Vector.hpp new file mode 100644 index 0000000..4290ae4 --- /dev/null +++ b/mkl/basic/Vector.hpp @@ -0,0 +1,83 @@ +#ifndef _Vector_hpp_ +#define _Vector_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include + +#include + +namespace miniFE { + + +template +struct Vector { + typedef ComputeNode ComputeNodeType; + typedef Scalar ScalarType; + typedef LocalOrdinal LocalOrdinalType; + typedef GlobalOrdinal GlobalOrdinalType; + + Vector(GlobalOrdinal startIdx, LocalOrdinal local_sz, ComputeNode& cn) + : startIndex(startIdx), + local_size(local_sz), + coefs(local_size), + compute_node(cn) + { + MemInitOp mem_init; + mem_init.ptr = &coefs[0]; + mem_init.n = local_size; +#ifdef MINIFE_HAVE_CUDA +//we don't want to run this mem-init kernel on cuda, we want +//to just run it locally on the host. + for(size_t i=0; i coefs; + ComputeNode& compute_node; +}; + + +}//namespace miniFE + +#endif + diff --git a/mkl/basic/Vector_functions.hpp b/mkl/basic/Vector_functions.hpp new file mode 100644 index 0000000..f82866e --- /dev/null +++ b/mkl/basic/Vector_functions.hpp @@ -0,0 +1,249 @@ +#ifndef _Vector_functions_hpp_ +#define _Vector_functions_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#ifdef MINIFE_HAVE_TBB +#include +#endif + +#include +#include +#include +#include + + +namespace miniFE { + + +template +void write_vector(const std::string& filename, + const VectorType& vec) +{ + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + std::ostringstream osstr; + osstr << filename << "." << numprocs << "." << myproc; + std::string full_name = osstr.str(); + std::ofstream ofs(full_name.c_str()); + + typedef typename VectorType::ScalarType ScalarType; + + const std::vector& coefs = vec.coefs; + for(int p=0; p +void sum_into_vector(size_t num_indices, + const typename VectorType::GlobalOrdinalType* indices, + const typename VectorType::ScalarType* coefs, + VectorType& vec) +{ + typedef typename VectorType::GlobalOrdinalType GlobalOrdinal; + typedef typename VectorType::ScalarType Scalar; + + GlobalOrdinal first = vec.startIndex; + GlobalOrdinal last = first + vec.local_size - 1; + + std::vector& vec_coefs = vec.coefs; + + for(size_t i=0; i last) continue; + size_t idx = indices[i] - first; + vec_coefs[idx] += coefs[i]; + } +} + +#ifdef MINIFE_HAVE_TBB +template +void sum_into_vector(size_t num_indices, + const typename VectorType::GlobalOrdinalType* indices, + const typename VectorType::ScalarType* coefs, + LockingVector& vec) +{ + vec.sum_in(num_indices, indices, coefs); +} +#endif + +//------------------------------------------------------------ +//Compute the update of a vector with the sum of two scaled vectors 
where: +// +// w = alpha*x + beta*y +// +// x,y - input vectors +// +// alpha,beta - scalars applied to x and y respectively +// +// w - output vector +// +template +void + waxpby(typename VectorType::ScalarType alpha, const VectorType& x, + typename VectorType::ScalarType beta, const VectorType& y, + VectorType& w) +{ + typedef typename VectorType::ScalarType ScalarType; + typedef typename VectorType::ComputeNodeType ComputeNodeType; + + ComputeNodeType& compute_node = x.compute_node; + + WaxpbyOp waxpbyop; + + waxpbyop.w = compute_node.get_buffer(&w.coefs[0], w.coefs.size()); + waxpbyop.x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); + waxpbyop.y = compute_node.get_buffer(&y.coefs[0], y.coefs.size()); + waxpbyop.alpha = alpha; + waxpbyop.beta = beta; + waxpbyop.n = x.local_size; + +#ifdef MINIFE_DEBUG + if (y.local_size < x.local_size || w.local_size < x.local_size) { + std::cerr << "miniFE::waxpby ERROR, y and w must be at least as long as x." << std::endl; + return; + } +#endif + + compute_node.parallel_for(waxpbyop.n, waxpbyop); +} + +//Like waxpby above, except operates on two sets of arguments. +//In other words, performs two waxpby operations in one loop. 
+template +void + fused_waxpby(typename VectorType::ScalarType alpha, const VectorType& x, + typename VectorType::ScalarType beta, const VectorType& y, + VectorType& w, + typename VectorType::ScalarType alpha2, const VectorType& x2, + typename VectorType::ScalarType beta2, const VectorType& y2, + VectorType& w2) +{ + typedef typename VectorType::ScalarType ScalarType; + typedef typename VectorType::ComputeNodeType ComputeNodeType; + + ComputeNodeType& compute_node = x.compute_node; + + FusedWaxpbyOp waxpbyop; + + waxpbyop.w = compute_node.get_buffer(&w.coefs[0], w.coefs.size()); + waxpbyop.x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); + waxpbyop.y = compute_node.get_buffer(&y.coefs[0], y.coefs.size()); + waxpbyop.alpha = alpha; + waxpbyop.beta = beta; + waxpbyop.w2 = compute_node.get_buffer(&w2.coefs[0], w2.coefs.size()); + waxpbyop.x2 = compute_node.get_buffer(&x2.coefs[0], x2.coefs.size()); + waxpbyop.y2 = compute_node.get_buffer(&y2.coefs[0], y2.coefs.size()); + waxpbyop.alpha2 = alpha2; + waxpbyop.beta2 = beta2; + waxpbyop.n = x.local_size; + +#ifdef MINIFE_DEBUG + if (y.local_size < x.local_size || w.local_size < x.local_size) { + std::cerr << "miniFE::waxpby ERROR, y and w must be at least as long as x." 
<< std::endl; + return; + } +#endif + + compute_node.parallel_for(waxpbyop.n, waxpbyop); +} + +//----------------------------------------------------------- +//Compute the dot product of two vectors where: +// +// x,y - input vectors +// +// result - return-value +// +template +typename TypeTraits::magnitude_type + dot(const Vector& x, + const Vector& y) +{ + size_t n = x.local_size; + +#ifdef MINIFE_DEBUG + if (y.local_size < n) { + std::cerr << "miniFE::dot ERROR, y must be at least as long as x."<::magnitude_type magnitude; + + typedef typename Vector::ComputeNodeType ComputeNodeType; + + ComputeNodeType& compute_node = x.compute_node; + + DotOp dotop; + dotop.x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); + dotop.y = compute_node.get_buffer(&y.coefs[0], y.coefs.size()); + dotop.n = x.local_size; + + compute_node.parallel_reduce(n, dotop); + +#ifdef HAVE_MPI + magnitude local_dot = dotop.result, global_dot = 0; + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + MPI_Allreduce(&local_dot, &global_dot, 1, mpi_dtype, MPI_SUM, MPI_COMM_WORLD); + return global_dot; +#else + return dotop.result; +#endif +} + +}//namespace miniFE + +#endif + diff --git a/mkl/basic/WaxpbyOp.hpp b/mkl/basic/WaxpbyOp.hpp new file mode 100644 index 0000000..6eaaa6e --- /dev/null +++ b/mkl/basic/WaxpbyOp.hpp @@ -0,0 +1,43 @@ +#ifndef WAXPBYOP_HPP_ +#define WAXPBYOP_HPP_ + +#ifndef KERNEL_PREFIX +#define KERNEL_PREFIX +#endif + +template +struct WaxpbyOp { + Scalar* w; + const Scalar* x; + const Scalar* y; + Scalar alpha, beta; + size_t n; + KERNEL_PREFIX void operator()(size_t i) const + { + //here we count on the caller (ComputeNode) to pass in 'i' + //that is in the range 0..n-1 + w[i] = alpha*x[i] + beta*y[i]; + } +}; + +template +struct FusedWaxpbyOp { + Scalar* w; + const Scalar* x; + const Scalar* y; + Scalar alpha, beta; + Scalar* w2; + const Scalar* x2; + const Scalar* y2; + Scalar alpha2, beta2; + size_t n; + KERNEL_PREFIX void operator()(size_t i) const + { + //here we 
count on the caller (ComputeNode) to pass in 'i' + //that is in the range 0..n-1 + w[i] = alpha*x[i] + beta*y[i]; + w2[i] = alpha2*x2[i] + beta2*y2[i]; + } +}; + +#endif diff --git a/mkl/basic/analytic_soln.hpp b/mkl/basic/analytic_soln.hpp new file mode 100644 index 0000000..8dcdfad --- /dev/null +++ b/mkl/basic/analytic_soln.hpp @@ -0,0 +1,117 @@ +#ifndef _analytic_soln_hpp_ +#define _analytic_soln_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include + +#ifndef MINIFE_SCALAR +#define MINIFE_SCALAR double; +#endif + +namespace miniFE { + +typedef MINIFE_SCALAR Scalar; + +// The 'soln' function below computes the analytic solution for +// steady state temperature in a brick-shaped domain (formally called +// a rectangular parallelepiped). 
The inputs to the function are +// the x,y,z coordinates of the point at which temperature is to be +// computed, and the number of terms p,q in the series expansion. +// +// The equations used for the temperature solution are equations 9 and 10 +// in section 6.2 of Carslaw & Jaeger, "Conduction of Heat in Solids". +// +// The paralellepiped being used is defined by this domain: +// 0 <= x <= 1.0 +// 0 <= y <= 1.0 +// 0 <= z <= 1.0 +// +// With boundary conditions prescribing the temperature to be 1.0 on +// the x==1.0 face, and 0.0 on all other faces. +// +// Thus, in the equations from Carslaw & Jaeger, the following constants +// are used: +// +// a == b == c == 1.0 (the extents of the domain) +// v1 == 0.0 (temperature at x == 0.0) +// v2 == 1.0 (temperature at x == 1.0) +// + +const Scalar PI = 3.141592653589793238462; +const Scalar PI_SQR = PI*PI; +const Scalar term0 = 16.0/(PI_SQR); + +inline Scalar fcn_l(int p, int q) +{ + return std::sqrt((2*p+1)*(2*p+1)*PI_SQR + (2*q+1)*(2*q+1)*PI_SQR); +} + +inline Scalar fcn(int n, Scalar u) +{ + return (2*n+1)*PI*u; +} + +inline Scalar soln(Scalar x, Scalar y, Scalar z, int max_p, int max_q) +{ + Scalar sum = 0; + for(int p=0; p<=max_p; ++p) { + const Scalar p21y = fcn(p, y); + const Scalar sin_py = std::sin(p21y)/(2*p+1); + for(int q=0; q<=max_q; ++q) { + const Scalar q21z = fcn(q, z); + const Scalar sin_qz = std::sin(q21z)/(2*q+1); + + const Scalar l = fcn_l(p, q); + + const Scalar sinh1 = std::sinh(l*x); + const Scalar sinh2 = std::sinh(l); + + const Scalar tmp = (sinh1*sin_py)*(sin_qz/sinh2); + + //if the scalar l gets too big, sinh(l) becomes inf. + //if that happens, tmp is a NaN. + //crude check for NaN: + //if tmp != tmp, tmp is NaN + if (tmp == tmp) { + sum += tmp; + } + else { + //if we got a NaN, break out of this inner loop and go to + //the next iteration of the outer loop. 
+ break; + } + } + } + return term0*sum; +} + +}//namespace miniFE + +#endif /* _analytic_soln_hpp_ */ diff --git a/mkl/basic/assemble_FE_data.hpp b/mkl/basic/assemble_FE_data.hpp new file mode 100644 index 0000000..f34b14a --- /dev/null +++ b/mkl/basic/assemble_FE_data.hpp @@ -0,0 +1,85 @@ +#ifndef _assemble_FE_data_hpp_ +#define _assemble_FE_data_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include + +#ifdef MINIFE_HAVE_TBB +//#include +#include +//#include +#else +#include +#endif + +namespace miniFE { + +template +void +assemble_FE_data(const simple_mesh_description& mesh, + MatrixType& A, + VectorType& b, + Parameters& params) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + + int global_elems_x = mesh.global_box[0][1]; + int global_elems_y = mesh.global_box[1][1]; + int global_elems_z = mesh.global_box[2][1]; + + Box local_elem_box; + copy_box(mesh.local_box, local_elem_box); + + if (get_num_ids(local_elem_box) < 1) { + return; + } + + // + //We want the element-loop to loop over our (processor-local) domain plus a + //ghost layer, so we can assemble the complete linear-system without doing + //any communication. + // + int ghost = 1; + if (local_elem_box[0][0] > 0) local_elem_box[0][0] -= ghost; + if (local_elem_box[1][0] > 0) local_elem_box[1][0] -= ghost; + if (local_elem_box[2][0] > 0) local_elem_box[2][0] -= ghost; + if (local_elem_box[0][1] < global_elems_x) local_elem_box[0][1] += ghost; + if (local_elem_box[1][1] < global_elems_y) local_elem_box[1][1] += ghost; + if (local_elem_box[2][1] < global_elems_z) local_elem_box[2][1] += ghost; + + perform_element_loop(mesh, local_elem_box, A, b, params); +} + +}//namespace miniFE + +#endif + diff --git a/mkl/basic/box_utils.hpp b/mkl/basic/box_utils.hpp new file mode 100644 index 0000000..ee10975 --- /dev/null +++ b/mkl/basic/box_utils.hpp @@ -0,0 +1,199 @@ +#ifndef _box_utils_hpp_ +#define _box_utils_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. 
+// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#include +#include + +namespace miniFE { + +inline void copy_box(const Box& from_box, Box& to_box) +{ + for(int i=0; i<3; ++i) { + to_box[i][0] = from_box[i][0]; + to_box[i][1] = from_box[i][1]; + } +} + +template +void get_int_coords(GlobalOrdinal ID, int nx, int ny, int nz, + int& x, int& y, int& z) +{ + z = ID/(nx*ny); + y = (ID%(nx*ny))/nx; + x = ID%nx; +} + +template +void get_coords(GlobalOrdinal ID, int nx, int ny, int nz, + Scalar& x, Scalar& y, Scalar& z) +{ + const int xdiv = nx>1 ? nx-1 : 1; + const int ydiv = ny>1 ? ny-1 : 1; + const int zdiv = nz>1 ? nz-1 : 1; + +//This code assumes that ID is 0-based. +// +//compute coordinates that lie on (or in) the unit cube. 
+//that's why we're dividing by nz,ny,nx: + z = (1.0*(ID/(nx*ny)))/zdiv; + y = 1.0*((ID%(nx*ny))/nx)/ydiv; + x = 1.0*(ID%nx)/xdiv; +} + +template +GlobalOrdinal get_num_ids(const Box& box) +{ + int nx = box[0][1] - box[0][0]; + int ny = box[1][1] - box[1][0]; + int nz = box[2][1] - box[2][0]; + GlobalOrdinal tmp = nx*ny; + tmp *= nz; + return tmp; +} + +template +GlobalOrdinal get_id(int nx, int ny, int nz, + int x, int y, int z) +{ + if (x<0 || y<0 || z<0) return -1; + if (x>=nx || y>=ny || z>=nz) return -1; + + //form x + nx*y + nx*ny*z: + + GlobalOrdinal tmp = nx*ny; + tmp *= z; + tmp = x + nx * y + tmp; + return tmp; +} + +template +void get_ids(int nx, int ny, int nz, + const Box& box, + GlobalOrdinal* ids) +{ + unsigned offset = 0; + for(int z=box[2][0]; z(nx, ny, nz, x, y, z); + } + } + } +} + +template +void create_map_id_to_row(int global_nx, int global_ny, int global_nz, + const Box& box, + std::map& id_to_row) +{ + GlobalOrdinal num_my_ids = get_num_ids(box); + GlobalOrdinal my_first_row = 0; + +#ifdef HAVE_MPI + int numprocs = 1, myproc = 0; + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + + typename std::vector tmp_buffer(numprocs, 0); + tmp_buffer[myproc] = num_my_ids; + typename std::vector global_offsets(numprocs); + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + MPI_Allreduce(&tmp_buffer[0], &global_offsets[0], numprocs, mpi_dtype, + MPI_SUM, MPI_COMM_WORLD); + GlobalOrdinal offset = 0; + for(int i=0; i all_my_ids(num_my_ids); + get_ids(global_nx, global_ny, global_nz, box, &all_my_ids[0]); + + typename std::vector ids; + typename std::vector rows; + + if (all_my_ids.size() > 0) { + ids.push_back(all_my_ids[0]); + rows.push_back(my_first_row); + } + + for(size_t i=1; i lengths(numprocs); + MPI_Allgather(&len, 1, MPI_INT, &lengths[0], 1, MPI_INT, MPI_COMM_WORLD); + + std::vector displs(lengths); + int displ = 0; + for(int i=0; i global_ids(displ); + typename std::vector global_rows(displ); + + 
MPI_Allgatherv(&ids[0], len, mpi_dtype, &global_ids[0], + &lengths[0], &displs[0], mpi_dtype, MPI_COMM_WORLD); + MPI_Allgatherv(&rows[0], len, mpi_dtype, &global_rows[0], + &lengths[0], &displs[0], mpi_dtype, MPI_COMM_WORLD); + + ids = global_ids; + rows = global_rows; +#endif + + for(size_t i=0; i +#include + +#include +#include + +#include + +namespace miniFE { + +template +void print_vec(const std::vector& vec, const std::string& name) +{ + for(size_t i=0; i +bool breakdown(typename VectorType::ScalarType inner, + const VectorType& v, + const VectorType& w) +{ + typedef typename VectorType::ScalarType Scalar; + typedef typename TypeTraits::magnitude_type magnitude; + +//This is code that was copied from Aztec, and originally written +//by my hero, Ray Tuminaro. +// +//Assuming that inner = (inner product of v and w), +//v and w are considered orthogonal if +// |inner| < 100 * ||v||_2 * ||w||_2 * epsilon + + magnitude vnorm = std::sqrt(dot(v,v)); + magnitude wnorm = std::sqrt(dot(w,w)); + return std::abs(inner) <= 100*vnorm*wnorm*std::numeric_limits::epsilon(); +} + +template +void +cg_solve(OperatorType& A, + const VectorType& b, + VectorType& x, + Matvec matvec, + typename OperatorType::LocalOrdinalType max_iter, + typename TypeTraits::magnitude_type& tolerance, + typename OperatorType::LocalOrdinalType& num_iters, + typename TypeTraits::magnitude_type& normr, + timer_type* my_cg_times) +{ + typedef typename OperatorType::ScalarType ScalarType; + typedef typename OperatorType::GlobalOrdinalType GlobalOrdinalType; + typedef typename OperatorType::LocalOrdinalType LocalOrdinalType; + typedef typename TypeTraits::magnitude_type magnitude_type; + + timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0, tMATVECDOT = 0; + timer_type total_time = mytimer(); + + int myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (!A.has_local_indices) { + std::cerr << "miniFE::cg_solve ERROR, A.has_local_indices is false, needs to be true. 
This probably means " + << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::cg_solve." + << std::endl; + return; + } + + size_t nrows = A.rows.size(); + LocalOrdinalType ncols = A.num_cols; + + VectorType r(b.startIndex, nrows, b.compute_node); + VectorType p(0, ncols, b.compute_node); + VectorType Ap(b.startIndex, nrows, b.compute_node); + + normr = 0; + magnitude_type rtrans = 0; + magnitude_type oldrtrans = 0; + + LocalOrdinalType print_freq = max_iter/10; + if (print_freq>50) print_freq = 50; + if (print_freq<1) print_freq = 1; + + ScalarType one = 1.0; + ScalarType zero = 0.0; + + typedef typename VectorType::ComputeNodeType ComputeNodeType; + ComputeNodeType& compute_node = x.compute_node; + + //The following lines that create and initialize buffers are no-ops in many + //cases, but perform actual allocations and copies if an off-cpu device such + //as a GPU is being used by compute_node. + + //Do any required allocations for buffers that will be needed during CG: + ScalarType* d_x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); + ScalarType* d_p = compute_node.get_buffer(&p.coefs[0], p.coefs.size()); + ScalarType* d_b = compute_node.get_buffer(&b.coefs[0], b.coefs.size()); + ScalarType* d_Ap = compute_node.get_buffer(&Ap.coefs[0], Ap.coefs.size()); + ScalarType* d_r = compute_node.get_buffer(&r.coefs[0], r.coefs.size()); +#ifdef MINIFE_CSR_MATRIX + LocalOrdinalType* d_Arowoff = compute_node.get_buffer(&A.row_offsets[0], A.row_offsets.size()); + GlobalOrdinalType* d_Acols = compute_node.get_buffer(&A.packed_cols[0], A.packed_cols.size()); + ScalarType* d_Acoefs = compute_node.get_buffer(&A.packed_coefs[0], A.packed_coefs.size()); +#endif +#ifdef MINIFE_ELL_MATRIX + GlobalOrdinalType* d_Acols = compute_node.get_buffer(&A.cols[0], A.cols.size()); + ScalarType* d_Acoefs = compute_node.get_buffer(&A.coefs[0], A.coefs.size()); +#endif + + //Copy data to buffers that need to be initialized from input data: + 
compute_node.copy_to_buffer(&x.coefs[0], x.coefs.size(), d_x); + compute_node.copy_to_buffer(&b.coefs[0], b.coefs.size(), d_b); +#ifdef MINIFE_CSR_MATRIX + compute_node.copy_to_buffer(&A.row_offsets[0], A.row_offsets.size(), d_Arowoff); + compute_node.copy_to_buffer(&A.packed_cols[0], A.packed_cols.size(), d_Acols); + compute_node.copy_to_buffer(&A.packed_coefs[0], A.packed_coefs.size(), d_Acoefs); +#endif +#ifdef MINIFE_ELL_MATRIX + compute_node.copy_to_buffer(&A.cols[0], A.cols.size(), d_Acols); + compute_node.copy_to_buffer(&A.coefs[0], A.coefs.size(), d_Acoefs); +#endif + + TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY); + + compute_node.copy_from_buffer(&p.coefs[0], p.coefs.size(), d_p); +// print_vec(p.coefs, "p"); + + TICK(); + matvec(A, p, Ap); + TOCK(tMATVEC); + + TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY); + +// if (b.coefs.size() == r.coefs.size()) std::cout << "b.size == r.size" << std::endl; +// else std::cout << "b.size != r.size" << std::endl; +// if (b.coefs == r.coefs) std::cout << "b == r" << std::endl; +// else std::cout << "b != r" << std::endl; +// compute_node.copy_from_buffer(&r.coefs[0], r.coefs.size(), d_r); +// print_vec(b.coefs, "b"); +// print_vec(r.coefs, "r"); + + TICK(); rtrans = dot(r, r); TOCK(tDOT); + +//std::cout << "rtrans="<add("Global Nrows",global_nrows); + ydoc.get("Matrix attributes")->add("Global NNZ",global_nnz); + + //compute how much memory the matrix occupies: + //num-bytes = sizeof(GlobalOrdinal)*global_nrows for A.rows + // + sizeof(LocalOrdinal)*global_nrows for A.rows_offsets + // + sizeof(GlobalOrdinal)*global_nnz for A.packed_cols + // + sizeof(Scalar)*global_nnz for A.packed_coefs + + double invGB = 1.0/(1024*1024*1024); + double memGB = invGB*global_nrows*sizeof(GlobalOrdinal); + memGB += invGB*global_nrows*sizeof(LocalOrdinal); + memGB += invGB*global_nnz*sizeof(GlobalOrdinal); + memGB += invGB*global_nnz*sizeof(Scalar); + ydoc.get("Matrix attributes")->add("Global Memory (GB)",memGB); + + 
ydoc.get("Matrix attributes")->add("Pll Memory Overhead (MB)",mem_overhead_MB); + + ydoc.get("Matrix attributes")->add("Rows per proc MIN",min_nrows); + ydoc.get("Matrix attributes")->add("Rows per proc MAX",max_nrows); + ydoc.get("Matrix attributes")->add("Rows per proc AVG",avg_nrows); + ydoc.get("Matrix attributes")->add("NNZ per proc MIN",min_nnz); + ydoc.get("Matrix attributes")->add("NNZ per proc MAX",max_nnz); + ydoc.get("Matrix attributes")->add("NNZ per proc AVG",avg_nnz); + } + + return global_nnz; +} + +}//namespace miniFE + +#endif + diff --git a/mkl/basic/driver.hpp b/mkl/basic/driver.hpp new file mode 100644 index 0000000..d3966eb --- /dev/null +++ b/mkl/basic/driver.hpp @@ -0,0 +1,403 @@ +#ifndef _driver_hpp_ +#define _driver_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef MINIFE_CSR_MATRIX +#include +#elif defined(MINIFE_ELL_MATRIX) +#include +#else +#include +#endif + +#include + +#include + +#include +#include + +#include + +#include +#include +#include +#include +#if MINIFE_KERNELS != 0 +#include +#endif +#include +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#define RUN_TIMED_FUNCTION(msg, fn, time_inc, time_total) \ +{ \ + if (myproc==0) { \ + std::cout.width(30); \ + std::cout << msg; \ + std::cout.flush(); \ + } \ + timer_type rtf_t0 = mytimer(); \ + fn; \ + time_inc = mytimer() - rtf_t0; \ + time_total += time_inc; \ + if (myproc==0) { \ + std::cout << time_inc << "s, total time: " << time_total << std::endl; \ + } \ +} + +//This program assembles finite-element matrices into a global matrix and +//vector, then solves the linear-system using Conjugate Gradients. +//Each finite-element is a hexahedron with 8 vertex-nodes. +// +//Notes: +//- In finite-element terms, the box dimensions are in elements, not nodes. +// In other words, a 2x2x2 box describes 8 elements, each of which has 8 nodes, +// so it is a 3x3x3 node domain (27 nodes). +// The assembled linear system will have 1 equation for each finite element node. +// +//- The coordinate origin is at the corner of the global box where x=0, +// y=0, z=0, and the box extends along the positive x-axis, positive y-axis, +// and the positive z-axis. +// +//- Some aspects of matrix-structure generation and finite-element assembly +// are convenient to do using global node identifiers. +// A global identifier for each node is obtained from coordinates plus +// global box dimensions. See the function 'get_id' in box_utils.hpp. +// +//- Each node corresponds to a row in the matrix. 
The RCB partitioning method +// we use to split the global box among processors results in some +// processors owning non-contiguous blocks of global node identifiers. +// Since it is convenient for matrices and vectors to store contiguously- +// numbered blocks of rows, we map global node identifiers to a separate +// space of row numbers such that each processor's nodes correspond to a +// contiguous block of row numbers. +// + +namespace miniFE { + +template +void +driver(const Box& global_box, Box& my_box, ComputeNodeType& compute_node, + Parameters& params, YAML_Doc& ydoc) +{ + int global_nx = global_box[0][1]; + int global_ny = global_box[1][1]; + int global_nz = global_box[2][1]; + + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (params.load_imbalance > 0) { + add_imbalance(global_box, my_box, params.load_imbalance, ydoc); + } + + float largest_imbalance = 0, std_dev = 0; + compute_imbalance(global_box, my_box, largest_imbalance, + std_dev, ydoc, true); + + + //Create a representation of the mesh: + //Note that 'simple_mesh_description' is a virtual or conceptual + //mesh that doesn't actually store mesh data. + + if (myproc==0) { + std::cout.width(30); + std::cout << "creating/filling mesh..."; + std::cout.flush(); + } + + timer_type t_start = mytimer(); + timer_type t0 = mytimer(); + + simple_mesh_description mesh(global_box, my_box); + + timer_type mesh_fill = mytimer() - t0; + timer_type t_total = mytimer() - t_start; + + if (myproc==0) { + std::cout << mesh_fill << "s, total time: " << t_total << std::endl; + } + + //next we will generate the matrix structure. 
+ + //Declare matrix object: + +#ifdef MINIFE_CSR_MATRIX + typedef CSRMatrix MatrixType; +#elif defined(MINIFE_ELL_MATRIX) + typedef ELLMatrix MatrixType; +#else + typedef CSRMatrix MatrixType; +#endif + + MatrixType A(compute_node); + + timer_type gen_structure; + RUN_TIMED_FUNCTION("generating matrix structure...", + generate_matrix_structure(mesh, A), + gen_structure, t_total); + + GlobalOrdinal local_nrows = A.rows.size(); + GlobalOrdinal my_first_row = local_nrows > 0 ? A.rows[0] : -1; + + Vector b(my_first_row, local_nrows,compute_node); + Vector x(my_first_row, local_nrows,compute_node); + + //Assemble finite-element sub-matrices and sub-vectors into the global + //linear system: + + timer_type fe_assembly; + RUN_TIMED_FUNCTION("assembling FE data...", + assemble_FE_data(mesh, A, b, params), + fe_assembly, t_total); + + if (myproc == 0) { + ydoc.add("Matrix structure generation",""); + ydoc.get("Matrix structure generation")->add("Mat-struc-gen Time",gen_structure); + ydoc.add("FE assembly",""); + ydoc.get("FE assembly")->add("FE assembly Time",fe_assembly); + } + +#ifdef MINIFE_DEBUG + write_matrix("A_prebc.mtx", A); + write_vector("b_prebc.vec", b); +#endif + + //Now apply dirichlet boundary-conditions + //(Apply the 0-valued surfaces first, then the 1-valued surface last.) 
+ + timer_type dirbc_time; + RUN_TIMED_FUNCTION("imposing Dirichlet BC...", + impose_dirichlet(0.0, A, b, global_nx+1, global_ny+1, global_nz+1, mesh.bc_rows_0), dirbc_time, t_total); + RUN_TIMED_FUNCTION("imposing Dirichlet BC...", + impose_dirichlet(1.0, A, b, global_nx+1, global_ny+1, global_nz+1, mesh.bc_rows_1), dirbc_time, t_total); + +#ifdef MINIFE_DEBUG + write_matrix("A.mtx", A); + write_vector("b.vec", b); +#endif + + //Transform global indices to local, set up communication information: + + timer_type make_local_time; + RUN_TIMED_FUNCTION("making matrix indices local...", + make_local_matrix(A), + make_local_time, t_total); + +#ifdef MINIFE_DEBUG + write_matrix("A_local.mtx", A); + write_vector("b_local.vec", b); +#endif + + size_t global_nnz = compute_matrix_stats(A, myproc, numprocs, ydoc); + + //Prepare to perform conjugate gradient solve: + + LocalOrdinal max_iters = 50; + LocalOrdinal num_iters = 0; + typedef typename TypeTraits::magnitude_type magnitude; + magnitude rnorm = 0; + magnitude tol = std::numeric_limits::epsilon(); + + timer_type cg_times[NUM_TIMERS]; + + typedef Vector VectorType; + + t_total = mytimer() - t_start; + + bool matvec_with_comm_overlap = params.mv_overlap_comm_comp==1; + +#if MINIFE_KERNELS != 0 + if (myproc==0) { + std::cout.width(30); + std::cout << "Starting kernel timing loops ..." << std::endl; + } + + max_iters = 500; + x.coefs[0] = 0.9; + if (matvec_with_comm_overlap) { + time_kernels(A, b, x, matvec_overlap(), max_iters, rnorm, cg_times); + } + else { + time_kernels(A, b, x, matvec_std(), max_iters, rnorm, cg_times); + } + num_iters = max_iters; + std::string title("Kernel timings"); +#else + if (myproc==0) { + std::cout << "Starting CG solver ... 
" << std::endl; + } + + if (matvec_with_comm_overlap) { +#ifdef MINIFE_CSR_MATRIX + rearrange_matrix_local_external(A); + cg_solve(A, b, x, matvec_overlap(), max_iters, tol, + num_iters, rnorm, cg_times); +#else + std::cout << "ERROR, matvec with overlapping comm/comp only works with CSR matrix."<(), max_iters, tol, + num_iters, rnorm, cg_times); + if (myproc == 0) { + std::cout << "Final Resid Norm: " << rnorm << std::endl; + } + +#ifdef MINIFE_DEBUG + if (myproc == 0) { + std::cout << "verifying solution..." << std::endl; + } + verify_solution(mesh, x); +#endif + } + +#ifdef MINIFE_DEBUG + write_vector("x.vec", x); +#endif + std::string title("CG solve"); +#endif + + if (myproc == 0) { + ydoc.get("Global Run Parameters")->add("ScalarType",TypeTraits::name()); + ydoc.get("Global Run Parameters")->add("GlobalOrdinalType",TypeTraits::name()); + ydoc.get("Global Run Parameters")->add("LocalOrdinalType",TypeTraits::name()); + ydoc.add(title,""); + ydoc.get(title)->add("Iterations",num_iters); + ydoc.get(title)->add("Final Resid Norm",rnorm); + + GlobalOrdinal global_nrows = global_nx; + global_nrows *= global_ny*global_nz; + + //flops-per-mv, flops-per-dot, flops-per-waxpy: + double mv_flops = global_nnz*2.0; + double dot_flops = global_nrows*2.0; + double waxpy_flops = global_nrows*3.0; + +#if MINIFE_KERNELS == 0 +//if MINIFE_KERNELS == 0 then we did a CG solve, and in that case +//there were num_iters+1 matvecs, num_iters*2 dots, and num_iters*3+2 waxpys. + mv_flops *= (num_iters+1); + dot_flops *= (2*num_iters); + waxpy_flops *= (3*num_iters+2); +#else +//if MINIFE_KERNELS then we did one of each operation per iteration. 
+ mv_flops *= num_iters; + dot_flops *= num_iters; + waxpy_flops *= num_iters; +#endif + + double total_flops = mv_flops + dot_flops + waxpy_flops; + + double mv_mflops = -1; + if (cg_times[MATVEC] > 1.e-4) + mv_mflops = 1.e-6 * (mv_flops/cg_times[MATVEC]); + + double dot_mflops = -1; + if (cg_times[DOT] > 1.e-4) + dot_mflops = 1.e-6 * (dot_flops/cg_times[DOT]); + + double waxpy_mflops = -1; + if (cg_times[WAXPY] > 1.e-4) + waxpy_mflops = 1.e-6 * (waxpy_flops/cg_times[WAXPY]); + + double total_mflops = -1; + if (cg_times[TOTAL] > 1.e-4) + total_mflops = 1.e-6 * (total_flops/cg_times[TOTAL]); + + ydoc.get(title)->add("WAXPY Time",cg_times[WAXPY]); + ydoc.get(title)->add("WAXPY Flops",waxpy_flops); + if (waxpy_mflops >= 0) + ydoc.get(title)->add("WAXPY Mflops",waxpy_mflops); + else + ydoc.get(title)->add("WAXPY Mflops","inf"); + + ydoc.get(title)->add("DOT Time",cg_times[DOT]); + ydoc.get(title)->add("DOT Flops",dot_flops); + if (dot_mflops >= 0) + ydoc.get(title)->add("DOT Mflops",dot_mflops); + else + ydoc.get(title)->add("DOT Mflops","inf"); + + ydoc.get(title)->add("MATVEC Time",cg_times[MATVEC]); + ydoc.get(title)->add("MATVEC Flops",mv_flops); + if (mv_mflops >= 0) + ydoc.get(title)->add("MATVEC Mflops",mv_mflops); + else + ydoc.get(title)->add("MATVEC Mflops","inf"); + +#ifdef MINIFE_FUSED + ydoc.get(title)->add("MATVECDOT Time",cg_times[MATVECDOT]); + ydoc.get(title)->add("MATVECDOT Flops",mv_flops); + if (mv_mflops >= 0) + ydoc.get(title)->add("MATVECDOT Mflops",mv_mflops); + else + ydoc.get(title)->add("MATVECDOT Mflops","inf"); +#endif + +#if MINIFE_KERNELS == 0 + ydoc.get(title)->add("Total",""); + ydoc.get(title)->get("Total")->add("Total CG Time",cg_times[TOTAL]); + ydoc.get(title)->get("Total")->add("Total CG Flops",total_flops); + if (total_mflops >= 0) + ydoc.get(title)->get("Total")->add("Total CG Mflops",total_mflops); + else + ydoc.get(title)->get("Total")->add("Total CG Mflops","inf"); + ydoc.get(title)->add("Time per 
iteration",cg_times[TOTAL]/num_iters); +#endif + } +} + +}//namespace miniFE + +#endif + diff --git a/mkl/basic/exchange_externals.hpp b/mkl/basic/exchange_externals.hpp new file mode 100644 index 0000000..167ba1b --- /dev/null +++ b/mkl/basic/exchange_externals.hpp @@ -0,0 +1,270 @@ +#ifndef _exchange_externals_hpp_ +#define _exchange_externals_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#include + +#include + +namespace miniFE { + +template +void +exchange_externals(MatrixType& A, + VectorType& x) +{ +#ifdef HAVE_MPI +#ifdef MINIFE_DEBUG + std::ostream& os = outstream(); + os << "entering exchange_externals\n"; +#endif + + int numprocs = 1; + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + + if (numprocs < 2) return; + + typedef typename MatrixType::ScalarType Scalar; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + + // Extract Matrix pieces + + int local_nrow = A.rows.size(); + int num_neighbors = A.neighbors.size(); + const std::vector& recv_length = A.recv_length; + const std::vector& send_length = A.send_length; + const std::vector& neighbors = A.neighbors; + const std::vector& elements_to_send = A.elements_to_send; + + std::vector& send_buffer = A.send_buffer; + + // + // first post receives, these are immediate receives + // Do not wait for result to come, will do that at the + // wait call below. + // + + int MPI_MY_TAG = 99; + + std::vector& request = A.request; + + // + // Externals are at end of locals + // + + std::vector& x_coefs = x.coefs; + Scalar* x_external = &(x_coefs[local_nrow]); + + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + + // Post receives first + for(int i=0; i x.coefs.size()) { + os << "error, out-of-range. 
x.coefs.size()=="< exch_ext_requests; +#endif + +template +void +begin_exchange_externals(MatrixType& A, + VectorType& x) +{ +#ifdef HAVE_MPI + + int numprocs = 1, myproc = 0; + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + + if (numprocs < 2) return; + + typedef typename MatrixType::ScalarType Scalar; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + + // Extract Matrix pieces + + int local_nrow = A.rows.size(); + int num_neighbors = A.neighbors.size(); + const std::vector& recv_length = A.recv_length; + const std::vector& send_length = A.send_length; + const std::vector& neighbors = A.neighbors; + const std::vector& elements_to_send = A.elements_to_send; + + std::vector send_buffer(elements_to_send.size(), 0); + + // + // first post receives, these are immediate receives + // Do not wait for result to come, will do that at the + // wait call below. + // + + int MPI_MY_TAG = 99; + + exch_ext_requests.resize(num_neighbors); + + // + // Externals are at end of locals + // + + std::vector& x_coefs = x.coefs; + Scalar* x_external = &(x_coefs[local_nrow]); + + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + + // Post receives first + for(int i=0; i +#include +#include +#include + +#include +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template +int +generate_matrix_structure(const simple_mesh_description& mesh, + MatrixType& A) +{ + int myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + int threw_exc = 0; + try { + + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + + int global_nodes_x = mesh.global_box[0][1]+1; + int global_nodes_y = mesh.global_box[1][1]+1; + int global_nodes_z = mesh.global_box[2][1]+1; + Box box; + copy_box(mesh.local_box, box); + + //num-owned-nodes in each dimension is 
num-elems+1 + //only if num-elems > 0 in that dimension *and* + //we are at the high end of the global range in that dimension: + if (box[0][1] > box[0][0] && box[0][1] == mesh.global_box[0][1]) ++box[0][1]; + if (box[1][1] > box[1][0] && box[1][1] == mesh.global_box[1][1]) ++box[1][1]; + if (box[2][1] > box[2][0] && box[2][1] == mesh.global_box[2][1]) ++box[2][1]; + + GlobalOrdinal global_nrows = global_nodes_x; + global_nrows *= global_nodes_y*global_nodes_z; + + GlobalOrdinal nrows = get_num_ids(box); + try { + A.reserve_space(nrows, 27); + } + catch(std::exception& exc) { + std::ostringstream osstr; + osstr << "One of A.rows.resize, A.row_offsets.resize, A.packed_cols.reserve or A.packed_coefs.reserve: nrows=" < rows(nrows); + std::vector row_offsets(nrows+1); + std::vector row_coords(nrows*3); + + unsigned roffset = 0; + GlobalOrdinal nnz = 0; + + for(int iz=box[2][0]; iz(global_nodes_x, global_nodes_y, global_nodes_z, + ix, iy, iz); + rows[roffset] = mesh.map_id_to_row(row_id); + row_coords[roffset*3] = ix; + row_coords[roffset*3+1] = iy; + row_coords[roffset*3+2] = iz; + row_offsets[roffset++] = nnz; + + GlobalOrdinal row_begin_offset = nnz; + for(int sz=-1; sz<=1; ++sz) { + for(int sy=-1; sy<=1; ++sy) { + for(int sx=-1; sx<=1; ++sx) { + GlobalOrdinal col_id = + get_id(global_nodes_x, global_nodes_y, global_nodes_z, + ix+sx, iy+sy, iz+sz); + if (col_id >= 0 && col_id < global_nrows) { + ++nnz; + } + } + } + } + } + } + } + row_offsets[roffset] = nnz; + init_matrix(A, rows, row_offsets, row_coords, + global_nodes_x, global_nodes_y, global_nodes_z, global_nrows, mesh); + } + catch(...) { + std::cout << "proc " << myproc << " threw an exception in generate_matrix_structure, probably due to running out of memory." 
<< std::endl; + threw_exc = 1; + } +#ifdef HAVE_MPI + int global_throw = 0; + MPI_Allreduce(&threw_exc, &global_throw, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + threw_exc = global_throw; +#endif + if (threw_exc) { + return 1; + } + + return 0; +} + +}//namespace miniFE + +#endif + diff --git a/mkl/basic/get_common_files b/mkl/basic/get_common_files new file mode 100755 index 0000000..dec46a7 --- /dev/null +++ b/mkl/basic/get_common_files @@ -0,0 +1,11 @@ +#!/bin/bash + +dir=../../common + +cp ${dir}/YAML_Doc.cpp . +cp ${dir}/YAML_Doc.hpp . +cp ${dir}/YAML_Element.cpp . +cp ${dir}/YAML_Element.hpp . + +cp ${dir}/generate_info_header . + diff --git a/mkl/basic/gold_files/1x1x2_A.mtx.1.0 b/mkl/basic/gold_files/1x1x2_A.mtx.1.0 new file mode 100644 index 0000000..d337780 --- /dev/null +++ b/mkl/basic/gold_files/1x1x2_A.mtx.1.0 @@ -0,0 +1,113 @@ +12 112 +0 0 1 +0 1 0 +0 2 0 +0 3 0 +0 4 0 +0 5 0 +0 6 0 +0 7 0 +1 0 0 +1 1 1.16667 +1 2 0 +1 3 2.18961e-10 +1 4 0 +1 5 2.18961e-10 +1 6 0 +1 7 0.333333 +2 0 0 +2 1 0 +2 2 1 +2 3 0 +2 4 0 +2 5 0 +2 6 0 +2 7 0 +3 0 0 +3 1 2.18961e-10 +3 2 0 +3 3 2.5 +3 4 0 +3 5 -1 +3 6 0 +3 7 2.18961e-10 +4 0 0 +4 1 0 +4 2 0 +4 3 0 +4 4 1 +4 5 0 +4 6 0 +4 7 0 +4 8 0 +4 9 0 +4 10 0 +4 11 0 +5 0 0 +5 1 2.18961e-10 +5 2 0 +5 3 -1 +5 4 0 +5 5 2.5 +5 6 0 +5 7 2.18961e-10 +5 8 0 +5 9 0 +5 10 0 +5 11 0 +6 0 0 +6 1 0 +6 2 0 +6 3 0 +6 4 0 +6 5 0 +6 6 1 +6 7 0 +6 8 0 +6 9 0 +6 10 0 +6 11 0 +7 0 0 +7 1 0.333333 +7 2 0 +7 3 2.18961e-10 +7 4 0 +7 5 2.18961e-10 +7 6 0 +7 7 1.16667 +7 8 0 +7 9 0 +7 10 0 +7 11 0 +8 4 0 +8 5 0 +8 6 0 +8 7 0 +8 8 1 +8 9 0 +8 10 0 +8 11 0 +9 4 0 +9 5 0 +9 6 0 +9 7 0 +9 8 0 +9 9 0.5 +9 10 0 +9 11 0 +10 4 0 +10 5 0 +10 6 0 +10 7 0 +10 8 0 +10 9 0 +10 10 1 +10 11 0 +11 4 0 +11 5 0 +11 6 0 +11 7 0 +11 8 0 +11 9 0 +11 10 0 +11 11 0.5 diff --git a/mkl/basic/gold_files/1x1x2_A.mtx.2.0 b/mkl/basic/gold_files/1x1x2_A.mtx.2.0 new file mode 100644 index 0000000..363c0bd --- /dev/null +++ b/mkl/basic/gold_files/1x1x2_A.mtx.2.0 @@ -0,0 +1,33 @@ +4 
32 +0 0 1 +0 1 0 +0 2 0 +0 3 0 +0 4 0 +0 5 0 +0 6 0 +0 7 0 +1 0 0 +1 1 1.16667 +1 2 0 +1 3 2.18961e-10 +1 4 0 +1 5 2.18961e-10 +1 6 0 +1 7 0.333333 +2 0 0 +2 1 0 +2 2 1 +2 3 0 +2 4 0 +2 5 0 +2 6 0 +2 7 0 +3 0 0 +3 1 2.18961e-10 +3 2 0 +3 3 2.5 +3 4 0 +3 5 -1 +3 6 0 +3 7 2.18961e-10 diff --git a/mkl/basic/gold_files/1x1x2_A.mtx.2.1 b/mkl/basic/gold_files/1x1x2_A.mtx.2.1 new file mode 100644 index 0000000..3b435ca --- /dev/null +++ b/mkl/basic/gold_files/1x1x2_A.mtx.2.1 @@ -0,0 +1,80 @@ +4 0 0 +4 1 0 +4 2 0 +4 3 0 +4 4 1 +4 5 0 +4 6 0 +4 7 0 +4 8 0 +4 9 0 +4 10 0 +4 11 0 +5 0 0 +5 1 2.18961e-10 +5 2 0 +5 3 -1 +5 4 0 +5 5 2.5 +5 6 0 +5 7 2.18961e-10 +5 8 0 +5 9 0 +5 10 0 +5 11 0 +6 0 0 +6 1 0 +6 2 0 +6 3 0 +6 4 0 +6 5 0 +6 6 1 +6 7 0 +6 8 0 +6 9 0 +6 10 0 +6 11 0 +7 0 0 +7 1 0.333333 +7 2 0 +7 3 2.18961e-10 +7 4 0 +7 5 2.18961e-10 +7 6 0 +7 7 1.16667 +7 8 0 +7 9 0 +7 10 0 +7 11 0 +8 4 0 +8 5 0 +8 6 0 +8 7 0 +8 8 1 +8 9 0 +8 10 0 +8 11 0 +9 4 0 +9 5 0 +9 6 0 +9 7 0 +9 8 0 +9 9 0.5 +9 10 0 +9 11 0 +10 4 0 +10 5 0 +10 6 0 +10 7 0 +10 8 0 +10 9 0 +10 10 1 +10 11 0 +11 4 0 +11 5 0 +11 6 0 +11 7 0 +11 8 0 +11 9 0 +11 10 0 +11 11 0.5 diff --git a/mkl/basic/gold_files/1x1x2_b.vec.1.0 b/mkl/basic/gold_files/1x1x2_b.vec.1.0 new file mode 100644 index 0000000..b0b890a --- /dev/null +++ b/mkl/basic/gold_files/1x1x2_b.vec.1.0 @@ -0,0 +1,13 @@ +12 +0 1 +1 1.25 +2 1 +3 1.41667 +4 1 +5 1.25 +6 1 +7 1.25 +8 1 +9 0 +10 1 +11 0 diff --git a/mkl/basic/gold_files/1x1x2_b.vec.2.0 b/mkl/basic/gold_files/1x1x2_b.vec.2.0 new file mode 100644 index 0000000..15d91a4 --- /dev/null +++ b/mkl/basic/gold_files/1x1x2_b.vec.2.0 @@ -0,0 +1,5 @@ +4 +0 1 +1 1.25 +2 1 +3 1.41667 diff --git a/mkl/basic/gold_files/1x1x2_b.vec.2.1 b/mkl/basic/gold_files/1x1x2_b.vec.2.1 new file mode 100644 index 0000000..a8349a9 --- /dev/null +++ b/mkl/basic/gold_files/1x1x2_b.vec.2.1 @@ -0,0 +1,8 @@ +4 1 +5 1.25 +6 1 +7 1.25 +8 1 +9 0 +10 1 +11 0 diff --git a/mkl/basic/gold_files/1x1x2_x.vec.1.0 
b/mkl/basic/gold_files/1x1x2_x.vec.1.0 new file mode 100644 index 0000000..78dcba9 --- /dev/null +++ b/mkl/basic/gold_files/1x1x2_x.vec.1.0 @@ -0,0 +1,13 @@ +12 +0 1 +1 0.833333 +2 1 +3 0.912698 +4 1 +5 0.865079 +6 1 +7 0.833333 +8 1 +9 0 +10 1 +11 0 diff --git a/mkl/basic/gold_files/1x1x2_x.vec.2.0 b/mkl/basic/gold_files/1x1x2_x.vec.2.0 new file mode 100644 index 0000000..024797b --- /dev/null +++ b/mkl/basic/gold_files/1x1x2_x.vec.2.0 @@ -0,0 +1,5 @@ +4 +0 1 +1 0.833333 +2 1 +3 0.912698 diff --git a/mkl/basic/gold_files/1x1x2_x.vec.2.1 b/mkl/basic/gold_files/1x1x2_x.vec.2.1 new file mode 100644 index 0000000..f774883 --- /dev/null +++ b/mkl/basic/gold_files/1x1x2_x.vec.2.1 @@ -0,0 +1,8 @@ +4 1 +5 0.865079 +6 1 +7 0.833333 +8 1 +9 0 +10 1 +11 0 diff --git a/mkl/basic/imbalance.hpp b/mkl/basic/imbalance.hpp new file mode 100644 index 0000000..f801efc --- /dev/null +++ b/mkl/basic/imbalance.hpp @@ -0,0 +1,271 @@ +#ifndef _imbalance_hpp_ +#define _imbalance_hpp_ + +#include + +#ifdef HAVE_MPI +#include +#endif + +#include +#include +#include + +namespace miniFE { + +const int X = 0; +const int Y = 1; +const int Z = 2; +const int NONE = 3; + +const int LOWER = 0; +const int UPPER = 1; + +template +void +compute_imbalance(const Box& global_box, + const Box& local_box, + float& largest_imbalance, + float& std_dev, + YAML_Doc& doc, + bool record_in_doc) +{ + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + GlobalOrdinal local_nrows = get_num_ids(local_box); + GlobalOrdinal min_nrows = 0, max_nrows = 0, global_nrows = 0; + int min_proc = myproc, max_proc = myproc; + get_global_min_max(local_nrows, global_nrows, min_nrows, min_proc, + max_nrows, max_proc); + + float avg_nrows = global_nrows; + avg_nrows /= numprocs; + + //largest_imbalance will be the difference between the min (or max) + //rows-per-processor and avg_nrows, represented as a percentage: + largest_imbalance = 
percentage_difference(min_nrows, avg_nrows); + + float tmp = percentage_difference(max_nrows, avg_nrows); + if (tmp > largest_imbalance) largest_imbalance = tmp; + + std_dev = compute_std_dev_as_percentage(local_nrows, avg_nrows); + + if (myproc == 0 && record_in_doc) { + doc.add("Rows-per-proc Load Imbalance",""); + doc.get("Rows-per-proc Load Imbalance")->add("Largest (from avg, %)",largest_imbalance); + doc.get("Rows-per-proc Load Imbalance")->add("Std Dev (%)",std_dev); + } +} + +std::pair +decide_how_to_grow(const Box& global_box, const Box& local_box) +{ + std::pair result(NONE,UPPER); + + if (local_box[Z][UPPER] < global_box[Z][UPPER]) { + result.first = Z; + result.second = UPPER; + return result; + } + if (local_box[Z][LOWER] > global_box[Z][LOWER]) { + result.first = Z; + result.second = LOWER; + return result; + } + if (local_box[Y][UPPER] < global_box[Y][UPPER]) { + result.first = Y; + result.second = UPPER; + return result; + } + if (local_box[Y][LOWER] > global_box[Y][LOWER]) { + result.first = Y; + result.second = LOWER; + return result; + } + if (local_box[X][UPPER] < global_box[X][UPPER]) { + result.first = X; + result.second = UPPER; + return result; + } + if (local_box[X][LOWER] > global_box[X][LOWER]) { + result.first = X; + result.second = LOWER; + return result; + } + return result; +} + +std::pair +decide_how_to_shrink(const Box& global_box, const Box& local_box) +{ + std::pair result(NONE,UPPER); + + if (local_box[Z][UPPER] < global_box[Z][UPPER] && local_box[Z][UPPER]-local_box[Z][LOWER] > 2) { + result.first = Z; + result.second = UPPER; + return result; + } + if (local_box[Z][LOWER] > global_box[Z][LOWER] && local_box[Z][UPPER]-local_box[Z][LOWER] > 2) { + result.first = Z; + result.second = LOWER; + return result; + } + if (local_box[Y][UPPER] < global_box[Y][UPPER] && local_box[Y][UPPER]-local_box[Y][LOWER] > 2) { + result.first = Y; + result.second = UPPER; + return result; + } + if (local_box[Y][LOWER] > global_box[Y][LOWER] && 
local_box[Y][UPPER]-local_box[Y][LOWER] > 2) { + result.first = Y; + result.second = LOWER; + return result; + } + if (local_box[X][UPPER] < global_box[X][UPPER] && local_box[X][UPPER]-local_box[X][LOWER] > 2) { + result.first = X; + result.second = UPPER; + return result; + } + if (local_box[X][LOWER] > global_box[X][LOWER] && local_box[X][UPPER]-local_box[X][LOWER] > 2) { + result.first = X; + result.second = LOWER; + return result; + } + return result; +} + /* add_imbalance: deliberately unbalances the decomposition. While the measured imbalance is below the requested value, the face chosen by max_proc is grown and the face chosen by min_proc is shrunk by one index per iteration, and every rank shifts whichever of its own bounds lies on a moving face. Stops early when no face can move or when every move is vetoed. Modifies local_box in place; returns immediately when numprocs is 1. NOTE(review): the template parameter list and explicit template arguments appear to have been stripped from this text; GlobalOrdinal is used as a type in the body. */ +template +void +add_imbalance(const Box& global_box, + Box& local_box, + float imbalance, + YAML_Doc& doc) +{ + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (numprocs == 1) { /* single-process run (always the case without HAVE_MPI): leave local_box untouched */ + return; + } + /* measure the starting point; cur_imbalance and cur_std_dev are presumably out-parameters of compute_imbalance - confirm */ + float cur_imbalance = 0, cur_std_dev = 0; + compute_imbalance(global_box, local_box, + cur_imbalance, cur_std_dev, doc, false); + + while (cur_imbalance < imbalance) { /* one face move per iteration; the two break statements below are the early exits */ + GlobalOrdinal local_nrows = get_num_ids(local_box); + GlobalOrdinal min_nrows = 0, max_nrows = 0, global_nrows = 0; + int min_proc = myproc, max_proc = myproc; + get_global_min_max(local_nrows, global_nrows, min_nrows, min_proc, + max_nrows, max_proc); + /* NOTE(review): presumably min_proc and max_proc come back as the ranks owning the fewest and the most rows - confirm against get_global_min_max */ + std::pair grow(NONE,UPPER); + int grow_axis_val = -1; + std::pair shrink(NONE,UPPER); + int shrink_axis_val = -1; + /* only max_proc picks a face to grow and only min_proc a face to shrink; every other rank keeps (NONE,UPPER) */ + if (myproc == max_proc) { + grow = decide_how_to_grow(global_box, local_box); + if (grow.first != NONE) { + grow_axis_val = local_box[grow.first][grow.second]; + } + } + if (myproc == min_proc) { + shrink = decide_how_to_shrink(global_box, local_box); + if (shrink.first != NONE) { + shrink_axis_val = local_box[shrink.first][shrink.second]; + } + } + /* pack {axis, end, xlo, xhi, ylo, yhi, zlo, zhi} so the decision and the deciding rank's box travel in one broadcast */ + int grow_info[8] = {grow.first, grow.second, + local_box[X][0], local_box[X][1], + local_box[Y][0], local_box[Y][1], + local_box[Z][0], local_box[Z][1]}; + + int shrink_info[8] = {shrink.first, shrink.second, + local_box[X][0], local_box[X][1], + local_box[Y][0], local_box[Y][1], + local_box[Z][0], local_box[Z][1]}; +#ifdef HAVE_MPI + 
MPI_Bcast(&grow_info[0], 8, MPI_INT, max_proc, MPI_COMM_WORLD); + MPI_Bcast(&shrink_info[0], 8, MPI_INT, min_proc, MPI_COMM_WORLD); +#endif + /* from here on every rank works with max_proc's grow decision and min_proc's shrink decision */ + int grow_axis = grow_info[0]; + int grow_end = grow_info[1]; + int shrink_axis = shrink_info[0]; + int shrink_end = shrink_info[1]; + int grow_incr = 1; /* growing moves an UPPER face by 1 and a LOWER face by -1 */ + if (grow_end == LOWER) grow_incr = -1; + int shrink_incr = -1; /* shrinking moves an UPPER face by -1 and a LOWER face by 1 */ + if (shrink_end == LOWER) shrink_incr = 1; + if (grow_axis != NONE) grow_axis_val = grow_info[2+grow_axis*2+grow_end]; /* coordinate of the face being moved, read from the broadcast box */ + if (shrink_axis != NONE) shrink_axis_val = shrink_info[2+shrink_axis*2+shrink_end]; + /* neither deciding rank found a face to move: stop */ + if (grow_axis == NONE && shrink_axis == NONE) break; + /* veto the grow when this rank has the bound that the moving face would push inward (its low bound for an increment of 1, its high bound for -1) and its extent along that axis is already below 2 */ + bool grow_status = grow_axis==NONE ? false : true; + if (grow_axis != NONE) { + if ((grow_incr == 1 && local_box[grow_axis][0] == grow_axis_val) || + (grow_incr == -1 && local_box[grow_axis][1] == grow_axis_val)) { + if (local_box[grow_axis][1] - local_box[grow_axis][0] < 2) { + grow_status = false; + } + } + } + /* same veto test for the shrink move */ + bool shrink_status = shrink_axis==NONE ? false : true; + if (shrink_axis != NONE) { + if ((shrink_incr == 1 && local_box[shrink_axis][0] == shrink_axis_val) || + (shrink_incr == -1 && local_box[shrink_axis][1] == shrink_axis_val)) { + if (local_box[shrink_axis][1] - local_box[shrink_axis][0] < 2) { + shrink_status = false; + } + } + } + /* combine the vetoes: each rank contributes 1 per rejected move, so a positive sum means at least one rank objected */ +#ifdef HAVE_MPI + int statusints[2] = { grow_status ? 0 : 1, shrink_status ? 0 : 1 }; + int globalstatus[2] = { 0, 0 }; + MPI_Allreduce(&statusints, &globalstatus, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + grow_status = globalstatus[0]>0 ? false : true; + shrink_status = globalstatus[1]>0 ? 
false : true; +#endif + /* both moves were rejected: no further progress is possible */ + if (grow_status == false && shrink_status == false) break; + /* apply the grow: any bound of this rank's box lying on the moving face shifts by grow_incr */ + if (grow_status && grow_axis != NONE) { + if (local_box[grow_axis][0] == grow_axis_val) { + local_box[grow_axis][0] += grow_incr; + } + + if (local_box[grow_axis][1] == grow_axis_val) { + local_box[grow_axis][1] += grow_incr; + } + } + /* apply the shrink in the same way with shrink_incr */ + if (shrink_status && shrink_axis != NONE) { + if (local_box[shrink_axis][0] == shrink_axis_val) { + local_box[shrink_axis][0] += shrink_incr; + } + + if (local_box[shrink_axis][1] == shrink_axis_val) { + local_box[shrink_axis][1] += shrink_incr; + } + } + /* re-measure for the loop test */ + compute_imbalance(global_box, local_box, + cur_imbalance, cur_std_dev, doc, false); + } +} + +}//namespace miniFE + +#endif + diff --git a/mkl/basic/main.cpp b/mkl/basic/main.cpp new file mode 100644 index 0000000..ed3753f --- /dev/null +++ b/mkl/basic/main.cpp @@ -0,0 +1,247 @@ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER +#include +#include +#include +#include + +#include + +#include + +#ifdef HAVE_MPI +#include +#endif + +//-------------------------------------------------------------------- +#include +//-------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include + +#if MINIFE_INFO != 0 +#include +#else +#include +#endif + +//The following macros should be specified as compile-macros in the +//makefile. They are defaulted here just in case... +#ifndef MINIFE_SCALAR +#define MINIFE_SCALAR double +#endif +#ifndef MINIFE_LOCAL_ORDINAL +#define MINIFE_LOCAL_ORDINAL int +#endif +#ifndef MINIFE_GLOBAL_ORDINAL +#define MINIFE_GLOBAL_ORDINAL int +#endif + +// ************************************************************************ + +void add_params_to_yaml(YAML_Doc& doc, miniFE::Parameters& params); +void add_configuration_to_yaml(YAML_Doc& doc, int numprocs, int numthreads); +void add_timestring_to_yaml(YAML_Doc& doc); + +inline void print_box(int myproc, const char* name, const Box& box, + const char* name2, const Box& box2) +{ + std::cout << "proc " << myproc << " "< local_boxes(numprocs); + + box_partition(0, numprocs, 2, global_box, &local_boxes[0]); + + Box& my_box = local_boxes[myproc]; + +//print_box(myproc, "global-box", global_box, "local-box", my_box); + + std::ostringstream osstr; + osstr << "miniFE." << params.nx << "x" << params.ny << "x" << params.nz; +#ifdef HAVE_MPI + osstr << ".P"<. + //To run miniFE with float instead of double, or 'long long' instead of int, + //etc., change these template-parameters by changing the macro definitions in + //the makefile or on the make command-line. 
+ + miniFE::driver< MINIFE_SCALAR, MINIFE_LOCAL_ORDINAL, MINIFE_GLOBAL_ORDINAL, + ComputeNodeType>(global_box, my_box, compute_node, params, doc); + + miniFE::timer_type total_time = miniFE::mytimer() - start_time; + + if (myproc == 0) { + doc.add("Total Program Time",total_time); + std::cout << doc.generateYAML() << std::endl; + } + + miniFE::finalize_mpi(); + + return 0; +} + /* add_params_to_yaml: adds the "Global Run Parameters" section to doc and records under it the nx/ny/nz dimensions, load_imbalance, and mv_overlap_comm_comp rendered as "1 (yes)" or "0 (no)". */ +void add_params_to_yaml(YAML_Doc& doc, miniFE::Parameters& params) +{ + doc.add("Global Run Parameters",""); + doc.get("Global Run Parameters")->add("dimensions",""); + doc.get("Global Run Parameters")->get("dimensions")->add("nx",params.nx); + doc.get("Global Run Parameters")->get("dimensions")->add("ny",params.ny); + doc.get("Global Run Parameters")->get("dimensions")->add("nz",params.nz); + doc.get("Global Run Parameters")->add("load_imbalance", params.load_imbalance); + if (params.mv_overlap_comm_comp == 1) { /* any value other than 1 is reported as "0 (no)" */ + std::string val("1 (yes)"); + doc.get("Global Run Parameters")->add("mv_overlap_comm_comp", val); + } + else { + std::string val("0 (no)"); + doc.get("Global Run Parameters")->add("mv_overlap_comm_comp", val); + } +} + /* add_configuration_to_yaml: records the process count, the compiled-in threading back end, and the "Platform" and "Build" sections taken from MINIFE_* build macros. NOTE(review): it appends to "Global Run Parameters" through doc.get without adding that section itself, so the section presumably has to exist already - confirm. */ +void add_configuration_to_yaml(YAML_Doc& doc, int numprocs, int numthreads) +{ + doc.get("Global Run Parameters")->add("number of processors", numprocs); + std::string threading("none"); + /* when several MINIFE_HAVE_* macros are defined the last assignment wins (CUDA over TBB over TPI) */ +#ifdef MINIFE_HAVE_TPI + threading = "TPI"; +#endif +#ifdef MINIFE_HAVE_TBB + threading = "TBB"; +#endif +#ifdef MINIFE_HAVE_CUDA + threading = "CUDA"; +#endif + if (threading != "none") { /* numthreads is only reported when a threading back end is compiled in */ + doc.get("Global Run Parameters")->add("(per proc) numthreads",numthreads); + } + + doc.add("Platform",""); + doc.get("Platform")->add("hostname",MINIFE_HOSTNAME); + doc.get("Platform")->add("kernel name",MINIFE_KERNEL_NAME); + doc.get("Platform")->add("kernel release",MINIFE_KERNEL_RELEASE); + doc.get("Platform")->add("processor",MINIFE_PROCESSOR); + + doc.add("Build",""); + doc.get("Build")->add("CXX",MINIFE_CXX); + doc.get("Build")->add("compiler version",MINIFE_CXX_VERSION); + 
doc.get("Build")->add("CXXFLAGS",MINIFE_CXXFLAGS); + std::string using_mpi("no"); /* reported as "yes" only when HAVE_MPI is defined */ +#ifdef HAVE_MPI + using_mpi = "yes"; +#endif + doc.get("Build")->add("using MPI",using_mpi); + doc.get("Build")->add("Threading",threading.c_str()); +} + /* add_timestring_to_yaml: adds a "Run Date/Time" entry to doc holding the current local time formatted as YYYY-MM-DD, HH-MM-SS (month, day, hour, minute and second zero-padded to two digits; hyphens also separate the time fields). */ +void add_timestring_to_yaml(YAML_Doc& doc) +{ + std::time_t rawtime; + struct tm * timeinfo; + std::time(&rawtime); + timeinfo = std::localtime(&rawtime); /* NOTE(review): the returned pointer is dereferenced below without a null check */ + std::ostringstream osstr; + osstr.fill('0'); /* pad character for the width(2) fields below */ + osstr << timeinfo->tm_year+1900 << "-"; /* tm_year counts years since 1900 */ + osstr.width(2); osstr << timeinfo->tm_mon+1 << "-"; /* tm_mon is zero-based; width applies only to the next insertion, hence the repeated calls */ + osstr.width(2); osstr << timeinfo->tm_mday << ", "; + osstr.width(2); osstr << timeinfo->tm_hour << "-"; + osstr.width(2); osstr << timeinfo->tm_min << "-"; + osstr.width(2); osstr << timeinfo->tm_sec; + std::string timestring = osstr.str(); + doc.add("Run Date/Time",timestring); +} + diff --git a/mkl/basic/make_local_matrix.hpp b/mkl/basic/make_local_matrix.hpp new file mode 100644 index 0000000..99c2cf7 --- /dev/null +++ b/mkl/basic/make_local_matrix.hpp @@ -0,0 +1,440 @@ +#ifndef _make_local_matrix_hpp_ +#define _make_local_matrix_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include + +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template +void +make_local_matrix(MatrixType& A) +{ +#ifdef HAVE_MPI + int numprocs = 1, myproc = 0; + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + + if (numprocs < 2) { + A.num_cols = A.rows.size(); + A.has_local_indices = true; + return; + } + + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + std::map externals; + LocalOrdinal num_external = 0; + + //Extract Matrix pieces + + size_t local_nrow = A.rows.size(); + GlobalOrdinal start_row = local_nrow>0 ? A.rows[0] : -1; + GlobalOrdinal stop_row = local_nrow>0 ? A.rows[local_nrow-1] : -1; + + // We need to convert the index values for the rows on this processor + // to a local index space. We need to: + // - Determine if each index reaches to a local value or external value + // - If local, subtract start_row from index value to get local index + // - If external, find out if it is already accounted for. + // - If so, then do nothing, + // - otherwise + // - add it to the list of external indices, + // - find out which processor owns the value. 
+ // - Set up communication for sparse MV operation + + /////////////////////////////////////////// + // Scan the indices and transform to local + /////////////////////////////////////////// + + std::vector& external_index = A.external_index; + + for(size_t i=0; i tmp_buffer(numprocs, 0); // Temp buffer space needed below + + // Build list of global index offset + + std::vector global_index_offsets(numprocs, 0); + + tmp_buffer[myproc] = start_row; // This is my start row + + // This call sends the start_row of each ith processor to the ith + // entry of global_index_offsets on all processors. + // Thus, each processor knows the range of indices owned by all + // other processors. + // Note: There might be a better algorithm for doing this, but this + // will work... + + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + MPI_Allreduce(&tmp_buffer[0], &global_index_offsets[0], numprocs, mpi_dtype, + MPI_SUM, MPI_COMM_WORLD); + + // Go through list of externals and find the processor that owns each + std::vector external_processor(num_external); + + for(LocalOrdinal i=0; i=0; --j) { + if (global_index_offsets[j] <= cur_ind && global_index_offsets[j] >= 0) { + external_processor[i] = j; + break; + } + } + } + + ///////////////////////////////////////////////////////////////////////// + // Sift through the external elements. For each newly encountered external + // point assign it the next index in the sequence. Then look for other + // external elements who are updated by the same node and assign them the next + // set of index numbers in the sequence (ie. elements updated by the same node + // have consecutive indices). 
+ ///////////////////////////////////////////////////////////////////////// + + size_t count = local_nrow; + std::vector& external_local_index = A.external_local_index; + external_local_index.assign(num_external, -1); + + for(LocalOrdinal i=0; i new_external_processor(num_external, 0); + + for(int i=0; i No external elements are updated by + // processor i. + // tmp_neighbors[i] = x ==> (x-1)/numprocs elements are updated from + // processor i. + /// + //////////////////////////////////////////////////////////////////////// + + std::vector tmp_neighbors(numprocs, 0); + + int num_recv_neighbors = 0; + int length = 1; + + for(LocalOrdinal i=0; i recv_list; + recv_list.push_back(new_external_processor[0]); + for(LocalOrdinal i=1; i send_list(num_send_neighbors, 0); + + // + // first post receives, these are immediate receives + // Do not wait for result to come, will do that at the + // wait call below. + // + int MPI_MY_TAG = 99; + + std::vector request(num_send_neighbors); + for(int i=0; i new_external(num_external); + for(LocalOrdinal i=0; i lengths(num_recv_neighbors); + + ++MPI_MY_TAG; + + // First post receives + + for(int i=0; i& neighbors = A.neighbors; + std::vector& recv_length = A.recv_length; + std::vector& send_length = A.send_length; + + neighbors.resize(num_recv_neighbors, 0); + A.request.resize(num_recv_neighbors); + recv_length.resize(num_recv_neighbors, 0); + send_length.resize(num_recv_neighbors, 0); + + LocalOrdinal j = 0; + for(int i=0; i $(DESTDIR)$(includedir)/Makefile.export.threadpool.macros + +uninstall-hook: + rm -f $(includedir)/Makefile.export.threadpool + rm -f $(includedir)/Makefile.export.threadpool.macros + +else + +install-exec-hook: + +uninstall-hook: + +endif + +## ####################################################################### +## Subdirectories to be make'd recursively +## ####################################################################### +#We now build tests and examples through separate make targets, rather than 
+#during "make". We still need to conditionally include the test and example +#in SUBDIRS, even though BUILD_TESTS and BUILD_EXAMPLES will never be +#defined, so that the tests and examples are included in the distribution +#tarball. + +if SUB_TEST +TEST_SUBDIR=test +endif + +#if SUB_EXAMPLE +#EXAMPLE_SUBDIR=example +#endif + +# #np# - The following make targets must be defined for all packages. +# #np# - If the package does not have tests or examples, replace the +# #np# - corresponding rules with something like: +# #np# - @echo "new_package does not have any tests yet" +if BUILD_TESTS +tests: + @echo "" + @echo "Now building ThreadPool tests." + @echo "" + cd $(top_builddir)/test && $(MAKE) + @echo "" + @echo "Finished building ThreadPool tests." + @echo "" +else +tests: + @echo "ThreadPool tests were disabled at configure time" +endif + +examples: + @echo "ThreadPool does not have any examples yet" + +install-examples: + @echo "ThreadPool does not have any examples yet" + +clean-tests: + cd $(top_builddir)/test && $(MAKE) clean + +clean-examples: + @echo "ThreadPool does not have any examples yet" + +everything: + $(MAKE) && $(MAKE) examples && $(MAKE) tests + +clean-everything: + $(MAKE) clean-examples && $(MAKE) clean-tests && $(MAKE) clean + +install-everything: + $(MAKE) install && $(MAKE) install-examples + +SUBDIRS = src $(TEST_SUBDIR) + +## ####################################################################### +## The below targets allow you to use the new +## testharness to run the test suite as make targets +## ####################################################################### + +TRILINOS_HOME_DIR=@abs_top_srcdir@/../.. +TRILINOS_BUILD_DIR=@abs_top_builddir@/../.. 
+TRILINOS_TEST_CATEGORY=INSTALL + +runtests-serial : + $(PERL_EXE) $(TRILINOS_HOME_DIR)/commonTools/test/utilities/runtests \ + --trilinos-dir=$(TRILINOS_HOME_DIR) \ + --comm=serial \ + --build-dir=$(TRILINOS_BUILD_DIR) \ + --category=$(TRILINOS_TEST_CATEGORY) \ + --output-dir=@abs_top_builddir@/test/runtests-results \ + --verbosity=1 \ + --packages=ThreadPool + +runtests-mpi : + $(PERL_EXE) $(TRILINOS_HOME_DIR)/commonTools/test/utilities/runtests \ + --trilinos-dir=$(TRILINOS_HOME_DIR) \ + --comm=mpi \ + --mpi-go=$(TRILINOS_MPI_GO) \ + --build-dir=$(TRILINOS_BUILD_DIR) \ + --category=$(TRILINOS_TEST_CATEGORY) \ + --output-dir=@abs_top_builddir@/test/runtests-results \ + --verbosity=1 \ + --packages=ThreadPool + +if HAVE_MPI +THREADPOOL_CHECK_COMM=mpi +else +THREADPOOL_CHECK_COMM=serial +endif + diff --git a/mkl/basic/optional/ThreadPool/Makefile.export.threadpool.in b/mkl/basic/optional/ThreadPool/Makefile.export.threadpool.in new file mode 100644 index 0000000..66bfda9 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/Makefile.export.threadpool.in @@ -0,0 +1,9 @@ +_THREADPOOL_INCLUDES = -I@abs_top_srcdir@/include -I@abs_top_builddir@/include + +_THREADPOOL_LIBS = @LDFLAGS@ -L@abs_top_builddir@/src -ltpi $(LIBS) + +@USING_GNUMAKE_TRUE@THREADPOOL_INCLUDES = $(shell @PERL_EXE@ @abs_top_srcdir@/config/strip_dup_incl_paths.pl $(_THREADPOOL_INCLUDES)) +@USING_GNUMAKE_TRUE@THREADPOOL_LIBS = $(shell @PERL_EXE@ @abs_top_srcdir@/config/strip_dup_libs.pl $(_THREADPOOL_LIBS)) + +@USING_GNUMAKE_FALSE@THREADPOOL_INCLUDES = $(_THREADPOOL_INCLUDES) +@USING_GNUMAKE_FALSE@THREADPOOL_LIBS = $(_THREADPOOL_LIBS) diff --git a/mkl/basic/optional/ThreadPool/Makefile.in b/mkl/basic/optional/ThreadPool/Makefile.in new file mode 100644 index 0000000..3e4abfd --- /dev/null +++ b/mkl/basic/optional/ThreadPool/Makefile.in @@ -0,0 +1,777 @@ +# Makefile.in generated by automake 1.10 from Makefile.am. 
+# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# @HEADER +# ************************************************************************ +# +# ThreadPool Package +# Copyright (2008) Sandia Corporation +# +# Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +# license for use of this work by or on behalf of the U.S. Government. +# +# This library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of the +# License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA +# Questions? 
Contact Carter Edwards (hcedwar@sandia.gov) +# +# ************************************************************************ +# @HEADER +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = . +DIST_COMMON = $(am__configure_deps) $(srcdir)/Makefile.am \ + $(srcdir)/Makefile.export.threadpool.in $(srcdir)/Makefile.in \ + $(top_srcdir)/configure config/config.guess config/config.sub \ + config/depcomp config/install-sh config/missing +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/acx_pthread.m4 \ + $(top_srcdir)/config/tac_arg_check_mpi.m4 \ + $(top_srcdir)/config/tac_arg_config_mpi.m4 \ + $(top_srcdir)/config/tac_arg_enable_export-makefiles.m4 \ + $(top_srcdir)/config/tac_arg_enable_feature.m4 \ + $(top_srcdir)/config/tac_arg_enable_feature_sub_check.m4 \ + $(top_srcdir)/config/tac_arg_with_ar.m4 \ + $(top_srcdir)/config/tac_arg_with_flags.m4 \ + $(top_srcdir)/config/tac_arg_with_incdirs.m4 \ + $(top_srcdir)/config/tac_arg_with_libdirs.m4 \ + $(top_srcdir)/config/tac_arg_with_libs.m4 \ + $(top_srcdir)/config/tac_arg_with_perl.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/ThreadPool_config.h +CONFIG_CLEAN_FILES = Makefile.export.threadpool +SOURCES = +DIST_SOURCES 
= +RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = src test +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) +am__remove_distdir = \ + { test ! -d $(distdir) \ + || { find $(distdir) -type d ! -perm -200 -exec chmod u+w {} ';' \ + && rm -fr $(distdir); }; } +DIST_ARCHIVES = $(distdir).tar.gz +GZIP_ENV = --best +distuninstallcheck_listfiles = find . -type f -print +distcleancheck_listfiles = find . -type f -print +ACLOCAL = @ACLOCAL@ +ALTERNATE_AR = @ALTERNATE_AR@ +AMTAR = @AMTAR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +HAVE_PERL = @HAVE_PERL@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MPI_CC_EXISTS = @MPI_CC_EXISTS@ +MPI_CXX = @MPI_CXX@ +MPI_CXX_EXISTS = @MPI_CXX_EXISTS@ +MPI_F77_EXISTS = @MPI_F77_EXISTS@ +MPI_TEMP_CXX = @MPI_TEMP_CXX@ +OBJEXT = @OBJEXT@ 
+PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERL_EXE = @PERL_EXE@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_aux_dir = @ac_aux_dir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = foreign +ACLOCAL_AMFLAGS = -I config + +# +# I believe that by switching to AUX_DIR(../../config) one +# could 
get rid of these. +# +#np# For a typical package, there is no reason to distribute these files +#np# because users should not have to bootstrap. We distribute them with +#np# new package so that the files can be used in creating the +#np# configure script for other packages. +EXTRA_DIST = \ +config/generate-makeoptions.pl \ +config/replace-install-prefix.pl config/string-replace.pl \ +config/strip_dup_incl_paths.pl config/strip_dup_libs.pl \ +config/token-replace.pl + +AUX_DIST = config/install-sh config/missing config/mkinstalldirs +# +# Again, I hope that AUX_DIR(../../config) eliminates these +# config/install-sh config/missing config/mkinstalldirs +MAINTAINERCLEANFILES = Makefile.in aclocal.m4 autom4te.cache/* \ + configure config.status config.log \ + src/common/config-h.in src/common/stamp-h.in \ + $(AUX_DIST) + + +#We now build tests and examples through separate make targets, rather than +#during "make". We still need to conditionally include the test and example +#in SUBDIRS, even though BUILD_TESTS and BUILD_EXAMPLES will never be +#defined, so that the tests and examples are included in the distribution +#tarball. +@SUB_TEST_TRUE@TEST_SUBDIR = test +SUBDIRS = src $(TEST_SUBDIR) +TRILINOS_HOME_DIR = @abs_top_srcdir@/../.. +TRILINOS_BUILD_DIR = @abs_top_builddir@/../.. 
+TRILINOS_TEST_CATEGORY = INSTALL +@HAVE_MPI_FALSE@THREADPOOL_CHECK_COMM = serial +@HAVE_MPI_TRUE@THREADPOOL_CHECK_COMM = mpi +all: all-recursive + +.SUFFIXES: +am--refresh: + @: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + echo ' cd $(srcdir) && $(AUTOMAKE) --foreign '; \ + cd $(srcdir) && $(AUTOMAKE) --foreign \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --foreign Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + echo ' $(SHELL) ./config.status'; \ + $(SHELL) ./config.status;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(srcdir) && $(AUTOCONF) +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) +Makefile.export.threadpool: $(top_builddir)/config.status $(srcdir)/Makefile.export.threadpool.in + cd $(top_builddir) && $(SHELL) ./config.status $@ + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. 
+$(RECURSIVE_TARGETS): + @failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +$(RECURSIVE_CLEAN_TARGETS): + @failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: 
$(DISTFILES) + $(am__remove_distdir) + test -d $(distdir) || mkdir $(distdir) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done + list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + distdir=`$(am__cd) $(distdir) && pwd`; \ + top_distdir=`$(am__cd) $(top_distdir) && pwd`; \ + (cd $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$top_distdir" \ + distdir="$$distdir/$$subdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + distdir) \ + || exit 1; \ + fi; \ + done + -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d ! 
-perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r $(distdir) +dist-gzip: distdir + tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz + $(am__remove_distdir) + +dist-bzip2: distdir + tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2 + $(am__remove_distdir) + +dist-tarZ: distdir + tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z + $(am__remove_distdir) + +dist-shar: distdir + shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz + $(am__remove_distdir) + +dist-zip: distdir + -rm -f $(distdir).zip + zip -rq $(distdir).zip $(distdir) + $(am__remove_distdir) + +dist dist-all: distdir + tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz + $(am__remove_distdir) + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile. +distcheck: dist + case '$(DIST_ARCHIVES)' in \ + *.tar.gz*) \ + GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(am__untar) ;;\ + *.tar.bz2*) \ + bunzip2 -c $(distdir).tar.bz2 | $(am__untar) ;;\ + *.tar.Z*) \ + uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ + *.shar.gz*) \ + GZIP=$(GZIP_ENV) gunzip -c $(distdir).shar.gz | unshar ;;\ + *.zip*) \ + unzip $(distdir).zip ;;\ + esac + chmod -R a-w $(distdir); chmod a+w $(distdir) + mkdir $(distdir)/_build + mkdir $(distdir)/_inst + chmod a-w $(distdir) + dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ + && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ + && cd $(distdir)/_build \ + && ../configure --srcdir=.. 
--prefix="$$dc_install_base" \ + $(DISTCHECK_CONFIGURE_FLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) dvi \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ + distuninstallcheck \ + && chmod -R a-w "$$dc_install_base" \ + && ({ \ + (cd ../.. && umask 077 && mkdir "$$dc_destdir") \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ + distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ + } || { rm -rf "$$dc_destdir"; exit 1; }) \ + && rm -rf "$$dc_destdir" \ + && $(MAKE) $(AM_MAKEFLAGS) dist \ + && rm -rf $(DIST_ARCHIVES) \ + && $(MAKE) $(AM_MAKEFLAGS) distcleancheck + $(am__remove_distdir) + @(echo "$(distdir) archives ready for distribution: "; \ + list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ + sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' +distuninstallcheck: + @cd $(distuninstallcheck_dir) \ + && test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \ + || { echo "ERROR: files left after uninstall:" ; \ + if test -n "$(DESTDIR)"; then \ + echo " (check DESTDIR support)"; \ + fi ; \ + $(distuninstallcheck_listfiles) ; \ + exit 1; } >&2 +distcleancheck: distclean + @if test '$(srcdir)' = . 
; then \ + echo "ERROR: distcleancheck can only run from a VPATH build" ; \ + exit 1 ; \ + fi + @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left in build directory after distclean:" ; \ + $(distcleancheck_listfiles) ; \ + exit 1; } >&2 +check-am: all-am +check: check-recursive +all-am: Makefile all-local +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) +clean: clean-recursive + +clean-am: clean-generic mostlyclean-am + +distclean: distclean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-exec-am: + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook + +install-html: install-html-recursive + +install-info: install-info-recursive + +install-man: + +install-pdf: install-pdf-recursive + +install-ps: install-ps-recursive + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf $(top_srcdir)/autom4te.cache + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) uninstall-hook + +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \ + install-exec-am install-strip uninstall-am + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am all-local am--refresh check check-am clean \ + clean-generic ctags ctags-recursive dist dist-all dist-bzip2 \ + dist-gzip dist-shar dist-tarZ dist-zip distcheck distclean \ + distclean-generic distclean-tags distcleancheck distdir \ + distuninstallcheck dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-exec-hook \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs installdirs-am maintainer-clean \ + 
maintainer-clean-generic mostlyclean mostlyclean-generic pdf \ + pdf-am ps ps-am tags tags-recursive uninstall uninstall-am \ + uninstall-hook + + +#The following line helps the test harness recover from build errors. + +all-local: + @echo "Trilinos package ThreadPool built successfully" + +@USING_EXPORT_MAKEFILES_TRUE@install-exec-hook: +@USING_EXPORT_MAKEFILES_TRUE@ mkdir -p $(DESTDIR)$(includedir) +@USING_EXPORT_MAKEFILES_TRUE@ cp $(top_builddir)/Makefile.export.threadpool $(DESTDIR)$(includedir)/. +@USING_EXPORT_MAKEFILES_TRUE@ $(PERL_EXE) $(top_srcdir)/config/replace-install-prefix.pl \ +@USING_EXPORT_MAKEFILES_TRUE@ --exec-prefix=$(exec_prefix) \ +@USING_EXPORT_MAKEFILES_TRUE@ --my-export-makefile=Makefile.export.threadpool \ +@USING_EXPORT_MAKEFILES_TRUE@ --my-abs-top-srcdir=@abs_top_srcdir@ \ +@USING_EXPORT_MAKEFILES_TRUE@ --my-abs-incl-dirs=@abs_top_builddir@/src:@abs_top_srcdir@/src \ +@USING_EXPORT_MAKEFILES_TRUE@ --my-abs-lib-dirs=@abs_top_builddir@/src +@USING_EXPORT_MAKEFILES_TRUE@ $(PERL_EXE) $(top_srcdir)/config/generate-makeoptions.pl $(top_builddir)/src/Makefile \ +@USING_EXPORT_MAKEFILES_TRUE@ THREADPOOL > $(DESTDIR)$(includedir)/Makefile.export.threadpool.macros + +@USING_EXPORT_MAKEFILES_TRUE@uninstall-hook: +@USING_EXPORT_MAKEFILES_TRUE@ rm -f $(includedir)/Makefile.export.threadpool +@USING_EXPORT_MAKEFILES_TRUE@ rm -f $(includedir)/Makefile.export.threadpool.macros + +@USING_EXPORT_MAKEFILES_FALSE@install-exec-hook: + +@USING_EXPORT_MAKEFILES_FALSE@uninstall-hook: + +#if SUB_EXAMPLE +#EXAMPLE_SUBDIR=example +#endif + +# #np# - The following make targets must be defined for all packages. +# #np# - If the package does not have tests or examples, replace the +# #np# - corresponding rules with something like: +# #np# - @echo "new_package does not have any tests yet" +@BUILD_TESTS_TRUE@tests: +@BUILD_TESTS_TRUE@ @echo "" +@BUILD_TESTS_TRUE@ @echo "Now building ThreadPool tests." 
+@BUILD_TESTS_TRUE@ @echo "" +@BUILD_TESTS_TRUE@ cd $(top_builddir)/test && $(MAKE) +@BUILD_TESTS_TRUE@ @echo "" +@BUILD_TESTS_TRUE@ @echo "Finished building ThreadPool tests." +@BUILD_TESTS_TRUE@ @echo "" +@BUILD_TESTS_FALSE@tests: +@BUILD_TESTS_FALSE@ @echo "ThreadPool tests were disabled at configure time" + +examples: + @echo "ThreadPool does not have any examples yet" + +install-examples: + @echo "ThreadPool does not have any examples yet" + +clean-tests: + cd $(top_builddir)/test && $(MAKE) clean + +clean-examples: + @echo "ThreadPool does not have any examples yet" + +everything: + $(MAKE) && $(MAKE) examples && $(MAKE) tests + +clean-everything: + $(MAKE) clean-examples && $(MAKE) clean-tests && $(MAKE) clean + +install-everything: + $(MAKE) install && $(MAKE) install-examples + +runtests-serial : + $(PERL_EXE) $(TRILINOS_HOME_DIR)/commonTools/test/utilities/runtests \ + --trilinos-dir=$(TRILINOS_HOME_DIR) \ + --comm=serial \ + --build-dir=$(TRILINOS_BUILD_DIR) \ + --category=$(TRILINOS_TEST_CATEGORY) \ + --output-dir=@abs_top_builddir@/test/runtests-results \ + --verbosity=1 \ + --packages=ThreadPool + +runtests-mpi : + $(PERL_EXE) $(TRILINOS_HOME_DIR)/commonTools/test/utilities/runtests \ + --trilinos-dir=$(TRILINOS_HOME_DIR) \ + --comm=mpi \ + --mpi-go=$(TRILINOS_MPI_GO) \ + --build-dir=$(TRILINOS_BUILD_DIR) \ + --category=$(TRILINOS_TEST_CATEGORY) \ + --output-dir=@abs_top_builddir@/test/runtests-results \ + --verbosity=1 \ + --packages=ThreadPool +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/mkl/basic/optional/ThreadPool/ThreadPool_config.h b/mkl/basic/optional/ThreadPool/ThreadPool_config.h new file mode 100644 index 0000000..b941069 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/ThreadPool_config.h @@ -0,0 +1,3 @@ +#ifndef HAVE_PTHREAD +#define HAVE_PTHREAD +#endif diff --git a/mkl/basic/optional/ThreadPool/aclocal.m4 b/mkl/basic/optional/ThreadPool/aclocal.m4 new file mode 100644 index 0000000..e1f57a9 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/aclocal.m4 @@ -0,0 +1,932 @@ +# generated automatically by aclocal 1.10 -*- Autoconf -*- + +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, +# 2005, 2006 Free Software Foundation, Inc. +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +m4_if(m4_PACKAGE_VERSION, [2.61],, +[m4_fatal([this file was generated for autoconf 2.61. +You have another version of autoconf. If you want to use that, +you should regenerate the build system entirely.], [63])]) + +# Copyright (C) 2002, 2003, 2005, 2006 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_AUTOMAKE_VERSION(VERSION) +# ---------------------------- +# Automake X.Y traces this macro to ensure aclocal.m4 has been +# generated from the m4 files accompanying Automake X.Y. +# (This private macro should not be called outside this file.) 
+AC_DEFUN([AM_AUTOMAKE_VERSION], +[am__api_version='1.10' +dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to +dnl require some minimum version. Point them to the right macro. +m4_if([$1], [1.10], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl +]) + +# _AM_AUTOCONF_VERSION(VERSION) +# ----------------------------- +# aclocal traces this macro to find the Autoconf version. +# This is a private macro too. Using m4_define simplifies +# the logic in aclocal, which can simply ignore this definition. +m4_define([_AM_AUTOCONF_VERSION], []) + +# AM_SET_CURRENT_AUTOMAKE_VERSION +# ------------------------------- +# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. +# This function is AC_REQUIREd by AC_INIT_AUTOMAKE. +AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +[AM_AUTOMAKE_VERSION([1.10])dnl +_AM_AUTOCONF_VERSION(m4_PACKAGE_VERSION)]) + +# AM_AUX_DIR_EXPAND -*- Autoconf -*- + +# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets +# $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to +# `$srcdir', `$srcdir/..', or `$srcdir/../..'. +# +# Of course, Automake must honor this variable whenever it calls a +# tool from the auxiliary directory. The problem is that $srcdir (and +# therefore $ac_aux_dir as well) can be either absolute or relative, +# depending on how configure is run. This is pretty annoying, since +# it makes $ac_aux_dir quite unusable in subdirectories: in the top +# source directory, any form will work fine, but in subdirectories a +# relative path needs to be adjusted first. 
+# +# $ac_aux_dir/missing +# fails when called from a subdirectory if $ac_aux_dir is relative +# $top_srcdir/$ac_aux_dir/missing +# fails if $ac_aux_dir is absolute, +# fails when called from a subdirectory in a VPATH build with +# a relative $ac_aux_dir +# +# The reason of the latter failure is that $top_srcdir and $ac_aux_dir +# are both prefixed by $srcdir. In an in-source build this is usually +# harmless because $srcdir is `.', but things will broke when you +# start a VPATH build or use an absolute $srcdir. +# +# So we could use something similar to $top_srcdir/$ac_aux_dir/missing, +# iff we strip the leading $srcdir from $ac_aux_dir. That would be: +# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` +# and then we would define $MISSING as +# MISSING="\${SHELL} $am_aux_dir/missing" +# This will work as long as MISSING is not called from configure, because +# unfortunately $(top_srcdir) has no meaning in configure. +# However there are other variables, like CC, which are often used in +# configure, and could therefore not use this "fixed" $ac_aux_dir. +# +# Another solution, used here, is to always expand $ac_aux_dir to an +# absolute PATH. The drawback is that using absolute paths prevent a +# configured tree to be moved without reconfiguration. + +AC_DEFUN([AM_AUX_DIR_EXPAND], +[dnl Rely on autoconf to set up CDPATH properly. +AC_PREREQ([2.50])dnl +# expand $ac_aux_dir to an absolute path +am_aux_dir=`cd $ac_aux_dir && pwd` +]) + +# AM_CONDITIONAL -*- Autoconf -*- + +# Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005, 2006 +# Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 8 + +# AM_CONDITIONAL(NAME, SHELL-CONDITION) +# ------------------------------------- +# Define a conditional. 
+AC_DEFUN([AM_CONDITIONAL], +[AC_PREREQ(2.52)dnl + ifelse([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], + [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl +AC_SUBST([$1_TRUE])dnl +AC_SUBST([$1_FALSE])dnl +_AM_SUBST_NOTMAKE([$1_TRUE])dnl +_AM_SUBST_NOTMAKE([$1_FALSE])dnl +if $2; then + $1_TRUE= + $1_FALSE='#' +else + $1_TRUE='#' + $1_FALSE= +fi +AC_CONFIG_COMMANDS_PRE( +[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then + AC_MSG_ERROR([[conditional "$1" was never defined. +Usually this means the macro was only invoked conditionally.]]) +fi])]) + +# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 +# Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 9 + +# There are a few dirty hacks below to avoid letting `AC_PROG_CC' be +# written in clear, in which case automake, when reading aclocal.m4, +# will think it sees a *use*, and therefore will trigger all it's +# C support machinery. Also note that it means that autoscan, seeing +# CC etc. in the Makefile, will ask for an AC_PROG_CC use... + + +# _AM_DEPENDENCIES(NAME) +# ---------------------- +# See how the compiler implements dependency checking. +# NAME is "CC", "CXX", "GCJ", or "OBJC". +# We try a few techniques and use that to set a single cache variable. +# +# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was +# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular +# dependency, and given that the user is not expected to run this macro, +# just rely on AC_PROG_CC. 
+AC_DEFUN([_AM_DEPENDENCIES], +[AC_REQUIRE([AM_SET_DEPDIR])dnl +AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl +AC_REQUIRE([AM_MAKE_INCLUDE])dnl +AC_REQUIRE([AM_DEP_TRACK])dnl + +ifelse([$1], CC, [depcc="$CC" am_compiler_list=], + [$1], CXX, [depcc="$CXX" am_compiler_list=], + [$1], OBJC, [depcc="$OBJC" am_compiler_list='gcc3 gcc'], + [$1], UPC, [depcc="$UPC" am_compiler_list=], + [$1], GCJ, [depcc="$GCJ" am_compiler_list='gcc3 gcc'], + [depcc="$$1" am_compiler_list=]) + +AC_CACHE_CHECK([dependency style of $depcc], + [am_cv_$1_dependencies_compiler_type], +[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named `D' -- because `-MD' means `put the output + # in D'. + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_$1_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` + fi + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. 
+ # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with + # Solaris 8's {/usr,}/bin/sh. + touch sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + case $depmode in + nosideeffect) + # after this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + none) break ;; + esac + # We check with `-c' and `-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle `-M -o', and we need to detect this. + if depmode=$depmode \ + source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_$1_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_$1_dependencies_compiler_type=none +fi +]) +AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) +AM_CONDITIONAL([am__fastdep$1], [ + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) +]) + + +# AM_SET_DEPDIR +# ------------- +# Choose a directory name for dependency files. +# This macro is AC_REQUIREd in _AM_DEPENDENCIES +AC_DEFUN([AM_SET_DEPDIR], +[AC_REQUIRE([AM_SET_LEADING_DOT])dnl +AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl +]) + + +# AM_DEP_TRACK +# ------------ +AC_DEFUN([AM_DEP_TRACK], +[AC_ARG_ENABLE(dependency-tracking, +[ --disable-dependency-tracking speeds up one-time build + --enable-dependency-tracking do not reject slow dependency extractors]) +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' +fi +AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) +AC_SUBST([AMDEPBACKSLASH])dnl +_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl +]) + +# Generate code to set up dependency tracking. -*- Autoconf -*- + +# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005 +# Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +#serial 3 + +# _AM_OUTPUT_DEPENDENCY_COMMANDS +# ------------------------------ +AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], +[for mf in $CONFIG_FILES; do + # Strip MF so we end up with the name of the file. + mf=`echo "$mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile or not. + # We used to match only the files named `Makefile.in', but + # some people rename them; so instead we look at the file content. + # Grep'ing the first line is not enough: some people post-process + # each Makefile.in and add a new line on top of each file to say so. 
+ # Grep'ing the whole file is not good either: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. + if sed 10q "$mf" | grep '^#.*generated by automake' > /dev/null 2>&1; then + dirpart=`AS_DIRNAME("$mf")` + else + continue + fi + # Extract the definition of DEPDIR, am__include, and am__quote + # from the Makefile without running `make'. + DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` + test -z "$DEPDIR" && continue + am__include=`sed -n 's/^am__include = //p' < "$mf"` + test -z "am__include" && continue + am__quote=`sed -n 's/^am__quote = //p' < "$mf"` + # When using ansi2knr, U may be empty or an underscore; expand it + U=`sed -n 's/^U = //p' < "$mf"` + # Find all dependency output files, they are included files with + # $(DEPDIR) in their names. We invoke sed twice because it is the + # simplest approach to changing $(DEPDIR) to its actual value in the + # expansion. + for file in `sed -n " + s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ + sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do + # Make sure the directory exists. + test -f "$dirpart/$file" && continue + fdir=`AS_DIRNAME(["$file"])` + AS_MKDIR_P([$dirpart/$fdir]) + # echo "creating $dirpart/$file" + echo '# dummy' > "$dirpart/$file" + done +done +])# _AM_OUTPUT_DEPENDENCY_COMMANDS + + +# AM_OUTPUT_DEPENDENCY_COMMANDS +# ----------------------------- +# This macro should only be invoked once -- use via AC_REQUIRE. +# +# This code is only required when automatic dependency tracking +# is enabled. FIXME. This creates each `.P' file that we will +# need in order to bootstrap the dependency handling code. +AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], +[AC_CONFIG_COMMANDS([depfiles], + [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], + [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"]) +]) + +# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005 +# Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 8 + +# AM_CONFIG_HEADER is obsolete. It has been replaced by AC_CONFIG_HEADERS. +AU_DEFUN([AM_CONFIG_HEADER], [AC_CONFIG_HEADERS($@)]) + +# Do all the work for Automake. -*- Autoconf -*- + +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, +# 2005, 2006 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 12 + +# This macro actually does too much. Some checks are only needed if +# your package does certain things. But this isn't really a big deal. + +# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) +# AM_INIT_AUTOMAKE([OPTIONS]) +# ----------------------------------------------- +# The call with PACKAGE and VERSION arguments is the old style +# call (pre autoconf-2.50), which is being phased out. PACKAGE +# and VERSION should now be passed to AC_INIT and removed from +# the call to AM_INIT_AUTOMAKE. +# We support both call styles for the transition. After +# the next Automake release, Autoconf can make the AC_INIT +# arguments mandatory, and then we can depend on a new Autoconf +# release and drop the old call support. +AC_DEFUN([AM_INIT_AUTOMAKE], +[AC_PREREQ([2.60])dnl +dnl Autoconf wants to disallow AM_ names. We explicitly allow +dnl the ones we care about. +m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl +AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl +AC_REQUIRE([AC_PROG_INSTALL])dnl +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." 
+ AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi +AC_SUBST([CYGPATH_W]) + +# Define the identity of the package. +dnl Distinguish between old-style and new-style calls. +m4_ifval([$2], +[m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl + AC_SUBST([PACKAGE], [$1])dnl + AC_SUBST([VERSION], [$2])], +[_AM_SET_OPTIONS([$1])dnl +dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. +m4_if(m4_ifdef([AC_PACKAGE_NAME], 1)m4_ifdef([AC_PACKAGE_VERSION], 1), 11,, + [m4_fatal([AC_INIT should be called with package and version arguments])])dnl + AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl + AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl + +_AM_IF_OPTION([no-define],, +[AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package]) + AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl + +# Some tools Automake needs. +AC_REQUIRE([AM_SANITY_CHECK])dnl +AC_REQUIRE([AC_ARG_PROGRAM])dnl +AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version}) +AM_MISSING_PROG(AUTOCONF, autoconf) +AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version}) +AM_MISSING_PROG(AUTOHEADER, autoheader) +AM_MISSING_PROG(MAKEINFO, makeinfo) +AM_PROG_INSTALL_SH +AM_PROG_INSTALL_STRIP +AC_REQUIRE([AM_PROG_MKDIR_P])dnl +# We need awk for the "check" target. The system "awk" is bad on +# some platforms. 
+AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([AC_PROG_MAKE_SET])dnl +AC_REQUIRE([AM_SET_LEADING_DOT])dnl +_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], + [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], + [_AM_PROG_TAR([v7])])]) +_AM_IF_OPTION([no-dependencies],, +[AC_PROVIDE_IFELSE([AC_PROG_CC], + [_AM_DEPENDENCIES(CC)], + [define([AC_PROG_CC], + defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [_AM_DEPENDENCIES(CXX)], + [define([AC_PROG_CXX], + defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJC], + [_AM_DEPENDENCIES(OBJC)], + [define([AC_PROG_OBJC], + defn([AC_PROG_OBJC])[_AM_DEPENDENCIES(OBJC)])])dnl +]) +]) + + +# When config.status generates a header, we must update the stamp-h file. +# This file resides in the same directory as the config header +# that is generated. The stamp files are numbered to have different names. + +# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the +# loop where config.status creates the headers, so we can generate +# our stamp files there. +AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], +[# Compute $1's index in $config_headers. +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $1 | $1:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $1" >`AS_DIRNAME([$1])`/stamp-h[]$_am_stamp_count]) + +# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_SH +# ------------------ +# Define $install_sh. +AC_DEFUN([AM_PROG_INSTALL_SH], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +install_sh=${install_sh-"\$(SHELL) $am_aux_dir/install-sh"} +AC_SUBST(install_sh)]) + +# Copyright (C) 2003, 2005 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 2 + +# Check whether the underlying file-system supports filenames +# with a leading dot. For instance MS-DOS doesn't. +AC_DEFUN([AM_SET_LEADING_DOT], +[rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null +AC_SUBST([am__leading_dot])]) + +# Add --enable-maintainer-mode option to configure. -*- Autoconf -*- +# From Jim Meyering + +# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005 +# Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 4 + +AC_DEFUN([AM_MAINTAINER_MODE], +[AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles]) + dnl maintainer-mode is disabled by default + AC_ARG_ENABLE(maintainer-mode, +[ --enable-maintainer-mode enable make rules and dependencies not useful + (and sometimes confusing) to the casual installer], + USE_MAINTAINER_MODE=$enableval, + USE_MAINTAINER_MODE=no) + AC_MSG_RESULT([$USE_MAINTAINER_MODE]) + AM_CONDITIONAL(MAINTAINER_MODE, [test $USE_MAINTAINER_MODE = yes]) + MAINT=$MAINTAINER_MODE_TRUE + AC_SUBST(MAINT)dnl +] +) + +AU_DEFUN([jm_MAINTAINER_MODE], [AM_MAINTAINER_MODE]) + +# Check to see how 'make' treats includes. -*- Autoconf -*- + +# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 3 + +# AM_MAKE_INCLUDE() +# ----------------- +# Check to see how make treats includes. 
+AC_DEFUN([AM_MAKE_INCLUDE], +[am_make=${MAKE-make} +cat > confinc << 'END' +am__doit: + @echo done +.PHONY: am__doit +END +# If we don't find an include directive, just comment out the code. +AC_MSG_CHECKING([for style of include used by $am_make]) +am__include="#" +am__quote= +_am_result=none +# First try GNU make style include. +echo "include confinc" > confmf +# We grep out `Entering directory' and `Leaving directory' +# messages which can occur if `w' ends up in MAKEFLAGS. +# In particular we don't look at `^make:' because GNU make might +# be invoked under some other name (usually "gmake"), in which +# case it prints its new name instead of `make'. +if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then + am__include=include + am__quote= + _am_result=GNU +fi +# Now try BSD make style include. +if test "$am__include" = "#"; then + echo '.include "confinc"' > confmf + if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then + am__include=.include + am__quote="\"" + _am_result=BSD + fi +fi +AC_SUBST([am__include]) +AC_SUBST([am__quote]) +AC_MSG_RESULT([$_am_result]) +rm -f confinc confmf +]) + +# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- + +# Copyright (C) 1997, 1999, 2000, 2001, 2003, 2004, 2005 +# Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 5 + +# AM_MISSING_PROG(NAME, PROGRAM) +# ------------------------------ +AC_DEFUN([AM_MISSING_PROG], +[AC_REQUIRE([AM_MISSING_HAS_RUN]) +$1=${$1-"${am_missing_run}$2"} +AC_SUBST($1)]) + + +# AM_MISSING_HAS_RUN +# ------------------ +# Define MISSING if not defined so far and test if it supports --run. +# If it does, set am_missing_run to use it, otherwise, to nothing. 
+AC_DEFUN([AM_MISSING_HAS_RUN], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([missing])dnl +test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing" +# Use eval to expand $SHELL +if eval "$MISSING --run true"; then + am_missing_run="$MISSING --run " +else + am_missing_run= + AC_MSG_WARN([`missing' script is too old or missing]) +fi +]) + +# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_MKDIR_P +# --------------- +# Check for `mkdir -p'. +AC_DEFUN([AM_PROG_MKDIR_P], +[AC_PREREQ([2.60])dnl +AC_REQUIRE([AC_PROG_MKDIR_P])dnl +dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P, +dnl while keeping a definition of mkdir_p for backward compatibility. +dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile. +dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of +dnl Makefile.ins that do not define MKDIR_P, so we do our own +dnl adjustment using top_builddir (which is defined more often than +dnl MKDIR_P). +AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl +case $mkdir_p in + [[\\/$]]* | ?:[[\\/]]*) ;; + */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;; +esac +]) + +# Helper functions for option handling. -*- Autoconf -*- + +# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 3 + +# _AM_MANGLE_OPTION(NAME) +# ----------------------- +AC_DEFUN([_AM_MANGLE_OPTION], +[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) + +# _AM_SET_OPTION(NAME) +# ------------------------------ +# Set option NAME. Presently that only means defining a flag for this option. 
+AC_DEFUN([_AM_SET_OPTION], +[m4_define(_AM_MANGLE_OPTION([$1]), 1)]) + +# _AM_SET_OPTIONS(OPTIONS) +# ---------------------------------- +# OPTIONS is a space-separated list of Automake options. +AC_DEFUN([_AM_SET_OPTIONS], +[AC_FOREACH([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) + +# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) +# ------------------------------------------- +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +AC_DEFUN([_AM_IF_OPTION], +[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) + +# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_RUN_LOG(COMMAND) +# ------------------- +# Run COMMAND, save the exit status in ac_status, and log it. +# (This has been adapted from Autoconf's _AC_RUN_LOG macro.) +AC_DEFUN([AM_RUN_LOG], +[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD + ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + (exit $ac_status); }]) + +# Check to make sure that the build environment is sane. -*- Autoconf -*- + +# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005 +# Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 4 + +# AM_SANITY_CHECK +# --------------- +AC_DEFUN([AM_SANITY_CHECK], +[AC_MSG_CHECKING([whether build environment is sane]) +# Just in case +sleep 1 +echo timestamp > conftest.file +# Do `set' in a subshell so we don't clobber the current shell's +# arguments. 
Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null` + if test "$[*]" = "X"; then + # -L didn't work. + set X `ls -t $srcdir/configure conftest.file` + fi + rm -f conftest.file + if test "$[*]" != "X $srcdir/configure conftest.file" \ + && test "$[*]" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken +alias in your environment]) + fi + + test "$[2]" = conftest.file + ) +then + # Ok. + : +else + AC_MSG_ERROR([newly created file is older than distributed files! +Check your system clock]) +fi +AC_MSG_RESULT(yes)]) + +# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_STRIP +# --------------------- +# One issue with vendor `install' (even GNU) is that you can't +# specify the program used to strip binaries. This is especially +# annoying in cross-compiling environments, where the build's strip +# is unlikely to handle the host's binaries. +# Fortunately install-sh will honor a STRIPPROG variable, so we +# always use install-sh in `make install-strip', and initialize +# STRIPPROG with the value of the STRIP variable (set by the user). +AC_DEFUN([AM_PROG_INSTALL_STRIP], +[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +# Installed binaries are usually stripped using `strip' when the user +# run `make install-strip'. 
However `strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the `STRIP' environment variable to overrule this program. +dnl Don't test for $cross_compiling = yes, because it might be `maybe'. +if test "$cross_compiling" != no; then + AC_CHECK_TOOL([STRIP], [strip], :) +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" +AC_SUBST([INSTALL_STRIP_PROGRAM])]) + +# Copyright (C) 2006 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_SUBST_NOTMAKE(VARIABLE) +# --------------------------- +# Prevent Automake from outputing VARIABLE = @VARIABLE@ in Makefile.in. +# This macro is traced by Automake. +AC_DEFUN([_AM_SUBST_NOTMAKE]) + +# Check how to create a tarball. -*- Autoconf -*- + +# Copyright (C) 2004, 2005 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# serial 2 + +# _AM_PROG_TAR(FORMAT) +# -------------------- +# Check how to create a tarball in format FORMAT. +# FORMAT should be one of `v7', `ustar', or `pax'. +# +# Substitute a variable $(am__tar) that is a command +# writing to stdout a FORMAT-tarball containing the directory +# $tardir. +# tardir=directory && $(am__tar) > result.tar +# +# Substitute a variable $(am__untar) that extract such +# a tarball read from stdin. +# $(am__untar) < result.tar +AC_DEFUN([_AM_PROG_TAR], +[# Always define AMTAR for backward compatibility. 
+AM_MISSING_PROG([AMTAR], [tar]) +m4_if([$1], [v7], + [am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'], + [m4_case([$1], [ustar],, [pax],, + [m4_fatal([Unknown tar format])]) +AC_MSG_CHECKING([how to create a $1 tar archive]) +# Loop over all known methods to create a tar archive until one works. +_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' +_am_tools=${am_cv_prog_tar_$1-$_am_tools} +# Do not fold the above two line into one, because Tru64 sh and +# Solaris sh will not grok spaces in the rhs of `-'. +for _am_tool in $_am_tools +do + case $_am_tool in + gnutar) + for _am_tar in tar gnutar gtar; + do + AM_RUN_LOG([$_am_tar --version]) && break + done + am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' + am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' + am__untar="$_am_tar -xf -" + ;; + plaintar) + # Must skip GNU tar: if it does not support --format= it doesn't create + # ustar tarball either. + (tar --version) >/dev/null 2>&1 && continue + am__tar='tar chf - "$$tardir"' + am__tar_='tar chf - "$tardir"' + am__untar='tar xf -' + ;; + pax) + am__tar='pax -L -x $1 -w "$$tardir"' + am__tar_='pax -L -x $1 -w "$tardir"' + am__untar='pax -r' + ;; + cpio) + am__tar='find "$$tardir" -print | cpio -o -H $1 -L' + am__tar_='find "$tardir" -print | cpio -o -H $1 -L' + am__untar='cpio -i -H $1 -d' + ;; + none) + am__tar=false + am__tar_=false + am__untar=false + ;; + esac + + # If the value was cached, stop now. We just wanted to have am__tar + # and am__untar set. 
+ test -n "${am_cv_prog_tar_$1}" && break + + # tar/untar a dummy directory, and stop if the command works + rm -rf conftest.dir + mkdir conftest.dir + echo GrepMe > conftest.dir/file + AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) + rm -rf conftest.dir + if test -s conftest.tar; then + AM_RUN_LOG([$am__untar <conftest.tar]) + grep GrepMe conftest.dir/file >/dev/null 2>&1 && break + fi +done +rm -rf conftest.dir + +AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) +AC_MSG_RESULT([$am_cv_prog_tar_$1])]) +AC_SUBST([am__tar]) +AC_SUBST([am__untar]) +]) # _AM_PROG_TAR + +m4_include([config/acx_pthread.m4]) +m4_include([config/tac_arg_check_mpi.m4]) +m4_include([config/tac_arg_config_mpi.m4]) +m4_include([config/tac_arg_enable_export-makefiles.m4]) +m4_include([config/tac_arg_enable_feature.m4]) +m4_include([config/tac_arg_enable_feature_sub_check.m4]) +m4_include([config/tac_arg_with_ar.m4]) +m4_include([config/tac_arg_with_flags.m4]) +m4_include([config/tac_arg_with_incdirs.m4]) +m4_include([config/tac_arg_with_libdirs.m4]) +m4_include([config/tac_arg_with_libs.m4]) +m4_include([config/tac_arg_with_perl.m4]) diff --git a/mkl/basic/optional/ThreadPool/bootstrap b/mkl/basic/optional/ThreadPool/bootstrap new file mode 100755 index 0000000..8706e9e --- /dev/null +++ b/mkl/basic/optional/ThreadPool/bootstrap @@ -0,0 +1,9 @@ +#! /bin/sh +#np# This file does not need to be edited, other than removing this line. 
+set -x +# Only run aclocal if we need to create aclocal.m4 +aclocal -I config +# autoheader is smart and doesn't change anything unless it's necessary +autoheader +automake --foreign --add-missing --copy +autoconf diff --git a/mkl/basic/optional/ThreadPool/cmake/Dependencies.cmake b/mkl/basic/optional/ThreadPool/cmake/Dependencies.cmake new file mode 100644 index 0000000..746d066 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/cmake/Dependencies.cmake @@ -0,0 +1,11 @@ +SET(LIB_REQUIRED_DEP_PACKAGES) +SET(LIB_OPTIONAL_DEP_PACKAGES) +SET(TEST_REQUIRED_DEP_PACKAGES) +SET(TEST_OPTIONAL_DEP_PACKAGES) +SET(LIB_REQUIRED_DEP_TPLS) +SET(LIB_OPTIONAL_DEP_TPLS Pthread MPI) +SET(TEST_REQUIRED_DEP_TPLS) +SET(TEST_OPTIONAL_DEP_TPLS) + +TPL_TENTATIVELY_ENABLE(Pthread) + diff --git a/mkl/basic/optional/ThreadPool/cmake/ThreadPool_config.h.in b/mkl/basic/optional/ThreadPool/cmake/ThreadPool_config.h.in new file mode 100644 index 0000000..55614b9 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/cmake/ThreadPool_config.h.in @@ -0,0 +1,2 @@ +#cmakedefine HAVE_MPI +#cmakedefine HAVE_PTHREAD diff --git a/mkl/basic/optional/ThreadPool/config/acx_pthread.m4 b/mkl/basic/optional/ThreadPool/config/acx_pthread.m4 new file mode 100644 index 0000000..3bd3ec2 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/acx_pthread.m4 @@ -0,0 +1,224 @@ +dnl @synopsis ACX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) +dnl +dnl This macro figures out how to build C programs using POSIX +dnl threads. It sets the PTHREAD_LIBS output variable to the threads +dnl library and linker flags, and the PTHREAD_CFLAGS output variable +dnl to any special C compiler flags that are needed. (The user can also +dnl force certain compiler flags/libs to be tested by setting these +dnl environment variables.) +dnl +dnl Also sets PTHREAD_CC to any special C compiler that is needed for +dnl multi-threaded programs (defaults to the value of CC otherwise). 
+dnl (This is necessary on AIX to use the special cc_r compiler alias.) +dnl +dnl If you are only building threads programs, you may wish to +dnl use these variables in your default LIBS, CFLAGS, and CC: +dnl +dnl LIBS="$PTHREAD_LIBS $LIBS" +dnl CFLAGS="$CFLAGS $PTHREAD_CFLAGS" +dnl CC="$PTHREAD_CC" +dnl +dnl In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute +dnl constant has a nonstandard name, defines PTHREAD_CREATE_JOINABLE +dnl to that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX). +dnl +dnl ACTION-IF-FOUND is a list of shell commands to run if a threads +dnl library is found, and ACTION-IF-NOT-FOUND is a list of commands +dnl to run it if it is not found. If ACTION-IF-FOUND is not specified, +dnl the default action will define HAVE_PTHREAD. +dnl +dnl Please let the authors know if this macro fails on any platform, +dnl or if you have any other suggestions or comments. This macro was +dnl based on work by SGJ on autoconf scripts for FFTW (www.fftw.org) +dnl (with help from M. Frigo), as well as ac_pthread and hb_pthread +dnl macros posted by AFC to the autoconf macro repository. We are also +dnl grateful for the helpful feedback of numerous users. +dnl +dnl @version $Id$ +dnl @author Steven G. Johnson and Alejandro Forero Cuervo + +AC_DEFUN([ACX_PTHREAD], [ +AC_REQUIRE([AC_CANONICAL_HOST]) +acx_pthread_ok=no + +# First, check if the POSIX threads header, pthread.h, is available. +# If it isn't, don't bother looking for the threads libraries. +AC_CHECK_HEADER(pthread.h, , acx_pthread_ok=noheader) + +# We must check for the threads library under a number of different +# names; the ordering is very important because some systems +# (e.g. DEC) have both -lpthread and -lpthreads, where one of the +# libraries is broken (non-POSIX). 
+ +# First of all, check if the user has set any of the PTHREAD_LIBS, +# etcetera environment variables, and if threads linking works using +# them: +if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS]) + AC_TRY_LINK_FUNC(pthread_join, acx_pthread_ok=yes) + AC_MSG_RESULT($acx_pthread_ok) + if test x"$acx_pthread_ok" = xno; then + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" + fi + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" +fi + +# Create a list of thread flags to try. Items starting with a "-" are +# C compiler flags, and other items are library names, except for "none" +# which indicates that we try without any flags at all. + +acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt" + +# The ordering *is* (sometimes) important. Some notes on the +# individual items follow: + +# pthreads: AIX (must check this before -lpthread) +# none: in case threads are in libc; should be tried before -Kthread and +# other compiler flags to prevent continual compiler warnings +# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) +# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) +# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) +# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) +# -pthreads: Solaris/gcc +# -mthreads: Mingw32/gcc, Lynx/gcc +# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it +# doesn't hurt to check since this sometimes defines pthreads too; +# also defines -D_REENTRANT) +# pthread: Linux, etcetera +# --thread-safe: KAI C++ + +case "${host_cpu}-${host_os}" in + *solaris*) + + # On Solaris (at least, for some versions), libc contains stubbed + # (non-functional) versions of the pthreads routines, so link-based + # tests will 
erroneously succeed. (We need to link with -pthread or + # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather + # a function called by this macro, so we could check for that, but + # who knows whether they'll stub that too in a future libc.) So, + # we'll just look for -pthreads and -lpthread first: + + acx_pthread_flags="-pthread -pthreads pthread -mt $acx_pthread_flags" + ;; +esac + +if test x"$acx_pthread_ok" = xno; then +for flag in $acx_pthread_flags; do + + case $flag in + none) + AC_MSG_CHECKING([whether pthreads work without any flags]) + ;; + + -*) + AC_MSG_CHECKING([whether pthreads work with $flag]) + PTHREAD_CFLAGS="$flag" + ;; + + *) + AC_MSG_CHECKING([for the pthreads library -l$flag]) + PTHREAD_LIBS="-l$flag" + ;; + esac + + save_LIBS="$LIBS" + save_CFLAGS="$CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Check for various functions. We must include pthread.h, + # since some functions may be macros. (On the Sequent, we + # need a special flag -Kthread to make this header compile.) + # We check for pthread_join because it is in -lpthread on IRIX + # while pthread_create is in libc. We check for pthread_attr_init + # due to DEC craziness with -lpthreads. We check for + # pthread_cleanup_push because it is one of the few pthread + # functions on Solaris that doesn't have a non-functional libc stub. + # We try pthread_create on general principles. 
+ AC_TRY_LINK([#include <pthread.h>], + [pthread_t th; pthread_join(th, 0); + pthread_attr_init(0); pthread_cleanup_push(0, 0); + pthread_create(0,0,0,0); pthread_cleanup_pop(0); ], + [acx_pthread_ok=yes]) + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + AC_MSG_RESULT($acx_pthread_ok) + if test "x$acx_pthread_ok" = xyes; then + break; + fi + + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" +done +fi + +# Various other checks: +if test "x$acx_pthread_ok" = xyes; then + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Detect AIX lossage: threads are created detached by default + # and the JOINABLE attribute has a nonstandard name (UNDETACHED). + AC_MSG_CHECKING([for joinable pthread attribute]) + AC_TRY_LINK([#include <pthread.h>], + [int attr=PTHREAD_CREATE_JOINABLE;], + ok=PTHREAD_CREATE_JOINABLE, ok=unknown) + if test x"$ok" = xunknown; then + AC_TRY_LINK([#include <pthread.h>], + [int attr=PTHREAD_CREATE_UNDETACHED;], + ok=PTHREAD_CREATE_UNDETACHED, ok=unknown) + fi + if test x"$ok" != xPTHREAD_CREATE_JOINABLE; then + AC_DEFINE(PTHREAD_CREATE_JOINABLE, $ok, + [Define to the necessary symbol if this constant + uses a non-standard name on your system.]) + fi + AC_MSG_RESULT(${ok}) + if test x"$ok" = xunknown; then + AC_MSG_WARN([we do not know how to create joinable pthreads]) + fi + + AC_MSG_CHECKING([if more special flags are required for pthreads]) + flag=no + case "${host_cpu}-${host_os}" in + *-aix* | *-freebsd*) flag="-D_THREAD_SAFE";; + *solaris* | alpha*-osf*) flag="-D_REENTRANT";; + esac + AC_MSG_RESULT(${flag}) + if test "x$flag" != xno; then + PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" + fi + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + # More AIX lossage: must compile with cc_r + AC_CHECK_PROG(PTHREAD_CC, cc_r, cc_r, ${CC}) +else + PTHREAD_CC="$CC" +fi + +AC_SUBST(PTHREAD_LIBS) +AC_SUBST(PTHREAD_CFLAGS) +AC_SUBST(PTHREAD_CC) + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$acx_pthread_ok" = xyes; then + 
ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1]) + : +else + acx_pthread_ok=no + $2 +fi + +])dnl ACX_PTHREAD diff --git a/mkl/basic/optional/ThreadPool/config/config.guess b/mkl/basic/optional/ThreadPool/config/config.guess new file mode 100755 index 0000000..396482d --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/config.guess @@ -0,0 +1,1500 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, +# Inc. + +timestamp='2006-07-02' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + + +# Originally written by Per Bothner <per@bothner.com>. +# Please send patches to <config-patches@gnu.org>. Submit a context +# diff and a properly formatted ChangeLog entry. +# +# This script attempts to guess a canonical system name similar to +# config.sub. If it succeeds, it prints the system name on stdout, and +# exits with 0. Otherwise, it exits with 1. 
+# +# The plan is that this can be called by configure scripts if you +# don't specify an explicit build system type. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to <config-patches@gnu.org>." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 +Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +trap 'exit 1' 1 2 15 + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. 
+ +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > $dummy.c ; + for c in cc gcc c89 c99 ; do + if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac ; set_cc_for_build= ;' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +# Note: order is significant - the case branches are not exclusive. + +case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. 
This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + case "${UNAME_MACHINE_ARCH}" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently, or will in the future. + case "${UNAME_MACHINE_ARCH}" in + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval $set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep __ELF__ >/dev/null + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "${UNAME_VERSION}" in + Debian*) + release='-gnu' + ;; + *) + release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
+ echo "${machine}-${os}${release}" + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + exit ;; + *:ekkoBSD:*:*) + echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + exit ;; + *:SolidBSD:*:*) + echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + exit ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd${UNAME_RELEASE} + exit ;; + *:MirBSD:*:*) + echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. + ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE="alpha" ;; + "EV4.5 (21064)") + UNAME_MACHINE="alpha" ;; + "LCA4 (21066/21068)") + UNAME_MACHINE="alpha" ;; + "EV5 (21164)") + UNAME_MACHINE="alphaev5" ;; + "EV5.6 (21164A)") + UNAME_MACHINE="alphaev56" ;; + "EV5.6 (21164PC)") + UNAME_MACHINE="alphapca56" ;; + "EV5.7 (21164PC)") + UNAME_MACHINE="alphapca57" ;; + "EV6 (21264)") + UNAME_MACHINE="alphaev6" ;; + "EV6.7 (21264A)") + UNAME_MACHINE="alphaev67" ;; + "EV6.8CB (21264C)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8AL (21264B)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8CX (21264D)") + UNAME_MACHINE="alphaev68" ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE="alphaev69" ;; + "EV7 (21364)") + UNAME_MACHINE="alphaev7" ;; + "EV7.9 (21364A)") + UNAME_MACHINE="alphaev79" ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. 
+ # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + exit ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead + # of the specific Alpha model? + echo alpha-pc-interix + exit ;; + 21064:Windows_NT:50:3) + echo alpha-dec-winnt3.5 + exit ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit ;; + arm:riscos:*:*|arm:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
+ if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + i86pc:SunOS:5.*:*) + echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + exit ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos${UNAME_RELEASE} + exit ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos${UNAME_RELEASE} + ;; + sun4) + echo sparc-sun-sunos${UNAME_RELEASE} + ;; + esac + exit ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos${UNAME_RELEASE} + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). 
Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. + atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten${UNAME_RELEASE} + exit ;; + powerpc:machten:*:*) + echo powerpc-apple-machten${UNAME_RELEASE} + exit ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix${UNAME_RELEASE} + exit ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix${UNAME_RELEASE} + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix${UNAME_RELEASE} + exit ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && + dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && + 
SYSTEM_NAME=`$dummy $dummyarg` && + { echo "$SYSTEM_NAME"; exit; } + echo mips-mips-riscos${UNAME_RELEASE} + exit ;; + Motorola:PowerMAX_OS:*:*) + echo powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + then + if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ + [ ${TARGET_BINARY_INTERFACE}x = x ] + then + echo m88k-dg-dgux${UNAME_RELEASE} + else + echo m88k-dg-dguxbcs${UNAME_RELEASE} + fi + else + echo i586-dg-dgux${UNAME_RELEASE} + fi + exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit ;; + *:IRIX*:*:*) + echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + exit ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + exit ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit ;; + *:AIX:*:[45]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${IBM_ARCH}-ibm-aix${IBM_REV} + exit ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit ;; + ibmrt:4.4BSD:*|romp-ibm:BSD:*) + echo romp-ibm-bsd4.4 + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + exit ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + case "${UNAME_MACHINE}" in + 9000/31? ) HP_ARCH=m68000 ;; + 9000/[34]?? 
) HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "${HP_ARCH}" = "" ]; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + + #define _HPUX_SOURCE + #include + #include + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ ${HP_ARCH} = "hppa2.0w" ] + then + eval $set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + grep __LP64__ >/dev/null + then + HP_ARCH="hppa2.0w" + else + HP_ARCH="hppa64" + fi + fi + echo ${HP_ARCH}-hp-hpux${HPUX_REV} + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux${HPUX_REV} + exit ;; + 3050*:HI-UX:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo ${UNAME_MACHINE}-unknown-osf1mk + else + echo ${UNAME_MACHINE}-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; + 
C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:BSD/OS:*:*) + echo 
${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:FreeBSD:*:*) + case ${UNAME_MACHINE} in + pc98) + echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + amd64) + echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + *) + echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + esac + exit ;; + i*:CYGWIN*:*) + echo ${UNAME_MACHINE}-pc-cygwin + exit ;; + i*:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit ;; + i*:windows32*:*) + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 + exit ;; + i*:PW*:*) + echo ${UNAME_MACHINE}-pc-pw32 + exit ;; + x86:Interix*:[3456]*) + echo i586-pc-interix${UNAME_RELEASE} + exit ;; + EM64T:Interix*:[3456]*) + echo x86_64-unknown-interix${UNAME_RELEASE} + exit ;; + [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) + echo i${UNAME_MACHINE}-pc-mks + exit ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we + # UNAME_MACHINE based on the output of uname instead of i386? 
+ echo i586-pc-interix + exit ;; + i*:UWIN*:*) + echo ${UNAME_MACHINE}-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; + p*:CYGWIN*:*) + echo powerpcle-unknown-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + *:GNU:*:*) + # the GNU system + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu + exit ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; + arm*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + avr32*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + cris:Linux:*:*) + echo cris-axis-linux-gnu + exit ;; + crisv32:Linux:*:*) + echo crisv32-axis-linux-gnu + exit ;; + frv:Linux:*:*) + echo frv-unknown-linux-gnu + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + mips:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef mips + #undef mipsel + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=mipsel + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=mips + #else + CPU= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^CPU/{ + s: ::g + p + }'`" + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + mips64:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef mips64 + #undef mips64el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || 
defined(MIPSEL) + CPU=mips64el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=mips64 + #else + CPU= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^CPU/{ + s: ::g + p + }'`" + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + or32:Linux:*:*) + echo or32-unknown-linux-gnu + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null + if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-gnu ;; + PA8*) echo hppa2.0-unknown-linux-gnu ;; + *) echo hppa-unknown-linux-gnu ;; + esac + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo ${UNAME_MACHINE}-ibm-linux + exit ;; + sh64*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sh*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + vax:Linux:*:*) + echo ${UNAME_MACHINE}-dec-linux-gnu + exit ;; + x86_64:Linux:*:*) + echo x86_64-unknown-linux-gnu + exit ;; + i*86:Linux:*:*) + # The BFD linker knows what the default object file format is, so + # first see if it will tell us. 
cd to the root directory to prevent + # problems with other programs or directories called `ld' in the path. + # Set LC_ALL=C to ensure ld outputs messages in English. + ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ + | sed -ne '/supported targets:/!d + s/[ ][ ]*/ /g + s/.*supported targets: *// + s/ .*// + p'` + case "$ld_supported_targets" in + elf32-i386) + TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" + ;; + a.out-i386-linux) + echo "${UNAME_MACHINE}-pc-linux-gnuaout" + exit ;; + coff-i386) + echo "${UNAME_MACHINE}-pc-linux-gnucoff" + exit ;; + "") + # Either a pre-BFD a.out linker (linux-gnuoldld) or + # one that does not give us useful --help. + echo "${UNAME_MACHINE}-pc-linux-gnuoldld" + exit ;; + esac + # Determine whether the default compiler is a.out or elf + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include + #ifdef __ELF__ + # ifdef __GLIBC__ + # if __GLIBC__ >= 2 + LIBC=gnu + # else + LIBC=gnulibc1 + # endif + # else + LIBC=gnulibc1 + # endif + #else + #if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) + LIBC=gnu + #else + LIBC=gnuaout + #endif + #endif + #ifdef __dietlibc__ + LIBC=dietlibc + #endif +EOF + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^LIBC/{ + s: ::g + p + }'`" + test x"${LIBC}" != x && { + echo "${UNAME_MACHINE}-pc-linux-${LIBC}" + exit + } + test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; } + ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. + echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. 
+ echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. + echo ${UNAME_MACHINE}-pc-os2-emx + exit ;; + i*86:XTS-300:*:STOP) + echo ${UNAME_MACHINE}-unknown-stop + exit ;; + i*86:atheos:*:*) + echo ${UNAME_MACHINE}-unknown-atheos + exit ;; + i*86:syllable:*:*) + echo ${UNAME_MACHINE}-pc-syllable + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) + echo i386-unknown-lynxos${UNAME_RELEASE} + exit ;; + i*86:*DOS:*:*) + echo ${UNAME_MACHINE}-pc-msdosdjgpp + exit ;; + i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) + UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} + else + echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} + fi + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. + case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + exit ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo ${UNAME_MACHINE}-pc-sco$UNAME_REL + else + echo ${UNAME_MACHINE}-pc-sysv32 + fi + exit ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i386. 
+ echo i386-pc-msdosdjgpp + exit ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. + echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos${UNAME_RELEASE} + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos${UNAME_RELEASE} + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos${UNAME_RELEASE} + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) + echo powerpc-unknown-lynxos${UNAME_RELEASE} + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv${UNAME_RELEASE} + exit ;; + RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + 
*:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo ${UNAME_MACHINE}-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo ${UNAME_MACHINE}-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux${UNAME_RELEASE} + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv${UNAME_RELEASE} + else + echo mips-unknown-sysv${UNAME_RELEASE} + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. 
+ echo i586-pc-beos + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux${UNAME_RELEASE} + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux${UNAME_RELEASE} + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Rhapsody:*:*) + echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + case $UNAME_PROCESSOR in + unknown) UNAME_PROCESSOR=powerpc ;; + esac + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = "x86"; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NSE-?:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; + NSR-?:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk${UNAME_RELEASE} + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + exit ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ if test "$cputype" = "386"; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo ${UNAME_MACHINE}-unknown-plan9 + exit ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux${UNAME_RELEASE} + exit ;; + *:DragonFly:*:*) + echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "${UNAME_MACHINE}" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + exit ;; + i*86:rdos:*:*) + echo ${UNAME_MACHINE}-pc-rdos + exit ;; +esac + +#echo '(No uname command or uname output not recognized.)' 1>&2 +#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 + +eval $set_cc_for_build +cat >$dummy.c < +# include +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know.... 
*/ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (__arm) && defined (__acorn) && defined (__unix) + printf ("arm-acorn-riscix\n"); exit (0); +#endif + +#if defined (hp300) && !defined (hpux) + printf ("m68k-hp-bsd\n"); exit (0); +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); + +#endif + +#if defined (vax) +# if !defined (ultrix) +# include +# if defined (BSD) +# if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +# else +# if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# endif +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# else + printf ("vax-dec-ultrix\n"); exit (0); +# endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. + +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } + +# Convex versions that predate uname can use getsysinfo(1) + +if [ -x /usr/convex/getsysinfo ] +then + case `getsysinfo -f cpu_type` in + c1*) + echo c1-convex-bsd + exit ;; + c2*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + c34*) + echo c34-convex-bsd + exit ;; + c38*) + echo c38-convex-bsd + exit ;; + c4*) + echo c4-convex-bsd + exit ;; + esac +fi + +cat >&2 < in order to provide the needed +information to handle your system. 
+ +config.guess timestamp = $timestamp + +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = ${UNAME_MACHINE} +UNAME_RELEASE = ${UNAME_RELEASE} +UNAME_SYSTEM = ${UNAME_SYSTEM} +UNAME_VERSION = ${UNAME_VERSION} +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/mkl/basic/optional/ThreadPool/config/config.sub b/mkl/basic/optional/ThreadPool/config/config.sub new file mode 100755 index 0000000..fab0aa3 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/config.sub @@ -0,0 +1,1616 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, +# Inc. + +timestamp='2006-09-20' + +# This file is (in principle) common to ALL GNU software. +# The presence of a machine in this file suggests that SOME GNU software +# can handle that machine. It does not imply ALL GNU software can. +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + + +# Please send patches to <config-patches@gnu.org>. Submit a context +# diff and a properly formatted ChangeLog entry. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification.
+ +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS + $0 [OPTION] ALIAS + +Canonicalize a configuration name. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.sub ($timestamp) + +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 +Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" + exit 1 ;; + + *local*) + # First pass through any local machine types. + echo $1 + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). +# Here we must recognize all the valid KERNEL-OS combinations. 
+maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +case $maybe_os in + nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \ + uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \ + storm-chaos* | os2-emx* | rtmk-nova*) + os=-$maybe_os + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; + *) + basic_machine=`echo $1 | sed 's/-[^-]*$//'` + if [ $basic_machine != $1 ] + then os=`echo $1 | sed 's/.*-/-/'` + else os=; fi + ;; +esac + +### Let's recognize common machines as not being operating systems so +### that things like config.sub decstation-3100 work. We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. + ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis | -knuth | -cray) + os= + basic_machine=$1 + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco6) + os=-sco5v6 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # 
Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + ;; + -windowsnt*) + os=`echo $os | sed -e 's/windowsnt/winnt/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. 
+ 1750a | 580 \ + | a29k \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ + | bfin \ + | c4x | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | fr30 | frv \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | i370 | i860 | i960 | ia64 \ + | ip2k | iq2000 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | mcore \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ + | mips64vr | mips64vrel \ + | mips64orion | mips64orionel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | mt \ + | msp430 \ + | nios | nios2 \ + | ns16k | ns32k \ + | or32 \ + | pdp10 | pdp11 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ + | pyramid \ + | score \ + | sh | sh[1234] | sh[24]a | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ + | spu | strongarm \ + | tahoe | thumb | tic4x | tic80 | tron \ + | v850 | v850e \ + | we32k \ + | x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \ + | z8k) + basic_machine=$basic_machine-unknown + ;; + m6811 | m68hc11 | m6812 | m68hc12) + # Motorola 68HC11/12. 
+ basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + ;; + ms1) + basic_machine=mt-unknown + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. + i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. + 580-* \ + | a29k-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* | avr32-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ + | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | elxsi-* \ + | f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | i*86-* | i860-* | i960-* | ia64-* \ + | ip2k-* | iq2000-* \ + | m32c-* | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | maxq-* | mcore-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ + | mips64vr-* | mips64vrel-* \ + | mips64orion-* | mips64orionel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ + | mipsisa32-* | mipsisa32el-* \ + | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa64-* | mipsisa64el-* \ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | mt-* \ + | 
msp430-* \ + | nios-* | nios2-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ + | pyramid-* \ + | romp-* | rs6000-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \ + | tahoe-* | thumb-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tron-* \ + | v850-* | v850e-* | vax-* \ + | we32k-* \ + | x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \ + | xstormy16-* | xtensa-* \ + | ymp-* \ + | z8k-*) + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. + 386bsd) + basic_machine=i386-unknown + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + abacus) + basic_machine=abacus-unknown + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amd64) + basic_machine=x86_64-pc + ;; + amd64-*) + basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + 
balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd + ;; + cray | j90) + basic_machine=j90-cray + os=-unicos + ;; + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16c) + basic_machine=cr16c-unknown + os=-elf + ;; + crds | unos) + basic_machine=m68k-crds + ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + decsystem10* | dec10*) + basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2* | dpx2*-bull) + basic_machine=m68k-bull + os=-sysv3 + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) 
+ basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + hp300-*) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppa-next) + os=-nextstep3 + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; +# I'm not sure what "Sysv32" means. Should this be sysv3.2? 
+ i*86v32) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + i386-vsta | vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + m88k-omron*) + basic_machine=m88k-omron + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + mingw32) + basic_machine=i386-pc + os=-mingw32 + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mips3*-*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + ms1-*) + basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next ) + basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) 
+ basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + openrisc | openrisc-*) + basic_machine=or32-unknown + ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pc98) + basic_machine=i386-pc + ;; + pc98-*) + basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon | athlon_*) + basic_machine=i686-pc + ;; + pentiumii | pentium2 | pentiumiii | pentium3) + basic_machine=i686-pc + ;; + pentium4) + basic_machine=i786-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium4-*) + basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc) basic_machine=powerpc-unknown + ;; + ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle | ppc-le | powerpc-little) + 
basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64) basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64le | powerpc64little | ppc64-le | powerpc64-little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rdos) + basic_machine=i386-pc + os=-rdos + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sb1) + basic_machine=mipsisa64sb1-unknown + ;; + sb1el) + basic_machine=mipsisa64sb1el-unknown + ;; + sde) + basic_machine=mipsisa32-sde + os=-elf + ;; + sei) + basic_machine=mips-sei + os=-seiux + ;; + sequent) + basic_machine=i386-sequent + ;; + sh) + basic_machine=sh-hitachi + os=-hms + ;; + sh64) + basic_machine=sh64-unknown + ;; + sparclite-wrs | simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) 
+ basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + basic_machine=t90-cray + os=-unicos + ;; + tic54x | c54x*) + basic_machine=tic54x-unknown + os=-coff + ;; + tic55x | c55x*) + basic_machine=tic55x-unknown + os=-coff + ;; + tic6x | c6x*) + basic_machine=tic6x-unknown + os=-coff + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + tpf) + basic_machine=s390x-ibm + os=-tpf + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + z8k-*-coff) + basic_machine=z8k-unknown + os=-sim + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. 
+ w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + romp) + basic_machine=romp-ibm + ;; + mmix) + basic_machine=mmix-knuth + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp10) + # there are many clones, so DEC is not a safe bet + basic_machine=pdp10-unknown + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh[1234] | sh[24]a | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) + basic_machine=sparc-sun + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $basic_machine in + *-digital*) + basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases + # that might get confused with valid system types. + # -solaris* is a basic system type, with this one exception. + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -svr4*) + os=-sysv4 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # First accept the basic system types. + # The portable systems comes first. + # Each alternative MUST END IN A *, to match a version number. 
+ # -sysv* is not here because it comes later, after sysvr4. + -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ + | -openbsd* | -solidbsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ + | -chorusos* | -chorusrdb* \ + | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos* | -toppers*) + # Remember, each alternative MUST END IN *, to match a version number. 
+ ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto-qnx*) + ;; + -nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo $os | sed -e 's|mac|macos|'` + ;; + -linux-dietlibc) + os=-linux-dietlibc + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo $os | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo $os | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -os400*) + os=-os400 + ;; + -wince*) + os=-wince + ;; + -osfrose*) + os=-osfrose + ;; + -osf*) + os=-osf + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -atheos*) + os=-atheos + ;; + -syllable*) + os=-syllable + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -nova*) + os=-rtmk-nova + ;; + -ns2 ) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -tpf*) + os=-tpf + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. + -sysv*) + ;; + -ose*) + os=-ose + ;; + -es1800*) + os=-ose + ;; + -xenix) + os=-xenix + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -aros*) + os=-aros + ;; + -kaos*) + os=-kaos + ;; + -zvmoe) + os=-zvmoe + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. + os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. 
+# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + score-*) + os=-elf + ;; + spu-*) + os=-elf + ;; + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + c4x-* | tic4x-*) + os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + # This also exists in the configure program, but was not the + # default. + # os=-sunos4 + ;; + m68*-cisco) + os=-aout + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or32-*) + os=-coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. 
+ os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + *-be) + os=-beos + ;; + *-haiku) + os=-haiku + ;; + *-ibm) + os=-aix + ;; + *-knuth) + os=-mmixware + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next ) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-next) + os=-nextstep3 + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -os400*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -tpf*) + vendor=ibm + ;; + -vxsim* | -vxworks* | -windiss*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; + esac + basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` + ;; +esac + +echo $basic_machine$os +exit + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/mkl/basic/optional/ThreadPool/config/depcomp b/mkl/basic/optional/ThreadPool/config/depcomp new file mode 100755 index 0000000..ca5ea4e --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/depcomp @@ -0,0 +1,584 @@ +#! /bin/sh +# depcomp - compile a program generating dependencies as side-effects + +scriptversion=2006-10-15.18 + +# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006 Free Software +# Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+ +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Alexandre Oliva . + +case $1 in + '') + echo "$0: No command. Try \`$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: depcomp [--help] [--version] PROGRAM [ARGS] + +Run PROGRAMS ARGS to compile a file, generating dependencies +as side-effects. + +Environment variables: + depmode Dependency tracking mode. + source Source file read by `PROGRAMS ARGS'. + object Object file output by `PROGRAMS ARGS'. + DEPDIR directory where to store dependencies. + depfile Dependency file to output. + tmpdepfile Temporary file to use when outputing dependencies. + libtool Whether libtool is used (yes/no). + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "depcomp $scriptversion" + exit $? + ;; +esac + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi + +# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. +depfile=${depfile-`echo "$object" | + sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} +tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} + +rm -f "$tmpdepfile" + +# Some modes work just like other modes, but use different flags. We +# parameterize here, but still list the modes in the big case below, +# to make depend.m4 easier to write. Note that we *cannot* use a case +# here, because this file can only contain one case statement. 
+if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. + dashmflag=-xM + depmode=dashmstdout +fi + +case "$depmode" in +gcc3) +## gcc 3 implements dependency tracking that does exactly what +## we want. Yay! Note: for some reason libtool 1.4 doesn't like +## it if -MD -MP comes after the -MF stuff. Hmm. +## Unfortunately, FreeBSD c89 acceptance of flags depends upon +## the command line argument order; so add the flags where they +## appear in depend2.am. Note that the slowdown incurred here +## affects only configure: in makefiles, %FASTDEP% shortcuts this. + for arg + do + case $arg in + -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; + *) set fnord "$@" "$arg" ;; + esac + shift # fnord + shift # $arg + done + "$@" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + mv "$tmpdepfile" "$depfile" + ;; + +gcc) +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. +## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). +## - Using -M directly means running the compiler twice (even worse +## than renaming). + if test -z "$gccflag"; then + gccflag=-MD, + fi + "$@" -Wp,"$gccflag$tmpdepfile" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz +## The second -e expression handles DOS-style file names with drive letters. 
+ sed -e 's/^[^:]*: / /' \ + -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the `deleted header file' problem. +## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. + tr ' ' ' +' < "$tmpdepfile" | +## Some versions of gcc put a space before the `:'. On the theory +## that the space means something, we add a space to the output as +## well. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like `#:fec' to the end of the + # dependency line. + tr ' ' ' +' < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \ + tr ' +' ' ' >> $depfile + echo >> $depfile + + # The second pass generates a dummy entry for each header file. 
+ tr ' ' ' +' < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> $depfile + else + # The sourcefile does not contain any dependencies, so just + # store a dummy comment line, to avoid errors with the Makefile + # "include basename.Plo" scheme. + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts `$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + stripped=`echo "$object" | sed 's/\(.*\)\..*$/\1/'` + tmpdepfile="$stripped.u" + if test "$libtool" = yes; then + "$@" -Wc,-M + else + "$@" -M + fi + stat=$? + + if test -f "$tmpdepfile"; then : + else + stripped=`echo "$stripped" | sed 's,^.*/,,'` + tmpdepfile="$stripped.u" + fi + + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + + if test -f "$tmpdepfile"; then + outname="$stripped.o" + # Each line is of the form `foo.o: dependent.h'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed -e "s,^$outname:,$object :," < "$tmpdepfile" > "$depfile" + sed -e "s,^$outname: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile" + else + # The sourcefile does not contain any dependencies, so just + # store a dummy comment line, to avoid errors with the Makefile + # "include basename.Plo" scheme. + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +icc) + # Intel's C compiler understands `-MD -MF file'. However on + # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c + # ICC 7.0 will fill foo.d with something like + # foo.o: sub/foo.c + # foo.o: sub/foo.h + # which is wrong. 
We want: + # sub/foo.o: sub/foo.c + # sub/foo.o: sub/foo.h + # sub/foo.c: + # sub/foo.h: + # ICC 7.1 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using \ : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + + "$@" -MD -MF "$tmpdepfile" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" | + sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp2) + # The "hp" stanza above does not work with aCC (C++) and HP's ia64 + # compilers, which have integrated preprocessors. The correct option + # to use with these is +Maked; it writes dependencies to a file named + # 'foo.d', which lands next to the object file, wherever that + # happens to be. + # Much of this is similar to the tru64 case; see comments there. + dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` + test "x$dir" = "x$object" && dir= + base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir.libs/$base.d + "$@" -Wc,+Maked + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + "$@" +Maked + fi + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile1" "$tmpdepfile2" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile" + # Add `dependent.h:' lines. 
+ sed -ne '2,${; s/^ *//; s/ \\*$//; s/$/:/; p;}' "$tmpdepfile" >> "$depfile" + else + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" "$tmpdepfile2" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'. + # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in `foo.d' instead, so we check for that too. + # Subdirectories are respected. + dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` + test "x$dir" = "x$object" && dir= + base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` + + if test "$libtool" = yes; then + # With Tru64 cc, shared objects can also be used to make a + # static library. This mechanism is used in libtool 1.4 series to + # handle both shared and static libraries in a single compilation. + # With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d. + # + # With libtool 1.5 this exception was removed, and libtool now + # generates 2 separate objects for the 2 libraries. These two + # compilations output dependencies in $dir.libs/$base.o.d and + # in $dir$base.o.d. We have to check for both files, because + # one of the two compilations can be disabled. We should prefer + # $dir$base.o.d over $dir.libs/$base.o.d because the latter is + # automatically cleaned when .libs/ is deleted, while ignoring + # the former would cause a distcleancheck panic. + tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4 + tmpdepfile2=$dir$base.o.d # libtool 1.5 + tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5 + tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504 + "$@" -Wc,-MD + else + tmpdepfile1=$dir$base.o.d + tmpdepfile2=$dir$base.d + tmpdepfile3=$dir$base.d + tmpdepfile4=$dir$base.d + "$@" -MD + fi + + stat=$? 
+ if test $stat -eq 0; then : + else + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" + # That's a tab and a space in the []. + sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" + else + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test $1 != '--mode=compile'; do + shift + done + shift + fi + + # Remove `-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for `:' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise. + "$@" $dashmflag | + sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + tr ' ' ' +' < "$tmpdepfile" | \ +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. 
+ exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test $1 != '--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no + for arg in "$@"; do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. Remove + # the object too, otherwise makedepend will parse it as a source file. + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix="`echo $object | sed 's/^.*\././'`" + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + sed '1,2d' "$tmpdepfile" | tr ' ' ' +' | \ +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test $1 != '--mode=compile'; do + shift + done + shift + fi + + # Remove `-o $object'. 
+ IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E | + sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' | + sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o, + # because we must use -o when running libtool. + "$@" || exit $? + IFS=" " + for arg + do + case "$arg" in + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile" + echo " " >> "$depfile" + . 
"$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: diff --git a/mkl/basic/optional/ThreadPool/config/generate-makeoptions.pl b/mkl/basic/optional/ThreadPool/config/generate-makeoptions.pl new file mode 100755 index 0000000..a39223e --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/generate-makeoptions.pl @@ -0,0 +1,86 @@ +#!/usr/bin/perl -w +# +# This perl script grabs a bunch of make macro definitions +# generated for Teuchos that can be used in other makefiles. +# This is dumped to stdout and can be redirected to build +# a makefile. +# +# Note, this script must be maintained to be current for +# the Teuchos makefile. +# Usage: generate-makeoptions.pl makefile_name package_name +use strict; + +if( scalar(@ARGV) != 2 ) { # only the count is tested: defined(@ARGV) is a fatal error in modern Perl + die "Error, this script takes two and only two arguments (makefile_name package_name).!\n"; +} + +my $makefile_name = shift; +my $package_name = shift; + +# +# List the macros you want to grep and include in the output +# +my @macros = + ( + "CC" + ,"CXX" + ,"F77" + ,"CXXLD" + ,"DEFS" + ,"CPPFLAGS" + ,"CFLAGS" + ,"CXXFLAGS" + ,"FFLAGS" + ,"LDFLAGS" + ,"FLIBS" + ,"BLAS_LIBS" + ,"LAPACK_LIBS" + ,"prefix" + ,"AR" + ,"ALTERNATE_AR" + ,"libteuchos_a_AR" + ,"RANLIB" + ); + +open(FILE_IN, "<", $makefile_name) or die "The file $makefile_name could not be opended for input\n"; # low-precedence 'or' so that die tests the result of open, not the filename string +my @makefile_name_array = readline(*FILE_IN); # list context: slurp every line, newlines kept +close FILE_IN; + +# +# Find the above macros and prepend "${package_name}_" to each macro name.
+# Lines that end in a backslash pull the following continuation lines along unchanged. +my @new_macros; +my $add_next_line = 0; +foreach( @makefile_name_array ) { + my $line = $_; + if($add_next_line) { + push @new_macros, $line; + if( $line =~ /\\$/ ) { # backslash just before the trailing newline means the macro continues + $add_next_line = 1; + } + else { + $add_next_line = 0; + } + next; + } + #print "Line = $line"; + foreach( @macros ) { + my $macro_search = "^${_} "; + #print "Macro search = \'$macro_search\'\n"; + if( $line=~/$macro_search/ ) { + #print "Adding Macro!\n"; + my $find_str = '\(CXX\)'; + my $replace_str = "(${package_name}_CXX)"; + $line=~s/$find_str/$replace_str/; + push @new_macros, "${package_name}_${line}"; + if( $line =~ /\\$/ ) { # same continuation test as above + $add_next_line = 1; + } + else { + $add_next_line = 0; + } + } + } +} + +print join("",@new_macros); diff --git a/mkl/basic/optional/ThreadPool/config/install-sh b/mkl/basic/optional/ThreadPool/config/install-sh new file mode 100755 index 0000000..4fbbae7 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/install-sh @@ -0,0 +1,507 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2006-10-14.15 + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. +# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# `make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. + +nl=' +' +IFS=" "" $nl" + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit="${DOITPROG-}" +if test -z "$doit"; then + doit_exec=exec +else + doit_exec=$doit +fi + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. + +mvprog="${MVPROG-mv}" +cpprog="${CPPROG-cp}" +chmodprog="${CHMODPROG-chmod}" +chownprog="${CHOWNPROG-chown}" +chgrpprog="${CHGRPPROG-chgrp}" +stripprog="${STRIPPROG-strip}" +rmprog="${RMPROG-rm}" +mkdirprog="${MKDIRPROG-mkdir}" + +posix_glob= +posix_mkdir= + +# Desired mode of installed file. +mode=0755 + +chmodcmd=$chmodprog +chowncmd= +chgrpcmd= +stripcmd= +rmcmd="$rmprog -f" +mvcmd="$mvprog" +src= +dst= +dir_arg= +dstarg= +no_target_directory= + +usage="Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... 
-d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: +-c (ignored) +-d create directories instead of installing files. +-g GROUP $chgrpprog installed files to GROUP. +-m MODE $chmodprog installed files to MODE. +-o USER $chownprog installed files to USER. +-s $stripprog installed files. +-t DIRECTORY install into DIRECTORY. +-T report an error if DSTFILE is a directory. +--help display this help and exit. +--version display version info and exit. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG +" + +while test $# -ne 0; do + case $1 in + -c) shift + continue;; + + -d) dir_arg=true + shift + continue;; + + -g) chgrpcmd="$chgrpprog $2" + shift + shift + continue;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + shift + shift + case $mode in + *' '* | *' '* | *' +'* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + continue;; + + -o) chowncmd="$chownprog $2" + shift + shift + continue;; + + -s) stripcmd=$stripprog + shift + continue;; + + -t) dstarg=$2 + shift + shift + continue;; + + -T) no_target_directory=true + shift + continue;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac +done + +if test $# -ne 0 && test -z "$dir_arg$dstarg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dstarg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dstarg" + shift # fnord + fi + shift # arg + dstarg=$arg + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." 
>&2 + exit 1 + fi + # It's OK to call `install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +if test -z "$dir_arg"; then + trap '(exit $?); exit' 1 2 13 15 + + # Set umask so as not to create temps with too-generous modes. + # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. + *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names starting with `-'. + case $src in + -*) src=./$src ;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? + else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dstarg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + + dst=$dstarg + # Protect names starting with `-'. + case $dst in + -*) dst=./$dst ;; + esac + + # If destination is a directory, append the input filename; won't work + # if double slashes aren't ignored. + if test -d "$dst"; then + if test -n "$no_target_directory"; then + echo "$0: $dstarg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dst=$dstdir/`basename "$src"` + dstdir_status=0 + else + # Prefer dirname, but fall back on a substitute if dirname fails. + dstdir=` + (dirname "$dst") 2>/dev/null || + expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$dst" : 'X\(//\)[^/]' \| \ + X"$dst" : 'X\(//\)$' \| \ + X"$dst" : 'X\(/\)' \| . 
2>/dev/null || + echo X"$dst" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q' + ` + + test -d "$dstdir" + dstdir_status=$? + fi + fi + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # Create intermediate dirs using mode 755 as modified by the umask. + # This is like FreeBSD 'install' as of 1997-10-28. + umask=`umask` + case $stripcmd.$umask in + # Optimize common cases. + *[2367][2367]) mkdir_umask=$umask;; + .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; + + *[0-7]) + mkdir_umask=`expr $umask + 22 \ + - $umask % 100 % 40 + $umask % 20 \ + - $umask % 10 % 4 + $umask % 2 + `;; + *) mkdir_umask=$umask,go-w;; + esac + + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + case $umask in + *[123567][0-7][0-7]) + # POSIX mkdir -p sets u+wx bits regardless of umask, which + # is incompatible with FreeBSD 'install' when (umask & 300) != 0. + ;; + *) + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 + + if (umask $mkdir_umask && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writeable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. 
+ ls_ld_tmpdir=`ls -ld "$tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/d" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. + rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null + fi + trap '' 0;; + esac;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # The umask is ridiculous, or mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. Create the + # directory the slow way, step by step, checking for races as we go. + + case $dstdir in + /*) prefix=/ ;; + -*) prefix=./ ;; + *) prefix= ;; + esac + + case $posix_glob in + '') + if (set -f) 2>/dev/null; then + posix_glob=true + else + posix_glob=false + fi ;; + esac + + oIFS=$IFS + IFS=/ + $posix_glob && set -f + set fnord $dstdir + shift + $posix_glob && set +f + IFS=$oIFS + + prefixes= + + for d + do + test -z "$d" && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + (umask $mkdir_umask && # run the umask builtin, as the first attempt above does; 'umask=...' only assigned a variable + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. + test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. 
+ (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=$dstdir/_inst.$$_ + rmtmp=$dstdir/_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name. + (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \ + && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \ + && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \ + && { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # Now rename the file to the real destination. + { $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null \ + || { + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. 
+ { + if test -f "$dst"; then + $doit $rmcmd -f "$dst" 2>/dev/null \ + || { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null \ + && { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }; }\ + || { + echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + else + : + fi + } && + + # Now rename the file to the real destination. + $doit $mvcmd "$dsttmp" "$dst" + } + } || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: diff --git a/mkl/basic/optional/ThreadPool/config/missing b/mkl/basic/optional/ThreadPool/config/missing new file mode 100755 index 0000000..1c8ff70 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/missing @@ -0,0 +1,367 @@ +#! /bin/sh +# Common stub for a few missing GNU programs while installing. + +scriptversion=2006-05-10.23 + +# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006 +# Free Software Foundation, Inc. +# Originally by Fran,cois Pinard , 1996. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. 
+ +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +if test $# -eq 0; then + echo 1>&2 "Try \`$0 --help' for more information" + exit 1 +fi + +run=: +sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p' +sed_minuso='s/.* -o \([^ ]*\).*/\1/p' + +# In the cases where this matters, `missing' is being run in the +# srcdir already. +if test -f configure.ac; then + configure_ac=configure.ac +else + configure_ac=configure.in +fi + +msg="missing on your system" + +case $1 in +--run) + # Try to run requested program, and just exit if it succeeds. + run= + shift + "$@" && exit 0 + # Exit code 63 means version mismatch. This often happens + # when the user try to use an ancient version of a tool on + # a file that requires a minimum version. In this case we + # we should proceed has if the program had been absent, or + # if --run hadn't been passed. + if test $? = 63; then + run=: + msg="probably too old" + fi + ;; + + -h|--h|--he|--hel|--help) + echo "\ +$0 [OPTION]... PROGRAM [ARGUMENT]... + +Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an +error status if there is no known handling for PROGRAM. 
+ +Options: + -h, --help display this help and exit + -v, --version output version information and exit + --run try to run the given command, and emulate it if it fails + +Supported PROGRAM values: + aclocal touch file \`aclocal.m4' + autoconf touch file \`configure' + autoheader touch file \`config.h.in' + autom4te touch the output file, or create a stub one + automake touch all \`Makefile.in' files + bison create \`y.tab.[ch]', if possible, from existing .[ch] + flex create \`lex.yy.c', if possible, from existing .c + help2man touch the output file + lex create \`lex.yy.c', if possible, from existing .c + makeinfo touch the output file + tar try tar, gnutar, gtar, then tar without non-portable flags + yacc create \`y.tab.[ch]', if possible, from existing .[ch] + +Send bug reports to ." + exit $? + ;; + + -v|--v|--ve|--ver|--vers|--versi|--versio|--version) + echo "missing $scriptversion (GNU Automake)" + exit $? + ;; + + -*) + echo 1>&2 "$0: Unknown \`$1' option" + echo 1>&2 "Try \`$0 --help' for more information" + exit 1 + ;; + +esac + +# Now exit if we have it, but it failed. Also exit now if we +# don't have it and --version was passed (most likely to detect +# the program). +case $1 in + lex|yacc) + # Not GNU programs, they don't have --version. + ;; + + tar) + if test -n "$run"; then + echo 1>&2 "ERROR: \`tar' requires --run" + exit 1 + elif test "x$2" = "x--version" || test "x$2" = "x--help"; then + exit 1 + fi + ;; + + *) + if test -z "$run" && ($1 --version) > /dev/null 2>&1; then + # We have it, but it failed. + exit 1 + elif test "x$2" = "x--version" || test "x$2" = "x--help"; then + # Could not run --version or --help. This is probably someone + # running `$TOOL --version' or `$TOOL --help' to check whether + # $TOOL exists and not knowing $TOOL uses missing. + exit 1 + fi + ;; +esac + +# If it does not exist, or fails to run (possibly an outdated version), +# try to emulate it. +case $1 in + aclocal*) + echo 1>&2 "\ +WARNING: \`$1' is $msg. 
You should only need it if + you modified \`acinclude.m4' or \`${configure_ac}'. You might want + to install the \`Automake' and \`Perl' packages. Grab them from + any GNU archive site." + touch aclocal.m4 + ;; + + autoconf) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`${configure_ac}'. You might want to install the + \`Autoconf' and \`GNU m4' packages. Grab them from any GNU + archive site." + touch configure + ;; + + autoheader) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`acconfig.h' or \`${configure_ac}'. You might want + to install the \`Autoconf' and \`GNU m4' packages. Grab them + from any GNU archive site." + files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` + test -z "$files" && files="config.h" + touch_files= + for f in $files; do + case $f in + *:*) touch_files="$touch_files "`echo "$f" | + sed -e 's/^[^:]*://' -e 's/:.*//'`;; + *) touch_files="$touch_files $f.in";; + esac + done + touch $touch_files + ;; + + automake*) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'. + You might want to install the \`Automake' and \`Perl' packages. + Grab them from any GNU archive site." + find . -type f -name Makefile.am -print | + sed 's/\.am$/.in/' | + while read f; do touch "$f"; done + ;; + + autom4te) + echo 1>&2 "\ +WARNING: \`$1' is needed, but is $msg. + You might have modified some files without having the + proper tools for further handling them. + You can get \`$1' as part of \`Autoconf' from any GNU + archive site." + + file=`echo "$*" | sed -n "$sed_output"` + test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` + if test -f "$file"; then + touch $file + else + test -z "$file" || exec >$file + echo "#! 
/bin/sh" + echo "# Created by GNU Automake missing as a replacement of" + echo "# $ $@" + echo "exit 0" + chmod +x $file + exit 1 + fi + ;; + + bison|yacc) + echo 1>&2 "\ +WARNING: \`$1' $msg. You should only need it if + you modified a \`.y' file. You may need the \`Bison' package + in order for those modifications to take effect. You can get + \`Bison' from any GNU archive site." + rm -f y.tab.c y.tab.h + if test $# -ne 1; then + eval LASTARG="\${$#}" + case $LASTARG in + *.y) + SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` + if test -f "$SRCFILE"; then + cp "$SRCFILE" y.tab.c + fi + SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` + if test -f "$SRCFILE"; then + cp "$SRCFILE" y.tab.h + fi + ;; + esac + fi + if test ! -f y.tab.h; then + echo >y.tab.h + fi + if test ! -f y.tab.c; then + echo 'main() { return 0; }' >y.tab.c + fi + ;; + + lex|flex) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified a \`.l' file. You may need the \`Flex' package + in order for those modifications to take effect. You can get + \`Flex' from any GNU archive site." + rm -f lex.yy.c + if test $# -ne 1; then + eval LASTARG="\${$#}" + case $LASTARG in + *.l) + SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` + if test -f "$SRCFILE"; then + cp "$SRCFILE" lex.yy.c + fi + ;; + esac + fi + if test ! -f lex.yy.c; then + echo 'main() { return 0; }' >lex.yy.c + fi + ;; + + help2man) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified a dependency of a manual page. You may need the + \`Help2man' package in order for those modifications to take + effect. You can get \`Help2man' from any GNU archive site." + + file=`echo "$*" | sed -n "$sed_output"` + test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` + if test -f "$file"; then + touch $file + else + test -z "$file" || exec >$file + echo ".ab help2man is required to generate this page" + exit 1 + fi + ;; + + makeinfo) + echo 1>&2 "\ +WARNING: \`$1' is $msg. 
You should only need it if + you modified a \`.texi' or \`.texinfo' file, or any other file + indirectly affecting the aspect of the manual. The spurious + call might also be the consequence of using a buggy \`make' (AIX, + DU, IRIX). You might want to install the \`Texinfo' package or + the \`GNU make' package. Grab either from any GNU archive site." + # The file to touch is that specified with -o ... + file=`echo "$*" | sed -n "$sed_output"` + test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` + if test -z "$file"; then + # ... or it is the one specified with @setfilename ... + infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` + file=`sed -n ' + /^@setfilename/{ + s/.* \([^ ]*\) *$/\1/ + p + q + }' $infile` + # ... or it is derived from the source name (dir/f.texi becomes f.info) + test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info + fi + # If the file does not exist, the user really needs makeinfo; + # let's fail without touching anything. + test -f $file || exit 1 + touch $file + ;; + + tar) + shift + + # We have already tried tar in the generic part. + # Look for gnutar/gtar before invocation to avoid ugly error + # messages. + if (gnutar --version > /dev/null 2>&1); then + gnutar "$@" && exit 0 + fi + if (gtar --version > /dev/null 2>&1); then + gtar "$@" && exit 0 + fi + firstarg="$1" + if shift; then + case $firstarg in + *o*) + firstarg=`echo "$firstarg" | sed s/o//` + tar "$firstarg" "$@" && exit 0 + ;; + esac + case $firstarg in + *h*) + firstarg=`echo "$firstarg" | sed s/h//` + tar "$firstarg" "$@" && exit 0 + ;; + esac + fi + + echo 1>&2 "\ +WARNING: I can't seem to be able to run \`tar' with the given arguments. + You may want to install GNU tar or Free paxutils, or check the + command line arguments." + exit 1 + ;; + + *) + echo 1>&2 "\ +WARNING: \`$1' is needed, and is $msg. + You might have modified some files without having the + proper tools for further handling them. 
Check the \`README' file, + it often tells you about the needed prerequisites for installing + this package. You may also peek at any GNU archive site, in case + some other package would contain this missing \`$1' program." + exit 1 + ;; +esac + +exit 0 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: diff --git a/mkl/basic/optional/ThreadPool/config/replace-install-prefix.pl b/mkl/basic/optional/ThreadPool/config/replace-install-prefix.pl new file mode 100755 index 0000000..7523b08 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/replace-install-prefix.pl @@ -0,0 +1,89 @@ +#!/usr/bin/perl -w +use strict; +use Getopt::Long; +# +# This script is called to do a set of text replacements for installing +# a Makefile.export.package file so that external clients can use it. +# +# Read in commandline arguments +# +my $exec_prefix = ""; # [required] Abs path to base installation directory (i.e. --prefix=???
option passed to configure) +my $my_export_makefile = ""; # [required] Name only of installed Makefile.export.package file +my $my_top_srcdir = ""; # [required] Abs path to this package's top source directory +my $my_incl_dirs = ""; # [required] Abs path to this package's include directories +my $my_lib_dirs = ""; # [optional] Abs path to this package's library directories (if any exist) +my $dep_package_builddirs = ""; # [optional] Abs paths to other directly dependent framework package build directories (if any exist) +GetOptions( + "exec-prefix=s" => \$exec_prefix, + "my-export-makefile=s" => \$my_export_makefile, + "my-abs-top-srcdir=s" => \$my_top_srcdir, + "my-abs-incl-dirs=s" => \$my_incl_dirs, + "my-abs-lib-dirs=s" => \$my_lib_dirs, + "dep-package-abs-builddirs=s" => \$dep_package_builddirs + ); +# +# Validate commandline arguments +# +scalar(@ARGV) == 0 || die; +$exec_prefix ne "" || die; +$my_export_makefile ne "" || die; +$my_top_srcdir ne "" || die; +$my_incl_dirs ne "" || die; +# +# Interpret commandline arguments +# +$exec_prefix = remove_rel_paths($exec_prefix); +my @my_incl_dirs = split(":",$my_incl_dirs); +my @my_lib_dirs = split(":",$my_lib_dirs); +my @dep_export_package_builddirs = split(":",$dep_package_builddirs); +# +# Do the replacements +# +my $my_abs_export_makefile = "${exec_prefix}/include/${my_export_makefile}"; + +my $cmnd_base = "${my_top_srcdir}/config/token-replace.pl "; +# +foreach(@dep_export_package_builddirs) { + if($_ ne "") { + run_cmnd($cmnd_base . "${_} ${exec_prefix}/include ${my_abs_export_makefile} ${my_abs_export_makefile}"); + } +} +# +foreach(@my_incl_dirs) { + if($_ ne "") { + run_cmnd($cmnd_base . "-I${_} -I${exec_prefix}/include ${my_abs_export_makefile} ${my_abs_export_makefile}"); + } +} +# +foreach(@my_lib_dirs) { + if($_ ne "") { + run_cmnd($cmnd_base . "-L${_} -L${exec_prefix}/lib ${my_abs_export_makefile} ${my_abs_export_makefile}"); + } +} +# +run_cmnd($cmnd_base . 
"${my_top_srcdir}/config ${exec_prefix}/include ${my_abs_export_makefile} ${my_abs_export_makefile}"); +# +# Subroutines +# +sub remove_rel_paths { + my $entry_in = shift; + if ($entry_in=~/-L\.\./) { + return $entry_in; + } + my @paths = split("/",$entry_in); + my @new_paths; + foreach( @paths ) { + if( !($_=~/\.\./) ) { + push @new_paths, $_; + } + else { + pop @new_paths + } + } + return join("/",@new_paths); +} +sub run_cmnd { + my $cmnd = shift; + #print "\n", $cmnd, "\n"; + system($cmnd)==0 || die; +} diff --git a/mkl/basic/optional/ThreadPool/config/string-replace.pl b/mkl/basic/optional/ThreadPool/config/string-replace.pl new file mode 100755 index 0000000..adeb1f4 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/string-replace.pl @@ -0,0 +1,43 @@ +#!/usr/bin/perl -w +# +# This perl script replaces a string with another string. +# Here it is allowd for file_in and file_out to be the +# same file. +# +use strict; +# +my $g_use_msg = + "Use: string-replace.pl find_string replacement_string file_in file_out\n"; +if( scalar(@ARGV) < 4 ) { + print STDERR $g_use_msg; + exit(-1); +} +# +my $find_string = shift; +my $replacement_string = shift; +my $file_in_name = shift; +my $file_out_name = shift; +# +# +if($file_in_name=~/CVS/) { +# print "Do not replace in CVS\n"; + exit; +} +# +open FILE_IN, "<$file_in_name" || die "The file $file_in_name could not be opended for input\n"; +my @file_in_array = ; +close FILE_IN; +# +my @file_out_array; +my $did_replacement = 0; +foreach(@file_in_array) { + #print $_; + $did_replacement = 1 if $_=~s/$find_string/$replacement_string/g; + #print $_; + push @file_out_array, $_; +} +if($did_replacement || $file_out_name ne $file_in_name) { + open FILE_OUT, ">$file_out_name" || die "The file $file_out_name could not be opended for output\n"; + print FILE_OUT @file_out_array; + close FILE_OUT; +} diff --git a/mkl/basic/optional/ThreadPool/config/strip_dup_incl_paths.pl 
b/mkl/basic/optional/ThreadPool/config/strip_dup_incl_paths.pl
new file mode 100755
index 0000000..c628d31
--- /dev/null
+++ b/mkl/basic/optional/ThreadPool/config/strip_dup_incl_paths.pl
@@ -0,0 +1,44 @@
+#!/usr/bin/perl -w
+# This perl script removes duplicate -I include paths, scanning from left to right (the first occurrence is kept; entries not matching -I are always kept)
+use strict;
+my @all_incl_paths = @ARGV;
+my @cleaned_up_incl_paths;
+foreach( @all_incl_paths ) {
+  $_ = remove_rel_paths($_);
+  if( !($_=~/-I/) ) {
+    push @cleaned_up_incl_paths, $_;
+  }
+  elsif( !entry_exists($_,\@cleaned_up_incl_paths) ) {
+    push @cleaned_up_incl_paths, $_;
+  }
+}
+print join( " ", @cleaned_up_incl_paths );
+#
+# Subroutines
+#
+sub entry_exists {
+  my $entry = shift; # String
+  my $list = shift; # Reference to an array
+  foreach( @$list ) {
+    if( $entry eq $_ ) { return 1; }
+  }
+  return 0;
+}
+# Drops every path component containing ".." together with the component before it (entries matching -I.. are returned unchanged)
+sub remove_rel_paths {
+  my $entry_in = shift;
+  if ($entry_in=~/-I\.\./) {
+    return $entry_in;
+  }
+  my @paths = split("/",$entry_in);
+  my @new_paths;
+  foreach( @paths ) {
+    if( !($_=~/\.\./) ) {
+      push @new_paths, $_;
+    }
+    else {
+      pop @new_paths
+    }
+  }
+  return join("/",@new_paths);
+}
diff --git a/mkl/basic/optional/ThreadPool/config/strip_dup_libs.pl b/mkl/basic/optional/ThreadPool/config/strip_dup_libs.pl
new file mode 100755
index 0000000..cdf4b42
--- /dev/null
+++ b/mkl/basic/optional/ThreadPool/config/strip_dup_libs.pl
@@ -0,0 +1,69 @@
+#!/usr/bin/perl -w
+# This perl script removes duplicate libraries from the right to the left and
+# removes duplicate -L library paths from the left to the right
+use strict;
+
+my @all_libs = @ARGV;
+#
+# Move from right to left and remove duplicate libraries (entries not matching -L); -L entries are all kept in this pass
+#
+my @cleaned_up_libs_first;
+foreach( reverse @all_libs ) {
+  $_ = remove_rel_paths($_);
+  if( $_=~/-L/ ) {
+    unshift @cleaned_up_libs_first, $_;
+  }
+  else {
+    if( !entry_exists($_,\@cleaned_up_libs_first) ) {
+      unshift @cleaned_up_libs_first, $_;
+    }
+  }
+}
+
+#
+# Move from left to right and remove duplicate -L library paths
+#
+my @cleaned_up_libs;
+foreach( @cleaned_up_libs_first ) {
+  $_ = remove_rel_paths($_);
+  if( !($_=~/-L/) ) {
+    push @cleaned_up_libs, $_;
+  }
+  elsif( !entry_exists($_,\@cleaned_up_libs) ) {
+    push @cleaned_up_libs, $_;
+  }
+}
+#
+# Print the new list of libraries and paths
+#
+print join( " ", @cleaned_up_libs );
+
+#
+# Subroutines
+#
+sub entry_exists {
+  my $entry = shift; # String
+  my $list = shift; # Reference to an array
+  foreach( @$list ) {
+    if( $entry eq $_ ) { return 1; }
+  }
+  return 0;
+}
+#
+sub remove_rel_paths {
+  my $entry_in = shift;
+  if ($entry_in=~/-L\.\./) {
+    return $entry_in;
+  }
+  my @paths = split("/",$entry_in);
+  my @new_paths;
+  foreach( @paths ) {
+    if( !($_=~/\.\./) ) {
+      push @new_paths, $_;
+    }
+    else {
+      pop @new_paths
+    }
+  }
+  return join("/",@new_paths);
+}
diff --git a/mkl/basic/optional/ThreadPool/config/tac_arg_check_mpi.m4 b/mkl/basic/optional/ThreadPool/config/tac_arg_check_mpi.m4
new file mode 100644
index 0000000..10d569a
--- /dev/null
+++ b/mkl/basic/optional/ThreadPool/config/tac_arg_check_mpi.m4
@@ -0,0 +1,68 @@
+dnl @synopsis TAC_ARG_CHECK_MPI
+dnl
+dnl Check to make sure any definitions set in TAC_ARG_CONFIG_MPI
+dnl are valid, set the MPI flags. Test that mpi.h preprocesses with the C++ compiler.
+dnl
+dnl @author Mike Heroux
+dnl
+AC_DEFUN([TAC_ARG_CHECK_MPI],
+[
+
+if test "X${HAVE_PKG_MPI}" = "Xyes"; then
+
+  if test -n "${MPI_DIR}" && test -z "${MPI_INC}"; then
+    MPI_INC="${MPI_DIR}/include"
+  fi
+
+  if test -n "${MPI_INC}"; then
+    CPPFLAGS="${CPPFLAGS} -I${MPI_INC}"
+  fi
+
+  AC_LANG_CPLUSPLUS
+  AC_MSG_CHECKING(for mpi.h)
+  AC_TRY_CPP([#include "mpi.h"],
+    [AC_MSG_RESULT(yes)],
+    [
+     AC_MSG_RESULT(no)
+     echo "-----"
+     echo "Cannot find or preprocess mpi.h."
+     echo "Try --with-mpi-compilers to specify MPI compilers."
+     echo "Or try --with-mpi-libs, --with-mpi-incdir, --with-mpi-libdir"
+     echo "to specify all the specific MPI compile options."
+     echo "-----"
+     AC_MSG_ERROR([mpi.h not found or not usable])
+    ])
+
+  if test -n "${MPI_DIR}" && test -z "${MPI_LIBDIR}"; then
+    MPI_LIBDIR="${MPI_DIR}/lib"
+  fi
+
+  if test -n "${MPI_LIBDIR}"; then
+    LDFLAGS="${LDFLAGS} -L${MPI_LIBDIR}"
+  fi
+
+  if test -z "${MPI_LIBS}" && test -n "${MPI_LIBDIR}"; then
+    MPI_LIBS="-lmpi"
+  fi
+
+  if test -n "${MPI_LIBS}"; then
+    LIBS="${MPI_LIBS} ${LIBS}"
+  fi
+
+#  AC_LANG_CPLUSPLUS
+#  AC_MSG_CHECKING(whether MPI will link using C++ compiler)
+#  AC_TRY_LINK([#include <mpi.h>],
+#  [int c; char** v; MPI_Init(&c,&v);],
+#  [AC_MSG_RESULT(yes)],
+#  [AC_MSG_RESULT(no)
+#   echo "-----"
+#   echo "Cannot link simple MPI program."
+#   echo "Try --with-mpi-cxx to specify MPI C++ compile script."
+#   echo "Or try --with-mpi-libs, --with-mpi-incdir, --with-mpi-libdir"
+#   echo "to specify all the specific MPI compile options."
+#   echo "-----"
+#   AC_MSG_ERROR(MPI cannot link)]
+#  )
+
+fi
+])
diff --git a/mkl/basic/optional/ThreadPool/config/tac_arg_config_mpi.m4 b/mkl/basic/optional/ThreadPool/config/tac_arg_config_mpi.m4
new file mode 100644
index 0000000..2d1dd98
--- /dev/null
+++ b/mkl/basic/optional/ThreadPool/config/tac_arg_config_mpi.m4
@@ -0,0 +1,188 @@
+dnl @synopsis TAC_ARG_CONFIG_MPI
+dnl
+dnl Test a variety of MPI options:
+dnl --enable-mpi       - Turns MPI compiling mode on
+dnl --with-mpi         - specify root directory of MPI
+dnl --with-mpi-compilers - Turns on MPI compiling mode and sets the MPI C++
+dnl                       compiler = mpicxx, mpic++ or mpiCC,
+dnl                       the MPI C compiler = mpicc and
+dnl                       the MPI Fortran compiler = mpif77
+dnl --with-mpi-incdir - specify include directory for MPI
+dnl --with-mpi-libs    - specify MPI libraries
+dnl --with-mpi-libdir  - specify location of MPI libraries
+dnl
+dnl If any of these options are set, HAVE_MPI will be defined for both
+dnl Autoconf and Automake, and HAVE_MPI will be defined in the
+dnl generated config.h file
+dnl
+dnl
+dnl @author Mike Heroux
+dnl Modified 12/26/2007 by Jim Willenbring to skip the Fortran compiler
+dnl check if
Fortran is not enabled. +dnl +AC_DEFUN([TAC_ARG_CONFIG_MPI], +[ + +AC_ARG_ENABLE(mpi, +[AC_HELP_STRING([--enable-mpi],[MPI support])], +[HAVE_PKG_MPI=$enableval], +[HAVE_PKG_MPI=no] +) + +AC_ARG_WITH(mpi-compilers, +[AC_HELP_STRING([--with-mpi-compilers=PATH], +[use MPI compilers mpicc, mpif77, and mpicxx, mpic++ or mpiCC in the specified path or in the default path if no path is specified. Enables MPI])], +[ + if test X${withval} != Xno; then + HAVE_PKG_MPI=yes + if test X${withval} = Xyes; then + # Check for mpicxx, if it does not exist, check for mpic++, if it does + # not exist, use mpiCC instead. + AC_CHECK_PROG(MPI_TEMP_CXX, mpicxx, mpicxx, no) + if test X${MPI_TEMP_CXX} = Xno; then + AC_CHECK_PROG(MPI_CXX, mpic++, mpic++, mpiCC) + else + MPI_CXX=${MPI_TEMP_CXX} + fi + MPI_CC=mpicc + MPI_F77=mpif77 + else + if test -f ${withval}/mpicxx; then + MPI_CXX=${withval}/mpicxx + elif test -f ${withval}/mpic++; then + MPI_CXX=${withval}/mpic++ + else + MPI_CXX=${withval}/mpiCC + fi + MPI_CC=${withval}/mpicc + MPI_F77=${withval}/mpif77 + fi + fi +] +) + +AC_ARG_WITH(mpi, +[AC_HELP_STRING([--with-mpi=MPIROOT],[use MPI root directory (enables MPI)])], +[ + HAVE_PKG_MPI=yes + MPI_DIR=${withval} + AC_MSG_CHECKING(MPI directory) + AC_MSG_RESULT([${MPI_DIR}]) +] +) + +#AC_ARG_WITH(mpi-include, +#[AC_HELP_STRING([--with-mpi-include],[Obsolete. Use --with-mpi-incdir=DIR instead. Do not prefix DIR with '-I'.])], +#[AC_MSG_ERROR([--with-mpi-include is an obsolte option. Use --with-mpi-incdir=DIR instead. Do not prefix DIR with '-I'. 
For example '--with-mpi-incdir=/usr/lam_path/include'.])] +#) + +AC_ARG_WITH(mpi-libs, +[AC_HELP_STRING([--with-mpi-libs="LIBS"],[MPI libraries @<:@"-lmpi"@:>@])], +[ + MPI_LIBS=${withval} + AC_MSG_CHECKING(user-defined MPI libraries) + AC_MSG_RESULT([${MPI_LIBS}]) +] +) + +AC_ARG_WITH(mpi-incdir, +[AC_HELP_STRING([--with-mpi-incdir=DIR],[MPI include directory @<:@MPIROOT/include@:>@ Do not use -I])], +[ + MPI_INC=${withval} + AC_MSG_CHECKING(user-defined MPI includes) + AC_MSG_RESULT([${MPI_INC}]) +] +) + +AC_ARG_WITH(mpi-libdir, +[AC_HELP_STRING([--with-mpi-libdir=DIR],[MPI library directory @<:@MPIROOT/lib@:>@ Do not use -L])], +[ + MPI_LIBDIR=${withval} + AC_MSG_CHECKING(user-defined MPI library directory) + AC_MSG_RESULT([${MPI_LIBDIR}]) +] +) + +AC_MSG_CHECKING(whether we are using MPI) +AC_MSG_RESULT([${HAVE_PKG_MPI}]) + +if test "X${HAVE_PKG_MPI}" = "Xyes"; then + AC_DEFINE(HAVE_MPI,,[define if we want to use MPI]) +fi + +dnl Define Automake version of HAVE_MPI if appropriate + +AM_CONDITIONAL(HAVE_MPI, [test "X${HAVE_PKG_MPI}" = "Xyes"]) + + +dnl +dnl -------------------------------------------------------------------- +dnl Check for MPI compilers (must be done *before* AC_PROG_CXX, +dnl AC_PROG_CC and AC_PROG_F77) +dnl +dnl -------------------------------------------------------------------- + +if test -n "${MPI_CXX}"; then + if test -f ${MPI_CXX}; then + MPI_CXX_EXISTS=yes + else + AC_CHECK_PROG(MPI_CXX_EXISTS, ${MPI_CXX}, yes, no) + fi + + if test "X${MPI_CXX_EXISTS}" = "Xyes"; then + CXX=${MPI_CXX} + else + echo "-----" + echo "Cannot find MPI C++ compiler ${MPI_CXX}." 
+ echo "Specify a path to all mpi compilers with --with-mpi-compilers=PATH" + echo "or specify a C++ compiler using CXX=" + echo "Do not use --with-mpi-compilers if using CXX=" + echo "-----" + AC_MSG_ERROR([MPI C++ compiler (${MPI_CXX}) not found.]) + fi +fi + +if test -n "${MPI_CC}"; then + if test -f ${MPI_CC}; then + MPI_CC_EXISTS=yes + else + AC_CHECK_PROG(MPI_CC_EXISTS, ${MPI_CC}, yes, no) + fi + + if test "X${MPI_CC_EXISTS}" = "Xyes"; then + CC=${MPI_CC} + else + echo "-----" + echo "Cannot find MPI C compiler ${MPI_CC}." + echo "Specify a path to all mpi compilers with --with-mpi-compilers=PATH" + echo "or specify a C compiler using CC=" + echo "Do not use --with-mpi-compilers if using CC=" + echo "-----" + AC_MSG_ERROR([MPI C compiler (${MPI_CC}) not found.]) + fi +fi + +if test "X$ac_cv_use_fortran" = "Xyes"; then + +if test -n "${MPI_F77}"; then + if test -f ${MPI_F77}; then + MPI_F77_EXISTS=yes + else + AC_CHECK_PROG(MPI_F77_EXISTS, ${MPI_F77}, yes, no) + fi + + if test "X${MPI_F77_EXISTS}" = "Xyes"; then + F77=${MPI_F77} + else + echo "-----" + echo "Cannot find MPI Fortran compiler ${MPI_F77}." 
+ echo "Specify a path to all mpi compilers with --with-mpi-compilers=PATH" + echo "or specify a Fortran 77 compiler using F77=" + echo "Do not use --with-mpi-compilers if using F77=" + echo "-----" + AC_MSG_ERROR([MPI Fortran 77 compiler (${MPI_F77}) not found.]) + fi +fi + +fi dnl ac_cv_use_fortran +]) diff --git a/mkl/basic/optional/ThreadPool/config/tac_arg_enable_export-makefiles.m4 b/mkl/basic/optional/ThreadPool/config/tac_arg_enable_export-makefiles.m4 new file mode 100644 index 0000000..b7a8b38 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/tac_arg_enable_export-makefiles.m4 @@ -0,0 +1,76 @@ +dnl Enables export makefile specific code +dnl +dnl The following AM_CONDITIONALS are set for makefiles to access: +dnl USING_EXPORT_MAKEFILES +dnl USING_PERL via TAC_ARG_WITH_PERL +dnl USING_GNUMAKE +dnl +dnl The following AC_DEFINES are set: +dnl HAVE_EXPORT_MAKEFILES +dnl +dnl the following variables are set: +dnl PERL_EXE for the perl executable via TAC_ARG_WITH_PERL +dnl +dnl This file was based on tac_arg_enable_feature.m4 by Mike Heroux +dnl @author Roger Pawlowski +dnl +AC_DEFUN([TAC_ARG_ENABLE_EXPORT_MAKEFILES], +[ +AC_ARG_ENABLE(export-makefiles, +AC_HELP_STRING([--enable-export-makefiles],[Creates export makefiles in the install (prefix) directory. This option requires perl to be set in your path or defined with --with-perl=. Note that the export makefiles are always created and used in the build directory, but will not be installable without this option to change the paths. 
(default is $1)]), +ac_cv_use_export_makefiles=$enableval, +ac_cv_use_export_makefiles=$1) + +AC_MSG_CHECKING(whether to build export makefiles) + +if test "X$ac_cv_use_export_makefiles" != "Xno"; then + + AC_MSG_RESULT(yes) + AC_DEFINE([HAVE_EXPORT_MAKEFILES],,[Define if you want to build export makefiles.]) + +else + + AC_MSG_RESULT(no) + +fi + +AM_CONDITIONAL(USING_EXPORT_MAKEFILES, test X${ac_cv_use_export_makefiles} = Xyes) + +# Check for perl to run scripts (Required dependency) +TAC_ARG_WITH_PERL + +if test "X$HAVE_PERL" != "Xyes" && + test "X$ac_cv_use_export_makefiles" != "Xno"; then + AC_MSG_RESULT(no) + AC_MSG_ERROR([Failed to find the perl executable. The flag --enable-export-makefiles requires perl to be either in your path or explicitly defined by the flag --with-perl=. If you do not require the export makefiles to be installed via 'make install', you can disable the export makefiles with --disable-export-makefiles.]) +fi + +# Check for using gnumake to clean up link lines via +# gnumake's "shell" command. Optional dependency. +AC_DEFUN([TAC_ARG_WITH_GNUMAKE], +[ +AC_ARG_WITH(gnumake, +AC_HELP_STRING([--with-gnumake],[Gnu's make has special functions we can use to eliminate redundant paths in the build and link lines. Enable this if you use gnu-make to build Trilinos. This requires that perl is in your path or that you have specified the perl executable with --with-perl=. Configure will check for the existence of the perl executable and quit with an error if it is not found. 
(default is no)]), +ac_cv_use_gnumake=$withval, ac_cv_use_gnumake=no) + +AC_MSG_CHECKING(whether gnumake specific code should be enabled) + +if test "X$ac_cv_use_gnumake" != "Xno"; then + AC_MSG_RESULT(yes) + AC_DEFINE([HAVE_GNUMAKE],,[Define if you are using gnumake - this will shorten your link lines.]) +else + AC_MSG_RESULT(no) +fi +AM_CONDITIONAL(USING_GNUMAKE, test "X$ac_cv_use_gnumake" = "Xyes") +]) + +TAC_ARG_WITH_GNUMAKE + +if test "X$HAVE_PERL" != "Xyes" && + test "X$ac_cv_use_gnumake" != "Xno"; then + AC_MSG_RESULT(no) + AC_MSG_ERROR([The flag --with-gnumake requires perl to be in your path. The perl executable can alternatively be explicitly defined by the flag --with-perl=.]) +fi + +]) + diff --git a/mkl/basic/optional/ThreadPool/config/tac_arg_enable_feature.m4 b/mkl/basic/optional/ThreadPool/config/tac_arg_enable_feature.m4 new file mode 100644 index 0000000..4e22753 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/tac_arg_enable_feature.m4 @@ -0,0 +1,40 @@ +dnl @synopsis TAC_ARG_ENABLE_FEATURE(FEATURE_NAME, FEATURE_DESCRIPTION, HAVE_NAME, DEFAULT_VAL) +dnl +dnl Test for --enable-${FEATURE_NAME} and set to DEFAULT_VAL value if feature not specified. +dnl Also calls AC_DEFINE to define HAVE_${HAVE_NAME} if value is not equal to "no" +dnl +dnl Use this macro to help defining whether or not optional +dnl features* should compiled. For example: +dnl +dnl TAC_ARG_ENABLE_FEATURE(epetra, [Configure and build epetra], EPETRA, yes) +dnl +dnl will test for --enable-epetra when configure is run. If it is defined +dnl and not set to "no" or not defined (default is "yes") then HAVE_EPETRA will +dnl be defined, if --enable-epetra is defined to be "no", HAVE_EPETRA will not +dnl be defined. +dnl +dnl *NOTE: epetra, aztecoo, komplex, ifpack, and other software found in +dnl subdirectories of Trilinos/packages are "packages" in their own right. +dnl However, these packages are also "features" of the larger package +dnl "Trilinos". 
Therefore, when configuring from the Trilinos directory, +dnl it is appropriate to refer to these software packages as "features". +dnl +dnl This file was based on tac_arg_with_package.m4 by Mike Heroux +dnl @author James Willenbring +dnl +AC_DEFUN([TAC_ARG_ENABLE_FEATURE], +[ +AC_ARG_ENABLE([$1], +AC_HELP_STRING([--enable-$1],[$2 (default is [$4])]), +ac_cv_use_$1=$enableval, ac_cv_use_$1=$4) + +AC_MSG_CHECKING(whether to use [$1]) + +if test "X$ac_cv_use_$1" != "Xno"; then + AC_MSG_RESULT(yes) + AC_DEFINE([HAVE_$3],,[Define if want to build $1]) +else + AC_MSG_RESULT(no) +fi +]) + diff --git a/mkl/basic/optional/ThreadPool/config/tac_arg_enable_feature_sub_check.m4 b/mkl/basic/optional/ThreadPool/config/tac_arg_enable_feature_sub_check.m4 new file mode 100755 index 0000000..b3876fd --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/tac_arg_enable_feature_sub_check.m4 @@ -0,0 +1,54 @@ +dnl @synopsis TAC_ARG_ENABLE_FEATURE_SUB_CHECK(FEATURE_NAME, SUB_FEATURE_NAME, FEATURE_DESCRIPTION, HAVE_NAME) +dnl +dnl This hack gets around the fact that TAC_ARG_ENABLE_FEATURE does not support underscores +dnl in its feature names. TAC_ARG_ENABLE_FEATURE_SUB_CHECK allows exactly one underscore. Not great, +dnl but arguably better than supporting no underscores. +dnl +dnl TAC_ARG_ENABLE_FEATURE(feature-sub, [Configure and build feature-sub], FEATURE_SUB, yes) +dnl fails because tac_arg_enable_feature tests for ac_cv_use_feature-sub which gets +dnl rejected because the `-' is not allowed in variables. (AC_ARG_ENABLE sets ac_cv_use_feature_sub +dnl to avoid this problem.) Use: +dnl +dnl TAC_ARG_ENABLE_FEATURE_SUB_CHECK(feature, sub, [Configure and build feature-sub], FEATURE_SUB) +dnl instead. +dnl +dnl This macro will test for --enable-${FEATURE_NAME}-${SUB_FEATURE_NAME} when configure is run. +dnl If it is defined and not set to "no" or not defined and --disable-${SUB_FEATURE_NAME} is not +dnl specified then HAVE_${HAVE_NAME} will be defined. 
+dnl +dnl *NOTE: This macro is designed for the use-case when there is an individual Trilinos package +dnl offering fine-grained control of a Trilinos option. This way, the individual package +dnl option is enabled, as long as the Trilinos option is not disabled. If the Trilinos option is +dnl disabled, then the user must enable each packages option individually. For instance: +dnl +dnl --disable-tests --enable-teuchos-tests +dnl +dnl *NOTE: epetra, aztecoo, komplex, ifpack, and other software found in +dnl subdirectories of Trilinos/packages are "packages" in their own right. +dnl However, these packages are also "features" of the larger package +dnl "Trilinos". Therefore, when configuring from the Trilinos directory, +dnl it is appropriate to refer to these software packages as "features". +dnl +dnl This file was based on tac_arg_enable_package.m4 by Jim Willenbring +dnl and tac_arg_enable_package_sub.m4 by Ken Stanley. +dnl +dnl @author Heidi Thornquist +dnl +AC_DEFUN([TAC_ARG_ENABLE_FEATURE_SUB_CHECK], +[ +AC_ARG_ENABLE([$2],, ac_cv_use_$2=$enableval, ac_cv_use_$2=yes) + +AC_ARG_ENABLE([$1-$2], +AC_HELP_STRING([--enable-$1-$2],[$3 (default is yes if --disable-$2 is not specified)]), +ac_cv_use_$1_$2=$enableval, ac_cv_use_$1_$2=${ac_cv_use_$2}) + +AC_MSG_CHECKING(whether to use [$1-$2]) + +if test "X$ac_cv_use_$1_$2" != "Xno"; then + AC_MSG_RESULT(yes) + AC_DEFINE([HAVE_$4],,[Define if want to build $1-$2]) +else + AC_MSG_RESULT(no) +fi +]) + diff --git a/mkl/basic/optional/ThreadPool/config/tac_arg_with_ar.m4 b/mkl/basic/optional/ThreadPool/config/tac_arg_with_ar.m4 new file mode 100644 index 0000000..9568f3e --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/tac_arg_with_ar.m4 @@ -0,0 +1,39 @@ +dnl @synopsis TAC_ARG_WITH_AR +dnl +dnl Test for --with-ar="ar_program ar_flags". +dnl Default is "ar cru" +dnl +dnl Generates an Automake conditional USE_ALTERNATE_AR that can be tested. +dnl Generates the user-specified archiver command in @ALTERNATE_AR@. 
+dnl +dnl @author Mike Heroux +dnl +AC_DEFUN([TAC_ARG_WITH_AR], +[ +AC_ARG_WITH(ar, +AC_HELP_STRING([--with-ar], [override archiver command (default is "ar cru")]), +[ +AC_MSG_CHECKING(user-defined archiver) +AC_MSG_RESULT([${withval}]) +USE_ALTERNATE_AR=yes +ALTERNATE_AR="${withval}" +] +) + +if test -n "${SPECIAL_AR}" && test "X${USE_ALTERNATE_AR}" != "Xyes"; +then + USE_ALTERNATE_AR=yes + ALTERNATE_AR="${SPECIAL_AR}" +fi + +AC_MSG_CHECKING(for special archiver command) +if test "X${USE_ALTERNATE_AR}" = "Xyes"; then + AC_MSG_RESULT([${ALTERNATE_AR}]) + AM_CONDITIONAL(USE_ALTERNATE_AR, true) +else + AC_MSG_RESULT([none]) + AM_CONDITIONAL(USE_ALTERNATE_AR, false) +fi +AC_SUBST(ALTERNATE_AR) +]) + diff --git a/mkl/basic/optional/ThreadPool/config/tac_arg_with_flags.m4 b/mkl/basic/optional/ThreadPool/config/tac_arg_with_flags.m4 new file mode 100644 index 0000000..256450a --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/tac_arg_with_flags.m4 @@ -0,0 +1,31 @@ +dnl @synopsis TAC_ARG_WITH_FLAGS(lcase_name, UCASE_NAME) +dnl +dnl Test for --with-lcase_name="compiler/loader flags". if defined, prepend +dnl flags to standard UCASE_NAME definition. +dnl +dnl Use this macro to facilitate additional special flags that should be +dnl passed on to the preprocessor/compilers/loader. 
+dnl +dnl Example use +dnl +dnl TAC_ARG_WITH_FLAGS(cxxflags, CXXFLAGS) +dnl +dnl tests for --with-cxxflags and pre-pends to CXXFLAGS +dnl +dnl +dnl @author Mike Heroux +dnl +AC_DEFUN([TAC_ARG_WITH_FLAGS], +[ +AC_MSG_CHECKING([whether additional [$2] flags should be added]) +AC_ARG_WITH($1, +AC_HELP_STRING([--with-$1], +[additional [$2] flags to be added: will prepend to [$2]]), +[ +$2="${withval} ${$2}" +AC_MSG_RESULT([$2 = ${$2}]) +], +AC_MSG_RESULT(no) +) +]) + diff --git a/mkl/basic/optional/ThreadPool/config/tac_arg_with_incdirs.m4 b/mkl/basic/optional/ThreadPool/config/tac_arg_with_incdirs.m4 new file mode 100644 index 0000000..f3092e5 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/tac_arg_with_incdirs.m4 @@ -0,0 +1,24 @@ +dnl @synopsis TAC_ARG_WITH_INCDIRS +dnl +dnl Test for --with-incdirs="-Iincdir1 -Iincdir2". if defined, prepend +dnl "-Iincdir1 -Iincdir2" to CPPFLAGS +dnl +dnl Use this macro to facilitate addition of directories to include file search path. +dnl +dnl +dnl @author Mike Heroux +dnl +AC_DEFUN([TAC_ARG_WITH_INCDIRS], +[ +AC_MSG_CHECKING([whether additional include search paths defined]) +AC_ARG_WITH(incdirs, +AC_HELP_STRING([--with-incdirs], +[additional directories containing include files: will prepend to search here for includes, use -Idir format]), +[ +CPPFLAGS="${withval} ${CPPFLAGS}" +AC_MSG_RESULT([${withval}]) +], +AC_MSG_RESULT(no) +) +]) + diff --git a/mkl/basic/optional/ThreadPool/config/tac_arg_with_libdirs.m4 b/mkl/basic/optional/ThreadPool/config/tac_arg_with_libdirs.m4 new file mode 100644 index 0000000..b2f9438 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/tac_arg_with_libdirs.m4 @@ -0,0 +1,24 @@ +dnl @synopsis TAC_ARG_WITH_LIBDIRS +dnl +dnl Test for --with-libdirs="-Llibdir1 -Llibdir2". if defined, +dnl prepend "-Llibdir1 -Llibdir2" to LDFLAGS +dnl +dnl Use this macro to facilitate addition of directories to library search path. 
+dnl +dnl +dnl @author Mike Heroux +dnl +AC_DEFUN([TAC_ARG_WITH_LIBDIRS], +[ +AC_MSG_CHECKING([whether additional library search paths defined]) +AC_ARG_WITH(libdirs, +AC_HELP_STRING([--with-libdirs], +[OBSOLETE use --with-ldflags instead. (ex. --with-ldflags="-L -L")]), +[ +LDFLAGS="${withval} ${LDFLAGS}" +AC_MSG_RESULT([${withval}]) +], +AC_MSG_RESULT(no) +) +]) + diff --git a/mkl/basic/optional/ThreadPool/config/tac_arg_with_libs.m4 b/mkl/basic/optional/ThreadPool/config/tac_arg_with_libs.m4 new file mode 100644 index 0000000..3a64880 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/tac_arg_with_libs.m4 @@ -0,0 +1,30 @@ +dnl @synopsis TAC_ARG_WITH_LIBS +dnl +dnl Test for --with-libs="name(s)". +dnl +dnl Prepends the specified name(s) to the list of libraries to link +dnl with. +dnl +dnl Example use +dnl +dnl TAC_ARG_WITH_LIBS +dnl +dnl tests for --with-libs and pre-pends to LIBS +dnl +dnl @author Jim Willenbring +dnl +AC_DEFUN([TAC_ARG_WITH_LIBS], +[ +AC_MSG_CHECKING([whether additional libraries are needed]) +AC_ARG_WITH(libs, +AC_HELP_STRING([--with-libs], +[List additional libraries here. For example, --with-libs=-lsuperlu +or --with-libs=/path/libsuperlu.a]), +[ +LIBS="${withval} ${LIBS}" +AC_MSG_RESULT([LIBS = ${LIBS}]) +], +AC_MSG_RESULT(no) +) +] +) diff --git a/mkl/basic/optional/ThreadPool/config/tac_arg_with_perl.m4 b/mkl/basic/optional/ThreadPool/config/tac_arg_with_perl.m4 new file mode 100644 index 0000000..63e74ba --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/tac_arg_with_perl.m4 @@ -0,0 +1,34 @@ +dnl @synopsis TAC_ARG_WITH_PERL +dnl +dnl Test for --with-perl=/path/to/perl. If given, checks that the file exists; +dnl otherwise looks for "perl" on the PATH. Sets HAVE_PERL to yes/no and substitutes PERL_EXE. +dnl Calls AM_CONDITIONAL to define USING_PERL to true/false. 
+dnl +dnl This file was based on tac_arg_with_ar.m4 by Mike Heroux +dnl @author Roger Pawlowski +dnl +AC_DEFUN([TAC_ARG_WITH_PERL], +[ + +AC_ARG_WITH(perl, +AC_HELP_STRING([--with-perl], [supply a perl executable. For example --with-perl=/usr/bin/perl.]), +[ +AC_MSG_CHECKING(for user supplied perl executable) +AC_MSG_RESULT([${withval}]) +USER_SPECIFIED_PERL=yes +PERL_EXE="${withval}" +], +[ +USER_SPECIFIED_PERL=no +]) + +if test "X${USER_SPECIFIED_PERL}" = "Xyes"; then + AC_CHECK_FILE(${PERL_EXE}, [HAVE_PERL=yes], [HAVE_PERL=no]) + AC_SUBST(PERL_EXE, ${PERL_EXE}) +else + AC_CHECK_PROG(HAVE_PERL, perl, yes, no) + AC_SUBST(PERL_EXE, perl) +fi +AM_CONDITIONAL(USING_PERL, test X${HAVE_PERL} = Xyes) +]) + diff --git a/mkl/basic/optional/ThreadPool/config/token-replace.pl b/mkl/basic/optional/ThreadPool/config/token-replace.pl new file mode 100755 index 0000000..c3b413e --- /dev/null +++ b/mkl/basic/optional/ThreadPool/config/token-replace.pl @@ -0,0 +1,43 @@ +#!/usr/bin/perl -w +# +# This perl script replaces a string with another string +# on a token basis. Here it is allowed for file_in and +# file_out to be the same file. +# +use strict; +# +my $g_use_msg = + "Use: token-replace.pl find_token replacement_token file_in file_out\n"; +if( scalar(@ARGV) < 4 ) { + print STDERR $g_use_msg; + exit(-1); +} +# +my $find_token = shift; +my $replacement_token = shift; +my $file_in_name = shift; +my $file_out_name = shift; +# +#print "file_in_name = $file_in_name\n"; +if($file_in_name=~/CVS/) { +# print "Do not replace in CVS\n"; + exit; +} +open(FILE_IN, '<', $file_in_name) or die "The file $file_in_name could not be opened for input: $!\n"; +my @file_in_array = <FILE_IN>; +close FILE_IN; +# +my $match_str = '([^\w\d_]|^)' . $find_token . '([^\w\d_]|$)'; +#print $match_str . 
"\n"; +# +my @file_out_array; +my $did_replacement = 0; +foreach(@file_in_array) { + $did_replacement = 1 if $_=~s/$match_str/$1$replacement_token$2/g; + push @file_out_array, $_; +} +if($did_replacement || $file_out_name ne $file_in_name) { + open(FILE_OUT, '>', $file_out_name) or die "The file $file_out_name could not be opened for output: $!\n"; + print FILE_OUT @file_out_array; + close FILE_OUT; +} diff --git a/mkl/basic/optional/ThreadPool/configure b/mkl/basic/optional/ThreadPool/configure new file mode 100755 index 0000000..6312db9 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/configure @@ -0,0 +1,7804 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.61 for ThreadPool 1.1d. +# +# Report bugs to . +# +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, +# 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + + + +# PATH needs CR +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! 
/bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +as_nl=' +' +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + { (exit 1); exit 1; } +fi + +# Work around bugs in pre-3.0 UWIN ksh. +for as_var in ENV MAIL MAILPATH +do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var + fi +done + +# Required to use basename. 
+if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# CDPATH. +$as_unset CDPATH + + +if test "x$CONFIG_SHELL" = x; then + if (eval ":") 2>/dev/null; then + as_have_required=yes +else + as_have_required=no +fi + + if test $as_have_required = yes && (eval ": +(as_func_return () { + (exit \$1) +} +as_func_success () { + as_func_return 0 +} +as_func_failure () { + as_func_return 1 +} +as_func_ret_success () { + return 0 +} +as_func_ret_failure () { + return 1 +} + +exitcode=0 +if as_func_success; then + : +else + exitcode=1 + echo as_func_success failed. +fi + +if as_func_failure; then + exitcode=1 + echo as_func_failure succeeded. +fi + +if as_func_ret_success; then + : +else + exitcode=1 + echo as_func_ret_success failed. +fi + +if as_func_ret_failure; then + exitcode=1 + echo as_func_ret_failure succeeded. +fi + +if ( set x; as_func_ret_success y && test x = \"\$1\" ); then + : +else + exitcode=1 + echo positional parameters were not saved. +fi + +test \$exitcode = 0) || { (exit 1); exit 1; } + +( + as_lineno_1=\$LINENO + as_lineno_2=\$LINENO + test \"x\$as_lineno_1\" != \"x\$as_lineno_2\" && + test \"x\`expr \$as_lineno_1 + 1\`\" = \"x\$as_lineno_2\") || { (exit 1); exit 1; } +") 2> /dev/null; then + : +else + as_candidate_shells= + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ case $as_dir in + /*) + for as_base in sh bash ksh sh5; do + as_candidate_shells="$as_candidate_shells $as_dir/$as_base" + done;; + esac +done +IFS=$as_save_IFS + + + for as_shell in $as_candidate_shells $SHELL; do + # Try only shells that exist, to save several forks. + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { ("$as_shell") 2> /dev/null <<\_ASEOF +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + +: +_ASEOF +}; then + CONFIG_SHELL=$as_shell + as_have_required=yes + if { "$as_shell" 2> /dev/null <<\_ASEOF +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + +: +(as_func_return () { + (exit $1) +} +as_func_success () { + as_func_return 0 +} +as_func_failure () { + as_func_return 1 +} +as_func_ret_success () { + return 0 +} +as_func_ret_failure () { + return 1 +} + +exitcode=0 +if as_func_success; then + : +else + exitcode=1 + echo as_func_success failed. +fi + +if as_func_failure; then + exitcode=1 + echo as_func_failure succeeded. +fi + +if as_func_ret_success; then + : +else + exitcode=1 + echo as_func_ret_success failed. +fi + +if as_func_ret_failure; then + exitcode=1 + echo as_func_ret_failure succeeded. +fi + +if ( set x; as_func_ret_success y && test x = "$1" ); then + : +else + exitcode=1 + echo positional parameters were not saved. 
+fi + +test $exitcode = 0) || { (exit 1); exit 1; } + +( + as_lineno_1=$LINENO + as_lineno_2=$LINENO + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2") || { (exit 1); exit 1; } + +_ASEOF +}; then + break +fi + +fi + + done + + if test "x$CONFIG_SHELL" != x; then + for as_var in BASH_ENV ENV + do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var + done + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"} +fi + + + if test $as_have_required = no; then + echo This script requires a shell more modern than all the + echo shells that I found on your system. Please install a + echo modern shell, or manually run the script under such a + echo shell if you do have one. + { (exit 1); exit 1; } +fi + + +fi + +fi + + + +(eval "as_func_return () { + (exit \$1) +} +as_func_success () { + as_func_return 0 +} +as_func_failure () { + as_func_return 1 +} +as_func_ret_success () { + return 0 +} +as_func_ret_failure () { + return 1 +} + +exitcode=0 +if as_func_success; then + : +else + exitcode=1 + echo as_func_success failed. +fi + +if as_func_failure; then + exitcode=1 + echo as_func_failure succeeded. +fi + +if as_func_ret_success; then + : +else + exitcode=1 + echo as_func_ret_success failed. +fi + +if as_func_ret_failure; then + exitcode=1 + echo as_func_ret_failure succeeded. +fi + +if ( set x; as_func_ret_success y && test x = \"\$1\" ); then + : +else + exitcode=1 + echo positional parameters were not saved. +fi + +test \$exitcode = 0") || { + echo No shell found that supports shell functions. + echo Please tell autoconf@gnu.org about your system, + echo including any error possibly output before this + echo message +} + + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. 
The first 'sed' inserts a + # line-number line after each line using $LINENO; the second 'sed' + # does the real work. The second script uses 'N' to pair each + # line-number line with the line containing $LINENO, and appends + # trailing '-' during substitution so that $LINENO is not a special + # case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # scripts with optimization help from Paolo Bonzini. Blame Lee + # E. McMahon (1931-1989) for sed's syntax. :-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in +-n*) + case `echo 'x\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + *) ECHO_C='\c';; + esac;; +*) + ECHO_N='-n';; +esac + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir +fi +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. 
+ # In both cases, we have to default to `cp -p'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -p' +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +if test -x / >/dev/null 2>&1; then + as_test_x='test -x' +else + if ls -dL / >/dev/null 2>&1; then + as_ls_L_option=L + else + as_ls_L_option= + fi + as_test_x=' + eval sh -c '\'' + if test -d "$1"; then + test -d "$1/."; + else + case $1 in + -*)set "./$1";; + esac; + case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in + ???[sx]*):;;*)false;;esac;fi + '\'' sh + ' +fi +as_executable_p=$as_test_x + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + + +exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= +SHELL=${CONFIG_SHELL-/bin/sh} + +# Identity of this package. +PACKAGE_NAME='ThreadPool' +PACKAGE_TARNAME='threadpool' +PACKAGE_VERSION='1.1d' +PACKAGE_STRING='ThreadPool 1.1d' +PACKAGE_BUGREPORT='hcedwar@sandia.gov' + +ac_unique_file="src/TPI.c" +# Factoring default headers for most tests. 
+ac_includes_default="\ +#include +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef STDC_HEADERS +# include +# include +#else +# ifdef HAVE_STDLIB_H +# include +# endif +#endif +#ifdef HAVE_STRING_H +# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +# include +# endif +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='SHELL +PATH_SEPARATOR +PACKAGE_NAME +PACKAGE_TARNAME +PACKAGE_VERSION +PACKAGE_STRING +PACKAGE_BUGREPORT +exec_prefix +prefix +program_transform_name +bindir +sbindir +libexecdir +datarootdir +datadir +sysconfdir +sharedstatedir +localstatedir +includedir +oldincludedir +docdir +infodir +htmldir +dvidir +pdfdir +psdir +libdir +localedir +mandir +DEFS +ECHO_C +ECHO_N +ECHO_T +LIBS +build_alias +host_alias +target_alias +MAINTAINER_MODE_TRUE +MAINTAINER_MODE_FALSE +MAINT +build +build_cpu +build_vendor +build_os +host +host_cpu +host_vendor +host_os +target +target_cpu +target_vendor +target_os +INSTALL_PROGRAM +INSTALL_SCRIPT +INSTALL_DATA +am__isrc +CYGPATH_W +PACKAGE +VERSION +ACLOCAL +AUTOCONF +AUTOMAKE +AUTOHEADER +MAKEINFO +install_sh +STRIP +INSTALL_STRIP_PROGRAM +mkdir_p +AWK +SET_MAKE +am__leading_dot +AMTAR +am__tar +am__untar +MPI_TEMP_CXX +MPI_CXX +HAVE_MPI_TRUE +HAVE_MPI_FALSE +MPI_CXX_EXISTS +MPI_CC_EXISTS +MPI_F77_EXISTS +CC +CFLAGS +LDFLAGS +CPPFLAGS +ac_ct_CC +EXEEXT +OBJEXT +DEPDIR +am__include +am__quote +AMDEP_TRUE +AMDEP_FALSE +AMDEPBACKSLASH +CCDEPMODE +am__fastdepCC_TRUE +am__fastdepCC_FALSE +CXX +CXXFLAGS +ac_ct_CXX +CXXDEPMODE +am__fastdepCXX_TRUE +am__fastdepCXX_FALSE +RANLIB +USE_ALTERNATE_AR_TRUE +USE_ALTERNATE_AR_FALSE +ALTERNATE_AR +CXXCPP +USING_EXPORT_MAKEFILES_TRUE +USING_EXPORT_MAKEFILES_FALSE +PERL_EXE +HAVE_PERL +USING_PERL_TRUE +USING_PERL_FALSE +USING_GNUMAKE_TRUE +USING_GNUMAKE_FALSE +BUILD_TESTS_TRUE 
+BUILD_TESTS_FALSE +SUB_TEST_TRUE +SUB_TEST_FALSE +GREP +EGREP +PTHREAD_CC +PTHREAD_LIBS +PTHREAD_CFLAGS +ac_aux_dir +LIBOBJS +LTLIBOBJS' +ac_subst_files='' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +CXX +CXXFLAGS +CCC +CXXCPP' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. 
+ expr "x$ac_feature" : ".*[^-._$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/[-.]/_/g'` + eval enable_$ac_feature=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_feature" : ".*[^-._$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/[-.]/_/g'` + eval enable_$ac_feature=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + 
-sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-._$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package | sed 's/[-.]/_/g'` + eval with_$ac_package=\$ac_optarg ;; + + -without-* | --without-*) + ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-._$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package | sed 's/[-.]/_/g'` + eval with_$ac_package=no ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) { echo "$as_me: error: unrecognized option: $ac_option +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; } + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid variable name: $ac_envvar" >&2 + { (exit 1); exit 1; }; } + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + { echo "$as_me: error: missing argument to $ac_option" >&2 + { (exit 1); exit 1; }; } +fi + +# Be sure to have absolute directory names. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; } +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. + If a cross compiler is detected then cross compile mode will be used." >&2 + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + { echo "$as_me: error: Working directory cannot be determined" >&2 + { (exit 1); exit 1; }; } +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + { echo "$as_me: error: pwd does not report name of working directory" >&2 + { (exit 1); exit 1; }; } + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$0" || +$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$0" : 'X\(//\)[^/]' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 
2>/dev/null || +echo X"$0" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 + { (exit 1); exit 1; }; } +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || { echo "$as_me: error: $ac_msg" >&2 + { (exit 1); exit 1; }; } + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures ThreadPool 1.1d to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. 
+ +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/threadpool] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF + +Program names: + --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM run sed PROGRAM on installed program names + +System types: + --build=BUILD configure for building on BUILD [guessed] + --host=HOST cross-compile to build programs to run on HOST [BUILD] + --target=TARGET configure for building compilers for TARGET [HOST] +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of ThreadPool 1.1d:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-maintainer-mode enable make rules and dependencies not useful + (and sometimes confusing) to 
the casual installer + --enable-mpi MPI support + --disable-dependency-tracking speeds up one-time build + --enable-dependency-tracking do not reject slow dependency extractors + --enable-export-makefiles + Creates export makefiles in the install (prefix) + directory. This option requires perl to be set in + your path or defined with --with-perl=. Note that the export makefiles are + always created and used in the build directory, but + will not be installable without this option to + change the paths. (default is yes) + --enable-tests Make tests for all Trilinos packages buildable with + 'make tests' (default is yes) + + --enable-threadpool-tests + Make ThreadPool tests buildable with 'make tests' + (default is yes if --disable-tests is not specified) + --enable-libcheck Check for some third-party libraries. (Cannot be + disabled unless tests and examples are also + disabled.) (default is yes) + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-install=INSTALL_PROGRAM + Use the installation program INSTALL_PROGRAM rather + the default that is provided. For example + --with-install="/path/install -p" + --with-mpi-compilers=PATH + use MPI compilers mpicc, mpif77, and mpicxx, mpic++ + or mpiCC in the specified path or in the default + path if no path is specified. Enables MPI + --with-mpi=MPIROOT use MPI root directory (enables MPI) + --with-mpi-libs="LIBS" MPI libraries ["-lmpi"] + --with-mpi-incdir=DIR MPI include directory [MPIROOT/include] Do not use + -I + --with-mpi-libdir=DIR MPI library directory [MPIROOT/lib] Do not use -L + --with-ccflags additional CCFLAGS flags to be added: will prepend + to CCFLAGS + --with-cxxflags additional CXXFLAGS flags to be added: will + prepend to CXXFLAGS + --with-cflags additional CFLAGS flags to be added: will prepend + to CFLAGS + --with-libs List additional libraries here. 
For example, + --with-libs=-lsuperlu or + --with-libs=/path/libsuperlu.a + --with-ldflags additional LDFLAGS flags to be added: will prepend + to LDFLAGS + --with-ar override archiver command (default is "ar cru") + --with-perl supply a perl executable. For example + --with-perl=/usr/bin/perl. + --with-gnumake Gnu's make has special functions we can use to + eliminate redundant paths in the build and link + lines. Enable this if you use gnu-make to build + Trilinos. This requires that perl is in your path or + that you have specified the perl executable with + --with-perl=. Configure will check + for the existence of the perl executable and quit + with an error if it is not found. (default is no) + --with-libdirs OBSOLETE use --with-ldflags instead. (ex. + --with-ldflags="-L -L") + --with-incdirs additional directories containing include files: + will prepend to search here for includes, use -Idir + format + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I if + you have headers in a nonstandard directory + CXX C++ compiler command + CXXFLAGS C++ compiler flags + CXXCPP C++ preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to . +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A ".." for each directory in $ac_dir_suffix. 
+ ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +ThreadPool configure 1.1d +generated by GNU Autoconf 2.61 + +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, +2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by ThreadPool $as_me 1.1d, which was +generated by GNU Autoconf 2.61. 
Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + echo "PATH: $as_dir" +done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. 
+ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;; + 2) + ac_configure_args1="$ac_configure_args1 '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + ac_configure_args="$ac_configure_args '$ac_arg'" + ;; + esac + done +done +$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; } +$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; } + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + cat <<\_ASBOX +## ---------------- ## +## Cache variables. 
## +## ---------------- ## +_ASBOX + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5 +echo "$as_me: WARNING: Cache variable $ac_var contains a newline." >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + *) $as_unset $ac_var ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + cat <<\_ASBOX +## ----------------- ## +## Output variables. ## +## ----------------- ## +_ASBOX + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + cat <<\_ASBOX +## ------------------- ## +## File substitutions. ## +## ------------------- ## +_ASBOX + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + cat <<\_ASBOX +## ----------- ## +## confdefs.h. 
## +## ----------- ## +_ASBOX + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + echo "$as_me: caught signal $ac_signal" + echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer explicitly selected file to automatically selected ones. +if test -n "$CONFIG_SITE"; then + set x "$CONFIG_SITE" +elif test "x$prefix" != xNONE; then + set x "$prefix/share/config.site" "$prefix/etc/config.site" +else + set x "$ac_default_prefix/share/config.site" \ + "$ac_default_prefix/etc/config.site" +fi +shift +for ac_site_file +do + if test -r "$ac_site_file"; then + { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5 +echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special + # files actually), so we avoid doing that. + if test -f "$cache_file"; then + { echo "$as_me:$LINENO: loading cache $cache_file" >&5 +echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . 
"./$cache_file";; + esac + fi +else + { echo "$as_me:$LINENO: creating cache $cache_file" >&5 +echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. +ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5 +echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + { echo "$as_me:$LINENO: former value: $ac_old_val" >&5 +echo "$as_me: former value: $ac_old_val" >&2;} + { echo "$as_me:$LINENO: current value: $ac_new_val" >&5 +echo "$as_me: current value: $ac_new_val" >&2;} + ac_cache_corrupted=: + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. 
+ *) ac_configure_args="$ac_configure_args '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5 +echo "$as_me: error: changes in the environment can compromise the build" >&2;} + { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5 +echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;} + { (exit 1); exit 1; }; } +fi + + + + + + + + + + + + + + + + + + + + + + + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +# Hello World! +echo "----------------------------------------" +echo "Running ThreadPool Configure Script" +echo "----------------------------------------" + +# This is to protect against accidentally specifying the wrong +# directory with --srcdir. Any file in that directory will do, +# preferably one that is unlikely to be removed or renamed. + + + +# Specify directory for auxillary build tools (e.g., install-sh, +# config.sub, config.guess) and M4 files. 
+ +ac_aux_dir= +for ac_dir in config "$srcdir"/config; do + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + { { echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in config \"$srcdir\"/config" >&5 +echo "$as_me: error: cannot find install-sh or install.sh in config \"$srcdir\"/config" >&2;} + { (exit 1); exit 1; }; } +fi + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. +ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. +ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. + + +# #auto np# - Change file names in next line +# Configure should create src/ThreadPool_config.h from src/ThreadPool_config.h.in + +ac_config_headers="$ac_config_headers src/ThreadPool_config.h:src/ThreadPool_config.h.in" + + +# Allow users to specify their own "install" command. If none is specified, +# the default is install-sh found in the config subdirectory. + + +# Check whether --with-install was given. +if test "${with_install+set}" = set; then + withval=$with_install; + INSTALL=$withval + INSTALL_PROGRAM=$withval + INSTALL_SCRIPT=$withval + INSTALL_DATA="$withval -m 644" + +fi + + +# AM_MAINTAINER_MODE turns off maintainer-only makefile targets by +# default, and changes configure to understand a +# --enable-maintainer-mode option. --enable-maintainer-mode turns the +# maintainer-only targets back on. 
The maintainer-only makefile +# targets permit end users to clean automatically-generated files such +# as configure, which means they have to have autoconf and automake +# installed to repair the damage. AM_MAINTAINER_MODE makes it a bit +# harder for users to shoot themselves in the foot. + +{ echo "$as_me:$LINENO: checking whether to enable maintainer-specific portions of Makefiles" >&5 +echo $ECHO_N "checking whether to enable maintainer-specific portions of Makefiles... $ECHO_C" >&6; } + # Check whether --enable-maintainer-mode was given. +if test "${enable_maintainer_mode+set}" = set; then + enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval +else + USE_MAINTAINER_MODE=no +fi + + { echo "$as_me:$LINENO: result: $USE_MAINTAINER_MODE" >&5 +echo "${ECHO_T}$USE_MAINTAINER_MODE" >&6; } + if test $USE_MAINTAINER_MODE = yes; then + MAINTAINER_MODE_TRUE= + MAINTAINER_MODE_FALSE='#' +else + MAINTAINER_MODE_TRUE='#' + MAINTAINER_MODE_FALSE= +fi + + MAINT=$MAINTAINER_MODE_TRUE + + + +# Define $build, $host, $target, etc + +# Make sure we can run config.sub. +$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || + { { echo "$as_me:$LINENO: error: cannot run $SHELL $ac_aux_dir/config.sub" >&5 +echo "$as_me: error: cannot run $SHELL $ac_aux_dir/config.sub" >&2;} + { (exit 1); exit 1; }; } + +{ echo "$as_me:$LINENO: checking build system type" >&5 +echo $ECHO_N "checking build system type... 
$ECHO_C" >&6; } +if test "${ac_cv_build+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_build_alias=$build_alias +test "x$ac_build_alias" = x && + ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` +test "x$ac_build_alias" = x && + { { echo "$as_me:$LINENO: error: cannot guess build type; you must specify one" >&5 +echo "$as_me: error: cannot guess build type; you must specify one" >&2;} + { (exit 1); exit 1; }; } +ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || + { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $ac_build_alias failed" >&5 +echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $ac_build_alias failed" >&2;} + { (exit 1); exit 1; }; } + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_build" >&5 +echo "${ECHO_T}$ac_cv_build" >&6; } +case $ac_cv_build in +*-*-*) ;; +*) { { echo "$as_me:$LINENO: error: invalid value of canonical build" >&5 +echo "$as_me: error: invalid value of canonical build" >&2;} + { (exit 1); exit 1; }; };; +esac +build=$ac_cv_build +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_build +shift +build_cpu=$1 +build_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +build_os=$* +IFS=$ac_save_IFS +case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac + + +{ echo "$as_me:$LINENO: checking host system type" >&5 +echo $ECHO_N "checking host system type... 
$ECHO_C" >&6; } +if test "${ac_cv_host+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test "x$host_alias" = x; then + ac_cv_host=$ac_cv_build +else + ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || + { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&5 +echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&2;} + { (exit 1); exit 1; }; } +fi + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_host" >&5 +echo "${ECHO_T}$ac_cv_host" >&6; } +case $ac_cv_host in +*-*-*) ;; +*) { { echo "$as_me:$LINENO: error: invalid value of canonical host" >&5 +echo "$as_me: error: invalid value of canonical host" >&2;} + { (exit 1); exit 1; }; };; +esac +host=$ac_cv_host +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_host +shift +host_cpu=$1 +host_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +host_os=$* +IFS=$ac_save_IFS +case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac + + +{ echo "$as_me:$LINENO: checking target system type" >&5 +echo $ECHO_N "checking target system type... 
$ECHO_C" >&6; } +if test "${ac_cv_target+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test "x$target_alias" = x; then + ac_cv_target=$ac_cv_host +else + ac_cv_target=`$SHELL "$ac_aux_dir/config.sub" $target_alias` || + { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $target_alias failed" >&5 +echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $target_alias failed" >&2;} + { (exit 1); exit 1; }; } +fi + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_target" >&5 +echo "${ECHO_T}$ac_cv_target" >&6; } +case $ac_cv_target in +*-*-*) ;; +*) { { echo "$as_me:$LINENO: error: invalid value of canonical target" >&5 +echo "$as_me: error: invalid value of canonical target" >&2;} + { (exit 1); exit 1; }; };; +esac +target=$ac_cv_target +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_target +shift +target_cpu=$1 +target_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +target_os=$* +IFS=$ac_save_IFS +case $target_os in *\ *) target_os=`echo "$target_os" | sed 's/ /-/g'`;; esac + + +# The aliases save the names the user supplied, while $host etc. +# will get canonicalized. +test -n "$target_alias" && + test "$program_prefix$program_suffix$program_transform_name" = \ + NONENONEs,x,x, && + program_prefix=${target_alias}- + +# Use automake + +# - Required version of automake. +am__api_version='1.10' + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. 
But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. +{ echo "$as_me:$LINENO: checking for a BSD-compatible install" >&5 +echo $ECHO_N "checking for a BSD-compatible install... $ECHO_C" >&6; } +if test -z "$INSTALL"; then +if test "${ac_cv_path_install+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. +case $as_dir/ in + ./ | .// | /cC/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:\\/os2\\/install\\/* | ?:\\/OS2\\/INSTALL\\/* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; }; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. 
+ : + else + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + done + done + ;; +esac +done +IFS=$as_save_IFS + + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. Don't cache a + # value for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + INSTALL=$ac_install_sh + fi +fi +{ echo "$as_me:$LINENO: result: $INSTALL" >&5 +echo "${ECHO_T}$INSTALL" >&6; } + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +{ echo "$as_me:$LINENO: checking whether build environment is sane" >&5 +echo $ECHO_N "checking whether build environment is sane... $ECHO_C" >&6; } +# Just in case +sleep 1 +echo timestamp > conftest.file +# Do `set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null` + if test "$*" = "X"; then + # -L didn't work. + set X `ls -t $srcdir/configure conftest.file` + fi + rm -f conftest.file + if test "$*" != "X $srcdir/configure conftest.file" \ + && test "$*" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". 
+ { { echo "$as_me:$LINENO: error: ls -t appears to fail. Make sure there is not a broken +alias in your environment" >&5 +echo "$as_me: error: ls -t appears to fail. Make sure there is not a broken +alias in your environment" >&2;} + { (exit 1); exit 1; }; } + fi + + test "$2" = conftest.file + ) +then + # Ok. + : +else + { { echo "$as_me:$LINENO: error: newly created file is older than distributed files! +Check your system clock" >&5 +echo "$as_me: error: newly created file is older than distributed files! +Check your system clock" >&2;} + { (exit 1); exit 1; }; } +fi +{ echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } +test "$program_prefix" != NONE && + program_transform_name="s&^&$program_prefix&;$program_transform_name" +# Use a double $ so make ignores it. +test "$program_suffix" != NONE && + program_transform_name="s&\$&$program_suffix&;$program_transform_name" +# Double any \ or $. echo might interpret backslashes. +# By default was `s,x,x', remove it if useless. +cat <<\_ACEOF >conftest.sed +s/[\\$]/&&/g;s/;s,x,x,$// +_ACEOF +program_transform_name=`echo $program_transform_name | sed -f conftest.sed` +rm -f conftest.sed + +# expand $ac_aux_dir to an absolute path +am_aux_dir=`cd $ac_aux_dir && pwd` + +test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing" +# Use eval to expand $SHELL +if eval "$MISSING --run true"; then + am_missing_run="$MISSING --run " +else + am_missing_run= + { echo "$as_me:$LINENO: WARNING: \`missing' script is too old or missing" >&5 +echo "$as_me: WARNING: \`missing' script is too old or missing" >&2;} +fi + +{ echo "$as_me:$LINENO: checking for a thread-safe mkdir -p" >&5 +echo $ECHO_N "checking for a thread-safe mkdir -p... 
$ECHO_C" >&6; } +if test -z "$MKDIR_P"; then + if test "${ac_cv_path_mkdir+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in mkdir gmkdir; do + for ac_exec_ext in '' $ac_executable_extensions; do + { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; } || continue + case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #( + 'mkdir (GNU coreutils) '* | \ + 'mkdir (coreutils) '* | \ + 'mkdir (fileutils) '4.1*) + ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext + break 3;; + esac + done + done +done +IFS=$as_save_IFS + +fi + + if test "${ac_cv_path_mkdir+set}" = set; then + MKDIR_P="$ac_cv_path_mkdir -p" + else + # As a last resort, use the slow shell script. Don't cache a + # value for MKDIR_P within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + test -d ./--version && rmdir ./--version + MKDIR_P="$ac_install_sh -d" + fi +fi +{ echo "$as_me:$LINENO: result: $MKDIR_P" >&5 +echo "${ECHO_T}$MKDIR_P" >&6; } + +mkdir_p="$MKDIR_P" +case $mkdir_p in + [\\/$]* | ?:[\\/]*) ;; + */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;; +esac + +for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_AWK+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_AWK="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + { echo "$as_me:$LINENO: result: $AWK" >&5 +echo "${ECHO_T}$AWK" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$AWK" && break +done + +{ echo "$as_me:$LINENO: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +echo $ECHO_N "checking whether ${MAKE-make} sets \$(MAKE)... $ECHO_C" >&6; } +set x ${MAKE-make}; ac_make=`echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` +if { as_var=ac_cv_prog_make_${ac_make}_set; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.make <<\_ACEOF +SHELL = /bin/sh +all: + @echo '@@@%%%=$(MAKE)=@@@%%%' +_ACEOF +# GNU make sometimes prints "make[1]: Entering...", which would confuse us. +case `${MAKE-make} -f conftest.make 2>/dev/null` in + *@@@%%%=?*=@@@%%%*) + eval ac_cv_prog_make_${ac_make}_set=yes;; + *) + eval ac_cv_prog_make_${ac_make}_set=no;; +esac +rm -f conftest.make +fi +if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + SET_MAKE= +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + SET_MAKE="MAKE=${MAKE-make}" +fi + +rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null + +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." 
+ am__isrc=' -I$(srcdir)' + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + { { echo "$as_me:$LINENO: error: source directory already configured; run \"make distclean\" there first" >&5 +echo "$as_me: error: source directory already configured; run \"make distclean\" there first" >&2;} + { (exit 1); exit 1; }; } + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi + + +# Define the identity of the package. + PACKAGE='threadpool' + VERSION='1.1d' + + +# Some tools Automake needs. + +ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} + + +AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} + + +AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} + + +AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} + + +MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} + +install_sh=${install_sh-"\$(SHELL) $am_aux_dir/install-sh"} + +# Installed binaries are usually stripped using `strip' when the user +# run `make install-strip'. However `strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the `STRIP' environment variable to overrule this program. +if test "$cross_compiling" != no; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_STRIP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { echo "$as_me:$LINENO: result: $STRIP" >&5 +echo "${ECHO_T}$STRIP" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_ac_ct_STRIP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_STRIP="strip" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { echo "$as_me:$LINENO: result: $ac_ct_STRIP" >&5 +echo "${ECHO_T}$ac_ct_STRIP" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. 
If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&5 +echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" + +# We need awk for the "check" target. The system "awk" is bad on +# some platforms. +# Always define AMTAR for backward compatibility. + +AMTAR=${AMTAR-"${am_missing_run}tar"} + + +{ echo "$as_me:$LINENO: checking how to create a ustar tar archive" >&5 +echo $ECHO_N "checking how to create a ustar tar archive... $ECHO_C" >&6; } +# Loop over all known methods to create a tar archive until one works. +_am_tools='gnutar plaintar pax cpio none' +_am_tools=${am_cv_prog_tar_ustar-$_am_tools} +# Do not fold the above two line into one, because Tru64 sh and +# Solaris sh will not grok spaces in the rhs of `-'. +for _am_tool in $_am_tools +do + case $_am_tool in + gnutar) + for _am_tar in tar gnutar gtar; + do + { echo "$as_me:$LINENO: $_am_tar --version" >&5 + ($_am_tar --version) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && break + done + am__tar="$_am_tar --format=ustar -chf - "'"$$tardir"' + am__tar_="$_am_tar --format=ustar -chf - "'"$tardir"' + am__untar="$_am_tar -xf -" + ;; + plaintar) + # Must skip GNU tar: if it does not support --format= it doesn't create + # ustar tarball either. 
+ (tar --version) >/dev/null 2>&1 && continue + am__tar='tar chf - "$$tardir"' + am__tar_='tar chf - "$tardir"' + am__untar='tar xf -' + ;; + pax) + am__tar='pax -L -x ustar -w "$$tardir"' + am__tar_='pax -L -x ustar -w "$tardir"' + am__untar='pax -r' + ;; + cpio) + am__tar='find "$$tardir" -print | cpio -o -H ustar -L' + am__tar_='find "$tardir" -print | cpio -o -H ustar -L' + am__untar='cpio -i -H ustar -d' + ;; + none) + am__tar=false + am__tar_=false + am__untar=false + ;; + esac + + # If the value was cached, stop now. We just wanted to have am__tar + # and am__untar set. + test -n "${am_cv_prog_tar_ustar}" && break + + # tar/untar a dummy directory, and stop if the command works + rm -rf conftest.dir + mkdir conftest.dir + echo GrepMe > conftest.dir/file + { echo "$as_me:$LINENO: tardir=conftest.dir && eval $am__tar_ >conftest.tar" >&5 + (tardir=conftest.dir && eval $am__tar_ >conftest.tar) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + rm -rf conftest.dir + if test -s conftest.tar; then + { echo "$as_me:$LINENO: $am__untar &5 + ($am__untar &5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + grep GrepMe conftest.dir/file >/dev/null 2>&1 && break + fi +done +rm -rf conftest.dir + +if test "${am_cv_prog_tar_ustar+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + am_cv_prog_tar_ustar=$_am_tool +fi + +{ echo "$as_me:$LINENO: result: $am_cv_prog_tar_ustar" >&5 +echo "${ECHO_T}$am_cv_prog_tar_ustar" >&6; } + + + + + + +# Specify required version of autoconf. + + + +# ------------------------------------------------------------------------ +# Check to see if MPI enabled and if any special configuration done +# ------------------------------------------------------------------------ + + + +# Check whether --enable-mpi was given. 
+if test "${enable_mpi+set}" = set; then + enableval=$enable_mpi; HAVE_PKG_MPI=$enableval +else + HAVE_PKG_MPI=no + +fi + + + +# Check whether --with-mpi-compilers was given. +if test "${with_mpi_compilers+set}" = set; then + withval=$with_mpi_compilers; + if test X${withval} != Xno; then + HAVE_PKG_MPI=yes + if test X${withval} = Xyes; then + # Check for mpicxx, if it does not exist, check for mpic++, if it does + # not exist, use mpiCC instead. + # Extract the first word of "mpicxx", so it can be a program name with args. +set dummy mpicxx; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_MPI_TEMP_CXX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$MPI_TEMP_CXX"; then + ac_cv_prog_MPI_TEMP_CXX="$MPI_TEMP_CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_MPI_TEMP_CXX="mpicxx" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_prog_MPI_TEMP_CXX" && ac_cv_prog_MPI_TEMP_CXX="no" +fi +fi +MPI_TEMP_CXX=$ac_cv_prog_MPI_TEMP_CXX +if test -n "$MPI_TEMP_CXX"; then + { echo "$as_me:$LINENO: result: $MPI_TEMP_CXX" >&5 +echo "${ECHO_T}$MPI_TEMP_CXX" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + if test X${MPI_TEMP_CXX} = Xno; then + # Extract the first word of "mpic++", so it can be a program name with args. +set dummy mpic++; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6; } +if test "${ac_cv_prog_MPI_CXX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$MPI_CXX"; then + ac_cv_prog_MPI_CXX="$MPI_CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_MPI_CXX="mpic++" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_prog_MPI_CXX" && ac_cv_prog_MPI_CXX="mpiCC" +fi +fi +MPI_CXX=$ac_cv_prog_MPI_CXX +if test -n "$MPI_CXX"; then + { echo "$as_me:$LINENO: result: $MPI_CXX" >&5 +echo "${ECHO_T}$MPI_CXX" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + else + MPI_CXX=${MPI_TEMP_CXX} + fi + MPI_CC=mpicc + MPI_F77=mpif77 + else + if test -f ${withval}/mpicxx; then + MPI_CXX=${withval}/mpicxx + elif test -f ${withval}/mpic++; then + MPI_CXX=${withval}/mpic++ + else + MPI_CXX=${withval}/mpiCC + fi + MPI_CC=${withval}/mpicc + MPI_F77=${withval}/mpif77 + fi + fi + + +fi + + + +# Check whether --with-mpi was given. +if test "${with_mpi+set}" = set; then + withval=$with_mpi; + HAVE_PKG_MPI=yes + MPI_DIR=${withval} + { echo "$as_me:$LINENO: checking MPI directory" >&5 +echo $ECHO_N "checking MPI directory... $ECHO_C" >&6; } + { echo "$as_me:$LINENO: result: ${MPI_DIR}" >&5 +echo "${ECHO_T}${MPI_DIR}" >&6; } + + +fi + + +#AC_ARG_WITH(mpi-include, +#[AC_HELP_STRING([--with-mpi-include],[Obsolete. Use --with-mpi-incdir=DIR instead. Do not prefix DIR with '-I'.])], +#[AC_MSG_ERROR([--with-mpi-include is an obsolte option. Use --with-mpi-incdir=DIR instead. Do not prefix DIR with '-I'. For example '--with-mpi-incdir=/usr/lam_path/include'.])] +#) + + +# Check whether --with-mpi-libs was given. 
+if test "${with_mpi_libs+set}" = set; then + withval=$with_mpi_libs; + MPI_LIBS=${withval} + { echo "$as_me:$LINENO: checking user-defined MPI libraries" >&5 +echo $ECHO_N "checking user-defined MPI libraries... $ECHO_C" >&6; } + { echo "$as_me:$LINENO: result: ${MPI_LIBS}" >&5 +echo "${ECHO_T}${MPI_LIBS}" >&6; } + + +fi + + + +# Check whether --with-mpi-incdir was given. +if test "${with_mpi_incdir+set}" = set; then + withval=$with_mpi_incdir; + MPI_INC=${withval} + { echo "$as_me:$LINENO: checking user-defined MPI includes" >&5 +echo $ECHO_N "checking user-defined MPI includes... $ECHO_C" >&6; } + { echo "$as_me:$LINENO: result: ${MPI_INC}" >&5 +echo "${ECHO_T}${MPI_INC}" >&6; } + + +fi + + + +# Check whether --with-mpi-libdir was given. +if test "${with_mpi_libdir+set}" = set; then + withval=$with_mpi_libdir; + MPI_LIBDIR=${withval} + { echo "$as_me:$LINENO: checking user-defined MPI library directory" >&5 +echo $ECHO_N "checking user-defined MPI library directory... $ECHO_C" >&6; } + { echo "$as_me:$LINENO: result: ${MPI_LIBDIR}" >&5 +echo "${ECHO_T}${MPI_LIBDIR}" >&6; } + + +fi + + +{ echo "$as_me:$LINENO: checking whether we are using MPI" >&5 +echo $ECHO_N "checking whether we are using MPI... $ECHO_C" >&6; } +{ echo "$as_me:$LINENO: result: ${HAVE_PKG_MPI}" >&5 +echo "${ECHO_T}${HAVE_PKG_MPI}" >&6; } + +if test "X${HAVE_PKG_MPI}" = "Xyes"; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_MPI +_ACEOF + +fi + + + if test "X${HAVE_PKG_MPI}" = "Xyes"; then + HAVE_MPI_TRUE= + HAVE_MPI_FALSE='#' +else + HAVE_MPI_TRUE='#' + HAVE_MPI_FALSE= +fi + + + + +if test -n "${MPI_CXX}"; then + if test -f ${MPI_CXX}; then + MPI_CXX_EXISTS=yes + else + # Extract the first word of "${MPI_CXX}", so it can be a program name with args. +set dummy ${MPI_CXX}; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6; } +if test "${ac_cv_prog_MPI_CXX_EXISTS+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$MPI_CXX_EXISTS"; then + ac_cv_prog_MPI_CXX_EXISTS="$MPI_CXX_EXISTS" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_MPI_CXX_EXISTS="yes" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_prog_MPI_CXX_EXISTS" && ac_cv_prog_MPI_CXX_EXISTS="no" +fi +fi +MPI_CXX_EXISTS=$ac_cv_prog_MPI_CXX_EXISTS +if test -n "$MPI_CXX_EXISTS"; then + { echo "$as_me:$LINENO: result: $MPI_CXX_EXISTS" >&5 +echo "${ECHO_T}$MPI_CXX_EXISTS" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + fi + + if test "X${MPI_CXX_EXISTS}" = "Xyes"; then + CXX=${MPI_CXX} + else + echo "-----" + echo "Cannot find MPI C++ compiler ${MPI_CXX}." + echo "Specify a path to all mpi compilers with --with-mpi-compilers=PATH" + echo "or specify a C++ compiler using CXX=" + echo "Do not use --with-mpi-compilers if using CXX=" + echo "-----" + { { echo "$as_me:$LINENO: error: MPI C++ compiler (${MPI_CXX}) not found." >&5 +echo "$as_me: error: MPI C++ compiler (${MPI_CXX}) not found." >&2;} + { (exit 1); exit 1; }; } + fi +fi + +if test -n "${MPI_CC}"; then + if test -f ${MPI_CC}; then + MPI_CC_EXISTS=yes + else + # Extract the first word of "${MPI_CC}", so it can be a program name with args. +set dummy ${MPI_CC}; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6; } +if test "${ac_cv_prog_MPI_CC_EXISTS+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$MPI_CC_EXISTS"; then + ac_cv_prog_MPI_CC_EXISTS="$MPI_CC_EXISTS" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_MPI_CC_EXISTS="yes" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_prog_MPI_CC_EXISTS" && ac_cv_prog_MPI_CC_EXISTS="no" +fi +fi +MPI_CC_EXISTS=$ac_cv_prog_MPI_CC_EXISTS +if test -n "$MPI_CC_EXISTS"; then + { echo "$as_me:$LINENO: result: $MPI_CC_EXISTS" >&5 +echo "${ECHO_T}$MPI_CC_EXISTS" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + fi + + if test "X${MPI_CC_EXISTS}" = "Xyes"; then + CC=${MPI_CC} + else + echo "-----" + echo "Cannot find MPI C compiler ${MPI_CC}." + echo "Specify a path to all mpi compilers with --with-mpi-compilers=PATH" + echo "or specify a C compiler using CC=" + echo "Do not use --with-mpi-compilers if using CC=" + echo "-----" + { { echo "$as_me:$LINENO: error: MPI C compiler (${MPI_CC}) not found." >&5 +echo "$as_me: error: MPI C compiler (${MPI_CC}) not found." >&2;} + { (exit 1); exit 1; }; } + fi +fi + +if test "X$ac_cv_use_fortran" = "Xyes"; then + +if test -n "${MPI_F77}"; then + if test -f ${MPI_F77}; then + MPI_F77_EXISTS=yes + else + # Extract the first word of "${MPI_F77}", so it can be a program name with args. +set dummy ${MPI_F77}; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6; } +if test "${ac_cv_prog_MPI_F77_EXISTS+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$MPI_F77_EXISTS"; then + ac_cv_prog_MPI_F77_EXISTS="$MPI_F77_EXISTS" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_MPI_F77_EXISTS="yes" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_prog_MPI_F77_EXISTS" && ac_cv_prog_MPI_F77_EXISTS="no" +fi +fi +MPI_F77_EXISTS=$ac_cv_prog_MPI_F77_EXISTS +if test -n "$MPI_F77_EXISTS"; then + { echo "$as_me:$LINENO: result: $MPI_F77_EXISTS" >&5 +echo "${ECHO_T}$MPI_F77_EXISTS" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + fi + + if test "X${MPI_F77_EXISTS}" = "Xyes"; then + F77=${MPI_F77} + else + echo "-----" + echo "Cannot find MPI Fortran compiler ${MPI_F77}." + echo "Specify a path to all mpi compilers with --with-mpi-compilers=PATH" + echo "or specify a Fortran 77 compiler using F77=" + echo "Do not use --with-mpi-compilers if using F77=" + echo "-----" + { { echo "$as_me:$LINENO: error: MPI Fortran 77 compiler (${MPI_F77}) not found." >&5 +echo "$as_me: error: MPI Fortran 77 compiler (${MPI_F77}) not found." >&2;} + { (exit 1); exit 1; }; } + fi +fi + +fi + +# #np# - can eliminate compiler checks below if your package does not use the +# language corresponding to the check. Please note that if you use +# F77_FUNC to determine Fortran name mangling, you should not remove +# the Fortran compiler check or the check for Fortran flags. Doing +# so will prevent the detection of the proper name mangling in some +# cases. 
+# ------------------------------------------------------------------------ +# Checks for programs +# ------------------------------------------------------------------------ + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + for ac_prog in cc gcc + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cc gcc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6; } +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CC="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&5 +echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + + +test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&5 +echo "$as_me: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + +# Provide some information about the compiler. 
+echo "$as_me:$LINENO: checking for C compiler version" >&5 +ac_compiler=`set X $ac_compile; echo $2` +{ (ac_try="$ac_compiler --version >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler --version >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -v >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler -v >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -V >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler -V >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ echo "$as_me:$LINENO: checking for C compiler default output file name" >&5 +echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6; } +ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` +# +# List of possible output files, starting from the most likely. +# The algorithm is not robust to junk in `.', hence go to wildcards (a.*) +# only as a last resort. b.out is created by i960 compilers. 
+ac_files='a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out' +# +# The IRIX 6 linker writes into existing files which may not be +# executable, retaining their permissions. Remove them first so a +# subsequent execution test works. +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { (ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. +for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. 
+ break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi + +{ echo "$as_me:$LINENO: result: $ac_file" >&5 +echo "${ECHO_T}$ac_file" >&6; } +if test -z "$ac_file"; then + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: C compiler cannot create executables +See \`config.log' for more details." >&5 +echo "$as_me: error: C compiler cannot create executables +See \`config.log' for more details." >&2;} + { (exit 77); exit 77; }; } +fi + +ac_exeext=$ac_cv_exeext + +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ echo "$as_me:$LINENO: checking whether the C compiler works" >&5 +echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6; } +# FIXME: These cross compiler hacks should be removed for Autoconf 3.0 +# If not cross compiling, check that we can run a simple program. +if test "$cross_compiling" != yes; then + if { ac_try='./$ac_file' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { echo "$as_me:$LINENO: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + fi + fi +fi +{ echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + +rm -f a.out a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +# Check that the compiler produces executables we can run. 
If not, either +# the compiler is broken, or we cross compile. +{ echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 +echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6; } +{ echo "$as_me:$LINENO: result: $cross_compiling" >&5 +echo "${ECHO_T}$cross_compiling" >&6; } + +{ echo "$as_me:$LINENO: checking for suffix of executables" >&5 +echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6; } +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest$ac_cv_exeext +{ echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 +echo "${ECHO_T}$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +{ echo "$as_me:$LINENO: checking for suffix of object files" >&5 +echo $ECHO_N "checking for suffix of object files... 
$ECHO_C" >&6; } +if test "${ac_cv_objext+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 +echo "${ECHO_T}$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6; } +if test "${ac_cv_c_compiler_gnu+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_compiler_gnu=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_compiler_gnu=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6; } +GCC=`test $ac_compiler_gnu = yes && echo yes` +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 +echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6; } +if test "${ac_cv_prog_cc_g+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cc_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + CFLAGS="" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cc_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5 +echo $ECHO_N "checking for $CC option to accept ISO C89... $ECHO_C" >&6; } +if test "${ac_cv_prog_cc_c89+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#include +#include +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. 
It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cc_c89=$ac_arg +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { echo "$as_me:$LINENO: result: none needed" >&5 +echo "${ECHO_T}none needed" >&6; } ;; + xno) + { echo "$as_me:$LINENO: result: unsupported" >&5 +echo "${ECHO_T}unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_c89" >&6; } ;; +esac + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +DEPDIR="${am__leading_dot}deps" + +ac_config_commands="$ac_config_commands depfiles" + + +am_make=${MAKE-make} +cat > confinc << 'END' +am__doit: + @echo done +.PHONY: am__doit +END +# If we don't find an include directive, just comment out the code. +{ echo "$as_me:$LINENO: checking for style of include used by $am_make" >&5 +echo $ECHO_N "checking for style of include used by $am_make... $ECHO_C" >&6; } +am__include="#" +am__quote= +_am_result=none +# First try GNU make style include. +echo "include confinc" > confmf +# We grep out `Entering directory' and `Leaving directory' +# messages which can occur if `w' ends up in MAKEFLAGS. +# In particular we don't look at `^make:' because GNU make might +# be invoked under some other name (usually "gmake"), in which +# case it prints its new name instead of `make'. +if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then + am__include=include + am__quote= + _am_result=GNU +fi +# Now try BSD make style include. 
+if test "$am__include" = "#"; then + echo '.include "confinc"' > confmf + if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then + am__include=.include + am__quote="\"" + _am_result=BSD + fi +fi + + +{ echo "$as_me:$LINENO: result: $_am_result" >&5 +echo "${ECHO_T}$_am_result" >&6; } +rm -f confinc confmf + +# Check whether --enable-dependency-tracking was given. +if test "${enable_dependency_tracking+set}" = set; then + enableval=$enable_dependency_tracking; +fi + +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' +fi + if test "x$enable_dependency_tracking" != xno; then + AMDEP_TRUE= + AMDEP_FALSE='#' +else + AMDEP_TRUE='#' + AMDEP_FALSE= +fi + + + +depcc="$CC" am_compiler_list= + +{ echo "$as_me:$LINENO: checking dependency style of $depcc" >&5 +echo $ECHO_N "checking dependency style of $depcc... $ECHO_C" >&6; } +if test "${am_cv_CC_dependencies_compiler_type+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named `D' -- because `-MD' means `put the output + # in D'. + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. 
+ mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with + # Solaris 8's {/usr,}/bin/sh. + touch sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + case $depmode in + nosideeffect) + # after this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + none) break ;; + esac + # We check with `-c' and `-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle `-M -o', and we need to detect this. + if depmode=$depmode \ + source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). 
So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +{ echo "$as_me:$LINENO: result: $am_cv_CC_dependencies_compiler_type" >&5 +echo "${ECHO_T}$am_cv_CC_dependencies_compiler_type" >&6; } +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +if test -z "$CXX"; then + if test -n "$CCC"; then + CXX=$CCC + else + if test -n "$ac_tool_prefix"; then + for ac_prog in CC g++ c++ cxx + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_CXX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CXX"; then + ac_cv_prog_CXX="$CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CXX=$ac_cv_prog_CXX +if test -n "$CXX"; then + { echo "$as_me:$LINENO: result: $CXX" >&5 +echo "${ECHO_T}$CXX" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$CXX" && break + done +fi +if test -z "$CXX"; then + ac_ct_CXX=$CXX + for ac_prog in CC g++ c++ cxx +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_ac_ct_CXX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CXX"; then + ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CXX="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_CXX=$ac_cv_prog_ac_ct_CXX +if test -n "$ac_ct_CXX"; then + { echo "$as_me:$LINENO: result: $ac_ct_CXX" >&5 +echo "${ECHO_T}$ac_ct_CXX" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$ac_ct_CXX" && break +done + + if test "x$ac_ct_CXX" = x; then + CXX="g++" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. 
If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&5 +echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&2;} +ac_tool_warned=yes ;; +esac + CXX=$ac_ct_CXX + fi +fi + + fi +fi +# Provide some information about the compiler. +echo "$as_me:$LINENO: checking for C++ compiler version" >&5 +ac_compiler=`set X $ac_compile; echo $2` +{ (ac_try="$ac_compiler --version >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler --version >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -v >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler -v >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -V >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler -V >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +{ echo "$as_me:$LINENO: checking whether we are using the GNU C++ compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C++ compiler... $ECHO_C" >&6; } +if test "${ac_cv_cxx_compiler_gnu+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_compiler_gnu=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_compiler_gnu=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_cxx_compiler_gnu=$ac_compiler_gnu + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_cxx_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_cxx_compiler_gnu" >&6; } +GXX=`test $ac_compiler_gnu = yes && echo yes` +ac_test_CXXFLAGS=${CXXFLAGS+set} +ac_save_CXXFLAGS=$CXXFLAGS +{ echo "$as_me:$LINENO: checking whether $CXX accepts -g" >&5 +echo $ECHO_N "checking whether $CXX accepts -g... $ECHO_C" >&6; } +if test "${ac_cv_prog_cxx_g+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_save_cxx_werror_flag=$ac_cxx_werror_flag + ac_cxx_werror_flag=yes + ac_cv_prog_cxx_g=no + CXXFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cxx_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + CXXFLAGS="" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cxx_werror_flag=$ac_save_cxx_werror_flag + CXXFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cxx_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_cxx_werror_flag=$ac_save_cxx_werror_flag +fi +{ echo "$as_me:$LINENO: result: $ac_cv_prog_cxx_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cxx_g" >&6; } +if test "$ac_test_CXXFLAGS" = set; then + CXXFLAGS=$ac_save_CXXFLAGS +elif test $ac_cv_prog_cxx_g = yes; then + if test "$GXX" = yes; then + CXXFLAGS="-g -O2" + else + CXXFLAGS="-g" + fi +else + if test "$GXX" = yes; then + CXXFLAGS="-O2" + else + CXXFLAGS= + fi +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +depcc="$CXX" am_compiler_list= + +{ echo "$as_me:$LINENO: checking dependency style of $depcc" >&5 +echo $ECHO_N "checking dependency style of $depcc... $ECHO_C" >&6; } +if test "${am_cv_CXX_dependencies_compiler_type+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named `D' -- because `-MD' means `put the output + # in D'. + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. 
For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CXX_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with + # Solaris 8's {/usr,}/bin/sh. + touch sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + case $depmode in + nosideeffect) + # after this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + none) break ;; + esac + # We check with `-c' and `-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle `-M -o', and we need to detect this. 
+ if depmode=$depmode \ + source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CXX_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_CXX_dependencies_compiler_type=none +fi + +fi +{ echo "$as_me:$LINENO: result: $am_cv_CXX_dependencies_compiler_type" >&5 +echo "${ECHO_T}$am_cv_CXX_dependencies_compiler_type" >&6; } +CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CXX_dependencies_compiler_type" = gcc3; then + am__fastdepCXX_TRUE= + am__fastdepCXX_FALSE='#' +else + am__fastdepCXX_TRUE='#' + am__fastdepCXX_FALSE= +fi + + +#AC_PROG_F77(f77 g77 gfortran f90 xlf90 f95) +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6; } +if test "${ac_cv_prog_RANLIB+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { echo "$as_me:$LINENO: result: $RANLIB" >&5 +echo "${ECHO_T}$RANLIB" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { echo "$as_me:$LINENO: result: $ac_ct_RANLIB" >&5 +echo "${ECHO_T}$ac_ct_RANLIB" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&5 +echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + + +# Check if --with-flags present, prepend any specs to FLAGS + + +{ echo "$as_me:$LINENO: checking whether additional CCFLAGS flags should be added" >&5 +echo $ECHO_N "checking whether additional CCFLAGS flags should be added... $ECHO_C" >&6; } + +# Check whether --with-ccflags was given. +if test "${with_ccflags+set}" = set; then + withval=$with_ccflags; +CCFLAGS="${withval} ${CCFLAGS}" +{ echo "$as_me:$LINENO: result: CCFLAGS = ${CCFLAGS}" >&5 +echo "${ECHO_T}CCFLAGS = ${CCFLAGS}" >&6; } + +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + +fi + + + +{ echo "$as_me:$LINENO: checking whether additional CXXFLAGS flags should be added" >&5 +echo $ECHO_N "checking whether additional CXXFLAGS flags should be added... 
$ECHO_C" >&6; } + +# Check whether --with-cxxflags was given. +if test "${with_cxxflags+set}" = set; then + withval=$with_cxxflags; +CXXFLAGS="${withval} ${CXXFLAGS}" +{ echo "$as_me:$LINENO: result: CXXFLAGS = ${CXXFLAGS}" >&5 +echo "${ECHO_T}CXXFLAGS = ${CXXFLAGS}" >&6; } + +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + +fi + + + +{ echo "$as_me:$LINENO: checking whether additional CFLAGS flags should be added" >&5 +echo $ECHO_N "checking whether additional CFLAGS flags should be added... $ECHO_C" >&6; } + +# Check whether --with-cflags was given. +if test "${with_cflags+set}" = set; then + withval=$with_cflags; +CFLAGS="${withval} ${CFLAGS}" +{ echo "$as_me:$LINENO: result: CFLAGS = ${CFLAGS}" >&5 +echo "${ECHO_T}CFLAGS = ${CFLAGS}" >&6; } + +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + +fi + + +#TAC_ARG_WITH_FLAGS(fflags, FFLAGS) + +{ echo "$as_me:$LINENO: checking whether additional libraries are needed" >&5 +echo $ECHO_N "checking whether additional libraries are needed... $ECHO_C" >&6; } + +# Check whether --with-libs was given. +if test "${with_libs+set}" = set; then + withval=$with_libs; +LIBS="${withval} ${LIBS}" +{ echo "$as_me:$LINENO: result: LIBS = ${LIBS}" >&5 +echo "${ECHO_T}LIBS = ${LIBS}" >&6; } + +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + +fi + + + + +{ echo "$as_me:$LINENO: checking whether additional LDFLAGS flags should be added" >&5 +echo $ECHO_N "checking whether additional LDFLAGS flags should be added... $ECHO_C" >&6; } + +# Check whether --with-ldflags was given. 
+if test "${with_ldflags+set}" = set; then + withval=$with_ldflags; +LDFLAGS="${withval} ${LDFLAGS}" +{ echo "$as_me:$LINENO: result: LDFLAGS = ${LDFLAGS}" >&5 +echo "${ECHO_T}LDFLAGS = ${LDFLAGS}" >&6; } + +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + +fi + + + +# ------------------------------------------------------------------------ +# Alternate archiver +# ------------------------------------------------------------------------ + + + +# Check whether --with-ar was given. +if test "${with_ar+set}" = set; then + withval=$with_ar; +{ echo "$as_me:$LINENO: checking user-defined archiver" >&5 +echo $ECHO_N "checking user-defined archiver... $ECHO_C" >&6; } +{ echo "$as_me:$LINENO: result: ${withval}" >&5 +echo "${ECHO_T}${withval}" >&6; } +USE_ALTERNATE_AR=yes +ALTERNATE_AR="${withval}" + + +fi + + +if test -n "${SPECIAL_AR}" && test "X${USE_ALTERNATE_AR}" != "Xyes"; +then + USE_ALTERNATE_AR=yes + ALTERNATE_AR="${SPECIAL_AR}" +fi + +{ echo "$as_me:$LINENO: checking for special archiver command" >&5 +echo $ECHO_N "checking for special archiver command... 
$ECHO_C" >&6; } +if test "X${USE_ALTERNATE_AR}" = "Xyes"; then + { echo "$as_me:$LINENO: result: ${ALTERNATE_AR}" >&5 +echo "${ECHO_T}${ALTERNATE_AR}" >&6; } + if true; then + USE_ALTERNATE_AR_TRUE= + USE_ALTERNATE_AR_FALSE='#' +else + USE_ALTERNATE_AR_TRUE='#' + USE_ALTERNATE_AR_FALSE= +fi + +else + { echo "$as_me:$LINENO: result: none" >&5 +echo "${ECHO_T}none" >&6; } + if false; then + USE_ALTERNATE_AR_TRUE= + USE_ALTERNATE_AR_FALSE='#' +else + USE_ALTERNATE_AR_TRUE='#' + USE_ALTERNATE_AR_FALSE= +fi + +fi + + + +# ------------------------------------------------------------------------ +# MPI link check +# ------------------------------------------------------------------------ + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +{ echo "$as_me:$LINENO: checking how to run the C++ preprocessor" >&5 +echo $ECHO_N "checking how to run the C++ preprocessor... $ECHO_C" >&6; } +if test -z "$CXXCPP"; then + if test "${ac_cv_prog_CXXCPP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + # Double quotes because CXXCPP needs to be expanded + for CXXCPP in "$CXX -E" "/lib/cpp" + do + ac_preproc_ok=false +for ac_cxx_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || + test ! -s conftest.err + }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi + +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || + test ! -s conftest.err + }; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi + +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. 
+rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + break +fi + + done + ac_cv_prog_CXXCPP=$CXXCPP + +fi + CXXCPP=$ac_cv_prog_CXXCPP +else + ac_cv_prog_CXXCPP=$CXXCPP +fi +{ echo "$as_me:$LINENO: result: $CXXCPP" >&5 +echo "${ECHO_T}$CXXCPP" >&6; } +ac_preproc_ok=false +for ac_cxx_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || + test ! -s conftest.err + }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi + +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || + test ! -s conftest.err + }; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi + +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + : +else + { { echo "$as_me:$LINENO: error: C++ preprocessor \"$CXXCPP\" fails sanity check +See \`config.log' for more details." >&5 +echo "$as_me: error: C++ preprocessor \"$CXXCPP\" fails sanity check +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + + + +if test "X${HAVE_PKG_MPI}" = "Xyes"; then + + if test -n "${MPI_DIR}" && test -z "${MPI_INC}"; then + MPI_INC="${MPI_DIR}/include" + fi + + if test -n "${MPI_INC}"; then + CPPFLAGS="${CPPFLAGS} -I${MPI_INC}" + fi + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { echo "$as_me:$LINENO: checking for mpi.h" >&5 +echo $ECHO_N "checking for mpi.h... 
$ECHO_C" >&6; } + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include "mpi.h" +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || + test ! -s conftest.err + }; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + echo "-----" + echo "Cannot link simple MPI program." + echo "Try --with-mpi-compilers to specify MPI compilers." + echo "Or try --with-mpi-libs, --with-mpi-incdir, --with-mpi-libdir" + echo "to specify all the specific MPI compile options." + echo "-----" + { { echo "$as_me:$LINENO: error: MPI cannot link" >&5 +echo "$as_me: error: MPI cannot link" >&2;} + { (exit 1); exit 1; }; } + +fi + +rm -f conftest.err conftest.$ac_ext + + if test -n "${MPI_DIR}" && test -z "${MPI_LIBDIR}"; then + MPI_LIBDIR="${MPI_DIR}/lib" + fi + + if test -n "${MPI_LIBDIR}"; then + LDFLAGS="${LDFLAGS} -L${MPI_LIBDIR}" + fi + + if test -z "${MPI_LIBS}" && test -n "${MPI_LIBDIR}"; then + MPI_LIBS="-lmpi" + fi + + if test -n "${MPI_LIBS}"; then + LIBS="${MPI_LIBS} ${LIBS}" + fi + +# AC_LANG_CPLUSPLUS +# AC_MSG_CHECKING(whether MPI will link using C++ compiler) +# AC_TRY_LINK([#include ], +# [int c; char** v; MPI_Init(&c,&v);], +# [AC_MSG_RESULT(yes)], +# [AC_MSG_RESULT(no) +# echo "-----" +# echo "Cannot link simple MPI program." 
+# echo "Try --with-mpi-cxx to specify MPI C++ compile script." +# echo "Or try --with-mpi-libs, --with-mpi-incdir, --with-mpi-libdir" +# echo "to specify all the specific MPI compile options." +# echo "-----" +# AC_MSG_ERROR(MPI cannot link)] +# ) + +fi + + +# ------------------------------------------------------------------------ +# Checks for Makefile.export related systems +# ------------------------------------------------------------------------ + +# Check whether --enable-export-makefiles was given. +if test "${enable_export_makefiles+set}" = set; then + enableval=$enable_export_makefiles; ac_cv_use_export_makefiles=$enableval +else + ac_cv_use_export_makefiles=yes +fi + + +{ echo "$as_me:$LINENO: checking whether to build export makefiles" >&5 +echo $ECHO_N "checking whether to build export makefiles... $ECHO_C" >&6; } + +if test "X$ac_cv_use_export_makefiles" != "Xno"; then + + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + +cat >>confdefs.h <<\_ACEOF +#define HAVE_EXPORT_MAKEFILES +_ACEOF + + +else + + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + +fi + + if test X${ac_cv_use_export_makefiles} = Xyes; then + USING_EXPORT_MAKEFILES_TRUE= + USING_EXPORT_MAKEFILES_FALSE='#' +else + USING_EXPORT_MAKEFILES_TRUE='#' + USING_EXPORT_MAKEFILES_FALSE= +fi + + +# Check for perl to run scripts (Required dependency) + + + +# Check whether --with-perl was given. +if test "${with_perl+set}" = set; then + withval=$with_perl; +{ echo "$as_me:$LINENO: checking for user supplied perl executable" >&5 +echo $ECHO_N "checking for user supplied perl executable... 
$ECHO_C" >&6; } +{ echo "$as_me:$LINENO: result: ${withval}" >&5 +echo "${ECHO_T}${withval}" >&6; } +USER_SPECIFIED_PERL=yes +PERL_EXE="${withval}" + +else + +USER_SPECIFIED_PERL=no + +fi + + +if test "X${USER_SPECIFIED_PERL}" = "Xyes"; then + as_ac_File=`echo "ac_cv_file_${PERL_EXE}" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for ${PERL_EXE}" >&5 +echo $ECHO_N "checking for ${PERL_EXE}... $ECHO_C" >&6; } +if { as_var=$as_ac_File; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + test "$cross_compiling" = yes && + { { echo "$as_me:$LINENO: error: cannot check for file existence when cross compiling" >&5 +echo "$as_me: error: cannot check for file existence when cross compiling" >&2;} + { (exit 1); exit 1; }; } +if test -r "${PERL_EXE}"; then + eval "$as_ac_File=yes" +else + eval "$as_ac_File=no" +fi +fi +ac_res=`eval echo '${'$as_ac_File'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_File'}'` = yes; then + HAVE_PERL=yes +else + HAVE_PERL=no +fi + + PERL_EXE=${PERL_EXE} + +else + # Extract the first word of "perl", so it can be a program name with args. +set dummy perl; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_HAVE_PERL+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$HAVE_PERL"; then + ac_cv_prog_HAVE_PERL="$HAVE_PERL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_HAVE_PERL="yes" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_prog_HAVE_PERL" && ac_cv_prog_HAVE_PERL="no" +fi +fi +HAVE_PERL=$ac_cv_prog_HAVE_PERL +if test -n "$HAVE_PERL"; then + { echo "$as_me:$LINENO: result: $HAVE_PERL" >&5 +echo "${ECHO_T}$HAVE_PERL" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + PERL_EXE=perl + +fi + if test X${HAVE_PERL} = Xyes; then + USING_PERL_TRUE= + USING_PERL_FALSE='#' +else + USING_PERL_TRUE='#' + USING_PERL_FALSE= +fi + + + +if test "X$HAVE_PERL" != "Xyes" && + test "X$ac_cv_use_export_makefiles" != "Xno"; then + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + { { echo "$as_me:$LINENO: error: Failed to find the perl executable. The flag --enable-export-makefiles requires perl to be either in your path or explicitly defined by the flag --with-perl=. If you do not require the export makefiles to be installed via 'make install', you can disable the export makefiles with --disable-export-makefiles." >&5 +echo "$as_me: error: Failed to find the perl executable. The flag --enable-export-makefiles requires perl to be either in your path or explicitly defined by the flag --with-perl=. If you do not require the export makefiles to be installed via 'make install', you can disable the export makefiles with --disable-export-makefiles." >&2;} + { (exit 1); exit 1; }; } +fi + +# Check for using gnumake to clean up link lines via +# gnumake's "shell" command. Optional dependency. + + + + +# Check whether --with-gnumake was given. 
+if test "${with_gnumake+set}" = set; then + withval=$with_gnumake; ac_cv_use_gnumake=$withval +else + ac_cv_use_gnumake=no +fi + + +{ echo "$as_me:$LINENO: checking whether gnumake specific code should be enabled" >&5 +echo $ECHO_N "checking whether gnumake specific code should be enabled... $ECHO_C" >&6; } + +if test "X$ac_cv_use_gnumake" != "Xno"; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + +cat >>confdefs.h <<\_ACEOF +#define HAVE_GNUMAKE +_ACEOF + +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + if test "X$ac_cv_use_gnumake" = "Xyes"; then + USING_GNUMAKE_TRUE= + USING_GNUMAKE_FALSE='#' +else + USING_GNUMAKE_TRUE='#' + USING_GNUMAKE_FALSE= +fi + + + +if test "X$HAVE_PERL" != "Xyes" && + test "X$ac_cv_use_gnumake" != "Xno"; then + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + { { echo "$as_me:$LINENO: error: The flag --with-gnumake requires perl to be in your path. The perl executable can alternatively be explicitly defined by the flag --with-perl=." >&5 +echo "$as_me: error: The flag --with-gnumake requires perl to be in your path. The perl executable can alternatively be explicitly defined by the flag --with-perl=." >&2;} + { (exit 1); exit 1; }; } +fi + + + +# ------------------------------------------------------------------------ +# Checks if tests and examples should be built +# ------------------------------------------------------------------------ + +# #np# - These options can disable the tests and examples of a package. +# #np# - Packages that do not have tests or examples should #-out the +# #np# - option(s) that does (do) not apply. + + +# Check whether --enable-tests was given. +if test "${enable_tests+set}" = set; then + enableval=$enable_tests; ac_cv_use_tests=$enableval +else + ac_cv_use_tests=yes +fi + + +{ echo "$as_me:$LINENO: checking whether to use tests" >&5 +echo $ECHO_N "checking whether to use tests... 
$ECHO_C" >&6; } + +if test "X$ac_cv_use_tests" != "Xno"; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + +cat >>confdefs.h <<\_ACEOF +#define HAVE_TESTS +_ACEOF + +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Check whether --enable-tests was given. +if test "${enable_tests+set}" = set; then + enableval=$enable_tests; ac_cv_use_tests=$enableval +else + ac_cv_use_tests=yes +fi + + +# Check whether --enable-threadpool-tests was given. +if test "${enable_threadpool_tests+set}" = set; then + enableval=$enable_threadpool_tests; ac_cv_use_threadpool_tests=$enableval +else + ac_cv_use_threadpool_tests=${ac_cv_use_tests} +fi + + +{ echo "$as_me:$LINENO: checking whether to use threadpool-tests" >&5 +echo $ECHO_N "checking whether to use threadpool-tests... $ECHO_C" >&6; } + +if test "X$ac_cv_use_threadpool_tests" != "Xno"; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + +cat >>confdefs.h <<\_ACEOF +#define HAVE_NEW_PACKAGE_TESTS +_ACEOF + +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + if test "X$ac_cv_use_threadpool_tests" != "Xno"; then + BUILD_TESTS_TRUE= + BUILD_TESTS_FALSE='#' +else + BUILD_TESTS_TRUE='#' + BUILD_TESTS_FALSE= +fi + + +#TAC_ARG_ENABLE_FEATURE(examples, [Make examples for all Trilinos packages buildable with 'make examples'], EXAMPLES, yes) +#TAC_ARG_ENABLE_FEATURE_SUB_CHECK( new_package, examples, [Make New_Package examples buildable with 'make examples'], NEW_PACKAGE_EXAMPLES) +#AM_CONDITIONAL(BUILD_EXAMPLES, test "X$ac_cv_use_new_package_examples" != "Xno") + +#We now build tests and examples through separate make targets, rather than +#during "make". We still need to conditionally include the test and example +#in SUBDIRS, even though SUB_TEST and SUB_EXAMPLE will never be +#defined, so that the tests and examples are included in the distribution +#tarball. 
+ if test "X$ac_cv_use_sub_test" = "Xyes"; then + SUB_TEST_TRUE= + SUB_TEST_FALSE='#' +else + SUB_TEST_TRUE='#' + SUB_TEST_FALSE= +fi + +#AM_CONDITIONAL(SUB_EXAMPLE, test "X$ac_cv_use_sub_example" = "Xyes") + + +# Check whether --enable-libcheck was given. +if test "${enable_libcheck+set}" = set; then + enableval=$enable_libcheck; ac_cv_use_libcheck=$enableval +else + ac_cv_use_libcheck=yes +fi + + +{ echo "$as_me:$LINENO: checking whether to use libcheck" >&5 +echo $ECHO_N "checking whether to use libcheck... $ECHO_C" >&6; } + +if test "X$ac_cv_use_libcheck" != "Xno"; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + +cat >>confdefs.h <<\_ACEOF +#define HAVE_LIBCHECK +_ACEOF + +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# ------------------------------------------------------------------------ +# Specify other directories +# ------------------------------------------------------------------------ + +# enable use of --with-libdirs="-Llibdir1 -Llibdir2 ..." to prepend to LDFLAGS + +{ echo "$as_me:$LINENO: checking whether additional library search paths defined" >&5 +echo $ECHO_N "checking whether additional library search paths defined... $ECHO_C" >&6; } + +# Check whether --with-libdirs was given. +if test "${with_libdirs+set}" = set; then + withval=$with_libdirs; +LDFLAGS="${withval} ${LDFLAGS}" +{ echo "$as_me:$LINENO: result: ${withval}" >&5 +echo "${ECHO_T}${withval}" >&6; } + +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + +fi + + +# enable use of --with-incdirs="-Lincdir1 -Lincdir2 ..." to prepend to CPPFLAGS + +{ echo "$as_me:$LINENO: checking whether additional include search paths defined" >&5 +echo $ECHO_N "checking whether additional include search paths defined... $ECHO_C" >&6; } + +# Check whether --with-incdirs was given. 
+if test "${with_incdirs+set}" = set; then + withval=$with_incdirs; +CPPFLAGS="${withval} ${CPPFLAGS}" +{ echo "$as_me:$LINENO: result: ${withval}" >&5 +echo "${ECHO_T}${withval}" >&6; } + +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + +fi + + + +# #np# - Yet another opportunity to remove code if you aren't +# using Fortran +# Define F77_FUNC that will be used to link with Fortran subroutines. - trash WORKGXX +#AC_F77_WRAPPERS + +# ------------------------------------------------------------------------ +# Checks for libraries +# ------------------------------------------------------------------------ + +# If tests, examples and libcheck are disabled, we don't have to check +# for these libraries. + +# #np# - +# If a package does not have tests or examples, the corresponding check(s) +# should be pulled out of the "if" statement below. +#if test "X$ac_cv_use_new_package_examples" != "Xno" || test "X$ac_cv_use_libcheck" != "Xno"; then +if test "X$ac_cv_use_threadpool_tests" != "Xno" || test "X$ac_cv_use_libcheck" != "Xno"; then + +{ echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5 +echo $ECHO_N "checking for grep that handles long lines and -e... $ECHO_C" >&6; } +if test "${ac_cv_path_GREP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + # Extract the first word of "grep ggrep" to use in msg output +if test -z "$GREP"; then +set dummy grep ggrep; ac_prog_name=$2 +if test "${ac_cv_path_GREP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_path_GREP_found=false +# Loop through the user's path and test for each of PROGNAME-LIST +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue + # Check for GNU ac_path_GREP and select it if it is found. + # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + echo $ECHO_N "0123456789$ECHO_C" >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + ac_count=`expr $ac_count + 1` + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + + $ac_path_GREP_found && break 3 + done +done + +done +IFS=$as_save_IFS + + +fi + +GREP="$ac_cv_path_GREP" +if test -z "$GREP"; then + { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 +echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} + { (exit 1); exit 1; }; } +fi + +else + ac_cv_path_GREP=$GREP +fi + + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5 +echo "${ECHO_T}$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ echo "$as_me:$LINENO: checking for egrep" >&5 +echo $ECHO_N "checking for egrep... 
$ECHO_C" >&6; } +if test "${ac_cv_path_EGREP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + # Extract the first word of "egrep" to use in msg output +if test -z "$EGREP"; then +set dummy egrep; ac_prog_name=$2 +if test "${ac_cv_path_EGREP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_path_EGREP_found=false +# Loop through the user's path and test for each of PROGNAME-LIST +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue + # Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + echo $ECHO_N "0123456789$ECHO_C" >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + ac_count=`expr $ac_count + 1` + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + + $ac_path_EGREP_found && break 3 + done +done + +done +IFS=$as_save_IFS + + +fi + +EGREP="$ac_cv_path_EGREP" +if test -z "$EGREP"; then + { { echo "$as_me:$LINENO: error: no acceptable 
$ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 +echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} + { (exit 1); exit 1; }; } +fi + +else + ac_cv_path_EGREP=$EGREP +fi + + + fi +fi +{ echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5 +echo "${ECHO_T}$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ echo "$as_me:$LINENO: checking for ANSI C header files" >&5 +echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6; } +if test "${ac_cv_header_stdc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_header_stdc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_header_stdc=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then + : +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then + : +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then + : +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi + + +fi +fi +{ echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5 +echo "${ECHO_T}$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +cat >>confdefs.h <<\_ACEOF +#define STDC_HEADERS 1 +_ACEOF + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. + + + + + + + + + +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default + +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_Header=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + + +acx_pthread_ok=no + +# First, check if the POSIX threads header, pthread.h, is available. +# If it isn't, don't bother looking for the threads libraries. +if test "${ac_cv_header_pthread_h+set}" = set; then + { echo "$as_me:$LINENO: checking for pthread.h" >&5 +echo $ECHO_N "checking for pthread.h... $ECHO_C" >&6; } +if test "${ac_cv_header_pthread_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +{ echo "$as_me:$LINENO: result: $ac_cv_header_pthread_h" >&5 +echo "${ECHO_T}$ac_cv_header_pthread_h" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking pthread.h usability" >&5 +echo $ECHO_N "checking pthread.h usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking pthread.h presence" >&5 +echo $ECHO_N "checking pthread.h presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || + test ! -s conftest.err + }; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_cxx_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: pthread.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: pthread.h: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: pthread.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: pthread.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: pthread.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: pthread.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: pthread.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: pthread.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: pthread.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: pthread.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: pthread.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: pthread.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: pthread.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: pthread.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: pthread.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: pthread.h: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## --------------------------------- ## +## Report this to hcedwar@sandia.gov ## +## --------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for pthread.h" >&5 +echo $ECHO_N "checking for pthread.h... 
$ECHO_C" >&6; } +if test "${ac_cv_header_pthread_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_pthread_h=$ac_header_preproc +fi +{ echo "$as_me:$LINENO: result: $ac_cv_header_pthread_h" >&5 +echo "${ECHO_T}$ac_cv_header_pthread_h" >&6; } + +fi +if test $ac_cv_header_pthread_h = yes; then + : +else + acx_pthread_ok=noheader +fi + + + +# We must check for the threads library under a number of different +# names; the ordering is very important because some systems +# (e.g. DEC) have both -lpthread and -lpthreads, where one of the +# libraries is broken (non-POSIX). + +# First of all, check if the user has set any of the PTHREAD_LIBS, +# etcetera environment variables, and if threads linking works using +# them: +if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + { echo "$as_me:$LINENO: checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS" >&5 +echo $ECHO_N "checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS... $ECHO_C" >&6; } + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char pthread_join (); +int +main () +{ +return pthread_join (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + acx_pthread_ok=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext + { echo "$as_me:$LINENO: result: $acx_pthread_ok" >&5 +echo "${ECHO_T}$acx_pthread_ok" >&6; } + if test x"$acx_pthread_ok" = xno; then + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" + fi + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" +fi + +# Create a list of thread flags to try. Items starting with a "-" are +# C compiler flags, and other items are library names, except for "none" +# which indicates that we try without any flags at all. + +acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt" + +# The ordering *is* (sometimes) important. Some notes on the +# individual items follow: + +# pthreads: AIX (must check this before -lpthread) +# none: in case threads are in libc; should be tried before -Kthread and +# other compiler flags to prevent continual compiler warnings +# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) +# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) +# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) +# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) +# -pthreads: Solaris/gcc +# -mthreads: Mingw32/gcc, Lynx/gcc +# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it +# doesn't hurt to check since this sometimes defines pthreads too; +# also defines -D_REENTRANT) +# pthread: Linux, etcetera +# --thread-safe: KAI C++ + +case "${host_cpu}-${host_os}" in + *solaris*) + + # On Solaris (at least, for some versions), libc contains stubbed + # (non-functional) versions of the pthreads routines, so link-based 
+ # tests will erroneously succeed. (We need to link with -pthread or + # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather + # a function called by this macro, so we could check for that, but + # who knows whether they'll stub that too in a future libc.) So, + # we'll just look for -pthreads and -lpthread first: + + acx_pthread_flags="-pthread -pthreads pthread -mt $acx_pthread_flags" + ;; +esac + +if test x"$acx_pthread_ok" = xno; then +for flag in $acx_pthread_flags; do + + case $flag in + none) + { echo "$as_me:$LINENO: checking whether pthreads work without any flags" >&5 +echo $ECHO_N "checking whether pthreads work without any flags... $ECHO_C" >&6; } + ;; + + -*) + { echo "$as_me:$LINENO: checking whether pthreads work with $flag" >&5 +echo $ECHO_N "checking whether pthreads work with $flag... $ECHO_C" >&6; } + PTHREAD_CFLAGS="$flag" + ;; + + *) + { echo "$as_me:$LINENO: checking for the pthreads library -l$flag" >&5 +echo $ECHO_N "checking for the pthreads library -l$flag... $ECHO_C" >&6; } + PTHREAD_LIBS="-l$flag" + ;; + esac + + save_LIBS="$LIBS" + save_CFLAGS="$CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Check for various functions. We must include pthread.h, + # since some functions may be macros. (On the Sequent, we + # need a special flag -Kthread to make this header compile.) + # We check for pthread_join because it is in -lpthread on IRIX + # while pthread_create is in libc. We check for pthread_attr_init + # due to DEC craziness with -lpthreads. We check for + # pthread_cleanup_push because it is one of the few pthread + # functions on Solaris that doesn't have a non-functional libc stub. + # We try pthread_create on general principles. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +int +main () +{ +pthread_t th; pthread_join(th, 0); + pthread_attr_init(0); pthread_cleanup_push(0, 0); + pthread_create(0,0,0,0); pthread_cleanup_pop(0); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + acx_pthread_ok=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + { echo "$as_me:$LINENO: result: $acx_pthread_ok" >&5 +echo "${ECHO_T}$acx_pthread_ok" >&6; } + if test "x$acx_pthread_ok" = xyes; then + break; + fi + + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" +done +fi + +# Various other checks: +if test "x$acx_pthread_ok" = xyes; then + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Detect AIX lossage: threads are created detached by default + # and the JOINABLE attribute has a nonstandard name (UNDETACHED). + { echo "$as_me:$LINENO: checking for joinable pthread attribute" >&5 +echo $ECHO_N "checking for joinable pthread attribute... $ECHO_C" >&6; } + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +int +main () +{ +int attr=PTHREAD_CREATE_JOINABLE; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + ok=PTHREAD_CREATE_JOINABLE +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ok=unknown +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext + if test x"$ok" = xunknown; then + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +int +main () +{ +int attr=PTHREAD_CREATE_UNDETACHED; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + ok=PTHREAD_CREATE_UNDETACHED +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ok=unknown +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext + fi + if test x"$ok" != xPTHREAD_CREATE_JOINABLE; then + +cat >>confdefs.h <<\_ACEOF +#define PTHREAD_CREATE_JOINABLE $ok +_ACEOF + + fi + { echo "$as_me:$LINENO: result: ${ok}" >&5 +echo "${ECHO_T}${ok}" >&6; } + if test x"$ok" = xunknown; then + { echo "$as_me:$LINENO: WARNING: we do not know how to create joinable pthreads" >&5 +echo "$as_me: WARNING: we do not know how to create joinable pthreads" >&2;} + fi + + { echo "$as_me:$LINENO: checking if more special flags are required for pthreads" >&5 +echo $ECHO_N "checking if more special flags are required for pthreads... $ECHO_C" >&6; } + flag=no + case "${host_cpu}-${host_os}" in + *-aix* | *-freebsd*) flag="-D_THREAD_SAFE";; + *solaris* | alpha*-osf*) flag="-D_REENTRANT";; + esac + { echo "$as_me:$LINENO: result: ${flag}" >&5 +echo "${ECHO_T}${flag}" >&6; } + if test "x$flag" != xno; then + PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" + fi + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + # More AIX lossage: must compile with cc_r + # Extract the first word of "cc_r", so it can be a program name with args. +set dummy cc_r; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_PTHREAD_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$PTHREAD_CC"; then + ac_cv_prog_PTHREAD_CC="$PTHREAD_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_PTHREAD_CC="cc_r" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_prog_PTHREAD_CC" && ac_cv_prog_PTHREAD_CC="${CC}" +fi +fi +PTHREAD_CC=$ac_cv_prog_PTHREAD_CC +if test -n "$PTHREAD_CC"; then + { echo "$as_me:$LINENO: result: $PTHREAD_CC" >&5 +echo "${ECHO_T}$PTHREAD_CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +else + PTHREAD_CC="$CC" +fi + + + + + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$acx_pthread_ok" = xyes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_PTHREAD 1 +_ACEOF + + : +else + acx_pthread_ok=no + +fi + + +LIBS="$PTHREAD_LIBS $LIBS" +CFLAGS="$CFLAGS $PTHREAD_CFLAGS" +CC="$PTHREAD_CC" + +fi +# end of the list of libraries that don't need to be checked for if +# tests and examples are disabled. + +# ------------------------------------------------------------------------ +# Checks for linker characteristics +# ------------------------------------------------------------------------ + +# Determine libraries needed for linking with Fortran +#AC_F77_LIBRARY_LDFLAGS + + +# ------------------------------------------------------------------------ +# Perform substitutions in output files +# ------------------------------------------------------------------------ + + + +# ------------------------------------------------------------------------ +# Output files +# ------------------------------------------------------------------------ +## +# You will need to change AC_CONFIG_FILES below and Makefile.am +# to add a new directory. 
+ac_config_files="$ac_config_files Makefile Makefile.export.threadpool src/Makefile test/Makefile" + + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5 +echo "$as_me: WARNING: Cache variable $ac_var contains a newline." >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + *) $as_unset $ac_var ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. 
+ sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + test "x$cache_file" != "x/dev/null" && + { echo "$as_me:$LINENO: updating cache $cache_file" >&5 +echo "$as_me: updating cache $cache_file" >&6;} + cat confcache >$cache_file + else + { echo "$as_me:$LINENO: not updating unwritable cache $cache_file" >&5 +echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + ac_libobjs="$ac_libobjs \${LIBOBJDIR}$ac_i\$U.$ac_objext" + ac_ltlibobjs="$ac_ltlibobjs \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + +if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"MAINTAINER_MODE\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"MAINTAINER_MODE\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${HAVE_MPI_TRUE}" && test -z "${HAVE_MPI_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"HAVE_MPI\" was never defined. 
+Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"HAVE_MPI\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"AMDEP\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"AMDEP\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"am__fastdepCXX\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"am__fastdepCXX\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${USE_ALTERNATE_AR_TRUE}" && test -z "${USE_ALTERNATE_AR_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"USE_ALTERNATE_AR\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"USE_ALTERNATE_AR\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${USE_ALTERNATE_AR_TRUE}" && test -z "${USE_ALTERNATE_AR_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"USE_ALTERNATE_AR\" was never defined. 
+Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"USE_ALTERNATE_AR\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${USING_EXPORT_MAKEFILES_TRUE}" && test -z "${USING_EXPORT_MAKEFILES_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"USING_EXPORT_MAKEFILES\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"USING_EXPORT_MAKEFILES\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${USING_PERL_TRUE}" && test -z "${USING_PERL_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"USING_PERL\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"USING_PERL\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${USING_GNUMAKE_TRUE}" && test -z "${USING_GNUMAKE_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"USING_GNUMAKE\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"USING_GNUMAKE\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${BUILD_TESTS_TRUE}" && test -z "${BUILD_TESTS_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"BUILD_TESTS\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"BUILD_TESTS\" was never defined. +Usually this means the macro was only invoked conditionally." 
>&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${SUB_TEST_TRUE}" && test -z "${SUB_TEST_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"SUB_TEST\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"SUB_TEST\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi + +: ${CONFIG_STATUS=./config.status} +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 +echo "$as_me: creating $CONFIG_STATUS" >&6;} +cat >$CONFIG_STATUS <<_ACEOF +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false +SHELL=\${CONFIG_SHELL-$SHELL} +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + + + +# PATH needs CR +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! 
/bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +as_nl=' +' +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + { (exit 1); exit 1; } +fi + +# Work around bugs in pre-3.0 UWIN ksh. +for as_var in ENV MAIL MAILPATH +do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var + fi +done + +# Required to use basename. 
+if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# CDPATH. +$as_unset CDPATH + + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line after each line using $LINENO; the second 'sed' + # does the real work. The second script uses 'N' to pair each + # line-number line with the line containing $LINENO, and appends + # trailing '-' during substitution so that $LINENO is not a special + # case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # scripts with optimization help from Paolo Bonzini. Blame Lee + # E. McMahon (1931-1989) for sed's syntax. :-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . 
"./$as_me.lineno" + # Exit status is that of the last command. + exit +} + + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in +-n*) + case `echo 'x\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + *) ECHO_C='\c';; + esac;; +*) + ECHO_N='-n';; +esac + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir +fi +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -p'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -p' +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +if test -x / >/dev/null 2>&1; then + as_test_x='test -x' +else + if ls -dL / >/dev/null 2>&1; then + as_ls_L_option=L + else + as_ls_L_option= + fi + as_test_x=' + eval sh -c '\'' + if test -d "$1"; then + test -d "$1/."; + else + case $1 in + -*)set "./$1";; + esac; + case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in + ???[sx]*):;;*)false;;esac;fi + '\'' sh + ' +fi +as_executable_p=$as_test_x + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. 
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 + +# Save the log message, to keep $[0] and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by ThreadPool $as_me 1.1d, which was +generated by GNU Autoconf 2.61. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" +config_commands="$ac_config_commands" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +ac_cs_usage="\ +\`$as_me' instantiates files from templates according to the +current configuration. + +Usage: $0 [OPTIONS] [FILE]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration commands: +$config_commands + +Report bugs to ." + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF +ac_cs_version="\\ +ThreadPool config.status 1.1d +configured by $0, generated by GNU Autoconf 2.61, + with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" + +Copyright (C) 2006 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." 
+ +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +MKDIR_P='$MKDIR_P' +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +# If no file are specified by the user, then we need to provide default +# value. By we need to know if files were specified by the user. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + echo "$ac_cs_version"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + CONFIG_FILES="$CONFIG_FILES $ac_optarg" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + { echo "$as_me: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; };; + --help | --hel | -h ) + echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) { echo "$as_me: error: unrecognized option: $1 +Try \`$0 --help' for more information." 
>&2 + { (exit 1); exit 1; }; } ;; + + *) ac_config_targets="$ac_config_targets $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF +if \$ac_cs_recheck; then + echo "running CONFIG_SHELL=$SHELL $SHELL $0 "$ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6 + CONFIG_SHELL=$SHELL + export CONFIG_SHELL + exec $SHELL "$0"$ac_configure_args \$ac_configure_extra_args --no-create --no-recursion +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF +# +# INIT-COMMANDS +# +AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "src/ThreadPool_config.h") CONFIG_HEADERS="$CONFIG_HEADERS src/ThreadPool_config.h:src/ThreadPool_config.h.in" ;; + "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "Makefile.export.threadpool") CONFIG_FILES="$CONFIG_FILES Makefile.export.threadpool" ;; + "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;; + "test/Makefile") CONFIG_FILES="$CONFIG_FILES test/Makefile" ;; + + *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 +echo "$as_me: error: invalid argument: $ac_config_target" >&2;} + { (exit 1); exit 1; }; };; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. 
+if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers + test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= + trap 'exit_status=$? + { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status +' 0 + trap '{ (exit 1); exit 1; }' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || +{ + echo "$me: cannot create a temporary directory in ." >&2 + { (exit 1); exit 1; } +} + +# +# Set up the sed scripts for CONFIG_FILES section. +# + +# No need to generate the scripts if there are no CONFIG_FILES. 
+# This happens for instance when ./config.status config.h +if test -n "$CONFIG_FILES"; then + +_ACEOF + + + +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + cat >conf$$subs.sed <<_ACEOF +SHELL!$SHELL$ac_delim +PATH_SEPARATOR!$PATH_SEPARATOR$ac_delim +PACKAGE_NAME!$PACKAGE_NAME$ac_delim +PACKAGE_TARNAME!$PACKAGE_TARNAME$ac_delim +PACKAGE_VERSION!$PACKAGE_VERSION$ac_delim +PACKAGE_STRING!$PACKAGE_STRING$ac_delim +PACKAGE_BUGREPORT!$PACKAGE_BUGREPORT$ac_delim +exec_prefix!$exec_prefix$ac_delim +prefix!$prefix$ac_delim +program_transform_name!$program_transform_name$ac_delim +bindir!$bindir$ac_delim +sbindir!$sbindir$ac_delim +libexecdir!$libexecdir$ac_delim +datarootdir!$datarootdir$ac_delim +datadir!$datadir$ac_delim +sysconfdir!$sysconfdir$ac_delim +sharedstatedir!$sharedstatedir$ac_delim +localstatedir!$localstatedir$ac_delim +includedir!$includedir$ac_delim +oldincludedir!$oldincludedir$ac_delim +docdir!$docdir$ac_delim +infodir!$infodir$ac_delim +htmldir!$htmldir$ac_delim +dvidir!$dvidir$ac_delim +pdfdir!$pdfdir$ac_delim +psdir!$psdir$ac_delim +libdir!$libdir$ac_delim +localedir!$localedir$ac_delim +mandir!$mandir$ac_delim +DEFS!$DEFS$ac_delim +ECHO_C!$ECHO_C$ac_delim +ECHO_N!$ECHO_N$ac_delim +ECHO_T!$ECHO_T$ac_delim +LIBS!$LIBS$ac_delim +build_alias!$build_alias$ac_delim +host_alias!$host_alias$ac_delim +target_alias!$target_alias$ac_delim +MAINTAINER_MODE_TRUE!$MAINTAINER_MODE_TRUE$ac_delim +MAINTAINER_MODE_FALSE!$MAINTAINER_MODE_FALSE$ac_delim +MAINT!$MAINT$ac_delim +build!$build$ac_delim +build_cpu!$build_cpu$ac_delim +build_vendor!$build_vendor$ac_delim +build_os!$build_os$ac_delim +host!$host$ac_delim +host_cpu!$host_cpu$ac_delim +host_vendor!$host_vendor$ac_delim +host_os!$host_os$ac_delim +target!$target$ac_delim +target_cpu!$target_cpu$ac_delim +target_vendor!$target_vendor$ac_delim +target_os!$target_os$ac_delim +INSTALL_PROGRAM!$INSTALL_PROGRAM$ac_delim +INSTALL_SCRIPT!$INSTALL_SCRIPT$ac_delim 
+INSTALL_DATA!$INSTALL_DATA$ac_delim +am__isrc!$am__isrc$ac_delim +CYGPATH_W!$CYGPATH_W$ac_delim +PACKAGE!$PACKAGE$ac_delim +VERSION!$VERSION$ac_delim +ACLOCAL!$ACLOCAL$ac_delim +AUTOCONF!$AUTOCONF$ac_delim +AUTOMAKE!$AUTOMAKE$ac_delim +AUTOHEADER!$AUTOHEADER$ac_delim +MAKEINFO!$MAKEINFO$ac_delim +install_sh!$install_sh$ac_delim +STRIP!$STRIP$ac_delim +INSTALL_STRIP_PROGRAM!$INSTALL_STRIP_PROGRAM$ac_delim +mkdir_p!$mkdir_p$ac_delim +AWK!$AWK$ac_delim +SET_MAKE!$SET_MAKE$ac_delim +am__leading_dot!$am__leading_dot$ac_delim +AMTAR!$AMTAR$ac_delim +am__tar!$am__tar$ac_delim +am__untar!$am__untar$ac_delim +MPI_TEMP_CXX!$MPI_TEMP_CXX$ac_delim +MPI_CXX!$MPI_CXX$ac_delim +HAVE_MPI_TRUE!$HAVE_MPI_TRUE$ac_delim +HAVE_MPI_FALSE!$HAVE_MPI_FALSE$ac_delim +MPI_CXX_EXISTS!$MPI_CXX_EXISTS$ac_delim +MPI_CC_EXISTS!$MPI_CC_EXISTS$ac_delim +MPI_F77_EXISTS!$MPI_F77_EXISTS$ac_delim +CC!$CC$ac_delim +CFLAGS!$CFLAGS$ac_delim +LDFLAGS!$LDFLAGS$ac_delim +CPPFLAGS!$CPPFLAGS$ac_delim +ac_ct_CC!$ac_ct_CC$ac_delim +EXEEXT!$EXEEXT$ac_delim +OBJEXT!$OBJEXT$ac_delim +DEPDIR!$DEPDIR$ac_delim +am__include!$am__include$ac_delim +am__quote!$am__quote$ac_delim +AMDEP_TRUE!$AMDEP_TRUE$ac_delim +AMDEP_FALSE!$AMDEP_FALSE$ac_delim +AMDEPBACKSLASH!$AMDEPBACKSLASH$ac_delim +CCDEPMODE!$CCDEPMODE$ac_delim +am__fastdepCC_TRUE!$am__fastdepCC_TRUE$ac_delim +am__fastdepCC_FALSE!$am__fastdepCC_FALSE$ac_delim +_ACEOF + + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then + break + elif $ac_last_try; then + { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 +echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} + { (exit 1); exit 1; }; } + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done + +ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed` +if test -n "$ac_eof"; then + ac_eof=`echo "$ac_eof" | sort -nru | sed 1q` + ac_eof=`expr $ac_eof + 1` +fi + +cat >>$CONFIG_STATUS <<_ACEOF +cat >"\$tmp/subs-1.sed" <<\CEOF$ac_eof +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +_ACEOF +sed ' +s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g +s/^/s,@/; s/!/@,|#_!!_#|/ +:n +t n +s/'"$ac_delim"'$/,g/; t +s/$/\\/; p +N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n +' >>$CONFIG_STATUS >$CONFIG_STATUS <<_ACEOF +CEOF$ac_eof +_ACEOF + + +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + cat >conf$$subs.sed <<_ACEOF +CXX!$CXX$ac_delim +CXXFLAGS!$CXXFLAGS$ac_delim +ac_ct_CXX!$ac_ct_CXX$ac_delim +CXXDEPMODE!$CXXDEPMODE$ac_delim +am__fastdepCXX_TRUE!$am__fastdepCXX_TRUE$ac_delim +am__fastdepCXX_FALSE!$am__fastdepCXX_FALSE$ac_delim +RANLIB!$RANLIB$ac_delim +USE_ALTERNATE_AR_TRUE!$USE_ALTERNATE_AR_TRUE$ac_delim +USE_ALTERNATE_AR_FALSE!$USE_ALTERNATE_AR_FALSE$ac_delim +ALTERNATE_AR!$ALTERNATE_AR$ac_delim +CXXCPP!$CXXCPP$ac_delim +USING_EXPORT_MAKEFILES_TRUE!$USING_EXPORT_MAKEFILES_TRUE$ac_delim +USING_EXPORT_MAKEFILES_FALSE!$USING_EXPORT_MAKEFILES_FALSE$ac_delim +PERL_EXE!$PERL_EXE$ac_delim +HAVE_PERL!$HAVE_PERL$ac_delim +USING_PERL_TRUE!$USING_PERL_TRUE$ac_delim +USING_PERL_FALSE!$USING_PERL_FALSE$ac_delim +USING_GNUMAKE_TRUE!$USING_GNUMAKE_TRUE$ac_delim +USING_GNUMAKE_FALSE!$USING_GNUMAKE_FALSE$ac_delim +BUILD_TESTS_TRUE!$BUILD_TESTS_TRUE$ac_delim +BUILD_TESTS_FALSE!$BUILD_TESTS_FALSE$ac_delim +SUB_TEST_TRUE!$SUB_TEST_TRUE$ac_delim +SUB_TEST_FALSE!$SUB_TEST_FALSE$ac_delim +GREP!$GREP$ac_delim +EGREP!$EGREP$ac_delim +PTHREAD_CC!$PTHREAD_CC$ac_delim +PTHREAD_LIBS!$PTHREAD_LIBS$ac_delim +PTHREAD_CFLAGS!$PTHREAD_CFLAGS$ac_delim +ac_aux_dir!$ac_aux_dir$ac_delim +LIBOBJS!$LIBOBJS$ac_delim +LTLIBOBJS!$LTLIBOBJS$ac_delim +_ACEOF + + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 31; then + break + elif $ac_last_try; then + { { echo 
"$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 +echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} + { (exit 1); exit 1; }; } + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed` +if test -n "$ac_eof"; then + ac_eof=`echo "$ac_eof" | sort -nru | sed 1q` + ac_eof=`expr $ac_eof + 1` +fi + +cat >>$CONFIG_STATUS <<_ACEOF +cat >"\$tmp/subs-2.sed" <<\CEOF$ac_eof +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b end +_ACEOF +sed ' +s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g +s/^/s,@/; s/!/@,|#_!!_#|/ +:n +t n +s/'"$ac_delim"'$/,g/; t +s/$/\\/; p +N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n +' >>$CONFIG_STATUS >$CONFIG_STATUS <<_ACEOF +:end +s/|#_!!_#|//g +CEOF$ac_eof +_ACEOF + + +# VPATH may cause trouble with some makes, so we remove $(srcdir), +# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=/{ +s/:*\$(srcdir):*/:/ +s/:*\${srcdir}:*/:/ +s/:*@srcdir@:*/:/ +s/^\([^=]*=[ ]*\):*/\1/ +s/:*$// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF +fi # test -n "$CONFIG_FILES" + + +for ac_tag in :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) { { echo "$as_me:$LINENO: error: Invalid tag $ac_tag." >&5 +echo "$as_me: error: Invalid tag $ac_tag." 
>&2;} + { (exit 1); exit 1; }; };; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + { { echo "$as_me:$LINENO: error: cannot find input file: $ac_f" >&5 +echo "$as_me: error: cannot find input file: $ac_f" >&2;} + { (exit 1); exit 1; }; };; + esac + ac_file_inputs="$ac_file_inputs $ac_f" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input="Generated from "`IFS=: + echo $* | sed 's|^[^:]*/||;s|:[^:]*/|, |g'`" by configure." + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + fi + + case $ac_tag in + *:-:* | *:-) cat >"$tmp/stdin";; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + { as_dir="$ac_dir" + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || { { echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5 +echo "$as_me: error: cannot create directory $as_dir" >&2;} + { (exit 1); exit 1; }; }; } + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. 
+ ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac + ac_MKDIR_P=$MKDIR_P + case $MKDIR_P in + [\\/$]* | ?:[\\/]* ) ;; + */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. +ac_datarootdir_hack=; ac_datarootdir_seen= + +case `sed -n '/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p +' $ac_file_inputs` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { echo "$as_me:$LINENO: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? 
+cat >>$CONFIG_STATUS <<_ACEOF + sed "$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s&@configure_input@&$configure_input&;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +s&@MKDIR_P@&$ac_MKDIR_P&;t t +$ac_datarootdir_hack +" $ac_file_inputs | sed -f "$tmp/subs-1.sed" | sed -f "$tmp/subs-2.sed" >$tmp/out + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } && + { echo "$as_me:$LINENO: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined." >&5 +echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined." >&2;} + + rm -f "$tmp/stdin" + case $ac_file in + -) cat "$tmp/out"; rm -f "$tmp/out";; + *) rm -f "$ac_file"; mv "$tmp/out" $ac_file;; + esac + ;; + :H) + # + # CONFIG_HEADER + # +_ACEOF + +# Transform confdefs.h into a sed script `conftest.defines', that +# substitutes the proper values into config.h.in to produce config.h. +rm -f conftest.defines conftest.tail +# First, append a space to every undef/define line, to ease matching. +echo 's/$/ /' >conftest.defines +# Then, protect against being on the right side of a sed subst, or in +# an unquoted here document, in config.status. If some macros were +# called several times there might be several #defines for the same +# symbol, which is useless. But do not sort them, since the last +# AC_DEFINE must be honored. 
+ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +# These sed commands are passed to sed as "A NAME B PARAMS C VALUE D", where +# NAME is the cpp macro being defined, VALUE is the value it is being given. +# PARAMS is the parameter list in the macro definition--in most cases, it's +# just an empty string. +ac_dA='s,^\\([ #]*\\)[^ ]*\\([ ]*' +ac_dB='\\)[ (].*,\\1define\\2' +ac_dC=' ' +ac_dD=' ,' + +uniq confdefs.h | + sed -n ' + t rset + :rset + s/^[ ]*#[ ]*define[ ][ ]*// + t ok + d + :ok + s/[\\&,]/\\&/g + s/^\('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/ '"$ac_dA"'\1'"$ac_dB"'\2'"${ac_dC}"'\3'"$ac_dD"'/p + s/^\('"$ac_word_re"'\)[ ]*\(.*\)/'"$ac_dA"'\1'"$ac_dB$ac_dC"'\2'"$ac_dD"'/p + ' >>conftest.defines + +# Remove the space that was appended to ease matching. +# Then replace #undef with comments. This is necessary, for +# example, in the case of _POSIX_SOURCE, which is predefined and required +# on some systems where configure will not decide to define it. +# (The regexp can be short, since the line contains either #define or #undef.) +echo 's/ $// +s,^[ #]*u.*,/* & */,' >>conftest.defines + +# Break up conftest.defines: +ac_max_sed_lines=50 + +# First sed command is: sed -f defines.sed $ac_file_inputs >"$tmp/out1" +# Second one is: sed -f defines.sed "$tmp/out1" >"$tmp/out2" +# Third one will be: sed -f defines.sed "$tmp/out2" >"$tmp/out1" +# et cetera. +ac_in='$ac_file_inputs' +ac_out='"$tmp/out1"' +ac_nxt='"$tmp/out2"' + +while : +do + # Write a here document: + cat >>$CONFIG_STATUS <<_ACEOF + # First, check the format of the line: + cat >"\$tmp/defines.sed" <<\\CEOF +/^[ ]*#[ ]*undef[ ][ ]*$ac_word_re[ ]*\$/b def +/^[ ]*#[ ]*define[ ][ ]*$ac_word_re[( ]/b def +b +:def +_ACEOF + sed ${ac_max_sed_lines}q conftest.defines >>$CONFIG_STATUS + echo 'CEOF + sed -f "$tmp/defines.sed"' "$ac_in >$ac_out" >>$CONFIG_STATUS + ac_in=$ac_out; ac_out=$ac_nxt; ac_nxt=$ac_in + sed 1,${ac_max_sed_lines}d conftest.defines >conftest.tail + grep . 
conftest.tail >/dev/null || break + rm -f conftest.defines + mv conftest.tail conftest.defines +done +rm -f conftest.defines conftest.tail + +echo "ac_result=$ac_in" >>$CONFIG_STATUS +cat >>$CONFIG_STATUS <<\_ACEOF + if test x"$ac_file" != x-; then + echo "/* $configure_input */" >"$tmp/config.h" + cat "$ac_result" >>"$tmp/config.h" + if diff $ac_file "$tmp/config.h" >/dev/null 2>&1; then + { echo "$as_me:$LINENO: $ac_file is unchanged" >&5 +echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f $ac_file + mv "$tmp/config.h" $ac_file + fi + else + echo "/* $configure_input */" + cat "$ac_result" + fi + rm -f "$tmp/out12" +# Compute $ac_file's index in $config_headers. +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $ac_file | $ac_file:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $ac_file" >`$as_dirname -- $ac_file || +$as_expr X$ac_file : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X$ac_file : 'X\(//\)[^/]' \| \ + X$ac_file : 'X\(//\)$' \| \ + X$ac_file : 'X\(/\)' \| . 2>/dev/null || +echo X$ac_file | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'`/stamp-h$_am_stamp_count + ;; + + :C) { echo "$as_me:$LINENO: executing $ac_file commands" >&5 +echo "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "depfiles":C) test x"$AMDEP_TRUE" != x"" || for mf in $CONFIG_FILES; do + # Strip MF so we end up with the name of the file. + mf=`echo "$mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile or not. + # We used to match only the files named `Makefile.in', but + # some people rename them; so instead we look at the file content. + # Grep'ing the first line is not enough: some people post-process + # each Makefile.in and add a new line on top of each file to say so. 
+ # Grep'ing the whole file is not good either: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. + if sed 10q "$mf" | grep '^#.*generated by automake' > /dev/null 2>&1; then + dirpart=`$as_dirname -- "$mf" || +$as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$mf" : 'X\(//\)[^/]' \| \ + X"$mf" : 'X\(//\)$' \| \ + X"$mf" : 'X\(/\)' \| . 2>/dev/null || +echo X"$mf" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + else + continue + fi + # Extract the definition of DEPDIR, am__include, and am__quote + # from the Makefile without running `make'. + DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` + test -z "$DEPDIR" && continue + am__include=`sed -n 's/^am__include = //p' < "$mf"` + test -z "am__include" && continue + am__quote=`sed -n 's/^am__quote = //p' < "$mf"` + # When using ansi2knr, U may be empty or an underscore; expand it + U=`sed -n 's/^U = //p' < "$mf"` + # Find all dependency output files, they are included files with + # $(DEPDIR) in their names. We invoke sed twice because it is the + # simplest approach to changing $(DEPDIR) to its actual value in the + # expansion. + for file in `sed -n " + s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ + sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do + # Make sure the directory exists. + test -f "$dirpart/$file" && continue + fdir=`$as_dirname -- "$file" || +$as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$file" : 'X\(//\)[^/]' \| \ + X"$file" : 'X\(//\)$' \| \ + X"$file" : 'X\(/\)' \| . 
2>/dev/null || +echo X"$file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + { as_dir=$dirpart/$fdir + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || { { echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5 +echo "$as_me: error: cannot create directory $as_dir" >&2;} + { (exit 1); exit 1; }; }; } + # echo "creating $dirpart/$file" + echo '# dummy' > "$dirpart/$file" + done +done + ;; + + esac +done # for ac_tag + + +{ (exit 0); exit 0; } +_ACEOF +chmod +x $CONFIG_STATUS +ac_clean_files=$ac_clean_files_save + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. 
+if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || { (exit 1); exit 1; } +fi + + +# Bye World! +echo "---------------------------------------------" +echo "Finished Running ThreadPool Configure Script" +echo "---------------------------------------------" diff --git a/mkl/basic/optional/ThreadPool/configure.ac b/mkl/basic/optional/ThreadPool/configure.ac new file mode 100644 index 0000000..12778f4 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/configure.ac @@ -0,0 +1,240 @@ +# ------------------------------------------------------------------------ +# Process this file with autoconf to produce a configure script. +# ------------------------------------------------------------------------ + +# @HEADER +# ************************************************************************ +# +# ThreadPool Package +# Copyright (2008) Sandia Corporation +# +# Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +# license for use of this work by or on behalf of the U.S. Government. +# +# This library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of the +# License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA +# Questions? Contact Carter Edwards (hcedwar@sandia.gov) +# +# ************************************************************************ +# @HEADER + +# ------------------------------------------------------------------------ +# Initialization +# ------------------------------------------------------------------------ + +# This must be the first line in configure.ac. +# Optional 3rd argument is email address for bugs. + +# #np# - package name, version number, and e-mail address below +AC_INIT(ThreadPool, 1.1d, hcedwar@sandia.gov) + +# Hello World! +echo "----------------------------------------" +echo "Running ThreadPool Configure Script" +echo "----------------------------------------" + +# This is to protect against accidentally specifying the wrong +# directory with --srcdir. Any file in that directory will do, +# preferably one that is unlikely to be removed or renamed. + +AC_CONFIG_SRCDIR([src/TPI.c]) + +# Specify directory for auxiliary build tools (e.g., install-sh, +# config.sub, config.guess) and M4 files. + +AC_CONFIG_AUX_DIR(config) +# #auto np# - Change file names in next line +# Configure should create src/ThreadPool_config.h from src/ThreadPool_config.h.in + +AM_CONFIG_HEADER(src/ThreadPool_config.h:src/ThreadPool_config.h.in) + +# Allow users to specify their own "install" command. If none is specified, +# the default is install-sh found in the config subdirectory. + +AC_ARG_WITH(install, + [AC_HELP_STRING([--with-install=INSTALL_PROGRAM], + [Use the installation program INSTALL_PROGRAM rather the default that is provided.
For example --with-install="/path/install -p"])], + [ + INSTALL=$withval + INSTALL_PROGRAM=$withval + INSTALL_SCRIPT=$withval + INSTALL_DATA="$withval -m 644" + ],) + +# AM_MAINTAINER_MODE turns off maintainer-only makefile targets by +# default, and changes configure to understand a +# --enable-maintainer-mode option. --enable-maintainer-mode turns the +# maintainer-only targets back on. The maintainer-only makefile +# targets permit end users to clean automatically-generated files such +# as configure, which means they have to have autoconf and automake +# installed to repair the damage. AM_MAINTAINER_MODE makes it a bit +# harder for users to shoot themselves in the foot. + +AM_MAINTAINER_MODE + +# Define $build, $host, $target, etc + +AC_CANONICAL_TARGET + +# Use automake + +# - Required version of automake. +AM_INIT_AUTOMAKE(1.10 no-define tar-ustar) + +# Specify required version of autoconf. + +AC_PREREQ(2.61) + +# ------------------------------------------------------------------------ +# Check to see if MPI enabled and if any special configuration done +# ------------------------------------------------------------------------ + +TAC_ARG_CONFIG_MPI + +# #np# - can eliminate compiler checks below if your package does not use the +# language corresponding to the check. Please note that if you use +# F77_FUNC to determine Fortran name mangling, you should not remove +# the Fortran compiler check or the check for Fortran flags. Doing +# so will prevent the detection of the proper name mangling in some +# cases. 
+# ------------------------------------------------------------------------ +# Checks for programs +# ------------------------------------------------------------------------ + +AC_PROG_CC(cc gcc) +AC_PROG_CXX(CC g++ c++ cxx) +#AC_PROG_F77(f77 g77 gfortran f90 xlf90 f95) +AC_PROG_RANLIB + +# Check if --with-flags present, prepend any specs to FLAGS + +TAC_ARG_WITH_FLAGS(ccflags, CCFLAGS) +TAC_ARG_WITH_FLAGS(cxxflags, CXXFLAGS) +TAC_ARG_WITH_FLAGS(cflags, CFLAGS) +#TAC_ARG_WITH_FLAGS(fflags, FFLAGS) +TAC_ARG_WITH_LIBS +TAC_ARG_WITH_FLAGS(ldflags, LDFLAGS) + +# ------------------------------------------------------------------------ +# Alternate archiver +# ------------------------------------------------------------------------ + +TAC_ARG_WITH_AR + +# ------------------------------------------------------------------------ +# MPI link check +# ------------------------------------------------------------------------ +TAC_ARG_CHECK_MPI + +# ------------------------------------------------------------------------ +# Checks for Makefile.export related systems +# ------------------------------------------------------------------------ +TAC_ARG_ENABLE_EXPORT_MAKEFILES(yes) + +# ------------------------------------------------------------------------ +# Checks if tests and examples should be built +# ------------------------------------------------------------------------ + +# #np# - These options can disable the tests and examples of a package. +# #np# - Packages that do not have tests or examples should #-out the +# #np# - option(s) that does (do) not apply. 
+ +TAC_ARG_ENABLE_FEATURE(tests, [Make tests for all Trilinos packages buildable with 'make tests'], TESTS, yes) +TAC_ARG_ENABLE_FEATURE_SUB_CHECK( threadpool, tests, [Make ThreadPool tests buildable with 'make tests'], NEW_PACKAGE_TESTS) +AM_CONDITIONAL(BUILD_TESTS, test "X$ac_cv_use_threadpool_tests" != "Xno") + +#TAC_ARG_ENABLE_FEATURE(examples, [Make examples for all Trilinos packages buildable with 'make examples'], EXAMPLES, yes) +#TAC_ARG_ENABLE_FEATURE_SUB_CHECK( new_package, examples, [Make New_Package examples buildable with 'make examples'], NEW_PACKAGE_EXAMPLES) +#AM_CONDITIONAL(BUILD_EXAMPLES, test "X$ac_cv_use_new_package_examples" != "Xno") + +#We now build tests and examples through separate make targets, rather than +#during "make". We still need to conditionally include the test and example +#in SUBDIRS, even though SUB_TEST and SUB_EXAMPLE will never be +#defined, so that the tests and examples are included in the distribution +#tarball. +AM_CONDITIONAL(SUB_TEST, test "X$ac_cv_use_sub_test" = "Xyes") +#AM_CONDITIONAL(SUB_EXAMPLE, test "X$ac_cv_use_sub_example" = "Xyes") + +TAC_ARG_ENABLE_FEATURE(libcheck, [Check for some third-party libraries. (Cannot be disabled unless tests and examples are also disabled.)], LIBCHECK, yes) + +# ------------------------------------------------------------------------ +# Specify other directories +# ------------------------------------------------------------------------ + +# enable use of --with-libdirs="-Llibdir1 -Llibdir2 ..." to prepend to LDFLAGS +TAC_ARG_WITH_LIBDIRS +# enable use of --with-incdirs="-Iincdir1 -Iincdir2 ..." to prepend to CPPFLAGS +TAC_ARG_WITH_INCDIRS + +# #np# - Yet another opportunity to remove code if you aren't +# using Fortran +# Define F77_FUNC that will be used to link with Fortran subroutines.
- trash WORKGXX +#AC_F77_WRAPPERS + +# ------------------------------------------------------------------------ +# Checks for libraries +# ------------------------------------------------------------------------ + +# If tests, examples and libcheck are disabled, we don't have to check +# for these libraries. + +# #np# - +# If a package does not have tests or examples, the corresponding check(s) +# should be pulled out of the "if" statement below. +#if test "X$ac_cv_use_new_package_examples" != "Xno" || test "X$ac_cv_use_libcheck" != "Xno"; then +if test "X$ac_cv_use_threadpool_tests" != "Xno" || test "X$ac_cv_use_libcheck" != "Xno"; then + +ACX_PTHREAD +LIBS="$PTHREAD_LIBS $LIBS" +CFLAGS="$CFLAGS $PTHREAD_CFLAGS" +CC="$PTHREAD_CC" + +fi +# end of the list of libraries that don't need to be checked for if +# tests and examples are disabled. + +# ------------------------------------------------------------------------ +# Checks for linker characteristics +# ------------------------------------------------------------------------ + +# Determine libraries needed for linking with Fortran +#AC_F77_LIBRARY_LDFLAGS + + +# ------------------------------------------------------------------------ +# Perform substitutions in output files +# ------------------------------------------------------------------------ + +AC_SUBST(ac_aux_dir) + +# ------------------------------------------------------------------------ +# Output files +# ------------------------------------------------------------------------ +## +# You will need to change AC_CONFIG_FILES below and Makefile.am +# to add a new directory. +AC_CONFIG_FILES([ + Makefile + Makefile.export.threadpool + src/Makefile + test/Makefile + ]) + +AC_OUTPUT() + +# Bye World! 
+echo "---------------------------------------------" +echo "Finished Running ThreadPool Configure Script" +echo "---------------------------------------------" diff --git a/mkl/basic/optional/ThreadPool/src/CMakeLists.txt b/mkl/basic/optional/ThreadPool/src/CMakeLists.txt new file mode 100644 index 0000000..41a1f39 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/src/CMakeLists.txt @@ -0,0 +1,70 @@ + +INCLUDE(PackageLibraryMacros) + +# +# A) Package-specific configuration options +# + +PACKAGE_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) + +# +# B) Define the header and source files (and directories) +# + +# +# src +# + +SET(HEADERS "") +SET(SOURCES "") + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) + +SET(HEADERS ${HEADERS} + ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h + ) + +# +# Core files +# + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +APPEND_SET(HEADERS + TPI.h + TPI.hpp + ) + +APPEND_SET(SOURCES + TPI.c + ) + +# +# Util files +# +APPEND_SET(SOURCES + TPI_Walltime.c + ) + +###################################### + +APPEND_SET(HEADERS + ) + +APPEND_SET(SOURCES + ) + +###################################### +IF (TPL_ENABLE_MPI) +ENDIF() + +# +# C) Define the targets for package's library(s) +# + +PACKAGE_ADD_LIBRARY( + tpi + HEADERS ${HEADERS} + SOURCES ${SOURCES} + ) diff --git a/mkl/basic/optional/ThreadPool/src/Makefile.am b/mkl/basic/optional/ThreadPool/src/Makefile.am new file mode 100644 index 0000000..44c1621 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/src/Makefile.am @@ -0,0 +1,140 @@ +# @HEADER +# ************************************************************************ +# +# ThreadPool Package +# Copyright (2008) Sandia Corporation +# +# Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +# license for use of this work by or on behalf of the U.S. Government. 
+# +# This library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of the +# License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA +# Questions? Contact Carter Edwards (hcedwar@sandia.gov) +# +# ************************************************************************ +# @HEADER + +# The following line helps the test harness recover from build errors. + +all-local: + @echo "" + @echo "Trilinos package ThreadPool subdirectory src built successfully." + @echo "" + +# ------------------------------------------------------------------------ +# For each category, create two variables - NAME and NAME_H. The +# second is the list of headers to be installed, i.e., any header that +# might someday be needed by some other code outside New_Package. The first is +# the list of all source and any other header files. +# ------------------------------------------------------------------------ + +#np# Make sure to list all source files in one of the following categories. 
+ +CORE = $(srcdir)/TPI.c + +CORE_H = \ + $(srcdir)/TPI.h \ + $(srcdir)/TPI.hpp + +UTIL = \ + $(srcdir)/TPI_Walltime.c + + +# ------------------------------------------------------------------------ +# ThreadPool library specifications +# ------------------------------------------------------------------------ +#np# replace new_package with the name of the package being autotool'ed here +THREADPOOL_LIB = libtpi.a + +#np# replace new_package with the name of the package being autotool'ed here +THREADPOOL_H = \ + $(CORE_H) + +#np# replace new_package with the name of the package being autotool'ed here +libtpi_a_SOURCES = \ + $(CORE) \ + $(UTIL) + +#np# replace new_package with the name of the package being autotool'ed here +#EXTRA_libtpi_a_SOURCES = + +include $(top_builddir)/Makefile.export.threadpool + +if USING_GNUMAKE +EXPORT_INCLUDES = $(shell $(PERL_EXE) $(top_srcdir)/config/strip_dup_incl_paths.pl $(THREADPOOL_INCLUDES)) +else +EXPORT_INCLUDES = $(THREADPOOL_INCLUDES) +endif + +AM_CPPFLAGS = $(EXPORT_INCLUDES) + +# ------------------------------------------------------------------------ +# For using a special archiver +# ------------------------------------------------------------------------ + +if USE_ALTERNATE_AR + +libtpi_a_AR = $(ALTERNATE_AR) +else + +libtpi_a_AR = $(AR) cru + +endif + +# ------------------------------------------------------------------------ +# Some C++ compilers create extra .o-files for templates. We need to +# be sure to include these, and this is the hack to do it. 
+# ------------------------------------------------------------------------ + +libtpi_a_LIBADD = $(XTRALDADD) + +# ------------------------------------------------------------------------ +# List of all libraries to install in $(libdir) +# ------------------------------------------------------------------------ + +lib_LIBRARIES = $(THREADPOOL_LIB) + +# ------------------------------------------------------------------------ +# List of all headers to install in $(includedir) +# ------------------------------------------------------------------------ + +#np# replace new_package with the name of the package being autotool'ed here +include_HEADERS = $(THREADPOOL_H) + +# ------------------------------------------------------------------------ +# Special stuff to install in our special $(execincludedir) +# ------------------------------------------------------------------------ + +# SPECIAL NOTE: ThreadPool_config.h is a machine-dependent file, so we need +# to install it in the machine-dependent directory. However, that is +# not a default installation directory, so we had to create it +# special. + +# All Trilinos headers are now installed in the same directory +execincludedir = $(includedir) +#np# replace new_package with the name of the package being autotool'ed here +nodist_execinclude_HEADERS = ThreadPool_config.h + +# ------------------------------------------------------------------------ +# Files to be deleted by 'make maintainer-clean' +# ------------------------------------------------------------------------ + +MAINTAINERCLEANFILES = Makefile.in + + + + + + diff --git a/mkl/basic/optional/ThreadPool/src/Makefile.in b/mkl/basic/optional/ThreadPool/src/Makefile.in new file mode 100644 index 0000000..4dd7802 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/src/Makefile.in @@ -0,0 +1,680 @@ +# Makefile.in generated by automake 1.10 from Makefile.am.
+# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# @HEADER +# ************************************************************************ +# +# ThreadPool Package +# Copyright (2008) Sandia Corporation +# +# Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +# license for use of this work by or on behalf of the U.S. Government. +# +# This library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of the +# License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA +# Questions? Contact Carter Edwards (hcedwar@sandia.gov) +# +# ************************************************************************ +# @HEADER + +# The following line helps the test harness recover from build errors. 
+ + +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = src +DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \ + $(srcdir)/Makefile.in $(srcdir)/ThreadPool_config.h.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/acx_pthread.m4 \ + $(top_srcdir)/config/tac_arg_check_mpi.m4 \ + $(top_srcdir)/config/tac_arg_config_mpi.m4 \ + $(top_srcdir)/config/tac_arg_enable_export-makefiles.m4 \ + $(top_srcdir)/config/tac_arg_enable_feature.m4 \ + $(top_srcdir)/config/tac_arg_enable_feature_sub_check.m4 \ + $(top_srcdir)/config/tac_arg_with_ar.m4 \ + $(top_srcdir)/config/tac_arg_with_flags.m4 \ + $(top_srcdir)/config/tac_arg_with_incdirs.m4 \ + $(top_srcdir)/config/tac_arg_with_libdirs.m4 \ + $(top_srcdir)/config/tac_arg_with_libs.m4 \ + $(top_srcdir)/config/tac_arg_with_perl.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = ThreadPool_config.h +CONFIG_CLEAN_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = `echo $$p | sed -e 's|^.*/||'`; +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" \ + "$(DESTDIR)$(execincludedir)" +libLIBRARIES_INSTALL = $(INSTALL_DATA) +LIBRARIES = $(lib_LIBRARIES) +AR = ar +ARFLAGS = cru 
+libtpi_a_DEPENDENCIES = +am__objects_1 = TPI.$(OBJEXT) +am__objects_2 = TPI_Walltime.$(OBJEXT) +am_libtpi_a_OBJECTS = $(am__objects_1) $(am__objects_2) +libtpi_a_OBJECTS = $(am_libtpi_a_OBJECTS) +DEFAULT_INCLUDES = -I.@am__isrc@ +depcomp = $(SHELL) $(top_srcdir)/config/depcomp +am__depfiles_maybe = depfiles +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +SOURCES = $(libtpi_a_SOURCES) +DIST_SOURCES = $(libtpi_a_SOURCES) +includeHEADERS_INSTALL = $(INSTALL_HEADER) +nodist_execincludeHEADERS_INSTALL = $(INSTALL_HEADER) +HEADERS = $(include_HEADERS) $(nodist_execinclude_HEADERS) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALTERNATE_AR = @ALTERNATE_AR@ +AMTAR = @AMTAR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +HAVE_PERL = @HAVE_PERL@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MPI_CC_EXISTS = @MPI_CC_EXISTS@ +MPI_CXX = @MPI_CXX@ +MPI_CXX_EXISTS = @MPI_CXX_EXISTS@ +MPI_F77_EXISTS = @MPI_F77_EXISTS@ +MPI_TEMP_CXX = @MPI_TEMP_CXX@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ 
+PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERL_EXE = @PERL_EXE@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_aux_dir = @ac_aux_dir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ + +# ------------------------------------------------------------------------ +# For each category, create two variables - NAME and NAME_H. The +# second is the list of headers to be installed, i.e., any header that +# might someday be needed by some other code outside New_Package. 
The first is +# the list of all source and any other header files. +# ------------------------------------------------------------------------ + +#np# Make sure to list all source files in one of the following categories. +CORE = $(srcdir)/TPI.c +CORE_H = \ + $(srcdir)/TPI.h \ + $(srcdir)/TPI.hpp + +UTIL = \ + $(srcdir)/TPI_Walltime.c + + +# ------------------------------------------------------------------------ +# ThreadPool library specifications +# ------------------------------------------------------------------------ +#np# replace new_package with the name of the package being autotool'ed here +THREADPOOL_LIB = libtpi.a + +#np# replace new_package with the name of the package being autotool'ed here +THREADPOOL_H = \ + $(CORE_H) + + +#np# replace new_package with the name of the package being autotool'ed here +libtpi_a_SOURCES = \ + $(CORE) \ + $(UTIL) + +@USING_GNUMAKE_FALSE@EXPORT_INCLUDES = $(THREADPOOL_INCLUDES) +@USING_GNUMAKE_TRUE@EXPORT_INCLUDES = $(shell $(PERL_EXE) $(top_srcdir)/config/strip_dup_incl_paths.pl $(THREADPOOL_INCLUDES)) +AM_CPPFLAGS = $(EXPORT_INCLUDES) +@USE_ALTERNATE_AR_FALSE@libtpi_a_AR = $(AR) cru + +# ------------------------------------------------------------------------ +# For using a special archiver +# ------------------------------------------------------------------------ +@USE_ALTERNATE_AR_TRUE@libtpi_a_AR = $(ALTERNATE_AR) + +# ------------------------------------------------------------------------ +# Some C++ compilers create extra .o-files for templates. We need to +# be sure to include these, and this is the hack to do it. 
+# ------------------------------------------------------------------------ +libtpi_a_LIBADD = $(XTRALDADD) + +# ------------------------------------------------------------------------ +# List of all libraries to install in $(libexecdir) +# ------------------------------------------------------------------------ +lib_LIBRARIES = $(THREADPOOL_LIB) + +# ------------------------------------------------------------------------ +# List of all headers to install in $(includedir) +# ------------------------------------------------------------------------ + +#np# replace new_package with the name of the package being autotool'ed here +include_HEADERS = $(THREADPOOL_H) + +# ------------------------------------------------------------------------ +# Special stuff to install in our special $(execincludedir) +# ------------------------------------------------------------------------ + +# SPECIAL NOTE: New_Package_config.h is a machine-dependent file, so we need +# to install it in the machine-dependent directory. However, that is +# not a default installation directory, so we had to create it +# special. 
+ +# All Trilinos headers are now installed in the same directory +execincludedir = $(includedir) +#np# replace new_package with the name of the package being autotool'ed here +nodist_execinclude_HEADERS = ThreadPool_config.h + +# ------------------------------------------------------------------------ +# Files to be deleted by 'make maintainer-clean' +# ------------------------------------------------------------------------ +MAINTAINERCLEANFILES = Makefile.in +all: ThreadPool_config.h + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +ThreadPool_config.h: stamp-h1 + @if test ! 
-f $@; then \ + rm -f stamp-h1; \ + $(MAKE) $(AM_MAKEFLAGS) stamp-h1; \ + else :; fi + +stamp-h1: $(srcdir)/ThreadPool_config.h.in $(top_builddir)/config.status + @rm -f stamp-h1 + cd $(top_builddir) && $(SHELL) ./config.status src/ThreadPool_config.h +$(srcdir)/ThreadPool_config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_srcdir) && $(AUTOHEADER) + rm -f stamp-h1 + touch $@ + +distclean-hdr: + -rm -f ThreadPool_config.h stamp-h1 +install-libLIBRARIES: $(lib_LIBRARIES) + @$(NORMAL_INSTALL) + test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)" + @list='$(lib_LIBRARIES)'; for p in $$list; do \ + if test -f $$p; then \ + f=$(am__strip_dir) \ + echo " $(libLIBRARIES_INSTALL) '$$p' '$(DESTDIR)$(libdir)/$$f'"; \ + $(libLIBRARIES_INSTALL) "$$p" "$(DESTDIR)$(libdir)/$$f"; \ + else :; fi; \ + done + @$(POST_INSTALL) + @list='$(lib_LIBRARIES)'; for p in $$list; do \ + if test -f $$p; then \ + p=$(am__strip_dir) \ + echo " $(RANLIB) '$(DESTDIR)$(libdir)/$$p'"; \ + $(RANLIB) "$(DESTDIR)$(libdir)/$$p"; \ + else :; fi; \ + done + +uninstall-libLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LIBRARIES)'; for p in $$list; do \ + p=$(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(libdir)/$$p'"; \ + rm -f "$(DESTDIR)$(libdir)/$$p"; \ + done + +clean-libLIBRARIES: + -test -z "$(lib_LIBRARIES)" || rm -f $(lib_LIBRARIES) +libtpi.a: $(libtpi_a_OBJECTS) $(libtpi_a_DEPENDENCIES) + -rm -f libtpi.a + $(libtpi_a_AR) libtpi.a $(libtpi_a_OBJECTS) $(libtpi_a_LIBADD) + $(RANLIB) libtpi.a + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/TPI.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/TPI_Walltime.Po@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +TPI.o: $(srcdir)/TPI.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT TPI.o -MD -MP -MF $(DEPDIR)/TPI.Tpo -c -o TPI.o `test -f '$(srcdir)/TPI.c' || echo '$(srcdir)/'`$(srcdir)/TPI.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/TPI.Tpo $(DEPDIR)/TPI.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/TPI.c' object='TPI.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o TPI.o `test -f '$(srcdir)/TPI.c' || echo '$(srcdir)/'`$(srcdir)/TPI.c + +TPI.obj: $(srcdir)/TPI.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT TPI.obj -MD -MP -MF $(DEPDIR)/TPI.Tpo -c -o TPI.obj `if test -f '$(srcdir)/TPI.c'; then $(CYGPATH_W) '$(srcdir)/TPI.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/TPI.c'; fi` +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/TPI.Tpo $(DEPDIR)/TPI.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/TPI.c' object='TPI.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o TPI.obj `if test -f '$(srcdir)/TPI.c'; 
then $(CYGPATH_W) '$(srcdir)/TPI.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/TPI.c'; fi` + +TPI_Walltime.o: $(srcdir)/TPI_Walltime.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT TPI_Walltime.o -MD -MP -MF $(DEPDIR)/TPI_Walltime.Tpo -c -o TPI_Walltime.o `test -f '$(srcdir)/TPI_Walltime.c' || echo '$(srcdir)/'`$(srcdir)/TPI_Walltime.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/TPI_Walltime.Tpo $(DEPDIR)/TPI_Walltime.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/TPI_Walltime.c' object='TPI_Walltime.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o TPI_Walltime.o `test -f '$(srcdir)/TPI_Walltime.c' || echo '$(srcdir)/'`$(srcdir)/TPI_Walltime.c + +TPI_Walltime.obj: $(srcdir)/TPI_Walltime.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT TPI_Walltime.obj -MD -MP -MF $(DEPDIR)/TPI_Walltime.Tpo -c -o TPI_Walltime.obj `if test -f '$(srcdir)/TPI_Walltime.c'; then $(CYGPATH_W) '$(srcdir)/TPI_Walltime.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/TPI_Walltime.c'; fi` +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/TPI_Walltime.Tpo $(DEPDIR)/TPI_Walltime.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(srcdir)/TPI_Walltime.c' object='TPI_Walltime.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o TPI_Walltime.obj `if test -f '$(srcdir)/TPI_Walltime.c'; then $(CYGPATH_W) '$(srcdir)/TPI_Walltime.c'; else $(CYGPATH_W) '$(srcdir)/$(srcdir)/TPI_Walltime.c'; fi` +install-includeHEADERS: $(include_HEADERS) + @$(NORMAL_INSTALL) + test -z "$(includedir)" || 
$(MKDIR_P) "$(DESTDIR)$(includedir)" + @list='$(include_HEADERS)'; for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + f=$(am__strip_dir) \ + echo " $(includeHEADERS_INSTALL) '$$d$$p' '$(DESTDIR)$(includedir)/$$f'"; \ + $(includeHEADERS_INSTALL) "$$d$$p" "$(DESTDIR)$(includedir)/$$f"; \ + done + +uninstall-includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(include_HEADERS)'; for p in $$list; do \ + f=$(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(includedir)/$$f'"; \ + rm -f "$(DESTDIR)$(includedir)/$$f"; \ + done +install-nodist_execincludeHEADERS: $(nodist_execinclude_HEADERS) + @$(NORMAL_INSTALL) + test -z "$(execincludedir)" || $(MKDIR_P) "$(DESTDIR)$(execincludedir)" + @list='$(nodist_execinclude_HEADERS)'; for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + f=$(am__strip_dir) \ + echo " $(nodist_execincludeHEADERS_INSTALL) '$$d$$p' '$(DESTDIR)$(execincludedir)/$$f'"; \ + $(nodist_execincludeHEADERS_INSTALL) "$$d$$p" "$(DESTDIR)$(execincludedir)/$$f"; \ + done + +uninstall-nodist_execincludeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(nodist_execinclude_HEADERS)'; for p in $$list; do \ + f=$(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(execincludedir)/$$f'"; \ + rm -f "$(DESTDIR)$(execincludedir)/$$f"; \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) ThreadPool_config.h.in $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) ThreadPool_config.h.in $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; 
\ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) ThreadPool_config.h.in $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) ThreadPool_config.h.in $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LIBRARIES) $(HEADERS) ThreadPool_config.h all-local +installdirs: + for dir in 
"$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" "$(DESTDIR)$(execincludedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." + -test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) +clean: clean-am + +clean-am: clean-generic clean-libLIBRARIES mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-hdr distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +info: info-am + +info-am: + +install-data-am: install-includeHEADERS + +install-dvi: install-dvi-am + +install-exec-am: install-libLIBRARIES \ + install-nodist_execincludeHEADERS + +install-html: install-html-am + +install-info: install-info-am + +install-man: + +install-pdf: install-pdf-am + +install-ps: install-ps-am + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-includeHEADERS uninstall-libLIBRARIES \ + uninstall-nodist_execincludeHEADERS + +.MAKE: install-am 
install-strip + +.PHONY: CTAGS GTAGS all all-am all-local check check-am clean \ + clean-generic clean-libLIBRARIES ctags distclean \ + distclean-compile distclean-generic distclean-hdr \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-includeHEADERS install-info \ + install-info-am install-libLIBRARIES install-man \ + install-nodist_execincludeHEADERS install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \ + uninstall-am uninstall-includeHEADERS uninstall-libLIBRARIES \ + uninstall-nodist_execincludeHEADERS + + +all-local: + @echo "" + @echo "Trilinos package ThreadPool subdirectory src built successfully." + @echo "" + +#np# replace new_package with the name of the package being autotool'ed here +#EXTRA_libtpi_a_SOURCES = + +include $(top_builddir)/Makefile.export.threadpool +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/mkl/basic/optional/ThreadPool/src/TPI.c b/mkl/basic/optional/ThreadPool/src/TPI.c new file mode 100644 index 0000000..f2b1566 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/src/TPI.c @@ -0,0 +1,1016 @@ +/*------------------------------------------------------------------------*/ +/* TPI: Thread Pool Interface */ +/* Copyright (2008) Sandia Corporation */ +/* */ +/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ +/* license for use of this work by or on behalf of the U.S. Government. 
*/ +/* */ +/* This library is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU Lesser General Public License as */ +/* published by the Free Software Foundation; either version 2.1 of the */ +/* License, or (at your option) any later version. */ +/* */ +/* This library is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ +/* Lesser General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU Lesser General Public */ +/* License along with this library; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ +/* USA */ +/*------------------------------------------------------------------------*/ +/** + * @author H. Carter Edwards + */ + +/*--------------------------------------------------------------------*/ + +#include +#include +#include +#include + +/*--------------------------------------------------------------------*/ +/*----------- PTHREAD CONFIGURATION (BEGIN) --------------------------*/ +/*--------------------------------------------------------------------*/ + +#if defined( HAVE_PTHREAD ) + +#include +#include +#include + +/*--------------------------------------------------------------------*/ +/*---------------- COMPILER SPECIFICS (BEGIN) ------------------------*/ +/*--------------------------------------------------------------------*/ + +/* Performance is heavily impacted by an + * atomic decrement of the work counter. + * Optimize this if at all possible. 
+ */ + +#if defined( __INTEL_COMPILER ) + +#define THREADPOOL_CONFIG "PTHREAD SCHED_YIELD" + +#elif defined( __linux__ ) && \ + defined( __GNUC__ ) && ( 4 <= __GNUC__ ) + +#define THREADPOOL_CONFIG "PTHREAD SCHED_YIELD ATOMIC_SYNC" + +#define atomic_fetch_and_decrement( VALUE_PTR ) \ + __sync_fetch_and_sub( VALUE_PTR , 1 ) + +#else + +#define THREADPOOL_CONFIG "PTHREAD SCHED_YIELD" + +#endif + +#if ! defined( atomic_fetch_and_decrement ) +/* Fallback used only when no atomic_fetch_and_decrement macro was defined above. The decrement is serialized through a function-local static mutex, acquired by retrying pthread_mutex_trylock for as long as it reports EBUSY. Returns the value *value held before it was decremented. */ +static int atomic_fetch_and_decrement( volatile int * value ) +{ + static pthread_mutex_t atomic_lock = PTHREAD_MUTEX_INITIALIZER ; + int result ; + while ( EBUSY == pthread_mutex_trylock( & atomic_lock ) ); + result = ( *value )-- ; + pthread_mutex_unlock( & atomic_lock ); + return result ; +} + +#endif + +/*--------------------------------------------------------------------*/ +/*---------------- COMPILER SPECIFICS (END) --------------------------*/ +/*--------------------------------------------------------------------*/ +/* In the pthread configuration every TPI lock is a pthread mutex. */ +typedef pthread_mutex_t local_lock_type ; + +#else /* ! 
defined( HAVE_PTHREAD ) */ + +#define THREADPOOL_CONFIG "NO THREADING" + +typedef int local_lock_type ; + +#endif + +/*--------------------------------------------------------------------*/ +/*----------- PTHREAD CONFIGURATION (END) ----------------------------*/ +/*--------------------------------------------------------------------*/ +/* Returns a static string holding the TPI version text followed by the compile-time THREADPOOL_CONFIG description. */ +const char * TPI_Version() +{ + static const char version_string[] = + "TPI Version 1.1 , November 2009 , Configuration = " THREADPOOL_CONFIG ; + + return version_string ; +} + +/*--------------------------------------------------------------------*/ + +enum { THREAD_COUNT_MAX = 256 }; +enum { LOCK_COUNT_MAX = 32 }; + +struct ThreadPool_Data ; +/* Per-thread state. The threads a given thread waits for in the barrier are the array entries from its own m_thread_fan up to, but not including, the m_thread_fan of the next array entry. */ +typedef struct Thread_Data { + struct Thread_Data * m_thread_fan ; /* Fan-in / fan-out begin */ + void * m_reduce ; /* Reduction memory */ + long m_rank ; + long m_barrier_wait_max ; + long m_barrier_wait_total ; + long m_barrier_wait_count ; + volatile long m_control ; /* 1 = active, 0 = parked in local_driver until local_start sets it back to 1 */ +} Thread ; +/* The single process-wide pool: the current work request followed by the thread and lock tables. */ +typedef struct ThreadPool_Data { + TPI_work_subprogram m_work_routine ; + const void * m_work_info ; + TPI_reduce_join m_reduce_join ; + TPI_reduce_init m_reduce_init ; + unsigned char * m_reduce_alloc ; + int m_reduce_alloc_size ; + int m_thread_count ; + int m_lock_init ; + int m_lock_count ; + int m_work_thread_count ; + int m_work_count ; + int m_work_count_claim ; /* countdown of unclaimed work items, decremented by local_run */ + + Thread m_thread[ THREAD_COUNT_MAX + 1 ]; /* one extra sentinel entry: the pthread TPI_Init accepts n == THREAD_COUNT_MAX and initializes m_thread[0..n], and local_barrier reads this_thread[1] for the last thread */ + local_lock_type m_lock[ LOCK_COUNT_MAX ]; +} ThreadPool ; + + +static ThreadPool thread_pool = +{ + /* m_work_routine */ NULL , + /* m_work_info */ NULL , + /* m_reduce_join */ NULL , + /* m_reduce_init */ NULL , + /* m_reduce_alloc */ NULL , + /* m_reduce_alloc_size */ 0 , + /* m_thread_count */ 0 , + /* m_lock_init */ 0 , + /* m_lock_count */ 0 , + /* m_work_thread_count */ 0 , + /* m_work_count */ 0 , + /* m_work_count_claim */ 0 +}; + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ + +#if defined( 
HAVE_PTHREAD ) + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ +/* Acquire lock number i. Returns TPI_ERROR_SIZE when i is outside [0, m_lock_count), TPI_ERROR_LOCK when pthread_mutex_trylock fails with anything other than EBUSY, and 0 once the mutex is held. Acquisition retries trylock in a loop instead of calling the blocking pthread_mutex_lock. */ +int TPI_Lock( int i ) +{ + int result = i < 0 || thread_pool.m_lock_count <= i ? TPI_ERROR_SIZE : 0 ; + + if ( ! result ) { + pthread_mutex_t * const lock = thread_pool.m_lock + i ; + + while ( EBUSY == ( result = pthread_mutex_trylock( lock ) ) ); + + if ( result ) { result = TPI_ERROR_LOCK ; } + } + return result ; +} +/* Release lock number i. Returns TPI_ERROR_SIZE for an out-of-range index, TPI_ERROR_LOCK when pthread_mutex_unlock reports an error, and 0 otherwise. */ +int TPI_Unlock( int i ) +{ + int result = i < 0 || thread_pool.m_lock_count <= i ? TPI_ERROR_SIZE : 0 ; + + if ( ! result && pthread_mutex_unlock( thread_pool.m_lock + i ) ) { + result = TPI_ERROR_LOCK ; + } + + return result ; +} +/* Make sure at least lock_count mutexes are initialized. A count outside [0, LOCK_COUNT_MAX] yields TPI_ERROR_SIZE and initializes nothing. Otherwise mutexes m_lock_init .. lock_count-1 are initialized in order, m_lock_init advancing after each success; the first pthread_mutex_init failure stops the loop with TPI_ERROR_INTERNAL. Mutexes initialized by earlier calls are left as they are. */ +static int local_set_lock_count( const int lock_count ) +{ + int result = lock_count < 0 || LOCK_COUNT_MAX < lock_count + ? TPI_ERROR_SIZE : 0 ; + + while ( ! result && thread_pool.m_lock_init < lock_count ) { + + pthread_mutex_t * const lock = thread_pool.m_lock + + thread_pool.m_lock_init ; + + if ( pthread_mutex_init( lock , NULL ) ) { + result = TPI_ERROR_INTERNAL ; + } + else { + ++( thread_pool.m_lock_init ); + } + } + + return result ; +} +/* Destroy every initialized mutex, highest index first, leaving m_lock_init at 0. */ +static void local_destroy_locks() +{ + while ( thread_pool.m_lock_init ) { + --( thread_pool.m_lock_init ); + pthread_mutex_destroy( thread_pool.m_lock + thread_pool.m_lock_init ); + } +} + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ +/* Run work if any, then wait for child threads to block. 
*/ +/* Executes this thread's part of the current request. When work.count does not exceed m_work_thread_count each thread runs at most one item, with rank ( m_thread_count - 1 ) - m_rank, and only if that rank is below work.count. Otherwise threads repeatedly claim items by atomically decrementing m_work_count_claim; a claimed value c greater than 0 is mapped to rank work.count - c, and the loop ends once the fetched value is no longer positive. */ +static void local_run( Thread * const this_thread , void * reduce ) +{ + struct TPI_Work_Struct work ; + + work.info = thread_pool.m_work_info ; + work.reduce = reduce ; + work.count = thread_pool.m_work_count ; + work.lock_count = thread_pool.m_lock_count ; + + if ( work.count <= thread_pool.m_work_thread_count ) { + + work.rank = ( thread_pool.m_thread_count - 1 ) - this_thread->m_rank ; + + if ( work.rank < work.count ) { + (*thread_pool.m_work_routine)( & work ); + } + } + else { + + int * const claim = & thread_pool.m_work_count_claim ; + + while ( 0 < ( work.rank = atomic_fetch_and_decrement( claim ))) { + + work.rank = work.count - work.rank ; + + (*thread_pool.m_work_routine)( & work ); + } + } +} +/* Calls sched_yield() for as long as *control equals val and returns the number of yields performed. */ +static int wait_thread( volatile long * const control , const int val ) +{ + int count = 0 ; + while ( val == *control ) { + sched_yield(); + ++count ; + } + return count ; +} +/* Waits until thread->m_control is no longer 1, then records the number of yields in this_thread's barrier statistics (call count, running total, maximum). */ +static void local_barrier_wait( Thread * const this_thread , + Thread * const thread ) +{ + const long count = wait_thread( & thread->m_control , 1 ); + + ++( this_thread->m_barrier_wait_count ); + + this_thread->m_barrier_wait_total += count ; + + if ( this_thread->m_barrier_wait_max < count ) { + this_thread->m_barrier_wait_max = count ; + } +} +/* Fan-in barrier for one thread. The threads waited for are the entries from this_thread[0].m_thread_fan up to, but not including, this_thread[1].m_thread_fan, visited from the highest entry down. Three cases: no work routine (wait only), work without reduction (run, then wait), and work with reduction (initialize the reduction value on every thread whose m_rank is nonzero, run, then join each waited-for thread's m_reduce right after waiting for it). */ +static void local_barrier( Thread * const this_thread ) +{ + Thread * const thread_beg = this_thread[0].m_thread_fan ; + Thread * thread = this_thread[1].m_thread_fan ; + + if ( ! thread_pool.m_work_routine ) { + while ( thread_beg < thread ) { + --thread ; local_barrier_wait( this_thread , thread ); + } + } + else if ( ! 
thread_pool.m_reduce_join ) { + + local_run( this_thread , NULL ); + + while ( thread_beg < thread ) { + --thread ; local_barrier_wait( this_thread , thread ); + } + } + else { + + /* Work data for the reduction initialization and join */ + + struct TPI_Work_Struct work ; + + work.info = thread_pool.m_work_info ; + work.reduce = this_thread->m_reduce ; + work.count = -1 ; + work.rank = -1 ; + work.lock_count = -1 ; + + /* Initialize reduction value for non-root thread */ + + if ( this_thread->m_rank ) { (*thread_pool.m_reduce_init)( & work ); } + + /* Run the work routine with barrier blocking */ + + local_run( this_thread , work.reduce ); + + /* Reduction of thread's contributions */ + + while ( thread_beg < thread ) { + --thread ; local_barrier_wait( this_thread , thread ); + (*thread_pool.m_reduce_join)( & work , thread->m_reduce ); + } + } +} + +/*--------------------------------------------------------------------*/ +/* The driver given to 'pthread_create'. + * Run work until told to terminate. + */ +static void * local_driver( void * arg ) +{ + Thread * const this_thread = (Thread *) arg ; + + do { + /* Wait for my subtree of threads to complete */ + local_barrier( this_thread ); + + this_thread->m_control = 0 ; + + /* Spin until I am activated. 
*/ + wait_thread( & this_thread->m_control , 0 ); + + } while ( thread_pool.m_work_routine ); + + local_barrier( this_thread ); /* Termination barrier */ + + this_thread->m_control = 0 ; + + return NULL ; +} + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ +/* Gives every non-root thread ( m_thread[1] .. m_thread[m_thread_count-1] ) a private reduction buffer of at least reduce_size bytes, carved out of one shared allocation. The per-thread size is reduce_size rounded up to a whole number of 256-byte grains. The allocation is only redone when the recorded size is smaller than alloc_count * reduce_size, so it never shrinks. NOTE(review): the malloc / realloc result is not checked for NULL before the per-thread pointers are derived from it. */ +static void alloc_reduce( int reduce_size ) +{ + const int alloc_count = thread_pool.m_thread_count - 1 ; + + if ( thread_pool.m_reduce_alloc_size < alloc_count * reduce_size ) { + + const int grain_shift = 8 ; /* grain_size = 0x100 */ + const int grain_size = 1 << grain_shift ; /* Byte grain size */ + const int grain_count = ( reduce_size + grain_size - 1 ) >> grain_shift ; + const int reduce_grain = grain_size * grain_count ; + const int alloc_size = alloc_count * reduce_grain ; + + int i ; + + if ( thread_pool.m_reduce_alloc ) { + thread_pool.m_reduce_alloc = + (unsigned char *) realloc( thread_pool.m_reduce_alloc , alloc_size ); + } + else { + thread_pool.m_reduce_alloc = (unsigned char *) malloc( alloc_size ); + } + + thread_pool.m_reduce_alloc_size = alloc_size ; + + for ( i = 0 ; i < alloc_count ; ++i ) { + thread_pool.m_thread[i+1].m_reduce = + thread_pool.m_reduce_alloc + reduce_grain * i ; + } + } +} +/* Publishes a work request in thread_pool. A nonzero lock_count is first passed to local_set_lock_count; if that fails its error code is returned and nothing is published. Returns 0 on success. */ +static int local_start( + int work_thread_count , + TPI_work_subprogram work_subprogram , + const void * work_info , + int work_count , + int lock_count , + TPI_reduce_join reduce_join , + TPI_reduce_init reduce_init , + int reduce_size , + void * reduce_data ) +{ + const int result = lock_count ? local_set_lock_count( lock_count ) : 0 ; + + if ( ! 
result ) { + + thread_pool.m_work_routine = work_subprogram ; + thread_pool.m_work_info = work_info ; + thread_pool.m_work_count = work_count ; + thread_pool.m_lock_count = lock_count ; + thread_pool.m_thread->m_reduce = reduce_data ; + + if ( 1 < thread_pool.m_thread_count ) { + + if ( reduce_size ) { alloc_reduce( reduce_size ); } + + thread_pool.m_reduce_join = reduce_join ; + thread_pool.m_reduce_init = reduce_init ; + thread_pool.m_work_thread_count = work_thread_count ; + thread_pool.m_work_count_claim = work_count ; + + /* Activate the spinning worker threads */ + { + Thread * const thread_beg = thread_pool.m_thread + 1 ; + Thread * thread = thread_pool.m_thread + + thread_pool.m_thread_count ; + + while ( thread_beg < thread ) { (--thread)->m_control = 1 ; } + } + } + } + + return result ; +} +/* Completes the current work request on the calling (root) thread. With more than one thread the root joins the barrier through local_barrier and then clears the reduction and claim fields. With a single thread the root runs every work item itself, ranks 0 .. count-1 in order. In both cases the request fields are reset afterwards, so m_work_routine is NULL on return. */ +static void local_wait() +{ + if ( 1 < thread_pool.m_thread_count ) { + + local_barrier( thread_pool.m_thread ); + + thread_pool.m_reduce_join = NULL ; + thread_pool.m_reduce_init = NULL ; + thread_pool.m_work_thread_count = 0 ; + thread_pool.m_work_count_claim = 0 ; + } + else { + struct TPI_Work_Struct w = { NULL , NULL , 0 , 0 , 0 }; + + w.info = thread_pool.m_work_info ; + w.count = thread_pool.m_work_count ; + w.lock_count = thread_pool.m_lock_count ; + w.reduce = thread_pool.m_thread->m_reduce ; + + for ( w.rank = 0 ; w.rank < w.count ; ++( w.rank ) ) { + (* thread_pool.m_work_routine )( & w ); + } + } + + thread_pool.m_work_routine = NULL ; + thread_pool.m_work_info = NULL ; + thread_pool.m_work_count = 0 ; + thread_pool.m_lock_count = 0 ; + thread_pool.m_thread->m_reduce = NULL ; +} + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ +/* Creates a pool of n threads: the caller acts as thread 0 and n-1 detached, system-scope pthreads are spawned running local_driver. Returns n on success, TPI_ERROR_ACTIVE when a pool already exists, TPI_ERROR_SIZE when n is outside [1, THREAD_COUNT_MAX], and TPI_ERROR_INTERNAL when attribute setup or thread creation fails (already created threads are then told to terminate and m_thread_count is reset to 0). NOTE(review): the initialization loop runs thread_rank = 0 .. n inclusive, so it writes m_thread[n]; the array must therefore hold at least n + 1 entries - confirm for n == THREAD_COUNT_MAX. */ +int TPI_Init( int n ) +{ + int result = thread_pool.m_thread_count ? TPI_ERROR_ACTIVE : 0 ; + + if ( ! result && ( n < 1 || THREAD_COUNT_MAX + 1 <= n ) ) { + result = TPI_ERROR_SIZE ; + } + + if ( ! 
result ) { + pthread_attr_t attr ; + + if ( pthread_attr_init( & attr ) + || pthread_attr_setscope( & attr, PTHREAD_SCOPE_SYSTEM ) + || pthread_attr_setdetachstate( & attr, PTHREAD_CREATE_DETACHED ) ) { + result = TPI_ERROR_INTERNAL ; + } + + if ( ! result ) { + int thread_rank = 0 ; + int count = 1 ; + + /* Initialize one lock for blocking and unblocking */ + + local_set_lock_count( 1 ); + + /* Initialize threads with fan-in / fan-out span of threads */ + + for ( thread_rank = 0 ; thread_rank <= n ; ++thread_rank ) { + Thread * const thread = thread_pool.m_thread + thread_rank ; + + thread->m_thread_fan = thread_pool.m_thread + count ; + thread->m_reduce = NULL ; + thread->m_rank = thread_rank ; + thread->m_barrier_wait_max = 0 ; + thread->m_barrier_wait_total = 0 ; + thread->m_barrier_wait_count = 0 ; + thread->m_control = 1 ; + + { + int up = 1 ; + while ( up <= thread_rank ) { up <<= 1 ; } + while ( thread_rank + up < n ) { up <<= 1 ; ++count ; } + } + } + + thread_pool.m_thread_count = n ; + + /* Create threads last-to-first for start up fan-in barrier */ + + for ( thread_rank = n ; ! result && 1 < thread_rank ; ) { + Thread * const thread = thread_pool.m_thread + --thread_rank ; + + pthread_t pt ; + + if ( pthread_create( & pt, & attr, & local_driver, thread ) ) { + thread->m_control = 0 ; + result = TPI_ERROR_INTERNAL ; + } + } + + /* If a thread-spawn failed, terminate the created threads */ + + if ( result ) { + while ( thread_rank < --( thread_pool.m_thread_count ) ) { + Thread * thread = thread_pool.m_thread + thread_pool.m_thread_count ; + wait_thread( & thread->m_control , 1 ); /* Wait for blocking */ + thread->m_control = 1 ; /* Reactivate thread */ + wait_thread( & thread->m_control , 1 ); /* Wait for termination */ + } + thread_pool.m_thread_count = 0 ; + } + + pthread_attr_destroy( & attr ); + } + } + + if ( ! 
result ) { + local_barrier( thread_pool.m_thread ); + result = n ; + } + + return result ; +} + +/*--------------------------------------------------------------------*/ +/* Shuts the pool down. Returns TPI_ERROR_ACTIVE, changing nothing, while a work routine is set. Otherwise it posts a request with a NULL work routine, waits for the threads through local_wait, resets m_thread_count to 0, destroys the locks, frees the reduction buffer and returns 0. The barrier statistics are printed only if print_statistics is changed from its hard-coded 0. */ +int TPI_Finalize() +{ + static int print_statistics = 0 ; + + int result ; + + result = NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : 0 ; + + if ( ! result ) { + + /* Wake up threads then wait for them to terminate */ + local_start( 0 , NULL , NULL , 0 , + 0 , NULL , NULL , 0 , NULL ); + + local_wait(); + + if ( print_statistics ) { + int i = 0 ; + for ( ; i < thread_pool.m_thread_count ; ++i ) { + if ( thread_pool.m_thread[i].m_barrier_wait_count ) { + long mean = ( thread_pool.m_thread[i].m_barrier_wait_total + 0.5 ) / + thread_pool.m_thread[i].m_barrier_wait_count ; + fprintf(stdout,"Thread[%d] barrier_wait( max %ld , mean %ld )\n", i , + thread_pool.m_thread[i].m_barrier_wait_max , mean ); + } + } + } + + thread_pool.m_thread_count = 0 ; + + local_destroy_locks(); + + if ( thread_pool.m_reduce_alloc ) { + free( thread_pool.m_reduce_alloc ); + thread_pool.m_reduce_alloc = NULL ; + thread_pool.m_reduce_alloc_size = 0 ; + } + } + + return result ; +} + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ +/* Work routine installed by TPI_Block: every rank except 0 locks and immediately unlocks mutex 0, so it stalls for as long as the caller of TPI_Block holds that mutex. */ +static void local_block( TPI_Work * work ) +{ + if ( work->rank ) { + pthread_mutex_lock( thread_pool.m_lock ); + pthread_mutex_unlock( thread_pool.m_lock ); + } +} +/* Locks mutex 0 and starts local_block with one work item per thread. Returns TPI_ERROR_ACTIVE when a work routine is already set, TPI_ERROR_INTERNAL when the mutex cannot be locked, otherwise the result of local_start. */ +int TPI_Block() +{ + const int result = + NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : ( + pthread_mutex_lock( thread_pool.m_lock ) ? TPI_ERROR_INTERNAL : + + local_start( thread_pool.m_thread_count , + local_block , NULL , + thread_pool.m_thread_count , + 0 /* lock_count */ , + NULL , NULL , 0 , NULL ) ); + + return result ; +} +/* Releases the pool held by TPI_Block: unlocks mutex 0 and completes the local_block request through local_wait. Returns TPI_ERROR_ACTIVE when the pool is not currently blocked. */ +int TPI_Unblock() +{ + const int result = + local_block != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : ( + pthread_mutex_unlock( thread_pool.m_lock ) ? 
TPI_ERROR_INTERNAL : 0 ); + + if ( ! result ) { local_wait(); } + + return result ; +} +/* Nonzero while the pool is held by TPI_Block, i.e. while the active work routine is local_block. */ +int TPI_Isblocked() +{ + return local_block == thread_pool.m_work_routine ; +} + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ + +#else /* ! defined( HAVE_PTHREAD ) */ + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ +/* Serial build: m_lock[i] is a plain 0/1 flag. Returns TPI_ERROR_SIZE when i is outside [0, m_lock_count), TPI_ERROR_LOCK when the flag is already set, otherwise sets the flag and returns 0. */ +int TPI_Lock( int i ) +{ + int result = i < 0 || thread_pool.m_lock_count <= i ? TPI_ERROR_SIZE : 0 ; + + if ( ! result ) { + if ( 0 != thread_pool.m_lock[i] ) { + result = TPI_ERROR_LOCK ; + } + else { + thread_pool.m_lock[i] = 1 ; + } + } + return result ; +} +/* Serial build: clears flag i. Returns TPI_ERROR_SIZE for an out-of-range index, TPI_ERROR_LOCK when the flag was not set, 0 otherwise. */ +int TPI_Unlock( int i ) +{ + int result = i < 0 || thread_pool.m_lock_count <= i ? TPI_ERROR_SIZE : 0 ; + + if ( ! result ) { + if ( 0 == thread_pool.m_lock[i] ) { + result = TPI_ERROR_LOCK ; + } + else { + thread_pool.m_lock[i] = 0 ; + } + } + return result ; +} +/* Serial build: zero-initializes lock flags m_lock_init .. lock_count-1 and advances m_lock_init. A lock_count outside [0, LOCK_COUNT_MAX] returns TPI_ERROR_SIZE and leaves m_lock[] and m_lock_init untouched. */ +static int local_set_lock_count( const int lock_count ) +{ + int result = lock_count < 0 || LOCK_COUNT_MAX < lock_count + ? TPI_ERROR_SIZE : 0 ; + + while ( ! result && thread_pool.m_lock_init < lock_count ) { /* the result guard keeps a rejected count from indexing past m_lock[ LOCK_COUNT_MAX - 1 ] */ + + thread_pool.m_lock[ thread_pool.m_lock_init ] = 0 ; + + ++( thread_pool.m_lock_init ); + } + + return result ; +} + +/*--------------------------------------------------------------------*/ + +static int local_start( + int work_thread_count , + TPI_work_subprogram work_subprogram , + const void * work_info , + int work_count , + int lock_count , + TPI_reduce_join reduce_join , + TPI_reduce_init reduce_init , + int reduce_size , + void * reduce_data ) +{ + const int result = lock_count ? local_set_lock_count( lock_count ) : 0 ; + + if ( ! 
result ) { + thread_pool.m_work_routine = work_subprogram ; + thread_pool.m_work_info = work_info ; + thread_pool.m_work_count = work_count ; + thread_pool.m_lock_count = lock_count ; + thread_pool.m_thread->m_reduce = reduce_data ; + } + + return result ; +} +/* Serial build: runs every work item of the current request on the calling thread, ranks 0 .. count-1 in order, then resets the request fields so that m_work_routine is NULL on return. */ +static void local_wait() +{ + struct TPI_Work_Struct w = { NULL , NULL , 0 , 0 , 0 }; + + w.info = thread_pool.m_work_info ; + w.count = thread_pool.m_work_count ; + w.lock_count = thread_pool.m_lock_count ; + w.reduce = thread_pool.m_thread->m_reduce ; + + for ( w.rank = 0 ; w.rank < w.count ; ++( w.rank ) ) { + (* thread_pool.m_work_routine )( & w ); + } + + thread_pool.m_work_routine = NULL ; + thread_pool.m_work_info = NULL ; + thread_pool.m_work_count = 0 ; + thread_pool.m_lock_count = 0 ; + thread_pool.m_thread->m_reduce = NULL ; +} + +/*--------------------------------------------------------------------*/ +/* Serial build: an empty work routine whose address only serves as the marker for the blocked state. */ +static void local_block( TPI_Work * work ) {} +/* Serial build: marks the pool as blocked by installing local_block as the work routine. Returns TPI_ERROR_ACTIVE when a work routine is already set, otherwise the result of local_start. */ +int TPI_Block() +{ + const int result = + NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : + + local_start( thread_pool.m_thread_count , + local_block , NULL , + thread_pool.m_thread_count , + 0 /* lock_count */ , + NULL , NULL , 0 , NULL ) ; + + return result ; +} +/* Serial build: leaves the blocked state through local_wait. Returns TPI_ERROR_ACTIVE when the pool is not currently blocked. */ +int TPI_Unblock() +{ + const int result = + local_block != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : 0 ; + + if ( ! result ) { local_wait(); } + + return result ; +} +/* Nonzero while the active work routine is local_block. */ +int TPI_Isblocked() +{ + return local_block == thread_pool.m_work_routine ; +} + +/*--------------------------------------------------------------------*/ +/* Serial build: records a pool of n threads (only thread 0 is ever set up) and returns n. Returns TPI_ERROR_ACTIVE, leaving the existing pool untouched, when a pool is already initialized, and TPI_ERROR_SIZE when n is outside [1, THREAD_COUNT_MAX]. */ +int TPI_Init( int n ) +{ + int result = thread_pool.m_thread_count ? TPI_ERROR_ACTIVE : 0 ; + + if ( ! 
result && ( n < 1 || THREAD_COUNT_MAX + 1 <= n ) ) { + result = TPI_ERROR_SIZE ; + } + else if ( ! result ) { /* initialize only when no error is pending; a plain else also ran for TPI_ERROR_ACTIVE and replaced that error with n */ + Thread * const thread = thread_pool.m_thread ; + + thread->m_thread_fan = NULL ; + thread->m_reduce = NULL ; + thread->m_rank = 0 ; + thread->m_barrier_wait_max = 0 ; + thread->m_barrier_wait_total = 0 ; + thread->m_barrier_wait_count = 0 ; + thread->m_control = 1 ; + + thread_pool.m_thread_count = result = n ; + + /* Initialize one lock for blocking and unblocking */ + + local_set_lock_count( 1 ); + } + + return result ; +} + +/*--------------------------------------------------------------------*/ +/* Serial build: returns TPI_ERROR_ACTIVE while a work routine is set; otherwise resets m_thread_count and m_lock_init to 0 and returns 0. */ +int TPI_Finalize() +{ + int result = NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : 0 ; + + if ( ! result ) { + thread_pool.m_thread_count = 0 ; + thread_pool.m_lock_init = 0 ; + } + + return result ; +} + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ + +#endif + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ +/* Waits for work begun by one of the start functions. Returns TPI_ERROR_ACTIVE when no work routine is set or when the pool is blocked by TPI_Block; otherwise completes the request through local_wait and returns 0. */ +int TPI_Wait() +{ + const int result = + ( NULL == thread_pool.m_work_routine || + local_block == thread_pool.m_work_routine ) ? TPI_ERROR_ACTIVE : 0 ; + + if ( ! result ) { local_wait(); } + + return result ; +} +/* Publishes work_count work items and returns without waiting for them. Returns TPI_ERROR_ACTIVE when a work routine is already set, TPI_ERROR_NULL for a NULL work_subprogram, TPI_ERROR_SIZE for a negative work_count, otherwise the result of local_start (called with m_thread_count - 1 as the work thread count). */ +int TPI_Start( TPI_work_subprogram work_subprogram , + const void * work_info , + int work_count , + int lock_count ) +{ + const int result = + NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : ( + NULL == work_subprogram ? TPI_ERROR_NULL : ( + work_count < 0 ? TPI_ERROR_SIZE : + local_start( thread_pool.m_thread_count - 1 , + work_subprogram , work_info , work_count , lock_count , + NULL , NULL , 0 , NULL ) ) ); + + return result ; +} +/* Same argument checks as TPI_Start, but the work thread count is m_thread_count and the call also completes the request through local_wait before returning. */ +int TPI_Run( TPI_work_subprogram work_subprogram , + const void * work_info , + int work_count , + int lock_count ) +{ + const int result = + NULL != thread_pool.m_work_routine ? 
TPI_ERROR_ACTIVE : (
+    NULL == work_subprogram            ? TPI_ERROR_NULL   : (
+    work_count  < 0                    ? TPI_ERROR_SIZE   :
+    local_start( thread_pool.m_thread_count ,
+                 work_subprogram , work_info , work_count , lock_count ,
+                 NULL , NULL , 0 , NULL ) ) );
+
+  if ( ! result ) { local_wait(); }
+
+  return result ;
+}
+/*
+ * Run work_subprogram with one work item per pool thread and complete
+ * it before returning.
+ * The work count is m_thread_count, or 1 when that count is not
+ * positive.  Returns TPI_ERROR_ACTIVE when a work routine is already
+ * recorded, TPI_ERROR_NULL when work_subprogram is NULL, otherwise the
+ * result of local_start; local_wait() is called when that result is 0.
+ */
+int TPI_Run_threads( TPI_work_subprogram work_subprogram  ,
+                     const void *        work_info ,
+                     int                 lock_count )
+{
+  const int work_count = 0 < thread_pool.m_thread_count ?
+                             thread_pool.m_thread_count : 1 ;
+
+  const int result =
+    NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : (
+    NULL == work_subprogram            ? TPI_ERROR_NULL   : (
+    local_start( thread_pool.m_thread_count ,
+                 work_subprogram , work_info , work_count , lock_count ,
+                 NULL , NULL , 0 , NULL ) ) );
+
+  if ( ! result ) { local_wait(); }
+
+  return result ;
+}
+/*
+ * Record work with one work item per non-calling pool thread and return
+ * without waiting; the caller finishes it with TPI_Wait().
+ * The work count is m_thread_count - 1, or 1 when the pool has at most
+ * one thread.  Returns TPI_ERROR_ACTIVE when a work routine is already
+ * recorded, TPI_ERROR_NULL when work_subprogram is NULL, otherwise the
+ * result of local_start.
+ */
+int TPI_Start_threads( TPI_work_subprogram work_subprogram  ,
+                       const void *        work_info ,
+                       int                 lock_count )
+{
+  const int work_count = 1 < thread_pool.m_thread_count ?
+                             thread_pool.m_thread_count - 1 : 1 ;
+
+  const int result =
+    NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : (
+    NULL == work_subprogram            ? TPI_ERROR_NULL   : (
+    local_start( thread_pool.m_thread_count - 1 ,
+                 work_subprogram , work_info , work_count , lock_count ,
+                 NULL , NULL , 0 , NULL ) ) );
+
+  /* No local_wait() here: like TPI_Start, TPI_Start_reduce and
+   * TPI_Start_threads_reduce this only starts the work.  Waiting here
+   * cleared the recorded routine, so the matching TPI_Wait() call
+   * returned TPI_ERROR_ACTIVE.
+   */
+
+  return result ;
+}
+
+/*--------------------------------------------------------------------*/
+/*--------------------------------------------------------------------*/
+/*
+ * Run work_subprogram together with a reduction and complete it before
+ * returning.  lock_count is fixed at 0.  The ordered checks below
+ * return the first failure: TPI_ERROR_ACTIVE when a work routine is
+ * already recorded, TPI_ERROR_NULL for a NULL work_subprogram,
+ * reduce_join, reduce_init or reduce_data, and TPI_ERROR_SIZE for a
+ * non-positive work_count or reduce_size.
+ */
+int TPI_Run_reduce( TPI_work_subprogram   work_subprogram  ,
+                    const void *          work_info ,
+                    int                   work_count ,
+                    TPI_reduce_join       reduce_join ,
+                    TPI_reduce_init       reduce_init ,
+                    int                   reduce_size ,
+                    void *                reduce_data )
+{
+  const int lock_count = 0 ;
+
+  const int result =
+    NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : (
+    NULL == work_subprogram            ? TPI_ERROR_NULL   : (
+    NULL == reduce_join                ? TPI_ERROR_NULL   : (
+    NULL == reduce_init                ? 
TPI_ERROR_NULL   : (
+    NULL == reduce_data                ? TPI_ERROR_NULL   : (
+    work_count  <= 0                   ? TPI_ERROR_SIZE   : (
+    reduce_size <= 0                   ? TPI_ERROR_SIZE   :
+
+    local_start( thread_pool.m_thread_count ,
+                 work_subprogram, work_info, work_count, lock_count,
+                 reduce_join, reduce_init, reduce_size, reduce_data )))))));
+
+  if ( ! result ) { local_wait(); } /* 'Run' variant: complete the work before returning */
+
+  return result ;
+}
+/*
+ * Run work_subprogram with a reduction, one work item per pool thread,
+ * and complete it before returning.
+ * The work count is m_thread_count, or 1 when that count is not
+ * positive; lock_count is fixed at 0.  The checks are made in order and
+ * the first failure is returned: TPI_ERROR_ACTIVE when a work routine
+ * is already recorded, TPI_ERROR_NULL for a NULL work_subprogram,
+ * reduce_join, reduce_init or reduce_data, TPI_ERROR_SIZE for a
+ * non-positive reduce_size; otherwise the result of local_start.
+ */
+int TPI_Run_threads_reduce( TPI_work_subprogram   work_subprogram  ,
+                            const void *          work_info ,
+                            TPI_reduce_join       reduce_join ,
+                            TPI_reduce_init       reduce_init ,
+                            int                   reduce_size ,
+                            void *                reduce_data )
+{
+  const int lock_count = 0 ;
+  const int work_count = 0 < thread_pool.m_thread_count ?
+                             thread_pool.m_thread_count : 1 ;
+
+  const int result =
+    NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : (
+    NULL == work_subprogram            ? TPI_ERROR_NULL   : (
+    NULL == reduce_join                ? TPI_ERROR_NULL   : (
+    NULL == reduce_init                ? TPI_ERROR_NULL   : (
+    NULL == reduce_data                ? TPI_ERROR_NULL   : (
+    reduce_size <= 0                   ? TPI_ERROR_SIZE   :
+
+    local_start( thread_pool.m_thread_count ,
+                 work_subprogram , work_info , work_count , lock_count ,
+                 reduce_join, reduce_init, reduce_size, reduce_data ))))));
+
+  if ( ! result ) { local_wait(); }
+
+  return result ;
+}
+/*
+ * Record work with a reduction; lock_count is fixed at 0.
+ * The checks are made in order and the first failure is returned:
+ * TPI_ERROR_ACTIVE when a work routine is already recorded,
+ * TPI_ERROR_NULL for a NULL work_subprogram, reduce_join, reduce_init
+ * or reduce_data, TPI_ERROR_SIZE for a non-positive work_count or
+ * reduce_size.
+ */
+int TPI_Start_reduce( TPI_work_subprogram   work_subprogram  ,
+                      const void *          work_info ,
+                      int                   work_count ,
+                      TPI_reduce_join       reduce_join ,
+                      TPI_reduce_init       reduce_init ,
+                      int                   reduce_size ,
+                      void *                reduce_data )
+{
+  const int lock_count = 0 ;
+
+  const int result =
+    NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : (
+    NULL == work_subprogram            ? TPI_ERROR_NULL   : (
+    NULL == reduce_join                ? TPI_ERROR_NULL   : (
+    NULL == reduce_init                ? TPI_ERROR_NULL   : (
+    NULL == reduce_data                ? TPI_ERROR_NULL   : (
+    work_count  <= 0                   ? TPI_ERROR_SIZE   : (
+    reduce_size <= 0                   ? 
TPI_ERROR_SIZE   :
+
+    local_start( thread_pool.m_thread_count - 1 ,
+                 work_subprogram , work_info , work_count , lock_count ,
+                 reduce_join, reduce_init, reduce_size, reduce_data )))))));
+
+  return result ; /* no local_wait() here: the work is only started */
+}
+/*
+ * Record work with a reduction, one work item per non-calling pool
+ * thread, and return without waiting.
+ * The work count is m_thread_count - 1, or 1 when the pool has at most
+ * one thread; lock_count is fixed at 0.  The checks are made in order
+ * and the first failure is returned: TPI_ERROR_ACTIVE when a work
+ * routine is already recorded, TPI_ERROR_NULL for a NULL
+ * work_subprogram, reduce_join, reduce_init or reduce_data,
+ * TPI_ERROR_SIZE for a non-positive reduce_size; otherwise the result
+ * of local_start( m_thread_count - 1 , ... ).
+ */
+int TPI_Start_threads_reduce( TPI_work_subprogram   work_subprogram  ,
+                              const void *          work_info ,
+                              TPI_reduce_join       reduce_join ,
+                              TPI_reduce_init       reduce_init ,
+                              int                   reduce_size ,
+                              void *                reduce_data )
+{
+  const int lock_count = 0 ;
+  const int work_count = 1 < thread_pool.m_thread_count ?
+                             thread_pool.m_thread_count - 1 : 1 ;
+
+  const int result =
+    NULL != thread_pool.m_work_routine ? TPI_ERROR_ACTIVE : (
+    NULL == work_subprogram            ? TPI_ERROR_NULL   : (
+    NULL == reduce_join                ? TPI_ERROR_NULL   : (
+    NULL == reduce_init                ? TPI_ERROR_NULL   : (
+    NULL == reduce_data                ? TPI_ERROR_NULL   : (
+    reduce_size <= 0                   ? TPI_ERROR_SIZE   :
+
+    local_start( thread_pool.m_thread_count - 1 ,
+                 work_subprogram , work_info , work_count , lock_count ,
+                 reduce_join, reduce_init, reduce_size, reduce_data ))))));
+
+  return result ;
+}
+
+
diff --git a/mkl/basic/optional/ThreadPool/src/TPI.h b/mkl/basic/optional/ThreadPool/src/TPI.h
new file mode 100644
index 0000000..939d3be
--- /dev/null
+++ b/mkl/basic/optional/ThreadPool/src/TPI.h
@@ -0,0 +1,253 @@
+/*------------------------------------------------------------------------*/
+/* TPI: Thread Pool Interface */
+/* Copyright (2008) Sandia Corporation */
+/* */
+/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */
+/* license for use of this work by or on behalf of the U.S. Government. */
+/* */
+/* This library is free software; you can redistribute it and/or modify */
+/* it under the terms of the GNU Lesser General Public License as */
+/* published by the Free Software Foundation; either version 2.1 of the */
+/* License, or (at your option) any later version. 
*/ +/* */ +/* This library is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ +/* Lesser General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU Lesser General Public */ +/* License along with this library; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ +/* USA */ +/*------------------------------------------------------------------------*/ +/** + * @author H. Carter Edwards + * + * Thread Pool Interface (TPI). + * + * A simple and minimalistic interface for executing subprograms + * in a thread parallel, shared memory mode. + * + * States: the underlying thread pool has four states. + * 1) Uninitialized: no extra threads exist, this is the initial state. + * 2) Ready: extra threads exist and are ready to run a subprogram. + * 3) Active: extra threads are calling the subprogram. + * 4) Blocked: extra threads blocked. + * + * Threads are created on initialization and placed in the 'Ready' state. + * While in the 'Ready' state the threads are spin-waiting to minimize + * the cost of activating blocked threads. + * Threads can be blocked so that they do not compete for computational + * resources with other threads created external to the TPI interface. + * For example, threads created by OpenMP or TBB. + */ + +#ifndef ThreadPoolInterface_h +#define ThreadPoolInterface_h + +#if defined( __cplusplus ) +extern "C" { +#endif + +/*--------------------------------------------------------------------*/ +/** \brief Version string. */ +const char * TPI_Version(); + +/** Start up the requested number of threads, less the calling thread. + * Return the actual number of threads, including the calling thread, + * otherwise return an error. + */ +int TPI_Init( int thread_count ); + +/** Shut down all started threads. 
*/ +int TPI_Finalize(); + +/*--------------------------------------------------------------------*/ +/** \brief A utility to measure wall-clock time, which is frequently + * needed when performance testing HPC algorithms. + */ +double TPI_Walltime(); + +/*--------------------------------------------------------------------*/ +/* All functions return zero for success. */ + +#define TPI_ERROR_NULL ((int) -1) /**< NULL input */ +#define TPI_ERROR_SIZE ((int) -2) /**< BAD input: size or index */ +#define TPI_ERROR_LOCK ((int) -3) /**< BAD lock or unlock */ +#define TPI_ERROR_ACTIVE ((int) -4) /**< BAD input: the pool is active */ +#define TPI_ERROR_INTERNAL ((int) -5) /**< internal resource error */ + +/*--------------------------------------------------------------------*/ +/** \brief Work information passed to a work subprogram. */ +struct TPI_Work_Struct { + const void * info ; /**< Shared info input to TPI_Run */ + void * reduce ; /**< Data for reduce operation, if any */ + int count ; /**< Count of work requested via TPI_Run */ + int rank ; /**< Rank of work for the current call */ + int lock_count ; /**< Count of locks requested via TPI_Run */ +}; + +/** \brief Typedef for work subprogram argument */ +typedef const struct TPI_Work_Struct TPI_Work ; + +/** The interface for a parallel task */ +typedef void (*TPI_work_subprogram)( TPI_Work * ); + +/** The interface for a parallel reduction operation. + * Initialize work->reduce value. + */ +typedef +void (*TPI_reduce_init)( TPI_Work * work ); + +/** The interface for a parallel reduction operation. + * Perform reduction operation work->reduce OP= reduce. + * Every initialized reduce value will appear exactly + * once as the 'reduce' argument of a call to the join function. + */ +typedef +void (*TPI_reduce_join)( TPI_Work * work , const void * reduce ); + +/*--------------------------------------------------------------------*/ +/** \brief Run a work subprogram in thread parallel. 
+ * + * The thread pool must be in the 'Ready' state when this + * function is called. Thus a recursive call to TPI_Run is illegal. + */ +int TPI_Run( TPI_work_subprogram work_subprogram , + const void * work_info , + int work_count , + int lock_count ); + +/** \brief Run work and reduction subprograms in thread parallel. + * + * Each call to the work_subprogram has exclusive (thread safe) + * access to its work->reduce data. + * The reduce_init and reduce_join subprograms have + * exclusive access to their arguments. + */ +int TPI_Run_reduce( TPI_work_subprogram work_subprogram , + const void * work_info , + int work_count , + TPI_reduce_join reduce_join , + TPI_reduce_init reduce_init , + int reduce_size , + void * reduce_data ); + +/** \brief Run a work subprogram exactly once on each thread. + * + * The thread pool must be in the 'Ready' state when this + * function is called. Thus a recursive call to TPI_Run_threads is illegal. + */ +int TPI_Run_threads( TPI_work_subprogram work_subprogram , + const void * work_info , + int lock_count ); + +/** \brief Run work and reduction subprograms in thread parallel. + * + * Each call to the work_subprogram has exclusive (thread safe) + * access to its work->reduce data. + * The reduce_init and reduce_join subprograms have + * exclusive access to their arguments. + */ +int TPI_Run_threads_reduce( TPI_work_subprogram work_subprogram , + const void * work_info , + TPI_reduce_join reduce_join , + TPI_reduce_init reduce_init , + int reduce_size , + void * reduce_data ); + +/*--------------------------------------------------------------------*/ +/** \brief Start a work subprogram in thread parallel + * running on all but the 'main' calling thread; + * the 'main' calling thread returns immediately. + * + * The thread pool must be in the 'Ready' state when this + * function is called. Thus a recursive call to TPI_Start is illegal. 
+ */ +int TPI_Start( TPI_work_subprogram work_subprogram , + const void * work_info , + int work_count , + int lock_count ); + +/** \brief Start work and reduction subprograms in thread parallel + * running on all but the 'main' calling thread; + * the 'main' calling thread returns immediately. + * + * Each call to the work_subprogram has exclusive (thread safe) + * access to its work->reduce data. + * The reduce_init and reduce_join subprograms have + * exclusive access to their arguments. + */ +int TPI_Start_reduce( TPI_work_subprogram work_subprogram , + const void * work_info , + int work_count , + TPI_reduce_join reduce_join , + TPI_reduce_init reduce_init , + int reduce_size , + void * reduce_data ); + +/** \brief Start a work subprogram on each thread + * that is not the 'main' calling thread. + * The 'main' calling thread returns immediately. + * + * The thread pool must be in the 'Ready' state when this + * function is called. Thus a recursive call to TPI_Start_threads is illegal. + */ +int TPI_Start_threads( TPI_work_subprogram work_subprogram , + const void * work_info , + int lock_count ); + +/** \brief Start a work / reduction subprogram + * on each thread that is not the 'main' calling thread. + * The 'main' calling thread returns immediately. + * + * Each call to the work_subprogram has exclusive (thread safe) + * access to its work->reduce data. + * The reduce_init and reduce_join subprograms have + * exclusive access to their arguments. + */ +int TPI_Start_threads_reduce( TPI_work_subprogram work_subprogram , + const void * work_info , + TPI_reduce_join reduce_join , + TPI_reduce_init reduce_init , + int reduce_size , + void * reduce_data ); + +/** \brief Wait for a started work subprogram to complete. */ +int TPI_Wait(); + +/*--------------------------------------------------------------------*/ +/** \brief Block threads within the operating system. 
+ * + * Normally the worker threads are unblocked and spinning for + * minimal start up overhead when running work subprograms. + * If no TPI work is to be performed for a long period of time + * then an application can block the worker threads. + */ +int TPI_Block(); + +/** \brief Unblock blocked threads within the operating system */ +int TPI_Unblock(); + +/** \brief Query if threads are blocked */ +int TPI_Isblocked(); + +/*--------------------------------------------------------------------*/ +/** \brief Blocks until lock lock_rank is obtained. + * The thread pool must be in the 'active' state. + */ +int TPI_Lock( int lock_rank ); + +/** \brief Unlocks lock lock_rank. + * The thread pool must be in the 'active' state. + */ +int TPI_Unlock( int lock_rank ); + +/*--------------------------------------------------------------------*/ + +#if defined( __cplusplus ) +} +#endif + +#endif + diff --git a/mkl/basic/optional/ThreadPool/src/TPI.hpp b/mkl/basic/optional/ThreadPool/src/TPI.hpp new file mode 100644 index 0000000..fc1894e --- /dev/null +++ b/mkl/basic/optional/ThreadPool/src/TPI.hpp @@ -0,0 +1,135 @@ +/*------------------------------------------------------------------------*/ +/* TPI: Thread Pool Interface */ +/* Copyright (2008) Sandia Corporation */ +/* */ +/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ +/* license for use of this work by or on behalf of the U.S. Government. */ +/* */ +/* This library is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU Lesser General Public License as */ +/* published by the Free Software Foundation; either version 2.1 of the */ +/* License, or (at your option) any later version. */ +/* */ +/* This library is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ +/* Lesser General Public License for more details. 
*/ +/* */ +/* You should have received a copy of the GNU Lesser General Public */ +/* License along with this library; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ +/* USA */ +/*------------------------------------------------------------------------*/ +/** + * @author H. Carter Edwards + */ + +#ifndef util_ThreadPool_hpp +#define util_ThreadPool_hpp + +#include + +namespace TPI { + +typedef TPI_Work Work ; + +//---------------------------------------------------------------------- +/** Run worker.*method(work) on all threads. + */ +template +int Run( Worker & worker , void (Worker::*method)(Work &) , + int work_count , int lock_count = 0 ); + +//---------------------------------------------------------------------- + +inline int Lock( int n ) { return TPI_Lock( n ); } +inline int Unlock( int n ) { return TPI_Unlock( n ); } + +/** Lock guard to insure that a lock is released + * when control exists a block. + * { + * TPI::LockGuard local_lock( i ); + * } + */ +class LockGuard { +private: + LockGuard(); + LockGuard( const LockGuard & ); + LockGuard & operator = ( const LockGuard & ); + const int m_value ; + const int m_result ; +public: + operator int() const { return m_result ; } + + explicit LockGuard( unsigned i_lock ) + : m_value( i_lock ), m_result( TPI_Lock(i_lock) ) {} + + ~LockGuard() { TPI_Unlock( m_value ); } +}; + +//---------------------------------------------------------------------- + +inline +int Init( int n ) { return TPI_Init( n ); } + +inline +int Finalize() { return TPI_Finalize(); } + +inline +double Walltime() { return TPI_Walltime(); } + +//---------------------------------------------------------------------- +//---------------------------------------------------------------------- + +namespace { + +template +class WorkerMethodHelper { +private: + WorkerMethodHelper(); + WorkerMethodHelper( const WorkerMethodHelper & ); + WorkerMethodHelper & operator = ( const 
WorkerMethodHelper & ); + +public: + + typedef void (Worker::*Method)( Work & ); + + Worker & worker ; + Method method ; + + WorkerMethodHelper( Worker & w , Method m ) : worker(w), method(m) {} + + static void run( TPI_Work * work ) + { + try { + const WorkerMethodHelper & wm = + * reinterpret_cast(work->info); + (wm.worker.*wm.method)(*work); + } catch(...){} + } +}; + +} + +//---------------------------------------------------------------------- +//---------------------------------------------------------------------- + +template +inline +int Run( Worker & worker, void (Worker::*method)(Work &) , + int work_count , int lock_count ) +{ + typedef WorkerMethodHelper WM ; + + WM tmp( worker , method ); + + return TPI_Run( reinterpret_cast(& WM::run),&tmp,work_count,lock_count); +} + +//---------------------------------------------------------------------- +//---------------------------------------------------------------------- + +} + +#endif + diff --git a/mkl/basic/optional/ThreadPool/src/TPI_Walltime.c b/mkl/basic/optional/ThreadPool/src/TPI_Walltime.c new file mode 100644 index 0000000..d2c1fe4 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/src/TPI_Walltime.c @@ -0,0 +1,44 @@ +/*------------------------------------------------------------------------*/ +/* TPI: Thread Pool Interface */ +/* Copyright (2008) Sandia Corporation */ +/* */ +/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ +/* license for use of this work by or on behalf of the U.S. Government. */ +/* */ +/* This library is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU Lesser General Public License as */ +/* published by the Free Software Foundation; either version 2.1 of the */ +/* License, or (at your option) any later version. 
*/ +/* */ +/* This library is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ +/* Lesser General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU Lesser General Public */ +/* License along with this library; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ +/* USA */ +/*------------------------------------------------------------------------*/ +/** + * @author H. Carter Edwards + */ + +#include + +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +double TPI_Walltime() +{ + struct timeval tp ; + + gettimeofday( &tp , ((struct timezone *) NULL ) ); + + return ( (double) tp.tv_sec ) + ( (double) tp.tv_usec ) / 1.0e6 ; +} + diff --git a/mkl/basic/optional/ThreadPool/src/ThreadPool_config.h.in b/mkl/basic/optional/ThreadPool/src/ThreadPool_config.h.in new file mode 100644 index 0000000..752f5c5 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/src/ThreadPool_config.h.in @@ -0,0 +1,71 @@ +/* src/ThreadPool_config.h.in. Generated from configure.ac by autoheader. */ + +/* Define if you want to build export makefiles. */ +#undef HAVE_EXPORT_MAKEFILES + +/* Define if you are using gnumake - this will shorten your link lines. */ +#undef HAVE_GNUMAKE + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define if want to build libcheck */ +#undef HAVE_LIBCHECK + +/* Define to 1 if you have the header file. */ +#undef HAVE_MEMORY_H + +/* define if we want to use MPI */ +#undef HAVE_MPI + +/* Define if want to build threadpool-tests */ +#undef HAVE_NEW_PACKAGE_TESTS + +/* Define if you have POSIX threads libraries and header files. */ +#undef HAVE_PTHREAD + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define if want to build tests */ +#undef HAVE_TESTS + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define to the necessary symbol if this constant uses a non-standard name on + your system. */ +#undef PTHREAD_CREATE_JOINABLE + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS diff --git a/mkl/basic/optional/ThreadPool/test/CMakeLists.txt b/mkl/basic/optional/ThreadPool/test/CMakeLists.txt new file mode 100644 index 0000000..ff878e7 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/CMakeLists.txt @@ -0,0 +1,86 @@ + +INCLUDE(PackageAddExecutableAndTest) + +PACKAGE_ADD_EXECUTABLE( + test_tpi_unit + COMM serial mpi + SOURCES test_tpi_unit.c + DIRECTORY . + ) + +PACKAGE_ADD_EXECUTABLE( + test_c_dnax + COMM serial + SOURCES test_c_dnax.c + DIRECTORY . + ) + +PACKAGE_ADD_EXECUTABLE( + test_tpi_cpp + COMM serial + SOURCES test_tpi.cpp + DIRECTORY . + ) + +PACKAGE_ADD_EXECUTABLE( + test_tpi_sum + COMM serial mpi + SOURCES test_mpi_sum.c + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_unit + NAME test_tpi_unit_serial + COMM serial + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_unit + NAME test_tpi_unit_mpi + COMM mpi + NUM_MPI_PROCS 1 + DIRECTORY . 
+ ) + +PACKAGE_ADD_TEST( + test_tpi_cpp + NAME test_tpi_cpp + COMM serial + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_sum + NAME test_tpi_sum_serial + COMM serial + DIRECTORY . + XHOSTTYPE AIX + ) + +PACKAGE_ADD_TEST( + test_tpi_sum + NAME test_tpi_sum_np1 + COMM mpi + NUM_MPI_PROCS 1 + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_sum + NAME test_tpi_sum_np2 + COMM mpi + NUM_MPI_PROCS 2 + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_sum + NAME test_tpi_sum_np4 + COMM mpi + NUM_MPI_PROCS 4 + DIRECTORY . + ) + + diff --git a/mkl/basic/optional/ThreadPool/test/Makefile.am b/mkl/basic/optional/ThreadPool/test/Makefile.am new file mode 100644 index 0000000..8e78cbf --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/Makefile.am @@ -0,0 +1,55 @@ +#@HEADER +# ************************************************************************ +# +# ThreadPool Package +# Copyright (2008) Sandia Corporation +# +# Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +# license for use of this work by or on behalf of the U.S. Government. +# +# This library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of the +# License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA +# Questions? 
Contact Carter Edwards (hcedwar@sandia.gov) +# +# ************************************************************************ +#@HEADER + +SUBDIRS = + +# The following line helps the test harness recover from build errors. + +all-local: + +include $(top_builddir)/Makefile.export.threadpool + +EXEEXT = .exe + +noinst_PROGRAMS = test_tpi test_tpi_cpp test_sum + +test_tpi_SOURCES = test_main.c test_tpi_unit.c test_c_dnax.c test_c_tpi.c test_pthreads.c +test_tpi_DEPENDENCIES = $(top_builddir)/src/libtpi.a +test_tpi_CFLAGS = $(THREADPOOL_INCLUDES) +test_tpi_LDADD = $(THREADPOOL_LIBS) + +test_tpi_cpp_SOURCES = test_tpi.cpp +test_tpi_cpp_DEPENDENCIES = $(top_builddir)/src/libtpi.a +test_tpi_cpp_CXXFLAGS = $(THREADPOOL_INCLUDES) +test_tpi_cpp_LDADD = $(THREADPOOL_LIBS) + +test_sum_SOURCES = test_mpi_sum.c +test_sum_DEPENDENCIES = $(top_builddir)/src/libtpi.a +test_sum_CFLAGS = $(THREADPOOL_INCLUDES) +test_sum_LDADD = $(THREADPOOL_LIBS) + diff --git a/mkl/basic/optional/ThreadPool/test/Makefile.in b/mkl/basic/optional/ThreadPool/test/Makefile.in new file mode 100644 index 0000000..ffc5220 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/Makefile.in @@ -0,0 +1,730 @@ +# Makefile.in generated by automake 1.10 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +#@HEADER +# ************************************************************************ +# +# ThreadPool Package +# Copyright (2008) Sandia Corporation +# +# Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +# license for use of this work by or on behalf of the U.S. Government. +# +# This library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of the +# License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA +# Questions? 
Contact Carter Edwards (hcedwar@sandia.gov) +# +# ************************************************************************ +#@HEADER + +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +noinst_PROGRAMS = test_tpi$(EXEEXT) test_tpi_cpp$(EXEEXT) \ + test_sum$(EXEEXT) +subdir = test +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/acx_pthread.m4 \ + $(top_srcdir)/config/tac_arg_check_mpi.m4 \ + $(top_srcdir)/config/tac_arg_config_mpi.m4 \ + $(top_srcdir)/config/tac_arg_enable_export-makefiles.m4 \ + $(top_srcdir)/config/tac_arg_enable_feature.m4 \ + $(top_srcdir)/config/tac_arg_enable_feature_sub_check.m4 \ + $(top_srcdir)/config/tac_arg_with_ar.m4 \ + $(top_srcdir)/config/tac_arg_with_flags.m4 \ + $(top_srcdir)/config/tac_arg_with_incdirs.m4 \ + $(top_srcdir)/config/tac_arg_with_libdirs.m4 \ + $(top_srcdir)/config/tac_arg_with_libs.m4 \ + $(top_srcdir)/config/tac_arg_with_perl.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/ThreadPool_config.h +CONFIG_CLEAN_FILES = +PROGRAMS = $(noinst_PROGRAMS) +am_test_sum_OBJECTS = test_sum-test_mpi_sum.$(OBJEXT) +test_sum_OBJECTS = $(am_test_sum_OBJECTS) +test_sum_LINK = $(CCLD) $(test_sum_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +am_test_tpi_OBJECTS = 
test_tpi-test_main.$(OBJEXT) \ + test_tpi-test_tpi_unit.$(OBJEXT) \ + test_tpi-test_c_dnax.$(OBJEXT) test_tpi-test_c_tpi.$(OBJEXT) \ + test_tpi-test_pthreads.$(OBJEXT) +test_tpi_OBJECTS = $(am_test_tpi_OBJECTS) +test_tpi_LINK = $(CCLD) $(test_tpi_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +am_test_tpi_cpp_OBJECTS = test_tpi_cpp-test_tpi.$(OBJEXT) +test_tpi_cpp_OBJECTS = $(am_test_tpi_cpp_OBJECTS) +test_tpi_cpp_LINK = $(CXXLD) $(test_tpi_cpp_CXXFLAGS) $(CXXFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +DEFAULT_INCLUDES = -I. -I$(top_builddir)/src@am__isrc@ +depcomp = $(SHELL) $(top_srcdir)/config/depcomp +am__depfiles_maybe = depfiles +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +CXXLD = $(CXX) +CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +SOURCES = $(test_sum_SOURCES) $(test_tpi_SOURCES) \ + $(test_tpi_cpp_SOURCES) +DIST_SOURCES = $(test_sum_SOURCES) $(test_tpi_SOURCES) \ + $(test_tpi_cpp_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALTERNATE_AR = @ALTERNATE_AR@ +AMTAR = @AMTAR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK 
= @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = .exe +GREP = @GREP@ +HAVE_PERL = @HAVE_PERL@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MPI_CC_EXISTS = @MPI_CC_EXISTS@ +MPI_CXX = @MPI_CXX@ +MPI_CXX_EXISTS = @MPI_CXX_EXISTS@ +MPI_F77_EXISTS = @MPI_F77_EXISTS@ +MPI_TEMP_CXX = @MPI_TEMP_CXX@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERL_EXE = @PERL_EXE@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_aux_dir = @ac_aux_dir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = 
@host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = +test_tpi_SOURCES = test_main.c test_tpi_unit.c test_c_dnax.c test_c_tpi.c test_pthreads.c +test_tpi_DEPENDENCIES = $(top_builddir)/src/libtpi.a +test_tpi_CFLAGS = $(THREADPOOL_INCLUDES) +test_tpi_LDADD = $(THREADPOOL_LIBS) +test_tpi_cpp_SOURCES = test_tpi.cpp +test_tpi_cpp_DEPENDENCIES = $(top_builddir)/src/libtpi.a +test_tpi_cpp_CXXFLAGS = $(THREADPOOL_INCLUDES) +test_tpi_cpp_LDADD = $(THREADPOOL_LIBS) +test_sum_SOURCES = test_mpi_sum.c +test_sum_DEPENDENCIES = $(top_builddir)/src/libtpi.a +test_sum_CFLAGS = $(THREADPOOL_INCLUDES) +test_sum_LDADD = $(THREADPOOL_LIBS) +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .cpp .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign test/Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --foreign test/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +clean-noinstPROGRAMS: + -test -z "$(noinst_PROGRAMS)" || rm -f $(noinst_PROGRAMS) +test_sum$(EXEEXT): $(test_sum_OBJECTS) $(test_sum_DEPENDENCIES) + @rm -f test_sum$(EXEEXT) + $(test_sum_LINK) $(test_sum_OBJECTS) $(test_sum_LDADD) $(LIBS) +test_tpi$(EXEEXT): $(test_tpi_OBJECTS) $(test_tpi_DEPENDENCIES) + @rm -f test_tpi$(EXEEXT) + $(test_tpi_LINK) $(test_tpi_OBJECTS) $(test_tpi_LDADD) $(LIBS) +test_tpi_cpp$(EXEEXT): $(test_tpi_cpp_OBJECTS) $(test_tpi_cpp_DEPENDENCIES) + @rm -f test_tpi_cpp$(EXEEXT) + $(test_tpi_cpp_LINK) $(test_tpi_cpp_OBJECTS) $(test_tpi_cpp_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_sum-test_mpi_sum.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_tpi-test_c_dnax.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_tpi-test_c_tpi.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_tpi-test_main.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_tpi-test_pthreads.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_tpi-test_tpi_unit.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_tpi_cpp-test_tpi.Po@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ 
$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +test_sum-test_mpi_sum.o: test_mpi_sum.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_sum_CFLAGS) $(CFLAGS) -MT test_sum-test_mpi_sum.o -MD -MP -MF $(DEPDIR)/test_sum-test_mpi_sum.Tpo -c -o test_sum-test_mpi_sum.o `test -f 'test_mpi_sum.c' || echo '$(srcdir)/'`test_mpi_sum.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_sum-test_mpi_sum.Tpo $(DEPDIR)/test_sum-test_mpi_sum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_mpi_sum.c' object='test_sum-test_mpi_sum.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_sum_CFLAGS) $(CFLAGS) -c -o test_sum-test_mpi_sum.o `test -f 'test_mpi_sum.c' || echo '$(srcdir)/'`test_mpi_sum.c + +test_sum-test_mpi_sum.obj: test_mpi_sum.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_sum_CFLAGS) $(CFLAGS) -MT test_sum-test_mpi_sum.obj -MD -MP -MF $(DEPDIR)/test_sum-test_mpi_sum.Tpo -c -o test_sum-test_mpi_sum.obj `if test -f 'test_mpi_sum.c'; then $(CYGPATH_W) 'test_mpi_sum.c'; else $(CYGPATH_W) 
'$(srcdir)/test_mpi_sum.c'; fi` +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_sum-test_mpi_sum.Tpo $(DEPDIR)/test_sum-test_mpi_sum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_mpi_sum.c' object='test_sum-test_mpi_sum.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_sum_CFLAGS) $(CFLAGS) -c -o test_sum-test_mpi_sum.obj `if test -f 'test_mpi_sum.c'; then $(CYGPATH_W) 'test_mpi_sum.c'; else $(CYGPATH_W) '$(srcdir)/test_mpi_sum.c'; fi` + +test_tpi-test_main.o: test_main.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_main.o -MD -MP -MF $(DEPDIR)/test_tpi-test_main.Tpo -c -o test_tpi-test_main.o `test -f 'test_main.c' || echo '$(srcdir)/'`test_main.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_main.Tpo $(DEPDIR)/test_tpi-test_main.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_main.c' object='test_tpi-test_main.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_main.o `test -f 'test_main.c' || echo '$(srcdir)/'`test_main.c + +test_tpi-test_main.obj: test_main.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_main.obj -MD -MP -MF $(DEPDIR)/test_tpi-test_main.Tpo -c -o test_tpi-test_main.obj `if test -f 'test_main.c'; then $(CYGPATH_W) 'test_main.c'; else $(CYGPATH_W) '$(srcdir)/test_main.c'; fi` +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_main.Tpo $(DEPDIR)/test_tpi-test_main.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_main.c' object='test_tpi-test_main.obj' 
libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_main.obj `if test -f 'test_main.c'; then $(CYGPATH_W) 'test_main.c'; else $(CYGPATH_W) '$(srcdir)/test_main.c'; fi` + +test_tpi-test_tpi_unit.o: test_tpi_unit.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_tpi_unit.o -MD -MP -MF $(DEPDIR)/test_tpi-test_tpi_unit.Tpo -c -o test_tpi-test_tpi_unit.o `test -f 'test_tpi_unit.c' || echo '$(srcdir)/'`test_tpi_unit.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_tpi_unit.Tpo $(DEPDIR)/test_tpi-test_tpi_unit.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_tpi_unit.c' object='test_tpi-test_tpi_unit.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_tpi_unit.o `test -f 'test_tpi_unit.c' || echo '$(srcdir)/'`test_tpi_unit.c + +test_tpi-test_tpi_unit.obj: test_tpi_unit.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_tpi_unit.obj -MD -MP -MF $(DEPDIR)/test_tpi-test_tpi_unit.Tpo -c -o test_tpi-test_tpi_unit.obj `if test -f 'test_tpi_unit.c'; then $(CYGPATH_W) 'test_tpi_unit.c'; else $(CYGPATH_W) '$(srcdir)/test_tpi_unit.c'; fi` +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_tpi_unit.Tpo $(DEPDIR)/test_tpi-test_tpi_unit.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_tpi_unit.c' object='test_tpi-test_tpi_unit.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_tpi_unit.obj `if test -f 'test_tpi_unit.c'; then $(CYGPATH_W) 'test_tpi_unit.c'; else $(CYGPATH_W) '$(srcdir)/test_tpi_unit.c'; fi` + +test_tpi-test_c_dnax.o: test_c_dnax.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_c_dnax.o -MD -MP -MF $(DEPDIR)/test_tpi-test_c_dnax.Tpo -c -o test_tpi-test_c_dnax.o `test -f 'test_c_dnax.c' || echo '$(srcdir)/'`test_c_dnax.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_c_dnax.Tpo $(DEPDIR)/test_tpi-test_c_dnax.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_c_dnax.c' object='test_tpi-test_c_dnax.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_c_dnax.o `test -f 'test_c_dnax.c' || echo '$(srcdir)/'`test_c_dnax.c + +test_tpi-test_c_dnax.obj: test_c_dnax.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_c_dnax.obj -MD -MP -MF $(DEPDIR)/test_tpi-test_c_dnax.Tpo -c -o test_tpi-test_c_dnax.obj `if test -f 'test_c_dnax.c'; then $(CYGPATH_W) 'test_c_dnax.c'; else $(CYGPATH_W) '$(srcdir)/test_c_dnax.c'; fi` +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_c_dnax.Tpo $(DEPDIR)/test_tpi-test_c_dnax.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_c_dnax.c' object='test_tpi-test_c_dnax.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_c_dnax.obj `if test -f 
'test_c_dnax.c'; then $(CYGPATH_W) 'test_c_dnax.c'; else $(CYGPATH_W) '$(srcdir)/test_c_dnax.c'; fi` + +test_tpi-test_c_tpi.o: test_c_tpi.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_c_tpi.o -MD -MP -MF $(DEPDIR)/test_tpi-test_c_tpi.Tpo -c -o test_tpi-test_c_tpi.o `test -f 'test_c_tpi.c' || echo '$(srcdir)/'`test_c_tpi.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_c_tpi.Tpo $(DEPDIR)/test_tpi-test_c_tpi.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_c_tpi.c' object='test_tpi-test_c_tpi.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_c_tpi.o `test -f 'test_c_tpi.c' || echo '$(srcdir)/'`test_c_tpi.c + +test_tpi-test_c_tpi.obj: test_c_tpi.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_c_tpi.obj -MD -MP -MF $(DEPDIR)/test_tpi-test_c_tpi.Tpo -c -o test_tpi-test_c_tpi.obj `if test -f 'test_c_tpi.c'; then $(CYGPATH_W) 'test_c_tpi.c'; else $(CYGPATH_W) '$(srcdir)/test_c_tpi.c'; fi` +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_c_tpi.Tpo $(DEPDIR)/test_tpi-test_c_tpi.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_c_tpi.c' object='test_tpi-test_c_tpi.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_c_tpi.obj `if test -f 'test_c_tpi.c'; then $(CYGPATH_W) 'test_c_tpi.c'; else $(CYGPATH_W) '$(srcdir)/test_c_tpi.c'; fi` + +test_tpi-test_pthreads.o: test_pthreads.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_pthreads.o -MD -MP -MF $(DEPDIR)/test_tpi-test_pthreads.Tpo -c -o test_tpi-test_pthreads.o `test -f 'test_pthreads.c' || echo '$(srcdir)/'`test_pthreads.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_pthreads.Tpo $(DEPDIR)/test_tpi-test_pthreads.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_pthreads.c' object='test_tpi-test_pthreads.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_pthreads.o `test -f 'test_pthreads.c' || echo '$(srcdir)/'`test_pthreads.c + +test_tpi-test_pthreads.obj: test_pthreads.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -MT test_tpi-test_pthreads.obj -MD -MP -MF $(DEPDIR)/test_tpi-test_pthreads.Tpo -c -o test_tpi-test_pthreads.obj `if test -f 'test_pthreads.c'; then $(CYGPATH_W) 'test_pthreads.c'; else $(CYGPATH_W) '$(srcdir)/test_pthreads.c'; fi` +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/test_tpi-test_pthreads.Tpo $(DEPDIR)/test_tpi-test_pthreads.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='test_pthreads.c' object='test_tpi-test_pthreads.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_CFLAGS) $(CFLAGS) -c -o test_tpi-test_pthreads.obj `if test -f 'test_pthreads.c'; then $(CYGPATH_W) 'test_pthreads.c'; else $(CYGPATH_W) '$(srcdir)/test_pthreads.c'; fi` + +.cpp.o: +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $< + +.cpp.obj: +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +test_tpi_cpp-test_tpi.o: test_tpi.cpp +@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_cpp_CXXFLAGS) $(CXXFLAGS) -MT test_tpi_cpp-test_tpi.o -MD -MP -MF $(DEPDIR)/test_tpi_cpp-test_tpi.Tpo -c -o test_tpi_cpp-test_tpi.o `test -f 'test_tpi.cpp' || echo '$(srcdir)/'`test_tpi.cpp +@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/test_tpi_cpp-test_tpi.Tpo $(DEPDIR)/test_tpi_cpp-test_tpi.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='test_tpi.cpp' object='test_tpi_cpp-test_tpi.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_cpp_CXXFLAGS) $(CXXFLAGS) -c -o test_tpi_cpp-test_tpi.o `test -f 'test_tpi.cpp' || echo '$(srcdir)/'`test_tpi.cpp + +test_tpi_cpp-test_tpi.obj: test_tpi.cpp +@am__fastdepCXX_TRUE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_cpp_CXXFLAGS) $(CXXFLAGS) -MT test_tpi_cpp-test_tpi.obj -MD -MP -MF $(DEPDIR)/test_tpi_cpp-test_tpi.Tpo -c -o test_tpi_cpp-test_tpi.obj `if test -f 'test_tpi.cpp'; then $(CYGPATH_W) 'test_tpi.cpp'; else $(CYGPATH_W) '$(srcdir)/test_tpi.cpp'; fi` +@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/test_tpi_cpp-test_tpi.Tpo $(DEPDIR)/test_tpi_cpp-test_tpi.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
source='test_tpi.cpp' object='test_tpi_cpp-test_tpi.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_tpi_cpp_CXXFLAGS) $(CXXFLAGS) -c -o test_tpi_cpp-test_tpi.obj `if test -f 'test_tpi.cpp'; then $(CYGPATH_W) 'test_tpi.cpp'; else $(CYGPATH_W) '$(srcdir)/test_tpi.cpp'; fi` + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. +$(RECURSIVE_TARGETS): + @failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +$(RECURSIVE_CLEAN_TARGETS): + @failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + 
target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR 
$$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done + list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + distdir=`$(am__cd) $(distdir) && pwd`; \ + top_distdir=`$(am__cd) $(top_distdir) && pwd`; \ + (cd $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$top_distdir" \ + distdir="$$distdir/$$subdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(PROGRAMS) all-local +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-generic clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-exec-am: + +install-html: install-html-recursive + +install-info: install-info-recursive + +install-man: + +install-pdf: install-pdf-recursive + +install-ps: install-ps-recursive + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \ + install-strip + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am all-local check check-am clean clean-generic \ + clean-noinstPROGRAMS ctags ctags-recursive distclean \ + distclean-compile distclean-generic distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \ + tags tags-recursive uninstall uninstall-am + + +# The following line helps the test harness recover from build errors. 
+ +all-local: + +include $(top_builddir)/Makefile.export.threadpool +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/mkl/basic/optional/ThreadPool/test/build_gnu b/mkl/basic/optional/ThreadPool/test/build_gnu new file mode 100755 index 0000000..bba4b90 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/build_gnu @@ -0,0 +1,79 @@ +#!/bin/bash + +TEST_SRC="test_main.c test_c_dnax.c test_tpi_unit.c test_pthreads.c" + +LIB_SRC="../src/TPI.c ../src/TPI_Walltime.c" + +LIB_OBJ="TPI.o TPI_Walltime.o" + +# OPT="-O3" +OPT="-g" +# OPT="-O" + +#CFLAGS="${OPT} -std=c99 -Wall -Wextra" + +CFLAGS=" ${OPT} -std=c89 -Wall -Wextra" +CCFLAGS="${OPT} -std=c++98 -Wall -Wextra" + +echo build: gcc ${CFLAGS} + +#----------------------------------------------------------------------- + +rm -f ThreadPool_config.h +echo "#define HAVE_PTHREAD 1" > ThreadPool_config.h + +gcc ${CFLAGS} -c \ + -I. -I../src ${LIB_SRC} + +gcc ${CFLAGS} \ + -o test_tpi.gnu.exe \ + -I. -I../src ${TEST_SRC} ${LIB_OBJ} -lpthread -lm + +g++ ${CCFLAGS} \ + -o test_tpi_cpp.gnu.exe \ + -I. -I../src test_tpi.cpp ${LIB_OBJ} -lpthread -lstdc++ -lm + +gcc ${CFLAGS} \ + -o test_sum.gnu.exe \ + -I. -I../src test_mpi_sum.c ${LIB_OBJ} -lpthread -lm + +#----------------------------------------------------------------------- + +mpicc ${CFLAGS} \ + -o test_sum.mpi.gnu.exe \ + -I. -I../src -DTEST_WITH_MPI test_mpi_sum.c ${LIB_OBJ} -lpthread -lm + +#----------------------------------------------------------------------- + +rm -f ThreadPool_config.h +echo "/* #define HAVE_PTHREAD 1 */" > ThreadPool_config.h + +gcc ${CFLAGS} -c \ + -I. -I../src ${LIB_SRC} + +gcc ${CFLAGS} \ + -o test_tpi.gnu.noth.exe \ + -I. -I../src ${TEST_SRC} ${LIB_OBJ} -lpthread -lm + +g++ ${CCFLAGS} \ + -o test_tpi_cpp.gnu.noth.exe \ + -I. -I../src test_tpi.cpp ${LIB_OBJ} -lpthread -lstdc++ -lm + +gcc ${CFLAGS} \ + -o test_sum.gnu.noth.exe \ + -I. 
-I../src test_mpi_sum.c ${LIB_OBJ} -lpthread -lm + +#----------------------------------------------------------------------- + +rm -f ThreadPool_config.h +echo "/* #define HAVE_PTHREAD 1 */" > ThreadPool_config.h +echo "#define HAVE_MPI 1" >> ThreadPool_config.h + +mpicc ${CFLAGS} \ + -o test_sum.mpi.gnu.noth.exe \ + -I. -I../src -DTEST_WITH_MPI test_mpi_sum.c ${LIB_OBJ} -lpthread -lm + +#----------------------------------------------------------------------- + +rm -f ThreadPool_config.h + diff --git a/mkl/basic/optional/ThreadPool/test/build_intel b/mkl/basic/optional/ThreadPool/test/build_intel new file mode 100755 index 0000000..accb0a0 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/build_intel @@ -0,0 +1,82 @@ +#!/bin/bash + +# . /usr/local/modules/3.2.6/Modules/$MODULE_VERSION/bin/modulecmd tcsh \ +# load sierra-devel-desktop-intel-10.1ip + + +TEST_SRC="test_main.c test_c_dnax.c test_tpi_unit.c test_pthreads.c" + +LIB_SRC="../src/TPI.c ../src/TPI_Walltime.c" + +LIB_OBJ="TPI.o TPI_Walltime.o" + +#CFLAGS="-std=c99 -strict-ansi -Wall -Wcheck -Werror -wd141 -wd869 -wd1418 -wd1419" +#CFLAGS="-std=c89 -strict-ansi -Wall -Wcheck -Werror -wd141 -wd869 -wd1418 -wd1419" +CCFLAGS=" -strict-ansi -Wall -Wcheck -Werror -wd141 -wd869 -wd1418 -wd1419" + +OPT="-O3" +# OPT="-g" +# OPT="-O" + +echo build ${OPT} + +#----------------------------------------------------------------------- + +rm -f ThreadPool_config.h +echo "#define HAVE_PTHREAD 1" > ThreadPool_config.h + +icc ${CFLAGS} ${OPT} -c \ + -I. -I../src ${LIB_SRC} + +icc ${CFLAGS} ${OPT} \ + -o test_tpi.intel.exe \ + -I. -I../src ${TEST_SRC} ${LIB_OBJ} -lpthread + +icc ${CCFLAGS} ${OPT} \ + -o test_tpi_cpp.intel.exe \ + -I. -I../src test_tpi.cpp ${LIB_OBJ} -lpthread -lstdc++ + +icc ${CFLAGS} ${OPT} \ + -o test_sum.intel.exe \ + -I. -I../src test_mpi_sum.c ${LIB_OBJ} -lpthread + +#----------------------------------------------------------------------- + +mpicc ${CFLAGS} ${OPT} \ + -o test_sum.mpi.intel.exe \ + -I. 
-I../src -DTEST_WITH_MPI test_mpi_sum.c ${LIB_OBJ} -lpthread + +#----------------------------------------------------------------------- + +rm -f ThreadPool_config.h +echo "/* #define HAVE_PTHREAD 1 */" > ThreadPool_config.h + +icc ${CFLAGS} ${OPT} -c \ + -I. -I../src ${LIB_SRC} + +icc ${CFLAGS} ${OPT} \ + -o test_tpi.intel.noth.exe \ + -I. -I../src ${TEST_SRC} ${LIB_OBJ} -lpthread + +icc ${CCFLAGS} ${OPT} \ + -o test_tpi_cpp.intel.noth.exe \ + -I. -I../src test_tpi.cpp ${LIB_OBJ} -lpthread -lstdc++ + +icc ${CFLAGS} ${OPT} \ + -o test_sum.intel.noth.exe \ + -I. -I../src test_mpi_sum.c ${LIB_OBJ} -lpthread + +#----------------------------------------------------------------------- + +rm -f ThreadPool_config.h +echo "/* #define HAVE_PTHREAD 1 */" > ThreadPool_config.h +echo "#define HAVE_MPI 1" >> ThreadPool_config.h + +mpicc ${CFLAGS} ${OPT} \ + -o test_sum.mpi.intel.noth.exe \ + -I. -I../src -DTEST_WITH_MPI test_mpi_sum.c ${LIB_OBJ} -lpthread + +#----------------------------------------------------------------------- + +rm -f ThreadPool_config.h + diff --git a/mkl/basic/optional/ThreadPool/test/build_pgi b/mkl/basic/optional/ThreadPool/test/build_pgi new file mode 100755 index 0000000..85799cc --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/build_pgi @@ -0,0 +1,39 @@ +#!/bin/bash + +export LM_LICENSE_FILE=7500@reddish +PGI_HOME="/usr/local/pgi_64/linux86-64/7.0-7" +MPICH_HOME="/usr/local/mpi/mpich/64Bit/1.2.7/pgi-6.0" + +export PATH="${PGI_HOME}/bin:${PATH}" + +TEST_SRC="test_main.c test_c_dnax.c test_c_tpi.c test_pthreads.c" + +LIB_SRC="../src/TPI_pthreads.c ../src/TPI_Walltime.c ../src/TPI_Concurrency.c" + +LIB_OBJ="TPI_pthreads.o TPI_Walltime.o TPI_Concurrency.o" + +#----------------------------------------------------------------------- + +pgcc -O4 -c \ + -I../include ${LIB_SRC} -lpthread + +pgcc -O4 \ + -o test_tpi.pgi.exe \ + -I../include ${TEST_SRC} ${LIB_OBJ} -lpthread + +pgCC -O4 \ + -o test_tpi_cpp.pgi.exe \ + -I../include test_tpi.cpp ${LIB_OBJ} 
-lpthread + +#----------------------------------------------------------------------- +# Enable PGI-MPI installation to accept as large a message as possible, 200 Mb + +# export P4_GLOBMEMSIZE="268435456" + +export PATH="${MPICH_HOME}/bin:${PGI_HOME}/bin:${PATH}" + +mpicc -c99 \ + -O4 \ + -o test_sum.mpi.pgi.exe \ + -I../include -DTEST_WITH_MPI test_mpi_sum.c ${LIB_OBJ} -lpthread + diff --git a/mkl/basic/optional/ThreadPool/test/hhpccg/BoxPartitionIB.c b/mkl/basic/optional/ThreadPool/test/hhpccg/BoxPartitionIB.c new file mode 100644 index 0000000..5f2866f --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hhpccg/BoxPartitionIB.c @@ -0,0 +1,562 @@ + + +#include +#include + +#include + +/*--------------------------------------------------------------------*/ +/* Recursively split a box into into (up-ip) sub-boxes */ + +typedef const int RangeInput[2] ; +typedef int RangeOutput[2] ; +typedef RangeInput * const BoxInput ; +typedef RangeOutput * const BoxOutput ; + +static +void box_partition( int ip , int up , int axis , + BoxInput box , + int (* const p_box)[3][2] ) +{ + const int np = up - ip ; + if ( 1 == np ) { + p_box[ip][0][0] = box[0][0] ; p_box[ip][0][1] = box[0][1] ; + p_box[ip][1][0] = box[1][0] ; p_box[ip][1][1] = box[1][1] ; + p_box[ip][2][0] = box[2][0] ; p_box[ip][2][1] = box[2][1] ; + } + else { + const int n = box[ axis ][1] - box[ axis ][0] ; + const int np_low = np / 2 ; /* Rounded down */ + const int np_upp = np - np_low ; + + const int n_upp = (int) (((double) n) * ( ((double)np_upp) / ((double)np))); + const int n_low = n - n_upp ; + const int next_axis = ( axis + 2 ) % 3 ; + + if ( np_low ) { /* P = [ip,ip+np_low) */ + int dbox[3][2] ; + dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; + dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; + dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ; + + dbox[ axis ][1] = dbox[ axis ][0] + n_low ; + + box_partition( ip, ip + np_low, next_axis, + (const int (*)[2]) dbox, p_box ); + } + + if ( np_upp ) { /* 
P = [ip+np_low,ip+np_low+np_upp) */ + int dbox[3][2] ; + dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; + dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; + dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ; + + ip += np_low ; + dbox[ axis ][0] += n_low ; + dbox[ axis ][1] = dbox[ axis ][0] + n_upp ; + + box_partition( ip, ip + np_upp, next_axis, + (const int (*)[2]) dbox, p_box ); + } + } +} + +void box_partition_rcb( const int np , + const int root_box[3][2] , + int pbox[][3][2] ) +{ + box_partition( 0 , np , 2 , root_box , pbox ); +} + +/*--------------------------------------------------------------------*/ + +static int box_intersect( BoxInput a , BoxInput b , BoxOutput c ) +{ + int i ; + for ( i = 0 ; i < 3 ; ++i ) { + c[i][0] = a[i][0] < b[i][0] ? b[i][0] : a[i][0] ; + c[i][1] = a[i][1] < b[i][1] ? a[i][1] : b[i][1] ; + } + + return c[0][0] < c[0][1] && c[1][0] < c[1][1] && c[2][0] < c[2][1] ; +} + + +/*--------------------------------------------------------------------*/ + +static void global_to_use_box( BoxInput gbox , + BoxInput pbox , + const int ghost , + BoxOutput interiorBox , + BoxOutput useBox ) +{ + int i = 0 ; + + for ( i = 0 ; i < 3 ; ++i ) { + const int n = pbox[i][1] - pbox[i][0] ; + + if ( n < 0 ) { + abort(); + } + + interiorBox[i][0] = gbox[i][0] == pbox[i][0] + ? gbox[i][0] : pbox[i][0] + ghost ; + + interiorBox[i][1] = gbox[i][1] == pbox[i][1] + ? gbox[i][1] : pbox[i][1] - ghost ; + + if ( interiorBox[i][1] < pbox[i][0] ) { + interiorBox[i][1] = pbox[i][0] ; + } + + if ( interiorBox[i][0] > pbox[i][1] ) { + interiorBox[i][0] = pbox[i][1] ; + } + + if ( interiorBox[i][1] < interiorBox[i][0] ) { + interiorBox[i][1] = interiorBox[i][0] ; + } + + useBox[i][0] = pbox[i][0] - ghost ; + useBox[i][1] = pbox[i][1] + ghost ; + + if ( useBox[i][0] < gbox[i][0] ) { useBox[i][0] = gbox[i][0] ; } + if ( useBox[i][1] > gbox[i][1] ) { useBox[i][1] = gbox[i][1] ; } + } +} + + +/* A use-box is the owned box plus the ghost layers. 
+ * Map a global (x,y,z) to a local integer ordinate. + */ +static int map_global_to_use_box( BoxInput useBox , + const int global_x , + const int global_y , + const int global_z ) +{ + const int nx = useBox[0][1] - useBox[0][0] ; + const int ny = useBox[1][1] - useBox[1][0] ; + const int nz = useBox[2][1] - useBox[2][0] ; + const int ix = global_x - useBox[0][0] ; + const int iy = global_y - useBox[1][0] ; + const int iz = global_z - useBox[2][0] ; + + const int good = 0 <= ix && ix < nx && + 0 <= iy && iy < ny && + 0 <= iz && iz < nz ; + + if ( nx < 0 || ny < 0 || nz < 0 ) { + abort(); + } + if ( ! good ) { + abort(); + } + + return good ? ix + iy * nx + iz * nx * ny : -1 ; +} + +int box_map_local( const int local_uses[3][2] , + const int map_local_id[] , + const int global_x , + const int global_y , + const int global_z ) +{ + int i = map_global_to_use_box( local_uses , global_x , global_y , global_z ); + + if ( 0 <= i ) { i = map_local_id[i] ; } + + return i ; +} + + +/*--------------------------------------------------------------------*/ + +static void resize_int( int ** a , int * allocLen , int newLen ) +{ + int k = 32; + while ( k < newLen ) { k <<= 1 ; } + if ( NULL == *a ) + { *a = malloc( sizeof(int)*(*allocLen = k) ); } + else if ( *allocLen < k ) + { *a = realloc(*a , sizeof(int)*(*allocLen = k)); } +} + +void box_partition_map( + const int np , + const int my_p , + const int gbox[3][2] , + const int pbox[][3][2] , + const int ghost , + + int map_use_box[3][2] , + int map_local_id[] , + int * map_count_interior , + int * map_count_owns , + int * map_count_uses , + int ** map_recv_pc , + int ** map_send_pc , + int ** map_send_id ) +{ + int * recv_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); + int * send_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); + + int id_length = 0 ; + + int * send_id = NULL ; + int send_id_size = 0 ; + + int own_length , use_length , int_length ; + int count_interior , count_parallel ; + int iSend ; + int g_ix , g_iy , g_iz 
; + int i ; + + int my_int_box[3][2] ; + + global_to_use_box( gbox , pbox[my_p] , ghost , my_int_box , map_use_box ); + + own_length = ( pbox[my_p][0][1] - pbox[my_p][0][0] ) * + ( pbox[my_p][1][1] - pbox[my_p][1][0] ) * + ( pbox[my_p][2][1] - pbox[my_p][2][0] ); + + use_length = ( map_use_box[0][1] - map_use_box[0][0] ) * + ( map_use_box[1][1] - map_use_box[1][0] ) * + ( map_use_box[2][1] - map_use_box[2][0] ); + + int_length = ( my_int_box[0][1] - my_int_box[0][0] ) * + ( my_int_box[1][1] - my_int_box[1][0] ) * + ( my_int_box[2][1] - my_int_box[2][0] ); + + for ( i = 0 ; i < id_length ; ++i ) { map_local_id[i] = -1 ; } + + /* Fill in locally owned portion: { interior , parallel } */ + + count_interior = 0 ; + count_parallel = int_length ; + + for ( g_iz = pbox[my_p][2][0] ; g_iz < pbox[my_p][2][1] ; ++g_iz ) { + for ( g_iy = pbox[my_p][1][0] ; g_iy < pbox[my_p][1][1] ; ++g_iy ) { + for ( g_ix = pbox[my_p][0][0] ; g_ix < pbox[my_p][0][1] ; ++g_ix ) { + + const int local = + map_global_to_use_box( (BoxInput) map_use_box, g_ix, g_iy, g_iz ); + + if ( local < 0 ) { + abort(); + } + + if ( my_int_box[2][0] <= g_iz && g_iz < my_int_box[2][1] && + my_int_box[1][0] <= g_iy && g_iy < my_int_box[1][1] && + my_int_box[0][0] <= g_ix && g_ix < my_int_box[0][1] ) { + /* Interior */ + map_local_id[ local ] = count_interior++ ; + } + else { + /* Parallel */ + map_local_id[ local ] = count_parallel++ ; + } + } + } + } + + if ( count_interior != int_length ) { abort(); } + if ( count_parallel != own_length ) { abort(); } + + /* Fill in off-process received portion: { ( i + my_p ) % np } */ + + recv_pc[0] = count_parallel ; + recv_pc[1] = count_parallel ; + send_pc[0] = 0 ; + send_pc[1] = 0 ; + iSend = 0 ; + + for ( i = 1 ; i < np ; ++i ) { + const int ip = ( i + my_p ) % np ; + int recv_box[3][2] ; + int send_box[3][2] ; + int other_int_box[3][2] ; + int other_use_box[3][2] ; + + /* Received portions */ + + if ( box_intersect( (BoxInput) map_use_box , (BoxInput) pbox[ip] , 
recv_box ) ) { + + for ( g_iz = recv_box[2][0] ; g_iz < recv_box[2][1] ; ++g_iz ) { + for ( g_iy = recv_box[1][0] ; g_iy < recv_box[1][1] ; ++g_iy ) { + for ( g_ix = recv_box[0][0] ; g_ix < recv_box[0][1] ; ++g_ix ) { + + const int local = map_global_to_use_box( (BoxInput) map_use_box, g_ix, g_iy, g_iz ); + + map_local_id[ local ] = count_parallel++ ; + } + } + } + } + recv_pc[i+1] = count_parallel ; + + /* Sent items */ + + global_to_use_box( gbox, pbox[ip], ghost, other_int_box, other_use_box ); + + if ( box_intersect( (BoxInput) other_use_box , (BoxInput) pbox[my_p] , send_box ) ) { + + int nSend = ( send_box[0][1] - send_box[0][0] ) * + ( send_box[1][1] - send_box[1][0] ) * + ( send_box[2][1] - send_box[2][0] ); + + resize_int( & send_id , & send_id_size , (iSend + nSend ) ); + + for ( g_iz = send_box[2][0] ; g_iz < send_box[2][1] ; ++g_iz ) { + for ( g_iy = send_box[1][0] ; g_iy < send_box[1][1] ; ++g_iy ) { + for ( g_ix = send_box[0][0] ; g_ix < send_box[0][1] ; ++g_ix ) { + + const int local = map_global_to_use_box( (BoxInput) map_use_box, g_ix, g_iy, g_iz ); + + if ( map_local_id[ local ] < count_interior ) { abort(); } + + send_id[ iSend ] = map_local_id[ local ] ; + ++iSend ; + } + } + } + } + send_pc[i+1] = iSend ; + } + + if ( count_parallel != use_length ) { abort(); } + + *map_count_interior = int_length ; + *map_count_owns = own_length ; + *map_count_uses = use_length ; + *map_recv_pc = recv_pc ; + *map_send_pc = send_pc ; + *map_send_id = send_id ; +} + +/*--------------------------------------------------------------------*/ + +#ifdef UNIT_TEST + +static int box_contain( const int a[3][2] , const int b[3][2] ) +{ + return a[0][0] <= b[0][0] && b[0][1] <= a[0][1] && + a[1][0] <= b[1][0] && b[1][1] <= a[1][1] && + a[2][0] <= b[2][0] && b[2][1] <= a[2][1] ; +} + +static void box_print( FILE * fp , const int a[][2] ) +{ + fprintf(fp,"{ [ %d , %d ) , [ %d , %d ) , [ %d , %d ) }", + a[0][0] , a[0][1] , + a[1][0] , a[1][1] , + a[2][0] , a[2][1] ); +} + 
+static int box_disjoint( BoxInput a , BoxInput b ) +{ + return a[0][1] <= b[0][0] || b[0][1] <= a[0][0] || + a[1][1] <= b[1][0] || b[1][1] <= a[1][0] || + a[2][1] <= b[2][0] || b[2][1] <= a[2][0] ; +} + + +static void test_box( const int box[3][2] , const int np ) +{ + const int ncell_box = box[0][1] * box[1][1] * box[2][1] ; + int ncell_total = 0 ; + int ncell_min = ncell_box ; + int ncell_max = 0 ; + int (*pbox)[3][2] ; + int i , j ; + + pbox = (int (*)[3][2]) malloc( sizeof(int) * np * 3 * 2 ); + + box_partition( 0 , np , 2 , box , pbox ); + + for ( i = 0 ; i < np ; ++i ) { + const int ncell = ( pbox[i][0][1] - pbox[i][0][0] ) * + ( pbox[i][1][1] - pbox[i][1][0] ) * + ( pbox[i][2][1] - pbox[i][2][0] ); + + if ( ! box_contain( box , (const int (*)[2]) pbox[i] ) ) { + fprintf(stdout," OUT OF BOUNDS pbox[%d/%d] = ",i,np); + box_print(stdout,(const int (*)[2]) pbox[i]); + fprintf(stdout,"\n"); + abort(); + } + + for ( j = i + 1 ; j < np ; ++j ) { + if ( ! box_disjoint( (const int (*)[2]) pbox[i] , + (const int (*)[2]) pbox[j] ) ) { + fprintf(stdout," NOT DISJOINT pbox[%d/%d] = ",i,np); + box_print(stdout, (const int (*)[2]) pbox[i]); + fprintf(stdout,"\n"); + fprintf(stdout," pbox[%d/%d] = ",j,np); + box_print(stdout, (const int (*)[2]) pbox[j]); + fprintf(stdout,"\n"); + abort(); + } + } + ncell_total += ncell ; + + if ( ncell_max < ncell ) { ncell_max = ncell ; } + if ( ncell < ncell_min ) { ncell_min = ncell ; } + } + + if ( ncell_total != ncell_box ) { + fprintf(stdout," WRONG CELL COUNT NP = %d\n",np); + abort(); + } + fprintf(stdout,"NP = %d, total = %d, avg = %d, min = %d, max = %d\n", + np,ncell_box,ncell_box/np,ncell_min,ncell_max); + + free( pbox ); +} + +/*--------------------------------------------------------------------*/ + +static void test_maps( const int root_box[][2] , const int np ) +{ + const int ghost = 1 ; + const int nx_global = root_box[0][1] - root_box[0][0] ; + const int ny_global = root_box[1][1] - root_box[1][0] ; + int 
map_count_interior , map_count_owns , map_count_uses ; + int map_use_box[3][2] ; + int ieq , i , j ; + int (*pbox)[3][2] ; + int **local_values ; + int **map_local_id ; + int **map_recv_pc ; + int **map_send_pc ; + int **map_send_id ; + + pbox = (int (*)[3][2]) malloc( sizeof(int) * np * 3 * 2 ); + + box_partition( 0 , np , 2 , root_box , pbox ); + + local_values = (int **) malloc( sizeof(int*) * np ); + map_local_id = (int **) malloc( sizeof(int*) * np ); + map_recv_pc = (int **) malloc( sizeof(int*) * np ); + map_send_pc = (int **) malloc( sizeof(int*) * np ); + map_send_id = (int **) malloc( sizeof(int*) * np ); + + /* Set each local value to the global equation number */ + + for ( ieq = i = 0 ; i < np ; ++i ) { + const int (*mybox)[2] = (const int (*)[2]) pbox[i] ; + const int nx = mybox[0][1] - mybox[0][0] ; + const int ny = mybox[1][1] - mybox[1][0] ; + const int nz = mybox[2][1] - mybox[2][0] ; + int ix , iy , iz ; + + map_local_id[i] = (int *) malloc( sizeof(int) * + ( nx + 2 * ghost ) * + ( ny + 2 * ghost ) * + ( nz + 2 * ghost ) ); + + /* Generate the partition maps for this rank */ + box_partition_map( np , i , root_box , + (const int (*)[3][2]) pbox , ghost , + map_use_box , + map_local_id[i] , + & map_count_interior , + & map_count_owns , + & map_count_uses , + & map_recv_pc[i] , + & map_send_pc[i] , & map_send_id[i] ); + + if ( map_count_uses != map_recv_pc[i][np] ) { abort(); } + + local_values[i] = (int *) malloc( sizeof(int) * map_count_uses ); + + for ( iz = map_use_box[2][0] ; iz < map_use_box[2][1] ; ++iz ) { + for ( iy = map_use_box[1][0] ; iy < map_use_box[1][1] ; ++iy ) { + for ( ix = map_use_box[0][0] ; ix < map_use_box[0][1] ; ++ix ) { + + const int igrid = map_global_to_use_box((BoxInput)map_use_box,ix,iy,iz); + const int ieq = map_local_id[i][ igrid ]; + + if ( 0 <= ieq ) { + local_values[i][ ieq ] = + ix + iy * nx_global + iz * nx_global * ny_global ; + } + } + } + } + } + + /* Pair-wise compare the local values */ + /* i == receiving 
processor rank */ + /* ip == sending processor rank */ + /* j == receiving processor data entry for message from 'ip' */ + /* jp == sending processor data entry for message to 'i' */ + + for ( i = 0 ; i < np ; ++i ) { + for ( j = 1 ; j < np ; ++j ) { + const int ip = ( i + j ) % np ; + const int jp = ( i + np - ip ) % np ; + const int nrecv = map_recv_pc[i] [j+1] - map_recv_pc[i] [j] ; + const int nsend = map_send_pc[ip][jp+1] - map_send_pc[ip][jp] ; + int k ; + if ( nrecv != nsend ) { + fprintf(stderr,"P%d recv %d from P%d\n",i,nrecv,ip); + fprintf(stderr,"P%d send %d to P%d\n",ip,nsend,i); + abort(); + } + for ( k = 0 ; k < nrecv ; ++k ) { + const int irecv = map_recv_pc[i][j] + k ; + const int isend = map_send_pc[ip][jp] + k ; + const int val_irecv = local_values[i][irecv] ; + const int val_isend = local_values[ip][ map_send_id[ip][isend] ] ; + if ( val_irecv != val_isend ) { + fprintf(stderr,"P%d recv[%d] = %d , from P%d\n",i,k,val_irecv,ip); + fprintf(stderr,"P%d send[%d] = %d , to P%d\n",ip,k,val_isend,i); + abort(); + } + } + } + } + + for ( i = 0 ; i < np ; ++i ) { + free( map_local_id[i] ); + free( map_recv_pc[i] ); + free( map_send_pc[i] ); + free( map_send_id[i] ); + free( local_values[i] ); + } + free( map_send_id ); + free( map_send_pc ); + free( map_recv_pc ); + free( map_local_id ); + free( local_values ); + free( pbox ); +} + +/*--------------------------------------------------------------------*/ + +int main( int argc , char * argv[] ) +{ + int np_max = 256 ; + int box[3][2] = { { 0 , 64 } , { 0 , 64 } , { 0 , 64 } }; + int np = 0 ; + + switch( argc ) { + case 3: + sscanf(argv[1],"%d",&np); + sscanf(argv[2],"%dx%dx%d",& box[0][1] , & box[1][1] , & box[2][1] ); + if ( 0 < np ) { test_box( (const int (*)[2]) box , np ); } + if ( 0 < np ) { test_maps( (const int (*)[2]) box , np ); } + break ; + default: + for ( np = 1 ; np <= np_max ; ++np ) { + test_box( (const int (*)[2]) box , np ); + test_maps( (const int (*)[2]) box , np ); + } + break ; + } + 
return 0 ; +} + +#endif + + diff --git a/mkl/basic/optional/ThreadPool/test/hhpccg/BoxPartitionIB.h b/mkl/basic/optional/ThreadPool/test/hhpccg/BoxPartitionIB.h new file mode 100644 index 0000000..71d71f5 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hhpccg/BoxPartitionIB.h @@ -0,0 +1,88 @@ + + +#ifndef BoxPartionIB_h +#define BoxPartionIB_h + +/** \brief Partition a { [ix,jx) X [iy,jy) X [iz,jz) } box. + * + * Use recursive coordinate bisection to partition a box + * into np disjoint sub-boxes. Allocate (via malloc) and + * populate the sub-boxes, mapping the local (x,y,z) to + * a local ordinal, and mappings for the send-recv messages + * to update the ghost cells. + * + * Order local ordinates as follows: + * { + * interior , + * boundary , + * remote[ ( my_p + i ) % np ] + * } + * where i = 1..(np-1) + * + * usage: + * + * my_nx = pbox[my_p][0][1] - pbox[my_p][0][0] ; + * my_ny = pbox[my_p][1][1] - pbox[my_p][1][0] ; + * my_nz = pbox[my_p][2][1] - pbox[my_p][2][0] ; + * + * for ( x = -ghost ; x < my_nx + ghost ; ++x ) { + * for ( y = -ghost ; y < my_ny + ghost ; ++y ) { + * for ( z = -ghost ; z < my_nz + ghost ; ++z ) { + * const int x_global = x + pbox[my_p][0][0] ; + * const int y_global = y + pbox[my_p][1][0] ; + * const int z_global = z + pbox[my_p][2][0] ; + * + * const int local_ordinal = + * box_map_local( pbox[my_p], ghost, map_local_id, x, y, z ); + * + * if ( 0 <= local_ordinal ) { + * } + * } + * + * for ( i = 1 ; i < np ; ++i ) { + * const int recv_processor = ( my_p + i ) % np ; + * const int recv_ordinal_begin = map_recv_pc[i]; + * const int recv_ordinal_end = map_recv_pc[i+1]; + * } + * + * for ( i = 1 ; i < np ; ++i ) { + * const int send_processor = ( my_p + i ) % np ; + * const int send_map_begin = map_send_pc[i]; + * const int send_map_end = map_send_pc[i+1]; + * for ( j = send_map_begin ; j < send_map_end ; ++j ) { + * send_ordinal = map_send_id[j] ; + * } + * } + */ + + +void box_partition_rcb( + const int np /**< [in] Number of 
partitions */ , + const int root_box[3][2] /**< [in] Global 3D box to partition */ , + int pbox[][3][2] /**< [out] Partition of global 3D boxes */ ); + +void box_partition_map( + const int np /**< [in] Number of partitions */ , + const int my_p /**< [in] My partition */ , + const int gbox[3][2] /**< [in] Global 3D box */ , + const int pbox[][3][2] /**< [in] Partitions of global 3D box */ , + const int ghost /**< [in] Number of grid points to ghost */ , + + int map_uses_box[3][2] /**< [out] Local box expanded by ghosting */ , + int map_local_id[] /**< [out] Mapping for local points */ , + int * map_count_interior /**< [out] Number of my interior points */ , + int * map_count_owns /**< [out] Number of points I own */ , + int * map_count_uses /**< [out] Number of points I access */ , + int ** map_recv_pc /**< [out] Received prefix spans per process */ , + int ** map_send_pc /**< [out] Send prefix counts per process */ , + int ** map_send_id /**< [out] Send grid points */ ); + +/* \brief Map a global (x,y,z) to a local ordinal. 
*/ +int box_map_local( const int local_uses[3][2] , + const int map_local_id[] , + const int global_x , + const int global_y , + const int global_z ); + +#endif + diff --git a/mkl/basic/optional/ThreadPool/test/hhpccg/CGSolver.c b/mkl/basic/optional/ThreadPool/test/hhpccg/CGSolver.c new file mode 100644 index 0000000..55f739d --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hhpccg/CGSolver.c @@ -0,0 +1,311 @@ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +/*--------------------------------------------------------------------*/ + +void cgsolve_set_lhs( const struct distributed_crs_matrix * const matrix , + const VECTOR_SCALAR * const x , + VECTOR_SCALAR * const b ) +{ + const int nRow = matrix->n_local_row ; + const int nVec = matrix->p_recv_pc[ matrix->p_size ] ; + + VECTOR_SCALAR * const p = + (VECTOR_SCALAR *) malloc( nVec * sizeof(VECTOR_SCALAR) ); + + tpi_copy( nRow , x , p ); + + dcrs_apply( matrix , p , b ); + + free( p ); +} + +/*--------------------------------------------------------------------*/ + +/* x += alpha * p ; + * r -= alpha * Ap ; + * return dot( r , r ); + */ +static +double cgsolver_update( const int length , + const VECTOR_SCALAR alpha , + const VECTOR_SCALAR * p , + const VECTOR_SCALAR * Ap , + VECTOR_SCALAR * x , + VECTOR_SCALAR * r ); + +/*--------------------------------------------------------------------*/ + +void cgsolve_blas( const struct distributed_crs_matrix * matrix , + const VECTOR_SCALAR * const b , + VECTOR_SCALAR * const x , + const VECTOR_SCALAR tolerance , + const int max_iter , + const int print_iter , + int * const iter_count , + VECTOR_SCALAR * const norm_resid , + double * const solve_dt ) +{ + const int nRow = matrix->n_local_row ; + const int nVec = matrix->p_recv_pc[ matrix->p_size ] ; + + const VECTOR_SCALAR tol_2 = tolerance * tolerance ; + + VECTOR_SCALAR * const r = + (VECTOR_SCALAR *) malloc( nRow * sizeof(VECTOR_SCALAR) ); + 
VECTOR_SCALAR * const p = + (VECTOR_SCALAR *) malloc( nVec * sizeof(VECTOR_SCALAR) ); + VECTOR_SCALAR * const Ap = + (VECTOR_SCALAR *) malloc( nRow * sizeof(VECTOR_SCALAR) ); + + VECTOR_SCALAR rtrans = 0.0 ; + VECTOR_SCALAR beta = 0.0 ; + VECTOR_SCALAR pAp = 0.0 ; + VECTOR_SCALAR alpha ; + double time_begin , time_end ; + + int k ; + + tpi_copy( nRow , b , r ); + tpi_copy( nRow , x , p ); + + /* Ap = matrix * p ; */ + dcrs_apply( matrix , p , Ap ); + + /* r -= Ap ; */ + tpi_axpy( nRow , -1.0 , Ap , r ); + + rtrans = tpi_dot( nRow , r , r ); + + time_begin = TPI_Walltime(); + + for ( k = 0 ; k < max_iter && tol_2 < rtrans ; ++k ) { + + /* p = r + beta * p ; */ + tpi_xpby( nRow, r, beta, p ); /* parallel */ + + dcrs_apply( matrix , p , Ap ); + + pAp = tpi_dot( nRow , p , Ap ); + + /* If orthogonal then cannot update */ + alpha = 0 < fabs( pAp ) ? rtrans / pAp : 0.0 ; + + /* x += alpha * p ; + * r -= alpha * Ap ; + * return dot( r , r ); + */ + beta = rtrans ; + + tpi_axpy( nRow , alpha , p , x ); + tpi_axpy( nRow , -alpha , Ap , r ); + rtrans = tpi_dot( nRow , r , r ); + beta = rtrans / beta ; + } + + time_end = TPI_Walltime(); + +#ifdef HAVE_MPI + { + double tb = time_begin ; + double te = time_end ; + MPI_Allreduce(&tb, &time_begin, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(&te, &time_end, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + } +#endif + + *solve_dt += time_end - time_begin ; + + *norm_resid = sqrt( rtrans ); + *iter_count = k ; + + free( Ap ); + free( p ); + free( r ); +} + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ + +void cgsolve( const struct distributed_crs_matrix * matrix , + const VECTOR_SCALAR * const b , + VECTOR_SCALAR * const x , + const int overlap_comm , + const VECTOR_SCALAR tolerance , + const int max_iter , + const int print_iter , + int * const iter_count , + VECTOR_SCALAR * const norm_resid , + double * const solve_dt ) +{ 
+ const int nRow = matrix->n_local_row ; + const int nVec = matrix->p_recv_pc[ matrix->p_size ] ; + + const VECTOR_SCALAR tol_2 = tolerance * tolerance ; + + VECTOR_SCALAR * const r = + (VECTOR_SCALAR *) malloc( nRow * sizeof(VECTOR_SCALAR) ); + VECTOR_SCALAR * const p = + (VECTOR_SCALAR *) malloc( nVec * sizeof(VECTOR_SCALAR) ); + VECTOR_SCALAR * const Ap = + (VECTOR_SCALAR *) malloc( nRow * sizeof(VECTOR_SCALAR) ); + + VECTOR_SCALAR rtrans = 0.0 ; + VECTOR_SCALAR beta = 0.0 ; + VECTOR_SCALAR pAp = 0.0 ; + VECTOR_SCALAR alpha ; + double time_begin , time_end ; + + int k ; + + tpi_copy( nRow , b , r ); + tpi_copy( nRow , x , p ); + + /* gather off-processor components of 'p'. + * Ap = matrix * p ; + * return dot( Ap , p ); + */ + pAp = dcrs_apply_and_dot( matrix , p , Ap , overlap_comm ); + + /* r -= 1.0 * Ap ; + * return dot( r , r ); + */ + alpha = 1.0 ; + rtrans = cgsolver_update( nRow, alpha, NULL, Ap, NULL, r ); /* parallel */ + + time_begin = TPI_Walltime(); + + for ( k = 0 ; k < max_iter && tol_2 < rtrans ; ++k ) { + + /* p = r + beta * p ; */ + tpi_xpby( nRow, r, beta, p ); /* parallel */ + + /* gather off-processor components of 'p'. + * Ap = matrix * p ; + * return dot( Ap , p ); + */ + pAp = dcrs_apply_and_dot( matrix , p , Ap , overlap_comm ); /* parallel */ + + /* If orthogonal then cannot update */ + alpha = 0 < fabs( pAp ) ? 
rtrans / pAp : 0.0 ; + + /* x += alpha * p ; + * r -= alpha * Ap ; + * return dot( r , r ); + */ + beta = rtrans ; + rtrans = cgsolver_update( nRow , alpha , p , Ap , x , r ); /* parallel */ + beta = rtrans / beta ; + } + + time_end = TPI_Walltime(); + +#ifdef HAVE_MPI + { + double tb = time_begin ; + double te = time_end ; + MPI_Allreduce(&tb, &time_begin, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(&te, &time_end, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + } +#endif + + *solve_dt += time_end - time_begin ; + + *norm_resid = sqrt( rtrans ); + *iter_count = k ; + + free( Ap ); + free( p ); + free( r ); +} + +/*--------------------------------------------------------------------*/ + +struct tpi_work_cgsolve { + const VECTOR_SCALAR * p ; + const VECTOR_SCALAR * Ap ; + VECTOR_SCALAR * x ; + VECTOR_SCALAR * r ; + VECTOR_SCALAR alpha ; + int length ; +}; + +static void tpi_work_dot_join( TPI_Work * work , const void * src ) +{ *((double *) work->reduce ) += *((const double *) src); } + +static void tpi_work_dot_init( TPI_Work * work ) +{ *((double *) work->reduce ) = 0 ; } + +static void tpi_work_update( TPI_Work * work ) +{ + const struct tpi_work_cgsolve * const cg_work = + (const struct tpi_work_cgsolve *) work->info ; + + const int length = cg_work->length ; + const VECTOR_SCALAR alpha = cg_work->alpha ; + const VECTOR_SCALAR * const p = cg_work->p ; + const VECTOR_SCALAR * const Ap = cg_work->Ap ; + VECTOR_SCALAR * const x = cg_work->x ; + VECTOR_SCALAR * const r = cg_work->r ; + + double mag = 0 ; + int iBeg , iEnd , i ; + + tpi_work_span( work , length , & iBeg , & iEnd ); + + if ( x ) { for ( i = iBeg ; i < iEnd ; ++i ) { x[i] += alpha * p[i]; } } + + for ( i = iBeg ; i < iEnd ; ++i ) { + const VECTOR_SCALAR val = ( r[i] -= alpha * Ap[i] ); + mag += val * val ; + } + + *((double*) work->reduce ) = mag ; +} + +double cgsolver_update( const int length , + const VECTOR_SCALAR alpha , + const VECTOR_SCALAR * p , + const VECTOR_SCALAR * Ap , + 
VECTOR_SCALAR * x , + VECTOR_SCALAR * r ) +{ + struct tpi_work_cgsolve work ; + + double result = 0.0 ; + + work.length = length ; + work.alpha = alpha ; + work.p = p ; + work.Ap = Ap ; + work.x = x ; + work.r = r ; + + TPI_Run_threads_reduce( tpi_work_update , & work , + tpi_work_dot_join , tpi_work_dot_init , + sizeof(result) , & result ); + +#ifdef HAVE_MPI + { + double local = result ; + MPI_Allreduce( & local, & result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); + } +#endif + + return result ; +} + +/*--------------------------------------------------------------------*/ + diff --git a/mkl/basic/optional/ThreadPool/test/hhpccg/CGSolver.h b/mkl/basic/optional/ThreadPool/test/hhpccg/CGSolver.h new file mode 100644 index 0000000..f0ee6f6 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hhpccg/CGSolver.h @@ -0,0 +1,40 @@ + +#ifndef CGSolver_h +#define CGSolver_h + +#include +#include + +/*--------------------------------------------------------------------*/ + +void cgsolve_set_lhs( const struct distributed_crs_matrix * matrix , + const VECTOR_SCALAR * const x , + VECTOR_SCALAR * const b ); + +/* Solve with fused loops */ +void cgsolve( const struct distributed_crs_matrix * matrix , + const VECTOR_SCALAR * const b , + VECTOR_SCALAR * const x , + const int overlap_comm , + const VECTOR_SCALAR tolerance , + const int max_iter , + const int print_iter , + int * const iter_count , + VECTOR_SCALAR * const norm_resid , + double * const solve_dt ); + +/* Solve with blas-like calls */ +void cgsolve_blas( const struct distributed_crs_matrix * matrix , + const VECTOR_SCALAR * const b , + VECTOR_SCALAR * const x , + const VECTOR_SCALAR tolerance , + const int max_iter , + const int print_iter , + int * const iter_count , + VECTOR_SCALAR * const norm_resid , + double * const solve_dt ); + +/*--------------------------------------------------------------------*/ + +#endif + diff --git a/mkl/basic/optional/ThreadPool/test/hhpccg/CMakeLists.txt 
b/mkl/basic/optional/ThreadPool/test/hhpccg/CMakeLists.txt new file mode 100644 index 0000000..0c652cd --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hhpccg/CMakeLists.txt @@ -0,0 +1,83 @@ + +INCLUDE(PackageAddExecutableAndTest) +INCLUDE(PackageLibraryMacros) + +#################### + +SET(HEADERS "") +SET(SOURCES "") + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) + +SET(HEADERS ${HEADERS} + ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h + ) + +INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) + +APPEND_SET(HEADERS + BoxPartition.h + CGSolver.h + tpi_vector.h + dcrs_matrix.h + ) + +#################### + + +PACKAGE_ADD_EXECUTABLE( + test_tpi_hhpccg + COMM serial mpi + SOURCES main.c CGSolver.c BoxPartitionIB.c tpi_vector.c dcrs_matrix.c + DEPLIBS pthread m + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_hhpccg + NAME test_tpi_hhpccg_serial_1 + COMM serial + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_hhpccg + NAME test_tpi_hhpccg_serial_2 + COMM serial + ARGS "threads=2" + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_hhpccg + NAME test_tpi_hhpccg_serial_4 + COMM serial + ARGS "threads=4" + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_hhpccg + NAME test_tpi_hhpccg_mpi_1 + COMM mpi + NUM_MPI_PROCS 1 + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_hhpccg + NAME test_tpi_hhpccg_mpi_2 + COMM mpi + NUM_MPI_PROCS 2 + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_hhpccg + NAME test_tpi_hhpccg_mpi_4 + COMM mpi + NUM_MPI_PROCS 4 + DIRECTORY . 
+ ) + + diff --git a/mkl/basic/optional/ThreadPool/test/hhpccg/dcrs_matrix.c b/mkl/basic/optional/ThreadPool/test/hhpccg/dcrs_matrix.c new file mode 100644 index 0000000..d61404f --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hhpccg/dcrs_matrix.c @@ -0,0 +1,314 @@ + +#include +#include + +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#include + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ + +#if ! defined( HAVE_MPI ) + +static +double comm_sum( double v ) { return v ; } + +#define get_off_process_entries( M , V ) /* */ + +/*--------------------------------------------------------------------*/ +#else /* defined( HAVE_MPI ) */ +/*--------------------------------------------------------------------*/ + +static +double comm_sum( double v ) +{ + double result = 0 ; + MPI_Allreduce( & v , & result , 1 , MPI_DOUBLE , MPI_SUM , MPI_COMM_WORLD ); + return result ; +} + +static +void get_off_process_entries( + const struct distributed_crs_matrix * const matrix , + VECTOR_SCALAR * const vec ) +{ + const int np = matrix->p_size ; + const int my_p = matrix->p_rank ; + const int * const recv_pc = matrix->p_recv_pc ; + const int * const send_pc = matrix->p_send_pc ; + const int * const send_id = matrix->p_send_id ; + int i , irecv ; + + for ( irecv = 0 , i = 1 ; i < np ; ++i ) { + if ( recv_pc[i] < recv_pc[i+1] ) ++irecv ; + } + + { + VECTOR_SCALAR * const send_buf = + (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * send_pc[np] ); + + MPI_Request * const recv_request = + (MPI_Request *) malloc( sizeof(MPI_Request) * irecv ); + + MPI_Status * const recv_status = + (MPI_Status *) malloc( sizeof(MPI_Status) * irecv ); + + for ( irecv = 0 , i = 1 ; i < np ; ++i ) { + const int ip = ( i + my_p ) % np ; + const int recv_beg = recv_pc[i]; + const int recv_length = recv_pc[i+1] - recv_beg ; + if ( recv_length ) { + MPI_Irecv( vec + recv_beg , + recv_length * 
sizeof(VECTOR_SCALAR), MPI_BYTE , + ip , 0 , MPI_COMM_WORLD , recv_request + irecv ); + ++irecv ; + } + } + + /* Gather components into send buffer */ + + for ( i = 0 ; i < send_pc[np] ; ++i ) { + send_buf[i] = vec[ send_id[i] ]; + } + + MPI_Barrier( MPI_COMM_WORLD ); + + for ( i = 1 ; i < np ; ++i ) { + const int ip = ( i + my_p ) % np ; + const int send_beg = send_pc[i]; + const int send_length = send_pc[i+1] - send_beg ; + if ( send_length ) { /* Send to 'i' */ + MPI_Rsend( send_buf + send_beg , + send_length * sizeof(VECTOR_SCALAR), MPI_BYTE , + ip , 0 , MPI_COMM_WORLD ); + } + } + + MPI_Waitall( irecv , recv_request , recv_status ); + + free( recv_status ); + free( recv_request ); + free( send_buf ); + } +} + +#endif + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ + +static void dcrs_apply_and_dot_span( + const struct distributed_crs_matrix * const matrix , + const int span_begin , + const int span_end , + const VECTOR_SCALAR * const x , + VECTOR_SCALAR * const y , + double * const result ) +{ + const int * const A_pc = matrix->A_pc ; + const int * const A_ia = matrix->A_ia ; + const MATRIX_SCALAR * const A_a = matrix->A_a ; + + double dot_x_y = *result ; + + int row = span_begin ; + + for ( ; row < span_end ; ++row ) { + const int pcBeg = A_pc[ row ]; + const int pcEnd = A_pc[ row + 1 ]; + + const int * ia = A_ia + pcBeg ; + const MATRIX_SCALAR * a = A_a + pcBeg ; + const MATRIX_SCALAR * const a_end = A_a + pcEnd ; + + VECTOR_SCALAR y_tmp = 0 ; + for ( ; a != a_end ; ++a , ++ia ) { + y_tmp += *a * x[ *ia ]; + } + dot_x_y += x[ row ] * y_tmp ; + y[ row ] = y_tmp ; + } + + *result = dot_x_y ; +} + +static void dcrs_apply_span( + const struct distributed_crs_matrix * const matrix , + const int span_begin , + const int span_end , + const VECTOR_SCALAR * const x , + VECTOR_SCALAR * const y ) +{ + const int * const A_pc = matrix->A_pc ; + const int * const A_ia = 
matrix->A_ia ; + const MATRIX_SCALAR * const A_a = matrix->A_a ; + + int row = span_begin ; + + for ( ; row < span_end ; ++row ) { + const int pcBeg = A_pc[ row ]; + const int pcEnd = A_pc[ row + 1 ]; + + const int * ia = A_ia + pcBeg ; + const MATRIX_SCALAR * a = A_a + pcBeg ; + const MATRIX_SCALAR * const a_end = A_a + pcEnd ; + + VECTOR_SCALAR y_tmp = 0 ; + for ( ; a != a_end ; ++a , ++ia ) { + y_tmp += *a * x[ *ia ]; + } + y[ row ] = y_tmp ; + } +} + +static void work_span( const int count , const int rank , + int * jBeg , int * jEnd ) +{ + const int length = *jEnd - *jBeg ; + const int chunk = ( length + count - 1 ) / count ; + const int begin = chunk * rank ; + int end = begin + chunk ; + + if ( length < end ) { end = length ; } + + *jEnd = *jBeg + end ; + *jBeg += begin ; +} + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ + +static void tpi_work_dot_join( TPI_Work * work , const void * src ) +{ *((double *) ( work->reduce) ) += *((const double *) src); } + +static void tpi_work_dot_init( TPI_Work * work ) +{ *((double *) ( work->reduce) ) = 0 ; } + +/*--------------------------------------------------------------------*/ + +struct work_dcrs { + const struct distributed_crs_matrix * matrix ; + const VECTOR_SCALAR * x ; + VECTOR_SCALAR * y ; + int jBeg ; + int jEnd ; +}; + +/*--------------------------------------------------------------------*/ + +static void tpi_work_dcrs_apply_and_dot( TPI_Work * work ) +{ + const struct work_dcrs * const info = (const struct work_dcrs *) work->info ; + + int local_begin = info->jBeg ; + int local_end = info->jEnd ; + + work_span( work->count , work->rank , & local_begin , & local_end ); + + dcrs_apply_and_dot_span( info->matrix , local_begin , local_end , + info->x , info->y , (double *) work->reduce ); +} + +double dcrs_apply_and_dot( + const struct distributed_crs_matrix * matrix , + VECTOR_SCALAR * x , + VECTOR_SCALAR * 
y , + const int overlap_communication ) +{ + struct work_dcrs info ; + + double result = 0.0 ; + + info.matrix = matrix ; + info.x = x ; + info.y = y ; + + if ( overlap_communication && + matrix->n_internal_row < matrix->n_local_row ) { + + double remote_result = 0 ; + + /* Start the internal matrix-vector multiply */ + /* result += dot( output = A * input , input ); */ + + info.jBeg = 0 ; + info.jEnd = matrix->n_internal_row ; + + /* Divide internal work evenly among worker threads. + * This leave the primary thread completely out of the computation. + */ + TPI_Start_threads_reduce( tpi_work_dcrs_apply_and_dot , & info , + tpi_work_dot_join , + tpi_work_dot_init , + sizeof(result) , & result ); + + get_off_process_entries( matrix , x ); + + TPI_Wait(); /* Wait for internal result */ + + info.jBeg = matrix->n_internal_row ; + info.jEnd = matrix->n_local_row ; + + TPI_Run_threads_reduce( tpi_work_dcrs_apply_and_dot , & info , + tpi_work_dot_join , + tpi_work_dot_init , + sizeof(remote_result) , & remote_result ); + + result += remote_result ; + } + else { + info.jBeg = 0 ; + info.jEnd = matrix->n_local_row ; + + get_off_process_entries( matrix , x ); + + TPI_Run_threads_reduce( tpi_work_dcrs_apply_and_dot , & info , + tpi_work_dot_join , + tpi_work_dot_init , + sizeof(result) , & result ); + } + + result = comm_sum( result ); + + return result ; +} + +/*--------------------------------------------------------------------*/ + +static void tpi_work_dcrs_apply( TPI_Work * work ) +{ + const struct work_dcrs * const info = (const struct work_dcrs *) work->info ; + + int local_begin = info->jBeg ; + int local_end = info->jEnd ; + + work_span( work->count , work->rank , & local_begin , & local_end ); + + dcrs_apply_span( info->matrix , local_begin , local_end , + info->x , info->y ); +} + +void dcrs_apply( + const struct distributed_crs_matrix * matrix , + VECTOR_SCALAR * x , + VECTOR_SCALAR * y ) +{ + struct work_dcrs info ; + + info.matrix = matrix ; + info.x = x ; + 
info.y = y ; + info.jBeg = 0 ; + info.jEnd = matrix->n_local_row ; + + get_off_process_entries( matrix , x ); + + TPI_Run_threads( tpi_work_dcrs_apply , & info , 0 ); +} + diff --git a/mkl/basic/optional/ThreadPool/test/hhpccg/dcrs_matrix.h b/mkl/basic/optional/ThreadPool/test/hhpccg/dcrs_matrix.h new file mode 100644 index 0000000..61f2032 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hhpccg/dcrs_matrix.h @@ -0,0 +1,41 @@ + +#ifndef dcrs_matrix_h +#define dcrs_matrix_h + +#include + +struct distributed_crs_matrix { + /* Global parallel */ + int p_size ; + int p_rank ; + int * p_recv_pc ; /* [np+1], span of received off-processor elements */ + int * p_send_pc ; /* [np+1], span of sent off-processor elements */ + int * p_send_id ; /* [send_pc[np]], indices of sent elements */ + + /* Local and local parallel */ + int n_local_column ; /* Number of local columns */ + int n_local_row ; /* Number of local rows */ + int n_internal_row ; /* Number of local rows with internal columns */ + int * A_pc ; /* Offsets into A_ia array for column indices */ + int * A_ia ; + MATRIX_SCALAR * A_a ; +}; + +/* 1) communicate off-processor portions of input. + * 2) apply: output = matrix * input ; + * 3) return: dot( output , input ); + */ +double dcrs_apply_and_dot( const struct distributed_crs_matrix * matrix , + VECTOR_SCALAR * input , + VECTOR_SCALAR * output , + const int overlap_communication ); + +/* 1) communicate off-processor portions of input. 
+ * 2) apply: output = matrix * input ; + */ +void dcrs_apply( const struct distributed_crs_matrix * matrix , + VECTOR_SCALAR * input , + VECTOR_SCALAR * output ); + +#endif + diff --git a/mkl/basic/optional/ThreadPool/test/hhpccg/main.c b/mkl/basic/optional/ThreadPool/test/hhpccg/main.c new file mode 100644 index 0000000..57bb80a --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hhpccg/main.c @@ -0,0 +1,422 @@ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +/*--------------------------------------------------------------------*/ +static +void hpccg_alloc_and_fill( const int np , + const int my_p , + const int gbox[][2] , + const int ghost , + struct distributed_crs_matrix * const matrix ); + +/*--------------------------------------------------------------------*/ + +int main( int argc , char ** argv ) +{ + const int ghost = 1 ; + const int max_cube = 20 ; + int ncube[20] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 }; + + FILE * print_file = stdout ; + int print_iter = 500 ; + int max_iter = 50 ; + int overlap_comm = 0 ; + + float tolerance = 0.0 ; /* Force max iterations */ + + int gbox[3][2] = { { 0 , 16 } , { 0 , 16 } , { 0 , 16 } }; + int nt = 0 ; + int trials = 6 ; + int ntest ; + int np = 1; + int my_p = 0 ; + +#ifdef HAVE_MPI + MPI_Init( & argc , & argv ); + MPI_Comm_size( MPI_COMM_WORLD , & np ); + MPI_Comm_rank( MPI_COMM_WORLD , & my_p ); +#endif + + if ( ! my_p ) { + const char arg_threads[] = "threads=" ; + const char arg_cube[] = "cube=" ; + const char arg_box[] = "box=" ; + const char arg_max[] = "max_iter=" ; + const char arg_trials[] = "trials=" ; + const char arg_print[] = "print_iter=" ; + const char arg_file[] = "print_file=" ; + const char arg_comm[] = "overlap_comm=" ; + const char arg_tolerance[] = "tolerance=" ; + int i ; + for ( i = 1 ; i < argc ; ++i ) { + if ( ! 
strncmp(argv[i],arg_threads,strlen(arg_threads)) ) { + sscanf(argv[i]+strlen(arg_threads),"%d",&nt); + } + else if ( ! strncmp(argv[i],arg_box,strlen(arg_box)) ) { + sscanf(argv[i]+strlen(arg_box),"%d%*[x]%d%*[x]%d", + & gbox[0][1] , & gbox[1][1] , & gbox[2][1] ); + } + else if ( ! strncmp(argv[i],arg_cube,strlen(arg_cube)) ) { + sscanf(argv[i]+strlen(arg_cube), + "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d", + ncube+0, ncube+1, ncube+2, ncube+3, ncube+4, + ncube+5, ncube+6, ncube+7, ncube+8, ncube+9, + ncube+10, ncube+11, ncube+12, ncube+13, ncube+14, + ncube+15, ncube+16, ncube+17, ncube+18, ncube+19); + } + else if ( ! strncmp(argv[i],arg_max,strlen(arg_max)) ) { + sscanf(argv[i]+strlen(arg_max),"%d",&max_iter); + } + else if ( ! strncmp(argv[i],arg_trials,strlen(arg_trials)) ) { + sscanf(argv[i]+strlen(arg_trials),"%d",&trials); + } + else if ( ! strncmp(argv[i],arg_print,strlen(arg_print)) ) { + sscanf(argv[i]+strlen(arg_print),"%d",&print_iter); + } + else if ( ! strncmp(argv[i],arg_comm,strlen(arg_comm)) ) { + sscanf(argv[i]+strlen(arg_comm),"%d",&overlap_comm); /* skip this option's own prefix; the value starts right after "overlap_comm=" */ + } + else if ( ! strncmp(argv[i],arg_tolerance,strlen(arg_tolerance)) ) { + sscanf(argv[i]+strlen(arg_tolerance),"%f",&tolerance); /* skip this option's own prefix; the value starts right after "tolerance=" */ + } + else if ( !
strncmp(argv[i],arg_file,strlen(arg_file)) ) { + char buffer[256] = { 0 } ; /* stays empty when no file name follows "print_file=" */ + sscanf(argv[i]+strlen(arg_file),"%255s",buffer); /* field width keeps the read inside buffer[256] */ + print_file = buffer[0] ? fopen(buffer,"a") : NULL ; if ( NULL == print_file ) { fprintf(stderr,"cannot open print_file '%s', using stdout\n",buffer); print_file = stdout ; } /* never leave print_file NULL for the fprintf calls below */ + } + } + } + +#ifdef HAVE_MPI + { + MPI_Bcast( & nt , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); + MPI_Bcast( & gbox[0][0] , 6 , MPI_INT , 0 , MPI_COMM_WORLD ); + MPI_Bcast( ncube , max_cube , MPI_INT , 0 , MPI_COMM_WORLD ); + MPI_Bcast( & overlap_comm , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); + MPI_Bcast( & max_iter , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); + MPI_Bcast( & print_iter , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); + MPI_Bcast( & trials , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); + MPI_Bcast( & tolerance , 1 , MPI_FLOAT , 0 , MPI_COMM_WORLD ); + } +#endif + + if ( nt ) { + TPI_Init( nt ); + TPI_Block(); + TPI_Unblock(); + } + + if ( ! my_p ) { + fprintf(print_file,"\"PROC\" , \"THREAD\" , \"EQUATION\" , \"NON-ZERO\" , \"FUSED-AVG\", \"FUSED-MAX\", \"BLAS-AVG\", \"BLAS-MAX\", \"FUSED\", \"BLAS\" , \"Iter\"\n"); + fprintf(print_file,"\"COUNT\", \"COUNT\" , \"COUNT\" , \"COUNT\" , \"Mflops\" , \"Mflops\" , \"Mflops\" , \"Mflops\" , \"error\", \"error\" , \"COUNT\"\n"); + } + + for ( ntest = 0 ; !
ntest || ( ntest < max_cube && ncube[ntest] ) ; ++ntest ) { + struct distributed_crs_matrix matrix ; + + if ( ncube[ntest] ) { + gbox[0][1] = gbox[1][1] = gbox[2][1] = ncube[ntest] ; + } + + hpccg_alloc_and_fill( np, my_p, (const int (*)[2]) gbox, ghost, &matrix); + + { + const int nRow = matrix.n_local_row ; + + double solve_dt[2] = { 0 , 0 }; + double solve_blas_dt[2] = { 0 , 0 }; + VECTOR_SCALAR norm_resid = 0.0 ; + VECTOR_SCALAR norm_resid_blas = 0.0 ; + int iter_count = 0 ; + int iter_count_blas = 0 ; + int k ; + + VECTOR_SCALAR * const b = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * nRow ); + VECTOR_SCALAR * const x = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * nRow ); + VECTOR_SCALAR * const x_blas = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * nRow ); + VECTOR_SCALAR * const xexact = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * nRow ); + + { + const VECTOR_SCALAR value = 1.0 /* 1.0 / 3.0 */ ; + int i ; + for ( i = 0 ; i < nRow ; ++i ) xexact[i] = value ; + } + + for ( k = 0 ; k < trials ; ++k ) { + double dt = 0 ; + int i ; + + for ( i = 0 ; i < nRow ; ++i ) { x_blas[i] = 0.0 ; } + + cgsolve_set_lhs( & matrix , xexact , b ); + + cgsolve_blas( & matrix, b, x_blas, + tolerance , max_iter , print_iter , + & iter_count_blas, & norm_resid_blas, & dt ); + + solve_blas_dt[0] += dt ; + if ( ! k || dt < solve_blas_dt[1] ) { solve_blas_dt[1] = dt ; } + } + + for ( k = 0 ; k < trials ; ++k ) { + double dt = 0 ; + int i ; + + for ( i = 0 ; i < nRow ; ++i ) { x[i] = 0.0 ; } + + cgsolve_set_lhs( & matrix , xexact , b ); + + cgsolve( & matrix, b, x, overlap_comm, + tolerance , max_iter , print_iter , + & iter_count, & norm_resid, & dt ); + + solve_dt[0] += dt ; + if ( ! 
k || dt < solve_dt[1] ) { solve_dt[1] = dt ; } + } + + { + int nnzGlobal = matrix.A_pc[ nRow ]; + double error[3] = { 0 , 0 , 0 }; + + for ( k = 0 ; k < nRow ; ++k ) { + error[0] += xexact[k] * xexact[k] ; + error[1] += ( x[k] - xexact[k] ) * ( x[k] - xexact[k] ); + error[2] += ( x_blas[k] - xexact[k] ) * ( x_blas[k] - xexact[k] ); + } + +#ifdef HAVE_MPI + { + double error_global[3] = { 0.0 , 0.0 , 0.0 }; + int nnz = nnzGlobal ; + + MPI_Allreduce( & nnz , & nnzGlobal , 1 , MPI_INT , MPI_SUM , + MPI_COMM_WORLD ); + + MPI_Allreduce( error , error_global , 3 , MPI_DOUBLE , MPI_SUM , + MPI_COMM_WORLD ); + + error[0] = error_global[0]; + error[1] = error_global[1]; + error[2] = error_global[2]; + } +#endif + + error[0] = sqrt( error[0] ); + error[1] = sqrt( error[1] ); + error[2] = sqrt( error[2] ); + + if ( ! my_p ) { + const int nRowGlobal = ( gbox[0][1] - gbox[0][0] ) * + ( gbox[1][1] - gbox[1][0] ) * + ( gbox[2][1] - gbox[2][0] ); + + const double dt_mean_fuse_step = 1.0e6 * solve_dt[0] / (double) trials ; + const double dt_mean_blas_step = 1.0e6 * solve_blas_dt[0] / (double) trials ; + const double dt_min_fuse_step = 1.0e6 * solve_dt[1] ; + const double dt_min_blas_step = 1.0e6 * solve_blas_dt[1] ; + + const double Mflop_step = 2 * nnzGlobal + + 3 * 2 * nRowGlobal + + 2 * 2 * nRowGlobal ; + + const double Mflop_mean_fuse = Mflop_step * iter_count / dt_mean_fuse_step ; + const double Mflop_mean_blas = Mflop_step * iter_count_blas / dt_mean_blas_step ; + + const double Mflop_max_fuse = Mflop_step * iter_count / dt_min_fuse_step ; + const double Mflop_max_blas = Mflop_step * iter_count_blas / dt_min_blas_step ; + + fprintf(print_file,"%8d , %8d , %8d , %8d , %10g , %10g , %10g , %10g , %10g , %10g , %d\n", + np , nt , nRowGlobal , nnzGlobal , + Mflop_mean_fuse , Mflop_max_fuse , + Mflop_mean_blas , Mflop_max_blas , + error[1] / error[0] , error[2] / error[0] , iter_count ); + fflush(print_file); + } + } + + free( xexact ); + free( x_blas ); + free( x ); + free( b ); + 
} + free( matrix.A_a ); + free( matrix.A_ia ); + free( matrix.A_pc ); + free( matrix.p_recv_pc ); + free( matrix.p_send_pc ); + free( matrix.p_send_id ); + } + + if ( nt ) { TPI_Finalize(); } + +#ifdef HAVE_MPI + MPI_Finalize(); +#endif + + return 0 ; +} + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ + +static +void hpccg_alloc_and_fill( const int np , + const int my_p , + const int gbox[][2] , + const int ghost , + struct distributed_crs_matrix * const matrix ) +{ + int (* const pbox)[3][2] = (int (*)[3][2]) malloc( sizeof(int)*np*3*2 ); + + const int (* const my_box)[2] = (const int (*)[2]) pbox[my_p] ; + + int my_uses_box[3][2] ; + int * map_local_ord = NULL; + + matrix->n_local_row = 0 ; + matrix->n_internal_row = 0 ; + matrix->A_pc = NULL ; + matrix->A_ia = NULL ; + matrix->A_a = NULL ; + + matrix->p_size = np ; + matrix->p_rank = my_p ; + matrix->p_recv_pc = NULL ; + matrix->p_send_pc = NULL ; + matrix->p_send_id = NULL ; + + /* Partition the global box */ + box_partition_rcb( np , gbox , pbox ); + + /* Upper bound */ + map_local_ord = (int *) malloc( sizeof(int) * + ( 2 * ghost + my_box[0][1]- my_box[0][0] ) * + ( 2 * ghost + my_box[1][1]- my_box[1][0] ) * + ( 2 * ghost + my_box[2][1]- my_box[2][0] ) ); + + /* Generate local layout with ghosting. */ + box_partition_map( np, my_p, gbox, + (const int (* const)[3][2]) pbox, + ghost, + my_uses_box , map_local_ord , + & matrix->n_internal_row , + & matrix->n_local_row , + & matrix->n_local_column , + & matrix->p_recv_pc , + & matrix->p_send_pc , + & matrix->p_send_id ); + + { + const int nrow = matrix->n_local_row ; + int * const pc = (int *) malloc( sizeof(int) * ( nrow + 1 ) ); + int * ia = NULL ; + MATRIX_SCALAR * a = NULL ; + + int ix , iy , iz ; + int sx , sy , sz ; + + /* Number of non zeros in each matrix row, + * then prefix the array for offsets. 
+ */ + pc[0] = 0 ; + + for ( iz = my_box[2][0] ; iz < my_box[2][1] ; ++iz ) { + for ( iy = my_box[1][0] ; iy < my_box[1][1] ; ++iy ) { + for ( ix = my_box[0][0] ; ix < my_box[0][1] ; ++ix ) { + const int irow = box_map_local( (const int (*const)[2]) my_uses_box, map_local_ord, ix, iy, iz ); + int count = 1 ; /* Count the diagonal */ + + /* Count the off-diagonal terms to follow */ + for ( sz = -1 ; sz <= 1 ; ++sz ) { + for ( sy = -1 ; sy <= 1 ; ++sy ) { + for ( sx = -1 ; sx <= 1 ; ++sx ) { + const int g_ix = ix + sx ; + const int g_iy = iy + sy ; + const int g_iz = iz + sz ; + + if ( my_uses_box[0][0] <= g_ix && g_ix < my_uses_box[0][1] && + my_uses_box[1][0] <= g_iy && g_iy < my_uses_box[1][1] && + my_uses_box[2][0] <= g_iz && g_iz < my_uses_box[2][1] && + ! ( sz == 0 && sy == 0 && sx == 0 ) ) { + /* This column is within global bounds and is not a diagonal */ + ++count ; + } + } + } + } + pc[ irow + 1 ] = count ; + } + } + } + + for ( ix = 0 ; ix < nrow ; ++ix ) { pc[ix+1] += pc[ix] ; } + + ia = (int *) malloc( sizeof(int) * pc[ nrow ] ); + a = (MATRIX_SCALAR *) malloc( sizeof(MATRIX_SCALAR) * pc[ nrow ] ); + + for ( iz = my_box[2][0] ; iz < my_box[2][1] ; ++iz ) { + for ( iy = my_box[1][0] ; iy < my_box[1][1] ; ++iy ) { + for ( ix = my_box[0][0] ; ix < my_box[0][1] ; ++ix ) { + const int irow = box_map_local( (const int (*const)[2]) my_uses_box, map_local_ord, ix, iy, iz ); + int ipc = pc[ irow ]; + + /* Diagonal term first */ + ia[ ipc ] = irow ; + a[ ipc ] = 27.0f ; + ++ipc ; + + /* Off-diagonal terms to follow */ + for ( sz = -1 ; sz <= 1 ; ++sz ) { + for ( sy = -1 ; sy <= 1 ; ++sy ) { + for ( sx = -1 ; sx <= 1 ; ++sx ) { + const int g_ix = ix + sx ; + const int g_iy = iy + sy ; + const int g_iz = iz + sz ; + + if ( my_uses_box[0][0] <= g_ix && g_ix < my_uses_box[0][1] && + my_uses_box[1][0] <= g_iy && g_iy < my_uses_box[1][1] && + my_uses_box[2][0] <= g_iz && g_iz < my_uses_box[2][1] && + ! 
( sz == 0 && sy == 0 && sx == 0 ) ) { + /* Column is within global bounds and is not a diagonal */ + /* 'icol' is mapped for communication */ + + const int icol = + box_map_local( (const int (*const)[2]) my_uses_box, map_local_ord, g_ix, g_iy, g_iz ); + + if ( icol < 0 ) { abort(); } + + ia[ ipc ] = icol ; + a[ ipc ] = -1.0f ; + ++ipc ; + } + } + } + } + if ( ipc != pc[ irow + 1 ] ) { abort(); } + } + } + } + + matrix->A_pc = pc ; + matrix->A_ia = ia ; + matrix->A_a = a ; + } + + free( map_local_ord ); + free( pbox ); +} + diff --git a/mkl/basic/optional/ThreadPool/test/hhpccg/tpi_vector.c b/mkl/basic/optional/ThreadPool/test/hhpccg/tpi_vector.c new file mode 100644 index 0000000..e5cc365 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hhpccg/tpi_vector.c @@ -0,0 +1,277 @@ +#include +#include + +#include +#include +#include + +#if defined( HAVE_MPI ) +#include +#endif + +/*--------------------------------------------------------------------*/ + +struct tpi_work_vector { + VECTOR_SCALAR alpha ; + VECTOR_SCALAR beta ; + const VECTOR_SCALAR * x ; + const VECTOR_SCALAR * y ; + VECTOR_SCALAR * w ; + int n ; +}; + +void tpi_work_span( TPI_Work * const work , const int n , + int * const iBeg , int * const iEnd ) +{ + const int chunk = ( n + work->count - 1 ) / work->count ; + const int i_end = chunk + ( *iBeg = chunk * work->rank ); + + *iEnd = n < i_end ? 
n : i_end ; +} + +/*--------------------------------------------------------------------*/ + +static void tpi_work_fill( TPI_Work * work ) /* per-thread task: w[i] = alpha over this thread's span */ +{ + const struct tpi_work_vector * const h = + (struct tpi_work_vector *) work->info ; + + const VECTOR_SCALAR alpha = h->alpha ; + VECTOR_SCALAR * const w = h->w ; + + int i , iEnd ; + + tpi_work_span( work , h->n , & i , & iEnd ); + + for ( ; i < iEnd ; ++i ) { w[i] = alpha ; } +} + +void tpi_fill( int n , VECTOR_SCALAR alpha , VECTOR_SCALAR * x ) /* x[0..n) = alpha */ +{ + struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; + tmp.alpha = alpha ; + tmp.w = x ; + tmp.n = n ; + TPI_Run_threads( tpi_work_fill , & tmp , 0 ); +} + +/*--------------------------------------------------------------------*/ + +static void tpi_work_scale( TPI_Work * work ) /* per-thread task: w[i] *= beta over this thread's span */ +{ + const struct tpi_work_vector * const h = + (struct tpi_work_vector *) work->info ; + + const VECTOR_SCALAR beta = h->beta ; + VECTOR_SCALAR * const w = h->w ; + + int i , iEnd ; + + tpi_work_span( work , h->n , & i , & iEnd ); + + for ( ; i < iEnd ; ++i ) { w[i] *= beta ; } +} + +void tpi_scale( int n , const VECTOR_SCALAR alpha , VECTOR_SCALAR * x ) /* x[0..n) *= alpha */ +{ + struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; + tmp.beta = alpha ; /* tpi_work_scale multiplies by the 'beta' field, so the scale factor must be stored there */ + tmp.w = x ; + tmp.n = n ; + TPI_Run_threads( tpi_work_scale , & tmp , 0 ); +} + +/*--------------------------------------------------------------------*/ + +static void tpi_work_copy( TPI_Work * work ) /* per-thread task: w[i] = x[i] over this thread's span */ +{ + const struct tpi_work_vector * const h = + (struct tpi_work_vector *) work->info ; + + const VECTOR_SCALAR * const x = h->x ; + VECTOR_SCALAR * const w = h->w ; + + int i , iEnd ; + + tpi_work_span( work , h->n , & i , & iEnd ); + + for ( ; i < iEnd ; ++i ) { w[i] = x[i] ; } +} + +void tpi_copy( int n , const VECTOR_SCALAR * x , VECTOR_SCALAR * y ) /* y[0..n) = x[0..n) */ +{ + struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; + tmp.x = x ; + tmp.w = y ; + tmp.n = n ; + TPI_Run_threads( tpi_work_copy , & tmp , 0 ); +} +
+/*--------------------------------------------------------------------*/ + +static void tpi_work_axpby( TPI_Work * work ) +{ + const struct tpi_work_vector * const h = + (struct tpi_work_vector *) work->info ; + + const VECTOR_SCALAR alpha = h->alpha ; + const VECTOR_SCALAR beta = h->beta ; + const VECTOR_SCALAR * const x = h->x ; + VECTOR_SCALAR * const w = h->w ; + + int i , iEnd ; + + tpi_work_span( work , h->n , & i , & iEnd ); + + for ( ; i < iEnd ; ++i ) { w[i] = alpha * x[i] + beta * w[i] ; } +} + +void tpi_axpby( int n , VECTOR_SCALAR alpha , const VECTOR_SCALAR * x , + VECTOR_SCALAR beta , VECTOR_SCALAR * y ) +{ + struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; + tmp.alpha = alpha ; + tmp.beta = beta ; + tmp.x = x ; + tmp.w = y ; + tmp.n = n ; + + TPI_Run_threads( tpi_work_axpby , & tmp , 0 ); +} + +/*--------------------------------------------------------------------*/ + +static void tpi_work_axpy( TPI_Work * work ) +{ + const struct tpi_work_vector * const h = + (struct tpi_work_vector *) work->info ; + + const VECTOR_SCALAR alpha = h->alpha ; + const VECTOR_SCALAR * const x = h->x ; + VECTOR_SCALAR * const w = h->w ; + + int i , iEnd ; + + tpi_work_span( work , h->n , & i , & iEnd ); + + for ( ; i < iEnd ; ++i ) { w[i] += alpha * x[i] ; } +} + +void tpi_axpy( int n , VECTOR_SCALAR alpha , const VECTOR_SCALAR * x , + VECTOR_SCALAR * y ) +{ + struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; + tmp.alpha = alpha ; + tmp.x = x ; + tmp.w = y ; + tmp.n = n ; + + TPI_Run_threads( tpi_work_axpy , & tmp , 0 ); +} + +/*--------------------------------------------------------------------*/ + +static void tpi_work_xpby( TPI_Work * work ) +{ + const struct tpi_work_vector * const h = + (struct tpi_work_vector *) work->info ; + + const VECTOR_SCALAR beta = h->beta ; + const VECTOR_SCALAR * const x = h->x ; + VECTOR_SCALAR * const w = h->w ; + + int i , iEnd ; + + tpi_work_span( work , h->n , & i , & iEnd ); + + for ( ; i < 
iEnd ; ++i ) { w[i] = x[i] + beta * w[i] ; } +} + +void tpi_xpby( int n , const VECTOR_SCALAR * x , VECTOR_SCALAR beta , + VECTOR_SCALAR * y ) +{ + struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; + tmp.beta = beta ; + tmp.x = x ; + tmp.w = y ; + tmp.n = n ; + + TPI_Run_threads( tpi_work_xpby , & tmp , 0 ); +} + +/*--------------------------------------------------------------------*/ + +static void tpi_work_dot_partial( TPI_Work * work ) +{ + const struct tpi_work_vector * const h = + (struct tpi_work_vector *) work->info ; + + const VECTOR_SCALAR * const x = h->x ; + const VECTOR_SCALAR * const y = h->y ; + double * const s = (double *) work->reduce ; + double tmp = *s ; + int i , iEnd ; + + tpi_work_span( work , h->n , & i , & iEnd ); + + for ( ; i < iEnd ; ++i ) { tmp += x[i] * y[i] ; } + + *s = tmp ; +} + +static void tpi_work_dot_partial_self( TPI_Work * work ) +{ + const struct tpi_work_vector * const h = + (struct tpi_work_vector *) work->info ; + + const VECTOR_SCALAR * const x = h->x ; + double * const s = (double *) work->reduce ; + double tmp = *s ; + + int i , iEnd ; + + tpi_work_span( work , h->n , & i , & iEnd ); + + for ( ; i < iEnd ; ++i ) { const VECTOR_SCALAR d = x[i] ; tmp += d * d ; } + + *s = tmp ; +} + +static void tpi_work_dot_join( TPI_Work * work , const void * src ) +{ + *((double *) ( work->reduce) ) += *((const double *) src); +} + +static void tpi_work_dot_init( TPI_Work * work ) +{ + *((double *) ( work->reduce) ) = 0 ; +} + +double tpi_dot( int n , const VECTOR_SCALAR * x , const VECTOR_SCALAR * y ) +{ + struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; + double result = 0.0 ; + tmp.x = x ; + tmp.y = y ; + tmp.n = n ; + if ( x != y ) { + TPI_Run_threads_reduce( tpi_work_dot_partial , & tmp , + tpi_work_dot_join , tpi_work_dot_init , + sizeof(result) , & result ); + } + else { + TPI_Run_threads_reduce( tpi_work_dot_partial_self , & tmp , + tpi_work_dot_join , tpi_work_dot_init , + sizeof(result) 
, & result ); + } +#if defined HAVE_MPI + { + double tmp = result ; + MPI_Allreduce( & tmp , & result , 1 , MPI_DOUBLE , MPI_SUM , MPI_COMM_WORLD ); + } +#endif + return result ; +} + +/*--------------------------------------------------------------------*/ + diff --git a/mkl/basic/optional/ThreadPool/test/hhpccg/tpi_vector.h b/mkl/basic/optional/ThreadPool/test/hhpccg/tpi_vector.h new file mode 100644 index 0000000..fba628f --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hhpccg/tpi_vector.h @@ -0,0 +1,30 @@ + +#ifndef tpi_vector_h +#define tpi_vector_h + +#define VECTOR_SCALAR float +#define MATRIX_SCALAR float + +void tpi_fill( int n , VECTOR_SCALAR alpha , VECTOR_SCALAR * x ); + +void tpi_scale( int n , const VECTOR_SCALAR alpha , VECTOR_SCALAR * x ); + +void tpi_copy( int n , const VECTOR_SCALAR * x , VECTOR_SCALAR * y ); + +void tpi_xpby( int n , const VECTOR_SCALAR * x , + VECTOR_SCALAR beta , VECTOR_SCALAR * y ); + +void tpi_axpy( int n , VECTOR_SCALAR alpha , const VECTOR_SCALAR * x , + VECTOR_SCALAR * y ); + +void tpi_axpby( int n , VECTOR_SCALAR alpha , const VECTOR_SCALAR * x , + VECTOR_SCALAR beta , VECTOR_SCALAR * y ); + +double tpi_dot( int n , const VECTOR_SCALAR * x , + const VECTOR_SCALAR * y ); + +void tpi_work_span( TPI_Work * const work , const int n , + int * const iBeg , int * const iEnd ); + +#endif + diff --git a/mkl/basic/optional/ThreadPool/test/hpccg/BoxPartition.c b/mkl/basic/optional/ThreadPool/test/hpccg/BoxPartition.c new file mode 100644 index 0000000..ef860ae --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hpccg/BoxPartition.c @@ -0,0 +1,487 @@ + +#include +#include + +#include + +/*--------------------------------------------------------------------*/ + +static int box_map_local_entry( const int box[][2] , + const int ghost , + int local_x , + int local_y , + int local_z ) +{ + const int nx = 2 * ghost + box[0][1] - box[0][0] ; + const int ny = 2 * ghost + box[1][1] - box[1][0] ; + const int nz = 2 * ghost + box[2][1] 
- box[2][0] ; + int result = -1 ; + + local_x += ghost ; + local_y += ghost ; + local_z += ghost ; + + if ( 0 <= local_x && local_x < nx && + 0 <= local_y && local_y < ny && + 0 <= local_z && local_z < nz ) { + + result = local_z * ny * nx + local_y * nx + local_x ; + } + return result ; +} + +int box_map_local( const int box_local[][2] , + const int ghost , + const int box_local_map[] , + const int local_x , + const int local_y , + const int local_z ) +{ + int result = box_map_local_entry(box_local,ghost,local_x,local_y,local_z); + + if ( 0 <= result ) { + result = box_local_map[ result ]; + } + + return result ; +} + +/*--------------------------------------------------------------------*/ +/* Recursively split a box into into (up-ip) sub-boxes */ + +static +void box_partition( int ip , int up , int axis , + const int box[3][2] , + int p_box[][3][2] ) +{ + const int np = up - ip ; + if ( 1 == np ) { + p_box[ip][0][0] = box[0][0] ; p_box[ip][0][1] = box[0][1] ; + p_box[ip][1][0] = box[1][0] ; p_box[ip][1][1] = box[1][1] ; + p_box[ip][2][0] = box[2][0] ; p_box[ip][2][1] = box[2][1] ; + } + else { + const int n = box[ axis ][1] - box[ axis ][0] ; + const int np_low = np / 2 ; /* Rounded down */ + const int np_upp = np - np_low ; + + const int n_upp = (int) (((double) n) * ( ((double)np_upp) / ((double)np))); + const int n_low = n - n_upp ; + const int next_axis = ( axis + 2 ) % 3 ; + + if ( np_low ) { /* P = [ip,ip+np_low) */ + int dbox[3][2] ; + dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; + dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; + dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ; + + dbox[ axis ][1] = dbox[ axis ][0] + n_low ; + + box_partition( ip, ip + np_low, next_axis, + (const int (*)[2]) dbox, p_box ); + } + + if ( np_upp ) { /* P = [ip+np_low,ip+np_low+np_upp) */ + int dbox[3][2] ; + dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; + dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; + dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ; + + ip 
+= np_low ; + dbox[ axis ][0] += n_low ; + dbox[ axis ][1] = dbox[ axis ][0] + n_upp ; + + box_partition( ip, ip + np_upp, next_axis, + (const int (*)[2]) dbox, p_box ); + } + } +} + +/*--------------------------------------------------------------------*/ + +static int box_disjoint( const int a[3][2] , const int b[3][2] ) +{ + return a[0][1] <= b[0][0] || b[0][1] <= a[0][0] || + a[1][1] <= b[1][0] || b[1][1] <= a[1][0] || + a[2][1] <= b[2][0] || b[2][1] <= a[2][0] ; +} + +static void resize_int( int ** a , int * allocLen , int newLen ) +{ + int k = 32; + while ( k < newLen ) { k <<= 1 ; } + if ( NULL == *a ) + { *a = malloc( sizeof(int)*(*allocLen = k) ); } + else if ( *allocLen < k ) + { *a = realloc(*a , sizeof(int)*(*allocLen = k)); } +} + +static void box_partition_maps( + const int np , + const int my_p , + const int pbox[][3][2] , + const int ghost , + int ** map_local_id , + int ** map_recv_pc , + int ** map_send_pc , + int ** map_send_id ) +{ + const int (*my_box)[2] = pbox[my_p] ; + + const int my_ix = my_box[0][0] ; + const int my_iy = my_box[1][0] ; + const int my_iz = my_box[2][0] ; + const int my_nx = my_box[0][1] - my_box[0][0] ; + const int my_ny = my_box[1][1] - my_box[1][0] ; + const int my_nz = my_box[2][1] - my_box[2][0] ; + + const int my_use_nx = 2 * ghost + my_nx ; + const int my_use_ny = 2 * ghost + my_ny ; + const int my_use_nz = 2 * ghost + my_nz ; + + const int id_length = my_use_nx * my_use_ny * my_use_nz ; + + int * local_id = (int *) malloc( id_length * sizeof(int) ); + int * recv_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); + int * send_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); + + int * send_id = NULL ; + int send_id_size = 0 ; + + int iLocal , iSend ; + int i ; + + int my_use_box[3][2] ; + + my_use_box[0][0] = my_box[0][0] - ghost ; + my_use_box[0][1] = my_box[0][1] + ghost ; + my_use_box[1][0] = my_box[1][0] - ghost ; + my_use_box[1][1] = my_box[1][1] + ghost ; + my_use_box[2][0] = my_box[2][0] - ghost ; + 
my_use_box[2][1] = my_box[2][1] + ghost ; + + for ( i = 0 ; i < id_length ; ++i ) { local_id[i] = -1 ; } + + iSend = 0 ; + iLocal = 0 ; + + /* The vector space is partitioned by processors */ + + for ( i = 0 ; i < np ; ++i ) { + const int ip = ( i + my_p ) % np ; + recv_pc[i] = iLocal ; + send_pc[i] = iSend ; + + if ( ! box_disjoint( (const int (*)[2]) my_use_box , pbox[ip] ) ) { + const int p_ix = pbox[ip][0][0] ; + const int p_iy = pbox[ip][1][0] ; + const int p_iz = pbox[ip][2][0] ; + const int p_ex = pbox[ip][0][1] ; + const int p_ey = pbox[ip][1][1] ; + const int p_ez = pbox[ip][2][1] ; + + int local_x , local_y , local_z ; + + /* Run the span of global cells that my processor uses */ + + for ( local_z = -ghost ; local_z < my_nz + ghost ; ++local_z ) { + for ( local_y = -ghost ; local_y < my_ny + ghost ; ++local_y ) { + for ( local_x = -ghost ; local_x < my_nx + ghost ; ++local_x ) { + + const int global_z = local_z + my_iz ; + const int global_y = local_y + my_iy ; + const int global_x = local_x + my_ix ; + + const int entry = + box_map_local_entry(my_box,ghost,local_x,local_y,local_z); + + if ( entry < 0 ) { abort(); } + + if ( p_iz <= global_z && global_z < p_ez && + p_iy <= global_y && global_y < p_ey && + p_ix <= global_x && global_x < p_ex ) { + + /* This ordinal is owned by processor 'ip' */ + + local_id[ entry ] = iLocal++ ; + +#if defined(DEBUG_PRINT) +if ( my_p != ip ) { + fprintf(stdout," (%d,%d,%d) : P%d recv at local %d from P%d\n", + global_x,global_y,global_z,my_p,local_id[entry],ip); + fflush(stdout); +} +#endif + } + + /* If in my ownership and used by the other processor */ + if ( my_p != ip && + /* In my ownership: */ + ( 0 <= local_z && local_z < my_nz && + 0 <= local_y && local_y < my_ny && + 0 <= local_x && local_x < my_nx ) && + /* In other processors usage: */ + ( p_iz - ghost <= global_z && global_z < p_ez + ghost && + p_iy - ghost <= global_y && global_y < p_ey + ghost && + p_ix - ghost <= global_x && global_x < p_ex + ghost ) ) { + + 
resize_int( & send_id , & send_id_size , (iSend + 1) ); + send_id[ iSend ] = local_id[ entry ] ; + ++iSend ; + +#if defined(DEBUG_PRINT) +{ + fprintf(stdout," (%d,%d,%d) : P%d send at local %d to P%d\n", + global_x,global_y,global_z,my_p,local_id[entry],ip); + fflush(stdout); +} +#endif + } + } + } + } + } + } + recv_pc[np] = iLocal ; + send_pc[np] = iSend ; + + *map_local_id = local_id ; + *map_recv_pc = recv_pc ; + *map_send_pc = send_pc ; + *map_send_id = send_id ; +} + +void box_partition_rcb( const int np , + const int my_p , + const int root_box[][2] , + const int ghost , + int (**pbox)[3][2] , + int ** map_local_id , + int ** map_recv_pc , + int ** map_send_pc , + int ** map_send_id ) +{ + *pbox = (int (*)[3][2]) malloc( sizeof(int) * np * 3 * 2 ); + + box_partition( 0 , np , 2 , root_box , *pbox ); + + box_partition_maps( np , my_p , (const int (*)[3][2]) *pbox , ghost , + map_local_id , map_recv_pc , + map_send_pc , map_send_id ); +} + +/*--------------------------------------------------------------------*/ + +#ifdef UNIT_TEST + +static int box_contain( const int a[3][2] , const int b[3][2] ) +{ + return a[0][0] <= b[0][0] && b[0][1] <= a[0][1] && + a[1][0] <= b[1][0] && b[1][1] <= a[1][1] && + a[2][0] <= b[2][0] && b[2][1] <= a[2][1] ; +} + +static void box_print( FILE * fp , const int a[][2] ) +{ + fprintf(fp,"{ [ %d , %d ) , [ %d , %d ) , [ %d , %d ) }", + a[0][0] , a[0][1] , + a[1][0] , a[1][1] , + a[2][0] , a[2][1] ); +} + +static void test_box( const int box[3][2] , const int np ) +{ + const int ncell_box = box[0][1] * box[1][1] * box[2][1] ; + int ncell_total = 0 ; + int ncell_min = ncell_box ; + int ncell_max = 0 ; + int (*pbox)[3][2] ; + int i , j ; + + pbox = (int (*)[3][2]) malloc( sizeof(int) * np * 3 * 2 ); + + box_partition( 0 , np , 2 , box , pbox ); + + for ( i = 0 ; i < np ; ++i ) { + const int ncell = ( pbox[i][0][1] - pbox[i][0][0] ) * + ( pbox[i][1][1] - pbox[i][1][0] ) * + ( pbox[i][2][1] - pbox[i][2][0] ); + + if ( ! 
box_contain( box , (const int (*)[2]) pbox[i] ) ) { + fprintf(stdout," OUT OF BOUNDS pbox[%d/%d] = ",i,np); + box_print(stdout,(const int (*)[2]) pbox[i]); + fprintf(stdout,"\n"); + abort(); + } + + for ( j = i + 1 ; j < np ; ++j ) { + if ( ! box_disjoint( (const int (*)[2]) pbox[i] , + (const int (*)[2]) pbox[j] ) ) { + fprintf(stdout," NOT DISJOINT pbox[%d/%d] = ",i,np); + box_print(stdout, (const int (*)[2]) pbox[i]); + fprintf(stdout,"\n"); + fprintf(stdout," pbox[%d/%d] = ",j,np); + box_print(stdout, (const int (*)[2]) pbox[j]); + fprintf(stdout,"\n"); + abort(); + } + } + ncell_total += ncell ; + + if ( ncell_max < ncell ) { ncell_max = ncell ; } + if ( ncell < ncell_min ) { ncell_min = ncell ; } + } + + if ( ncell_total != ncell_box ) { + fprintf(stdout," WRONG CELL COUNT NP = %d\n",np); + abort(); + } + fprintf(stdout,"NP = %d, total = %d, avg = %d, min = %d, max = %d\n", + np,ncell_box,ncell_box/np,ncell_min,ncell_max); + + free( pbox ); +} + +/*--------------------------------------------------------------------*/ + +static void test_maps( const int root_box[][2] , const int np ) +{ + const int ghost = 1 ; + const int nx_global = root_box[0][1] - root_box[0][0] ; + const int ny_global = root_box[1][1] - root_box[1][0] ; + int ieq , i , j ; + int (*pbox)[3][2] ; + int **local_values ; + int **map_local_id ; + int **map_recv_pc ; + int **map_send_pc ; + int **map_send_id ; + + pbox = (int (*)[3][2]) malloc( sizeof(int) * np * 3 * 2 ); + + box_partition( 0 , np , 2 , root_box , pbox ); + + local_values = (int **) malloc( sizeof(int*) * np ); + map_local_id = (int **) malloc( sizeof(int*) * np ); + map_recv_pc = (int **) malloc( sizeof(int*) * np ); + map_send_pc = (int **) malloc( sizeof(int*) * np ); + map_send_id = (int **) malloc( sizeof(int*) * np ); + + /* Set each local value to the global equation number */ + + for ( ieq = i = 0 ; i < np ; ++i ) { + const int (*mybox)[2] = (const int (*)[2]) pbox[i] ; + const int nx = mybox[0][1] - mybox[0][0] ; + 
const int ny = mybox[1][1] - mybox[1][0] ; + const int nz = mybox[2][1] - mybox[2][0] ; + int ix , iy , iz ; + + /* Generate the partition maps for this rank */ + box_partition_maps( np , i , (const int (*)[3][2]) pbox , ghost , + & map_local_id[i] , & map_recv_pc[i] , + & map_send_pc[i] , & map_send_id[i] ); + + local_values[i] = (int *) malloc( sizeof(int) * map_recv_pc[i][np] ); + + for ( iz = -ghost ; iz < nz + ghost ; ++iz ) { + for ( iy = -ghost ; iy < ny + ghost ; ++iy ) { + for ( ix = -ghost ; ix < nx + ghost ; ++ix ) { + const int ieq = box_map_local(mybox,ghost,map_local_id[i],ix,iy,iz); + + if ( 0 <= ieq ) { + const int ix_global = ix + mybox[0][0] ; + const int iy_global = iy + mybox[1][0] ; + const int iz_global = iz + mybox[2][0] ; + + if ( root_box[0][0] <= ix_global && ix_global < root_box[0][1] && + root_box[1][0] <= iy_global && iy_global < root_box[1][1] && + root_box[2][0] <= iz_global && iz_global < root_box[2][1] ) { + + local_values[i][ ieq ] = ix_global + + iy_global * nx_global + + iz_global * nx_global * ny_global ; + } + else { + local_values[i][ ieq ] = -1 ; + } + } + } + } + } + } + + /* Pair-wise compare the local values */ + /* i == receiving processor rank */ + /* ip == sending processor rank */ + /* j == receiving processor data entry for message from 'ip' */ + /* jp == sending processor data entry for message to 'i' */ + + for ( i = 0 ; i < np ; ++i ) { + for ( j = 1 ; j < np ; ++j ) { + const int ip = ( i + j ) % np ; + const int jp = ( i + np - ip ) % np ; + const int nrecv = map_recv_pc[i] [j+1] - map_recv_pc[i] [j] ; + const int nsend = map_send_pc[ip][jp+1] - map_send_pc[ip][jp] ; + int k ; + if ( nrecv != nsend ) { + fprintf(stderr,"P%d recv %d from P%d\n",i,nrecv,ip); + fprintf(stderr,"P%d send %d to P%d\n",ip,nsend,i); + abort(); + } + for ( k = 0 ; k < nrecv ; ++k ) { + const int irecv = map_recv_pc[i][j] + k ; + const int isend = map_send_pc[ip][jp] + k ; + const int val_irecv = local_values[i][irecv] ; + const int 
val_isend = local_values[ip][ map_send_id[ip][isend] ] ; + if ( val_irecv != val_isend ) { + fprintf(stderr,"P%d recv[%d] = %d , from P%d\n",i,k,val_irecv,ip); + fprintf(stderr,"P%d send[%d] = %d , to P%d\n",ip,k,val_isend,i); + abort(); + } + } + } + } + + for ( i = 0 ; i < np ; ++i ) { + free( map_local_id[i] ); + free( map_recv_pc[i] ); + free( map_send_pc[i] ); + free( map_send_id[i] ); + free( local_values[i] ); + } + free( map_send_id ); + free( map_send_pc ); + free( map_recv_pc ); + free( map_local_id ); + free( local_values ); + free( pbox ); +} + +/*--------------------------------------------------------------------*/ + +int main( int argc , char * argv[] ) +{ + int np_max = 256 ; + int box[3][2] = { { 0 , 64 } , { 0 , 64 } , { 0 , 64 } }; + int np = 0 ; + + switch( argc ) { + case 3: + sscanf(argv[1],"%d",&np); + sscanf(argv[2],"%dx%dx%d",& box[0][1] , & box[1][1] , & box[2][1] ); + if ( 0 < np ) { test_box( (const int (*)[2]) box , np ); } + if ( 0 < np ) { test_maps( (const int (*)[2]) box , np ); } + break ; + default: + for ( np = 1 ; np <= np_max ; ++np ) { + test_box( (const int (*)[2]) box , np ); + test_maps( (const int (*)[2]) box , np ); + } + break ; + } + return 0 ; +} + +#endif + + diff --git a/mkl/basic/optional/ThreadPool/test/hpccg/BoxPartition.h b/mkl/basic/optional/ThreadPool/test/hpccg/BoxPartition.h new file mode 100644 index 0000000..3dfd839 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hpccg/BoxPartition.h @@ -0,0 +1,64 @@ + +/** \brief Partition a { [ix,jx) X [iy,jy) X [iz,jz) } box. + * + * Use recursive coordinate bisection to partition a box + * into np disjoint sub-boxes. Allocate (via malloc) and + * populate the sub-boxes, mapping the local (x,y,z) to + * a local ordinal, and mappings for the send-recv messages + * to update the ghost cells. 
+ * + * usage: + * + * my_nx = pbox[my_p][0][1] - pbox[my_p][0][0] ; + * my_ny = pbox[my_p][1][1] - pbox[my_p][1][0] ; + * my_nz = pbox[my_p][2][1] - pbox[my_p][2][0] ; + * + * for ( x = -ghost ; x < my_nx + ghost ; ++x ) { + * for ( y = -ghost ; y < my_ny + ghost ; ++y ) { + * for ( z = -ghost ; z < my_nz + ghost ; ++z ) { + * const int x_global = x + pbox[my_p][0][0] ; + * const int y_global = y + pbox[my_p][1][0] ; + * const int z_global = z + pbox[my_p][2][0] ; + * + * const int local_ordinal = + * box_map_local( pbox[my_p], ghost, map_local_id, x, y, z ); + * + * if ( 0 <= local_ordinal ) { + * } + * } + * + * for ( i = 1 ; i < np ; ++i ) { + * const int recv_processor = ( my_p + i ) % np ; + * const int recv_ordinal_begin = map_recv_pc[i]; + * const int recv_ordinal_end = map_recv_pc[i+1]; + * } + * + * for ( i = 1 ; i < np ; ++i ) { + * const int send_processor = ( my_p + i ) % np ; + * const int send_map_begin = map_send_pc[i]; + * const int send_map_end = map_send_pc[i+1]; + * for ( j = send_map_begin ; j < send_map_end ; ++j ) { + * send_ordinal = map_send_id[j] ; + * } + * } + */ +void box_partition_rcb( + const int np /**< [in] Number of partitions */ , + const int my_p /**< [in] My partition rank */ , + const int root_box[][2] /**< [in] 3D Box to partition */ , + const int ghost /**< [in] Ghost cell boundary */ , + int (**pbox)[3][2] /**< [out] Partition's 3D boxes */ , + int ** map_local_id /**< [out] Map local cells */ , + int ** map_recv_pc /**< [out] Receive spans per processor */ , + int ** map_send_pc /**< [out] Send prefix counts per processor */ , + int ** map_send_id /**< [out] Send message ordinals */ ); + +/* \brief Map a local (x,y,z) to a local ordinal. 
+ */ +int box_map_local( const int box_local[][2] , + const int ghost , + const int map_local_id[] , + const int local_x , + const int local_y , + const int local_z ); + diff --git a/mkl/basic/optional/ThreadPool/test/hpccg/CGSolver.c b/mkl/basic/optional/ThreadPool/test/hpccg/CGSolver.c new file mode 100644 index 0000000..2670bf7 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hpccg/CGSolver.c @@ -0,0 +1,248 @@ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +/*--------------------------------------------------------------------*/ + +#ifdef HAVE_MPI + +#define TIMER( DT , F ) \ + { double tb , te , tbg , teg , dt ; \ + tb = TPI_Walltime(); \ + F ; \ + te = TPI_Walltime(); \ + MPI_Allreduce(&tb, &tbg, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); \ + MPI_Allreduce(&te, &teg, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); \ + DT[0] += dt = teg - tbg ; \ + DT[1] += dt * dt ; } + +#else + +#define TIMER( DT , F ) \ + { const double tb = TPI_Walltime(); double dt ; \ + F ; \ + DT[0] += dt = TPI_Walltime() - tb ; \ + DT[1] += dt * dt ; } + +#endif + +/*--------------------------------------------------------------------*/ + +static +VECTOR_SCALAR comm_sum( VECTOR_SCALAR v ) +{ +#ifdef HAVE_MPI + VECTOR_SCALAR result = 0 ; + if ( sizeof(VECTOR_SCALAR) == sizeof(double) ) { + MPI_Allreduce( & v , & result , 1 , MPI_DOUBLE , MPI_SUM , MPI_COMM_WORLD ); + } + else { + MPI_Allreduce( & v , & result , 1 , MPI_FLOAT , MPI_SUM , MPI_COMM_WORLD ); + } + return result ; +#else + return v ; +#endif +} + +#ifdef HAVE_MPI +static +void comm_rhs_vector( const struct cgsolve_data * const data , + VECTOR_SCALAR * const vec ) +{ + const int np = data->np ; + const int my_p = data->ip ; + const int * const recv_pc = data->recv_pc ; + const int * const send_pc = data->send_pc ; + const int * const send_id = data->send_id ; + int i , irecv ; + + for ( irecv = 0 , i = 1 ; i < np ; ++i ) { + if ( recv_pc[i] < recv_pc[i+1] 
) ++irecv ; + } + +#ifdef DEBUG_PRINT + fflush(stdout); + MPI_Barrier( MPI_COMM_WORLD ); + fflush(stdout); +#endif + + { + VECTOR_SCALAR * const send_buf = + (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * send_pc[np] ); + + MPI_Request * const recv_request = + (MPI_Request *) malloc( sizeof(MPI_Request) * irecv ); + + MPI_Status * const recv_status = + (MPI_Status *) malloc( sizeof(MPI_Status) * irecv ); + + for ( irecv = 0 , i = 1 ; i < np ; ++i ) { + const int ip = ( i + my_p ) % np ; + const int recv_beg = recv_pc[i]; + const int recv_length = recv_pc[i+1] - recv_beg ; + if ( recv_length ) { +#ifdef DEBUG_PRINT + fprintf(stdout," comm_rhs_vector P%d Irecv P%d : %d\n", + my_p, ip, recv_length ); + fflush(stdout); +#endif + MPI_Irecv( vec + recv_beg , + recv_length * sizeof(VECTOR_SCALAR), MPI_BYTE , + ip , 0 , MPI_COMM_WORLD , recv_request + irecv ); + ++irecv ; + } + } + + /* Gather components into send buffer */ + + for ( i = 0 ; i < send_pc[np] ; ++i ) { + send_buf[i] = vec[ send_id[i] ]; + } + + MPI_Barrier( MPI_COMM_WORLD ); + + for ( i = 1 ; i < np ; ++i ) { + const int ip = ( i + my_p ) % np ; + const int send_beg = send_pc[i]; + const int send_length = send_pc[i+1] - send_beg ; + if ( send_length ) { /* Send to 'i' */ +#ifdef DEBUG_PRINT + fprintf(stdout," comm_rhs_vector P%d Rsend P%d : %d\n", + my_p, ip, send_length ); + fflush(stdout); +#endif + MPI_Rsend( send_buf + send_beg , + send_length * sizeof(VECTOR_SCALAR), MPI_BYTE , + ip , 0 , MPI_COMM_WORLD ); + } + } + + MPI_Waitall( irecv , recv_request , recv_status ); + + free( recv_status ); + free( recv_request ); + free( send_buf ); + } +} +#else +#define comm_rhs_vector( D , V ) /* */ +#endif + +/*--------------------------------------------------------------------*/ + +void cgsolve_set_lhs( const struct cgsolve_data * const data , + const VECTOR_SCALAR * const x , + VECTOR_SCALAR * const b ) +{ + const int nRow = data->nRow ; + const int nVec = data->recv_pc[ data->np ] ; + const int * const 
A_pc = data->A_pc ; + const int * const A_ia = data->A_ia ; + const MATRIX_SCALAR * const A_a = data->A_a ; + + VECTOR_SCALAR * const p = (VECTOR_SCALAR *) malloc( nVec * sizeof(VECTOR_SCALAR) ); + + tpi_copy( nRow , x , p ); + + comm_rhs_vector( data , p ); + + tpi_crs_matrix_apply( nRow, A_pc, A_ia, A_a, p, b ); + + free( p ); +} + +/*--------------------------------------------------------------------*/ + +void cgsolve( const struct cgsolve_data * const data , + const VECTOR_SCALAR * const b , + VECTOR_SCALAR * const x , + int * const iter_count , + VECTOR_SCALAR * const norm_resid , + double * const dt_mxv , + double * const dt_axpby , + double * const dt_dot ) +{ + const int nRow = data->nRow ; + const int nVec = data->recv_pc[ data->np ] ; + const int max_iter = data->max_iter ; + const int print_iter = data->print_iter ; + const int * const A_pc = data->A_pc ; + const int * const A_ia = data->A_ia ; + const MATRIX_SCALAR * const A_a = data->A_a ; + const VECTOR_SCALAR tolerance = data->tolerance ; + + const VECTOR_SCALAR tol_2 = tolerance * tolerance ; + + VECTOR_SCALAR * const r = (VECTOR_SCALAR *) malloc( nRow * sizeof(VECTOR_SCALAR) ); + VECTOR_SCALAR * const p = (VECTOR_SCALAR *) malloc( nVec * sizeof(VECTOR_SCALAR) ); + VECTOR_SCALAR * const Ap = (VECTOR_SCALAR *) malloc( nRow * sizeof(VECTOR_SCALAR) ); + + VECTOR_SCALAR rtrans = 0.0 ; + + int k ; + + tpi_copy( nRow , b , r ); + tpi_copy( nRow , x , p ); + + comm_rhs_vector( data , p ); tpi_crs_matrix_apply( nRow, A_pc, A_ia, A_a, p, Ap ); + + tpi_axpby( nRow , -1.0, Ap, 1.0 , r ); + + /* Include timing dot product for 2 * #iter dot products */ + TIMER( dt_dot , rtrans = comm_sum( tpi_dot( nRow , r , r ) ) ); + + for ( k = 0 ; k < max_iter && tol_2 < rtrans ; ++k ) { + VECTOR_SCALAR alpha ; + VECTOR_SCALAR beta = 0.0 ; + VECTOR_SCALAR pAp = 0.0 ; + + if ( k ) { + const VECTOR_SCALAR oldrtrans = rtrans ; + TIMER( dt_dot , rtrans = comm_sum( tpi_dot( nRow , r , r ) ) ); + beta = rtrans / oldrtrans ; + } + 
+ TIMER( dt_axpby , tpi_axpby( nRow, 1.0, r, beta, p ) ); + + TIMER( dt_mxv , comm_rhs_vector( data , p ); tpi_crs_matrix_apply( nRow, A_pc, A_ia, A_a, p, Ap ) ); + + TIMER( dt_dot , pAp = comm_sum( tpi_dot( nRow , p , Ap ) ) ); + + if ( 0 < fabs( pAp ) ) { + alpha = rtrans / pAp ; + } + else { + alpha = rtrans = 0.0 ; /* Orthogonal, cannot continue */ + } + + if ( ! ( ( k + 1 ) % print_iter ) ) { + fprintf(stdout," cgsolve | r(%d) | = %g\n",k,sqrt(rtrans)); + fflush(stdout); + } + + TIMER( dt_axpby , tpi_axpby( nRow , alpha, p, 1.0, x) ); + TIMER( dt_axpby , tpi_axpby( nRow , -alpha, Ap, 1.0, r) ); + } + + *norm_resid = sqrt( rtrans ); + *iter_count = k ; + + free( Ap ); + free( p ); + free( r ); +} + +/*--------------------------------------------------------------------*/ + diff --git a/mkl/basic/optional/ThreadPool/test/hpccg/CGSolver.h b/mkl/basic/optional/ThreadPool/test/hpccg/CGSolver.h new file mode 100644 index 0000000..0660a01 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hpccg/CGSolver.h @@ -0,0 +1,32 @@ + +#include + +struct cgsolve_data { + int nRow ; + int * A_pc ; + int * A_ia ; + MATRIX_SCALAR * A_a ; + int max_iter ; + int print_iter ; + VECTOR_SCALAR tolerance ; + + int np ; + int ip ; + int * recv_pc ; + int * send_pc ; + int * send_id ; +}; + +void cgsolve_set_lhs( const struct cgsolve_data * data , + const VECTOR_SCALAR * const x , + VECTOR_SCALAR * const b ); + +void cgsolve( const struct cgsolve_data * data , + const VECTOR_SCALAR * const b , + VECTOR_SCALAR * const x , + int * const iter_count , + VECTOR_SCALAR * const norm_resid , + double * const dt_mxv , + double * const dt_axpby , + double * const dt_dot ); + diff --git a/mkl/basic/optional/ThreadPool/test/hpccg/CMakeLists.txt b/mkl/basic/optional/ThreadPool/test/hpccg/CMakeLists.txt new file mode 100644 index 0000000..bfba897 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hpccg/CMakeLists.txt @@ -0,0 +1,83 @@ + +INCLUDE(PackageAddExecutableAndTest) 
+INCLUDE(PackageLibraryMacros) + +#################### + +SET(HEADERS "") +SET(SOURCES "") + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) + +SET(HEADERS ${HEADERS} + ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h + ) + +INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) + +APPEND_SET(HEADERS + BoxPartition.h + CGSolver.h + tpi_vector.h + ) + +#################### + + +PACKAGE_ADD_EXECUTABLE( + test_tpi_hpccg + COMM serial mpi + SOURCES main.c CGSolver.c BoxPartition.c tpi_vector.c + DEPLIBS pthread m + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_hpccg + NAME test_tpi_hpccg_serial_1 + COMM serial + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_hpccg + NAME test_tpi_hpccg_serial_2 + COMM serial + ARGS "threads=2" + DIRECTORY . + XHOSTTYPE AIX + ) + +PACKAGE_ADD_TEST( + test_tpi_hpccg + NAME test_tpi_hpccg_serial_4 + COMM serial + ARGS "threads=4" + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_hpccg + NAME test_tpi_hpccg_mpi_1 + COMM mpi + NUM_MPI_PROCS 1 + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_hpccg + NAME test_tpi_hpccg_mpi_2 + COMM mpi + NUM_MPI_PROCS 2 + DIRECTORY . + ) + +PACKAGE_ADD_TEST( + test_tpi_hpccg + NAME test_tpi_hpccg_mpi_4 + COMM mpi + NUM_MPI_PROCS 4 + DIRECTORY . 
+ ) + + diff --git a/mkl/basic/optional/ThreadPool/test/hpccg/main.c b/mkl/basic/optional/ThreadPool/test/hpccg/main.c new file mode 100644 index 0000000..676a02d --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hpccg/main.c @@ -0,0 +1,340 @@ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +/*--------------------------------------------------------------------*/ + +static +void hpccg_alloc_and_fill( const int np , + const int my_p , + const int gbox[][2] , + const int ghost , + struct cgsolve_data * const data ) +{ + int (*pbox)[3][2] = NULL ; + int * map_local_ord = NULL; + + data->nRow = 0 ; + data->A_pc = NULL ; + data->A_ia = NULL ; + data->A_a = NULL ; + + data->np = np ; + data->ip = my_p ; + data->recv_pc = NULL ; + data->send_pc = NULL ; + data->send_id = NULL ; + + box_partition_rcb( np, my_p, + (const int (*)[2]) gbox, ghost, + & pbox , + & map_local_ord , + & data->recv_pc , + & data->send_pc , + & data->send_id ); + + { + const int (* const my_box)[2] = (const int (*)[2]) pbox[my_p] ; + const int bx = my_box[0][0] ; + const int by = my_box[1][0] ; + const int bz = my_box[2][0] ; + const int nx = my_box[0][1] - bx ; + const int ny = my_box[1][1] - by ; + const int nz = my_box[2][1] - bz ; + const int n = nx * ny * nz ; + const int nnz = 27 * n ; /* Upper bound */ + int * const pc = (int *) malloc( sizeof(int) * ( n + 1 ) ); + int * const ia = (int *) malloc( sizeof(int) * nnz ); + MATRIX_SCALAR * const a = (MATRIX_SCALAR *) malloc( sizeof(MATRIX_SCALAR) * nnz ); + + int irow = 0 ; + int ipc = 0 ; + int ix , iy , iz ; + int sx , sy , sz ; + + for ( iz = 0 ; iz < nz ; ++iz ) { + for ( iy = 0 ; iy < ny ; ++iy ) { + for ( ix = 0 ; ix < nx ; ++ix , ++irow ) { + + if ( irow != box_map_local( my_box, ghost, map_local_ord,ix,iy,iz) ) { + fprintf(stderr,"P%d: irow[%d] != box_map_local(%d,%d,%d) = %d\n", + my_p,irow,ix,iy,iz, + box_map_local( my_box, ghost, map_local_ord, ix, iy, iz) 
); + } + + pc[ irow ] = ipc ; /* Beginning of row coefficients */ + /* Diagonal term first */ + ia[ ipc ] = irow ; + a[ ipc ] = 27.0f ; + ++ipc ; + + /* Off-diagonal terms to follow */ + for ( sz = -1 ; sz <= 1 ; ++sz ) { + for ( sy = -1 ; sy <= 1 ; ++sy ) { + for ( sx = -1 ; sx <= 1 ; ++sx ) { + const int dx = ix + sx ; + const int dy = iy + sy ; + const int dz = iz + sz ; + const int global_x = dx + bx ; + const int global_y = dy + by ; + const int global_z = dz + bz ; + + if ( gbox[0][0] <= global_x && global_x < gbox[0][1] && + gbox[1][0] <= global_y && global_y < gbox[1][1] && + gbox[2][0] <= global_z && global_z < gbox[2][1] && + ! ( sz == 0 && sy == 0 && sx == 0 ) ) { + /* 'icol' is mapped for communication */ + + const int icol = + box_map_local(my_box,ghost,map_local_ord,dx,dy,dz); + + if ( icol < 0 ) { + fprintf(stderr,"P%d : bad column at local (%d,%d,%d) global(%d,%d,%d)\n", + my_p, dx,dy,dz,global_x,global_y,global_z); + fflush(stderr); + abort(); + } + + ia[ ipc ] = icol ; + a[ ipc ] = -1.0f ; + ++ipc ; + } + } + } + } + } + } + } + + pc[irow] = ipc ; + + data->nRow = irow ; + data->A_pc = pc ; + data->A_ia = ia ; + data->A_a = a ; + } + + free( map_local_ord ); + free( pbox ); +} + +/*--------------------------------------------------------------------*/ + +int main( int argc , char ** argv ) +{ + const int ghost = 1 ; + const int max_cube = 20 ; + int ncube[20] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 }; + + FILE * print_file = stdout ; + int print_iter = 500 ; + int max_iter = 50 ; + + VECTOR_SCALAR tolerance = 0.0 ; /* Force max iterations */ + + int gbox[3][2] = { { 0 , 16 } , { 0 , 16 } , { 0 , 16 } }; + int nt = 0 ; + int trials = 5 ; + int ntest ; + int np = 1; + int my_p = 0 ; + +#ifdef HAVE_MPI + MPI_Init( & argc , & argv ); + MPI_Comm_size( MPI_COMM_WORLD , & np ); + MPI_Comm_rank( MPI_COMM_WORLD , & my_p ); +#endif + + if ( ! 
my_p ) { + const char arg_threads[] = "threads=" ; + const char arg_cube[] = "cube=" ; + const char arg_box[] = "box=" ; + const char arg_max[] = "max_iter=" ; + const char arg_trials[] = "trials=" ; + const char arg_print[] = "print_iter=" ; + const char arg_file[] = "print_file=" ; + int i ; + for ( i = 1 ; i < argc ; ++i ) { + if ( ! strncmp(argv[i],arg_threads,strlen(arg_threads)) ) { + sscanf(argv[i]+strlen(arg_threads),"%d",&nt); + } + else if ( ! strncmp(argv[i],arg_box,strlen(arg_box)) ) { + sscanf(argv[i]+strlen(arg_box),"%d%*[x]%d%*[x]%d", + & gbox[0][1] , & gbox[1][1] , & gbox[2][1] ); + } + else if ( ! strncmp(argv[i],arg_cube,strlen(arg_cube)) ) { + sscanf(argv[i]+strlen(arg_cube), + "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d", + ncube+0, ncube+1, ncube+2, ncube+3, ncube+4, + ncube+5, ncube+6, ncube+7, ncube+8, ncube+9, + ncube+10, ncube+11, ncube+12, ncube+13, ncube+14, + ncube+15, ncube+16, ncube+17, ncube+18, ncube+19); + } + else if ( ! strncmp(argv[i],arg_max,strlen(arg_max)) ) { + sscanf(argv[i]+strlen(arg_max),"%d",&max_iter); + } + else if ( ! strncmp(argv[i],arg_trials,strlen(arg_trials)) ) { + sscanf(argv[i]+strlen(arg_trials),"%d",&trials); + } + else if ( ! strncmp(argv[i],arg_print,strlen(arg_print)) ) { + sscanf(argv[i]+strlen(arg_print),"%d",&print_iter); + } + else if ( ! strncmp(argv[i],arg_file,strlen(arg_file)) ) { + char buffer[256] ; + sscanf(argv[i]+strlen(arg_file),"%s",buffer); + print_file = fopen(buffer,"a"); + } + } + } + +#ifdef HAVE_MPI + { + MPI_Bcast( & nt , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); + MPI_Bcast( & gbox[0][0] , 6 , MPI_INT , 0 , MPI_COMM_WORLD ); + MPI_Bcast( ncube , max_cube , MPI_INT , 0 , MPI_COMM_WORLD ); + MPI_Bcast( & max_iter , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); + MPI_Bcast( & print_iter , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); + MPI_Bcast( & trials , 1 , MPI_INT , 0 , MPI_COMM_WORLD ); + } +#endif + + if ( nt ) { + TPI_Init( nt ); + TPI_Block(); + TPI_Unblock(); + } + + if ( ! 
my_p ) { + fprintf(print_file,"\"PROC\" , \"THREAD\" , \"EQUATION\" , \"NON-ZERO\" , \"MXV\" , \"AXPBY\" , \"DOT\" , \"Xerror\" , \"Iter\"\n"); + fprintf(print_file,"\"COUNT\" , \"COUNT\" , \"COUNT\" , \"COUNT\" , \"Mflops\" , \"Mflops\" , \"Mflops\" , \"L2norm\" , \"COUNT\"\n"); + } + + for ( ntest = 0 ; ! ntest || ( ntest < max_cube && ncube[ntest] ) ; ++ntest ) { + struct cgsolve_data cgdata ; + + if ( ncube[ntest] ) { + gbox[0][1] = gbox[1][1] = gbox[2][1] = ncube[ntest] ; + } + + hpccg_alloc_and_fill( np, my_p, (const int (*)[2]) gbox, ghost, &cgdata); + + cgdata.max_iter = max_iter ; + cgdata.print_iter = print_iter ; + cgdata.tolerance = tolerance ; + + { + double dt_mxv[2] = { 0 , 0 }; + double dt_axpby[2] = { 0 , 0 }; + double dt_dot[2] = { 0 , 0 }; + VECTOR_SCALAR norm_resid = 0.0 ; + int iter_count = 0 ; + int iter_total = 0 ; + int k ; + + VECTOR_SCALAR * const b = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * cgdata.nRow ); + VECTOR_SCALAR * const x = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * cgdata.nRow ); + VECTOR_SCALAR * const xexact = (VECTOR_SCALAR *) malloc( sizeof(VECTOR_SCALAR) * cgdata.nRow ); + + { + const VECTOR_SCALAR value = 1.0 /* 1.0 / 3.0 */ ; + int i ; + for ( i = 0 ; i < cgdata.nRow ; ++i ) xexact[i] = value ; + } + + for ( k = 0 ; k < trials ; ++k ) { + int i ; + + for ( i = 0 ; i < cgdata.nRow ; ++i ) { x[i] = 0.0 ; } + + cgsolve_set_lhs( & cgdata , xexact , b ); + + cgsolve( & cgdata, b, x, + & iter_count, & norm_resid, + dt_mxv , dt_axpby , dt_dot ); + + iter_total += iter_count ; + } + + { + int nnzGlobal = cgdata.A_pc[ cgdata.nRow ]; + double error[2] = { 0 , 0 }; + + for ( k = 0 ; k < cgdata.nRow ; ++k ) { + error[0] += ( x[k] - xexact[k] ) * ( x[k] - xexact[k] ); + error[1] += xexact[k] * xexact[k] ; + } + +#ifdef HAVE_MPI + { + double error_global[2] = { 0.0 , 0.0 }; + int nnz = nnzGlobal ; + + MPI_Allreduce( & nnz , & nnzGlobal , 1 , MPI_INT , MPI_SUM , + MPI_COMM_WORLD ); + + MPI_Allreduce( error , error_global 
, 2 , MPI_DOUBLE , MPI_SUM , + MPI_COMM_WORLD ); + + error[0] = error_global[0]; + error[1] = error_global[1]; + } +#endif + + error[0] = sqrt( error[0] ); + error[1] = sqrt( error[1] ); + + if ( ! my_p ) { + const int nRowGlobal = ( gbox[0][1] - gbox[0][0] ) * + ( gbox[1][1] - gbox[1][0] ) * + ( gbox[2][1] - gbox[2][0] ); + + const double mflop_mxv = + 1.0e-6 * ( iter_total ) * 2 * nnzGlobal / dt_mxv[0] ; + + const double mflop_axpby = + 1.0e-6 * ( iter_total * 3 ) * 3 * nRowGlobal / dt_axpby[0] ; + + const double mflop_dot = + 1.0e-6 * ( iter_total * 2 ) * 2 * nRowGlobal / dt_dot[0] ; + + fprintf(print_file,"%8d , %8d , %8d , %8d , %10g , %10g , %10g , %g , %d\n", + np , nt , nRowGlobal , nnzGlobal , + mflop_mxv , mflop_axpby , mflop_dot , + error[0] / error[1] , iter_total ); + fflush(print_file); + } + } + + free( xexact ); + free( x ); + free( b ); + } + free( cgdata.A_a ); + free( cgdata.A_ia ); + free( cgdata.A_pc ); + free( cgdata.recv_pc ); + free( cgdata.send_pc ); + free( cgdata.send_id ); + } + + if ( nt ) { TPI_Finalize(); } + +#ifdef HAVE_MPI + MPI_Finalize(); +#endif + + return 0 ; +} + diff --git a/mkl/basic/optional/ThreadPool/test/hpccg/tpi_vector.c b/mkl/basic/optional/ThreadPool/test/hpccg/tpi_vector.c new file mode 100644 index 0000000..1b8a26c --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hpccg/tpi_vector.c @@ -0,0 +1,273 @@ +#include + +#include + +#include +#include + +/*--------------------------------------------------------------------*/ + +struct tpi_work_vector { + VECTOR_SCALAR alpha ; + VECTOR_SCALAR beta ; + const VECTOR_SCALAR * x ; + const VECTOR_SCALAR * y ; + VECTOR_SCALAR * w ; + int n ; +}; + +static void tpi_work_span( TPI_Work * const work , const int n , + int * const iBeg , int * const iEnd ) +{ + const int chunk = ( n + work->count - 1 ) / work->count ; + const int i_end = chunk + ( *iBeg = chunk * work->rank ); + + *iEnd = n < i_end ? 
n : i_end ; +} + +/*--------------------------------------------------------------------*/ + +static void tpi_work_fill( TPI_Work * work ) +{ + const struct tpi_work_vector * const h = + (struct tpi_work_vector *) work->info ; + + const VECTOR_SCALAR alpha = h->alpha ; + VECTOR_SCALAR * const w = h->w ; + + int i , iEnd ; + + tpi_work_span( work , h->n , & i , & iEnd ); + + for ( ; i < iEnd ; ++i ) { w[i] = alpha ; } +} + +void tpi_fill( int n , VECTOR_SCALAR alpha , VECTOR_SCALAR * x ) +{ + struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; + tmp.alpha = alpha ; + tmp.w = x ; + tmp.n = n ; + TPI_Run_threads( tpi_work_fill , & tmp , 0 ); +} + +/*--------------------------------------------------------------------*/ + +static void tpi_work_scale( TPI_Work * work ) +{ + const struct tpi_work_vector * const h = + (struct tpi_work_vector *) work->info ; + + const VECTOR_SCALAR beta = h->beta ; + VECTOR_SCALAR * const w = h->w ; + + int i , iEnd ; + + tpi_work_span( work , h->n , & i , & iEnd ); + + for ( ; i < iEnd ; ++i ) { w[i] *= beta ; } +} + +void tpi_scale( int n , const VECTOR_SCALAR alpha , VECTOR_SCALAR * x ) +{ + struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; + tmp.alpha = alpha ; + tmp.w = x ; + tmp.n = n ; + TPI_Run_threads( tpi_work_scale , & tmp , 0 ); +} + +/*--------------------------------------------------------------------*/ + +static void tpi_work_copy( TPI_Work * work ) +{ + const struct tpi_work_vector * const h = + (struct tpi_work_vector *) work->info ; + + const VECTOR_SCALAR * const x = h->x ; + VECTOR_SCALAR * const w = h->w ; + + int i , iEnd ; + + tpi_work_span( work , h->n , & i , & iEnd ); + + for ( ; i < iEnd ; ++i ) { w[i] = x[i] ; } +} + +void tpi_copy( int n , const VECTOR_SCALAR * x , VECTOR_SCALAR * y ) +{ + struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; + tmp.x = x ; + tmp.w = y ; + tmp.n = n ; + TPI_Run_threads( tpi_work_copy , & tmp , 0 ); +} + 
+/*--------------------------------------------------------------------*/ + +static void tpi_work_axpby( TPI_Work * work ) +{ + const struct tpi_work_vector * const h = + (struct tpi_work_vector *) work->info ; + + const VECTOR_SCALAR alpha = h->alpha ; + const VECTOR_SCALAR beta = h->beta ; + const VECTOR_SCALAR * const x = h->x ; + VECTOR_SCALAR * const w = h->w ; + + int i , iEnd ; + + tpi_work_span( work , h->n , & i , & iEnd ); + + for ( ; i < iEnd ; ++i ) { w[i] = alpha * x[i] + beta * w[i] ; } +} + +void tpi_axpby( int n , VECTOR_SCALAR alpha , const VECTOR_SCALAR * x , + VECTOR_SCALAR beta , VECTOR_SCALAR * y ) +{ + struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; + tmp.alpha = alpha ; + tmp.beta = beta ; + tmp.x = x ; + tmp.w = y ; + tmp.n = n ; + + TPI_Run_threads( tpi_work_axpby , & tmp , 0 ); +} + +/*--------------------------------------------------------------------*/ + +static void tpi_work_dot_partial( TPI_Work * work ) +{ + const struct tpi_work_vector * const h = + (struct tpi_work_vector *) work->info ; + + VECTOR_SCALAR * const s = (VECTOR_SCALAR *) work->reduce ; + const VECTOR_SCALAR * const x = h->x ; + const VECTOR_SCALAR * const y = h->y ; + VECTOR_SCALAR tmp = *s ; + int i , iEnd ; + + tpi_work_span( work , h->n , & i , & iEnd ); + + for ( ; i < iEnd ; ++i ) { tmp += x[i] * y[i] ; } + + *s = tmp ; +} + +static void tpi_work_dot_partial_self( TPI_Work * work ) +{ + const struct tpi_work_vector * const h = + (struct tpi_work_vector *) work->info ; + + VECTOR_SCALAR * const s = (VECTOR_SCALAR *) work->reduce ; + const VECTOR_SCALAR * const x = h->x ; + VECTOR_SCALAR tmp = *s ; + + int i , iEnd ; + + tpi_work_span( work , h->n , & i , & iEnd ); + + for ( ; i < iEnd ; ++i ) { const VECTOR_SCALAR d = x[i] ; tmp += d * d ; } + + *s = tmp ; +} + +static void tpi_work_dot_join( TPI_Work * work , const void * src ) +{ + *((VECTOR_SCALAR *) ( work->reduce) ) += *((const VECTOR_SCALAR *) src); +} + +static void tpi_work_dot_init( 
TPI_Work * work ) +{ + *((VECTOR_SCALAR *) ( work->reduce) ) = 0 ; +} + +VECTOR_SCALAR tpi_dot( int n , const VECTOR_SCALAR * x , const VECTOR_SCALAR * y ) +{ + struct tpi_work_vector tmp = { 0.0 , 0.0 , NULL , NULL , NULL , 0 }; + VECTOR_SCALAR result = 0.0 ; + tmp.x = x ; + tmp.y = y ; + tmp.n = n ; + if ( x != y ) { + TPI_Run_threads_reduce( tpi_work_dot_partial , & tmp , + tpi_work_dot_join , tpi_work_dot_init , + sizeof(result) , & result ); + } + else { + TPI_Run_threads_reduce( tpi_work_dot_partial_self , & tmp , + tpi_work_dot_join , tpi_work_dot_init , + sizeof(result) , & result ); + } + return result ; +} + +/*--------------------------------------------------------------------*/ + +struct tpi_crs_matrix { + int nRow ; + const int * A_pc ; + const int * A_ia ; + const MATRIX_SCALAR * A_a ; + const VECTOR_SCALAR * x ; + VECTOR_SCALAR * y ; +}; + +static void tpi_work_crs_matrix_apply( TPI_Work * work ) +{ + const struct tpi_crs_matrix * const h = + (struct tpi_crs_matrix *) work->info ; + + const int * const A_pc = h->A_pc ; + const int * const A_ia = h->A_ia ; + const MATRIX_SCALAR * const A_a = h->A_a ; + const VECTOR_SCALAR * const x = h->x ; + + const int nRow = h->nRow ; + const int chunk = ( nRow + work->count - 1 ) / work->count ; + + int row = chunk * work->rank ; + int rowEnd = chunk + row ; + + if ( nRow < rowEnd ) { rowEnd = nRow ; } + + { + const int * const pc_end = A_pc + rowEnd ; + const int * pc = A_pc + row ; + VECTOR_SCALAR * y = h->y + row ; + + for ( ; pc != pc_end ; ++pc , ++y ) { + const int * ia = A_ia + *pc ; + const MATRIX_SCALAR * a = A_a + *pc ; + const MATRIX_SCALAR * const a_end = A_a + pc[1] ; + VECTOR_SCALAR tmp = 0 ; + for ( ; a != a_end ; ++a , ++ia ) { + tmp += *a * x[ *ia ]; + } + *y = tmp ; + } + } +} + +/*--------------------------------------------------------------------*/ + +void tpi_crs_matrix_apply( + const int nRow , + const int * A_pc , + const int * A_ia , + const MATRIX_SCALAR * A_a , + const VECTOR_SCALAR * x 
, + VECTOR_SCALAR * y ) +{ + struct tpi_crs_matrix h = { 0 , NULL , NULL , NULL , NULL , NULL }; + h.nRow = nRow ; + h.A_pc = A_pc ; + h.A_ia = A_ia ; + h.A_a = A_a ; + h.x = x ; + h.y = y ; + TPI_Run_threads( tpi_work_crs_matrix_apply , & h , 0 ); +} + + diff --git a/mkl/basic/optional/ThreadPool/test/hpccg/tpi_vector.h b/mkl/basic/optional/ThreadPool/test/hpccg/tpi_vector.h new file mode 100644 index 0000000..bcd514e --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/hpccg/tpi_vector.h @@ -0,0 +1,31 @@ + +#include + +#ifndef tpi_vector_h +#define tpi_vector_h + +#define VECTOR_SCALAR float +#define MATRIX_SCALAR float + +void tpi_fill( int n , VECTOR_SCALAR alpha , VECTOR_SCALAR * x ); + +void tpi_scale( int n , const VECTOR_SCALAR alpha , VECTOR_SCALAR * x ); + +void tpi_copy( int n , const VECTOR_SCALAR * x , VECTOR_SCALAR * y ); + +void tpi_axpby( int n , VECTOR_SCALAR alpha , const VECTOR_SCALAR * x , + VECTOR_SCALAR beta , VECTOR_SCALAR * y ); + +VECTOR_SCALAR tpi_dot( int n , const VECTOR_SCALAR * x , + const VECTOR_SCALAR * y ); + +void tpi_crs_matrix_apply( + const int nRow , + const int * A_pc , + const int * A_ia , + const MATRIX_SCALAR * A_a , + const VECTOR_SCALAR * x , + VECTOR_SCALAR * y ); + +#endif + diff --git a/mkl/basic/optional/ThreadPool/test/test_c_dnax.c b/mkl/basic/optional/ThreadPool/test/test_c_dnax.c new file mode 100644 index 0000000..4f6ab9b --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/test_c_dnax.c @@ -0,0 +1,414 @@ +/*------------------------------------------------------------------------*/ +/* TPI: Thread Pool Interface */ +/* Copyright (2008) Sandia Corporation */ +/* */ +/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ +/* license for use of this work by or on behalf of the U.S. Government. 
*/ +/* */ +/* This library is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU Lesser General Public License as */ +/* published by the Free Software Foundation; either version 2.1 of the */ +/* License, or (at your option) any later version. */ +/* */ +/* This library is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ +/* Lesser General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU Lesser General Public */ +/* License along with this library; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ +/* USA */ +/*------------------------------------------------------------------------*/ +/** + * @author H. Carter Edwards + * + * Multi-array 'axpby' + */ + +#include +#include +#include +#include +#include + +#if defined( HAVE_MPI ) +#include +#endif + +int test_c_tpi_dnax( int , int ); + +int main( int argc , char ** argv ) +{ + int num_thread[] = { 1 , 2 , 4 , 6 , 8 , 12 , 16 }; + int num_test = sizeof(num_thread) / sizeof(int); + + const int ntrial = 1 < argc ? 
atoi( argv[1] ) : 2 ; + int i ; + +#if defined( HAVE_MPI ) + int rank ; + + MPI_Init( & argc , & argv ); + MPI_Comm_rank( MPI_COMM_WORLD , & rank ); + if ( 0 == rank ) { +#endif + + + fprintf( stdout , "\"TESTING Multiarray 'axpby' with: %s\"\n" , + TPI_Version() ); + + for ( i = 0 ; i < num_test ; ++i ) { + test_c_tpi_dnax( num_thread[i] , ntrial ); + } + +#if defined( HAVE_MPI ) + } + MPI_Finalize(); +#endif + + return 0 ; +} + +/*------------------------------------------------------------------------*/ + +typedef double SCALAR ; + +/*------------------------------------------------------------------------*/ + +struct TestTPI_DNAX { + SCALAR * coef ; + SCALAR * array ; + unsigned number ; + unsigned length ; + unsigned stride ; + unsigned chunk_length ; +}; + +/*------------------------------------------------------------------------*/ + +static +void test_dnax_column( const unsigned num_array , + const unsigned stride , + const unsigned length , + const SCALAR * const coef , + SCALAR * const array ) +{ + unsigned i = 0 ; + for ( ; i < length ; ++i ) { + SCALAR * const a = array + i ; + SCALAR tmp = 0 ; + unsigned j = 0 ; + for ( ; j < num_array ; ++j ) { tmp += coef[j] * a[ j * stride ] ; } + a[0] = tmp ; + } +} + +static +void test_dnax_row( const unsigned num_array , + const unsigned stride , + const unsigned length , + const SCALAR * const coef , + SCALAR * const array ) +{ + unsigned i = 0 ; + for ( ; i < length ; ++i ) { + SCALAR * const a = array + i * stride ; + SCALAR tmp = 0 ; + unsigned j = 0 ; + for ( ; j < num_array ; ++j ) { tmp += coef[j] * a[j] ; } + a[0] = tmp ; + } +} + +/*------------------------------------------------------------------------*/ +/* The multi-array storage is flat: every array is fully contiguous. + * Work corresponds to a span of the array. 
+ */ +static +void test_dnax_flat_work( TPI_Work * work ) +{ + const struct TestTPI_DNAX * const info = + (struct TestTPI_DNAX *) work->info ; + + const unsigned which_chunk = work->rank ; + const unsigned beg_local = info->chunk_length * which_chunk ; + const unsigned max_local = info->length - beg_local ; + const unsigned len_local = info->chunk_length < max_local ? + info->chunk_length : max_local ; + + test_dnax_column( info->number , + info->stride , + len_local , + info->coef , + info->array + beg_local ); + + return ; +} + +/* The multi-array storage is chunked: each array has a contiguous chunk; + * but chunk-subarrays are contiguously grouped. + */ +static +void test_dnax_column_work( TPI_Work * work ) +{ + const struct TestTPI_DNAX * const info = + (struct TestTPI_DNAX *) work->info ; + + const unsigned which_chunk = work->rank ; + const unsigned beg_local = info->chunk_length * which_chunk ; + const unsigned max_local = info->length - beg_local ; + const unsigned len_local = info->chunk_length < max_local ? + info->chunk_length : max_local ; + + const unsigned chunk_size = info->chunk_length * info->number ; + + test_dnax_column( info->number , + info->chunk_length , + len_local , + info->coef , + info->array + which_chunk * chunk_size ); + + return ; +} + +static +void test_dnax_row_work( TPI_Work * work ) +{ + const struct TestTPI_DNAX * const info = + (struct TestTPI_DNAX *) work->info ; + + const unsigned which_chunk = work->rank ; + const unsigned beg_local = info->chunk_length * which_chunk ; + const unsigned max_local = info->length - beg_local ; + const unsigned len_local = info->chunk_length < max_local ? 
+ info->chunk_length : max_local ; + + const unsigned chunk_size = info->chunk_length * info->number ; + + test_dnax_row( info->number , + info->number , + len_local , + info->coef , + info->array + which_chunk * chunk_size ); + + return ; +} + +/*------------------------------------------------------------------------*/ +/* Process identical block of allocated memory as a + * as a flat array, chunked-column, and chunked-row. + */ + +static +void test_tpi_dnax_driver( const int nthread , + const unsigned Mflop_target , + const unsigned num_trials , + const unsigned num_test , + const unsigned num_test_array[] , + const unsigned length_array , + const unsigned length_chunk ) +{ + const unsigned max_array = num_test_array[ num_test - 1 ]; + + const unsigned num_chunk = + ( length_array + length_chunk - 1 ) / length_chunk ; + + const unsigned stride_array = num_chunk * length_chunk ; + const unsigned size_alloc = max_array * stride_array ; + + SCALAR * const coef = (SCALAR *) malloc( max_array * sizeof(SCALAR) ); + SCALAR * const array = (SCALAR *) malloc( size_alloc * sizeof(SCALAR) ); + + struct TestTPI_DNAX data = { NULL , NULL , 0 , 0 , 0 , 0 }; + + unsigned i_test , i , j ; + + data.coef = coef ; + + if ( NULL == array ) { + fprintf(stderr,"allocation failure for %u\n",size_alloc); + abort(); + } + + for ( i = 0 ; i < max_array ; ++i ) { coef[i] = 0 ; } + + printf("\n\"test_tpi_dnax[%d]( length_array = %u , stride_array = %u )\"\n", + nthread , length_array , stride_array ); + printf("\"NUMBER OF THREADS\" , %d\n" , nthread ); + printf("\"NUMBER OF CHUNKS\" , %u\n" , num_chunk ); + printf("\"NUMBER OF TRIALS\" , %u \n", num_trials ); + + printf("\"TEST\" , \"#ARRAY\" \"DT-MEAN\" , \"DT-STDDEV\" , \"MFLOP-MEAN\" , \"MFLOP-STDDEV\"\n"); + + /*----------------------------------------------------------------------*/ + + for ( i_test = 0 ; i_test < num_test ; ++i_test ) { + const unsigned num_array = num_test_array[ i_test ]; + const unsigned num_sets = max_array / 
num_array ; + + const double mflop_cycle = + ((double)( 2 * num_array * length_array )) / 1.0e6 ; + + const unsigned ncycle = 1 + (unsigned)( Mflop_target / mflop_cycle ); + + double dt_sum = 0 ; + double dt_sum_2 = 0 ; + + data.length = length_array ; + data.number = num_array ; + data.stride = stride_array ; + data.chunk_length = length_chunk ; + + for ( i = 0 ; i < size_alloc ; ++i ) { array[i] = 0 ; } + + for ( j = 0 ; j < num_trials ; ++j ) { + + double dt_tmp = TPI_Walltime(); + for ( i = 0 ; i < ncycle ; ++i ) { + data.array = array + stride_array * num_array * ( i % num_sets ); + TPI_Run( & test_dnax_flat_work , & data , num_chunk , 0 ); + } + dt_tmp = TPI_Walltime() - dt_tmp ; + + dt_sum += dt_tmp ; + dt_sum_2 += dt_tmp * dt_tmp ; + } + + { + const double dt_mean = dt_sum / num_trials ; + const double dt_sdev = sqrt( ( num_trials * dt_sum_2 - dt_sum * dt_sum ) / ( num_trials * ( num_trials - 1 ) ) ); + const double mflop_mean = mflop_cycle * ncycle / dt_mean ; + const double mflop_sdev = mflop_mean * dt_sdev / ( dt_mean + dt_sdev ); + + printf("\"FLAT ARRAY\" , %6u , %9.5g , %9.3g , %9.5g , %9.3g\n", + num_array, dt_mean, dt_sdev, mflop_mean, mflop_sdev ); + } + } + + /*----------------------------------------------------------------------*/ + + for ( i_test = 0 ; i_test < num_test ; ++i_test ) { + + const unsigned num_array = num_test_array[ i_test ]; + const unsigned num_sets = max_array / num_array ; + + const double mflop_cycle = + ((double)( 2 * num_array * length_array )) / 1.0e6 ; + + const unsigned ncycle = 1 + (unsigned)( Mflop_target / mflop_cycle ); + + double dt_sum = 0 ; + double dt_sum_2 = 0 ; + + data.length = length_array ; + data.number = num_array ; + data.stride = stride_array ; + data.chunk_length = length_chunk ; + + for ( i = 0 ; i < size_alloc ; ++i ) { array[i] = 0 ; } + + for ( j = 0 ; j < num_trials ; ++j ) { + + double dt_tmp = TPI_Walltime(); + for ( i = 0 ; i < ncycle ; ++i ) { + data.array = array + stride_array * num_array * 
( i % num_sets ); + TPI_Run( & test_dnax_column_work , & data , num_chunk , 0 ); + } + dt_tmp = TPI_Walltime() - dt_tmp ; + + dt_sum += dt_tmp ; + dt_sum_2 += dt_tmp * dt_tmp ; + } + + { + const double dt_mean = dt_sum / num_trials ; + const double dt_sdev = sqrt( ( num_trials * dt_sum_2 - dt_sum * dt_sum ) / ( num_trials * ( num_trials - 1 ) ) ); + const double mflop_mean = mflop_cycle * ncycle / dt_mean ; + const double mflop_sdev = mflop_mean * dt_sdev / ( dt_mean + dt_sdev ); + + printf("\"CHUNK COLUMN\" , %6u , %9.5g , %9.3g , %9.5g , %9.3g\n", + num_array, dt_mean, dt_sdev, mflop_mean, mflop_sdev ); + } + } + + /*----------------------------------------------------------------------*/ + + for ( i_test = 0 ; i_test < num_test ; ++i_test ) { + + const unsigned num_array = num_test_array[ i_test ]; + const unsigned num_sets = max_array / num_array ; + + const double mflop_cycle = + ((double)( 2 * num_array * length_array )) / 1.0e6 ; + + const unsigned ncycle = 1 + (unsigned)( Mflop_target / mflop_cycle ); + + double dt_sum = 0 ; + double dt_sum_2 = 0 ; + + data.length = length_array ; + data.number = num_array ; + data.stride = stride_array ; + data.chunk_length = length_chunk ; + + for ( i = 0 ; i < size_alloc ; ++i ) { array[i] = 0 ; } + + for ( j = 0 ; j < num_trials ; ++j ) { + + double dt_tmp = TPI_Walltime(); + + for ( i = 0 ; i < ncycle ; ++i ) { + data.array = array + stride_array * num_array * ( i % num_sets ); + TPI_Run( & test_dnax_row_work , & data , num_chunk , 0 ); + } + dt_tmp = TPI_Walltime() - dt_tmp ; + + dt_sum += dt_tmp ; + dt_sum_2 += dt_tmp * dt_tmp ; + } + + { + const double dt_mean = dt_sum / num_trials ; + const double dt_sdev = sqrt( ( num_trials * dt_sum_2 - dt_sum * dt_sum ) / ( num_trials * ( num_trials - 1 ) ) ); + const double mflop_mean = mflop_cycle * ncycle / dt_mean ; + const double mflop_sdev = mflop_mean * dt_sdev / ( dt_mean + dt_sdev ); + + printf("\"CHUNK ROW\" , %6u , %9.5g , %9.3g , %9.5g , %9.3g\n", + num_array, 
dt_mean, dt_sdev, mflop_mean, mflop_sdev ); + } + } + + /*----------------------------------------------------------------------*/ + + free( array ); + free( coef ); +} + +/*------------------------------------------------------------------------*/ + +int test_c_tpi_dnax( int nthread , int ntrial ) +{ + const unsigned Mflop_target = 10 ; + const unsigned num_array[6] = { 2 , 5 , 10 , 20 , 50 , 100 }; + const unsigned ntest = sizeof(num_array) / sizeof(unsigned); + + if ( ntrial <= 0 ) { ntrial = 7 ; } + + TPI_Init( nthread ); + + test_tpi_dnax_driver( nthread , + Mflop_target * nthread , + ntrial /* number trials */ , + ntest /* number of tests */ , + num_array /* number of arrays for each test */ , + 1e6 /* array computation length */ , + 1000 /* chunk length */ ); + + TPI_Finalize(); + + return 0 ; +} + + + diff --git a/mkl/basic/optional/ThreadPool/test/test_mpi_sum.c b/mkl/basic/optional/ThreadPool/test/test_mpi_sum.c new file mode 100644 index 0000000..51d6b9e --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/test_mpi_sum.c @@ -0,0 +1,764 @@ +/*------------------------------------------------------------------------*/ +/* TPI: Thread Pool Interface */ +/* Copyright (2008) Sandia Corporation */ +/* */ +/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ +/* license for use of this work by or on behalf of the U.S. Government. */ +/* */ +/* This library is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU Lesser General Public License as */ +/* published by the Free Software Foundation; either version 2.1 of the */ +/* License, or (at your option) any later version. */ +/* */ +/* This library is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ +/* Lesser General Public License for more details. 
*/ +/* */ +/* You should have received a copy of the GNU Lesser General Public */ +/* License along with this library; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ +/* USA */ +/*------------------------------------------------------------------------*/ +/** + * @author H. Carter Edwards + */ + +#include +#include +#include +#include +#include + +int rand_r( unsigned int * ); + +/*--------------------------------------------------------------------*/ + +#if defined(HAVE_MPI) + +#include + +typedef MPI_Comm COMM ; + +#else + +typedef int COMM ; + +#endif + +static int comm_size( COMM ); +static int comm_rank( COMM ); +static void comm_reduce_dmax( COMM , double * ); +static void comm_reduce_dsum( COMM , double * ); +static void comm_reduce_d4_sum( COMM , double * ); + +/*--------------------------------------------------------------------*/ + +static void my_span( const unsigned count , const unsigned rank , + const unsigned size , + unsigned * begin , unsigned * length ) +{ + const unsigned int max = ( size + count - 1 ) / count ; + const unsigned int end = size - max * ( count - ( rank + 1 ) ); + if ( rank ) { + *begin = end - max ; + *length = max ; + } + else { + *begin = 0 ; + *length = end ; + } +} + +/*--------------------------------------------------------------------*/ + +#define LESS_ABS( X , Y ) ( ( X < 0 ? -X : X ) < ( Y < 0 ? -Y : Y ) ) + +static void d2_add_d( double v[] , const double a ) +{ + const int AltV = a < 0 ? ( - a < ( v[0] < 0 ? - v[0] : v[0] ) ) + : ( a < ( v[0] < 0 ? - v[0] : v[0] ) ); + + const double VpA = v[0] + a ; + + v[1] += AltV ? 
( a - ( VpA - v[0] ) ) : ( v[0] - ( VpA - a ) ); + v[0] = VpA + v[1] ; + v[1] += VpA - v[0] ; +} + +void d4_dot( double v[] , unsigned n , const double * x , const double * y ) +{ + double * pos = v ; + double * neg = v + 2 ; + const double * const x_end = x + n ; + for ( ; x < x_end ; ++x , ++y ) { + const double a = *x * *y ; + if ( a < 0 ) { d2_add_d( neg , a ); } + else { d2_add_d( pos , a ); } + } +} + +double ddot( unsigned n , const double * x , const double * y ) +{ + double val = 0 ; + const double * const x_end = x + n ; + for ( ; x < x_end ; ++x , ++y ) { val += *x * *y ; } + return val ; +} + +/*--------------------------------------------------------------------*/ + +struct TaskXY { + unsigned int nreduce ; + unsigned int n ; + const double * x ; + const double * y ; +}; + +static +void reduce_init( TPI_Work * work ) +{ + struct TaskXY * const info = (struct TaskXY *) work->info ; + double * const dst = (double *) work->reduce ; + + if ( info->nreduce == 4 ) { + dst[0] = 0 ; + dst[1] = 0 ; + dst[2] = 0 ; + dst[3] = 0 ; + } + else if ( info->nreduce == 1 ) { + dst[0] = 0 ; + } +} + +static +void reduce_join( TPI_Work * work , const void * arg_src ) +{ + struct TaskXY * const info = (struct TaskXY *) work->info ; + double * const dst = (double *) work->reduce ; + const double * const src = (const double *) arg_src ; + + if ( info->nreduce == 4 ) { + d2_add_d( dst , src[0] ); + d2_add_d( dst , src[1] ); + d2_add_d( dst + 2 , src[2] ); + d2_add_d( dst + 2 , src[3] ); + } + else if ( info->nreduce == 1 ) { + dst[0] += src[0] ; + } +} + +/*--------------------------------------------------------------------*/ + +static +void work_d4_dot_tp( TPI_Work * work ) +{ + struct TaskXY * const info = (struct TaskXY *) work->info ; + double * const dst = (double *) work->reduce ; + + unsigned int begin , length ; + + my_span( work->count , work->rank , info->n , & begin , & length ); + + d4_dot( dst , length , info->x + begin , info->y + begin ); +} + +double 
d4_dot_tp( COMM comm, unsigned nwork, unsigned n, + const double * x, const double * y ) +{ + struct TaskXY info = { 4 , 0 , NULL , NULL }; + double result[4] = { 0 , 0 , 0 , 0 }; + info.n = n ; + info.x = x ; + info.y = y ; + + if ( nwork ) { + TPI_Run_reduce( work_d4_dot_tp , & info , nwork , + reduce_join, reduce_init, sizeof(result) , result ); + } + else { + TPI_Run_threads_reduce( work_d4_dot_tp , & info , + reduce_join, reduce_init, sizeof(result), result); + } + + comm_reduce_d4_sum( comm , result ); + + d2_add_d( result , result[2] ); + d2_add_d( result , result[3] ); + + return result[0] ; +} + +static +void task_ddot_tp( TPI_Work * work ) +{ + struct TaskXY * const info = (struct TaskXY *) work->info ; + double * const dst = (double *) work->reduce ; + unsigned int begin , length ; + + my_span( work->count , work->rank , info->n , & begin , & length ); + + *dst += ddot( length , info->x + begin , info->y + begin ); + + return ; +} + +double ddot_tp( COMM comm, unsigned nwork, unsigned n, + const double * x, const double * y ) +{ + struct TaskXY info = { 1 , 0 , NULL , NULL }; + double result = 0 ; + info.n = n ; + info.x = x ; + info.y = y ; + + if ( nwork ) { + TPI_Run_reduce( task_ddot_tp , & info , nwork , + reduce_join, reduce_init, sizeof(result), & result); + } + else { + TPI_Run_threads_reduce( task_ddot_tp , & info , + reduce_join, reduce_init, sizeof(result), & result); + } + + comm_reduce_dsum( comm , & result ); + + return result ; +} + +/*--------------------------------------------------------------------*/ + +void dfill_rand( unsigned seed , unsigned n , double * x , double mag ) +{ + const double scale = 2.0 * mag / (double) RAND_MAX ; + double * const xe = x + n ; + for ( ; xe != x ; ++x , ++seed ) { + unsigned s = seed ; + *x = scale * ((double) rand_r( & s )) - mag ; + } +} + +struct FillWork { + double mag ; + double * beg ; + unsigned length ; + unsigned seed ; +}; + +static void task_dfill_rand( TPI_Work * work ) +{ + struct FillWork 
* const w = (struct FillWork *) work->info ; + + unsigned int begin , length ; + + my_span( work->count, work->rank, w->length, & begin , & length ); + + dfill_rand( w->seed + begin , length , w->beg + begin , w->mag ); +} + +void dfill_rand_tp( unsigned nblock , unsigned seed , + unsigned n , double * x , double mag ) +{ + struct FillWork data ; + data.mag = mag ; + data.beg = x ; + data.length = n ; + data.seed = seed ; + if ( nblock ) { + const int nwork = ( n + nblock - 1 ) / nblock ; + TPI_Run( & task_dfill_rand , & data , nwork , 0 ); + } + else { + TPI_Run_threads( & task_dfill_rand , & data , 0 ); + } +} + +/*--------------------------------------------------------------------*/ + +static +void test_ddot_performance( + COMM comm , + const int nthreads , + const int nblock , + const unsigned int num_trials , + const unsigned int num_tests , + const unsigned int length_array[] /* Global array length for each test */ , + const double mag ) +{ + const unsigned int ddot_flop = 2 ; /* 1 mult, 1 sum */ + const unsigned int d4_dot_flop = 12 ; /* 1 mult, 7 sum, 4 compare */ + + const unsigned int p_rank = comm_rank( comm ); + const unsigned int p_size = comm_size( comm ); + + const unsigned int max_array = length_array[ num_tests - 1 ]; + + unsigned int local_max_size = 0 ; + unsigned int i_test ; + + TPI_Init( nthreads ); + + if ( 0 == p_rank ) { + fprintf(stdout,"\n\"DDOT and D4DOT Performance testing\"\n"); + fprintf(stdout,"\"MPI size = %u , TPI size = %d , BlockSize = %d , #Trials = %u\"\n",p_size,nthreads,nblock,num_trials); + fprintf(stdout,"\"TEST\" , \"LENGTH\" , \"#CYCLE\" , \"DT-MEAN\" , \"DT-STDDEV\" , \"MFLOP-MEAN\" , \"MFLOP-STDDEV\"\n"); + } + + for ( i_test = 0 ; i_test < num_tests ; ++i_test ) { + const unsigned length = length_array[ i_test ]; /* Global */ + const unsigned ncycle = 2 * max_array / length ; + const unsigned local_max = ncycle * ( ( length + p_size - 1 ) / p_size ); + if ( local_max_size < local_max ) { local_max_size = local_max ; } 
+ } + + { + double * const x = (double*) malloc(local_max_size * 2 * sizeof(double)); + double * const y = x + local_max_size ; + + unsigned int i , j ; + + dfill_rand_tp( nblock, 0, local_max_size, x, mag ); + dfill_rand_tp( nblock, local_max_size, local_max_size, y, mag ); + + for ( i_test = 0 ; i_test < num_tests ; ++i_test ) { + const unsigned length = length_array[ i_test ]; /* Global */ + const unsigned ncycle = 2 * max_array / length ; + + unsigned int local_begin , local_length , local_nwork ; + + double dt_sum = 0.0 ; + double dt_sum_2 = 0.0 ; + + my_span( p_size, p_rank, length, & local_begin , & local_length ); + + local_nwork = nblock ? ( local_length + nblock - 1 ) / nblock : 0 ; + + /*--------------------------------------------------------------*/ + + for ( i = 0 ; i < num_trials ; ++i ) { + double dt = TPI_Walltime(); + for ( j = 0 ; j < ncycle ; ++j ) { + ddot_tp( comm, local_nwork, local_length, + x + j * local_length , + y + j * local_length ); + } + dt = TPI_Walltime() - dt ; + comm_reduce_dmax( comm , & dt ); + dt_sum += dt ; + dt_sum_2 += dt * dt ; + } + + if ( 0 == p_rank ) { + const double mflop = ((double)( ddot_flop * length * ncycle ) ) / ((double) 1e6 ); + + const double dt_mean = dt_sum / num_trials ; + const double dt_sdev = sqrt( ( num_trials * dt_sum_2 - dt_sum * dt_sum ) / + ( num_trials * ( num_trials - 1 ) ) ); + const double mflop_mean = mflop / dt_mean ; + const double mflop_sdev = mflop_mean * dt_sdev / ( dt_mean + dt_sdev ); + + fprintf(stdout,"\"DDOT\" , %8u , %8u , %9.5g , %9.5g , %9.5g , %9.5g\n", + length, ncycle, dt_mean, dt_sdev, mflop_mean, mflop_sdev ); + fflush(stdout); + } + } + + for ( i_test = 0 ; i_test < num_tests ; ++i_test ) { + const unsigned length = length_array[ i_test ]; /* Global */ + const unsigned ncycle = 2 * max_array / length ; + + unsigned int local_begin , local_length , local_nwork ; + + double dt_sum = 0 ; + double dt_sum_2 = 0 ; + + my_span( p_size, p_rank, length, & local_begin , & local_length 
); + + local_nwork = nblock ? ( local_length + nblock - 1 ) / nblock : 0 ; + + /*--------------------------------------------------------------*/ + + for ( i = 0 ; i < num_trials ; ++i ) { + double dt = TPI_Walltime(); + for ( j = 0 ; j < ncycle ; ++j ) { + d4_dot_tp( comm, local_nwork, local_length, + x + j * local_length , + y + j * local_length ); + } + dt = TPI_Walltime() - dt ; + comm_reduce_dmax( comm , & dt ); + dt_sum += dt ; + dt_sum_2 += dt * dt ; + } + + if ( 0 == p_rank ) { + const double mflop = ((double)( d4_dot_flop * length * ncycle ) ) / ((double) 1e6 ); + + const double dt_mean = dt_sum / num_trials ; + const double dt_sdev = sqrt( ( num_trials * dt_sum_2 - dt_sum * dt_sum ) / + ( num_trials * ( num_trials - 1 ) ) ); + const double mflop_mean = mflop / dt_mean ; + const double mflop_sdev = mflop_mean * dt_sdev / ( dt_mean + dt_sdev ); + + fprintf(stdout,"\"D4DOT\" , %8u , %8u , %9.5g , %9.5g , %9.5g , %9.5g\n", + length, ncycle, dt_mean, dt_sdev, mflop_mean, mflop_sdev ); + fflush(stdout); + } + } + + /*--------------------------------------------------------------*/ + + free( x ); + } + + TPI_Finalize(); + + return ; +} + +/*--------------------------------------------------------------------*/ + +static +void test_ddot_accuracy( + COMM comm , + const int nthreads , + const int nblock , + const unsigned int num_tests , + const unsigned int length_array[] /* Global array length for each test */ , + const double mag ) +{ + const unsigned int p_rank = comm_rank( comm ); + const unsigned int p_size = comm_size( comm ); + + const unsigned int max_array = length_array[ num_tests - 1 ]; + const unsigned int local_max_size = ( max_array + p_size - 1 ) / p_size ; + + unsigned int i_test ; + + TPI_Init( nthreads ); + + if ( 0 == p_rank ) { + fprintf(stdout,"\n\"DDOT and D4DOT Accuracy testing\"\n"); + fprintf(stdout,"\"MPI size = %u , TPI size = %d , BlockSize = %d\"\n",p_size,nthreads,nblock); + fprintf(stdout,"\"TEST\" , \"LENGTH\" , \"VALUE\"\n"); + } + 
+ { + double * const x = (double*) malloc(local_max_size * 2 * sizeof(double)); + double * const y = x + local_max_size ; + + for ( i_test = 0 ; i_test < num_tests ; ++i_test ) { + const unsigned length = length_array[ i_test ]; /* Global */ + const unsigned length_half = length / 2 ; + + unsigned local_begin , local_length , local_nwork ; + + double val_ddot ; + + my_span( p_size, p_rank, length, & local_begin , & local_length ); + + local_nwork = nblock ? ( local_length + nblock - 1 ) / nblock : 0 ; + + /*--------------------------------------------------------------*/ + + if ( local_begin < length_half ) { + const unsigned len = local_length < length_half - local_begin + ? local_length : length_half - local_begin ; + + dfill_rand_tp( nblock, local_begin, len, x, mag ); + dfill_rand_tp( nblock, length + local_begin, len, y, mag ); + } + + if ( length_half < local_begin + local_length ) { + const unsigned beg = length_half > local_begin + ? length_half : local_begin ; + const unsigned off = beg - local_begin ; + const unsigned len = local_length - off ; + + dfill_rand_tp( nblock, beg - length_half, len, x + off, mag ); + dfill_rand_tp( nblock, length + beg - length_half, len, y + off, - mag ); + } + + /*--------------------------------------------------------------*/ + + val_ddot = ddot_tp( comm, local_nwork, local_length, x, y ); + + if ( 0 == p_rank ) { + fprintf(stdout,"\"DDOT\" , %8u , %9.3g\n", length , val_ddot ); + fflush(stdout); + } + } + + for ( i_test = 0 ; i_test < num_tests ; ++i_test ) { + const unsigned length = length_array[ i_test ]; /* Global */ + const unsigned length_half = length / 2 ; + + unsigned local_begin , local_length , local_nwork ; + + double val_d4_dot ; + + my_span( p_size, p_rank, length, & local_begin , & local_length ); + + local_nwork = nblock ? 
( local_length + nblock - 1 ) / nblock : 0 ; + + /*--------------------------------------------------------------*/ + + if ( local_begin < length_half ) { + const unsigned len = local_length < length_half - local_begin + ? local_length : length_half - local_begin ; + + dfill_rand_tp( nblock, local_begin, len, x, mag ); + dfill_rand_tp( nblock, length + local_begin, len, y, mag ); + } + + if ( length_half < local_begin + local_length ) { + const unsigned beg = length_half > local_begin + ? length_half : local_begin ; + const unsigned off = beg - local_begin ; + const unsigned len = local_length - off ; + + dfill_rand_tp( nblock, beg - length_half, len, x + off, mag ); + dfill_rand_tp( nblock, length + beg - length_half, len, y + off, - mag ); + } + + /*--------------------------------------------------------------*/ + + val_d4_dot = d4_dot_tp( comm, local_nwork, local_length, x , y ); + + if ( 0 == p_rank ) { + fprintf(stdout,"\"DDOT\" , %8u , %9.3g\n", length , val_d4_dot ); + fflush(stdout); + } + } + + /*--------------------------------------------------------------*/ + + free( x ); + } + + TPI_Finalize(); + + return ; +} + +/*--------------------------------------------------------------------*/ + +const unsigned test_lengths[] = + { 1e4 , 2e4 , 5e4 , + 1e5 , 2e5 , 5e5 , + 1e6 , 2e6 , 5e6 , 1e7 }; + +const unsigned test_count = sizeof(test_lengths) / sizeof(unsigned); +const unsigned nblock = 2500 ; + +const double test_mag = 1e4 ; + +static void test_performance( + COMM comm , const int test_thread_count , const int test_thread[] ) +{ + const unsigned num_trials = 11 ; + + int i ; + + for ( i = 0 ; i < test_thread_count ; ++i ) { + + test_ddot_performance( comm , test_thread[i] , nblock, + num_trials , test_count , test_lengths , test_mag ); + + test_ddot_performance( comm , test_thread[i] , 0, + num_trials , test_count , test_lengths , test_mag ); + } +} + +static void test_accuracy( + COMM comm , const int test_thread_count , const int test_thread[] , + 
unsigned test_do ) +{ + int i ; + + if ( test_count < test_do ) { test_do = test_count ; } + + for ( i = 0 ; i < test_thread_count ; ++i ) { + + test_ddot_accuracy( comm, test_thread[i], nblock, + test_do, test_lengths, test_mag ); + + test_ddot_accuracy( comm, test_thread[i], 0, + test_do, test_lengths, test_mag ); + } +} + +/*--------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ + +#define TEST_THREAD_MAX 128 + +#if defined(HAVE_MPI) + +int main( int argc , char **argv ) +{ + int nthread[ TEST_THREAD_MAX ]; + int i ; + + MPI_Init( & argc , & argv ); + + for ( i = 0 ; i < TEST_THREAD_MAX ; ++i ) { nthread[i] = 0 ; } + + if ( 0 == comm_rank( MPI_COMM_WORLD ) ) { + if ( 1 < argc && argc < TEST_THREAD_MAX ) { + nthread[0] = 1 ; + nthread[1] = argc - 1 ; + for ( i = 1 ; i < argc ; ++i ) { nthread[i+1] = atoi( argv[i] ); } + } + else { + nthread[0] = 0 ; + nthread[1] = 1 ; + nthread[2] = 1 ; + } + } + + MPI_Bcast( nthread , TEST_THREAD_MAX , MPI_INT , 0 , MPI_COMM_WORLD ); + + if ( nthread[0] ) { + test_accuracy( MPI_COMM_WORLD , nthread[1] , nthread + 2 , test_count ); + test_performance( MPI_COMM_WORLD , nthread[1] , nthread + 2 ); + } + else { + test_accuracy( MPI_COMM_WORLD , nthread[1] , nthread + 2 , 3 ); + } + + MPI_Finalize(); + + return 0 ; +} + +static int comm_size( COMM comm ) +{ + int size = 0 ; + MPI_Comm_size( comm , & size ); + return size ; +} + +static int comm_rank( COMM comm ) +{ + int rank = 0 ; + MPI_Comm_rank( comm , & rank ); + return rank ; +} + +static void comm_reduce_dmax( COMM comm , double * val ) +{ + double tmp ; + if ( MPI_SUCCESS == + MPI_Allreduce( val , & tmp , 1 , MPI_DOUBLE , MPI_MAX , comm ) ) { + *val = tmp ; + } + else { + *val = 0 ; + } +} + +static void comm_reduce_dsum( COMM comm , double * val ) +{ + double tmp ; + if ( MPI_SUCCESS == + MPI_Allreduce( val , & tmp , 1 , MPI_DOUBLE , MPI_SUM , comm ) ) { + *val = tmp ; + } + else { + 
*val = 0 ; + } +} + +static void comm_reduce_d4_op( void * argin , + void * argout , + int * n , + MPI_Datatype * d ) +{ + if ( d && n && *n == 4 ) { + double * const in = (double*) argin ; + double * const out = (double*) argout ; + d2_add_d( out , in[0] ); + d2_add_d( out , in[1] ); + d2_add_d( out + 2 , in[2] ); + d2_add_d( out + 2 , in[3] ); + } + return ; +} + +static void comm_reduce_d4_sum( COMM comm , double * val ) +{ + double tmp[4] ; + MPI_Op mpi_op = MPI_OP_NULL ; + + /* Use Reduce->Bcast instead of Allreduce due to a bug with the SUN MPI. */ + + MPI_Op_create( comm_reduce_d4_op , 0 , & mpi_op ); + MPI_Reduce( val , tmp , 4 , MPI_DOUBLE , mpi_op , 0 , comm ); + MPI_Bcast( tmp , 4 , MPI_DOUBLE , 0 , comm ); + MPI_Op_free( & mpi_op ); + + val[0] = tmp[0] ; + val[1] = tmp[1] ; + val[2] = tmp[2] ; + val[3] = tmp[3] ; +} + +#else + +int main( int argc , char **argv ) +{ + int nthread[ TEST_THREAD_MAX ]; + int i ; + + for ( i = 0 ; i < TEST_THREAD_MAX ; ++i ) { nthread[i] = 0 ; } + + if ( 1 < argc && argc < TEST_THREAD_MAX ) { + nthread[0] = 1 ; + nthread[1] = argc - 1 ; + for ( i = 1 ; i < argc ; ++i ) { nthread[i+1] = atoi( argv[i] ); } + } + else { + nthread[0] = 0 ; + nthread[1] = 4 ; + nthread[2] = 1 ; + nthread[3] = 2 ; + nthread[4] = 4 ; + nthread[5] = 8 ; + } + + if ( nthread[0] ) { + test_accuracy( 0 , nthread[1] , nthread + 2 , test_count ); + test_performance( 0 , nthread[1] , nthread + 2 ); + } + else { + test_accuracy( 0 , nthread[1] , nthread + 2 , 3 ); + } + + return 0 ; +} + +static int comm_size( COMM comm ) { return comm ? -1 : 1 ; } +static int comm_rank( COMM comm ) { return comm ? 
-1 : 0 ; } +static void comm_reduce_dmax( COMM comm , double * val ) +{ + if ( comm ) { *val = 0 ; } + return ; +} +static void comm_reduce_dsum( COMM comm , double * val ) +{ + if ( comm ) { *val = 0 ; } + return ; +} +static void comm_reduce_d4_sum( COMM comm , double * val ) +{ + if ( comm ) { val[0] = val[1] = val[2] = val[3] = 0 ; } + return ; +} + +#endif + +/*--------------------------------------------------------------------*/ + diff --git a/mkl/basic/optional/ThreadPool/test/test_pthreads.c b/mkl/basic/optional/ThreadPool/test/test_pthreads.c new file mode 100644 index 0000000..235eb41 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/test_pthreads.c @@ -0,0 +1,279 @@ +/*------------------------------------------------------------------------*/ +/* TPI: Thread Pool Interface */ +/* Copyright (2008) Sandia Corporation */ +/* */ +/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ +/* license for use of this work by or on behalf of the U.S. Government. */ +/* */ +/* This library is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU Lesser General Public License as */ +/* published by the Free Software Foundation; either version 2.1 of the */ +/* License, or (at your option) any later version. */ +/* */ +/* This library is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ +/* Lesser General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU Lesser General Public */ +/* License along with this library; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ +/* USA */ +/*------------------------------------------------------------------------*/ +/** + * @author H. 
Carter Edwards + */ + +#include +#include +#include +#include + +/*------------------------------------------------------------------------*/ +/* Test various ways of controling worker threads */ + +typedef struct TestPthreads_struct { + pthread_mutex_t m_lock ; + pthread_cond_t m_cond ; + int m_thread_rank ; + int m_thread_count ; +} TestPthreads ; + +/*------------------------------------------------------------------------*/ +/*------------------------------------------------------------------------*/ + +static void * test_driver( void * arg ) +{ + TestPthreads * const data = (TestPthreads*) arg ; + TestPthreads * const root = data - data->m_thread_rank ; + + /*------------------------------*/ + /* Initializing */ + + pthread_mutex_lock( & data->m_lock ); + + pthread_mutex_lock( & root->m_lock ); + pthread_cond_signal( & root->m_cond ); + pthread_mutex_unlock( & root->m_lock ); + + /*------------------------------*/ + + while ( data->m_thread_rank ) { + pthread_cond_wait( & data->m_cond , & data->m_lock ); + } + pthread_mutex_unlock( & data->m_lock ); + + /*------------------------------*/ + /* Terminating */ + + pthread_mutex_lock( & root->m_lock ); + if ( 0 == --( root->m_thread_count ) ) { + pthread_cond_signal( & root->m_cond ); + } + pthread_mutex_unlock( & root->m_lock ); + + return NULL ; +} + + +static void test_run( pthread_attr_t * const thread_attr , + const int number_threads , + const int number_trials , + const int number_loops , + double * const dt_start_stop , + double * const dt_loop ) +{ + TestPthreads data[ number_threads ]; + double dt_total ; + double dt_run = 0 ; + int j ; + + dt_total = TPI_Walltime(); + + for ( j = 0 ; j < number_trials ; ++j ) { + int i ; + + for ( i = 0 ; i < number_threads ; ++i ) { + pthread_cond_init( & data[i].m_cond , NULL ); + pthread_mutex_init( & data[i].m_lock , NULL ); + data[i].m_thread_rank = i ; + data[i].m_thread_count = number_threads ; + } + + pthread_mutex_lock( & data->m_lock ); + + for ( i = 1 ; i < 
number_threads ; ++i ) { + pthread_t pt ; + pthread_create( & pt, thread_attr, & test_driver , data + i ); + pthread_cond_wait( & data->m_cond , & data->m_lock ); + pthread_mutex_lock( & data[i].m_lock ); + } + + /* Running */ + + { + double dt = TPI_Walltime(); + int k ; + + for ( k = 1 ; k < number_loops ; ++k ) { + for ( i = 1 ; i < number_threads ; ++i ) { + pthread_cond_signal( & data[i].m_cond ); + pthread_mutex_unlock( & data[i].m_lock ); + } + + /* Work goes here */ + + for ( i = 1 ; i < number_threads ; ++i ) { + pthread_mutex_lock( & data[i].m_lock ); + } + } + + dt_run += TPI_Walltime() - dt ; + } + + /* Termination */ + + --( data->m_thread_count ); + + if ( data->m_thread_count ) { + for ( i = 1 ; i < number_threads ; ++i ) { + data[i].m_thread_rank = 0 ; + pthread_cond_signal( & data[i].m_cond ); + pthread_mutex_unlock( & data[i].m_lock ); + } + + pthread_cond_wait( & data->m_cond , & data->m_lock ); + } + + pthread_mutex_unlock( & data->m_lock ); + + for ( i = 0 ; i < number_threads ; ++i ) { + pthread_cond_destroy( & data[i].m_cond ); + pthread_mutex_destroy( & data[i].m_lock ); + } + } + + dt_total = TPI_Walltime() - dt_total ; + + *dt_loop = 1.0e6 * dt_run / (double) ( number_trials * number_loops ); + *dt_start_stop = 1.0e6 * ( dt_total - dt_run ) / (double) number_trials ; +} + +/*------------------------------------------------------------------------*/ +/*------------------------------------------------------------------------*/ + +static double test_mutex_init_destroy( const int number ) +{ + pthread_mutex_t mutex ; + double dt ; + int i ; + dt = TPI_Walltime(); + for ( i = 0 ; i < number ; ++i ) { + pthread_mutex_init( & mutex , NULL ); + pthread_mutex_destroy( & mutex ); + } + dt = ( TPI_Walltime() - dt ) / (double) number ; + return dt ; +} + +static double test_mutex_lock_unlock( const int number ) +{ + pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER ; + double dt ; + int i ; + + dt = TPI_Walltime(); + for ( i = 0 ; i < number ; ++i ) { 
+    pthread_mutex_lock( & mutex );
+    pthread_mutex_unlock( & mutex );
+  }
+  dt = ( TPI_Walltime() - dt ) / (double) number ;
+
+  pthread_mutex_destroy( & mutex );
+  return dt ;
+}
+
+/*------------------------------------------------------------------------*/
+
+/* Print pthreads timing tables to stdout.
+ *
+ * Reports the per-call cost of mutex init/destroy and lock/unlock (scaled
+ * by 1e6), then calls test_run() once for each of the n_test thread counts
+ * in n_concurrent[], first with a PTHREAD_SCOPE_SYSTEM attribute and then
+ * with a PTHREAD_SCOPE_PROCESS attribute.
+ *
+ * Each table row has four comma-separated fields, so the header row must
+ * also name four comma-separated columns.
+ */
+void test_pthreads_performance( int n_test , int * n_concurrent )
+{
+  const int n_mutex = 1e4 /* 1e8 */ ;
+  const int n_trial = 1e2 /* 1e4 */ ;
+  const int n_loop  = 1e3 /* 1e4 */ ;
+
+  {
+    const double dt = 1e6 * test_mutex_init_destroy( n_mutex );
+    fprintf(stdout,"\n\"test pthreads mutex init/destroy (microsec)\" , %g\n",dt);
+  }
+
+  {
+    const double dt = 1e6 * test_mutex_lock_unlock( n_mutex );
+    fprintf(stdout,"\n\"test pthreads mutex lock/unlock (microsec)\" , %g\n",dt);
+  }
+
+  /*------------------------------------------------------------------*/
+
+  {
+    int i ;
+
+    pthread_attr_t thread_attr ;
+
+    fprintf(stdout,"\n\"test pthreads SCOPE_SYSTEM run-blocking\"\n");
+    /* Header: one name per field printed by the row fprintf below. */
+    fprintf(stdout,"\"#Threads\" , \"#Spawned\" , \"Spawn (microsec)\" , \"Loop (microsec)\"\n");
+
+    pthread_attr_init( & thread_attr );
+    pthread_attr_setscope( & thread_attr, PTHREAD_SCOPE_SYSTEM );
+    pthread_attr_setdetachstate( & thread_attr, PTHREAD_CREATE_DETACHED );
+
+    for ( i = 0 ; i < n_test ; ++i ) {
+      const int nthread = n_concurrent[i] ;
+      double dt_start_stop , dt_loop ;
+
+      test_run( & thread_attr, nthread, n_trial, n_loop,
+                & dt_start_stop , & dt_loop );
+
+      fprintf( stdout, "%d , %d , %g , %g\n",
+               nthread , nthread - 1 , dt_start_stop , dt_loop );
+      fflush( stdout );
+    }
+
+    pthread_attr_destroy( & thread_attr );
+  }
+
+  /*------------------------------------------------------------------*/
+
+  {
+    int i ;
+
+    pthread_attr_t thread_attr ;
+
+    fprintf(stdout,"\n\"test pthreads SCOPE_PROCESS run-blocking\"\n");
+    /* Header: same four columns as the SCOPE_SYSTEM table. */
+    fprintf(stdout,"\"#Threads\" , \"#Spawned\" , \"Spawn (microsec)\" , \"Loop (microsec)\"\n");
+
+    pthread_attr_init( & thread_attr );
+    pthread_attr_setscope( & thread_attr, PTHREAD_SCOPE_PROCESS );
+
pthread_attr_setdetachstate( & thread_attr, PTHREAD_CREATE_DETACHED ); + + for ( i = 0 ; i < n_test ; ++i ) { + const int nthread = n_concurrent[i] ; + double dt_start_stop , dt_loop ; + + test_run( & thread_attr, nthread, n_trial, n_loop, + & dt_start_stop , & dt_loop ); + + fprintf( stdout, "%d , %d , %g , %g\n", + nthread , nthread - 1 , dt_start_stop , dt_loop ); + fflush( stdout ); + } + + pthread_attr_destroy( & thread_attr ); + } + + /*------------------------------------------------------------------*/ + + fflush( stdout ); +} + +/*------------------------------------------------------------------------*/ + + diff --git a/mkl/basic/optional/ThreadPool/test/test_tpi.cpp b/mkl/basic/optional/ThreadPool/test/test_tpi.cpp new file mode 100644 index 0000000..cf5a649 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/test_tpi.cpp @@ -0,0 +1,123 @@ +/*------------------------------------------------------------------------*/ +/* TPI: Thread Pool Interface */ +/* Copyright (2008) Sandia Corporation */ +/* */ +/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ +/* license for use of this work by or on behalf of the U.S. Government. */ +/* */ +/* This library is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU Lesser General Public License as */ +/* published by the Free Software Foundation; either version 2.1 of the */ +/* License, or (at your option) any later version. */ +/* */ +/* This library is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ +/* Lesser General Public License for more details. 
*/ +/* */ +/* You should have received a copy of the GNU Lesser General Public */ +/* License along with this library; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ +/* USA */ +/*------------------------------------------------------------------------*/ +/** + * @author H. Carter Edwards + */ + +#include +#include +#include + +/*------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------*/ + +template class TEST ; + +template +class TEST { +public: + int m_flag[N] ; + ~TEST() {} + TEST(); + void flag( TPI::Work & ); + void verify(); +private: + TEST( const TEST & ); + TEST & operator = ( const TEST & ); +}; + +template +TEST::TEST() +{ + for ( unsigned i = 0 ; i < N ; ++i ) { m_flag[i] = 0 ; } +} + +template +void TEST::flag( TPI::Work & work ) +{ + static const char method[] = "TEST::flag" ; + if ( work.count != (int) N ) { + std::cerr << method + << "<" << N << "> count(" << work.count << ") failed" + << std::endl ; + throw std::exception(); + } + m_flag[ work.rank ] = 1 ; +} + +template +void TEST::verify() +{ + static const char method[] = "TEST::verify" ; + + for ( unsigned i = 0 ; i < N ; ++i ) { + if ( ! 
m_flag[i] ) { + std::cerr << method + << "<" << N << "> m_flag[" << i << "] failed" + << std::endl ; + throw std::exception(); + } + else { + m_flag[i] = 0 ; + } + } +} + +void test_tpi_cpp( int np ) +{ + TEST<1> test_1 ; + TEST<2> test_2 ; + TEST<4> test_4 ; + TEST<8> test_8 ; + TEST<16> test_16 ; + + TPI::Init( np ); + + TPI::Run( test_1 , & TEST<1>::flag , 1 ); + TPI::Run( test_2 , & TEST<2>::flag , 2 ); + TPI::Run( test_4 , & TEST<4>::flag , 4 ); + TPI::Run( test_8 , & TEST<8>::flag , 8 ); + TPI::Run( test_16 , & TEST<16>::flag , 16 ); + + test_1.verify(); + test_2.verify(); + test_4.verify(); + test_8.verify(); + test_16.verify(); + + TPI::Finalize(); +} + +int main( int argc , char ** argv ) +{ + if ( argc ) { std::cout << argv[0] ; } + else { std::cout << "test" ; } + test_tpi_cpp(1); std::cout << " 1 " ; + test_tpi_cpp(2); std::cout << " 2 " ; + test_tpi_cpp(4); std::cout << " 4 " ; + test_tpi_cpp(8); std::cout << " 8 " ; + test_tpi_cpp(16); std::cout << " 16 " ; + std::cout << " passed" << std::endl ; + return 0 ; +} + diff --git a/mkl/basic/optional/ThreadPool/test/test_tpi_unit.c b/mkl/basic/optional/ThreadPool/test/test_tpi_unit.c new file mode 100644 index 0000000..34faef8 --- /dev/null +++ b/mkl/basic/optional/ThreadPool/test/test_tpi_unit.c @@ -0,0 +1,505 @@ +/*------------------------------------------------------------------------*/ +/* TPI: Thread Pool Interface */ +/* Copyright (2008) Sandia Corporation */ +/* */ +/* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ +/* license for use of this work by or on behalf of the U.S. Government. */ +/* */ +/* This library is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU Lesser General Public License as */ +/* published by the Free Software Foundation; either version 2.1 of the */ +/* License, or (at your option) any later version. 
*/ +/* */ +/* This library is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ +/* Lesser General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU Lesser General Public */ +/* License along with this library; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ +/* USA */ +/*------------------------------------------------------------------------*/ +/** + * @author H. Carter Edwards + */ + +#include +#include +#include +#include + +#if defined( HAVE_MPI ) +#include +#endif + +/*--------------------------------------------------------------------*/ + +static void test_work( TPI_Work * ); +static void test_reduce_work( TPI_Work * ); +static void test_reduce_init( TPI_Work * ); +static void test_reduce_join( TPI_Work * , const void * ); +static void test_reduce_via_lock( TPI_Work * ); +static void test_reduce_via_nolock( TPI_Work * ); + +void test_tpi_init( const int ntest, const int nthread[], const int ntrial); +void test_tpi_block( const int ntest, const int nthread[], const int ntrial); +void test_tpi_reduce( const int ntest, const int nthread[], const int ntrial); +void test_tpi_work( const int ntest, const int nthread[], + const int nwork , const int ntrial ); +void test_tpi_work_async( + const int ntest , const int nthread[] , const int nwork , const int ntrial ); + +int main( int argc , char ** argv ) +{ + int num_thread[] = { 1 , 2 , 4 , 6 , 8 , 12 , 16 }; + int num_test = sizeof(num_thread) / sizeof(int); + +#if defined( HAVE_MPI ) + int rank ; + + MPI_Init( & argc , & argv ); + MPI_Comm_rank( MPI_COMM_WORLD , & rank ); + if ( 0 == rank ) { +#endif + + const int ntrial = 1 < argc ? atoi( argv[1] ) : 5 ; + const int nwork = 2 < argc ? atoi( argv[2] ) : 100 ; + + /* Get the configuration print message out. 
*/ + fprintf( stdout , "\"%s\"\n" , TPI_Version() ); + fprintf( stdout , "\"Unit Testing: ntrial = %d , nwork = %d\"\n" , ntrial , nwork ); + + test_tpi_init( num_test , num_thread , ntrial ); + test_tpi_block( num_test , num_thread , ntrial ); + test_tpi_reduce( num_test , num_thread , ntrial ); + test_tpi_work( num_test , num_thread , nwork , ntrial ); + test_tpi_work_async( num_test , num_thread , nwork , ntrial ); + +#if defined( HAVE_MPI ) + } + MPI_Finalize(); +#endif + + return 0 ; +} + +/*--------------------------------------------------------------------*/ + +void test_tpi_init( const int ntest , const int nthread[] , const int ntrial ) +{ + int j ; + + fprintf( stdout , "\n\"TEST TPI_Init / TPI_Finalize\"\n" ); + fprintf( stdout , "\"#Thread\" , \"#Trial\" , \"TPI_Init(avg-msec)\" , \"TPI_Init(stddev-msec)\" , \"TPI_Finalize(avg-msec)\" , \"TPI_Finalize(stddev-msec)\"\n"); + + for ( j = 0 ; j < ntest ; ++j ) { + const int nth = nthread[j]; + double dt_init_total = 0.0 ; + double dt_init_total_2 = 0.0 ; + double dt_fin_total = 0.0 ; + double dt_fin_total_2 = 0.0 ; + int i ; + int result ; + + for ( i = 0 ; i < ntrial ; ++i ) { + double t , dt ; + + t = TPI_Walltime(); + result = TPI_Init( nth ); + dt = TPI_Walltime() - t ; + dt_init_total += dt ; + dt_init_total_2 += dt * dt ; + + if ( result != nth ) { + fprintf(stderr,"%d != TPI_Init(%d) : FAILED at trial %d\n", + result , nth , i ); + abort(); + } + + t = TPI_Walltime(); + TPI_Finalize(); + dt = TPI_Walltime() - t ; + dt_fin_total += dt ; + dt_fin_total_2 += dt * dt ; + } + + if ( 1 < ntrial ) { + const double init_mean = 1.0e6 * dt_init_total / ntrial ; + const double init_sdev = 1.0e6 * sqrt( ( ntrial * dt_init_total_2 - + dt_init_total * dt_init_total ) / + ( ntrial * ( ntrial - 1 ) ) ); + + const double fin_mean = 1.0e6 * dt_fin_total / ntrial ; + const double fin_sdev = 1.0e6 * sqrt( ( ntrial * dt_fin_total_2 - + dt_fin_total * dt_fin_total ) / + ( ntrial * ( ntrial - 1 ) ) ); + + 
fprintf(stdout,"%d , %d , %10g , %10g , %10g , %10g\n", + nth , ntrial , init_mean , init_sdev , fin_mean , fin_sdev ); + } + } +} + +/*--------------------------------------------------------------------*/ + +void test_tpi_block( const int ntest , const int nthread[] , const int ntrial ) +{ + int i, j ; + + fprintf( stdout , "\n\"TEST TPI_Block / TPI_Unblock\"\n" ); + fprintf( stdout , "\"#Thread\" , \"#Trial\" , \"TPI_Block(avg-msec)\" , \"TPI_Block(stddev-msec)\" , \"TPI_Unblock(avg-msec)\" , \"TPI_Unblock(stddev-msec)\"\n"); + + for ( j = 0 ; j < ntest ; ++j ) { + const int nth = nthread[j]; + + double dt_block_total = 0.0 ; + double dt_block_total_2 = 0.0 ; + double dt_unblock_total = 0.0 ; + double dt_unblock_total_2 = 0.0 ; + + int result = TPI_Init( nth ); + + if ( result != nth ) { + fprintf(stderr,"%d != TPI_Init(%d) : FAILED\n", result , nth ); + abort(); + } + + for ( i = 0 ; i < ntrial ; ++i ) { + double t , dt ; + + t = TPI_Walltime(); + TPI_Block(); + dt = TPI_Walltime() - t ; + dt_block_total += dt ; + dt_block_total_2 += dt * dt ; + + + t = TPI_Walltime(); + TPI_Unblock(); + dt = TPI_Walltime() - t ; + dt_unblock_total += dt ; + dt_unblock_total_2 += dt * dt ; + } + + TPI_Finalize(); + + if ( 1 < ntrial ) { + const double block_mean = 1.0e6 * dt_block_total / ntrial ; + const double block_sdev = 1.0e6 * sqrt( ( ntrial * dt_block_total_2 - + dt_block_total * dt_block_total ) / + ( ntrial * ( ntrial - 1 ) ) ); + + const double unblock_mean = 1.0e6 * dt_unblock_total / ntrial ; + const double unblock_sdev = 1.0e6 * sqrt( ( ntrial * dt_unblock_total_2 - + dt_unblock_total * dt_unblock_total) / + ( ntrial * ( ntrial - 1 ) ) ); + + fprintf(stdout,"%d , %d , %10g , %10g , %10g , %10g\n", + nth , ntrial , block_mean , block_sdev , unblock_mean , unblock_sdev ); + } + } +} + +/*--------------------------------------------------------------------*/ + +void test_tpi_reduce( const int ntest , const int nthread[] , const int ntrial ) +{ + int j ; + + fprintf( 
stdout , "\n\"TEST TPI_Run_threads(reduce) / TPI_Run_threads_reduce\"\n" ); + fprintf( stdout , "\"#Thread\" , \"#Trial\" , \"TPI_Run_threads(avg-msec)\" , \"TPI_Run_threads(stddev-msec)\" , \"TPI_Run_threads_reduce(avg-msec)\" , \"TPI_Run_threads_reduce(stddev-msec)\"\n"); + + for ( j = 0 ; j < ntest ; ++j ) { + const int nth = nthread[j]; + + double dt_lock_total = 0.0 ; + double dt_lock_total_2 = 0.0 ; + double dt_reduce_total = 0.0 ; + double dt_reduce_total_2 = 0.0 ; + int i ; + + int result = TPI_Init( nth ); + + if ( result != nth ) { + fprintf(stderr,"%d != TPI_Init(%d) : FAILED\n", result , nth ); + } + + for ( i = 0 ; i < ntrial ; ++i ) { + double t , dt ; + int value = 0 ; + int * const ptr = & value ; + + t = TPI_Walltime(); + TPI_Run_threads( test_reduce_via_lock , & ptr , 1 ); + dt = TPI_Walltime() - t ; + dt_lock_total += dt ; + dt_lock_total_2 += dt * dt ; + + if ( value != nth ) { + fprintf(stderr, + "TPI_Run_threads(reduce,...) : FAILED at trial %d\n", + i ); + abort(); + } + + value = 0 ; + + t = TPI_Walltime(); + TPI_Run_threads_reduce( test_reduce_via_nolock , NULL , + test_reduce_join , test_reduce_init , + sizeof(value) , & value ); + + dt = TPI_Walltime() - t ; + dt_reduce_total += dt ; + dt_reduce_total_2 += dt * dt ; + + if ( value != nth ) { + fprintf(stderr, + "TPI_Run_threads_reduce(...) 
: FAILED at trial %d\n", + i ); + abort(); + } + } + + TPI_Finalize(); + + if ( 1 < ntrial ) { + const double lock_mean = 1.0e6 * dt_lock_total / ntrial ; + const double lock_sdev = 1.0e6 * sqrt( ( ntrial * dt_lock_total_2 - + dt_lock_total * dt_lock_total ) / + ( ntrial * ( ntrial - 1 ) ) ); + + const double reduce_mean = 1.0e6 * dt_reduce_total / ntrial ; + const double reduce_sdev = 1.0e6 * sqrt( ( ntrial * dt_reduce_total_2 - + dt_reduce_total * dt_reduce_total) / + ( ntrial * ( ntrial - 1 ) ) ); + + fprintf(stdout,"%d , %d , %10g , %10g , %10g , %10g\n", + nth, ntrial, lock_mean, lock_sdev, reduce_mean, reduce_sdev); + } + } +} + +/*--------------------------------------------------------------------*/ + +void test_tpi_work( const int ntest , const int nthread[] , const int nwork , + const int ntrial ) +{ + int * const flags = (int *) malloc( sizeof(int) * nwork ); + int j ; + + fprintf( stdout , "\n\"TEST TPI_Run / TPI_Run_reduce\"\n" ); + fprintf( stdout , "\"#Thread\" , \"#Work\" , \"#Trial\" , \"TPI_Run(avg-msec)\" , \"TPI_Run(stddev-msec)\" , \"TPI_Run_reduce(avg-msec)\" , \"TPI_Run_reduce(stddev-msec)\"\n"); + + for ( j = 0 ; j < ntest ; ++j ) { + const int nth = nthread[j]; + + double dt_work_total = 0.0 ; + double dt_work_total_2 = 0.0 ; + double dt_reduce_total = 0.0 ; + double dt_reduce_total_2 = 0.0 ; + int i , k ; + + int result = TPI_Init( nth ); + + if ( result != nth ) { + fprintf(stderr,"%d != TPI_Init(%d) : FAILED\n", result , nth ); + } + + for ( i = 0 ; i < ntrial ; ++i ) { + double t , dt ; + int value = 0 ; + + for ( k = 0 ; k < nwork ; ++k ) { flags[k] = 0 ; } + + t = TPI_Walltime(); + TPI_Run( test_work , & flags , nwork , 0 ); + dt = TPI_Walltime() - t ; + dt_work_total += dt ; + dt_work_total_2 += dt * dt ; + + for ( k = 0 ; k < nwork && flags[k] ; ++k ); + + if ( k < nwork ) { + fprintf(stderr, "TPI_Run(...) 
: FAILED at trial %d\n", i ); + abort(); + } + + for ( k = 0 ; k < nwork ; ++k ) { flags[k] = 0 ; } + + t = TPI_Walltime(); + TPI_Run_reduce( test_reduce_work , & flags , nwork , + test_reduce_join , test_reduce_init , + sizeof(value) , & value ); + + dt = TPI_Walltime() - t ; + dt_reduce_total += dt ; + dt_reduce_total_2 += dt * dt ; + + for ( k = 0 ; k < nwork && flags[k] ; ++k ); + + if ( value != nwork || k < nwork ) { + fprintf(stderr, "TPI_Run_reduce(...) : FAILED at trial %d\n", i ); + abort(); + } + } + + TPI_Finalize(); + + if ( 1 < ntrial ) { + const double work_mean = 1.0e6 * dt_work_total / ntrial ; + const double work_sdev = 1.0e6 * sqrt( ( ntrial * dt_work_total_2 - + dt_work_total * dt_work_total ) / + ( ntrial * ( ntrial - 1 ) ) ); + + const double reduce_mean = 1.0e6 * dt_reduce_total / ntrial ; + const double reduce_sdev = 1.0e6 * sqrt( ( ntrial * dt_reduce_total_2 - + dt_reduce_total * dt_reduce_total) / + ( ntrial * ( ntrial - 1 ) ) ); + + fprintf(stdout,"%d , %d , %d , %10g , %10g , %10g , %10g\n", + nth, ntrial, nwork, work_mean, work_sdev, reduce_mean, reduce_sdev); + } + } + + free( flags ); +} + +/*--------------------------------------------------------------------*/ + +void test_tpi_work_async( + const int ntest , const int nthread[] , const int nwork , const int ntrial ) +{ + int * const flags = (int *) malloc( sizeof(int) * nwork ); + int j ; + + fprintf( stdout , "\n\"TEST TPI_Start / TPI_Start_reduce\"\n" ); + fprintf( stdout , "\"#Thread\" , \"#Work\" , \"#Trial\" , \"TPI_Start(avg-msec)\" , \"TPI_Start(stddev-msec)\" , \"TPI_Start_reduce(avg-msec)\" , \"TPI_Start_reduce(stddev-msec)\"\n"); + + for ( j = 0 ; j < ntest ; ++j ) { + const int nth = nthread[j]; + + double dt_work_total = 0.0 ; + double dt_work_total_2 = 0.0 ; + double dt_reduce_total = 0.0 ; + double dt_reduce_total_2 = 0.0 ; + int i , k ; + + int result = TPI_Init( nth ); + + if ( result != nth ) { + fprintf(stderr,"%d != TPI_Init(%d) : FAILED\n", result , nth ); + } + 
+    for ( i = 0 ; i < ntrial ; ++i ) {
+      double t , dt ;
+      int value = 0 ;
+
+      for ( k = 0 ; k < nwork ; ++k ) { flags[k] = 0 ; }
+
+      /* Timed region covers both the start and the wait for completion. */
+      t = TPI_Walltime();
+      TPI_Start( test_work , & flags , nwork , 0 );
+      TPI_Wait();
+      dt = TPI_Walltime() - t ;
+      dt_work_total += dt ;
+      dt_work_total_2 += dt * dt ;
+
+      /* Advance k to the first work item whose flag was not set. */
+      for ( k = 0 ; k < nwork && flags[k] ; ++k );
+
+      if ( k < nwork ) {
+        /* Name the call actually under test (TPI_Start, not TPI_Run). */
+        fprintf(stderr, "TPI_Start(...) : FAILED at trial %d\n", i );
+        abort();
+      }
+
+      for ( k = 0 ; k < nwork ; ++k ) { flags[k] = 0 ; }
+
+      t = TPI_Walltime();
+
+      TPI_Start_reduce( test_reduce_work , & flags , nwork ,
+                        test_reduce_join , test_reduce_init ,
+                        sizeof(value) , & value );
+      TPI_Wait();
+
+      dt = TPI_Walltime() - t ;
+      dt_reduce_total += dt ;
+      dt_reduce_total_2 += dt * dt ;
+
+      for ( k = 0 ; k < nwork && flags[k] ; ++k );
+
+      if ( value != nwork || k < nwork ) {
+        fprintf(stderr, "TPI_Start_reduce(...) : FAILED at trial %d\n", i );
+        abort();
+      }
+    }
+
+    TPI_Finalize();
+
+    if ( 1 < ntrial ) {
+      const double work_mean = 1.0e6 * dt_work_total / ntrial ;
+      const double work_sdev = 1.0e6 * sqrt( ( ntrial * dt_work_total_2 -
+                                       dt_work_total * dt_work_total ) /
+                                     ( ntrial * ( ntrial - 1 ) ) );
+
+      const double reduce_mean = 1.0e6 * dt_reduce_total / ntrial ;
+      const double reduce_sdev = 1.0e6 * sqrt( ( ntrial * dt_reduce_total_2 -
+                                       dt_reduce_total * dt_reduce_total) /
+                                     ( ntrial * ( ntrial - 1 ) ) );
+
+      /* Column order follows the header: #Thread , #Work , #Trial , ... */
+      fprintf(stdout,"%d , %d , %d , %10g , %10g , %10g , %10g\n",
+              nth, nwork, ntrial, work_mean, work_sdev, reduce_mean, reduce_sdev);
+    }
+  }
+
+  free( flags );
+}
+
+/*--------------------------------------------------------------------*/
+
+/* Work routine: mark this work item's slot in the caller's flag array.
+ * work->info holds the address of the 'int *' flags pointer.
+ */
+static void test_work( TPI_Work * work )
+{
+  int * const flags = * (int *const*) work->info ;
+  flags[ work->rank ] = 1 ;
+}
+
+/* Work routine for the reduce tests: mark this work item's slot and add
+ * one to the int that work->reduce points to.
+ */
+static void test_reduce_work( TPI_Work * work )
+{
+  int * const flags = * (int *const*) work->info ;
+  flags[ work->rank ] = 1 ;
+
+  *((int *) work->reduce) += 1 ;
+}
+
+static void test_reduce_init( TPI_Work * work )
+{
+  *((int *)
work->reduce) = 0 ; +} + +static void test_reduce_join( TPI_Work * work , const void * src ) +{ + *((int *) work->reduce) += *( (const int *) src ); +} + +static void test_reduce_via_lock( TPI_Work * work ) +{ + int * const value = * ((int *const*) work->info ); + int result ; + if ( ( result = TPI_Lock(0) ) ) { + fprintf(stderr,"TPI_Lock(0) = %d : FAILED\n", result); + abort(); + } + *value += 1 ; + if ( ( result = TPI_Unlock(0) ) ) { + fprintf(stderr,"TPI_Unlock(0) = %d : FAILED\n", result); + abort(); + } +} + +static void test_reduce_via_nolock( TPI_Work * work ) +{ + int * const value = (int *) work->reduce ; + *value += 1 ; +} + +/*--------------------------------------------------------------------*/ + diff --git a/mkl/basic/optional/copy_from_trilinos b/mkl/basic/optional/copy_from_trilinos new file mode 100755 index 0000000..042e4fb --- /dev/null +++ b/mkl/basic/optional/copy_from_trilinos @@ -0,0 +1,25 @@ +#!/bin/bash + +TRILINOS_SRC=$1 + +if [ -d "${TRILINOS_SRC}" -a -d "${TRILINOS_SRC}/packages" ] ; +then + +#----------------------------------------------------------------------- +cp -r ${TRILINOS_SRC}/packages/ThreadPool/* ThreadPool +rm -rf ThreadPool/doc + +cat << END_CAT > ThreadPool/ThreadPool_config.h +#ifndef HAVE_PTHREAD +#define HAVE_PTHREAD +#endif +END_CAT + +#----------------------------------------------------------------------- + +else + + echo 'usage: ' $0 '' + +fi + diff --git a/mkl/basic/optional/cuda/CudaCall.hpp b/mkl/basic/optional/cuda/CudaCall.hpp new file mode 100644 index 0000000..f4b8c70 --- /dev/null +++ b/mkl/basic/optional/cuda/CudaCall.hpp @@ -0,0 +1,21 @@ +#ifndef stk_algsup_CudaCall_hpp +#define stk_algsup_CudaCall_hpp + +#include +#include + +//---------------------------------------------------------------- +inline +void stk_cuda_call(cudaError err , const char* name ) +{ + if ( err != cudaSuccess ) { + fprintf(stderr, "%s error: %s\n",name, cudaGetErrorString(err) ); + exit(-1); + } +} + +#define CUDA_CALL( cuda_fn ) 
stk_cuda_call( cuda_fn , #cuda_fn ) + + +#endif + diff --git a/mkl/basic/optional/cuda/CudaMemoryModel.hpp b/mkl/basic/optional/cuda/CudaMemoryModel.hpp new file mode 100644 index 0000000..54d189e --- /dev/null +++ b/mkl/basic/optional/cuda/CudaMemoryModel.hpp @@ -0,0 +1,152 @@ +#ifndef _CudaMemoryModel_hpp_ +#define _CudaMemoryModel_hpp_ + +#include +#ifdef MINIFE_HAVE_CUDA + +#include +#include +#include + +#include +#include +#include + +class CudaMemoryModel { + public: + CudaMemoryModel() + : host_to_device_map(), + device_to_host_map() + {} + + /** Destructor + * Upon destruction this class de-allocates all device-buffers that + * it was tracking. + */ + virtual ~CudaMemoryModel(); + + /** Return a device-pointer corresponding to the given host-ptr and size. + * The returned device-pointer points to a buffer which has been allocated + * on the CUDA device with length buf_size*sizeof(T), but not initialized. + * + * If a device-pointer has already been allocated for the given host-pointer + * (by a previous call to this method) then that (previously-allocated) device-pointer + * is returned. + */ + template + T* get_buffer(const T* host_ptr, size_t buf_size); + + /** Destroy (free) the specified device-pointer. + * + * De-allocates the cuda-device buffer. + */ + template + void destroy_buffer(T*& device_ptr); + + /** Copy the contents of the given host-ptr to the given device-ptr. + * If the given device-ptr is not known (was not created by a previous + * call to get_buffer), an exception is thrown. + */ + template + void copy_to_buffer(const T* host_ptr, size_t buf_size, T* device_ptr); + + /** Copy the contents of the given device-ptr to the given host-ptr. + * If the given device-ptr is not known (was not created by a previous + * call to get_buffer), an exception is thrown. 
+ */ + template + void copy_from_buffer(T* host_ptr, size_t buf_size, const T* device_ptr); + + private: + std::map host_to_device_map; + std::map device_to_host_map; +}; + +//------------------------------------------------------------------------------ +template +inline +T* CudaMemoryModel::get_buffer(const T* host_ptr, size_t buf_size) +{ + T* device_ptr = NULL; + + std::map::iterator iter = host_to_device_map.find(host_ptr); + + if (iter == host_to_device_map.end()) { + CUDA_CALL( cudaMalloc( (void**)&device_ptr, sizeof(T)*buf_size) ); + + host_to_device_map.insert( std::make_pair(host_ptr, device_ptr) ); + device_to_host_map.insert( std::make_pair(device_ptr, host_ptr) ); + } + else { + device_ptr = reinterpret_cast(iter->second); + } + + return device_ptr; +} + +//------------------------------------------------------------------------------ +template +inline +void CudaMemoryModel::destroy_buffer(T*& device_ptr) +{ + std::map::iterator iter = device_to_host_map.find(device_ptr); + if (iter != device_to_host_map.end()) { + const void* host_ptr = iter->second; + if (host_ptr != NULL) { + std::map::iterator iter2 = host_to_device_map.find(host_ptr); + if (iter2 != host_to_device_map.end()) { + host_to_device_map.erase(iter2); + } + } + CUDA_CALL( cudaFree(device_ptr) ); + device_ptr = NULL; + device_to_host_map.erase(iter); + } +} + +//------------------------------------------------------------------------------ +template +inline +void CudaMemoryModel::copy_to_buffer(const T* host_ptr, size_t buf_size, T* device_ptr) +{ + std::map::iterator iter = device_to_host_map.find(device_ptr); + if (iter == device_to_host_map.end()) { + //failed to find device_ptr in device_to_host_map + throw std::runtime_error("CudaMemoryModel::copy_to_buffer ERROR, device_ptr not known."); + } + + CUDA_CALL( cudaMemcpy( device_ptr, host_ptr, sizeof(T)*buf_size, cudaMemcpyHostToDevice) ); +} + +//------------------------------------------------------------------------------ +template 
+inline +void CudaMemoryModel::copy_from_buffer(T* host_ptr, size_t buf_size, const T* device_ptr) +{ + std::map::iterator iter = device_to_host_map.find(device_ptr); + if (iter == device_to_host_map.end()) { + //failed to find device_ptr in device_to_host_map + throw std::runtime_error("CudaMemoryModel::copy_from_buffer ERROR, device_ptr not known."); + } + + CUDA_CALL( cudaMemcpy( host_ptr, device_ptr, sizeof(T)*buf_size, cudaMemcpyDeviceToHost) ); +} + +inline +CudaMemoryModel::~CudaMemoryModel() +{ + std::map::iterator + iter = device_to_host_map.begin(), + iter_end = device_to_host_map.end(); + + for(; iter!=iter_end; ++iter) { + //cast away const so we can free the pointer: + void* dev_ptr = const_cast(iter->first); + CUDA_CALL( cudaFree(dev_ptr) ); + } +} + +#endif + +#endif + diff --git a/mkl/basic/optional/cuda/CudaNode.cpp b/mkl/basic/optional/cuda/CudaNode.cpp new file mode 100644 index 0000000..5ddc580 --- /dev/null +++ b/mkl/basic/optional/cuda/CudaNode.cpp @@ -0,0 +1,96 @@ +#include +#include +#include +#include + +// some CUDA rules of thumb employed here (stolen from slides by Mike Bailey, Oregon State) +// -The number of Blocks should be at least twice the number of MPs +// -The number of Threads per Block should be a multiple of 64 +// - 192 or 256 are good numbers for Threads/Block +// We will enforce that numThreads is a power of two (to ease the reduction kernel) +// greater than 64 + +CUDANode::CUDANode(int device, int numBlocks, int numThreads, int verbose) +: numBlocks_(numBlocks) +, numThreads_(numThreads) +, h_blk_mem_(NULL) +, d_blk_mem_(NULL) +, blk_mem_size_(0) +{ + using std::cout; + using std::endl; + using std::runtime_error; + // enforce that numThreads_ is a multiple of 64 + if (numThreads_ != 64 && numThreads_ != 128 && numThreads_ != 256 && numThreads_ != 512 + && numThreads_ != 1 && numThreads_ != 2 && numThreads_ != 4 && numThreads_ != 8 && numThreads_ != 16 + && numThreads_ != 32) { +// throw 
runtime_error("CUDANode::CUDANode(): number of threads per block must be a power of two in [1,512]."); + } + int deviceCount; cudaGetDeviceCount(&deviceCount); + if (device >= deviceCount) { + if (deviceCount == 0) { +// throw runtime_error("CUDANode::CUDANode(): system has no CUDA devices."); + } + if (verbose) { + cout << "CUDANode::CUDANode(): specified device number not valid. Using device 0." << endl; + } + device = 0; + } + cudaDeviceProp deviceProp; + int deviceAlreadyBeingUsed = -1; + cudaGetDevice(&deviceAlreadyBeingUsed); + if (deviceAlreadyBeingUsed >= 0 && deviceAlreadyBeingUsed < deviceCount) { + device = deviceAlreadyBeingUsed; + } + else { + cudaSetDevice(device); + } + cudaGetDeviceProperties(&deviceProp, device); + // as of CUDA 2.1, device prop contains the following fields + // char name[256]; + // size_t totalGlobalMem, sharedMemPerBlock; + // int regsPerBlock, warpSize; + // size_t memPitch; + // int maxThreadsPerBlock, maxThreadsDim[3], maxGridSize[3]; + // size_t totalConstMem; + // int major, minor; + // int clockRate; + // size_t textureAlignment; + // int deviceOverlap; + // int multiProcessorCount; + // int kernelExecTimeoutEnabled; + if (verbose) { + cout << "CUDANode attached to device #" << device << " \"" << deviceProp.name + << "\", of compute capability " << deviceProp.major << "." 
<< deviceProp.minor + << endl; + } + totalMem_ = deviceProp.totalGlobalMem; + + expand_blk_mem(numBlocks_*8); +} + +void CUDANode::expand_blk_mem(size_t size_in_bytes) +{ + if (blk_mem_size_ >= size_in_bytes) return; + + if (d_blk_mem_ != NULL) { + cutilSafeCallNoSync( cudaFree(d_blk_mem_) ); + delete [] h_blk_mem_; + } + + cutilSafeCallNoSync( cudaMalloc(&d_blk_mem_, size_in_bytes) ); + h_blk_mem_ = new char[size_in_bytes]; + blk_mem_size_ = size_in_bytes; +} + +CUDANode::~CUDANode() +{ + if (d_blk_mem_ != NULL) { + cutilSafeCallNoSync( cudaFree(d_blk_mem_) ); + d_blk_mem_ = NULL; + delete [] h_blk_mem_; + h_blk_mem_ = NULL; + } + blk_mem_size_ = 0; +} + diff --git a/mkl/basic/optional/cuda/CudaNode.cuh b/mkl/basic/optional/cuda/CudaNode.cuh new file mode 100644 index 0000000..9b1b4fb --- /dev/null +++ b/mkl/basic/optional/cuda/CudaNode.cuh @@ -0,0 +1,66 @@ +#ifndef CUDANODE_CUH_ +#define CUDANODE_CUH_ + +#include +#include +#include +#include +#include + +// must define this before including any kernels +#define KERNEL_PREFIX __device__ __host__ + +#include + +#include + +#ifdef CUDANODE_INCLUDE_PARALLEL_FOR +template +__global__ void +Tkern1D(int length, WDP wd, int stride) +{ + unsigned int i = blockIdx.x*blockDim.x + threadIdx.x; + while(i < length) { + wd(i); + i += stride; + } +} + +template +void CUDANode::parallel_for(int length, WDP wd) { + if (length == 0) return; + unsigned int stride = numThreads_ * numBlocks_; + Tkern1D <<< numBlocks_, numThreads_ >>>(length,wd,stride); +} +#endif // parallel_for + +#ifdef CUDANODE_INCLUDE_PARALLEL_REDUCE +template +void call_dot(DotOp& wd) +{ + printf("ERROR, unknown scalar-type, skipping cuda dot-product.\n"); +} +template<> +void call_dot(DotOp& wd) +{ + wd.result = cublasDdot(wd.n, wd.x, 1, wd.y, 1); +} +template<> +void call_dot(DotOp& wd) +{ + wd.result = cublasSdot(wd.n, wd.x, 1, wd.y, 1); +} + +template +void CUDANode::parallel_reduce(int length, WDP& wd) +{ + if (length == 1) { + wd.result = wd.generate(0); + 
return; + } + + call_dot(wd); +} +#endif // parallel_reduce + +#endif diff --git a/mkl/basic/optional/cuda/CudaNode.hpp b/mkl/basic/optional/cuda/CudaNode.hpp new file mode 100644 index 0000000..de078ea --- /dev/null +++ b/mkl/basic/optional/cuda/CudaNode.hpp @@ -0,0 +1,57 @@ +#ifndef CUDANODE_HPP_ +#define CUDANODE_HPP_ + +#include + +// forward declaration +class CUDANode; + +class CUDANode : public CudaMemoryModel { + public: + + CUDANode(int device = 0, int numBlocks = -1, int numThreads = 256, int verbose = 1); + + ~CUDANode(); + + //@{ Computational methods + + template + void parallel_for(int length, WDP wdp); + + template + void parallel_reduce(int length, WDP& wd); + + //@} + + static CUDANode& singleton(int device=0, int numBlocks=-1, int numThreads=256) + { + static CUDANode* cuda_node = NULL; + if (cuda_node == NULL) { + cuda_node = new CUDANode(device, numBlocks, numThreads); + } + return *cuda_node; + } + + private: + //template + //void call_reduce(int length, WDP wd, int threads, int blocks, void * d_blkpart); + // numBlocks_ is + // - the number of blocks launched in a call to parallel_for() + // - not used by parallel_reduce() + int numBlocks_; + // numThreads_ is required to be a power-of-two (our requirement) between 1 and 512 (CUDA's requirement). 
It is: + // - the maximum number of threads used by parallel_reduce() + // - the number of threads per block in a call to parallel_for() + int numThreads_; + // total global device memory, in bytes + int totalMem_; + + void expand_blk_mem(size_t size_in_bytes); + + char* h_blk_mem_; + void* d_blk_mem_; + size_t blk_mem_size_; + +}; + +#endif diff --git a/mkl/basic/optional/cuda/CudaNodeImpl.hpp b/mkl/basic/optional/cuda/CudaNodeImpl.hpp new file mode 100644 index 0000000..4b94562 --- /dev/null +++ b/mkl/basic/optional/cuda/CudaNodeImpl.hpp @@ -0,0 +1,15 @@ +#ifndef CUDANODE_IMPL_HPP_ +#define CUDANODE_IMPL_HPP_ + +#include +#include +#include +#include +#include +#include + +// TODO: consider using cudaMallocHost to allocate page-locked host memory +// this speeds up transfer between device and host, and could be very +// useful in the case of Import/Export multivector operations + +#endif diff --git a/mkl/basic/optional/cuda/Matrix.cu b/mkl/basic/optional/cuda/Matrix.cu new file mode 100644 index 0000000..1487f1a --- /dev/null +++ b/mkl/basic/optional/cuda/Matrix.cu @@ -0,0 +1,22 @@ +#define CUDANODE_INCLUDE_PARALLEL_FOR + +// include for CudaNode method implementations +#include + +// includes for all operators for which Matrix needs support +#include +#include +#include + +#include +#include + +// explicit instantiations for Matrix class +#define EXPLICIT_MATRIX_SUPPORT(MATRIX,VECTOR) \ +template void CUDANode::parallel_for >(int , MatvecOp< MATRIX, VECTOR >); + +typedef miniFE::SparseMatrix Matrix_type; +typedef miniFE::Vector Vector_type; + +EXPLICIT_MATRIX_SUPPORT(Matrix_type,Vector_type) + diff --git a/mkl/basic/optional/cuda/Vector.cu b/mkl/basic/optional/cuda/Vector.cu new file mode 100644 index 0000000..9a79955 --- /dev/null +++ b/mkl/basic/optional/cuda/Vector.cu @@ -0,0 +1,19 @@ +#define CUDANODE_INCLUDE_PARALLEL_REDUCE +#define CUDANODE_INCLUDE_PARALLEL_FOR + +// include for CudaNode method implementations +#include + +// includes for all operators for 
which Vector needs support +#include +#include +#include +#include + +// explicit instantiations for Vectors +#define EXPLICIT_VECTOR_SUPPORT(GLOBALORDINAL, SCALAR) \ +template void CUDANode::parallel_for >(int , WaxpbyOp< SCALAR >); \ +template void CUDANode::parallel_reduce< DotOp< SCALAR > >(int , DotOp< SCALAR >& ); \ +template void CUDANode::parallel_for >(int , FEComputeElem< GLOBALORDINAL, SCALAR > ); + +EXPLICIT_VECTOR_SUPPORT(MINIFE_GLOBAL_ORDINAL, MINIFE_SCALAR) diff --git a/mkl/basic/optional/cuda/cutil_inline_runtime.h b/mkl/basic/optional/cuda/cutil_inline_runtime.h new file mode 100644 index 0000000..1f49afb --- /dev/null +++ b/mkl/basic/optional/cuda/cutil_inline_runtime.h @@ -0,0 +1,63 @@ +#ifndef _CUTIL_INLINE_FUNCTIONS_RUNTIME_H_ +#define _CUTIL_INLINE_FUNCTIONS_RUNTIME_H_ + +#include +#include +#include + +#include + +// We define these calls here, so the user doesn't need to include __FILE__ and __LINE__ +// The advantage is the developers gets to use the inline function so they can debug +#define cutilSafeCallNoSync(err) __cudaSafeCallNoSync(err, __FILE__, __LINE__) +#define cutilSafeCall(err) __cudaSafeCall (err, __FILE__, __LINE__) +#define cutilSafeThreadSync() __cudaSafeThreadSync(__FILE__, __LINE__) +#define cutilCheckMsg(msg) __cutilCheckMsg (msg, __FILE__, __LINE__) + +inline void __cudaSafeCallNoSync( cudaError err, const char *file, const int line ) +{ + if( cudaSuccess != err) { + fprintf(stderr, "cudaSafeCallNoSync() Runtime API error in file <%s>, line %i : %s.\n", + file, line, cudaGetErrorString( err) ); + exit(-1); + } +} + +inline void __cudaSafeCall( cudaError err, const char *file, const int line ) +{ + if( cudaSuccess != err) { + fprintf(stderr, "cudaSafeCall() Runtime API error in file <%s>, line %i : %s.\n", + file, line, cudaGetErrorString( err) ); + exit(-1); + } +} + +inline void __cudaSafeThreadSync( const char *file, const int line ) +{ + cudaError err = cudaThreadSynchronize(); + if ( cudaSuccess != err) { + 
fprintf(stderr, "cudaThreadSynchronize() Driver API error in file '%s' in line %i : %s.\n", + file, line, cudaGetErrorString( err) ); + exit(-1); + } +} + +inline void __cutilCheckMsg( const char *errorMessage, const char *file, const int line ) +{ + cudaError_t err = cudaGetLastError(); + if( cudaSuccess != err) { + fprintf(stderr, "cutilCheckMsg() CUTIL CUDA error: %s in file <%s>, line %i : %s.\n", + errorMessage, file, line, cudaGetErrorString( err) ); + exit(-1); + } +#ifdef _DEBUG + err = cudaThreadSynchronize(); + if( cudaSuccess != err) { + fprintf(stderr, "cutilCheckMsg cudaThreadSynchronize error: %s in file <%s>, line %i : %s.\n", + errorMessage, file, line, cudaGetErrorString( err) ); + exit(-1); + } +#endif +} + +#endif // _CUTIL_INLINE_FUNCTIONS_RUNTIME_H_ diff --git a/mkl/basic/optional/make_targets b/mkl/basic/optional/make_targets new file mode 100644 index 0000000..01ed2c8 --- /dev/null +++ b/mkl/basic/optional/make_targets @@ -0,0 +1,54 @@ +#----------------------------------------------------------------------- + +TPI.o : ./optional/ThreadPool/src/TPI.c + $(CC) $(CFLAGS) $(CPPFLAGS) -c $< + +#----------------------------------------------------------------------- + +CudaNode.o : ./optional/cuda/CudaNode.cpp ./optional/cuda/*.hpp ./optional/cuda/*.h + $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $< + +CudaVector.o : ./optional/cuda/Vector.cu ./optional/cuda/*.cuh + nvcc $(CUDAFLAGS) $(CPPFLAGS) -c -o $@ $< + +CudaMatrix.o : ./optional/cuda/Matrix.cu ./optional/cuda/*.cuh + nvcc $(CUDAFLAGS) $(CPPFLAGS) -c -o $@ $< + +#----------------------------------------------------------------------- +# Recursive make to create the object files in this directory, +# generate the archive, and then remove the object files. 
+ +libstk.a : + cd ./optional ; \ + $(MAKE) "CC=$(CC)" "CXX=$(CXX)" "CPPFLAGS=$(CPPFLAGS)" "CFLAGS=$(CFLAGS)" "CXXFLAGS=$(CXXFLAGS)" -f make_targets stk_library + +STK_SOURCE = \ + ./shards/src/*.cpp \ + ./stk_util/util/*.cpp \ + ./stk_util/environment/*.cpp \ + ./stk_util/parallel/*.cpp \ + ./stk_mesh/base/*.cpp \ + ./stk_mesh/baseImpl/*.cpp \ + ./stk_mesh/fem/*.cpp \ + stk_helpers.cpp + +STK_INCLUDES = \ + ./shards/src/*.hpp \ + ./shards/src/*.h \ + ./stk_util/util/*.hpp \ + ./stk_util/environment/*.hpp \ + ./stk_util/parallel/*.hpp \ + ./stk_mesh/base/*.hpp \ + ./stk_mesh/fem/*.hpp + +STK_INC = -I${PWD}/ThreadPool -I${PWD}/shards + +stk_library : $(STK_SOURCE) $(STK_INCLUDES) + $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(STK_INC) -c $(STK_SOURCE) + ar -qc ../libstk.a *.o + ranlib ../libstk.a + rm *.o + +#----------------------------------------------------------------------- + + diff --git a/mkl/basic/perform_element_loop.hpp b/mkl/basic/perform_element_loop.hpp new file mode 100644 index 0000000..f65ad4f --- /dev/null +++ b/mkl/basic/perform_element_loop.hpp @@ -0,0 +1,110 @@ +#ifndef _perform_element_loop_hpp_ +#define _perform_element_loop_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#include +#include +#include + +namespace miniFE { + +template +void +perform_element_loop(const simple_mesh_description& mesh, + const Box& local_elem_box, + MatrixType& A, VectorType& b, + Parameters& /*params*/) +{ + typedef typename MatrixType::ScalarType Scalar; + + int global_elems_x = mesh.global_box[0][1]; + int global_elems_y = mesh.global_box[1][1]; + int global_elems_z = mesh.global_box[2][1]; + + //We will iterate the local-element-box (local portion of the mesh), and + //get element-IDs in preparation for later assembling the FE operators + //into the global sparse linear-system. + + GlobalOrdinal num_elems = get_num_ids(local_elem_box); + std::vector elemIDs(num_elems); + + BoxIterator iter = BoxIterator::begin(local_elem_box); + BoxIterator end = BoxIterator::end(local_elem_box); + + for(size_t i=0; iter != end; ++iter, ++i) { + elemIDs[i] = get_id(global_elems_x, global_elems_y, global_elems_z, + iter.x, iter.y, iter.z); +//#ifdef MINIFE_DEBUG +//std::cout << "elem ID " << elemIDs[i] << " ("< elem_data; + + compute_gradient_values(elem_data.grad_vals); + + timer_type t_gn = 0, t_ce = 0, t_si = 0; + timer_type t0 = 0; + for(size_t i=0; i +#include +#include +#include +#include +#include +#include + +#include + +namespace miniFE { + +//--------------------------------------------------------------------- +//This file contains three 'filter' classes, and a 'perform_element_loop' +//function that uses those filter classes to run a TBB pipeline. 
+// +//The filter classes are as follows: +//1. GetElemNodesCoords +// For each element in the mesh, create an elem-data object with coords +// and node-ids. +//2. Compute_FE_Operators +// Given an elem-data object (with coords and node-ids), compute the +// diffusion-matrix and source-vector. +//3. LockingSumIntoLinearSystem +// Given an elem-data object (with diffusion-matrix and source-vector), +// assemble into global-sparse linear-system. Uses a lock on each +// matrix row to ensure that multiple threads don't update the same row +// at the same time. +//... or: +//3. SumIntoLinearSystem +// Given an elem-data object (with diffusion-matrix and source-vector), +// assemble into global-sparse linear-system. +// There are several of these filters, usually 1 per thread, and each +// will be responsible for a certain slice of equations. It will check +// the elem-data for equations that are in its slice, assemble those, and +// pass the elem-data on so that the next SumIntoLinearSystem filter can +// deal with equations in a different 'slice'. +// +//--------------------------------------------------------------------- + +//--------------------------------------------------------------------- + +/** Filter 1.: GetElemNodesCoords + */ +template +class GetElemNodesCoords : public tbb::filter { +public: + GetElemNodesCoords(const std::vector& elemIDs, + const simple_mesh_description& mesh, + size_t num_elems_at_a_time) + : tbb::filter(/*is_serial=*/true), + elemIDs_(elemIDs), + i_(0), + mesh_(mesh), + num_elems_(num_elems_at_a_time) + { + if (num_elems_ < 1) num_elems_ = 1; + } + + ~GetElemNodesCoords(){} + +private: + /** This operator launches an elem-data object for a 'group' (size num_elems_) + * of elements. When all elements have been launched, return NULL to signal + * that we're done issuing data. 
+ */ + void* operator()(void* item) { + if (i_ >= elemIDs_.size()) return NULL; + + size_t num = num_elems_; + if (i_+num > elemIDs_.size()) num = elemIDs_.size() - i_; + + std::vector >* elemdata_vec = new std::vector >(num); + + size_t i=0; + while (i_ < elemIDs_.size() && i < num) { + get_elem_nodes_and_coords(mesh_, elemIDs_[i_], (*elemdata_vec)[i]); + ++i_; + ++i; + } + + return elemdata_vec; + } + + const std::vector& elemIDs_; + size_t i_; + const simple_mesh_description& mesh_; + size_t num_elems_; +}; + +//--------------------------------------------------------------------- + +/** Filter 2.: Compute_FE_Operators + */ +template +class Compute_FE_Operators : public tbb::filter { +public: + Compute_FE_Operators() : tbb::filter(/*is_serial=*/false) {} + ~Compute_FE_Operators() {} + +private: + /** This operator takes a vector of elem-data objects which are assumed + * to have nodal-coordinates already populated, and computes the + * element-diffusion-matrix and element-source-vector for each. 
+ */ + void* operator()(void* item) { + if (item == NULL) return NULL; + std::vector >* elemdata = static_cast >*>(item); + + for(size_t i=0; isize(); ++i) { + compute_element_matrix_and_vector((*elemdata)[i]); + } + return elemdata; + } +}; + +//--------------------------------------------------------------------- + +/** Filter 3.: SumIntoLinearSystem + */ +template +class SumIntoLinearSystem : public tbb::filter { + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + +public: + SumIntoLinearSystem(GlobalOrdinal myFirstRow, + GlobalOrdinal myLastRow, + MatrixType& mat, VectorType& vec) + : tbb::filter(/*is_serial=*/true), + A_(mat), b_(vec), + myFirstRow_(myFirstRow), + myLastRow_(myLastRow) + { + } + + ~SumIntoLinearSystem() {} + +private: + /** This operator takes a vector of elem-data objects which have an + * element-diffusion-matrix and source-vector, looks through it for + * any rows in this filter's slice of the global matrix, assembles + * those rows into the linear-system, then passes the elem-data object + * on for use by the next assembly filter. + * If this assembly filter is responsible for the last slice of the + * row-space, then this is the last filter and so we delete the + * elem-data object. 
+ */ + void* operator()(void* item) { + if (item == NULL) return NULL; + std::vector >* elemdata_vec = static_cast >*>(item); + + for(size_t e=0; esize(); ++e) { + ElemData& elemdata = (*elemdata_vec)[e]; + size_t nnodes = elemdata.nodes_per_elem; + for(size_t i=0; i myLastRow_) continue; + + sum_into_row(row, nnodes, elemdata.elem_node_ids, + &(elemdata.elem_diffusion_matrix[i*nnodes]), A_); + sum_into_vector(1, &row, &(elemdata.elem_source_vector[i]), b_); + } + } + + if (myLastRow_ >= A_.rows.size()) { + delete elemdata_vec; + return NULL; + } + + return elemdata_vec; + } + + MatrixType& A_; + VectorType& b_; + GlobalOrdinal myFirstRow_; + GlobalOrdinal myLastRow_; +}; + +//--------------------------------------------------------------------- + +static tbb::atomic matrix_suminto; + +/** Filter 3.: SumIntoLinearSystem with locking + */ +template +class LockingSumIntoLinearSystem : public tbb::filter { + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + +public: + LockingSumIntoLinearSystem(MatrixType& mat, VectorType& vec) + : tbb::filter(/*is_serial=*/false), + A_(mat), b_(vec) + { + } + + ~LockingSumIntoLinearSystem() {} + +private: + /** This operator takes a vector of elem-data objects which have an + * element-diffusion-matrix and source-vector, and assembles into + * the linear-system, using locking to make sure no other + * thread is assembling the same global row at the same time. 
+ */ + void* operator()(void* item) { + if (item == NULL) return NULL; + std::vector >* elemdata_vec = static_cast >*>(item); + + for(size_t e=0; esize(); ++e) { + ElemData& elemdata = (*elemdata_vec)[e]; + size_t nnodes = elemdata.nodes_per_elem; + size_t offset = 0; + for(size_t i=0; i A_; + LockingVector b_; +}; + +//--------------------------------------------------------------------- + +template +void +perform_element_loop(const simple_mesh_description& mesh, + const Box& local_elem_box, + MatrixType& A, VectorType& b, + Parameters& params) +{ + typedef typename MatrixType::ScalarType Scalar; + + if (A.rows.size() == 0) return; + + int num_threads = params.numthreads; + + //We will iterate the local-element-box (local portion of the mesh), and + //assemble the FE operators into the global sparse linear-system. + + tbb::pipeline pipe; + + int global_elems_x = mesh.global_box[0][1]; + int global_elems_y = mesh.global_box[1][1]; + int global_elems_z = mesh.global_box[2][1]; + + GlobalOrdinal num_elems = get_num_ids(local_elem_box); + std::vector elemIDs(num_elems); + + BoxIterator iter = BoxIterator::begin(local_elem_box); + BoxIterator end = BoxIterator::end(local_elem_box); + + for(size_t i=0; iter != end; ++iter, ++i) { + elemIDs[i] = get_id(global_elems_x, global_elems_y, global_elems_z, + iter.x, iter.y, -iter.z); + } + + //Create the first stage of the pipeline, the filter that will + //launch elem-data from the mesh, through the pipeline. + GetElemNodesCoords get_nodes_coords(elemIDs, mesh, params.elem_group_size); + + //Create the second stage of the pipeline, the parallel filter that will + //compute element-matrices and element-vectors. 
+ Compute_FE_Operators fe_ops; + + //Add the filters to the pipeline: + pipe.add_filter(get_nodes_coords); + pipe.add_filter(fe_ops); + + LockingSumIntoLinearSystem* sum_into_linsys = NULL; + size_t num_assembly_filters = 0; + std::vector*> linsys; + + bool use_locking = params.use_locking==1; + if (use_locking) { + sum_into_linsys = new LockingSumIntoLinearSystem(A, b); + pipe.add_filter(*sum_into_linsys); + } + else { + //If not using locking, create several assembly filters, each of which + //will be responsible for assembling rows into a certain slice of the + //global matrix. + + num_assembly_filters = num_threads/3; + if (num_assembly_filters == 0) num_assembly_filters = 1; + num_assembly_filters = 2; + + size_t num_rows = A.rows.size(); + size_t rows_per_thread = num_rows/num_assembly_filters; + if (num_rows % num_assembly_filters > 0) ++rows_per_thread; + size_t first_row = A.rows[0]; + for(int i=0; i * sum_into = new SumIntoLinearSystem(first_row, last_row, A, b); + linsys.push_back(sum_into); + pipe.add_filter(*sum_into); + + first_row += rows_per_thread; + } + } + + //Running the pipeline carries out the element-loop and assembly. 
+ pipe.run(num_threads); + + pipe.clear(); + + if (use_locking) { + std::cout << "\n{number of matrix conflicts: " << miniFE_num_matrix_conflicts << "}"< +#include +#include +#include +#include +#include +#include +#include + +namespace miniFE { + +//--------------------------------------------------------------------- + +template +struct FEAssembleSumInto { + const simple_mesh_description* mesh; + GlobalOrdinal* elemIDs; + LockingMatrix* A; + LockingVector* b; + +inline void operator()(int i) +{ + ElemData elem_data; + GlobalOrdinal elemID = elemIDs[i]; + get_elem_nodes_and_coords(*mesh, elemID, elem_data.elem_node_ids, + elem_data.elem_node_coords); + compute_element_matrix_and_vector(elem_data); + sum_into_global_linear_system(elem_data, *A, *b); +} +}; + +template +void +perform_element_loop(const simple_mesh_description& mesh, + const Box& local_elem_box, + MatrixType& A, VectorType& b, + Parameters& params) +{ + typedef typename MatrixType::ScalarType Scalar; + + if (A.rows.size() == 0) return; + + int num_threads = params.numthreads; + + timer_type t0 = mytimer(); + + //We will iterate the local-element-box (local portion of the mesh), and + //assemble the FE operators into the global sparse linear-system. 
+ + int global_elems_x = mesh.global_box[0][1]; + int global_elems_y = mesh.global_box[1][1]; + int global_elems_z = mesh.global_box[2][1]; + + GlobalOrdinal num_elems = get_num_ids(local_elem_box); + std::vector elemIDs(num_elems); + + BoxIterator iter = BoxIterator::begin(local_elem_box); + BoxIterator end = BoxIterator::end(local_elem_box); + + for(size_t i=0; iter != end; ++iter, ++i) { + elemIDs[i] = get_id(global_elems_x, global_elems_y, global_elems_z, + iter.x, iter.y, iter.z); + } + + LockingMatrix lockingA(A); + LockingVector lockingb(b); + + FEAssembleSumInto fe_op; + fe_op.mesh = &mesh; + fe_op.elemIDs = &elemIDs[0]; + fe_op.A = &lockingA; + fe_op.b = &lockingb; + + typedef typename VectorType::ComputeNodeType ComputeNodeType; + + ComputeNodeType& compute_node = b.compute_node; + + compute_node.parallel_for(elemIDs.size(), fe_op); + + std::cout << "\n{number of matrix conflicts: " << miniFE_num_matrix_conflicts << "}"< +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace miniFE { + +//--------------------------------------------------------------------- + +template +void +perform_element_loop(const simple_mesh_description& mesh, + const Box& local_elem_box, + MatrixType& A, VectorType& b, + Parameters& params) +{ + typedef typename MatrixType::ScalarType Scalar; + + if (A.rows.size() == 0) return; + + int num_threads = params.numthreads; + + timer_type t0 = mytimer(); + + //We will iterate the local-element-box (local portion of the mesh), and + //assemble the FE operators into the global sparse linear-system. 
+ + int global_elems_x = mesh.global_box[0][1]; + int global_elems_y = mesh.global_box[1][1]; + int global_elems_z = mesh.global_box[2][1]; + + GlobalOrdinal num_elems = get_num_ids(local_elem_box); + std::vector elemIDs(num_elems); + + BoxIterator iter = BoxIterator::begin(local_elem_box); + BoxIterator end = BoxIterator::end(local_elem_box); + + for(size_t i=0; iter != end; ++iter, ++i) { + elemIDs[i] = get_id(global_elems_x, global_elems_y, global_elems_z, + iter.x, iter.y, iter.z); + } + + std::vector node_ordinals(num_elems*Hex8::numNodesPerElem); + std::vector node_coords(num_elems*Hex8::numNodesPerElem*Hex8::spatialDim); + std::vector elem_matrices(num_elems*Hex8::numNodesPerElem*Hex8::numNodesPerElem); + std::vector elem_vectors(num_elems*Hex8::numNodesPerElem); + + LockingMatrix lockingA(A); + LockingVector lockingb(b); + + GetNodesCoords get_nodes_coords; + get_nodes_coords.elemIDs = &elemIDs[0]; + get_nodes_coords.mesh = &mesh; + get_nodes_coords.node_ordinals = &node_ordinals[0]; + get_nodes_coords.elem_node_coords = &node_coords[0]; + + typedef typename VectorType::ComputeNodeType ComputeNodeType; + + ComputeNodeType& compute_node = b.compute_node; + + compute_node.parallel_for(elemIDs.size(), get_nodes_coords); + + timer_type t_gn = mytimer() - t0; + t0 = mytimer(); + +#ifdef MINIFE_HAVE_CUDA + CUDANode& elem_compute_node = CUDANode::singleton(); +#else + ComputeNodeType& elem_compute_node = compute_node; +#endif + timer_type t_ccn = mytimer() - t0; + t0 = mytimer(); + + Scalar* d_node_coords = elem_compute_node.get_buffer(&node_coords[0], node_coords.size()); + Scalar* d_elem_matrices = elem_compute_node.get_buffer(&elem_matrices[0], elem_matrices.size()); + Scalar* d_elem_vectors = elem_compute_node.get_buffer(&elem_vectors[0], elem_vectors.size()); + + elem_compute_node.copy_to_buffer(&node_coords[0], node_coords.size(), d_node_coords); + + FEComputeElem fe_compute_elem; + fe_compute_elem.elem_node_coords = &d_node_coords[0]; + 
fe_compute_elem.elem_diffusion_matrix = &d_elem_matrices[0]; + fe_compute_elem.elem_source_vector = &d_elem_vectors[0]; + + elem_compute_node.parallel_for(elemIDs.size(), fe_compute_elem); + + elem_compute_node.copy_from_buffer(&elem_matrices[0], elem_matrices.size(), d_elem_matrices); + elem_compute_node.copy_from_buffer(&elem_vectors[0], elem_vectors.size(), d_elem_vectors); + + timer_type t_ce = mytimer() - t0; + + t0 = mytimer(); + SumInLinSys sum_in; + sum_in.node_ordinals = &node_ordinals[0]; + sum_in.elem_diffusion_matrix = &elem_matrices[0]; + sum_in.elem_source_vector = &elem_vectors[0]; + sum_in.A = &lockingA; + sum_in.b = &lockingb; + + compute_node.parallel_for(elemIDs.size(), sum_in); + + timer_type t_si = mytimer() - t0; + std::cout << "time to get nodes/coords: " << t_gn << std::endl; + std::cout << "time to create compute-node: " << t_ccn << ", time to compute elements: " << t_ce << std::endl; + std::cout << "time to sum into linsys: " << t_si << std::endl; + std::cout << "\n{number of matrix conflicts: " << miniFE_num_matrix_conflicts << "}"< " +exit 1 +fi + +np=$1 +nx=$2 +ny=$3 +nz=$4 + +echo " " +echo "running miniFE test for np=${np}, nx=${nx} ny=${ny} nz=${nz}..." + +if [ ! -x miniFE.x ]; then +echo "miniFE.x doesn't exist or isn't executable. Aborting." +exit -1 +fi + +mpirun -np ${np} miniFE.x nx=${nx} ny=${ny} nz=${nz} >& miniFE_run.out +rm miniFE_run.out + +if [ ! -f A.mtx.${np}.0 ]; then +echo "matrix file A.mtx.${np}.0 doesn't exist. build miniFE with -DMINIFE_DEBUG." 
+fi + +p=0 +while [ $p -lt ${np} ]; do +diff A.mtx.${np}.$p gold_files/1x1x2_A.mtx.${np}.$p >& diff.A.$p.txt +diff b.vec.${np}.$p gold_files/1x1x2_b.vec.${np}.$p >& diff.b.$p.txt +diff x.vec.${np}.$p gold_files/1x1x2_x.vec.${np}.$p >& diff.x.$p.txt + +test_result="passed" +if [ -s diff.A.$p.txt ]; then +echo "TEST FAILED: see diff.A.${p}.txt" +test_result="failed" +fi + +if [ -s diff.b.$p.txt ]; then +echo "TEST FAILED: see diff.b.${p}.txt" +test_result="failed" +fi + +if [ -s diff.x.$p.txt ]; then +echo "TEST FAILED: see diff.x.${p}.txt" +test_result="failed" +fi + +if [ $test_result != "passed" ]; then +echo "test failed" +exit 1 +fi + +let p=p+1 +rm diff.*.txt +done + +echo "tests passed" + diff --git a/mkl/basic/run_tests b/mkl/basic/run_tests new file mode 100755 index 0000000..5e03399 --- /dev/null +++ b/mkl/basic/run_tests @@ -0,0 +1,22 @@ +#!/bin/bash + +echo " " +echo "running miniFE tests..." + +if [ ! -x miniFE.x ]; then +echo "miniFE.x doesn't exist or isn't executable. Aborting." +exit -1 +fi + +./run_one_test 1 1 1 2 +if [ $? != 0 ]; then +echo "test failed" +exit $? +fi + +./run_one_test 2 1 1 2 +if [ $? != 0 ]; then +echo "test failed" +exit $? +fi + diff --git a/mkl/basic/sharedmem.cuh b/mkl/basic/sharedmem.cuh new file mode 100644 index 0000000..b13c4f2 --- /dev/null +++ b/mkl/basic/sharedmem.cuh @@ -0,0 +1,153 @@ +/* +* Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +* +* NOTICE TO USER: +* +* This source code is subject to NVIDIA ownership rights under U.S. and +* international Copyright laws. +* +* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
+* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +* OR PERFORMANCE OF THIS SOURCE CODE. +* +* U.S. Government End Users. This source code is a "commercial item" as +* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +* "commercial computer software" and "commercial computer software +* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +* and is provided to the U.S. Government only as a commercial end item. +* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +* source code with only those rights set forth herein. +*/ + +#ifndef _SHAREDMEM_H_ +#define _SHAREDMEM_H_ + +//**************************************************************************** +// Because dynamically sized shared memory arrays are declared "extern", +// we can't templatize them directly. To get around this, we declare a +// simple wrapper struct that will declare the extern array with a different +// name depending on the type. This avoids compiler errors about duplicate +// definitions. +// +// To use dynamically allocated shared memory in a templatized __global__ or +// __device__ function, just replace code like this: +// +// +// template +// __global__ void +// foo( T* g_idata, T* g_odata) +// { +// // Shared mem size is determined by the host app at run time +// extern __shared__ T sdata[]; +// ... +// doStuff(sdata); +// ... +// } +// +// With this +// template +// __global__ void +// foo( T* g_idata, T* g_odata) +// { +// // Shared mem size is determined by the host app at run time +// SharedMemory smem; +// T* sdata = smem.getPointer(); +// ... +// doStuff(sdata); +// ... 
+// } +//**************************************************************************** + +// This is the un-specialized struct. Note that we prevent instantiation of this +// struct by putting an undefined symbol in the function body so it won't compile. +template +struct SharedMemory +{ + // Ensure that we won't compile any un-specialized types + __device__ T* getPointer() { + extern __device__ void error(void); + error(); + return NULL; + } +}; + +// Following are the specializations for the following types. +// int, uint, char, uchar, short, ushort, long, ulong, bool, float, and double +// One could also specialize it for user-defined types. + +template <> +struct SharedMemory +{ + __device__ int* getPointer() { extern __shared__ int s_int[]; return s_int; } +}; + +template <> +struct SharedMemory +{ + __device__ unsigned int* getPointer() { extern __shared__ unsigned int s_uint[]; return s_uint; } +}; + +template <> +struct SharedMemory +{ + __device__ char* getPointer() { extern __shared__ char s_char[]; return s_char; } +}; + +template <> +struct SharedMemory +{ + __device__ unsigned char* getPointer() { extern __shared__ unsigned char s_uchar[]; return s_uchar; } +}; + +template <> +struct SharedMemory +{ + __device__ short* getPointer() { extern __shared__ short s_short[]; return s_short; } +}; + +template <> +struct SharedMemory +{ + __device__ unsigned short* getPointer() { extern __shared__ unsigned short s_ushort[]; return s_ushort; } +}; + +template <> +struct SharedMemory +{ + __device__ long* getPointer() { extern __shared__ long s_long[]; return s_long; } +}; + +template <> +struct SharedMemory +{ + __device__ unsigned long* getPointer() { extern __shared__ unsigned long s_ulong[]; return s_ulong; } +}; + +template <> +struct SharedMemory +{ + __device__ bool* getPointer() { extern __shared__ bool s_bool[]; return s_bool; } +}; + +template <> +struct SharedMemory +{ + __device__ float* getPointer() { extern __shared__ float s_float[]; return s_float; 
} +}; + +template <> +struct SharedMemory +{ + __device__ double* getPointer() { extern __shared__ double s_double[]; return s_double; } +}; + + +#endif //_SHAREDMEM_H_ diff --git a/mkl/basic/simple_mesh_description.hpp b/mkl/basic/simple_mesh_description.hpp new file mode 100644 index 0000000..717dc6c --- /dev/null +++ b/mkl/basic/simple_mesh_description.hpp @@ -0,0 +1,239 @@ + +#ifndef _simple_mesh_description_hpp_ +#define _simple_mesh_description_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include +#include + +namespace miniFE { + +template +class simple_mesh_description { +public: + simple_mesh_description(const Box& global_box_in, const Box& local_box_in) + { + Box local_node_box; + for(int i=0; i<3; ++i) { + global_box[i][0] = global_box_in[i][0]; + global_box[i][1] = global_box_in[i][1]; + local_box[i][0] = local_box_in[i][0]; + local_box[i][1] = local_box_in[i][1]; + local_node_box[i][0] = local_box_in[i][0]; + local_node_box[i][1] = local_box_in[i][1]; + //num-owned-nodes == num-elems+1 in this dimension if the elem box is not empty + //and we are at the high end of the global range in that dimension: + if (local_box_in[i][1] > local_box_in[i][0] && local_box_in[i][1] == global_box[i][1]) local_node_box[i][1] += 1; + } + + int max_node_x = global_box[0][1]+1; + int max_node_y = global_box[1][1]+1; + int max_node_z = global_box[2][1]+1; + create_map_id_to_row(max_node_x, max_node_y, max_node_z, local_node_box, + map_ids_to_rows); + + //As described in analytic_soln.hpp, + //we will impose a 0 boundary-condition on faces x=0, y=0, z=0, y=1, z=1 + //we will impose a 1 boundary-condition on face x=1 + +#ifdef MINIFE_DEBUG +std::cout< 0) --miny; + if (local_node_box[Z][0] > 0) --minz; + if (local_node_box[Y][1] < max_node_y) ++maxy; + if (local_node_box[Z][1] < max_node_z) ++maxz; + + for(int iz=minz; iz(max_node_x, max_node_y, max_node_z, + 0, iy, iz); +#ifdef MINIFE_DEBUG +std::cout<<"x=0 BC, node "< 0) --minx; + if (local_node_box[Z][0] > 0) --minz; + if (local_node_box[X][1] < max_node_x) ++maxx; + if (local_node_box[Z][1] < max_node_z) ++maxz; + + for(int iz=minz; iz(max_node_x, max_node_y, max_node_z, + ix, 0, iz); +#ifdef MINIFE_DEBUG +std::cout<<"y=0 BC, node "< 0) --minx; + if (local_node_box[Y][0] > 0) --miny; + if (local_node_box[X][1] < max_node_x) ++maxx; + if (local_node_box[Y][1] < max_node_y) ++maxy; + 
+ for(int iy=miny; iy(max_node_x, max_node_y, max_node_z, + ix, iy, 0); +#ifdef MINIFE_DEBUG +std::cout<<"z=0 BC, node "< 0) --minz; + if (local_node_box[Y][0] > 0) --miny; + if (local_node_box[Z][1] < max_node_z) ++maxz; + if (local_node_box[Y][1] < max_node_y) ++maxy; + + for(int iy=miny; iy(max_node_x, max_node_y, max_node_z, + x1, iy, iz); + int row = map_id_to_row(nodeID); +#ifdef MINIFE_DEBUG +std::cout<<"x=1 BC, node "< 0) --minz; + if (local_node_box[X][0] > 0) --minx; + if (local_node_box[Z][1] < max_node_z) ++maxz; + if (local_node_box[X][1] < max_node_x) ++maxx; + + for(int ix=minx; ix(max_node_x, max_node_y, max_node_z, + ix, y1, iz); +#ifdef MINIFE_DEBUG +std::cout<<"y=1 BC, node "< 0) --miny; + if (local_node_box[X][0] > 0) --minx; + if (local_node_box[Y][1] < max_node_y) ++maxy; + if (local_node_box[X][1] < max_node_x) ++maxx; + + for(int ix=minx; ix(max_node_x, max_node_y, max_node_z, + ix, iy, z1); +#ifdef MINIFE_DEBUG +std::cout<<"z=1 BC, node "< bc_rows_0; + std::set bc_rows_1; + std::map map_ids_to_rows; + Box global_box; + Box local_box; +};//class simple_mesh_description + +}//namespace miniFE + +#endif diff --git a/mkl/basic/time_kernels.hpp b/mkl/basic/time_kernels.hpp new file mode 100644 index 0000000..b14f743 --- /dev/null +++ b/mkl/basic/time_kernels.hpp @@ -0,0 +1,140 @@ +#ifndef _time_kernels_hpp_ +#define _time_kernels_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. 
+// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include + +#include +#include + +#ifdef MINIFE_HAVE_CUDA +#include +#endif + +namespace miniFE { + +template +void +time_kernels(OperatorType& A, + const VectorType& b, + VectorType& x, + Matvec matvec, + typename OperatorType::LocalOrdinalType max_iter, + typename OperatorType::ScalarType& xdotp, + timer_type* my_kern_times) +{ + typedef typename OperatorType::ScalarType ScalarType; + typedef typename OperatorType::LocalOrdinalType OrdinalType; + typedef typename TypeTraits::magnitude_type magnitude_type; + + timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0; + + int myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (!A.has_local_indices) { + std::cerr << "miniFE::time_kernels ERROR, A.has_local_indices is false, needs to be true. This probably means " + << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::time_kernels." 
+ << std::endl; + return; + } + + OrdinalType nrows = A.rows.size(); + OrdinalType ncols = A.num_cols; + + VectorType p(0, ncols, b.compute_node); + + ScalarType one = 1.0; + ScalarType zero = 0.0; + + typedef typename VectorType::ComputeNodeType ComputeNodeType; + ComputeNodeType& compute_node = x.compute_node; + + //The following lines that create and initialize buffers are no-ops in many + //cases, but perform actual allocations and copies if a off-cpu device such as + //a GPU is being used by compute_node. + + //Do any required allocations for buffers that will be needed during CG: + ScalarType* d_x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); + ScalarType* d_p = compute_node.get_buffer(&p.coefs[0], p.coefs.size()); + ScalarType* d_b = compute_node.get_buffer(&b.coefs[0], b.coefs.size()); + OrdinalType* d_Arowoff = compute_node.get_buffer(&A.row_offsets[0], A.row_offsets.size()); + OrdinalType* d_Acols = compute_node.get_buffer(&A.packed_cols[0], A.packed_cols.size()); + ScalarType* d_Acoefs = compute_node.get_buffer(&A.packed_coefs[0], A.packed_coefs.size()); + + //Copy data to buffers that need to be initialized from input data: + compute_node.copy_to_buffer(&x.coefs[0], x.coefs.size(), d_x); + compute_node.copy_to_buffer(&b.coefs[0], b.coefs.size(), d_b); + compute_node.copy_to_buffer(&A.row_offsets[0], A.row_offsets.size(), d_Arowoff); + compute_node.copy_to_buffer(&A.packed_cols[0], A.packed_cols.size(), d_Acols); + compute_node.copy_to_buffer(&A.packed_coefs[0], A.packed_coefs.size(), d_Acoefs); + + TICK(); + for(OrdinalType i=0; i + +#ifdef HAVE_MPI +#include +#endif + +#include +#include + +int main(int argc, char** argv) { + +#ifdef HAVE_MPI + MPI_Init(&argc, &argv); +#endif + + //utest_case.hpp declares the 'get_utest_cases' function. 
+ + std::vector& utest_cases = get_utest_cases(); + bool tests_passed = true; + + for(size_t i=0; irun(); + if (passed) std::cout << " pass: " << utest_cases[i]->name() << std::endl; + else { + std::cout << "!!!FAIL: " << utest_cases[i]->name() << std::endl; + tests_passed = false; + } + } + + if (!tests_passed) { + std::cout << "at least 1 test failed."< + +class utest_case; + +std::vector& get_utest_cases() +{ + static std::vector utest_cases; + return utest_cases; +} + +//When a class that inherits the utest_case class is constructed, +//it gets added to the vector of utest_cases returned by +//the above 'get_utest_cases' function. +class utest_case { +public: + utest_case(){ get_utest_cases().push_back(this); } + ~utest_case(){} + virtual const char* name() = 0; + virtual bool run() = 0; +}; + +//The following macro declares and instantiates a class that +//inherits the above utest_case interfaces. +// +//use the macro like this: +// UTEST_CASE(mytest) +// { +// ... test code here ... 
+// } +// +//See example usages in utest_cases.hpp +// +#define UTEST_CASE(TESTNAME) \ + class TESTNAME##_utest : public utest_case { \ + public: \ + TESTNAME##_utest(){} \ + const char* name() {return #TESTNAME;} \ + bool run(); \ + }; \ + \ + TESTNAME##_utest instance_##TESTNAME##_utest; \ + \ + bool TESTNAME##_utest::run() + +#define TEST_EQUAL(A,B) \ + if ((A) != (B)) return false; + +#define TEST_EQUAL_TOL(A,B,tol) \ + if (std::abs((A) - (B)) > tol) return false; + +#endif + diff --git a/mkl/basic/utest_cases.hpp b/mkl/basic/utest_cases.hpp new file mode 100644 index 0000000..d15ef9d --- /dev/null +++ b/mkl/basic/utest_cases.hpp @@ -0,0 +1,1232 @@ +#ifndef _utest_cases_hpp_ +#define _utest_cases_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef MINIFE_HAVE_TPI +#include +#include +#endif + +#ifdef MINIFE_HAVE_TBB +#include +#include +#endif + +#ifdef MINIFE_HAVE_CUDA +#include +#endif + +#include + +typedef MINIFE_SCALAR Scalar; +typedef MINIFE_LOCAL_ORDINAL LocalOrdinal; +typedef MINIFE_GLOBAL_ORDINAL GlobalOrdinal; + +template +inline +int check_get_id(int nx, int ny, int nz, int x, int y, int z, T expected, const char* testname) +{ + T val = miniFE::get_id(nx,ny,nz,x,y,z); + if (val != expected) { + std::cout << testname << " failed. val=" << val<<", expected " << expected << std::endl; + return -1; + } + return 0; +} + +UTEST_CASE(box_partition) +{ + int global_box[3][2] = { { 0, 2000 }, { 0, 2000}, { 0, 2000} }; + int numprocs = 4, myproc = 0; + + int (*local_boxes0)[3][2] = (int(*)[3][2])std::malloc(sizeof(int)*numprocs*3*2); + int (*local_boxes1)[3][2] = (int(*)[3][2])std::malloc(sizeof(int)*numprocs*3*2); + int (*local_boxes2)[3][2] = (int(*)[3][2])std::malloc(sizeof(int)*numprocs*3*2); + int (*local_boxes3)[3][2] = (int(*)[3][2])std::malloc(sizeof(int)*numprocs*3*2); + + box_partition(0, numprocs, 2, global_box, local_boxes0); + box_partition(0, numprocs, 2, global_box, local_boxes1); + box_partition(0, numprocs, 2, global_box, local_boxes2); + box_partition(0, numprocs, 2, global_box, local_boxes3); + + for(int i=1; i(local_boxes0[i]) != + miniFE::get_num_ids(local_boxes0[0])) { + return false; + } + if (miniFE::get_num_ids(local_boxes1[i]) != + miniFE::get_num_ids(local_boxes1[0])) { + return false; + } + if (miniFE::get_num_ids(local_boxes2[i]) != + miniFE::get_num_ids(local_boxes2[0])) { + return false; + } + if (miniFE::get_num_ids(local_boxes3[i]) != + miniFE::get_num_ids(local_boxes3[0])) { + return 
false; + } + + if (miniFE::get_num_ids(local_boxes0[i]) < 0 || + miniFE::get_num_ids(local_boxes0[i]) > 2000000000) { + return false; + } + } + + std::free(local_boxes0); + std::free(local_boxes1); + std::free(local_boxes2); + std::free(local_boxes3); + + return true; +} + +UTEST_CASE(generate_matrix_structure1) +{ + int global_box[3][2] = {{ 0, 1 }, { 0, 1 }, { 0, 1 } }; + int box[3][2] = {{ 0, 1 }, { 0, 1 }, { 0, 1 } }; + + miniFE::simple_mesh_description mesh(global_box, box); + + SerialComputeNode compute_node; + miniFE::CSRMatrix A(compute_node); + + miniFE::generate_matrix_structure(mesh, A); + + int nodes_x = global_box[0][1]+1; + int nodes_y = global_box[1][1]+1; + int nodes_z = global_box[2][1]+1; + int nrows = nodes_x*nodes_y*nodes_z; + + if (A.rows.size() != nrows) { + return false; + } + + if (A.row_offsets[nrows] != 64) { + return false; + } + + return true; +} + +UTEST_CASE(generate_matrix_structure2) +{ + int global_box[3][2] = {{ 0, 2 }, { 0, 2 }, { 0, 2 } }; + int box[3][2] = {{ 0, 2 }, { 0, 2 }, { 0, 2 } }; + + miniFE::simple_mesh_description mesh(global_box, box); + + SerialComputeNode compute_node; + miniFE::CSRMatrix A(compute_node); + + int nodes_x = global_box[0][1]+1; + int nodes_y = global_box[1][1]+1; + int nodes_z = global_box[2][1]+1; + int nrows = nodes_x*nodes_y*nodes_z; + + if (nrows != 27) { + return false; + } + + miniFE::generate_matrix_structure(mesh, A); + + if (A.row_offsets.size() != nrows+1) { + return false; + } + + if (A.row_offsets[nrows] != 343) { + return false; + } + + if (A.row_offsets[14]-A.row_offsets[13] != 27) { + return false; + } + + return true; +} + +UTEST_CASE(get_hex8_node_coords_3d) +{ + std::vector coords(24); + coords[0] = 0; + coords[1] = 0; + coords[2] = 0; + coords[3] = 1; + coords[4] = 0; + coords[5] = 0; + coords[6] = 1; + coords[7] = 0; + coords[8] = -1; + coords[9] = 0; + coords[10] = 0; + coords[11] = -1; + coords[12] = 0; + coords[13] = 1; + coords[14] = 0; + coords[15] = 1; + coords[16] = 1; + 
coords[17] = 0; + coords[18] = 1; + coords[19] = 1; + coords[20] = -1; + coords[21] = 0; + coords[22] = 1; + coords[23] = -1; + + std::vector testcoords(24); + + miniFE::get_hex8_node_coords_3d(0, 0, 0, 1.0, &testcoords[0]); + + if (coords != testcoords) { + return false; + } + + return true; +} + +inline +void get_test_elem_mat(std::vector& elem_mat) +{ +//after much careful debugging, I'm convinced that the following is a +//correct element-diffusion matrix for the element with local-node-0 at +//coordinates 0,0,0. So pasting this into a unit-test will guard against +//unintended changes as I continue working on the code for various reasons. + + elem_mat.resize(36); +elem_mat[0] = 0.6666666664477059; +elem_mat[1] = 1.094804871759614e-10; +elem_mat[2] = -0.1666666666666667; +elem_mat[3] = 1.094805019211109e-10; +elem_mat[4] = 1.094804871759614e-10; +elem_mat[5] = -0.1666666666666667; +elem_mat[6] = -0.1666666667761472; +elem_mat[7] = -0.1666666666666667; +elem_mat[8] = 0.666666666447706; +elem_mat[9] = 1.094804941148553e-10; +elem_mat[10] = -0.1666666666666667; +elem_mat[11] = -0.1666666666666667; +elem_mat[12] = 1.094804732981736e-10; +elem_mat[13] = -0.1666666666666667; +elem_mat[14] = -0.1666666667761472; +elem_mat[15] = 0.666666666447706; +elem_mat[16] = 1.094804841401953e-10; +elem_mat[17] = -0.1666666667761472; +elem_mat[18] = -0.1666666666666667; +elem_mat[19] = 1.094804871759614e-10; +elem_mat[20] = -0.1666666666666667; +elem_mat[21] = 0.6666666664477059; +elem_mat[22] = -0.1666666666666668; +elem_mat[23] = -0.1666666667761472; +elem_mat[24] = -0.1666666666666667; +elem_mat[25] = 1.094804702624075e-10; +elem_mat[26] = 0.666666666447706; +elem_mat[27] = 1.094804802370675e-10; +elem_mat[28] = -0.1666666666666667; +elem_mat[29] = 1.094804698287266e-10; +elem_mat[30] = 0.666666666447706; +elem_mat[31] = 1.094805079926431e-10; +elem_mat[32] = -0.1666666666666667; +elem_mat[33] = 0.666666666447706; +elem_mat[34] = 1.094804663592797e-10; +elem_mat[35] = 
0.666666666447706; +} + +UTEST_CASE(diffusionMatrix) +{ + std::vector elem_mat_correct(64); + get_test_elem_mat(elem_mat_correct); + + const size_t len = miniFE::Hex8::numNodesPerElem*miniFE::Hex8::numNodesPerElem; + Scalar elem_mat[len]; + Scalar testcoords[miniFE::Hex8::numNodesPerElem*miniFE::Hex8::spatialDim]; + + miniFE::get_hex8_node_coords_3d(0, 0, 0, 1.0, &testcoords[0]); + + miniFE::Hex8::diffusionMatrix_symm(testcoords, elem_mat); + + for(size_t i=0; i 1.e-6) { + return false; + } + } + + Scalar elem_vec_correct[miniFE::Hex8::numNodesPerElem]; + elem_vec_correct[0] = 0.125; + elem_vec_correct[1] = 0.125; + elem_vec_correct[2] = 0.125; + elem_vec_correct[3] = 0.125; + elem_vec_correct[4] = 0.125; + elem_vec_correct[5] = 0.125; + elem_vec_correct[6] = 0.125; + elem_vec_correct[7] = 0.125; + + Scalar elem_vec[miniFE::Hex8::numNodesPerElem]; + miniFE::Hex8::sourceVector(testcoords, elem_vec); + + const size_t nn = miniFE::Hex8::numNodesPerElem; + for(size_t i=0; i 1.e-13) { + return false; + } + } + + return true; +} + +UTEST_CASE(sum_into_row) +{ + SerialComputeNode compute_node; + miniFE::CSRMatrix A(compute_node); + A.rows.resize(1,0); + A.row_offsets.resize(2,0); + A.row_offsets[1] = 4; + A.packed_cols.resize(4); + A.packed_cols[0] = 0; + A.packed_cols[1] = 1; + A.packed_cols[2] = 2; + A.packed_cols[3] = 3; + A.packed_coefs.resize(4,0); + + std::vector indices(4); + indices[0] = 2; + indices[1] = 0; + indices[2] = 1; + indices[3] = 3; + std::vector coefs(4); + coefs[0] = 2.0; + coefs[1] = 0.0; + coefs[2] = 1.0; + coefs[3] = 3.0; + + miniFE::sum_into_row(0, 4, &indices[0], &coefs[0], A); + + coefs[0] = 0.0; + coefs[1] = 1.0; + coefs[2] = 2.0; + coefs[3] = 3.0; + + if (coefs != A.packed_coefs) { + return false; + } + + return true; +} + +UTEST_CASE(sum_in_elem_matrix) +{ + SerialComputeNode compute_node; + miniFE::CSRMatrix A(compute_node); + A.rows.resize(4,0); + A.rows[0] = 0; + A.rows[1] = 1; + A.rows[2] = 2; + A.rows[3] = 3; + A.row_offsets.resize(5,0); 
+ A.row_offsets[1] = 4; + A.row_offsets[2] = 8; + A.row_offsets[3] = 12; + A.row_offsets[4] = 16; + A.packed_cols.resize(16); + A.packed_cols[0] = 0; + A.packed_cols[1] = 1; + A.packed_cols[2] = 2; + A.packed_cols[3] = 3; + A.packed_cols[4] = 0; + A.packed_cols[5] = 1; + A.packed_cols[6] = 2; + A.packed_cols[7] = 3; + A.packed_cols[8] = 0; + A.packed_cols[9] = 1; + A.packed_cols[10] = 2; + A.packed_cols[11] = 3; + A.packed_cols[12] = 0; + A.packed_cols[13] = 1; + A.packed_cols[14] = 2; + A.packed_cols[15] = 3; + + A.packed_coefs.resize(16,0); + + std::vector indices(4); + indices[0] = 2; + indices[1] = 0; + indices[2] = 1; + indices[3] = 3; + std::vector coefs(16); + coefs[0] = 2.0; + coefs[1] = 0.0; + coefs[2] = 1.0; + coefs[3] = 3.0; + coefs[4] = 2.0; + coefs[5] = 0.0; + coefs[6] = 1.0; + coefs[7] = 3.0; + coefs[8] = 2.0; + coefs[9] = 0.0; + coefs[10] = 1.0; + coefs[11] = 3.0; + coefs[12] = 2.0; + coefs[13] = 0.0; + coefs[14] = 1.0; + coefs[15] = 3.0; + + miniFE::sum_in_elem_matrix(4, &indices[0], &coefs[0], A); + + coefs[0] = 0.0; + coefs[1] = 1.0; + coefs[2] = 2.0; + coefs[3] = 3.0; + coefs[4] = 0.0; + coefs[5] = 1.0; + coefs[6] = 2.0; + coefs[7] = 3.0; + coefs[8] = 0.0; + coefs[9] = 1.0; + coefs[10] = 2.0; + coefs[11] = 3.0; + coefs[12] = 0.0; + coefs[13] = 1.0; + coefs[14] = 2.0; + coefs[15] = 3.0; + + if (coefs != A.packed_coefs) { + return false; + } + + return true; +} + +UTEST_CASE(assemble_FE_data) +{ + int global_box[3][2] = {{ 0, 1 }, { 0, 1 }, { 0, 1 } }; + int box[3][2] = {{ 0, 1 }, { 0, 1 }, { 0, 1 } }; + + miniFE::simple_mesh_description mesh(global_box, box); + + SerialComputeNode compute_node; + miniFE::CSRMatrix A(compute_node); + + miniFE::generate_matrix_structure(mesh, A); + + miniFE::Vector b(0, 8, compute_node); + + const int num_nodes = 8; + + std::vector symm_elem_mat_correct; + get_test_elem_mat(symm_elem_mat_correct); + std::vector full_elem_mat_correct(num_nodes*num_nodes); + + int offset = 0; + for(int i=0; i=i) { + Scalar coef = 
symm_elem_mat_correct[offset++]; + full_elem_mat_correct[i*num_nodes+j] = coef; + full_elem_mat_correct[j*num_nodes+i] = coef; + } + } + } + + std::vector elem_node_ids(num_nodes); + elem_node_ids[0] = 0; + elem_node_ids[1] = 1; + elem_node_ids[2] = 5; + elem_node_ids[3] = 4; + elem_node_ids[4] = 2; + elem_node_ids[5] = 3; + elem_node_ids[6] = 7; + elem_node_ids[7] = 6; + + //now for each row of of the 8x8 elem_mat_correct, reorder that + //row according to the order of elem_node_ids, rows and columns. + std::vector elem_mat_reordered(num_nodes*num_nodes); + offset = 0; + int row = 0; + for(int i=0; i& assembled_mat = A.packed_coefs; + + for(size_t i=0; i 1.e-13) { + return false; + } + } + + return true; +} + +UTEST_CASE(pll_matvec2) +{ + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (numprocs != 2) { + if (myproc == 0) std::cout <<"pll_matvec2_utest only runs when numprocs=2."< A(compute_node); + miniFE::Vector x(myproc, 4,compute_node) ,y(myproc, 4,compute_node); + + A.rows.resize(2, 0); + if (myproc == 0) { + A.rows[0] = 0; A.rows[1] = 1; + } + else { + A.rows[0] = 2; A.rows[1] = 3; + } + + A.row_offsets.resize(3, 0); + if (myproc == 0) { + A.row_offsets[1] = 2; A.row_offsets[2] = 6; + } + else { + A.row_offsets[1] = 2; A.row_offsets[2] = 4; + } + + if (myproc == 0) { + A.packed_cols.resize(6, 0); + A.packed_cols[1] = 1; + A.packed_cols[2] = 0; + A.packed_cols[3] = 1; + A.packed_cols[4] = 2; + A.packed_cols[5] = 3; + } + else { + A.packed_cols.resize(4, 0); + A.packed_cols[0] = 1; + A.packed_cols[1] = 2; + A.packed_cols[2] = 1; + A.packed_cols[3] = 3; + } + if (myproc == 0) { + A.packed_coefs.resize(6, 1); + A.packed_coefs[2] = 2; + A.packed_coefs[4] = -1; + } + else { + A.packed_coefs.resize(4, 1); + A.packed_coefs[0] = -2; + A.packed_coefs[2] = 2; + } + + if (myproc == 0) { + x.coefs[0] = 1; x.coefs[1] = 2; + } + else { + x.coefs[0] = 3; x.coefs[1] = 4; + } + 
+ miniFE::make_local_matrix(A); + miniFE::exchange_externals(A, x); + miniFE::matvec(A, x, y); + + if (myproc == 0) { + if (y.coefs[0] != 3.0 || y.coefs[1] != 5.0) { + std::cout << "proc 0: pll_matvec2_utest failed" << std::endl; + return false; + } + } + else { + if (y.coefs[0] != -1.0 || y.coefs[1] != 8.0) { + std::cout << "proc 1: pll_matvec2_utest failed" << std::endl; + return false; + } + } + + return true; +} + +UTEST_CASE(pll_matvec3) +{ + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (numprocs != 3) { + if (myproc == 0) std::cout <<"pll_matvec3_utest only runs when numprocs=3."< A(compute_node); + miniFE::Vector x(myproc, 6, compute_node) ,y(myproc, 6, compute_node); + + A.rows.resize(2, 0); + A.rows[0] = myproc*2; A.rows[1] = myproc*2+1; + + A.row_offsets.resize(3, 0); + if (myproc == 0) { + A.row_offsets[1] = 2; A.row_offsets[2] = 4; + } + else if (myproc == 1) { + A.row_offsets[1] = 3; A.row_offsets[2] = 4; + } + else { + A.row_offsets[1] = 2; A.row_offsets[2] = 4; + } + + A.packed_cols.resize(4, 0); + if (myproc == 0) { + A.packed_cols[1] = 3; + A.packed_cols[2] = 1; + A.packed_cols[3] = 5; + } + else if (myproc == 1) { + A.packed_cols[1] = 2; + A.packed_cols[2] = 4; + A.packed_cols[3] = 3; + } + else { + A.packed_cols[0] = 1; + A.packed_cols[1] = 4; + A.packed_cols[2] = 3; + A.packed_cols[3] = 5; + } + + A.packed_coefs.resize(4, 1); + if (myproc == 0) { + A.packed_coefs[1] = -1; + A.packed_coefs[3] = -1; + } + else if (myproc == 1) { + A.packed_coefs[0] = 2; + A.packed_coefs[2] = -1; + } + else { + A.packed_coefs[0] = 2; + A.packed_coefs[2] = 2; + } + + if (myproc == 0) { + x.coefs[0] = 1; x.coefs[1] = 2; + } + else if (myproc == 1) { + x.coefs[0] = 3; x.coefs[1] = 4; + } + else { + x.coefs[0] = 5; x.coefs[1] = 6; + } + + miniFE::make_local_matrix(A); + miniFE::exchange_externals(A, x); + miniFE::matvec(A, x, y); + + if (myproc == 0) { + if 
(y.coefs[0] != -3.0 || y.coefs[1] != -4.0) { + std::cout << "proc 0: pll_matvec3 failed" << std::endl; + return false; + } + } + else if (myproc == 1) { + if (y.coefs[0] != 0.0 || y.coefs[1] != 4.0) { + std::cout << "proc 1: pll_matvec3 failed" << std::endl; + return false; + } + } + else { + if (y.coefs[0] != 9.0 || y.coefs[1] != 14.0) { + std::cout << "proc 2: pll_matvec3 failed" << std::endl; + return false; + } + } + + return true; +} + +UTEST_CASE(ComputeNode_waxpy1) +{ + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (numprocs != 1) { + if (myproc == 0) std::cout <<"ComputeNode_waxpy1 only runs when numprocs=1."< x(0, len, compute_node), y(0, len, compute_node), w(0, len, compute_node); + + std::vector inds(len, 0); + for(size_t i=0; i coefs(len, 1); + + miniFE::sum_into_vector(len, &inds[0], &coefs[0], x); + miniFE::sum_into_vector(len, &inds[0], &coefs[0], y); + miniFE::sum_into_vector(len, &inds[0], &coefs[0], w); + + Scalar* d_x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); + Scalar* d_y = compute_node.get_buffer(&y.coefs[0], y.coefs.size()); + + compute_node.copy_to_buffer(&x.coefs[0], x.coefs.size(), d_x); + compute_node.copy_to_buffer(&y.coefs[0], y.coefs.size(), d_y); + + miniFE::waxpby(1.0, x, 1.0, y, w); + + Scalar* d_w = compute_node.get_buffer(&w.coefs[0], w.coefs.size()); + compute_node.copy_from_buffer(&w.coefs[0], w.coefs.size(), d_w); + + Scalar expected = 2; + Scalar tol = 1.e-7; + + for(size_t i=0; i tol) { + return false; + } + } + return true; +} + +UTEST_CASE(ComputeNode_dot1) +{ + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (numprocs != 1) { + if (myproc == 0) std::cout <<"ComputeNode_dot1 only runs when numprocs=1."< x(0, N, compute_node), y(0, N, compute_node); + + std::vector inds(N, 0); + for(size_t i=0; i coefs(N, 1); + + 
miniFE::sum_into_vector(N, &inds[0], &coefs[0], x); + miniFE::sum_into_vector(N, &inds[0], &coefs[0], y); + + Scalar* d_x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); + Scalar* d_y = compute_node.get_buffer(&y.coefs[0], y.coefs.size()); + + compute_node.copy_to_buffer(&x.coefs[0], x.coefs.size(), d_x); + compute_node.copy_to_buffer(&y.coefs[0], y.coefs.size(), d_y); + + Scalar dot_prod = miniFE::dot(x,y); + + if (dot_prod != N) { + return false; + } + + return true; +} + +UTEST_CASE(ComputeNode_TBB_dot1) +{ + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (numprocs != 1) { + if (myproc == 0) std::cout <<"ComputeNode_TBB_dot1_utest only runs when numprocs=1."< x(0, N, compute_node), y(0, N, compute_node); + + std::vector inds(N, 0); + for(size_t i=0; i coefs(N, 1); + + miniFE::sum_into_vector(inds.size(), &inds[0], &coefs[0], x); + miniFE::sum_into_vector(inds.size(), &inds[0], &coefs[0], y); + + Scalar dot_prod = miniFE::dot(x,y); + + if (dot_prod != N) { + return false; + } + +#else + std::cout << "ComputeNode_TBB_dot1_utest only runs when MINIFE_HAVE_TBB is defined."< x(0, 10, compute_node), y(0, 10, compute_node); + + size_t len = 10; + std::vector inds(len, 0); + for(size_t i=0; i coefs(len, 1); + + miniFE::sum_into_vector(len, &inds[0], &coefs[0], x); + miniFE::sum_into_vector(len, &inds[0], &coefs[0], y); + + Scalar* d_x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); + Scalar* d_y = compute_node.get_buffer(&y.coefs[0], y.coefs.size()); + + compute_node.copy_to_buffer(&x.coefs[0], x.coefs.size(), d_x); + compute_node.copy_to_buffer(&y.coefs[0], y.coefs.size(), d_y); + + Scalar dot_prod = miniFE::dot(x, y); + + if (std::abs(dot_prod-10.0) > 1.e-12) { + return false; + } + return true; +} + +UTEST_CASE(ser_matvec1) +{ + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, 
&myproc); +#endif + + if (numprocs != 1) { + if (myproc == 0) std::cout <<"ser_matvec1_utest only runs when numprocs=1."< A(compute_node); + miniFE::Vector x(0, 4,compute_node) ,y(0, 4,compute_node); + + A.rows.resize(4, 0); + A.rows[0] = 0; A.rows[1] = 1; + A.rows[2] = 2; A.rows[3] = 3; + + A.row_offsets.resize(5, 0); + A.row_offsets[1] = 2; A.row_offsets[2] = 6; + A.row_offsets[3] = 8; A.row_offsets[4] = 10; + + A.packed_cols.resize(10, 0); + A.packed_cols[1] = 1; + A.packed_cols[2] = 0; + A.packed_cols[3] = 1; + A.packed_cols[4] = 2; + A.packed_cols[5] = 3; + A.packed_cols[6] = 1; + A.packed_cols[7] = 2; + A.packed_cols[8] = 1; + A.packed_cols[9] = 3; + + A.packed_coefs.resize(10, 1); + A.packed_coefs[2] = 2; + A.packed_coefs[4] = -1; + A.packed_coefs[6] = -2; + A.packed_coefs[8] = 2; + + x.coefs[0] = 1; x.coefs[1] = 2; x.coefs[2] = 3; x.coefs[3] = 4; + + for(size_t i=0; i 1.e-12) { + std::cout << "failed 0. y.coefs[0]=" < 1.e-12) { + std::cout << "failed 1. y.coefs[1]=" < 1.e-12) { + std::cout << "failed 2. y.coefs[2]=" < 1.e-12) { + std::cout << "failed 3. y.coefs[3]=" < x(0, len,compute_node) ,y(0, len,compute_node), w(0, len,compute_node); + + Scalar one = 1, zero = 0; + + for(size_t i=0; i1.e-2 ? 
1.e-6 * (waxpy_flops/tWAXPY) : 0; + + std::cout << "waxpby_perf_utest: WAXPBY time: " << tWAXPY << ", len: " << len << ", num_iters: " << num_iters + << ", MFLOPS: " << waxpy_mflops << std::endl; + return true; +} + +UTEST_CASE(matmat3x3_1) +{ + Scalar A[] = {1, 4, 7, 2, 5, 8, 3, 6, 9}; + Scalar B[] = {1, 1, 1, 2, 2, 2, 3, 3, 3}; + Scalar C[9]; + + miniFE::matmat3x3(A, B, C); + + TEST_EQUAL(C[0], 6.0); + TEST_EQUAL(C[1], 15.0); + TEST_EQUAL(C[2], 24.0); + TEST_EQUAL(C[3], 12.0); + TEST_EQUAL(C[4], 30.0); + TEST_EQUAL(C[5], 48.0); + TEST_EQUAL(C[6], 18.0); + TEST_EQUAL(C[7], 45.0); + TEST_EQUAL(C[8], 72.0); + + return true; +} + +UTEST_CASE(matmat3x3_X_3xn_1) +{ + Scalar A[] = {1, 4, 7, 2, 5, 8, 3, 6, 9}; + Scalar B[] = {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}; + Scalar C[18]; + + miniFE::matmat3x3_X_3xn(A, 6, B, C); + + TEST_EQUAL(C[0], 6.0); + TEST_EQUAL(C[1], 15.0); + TEST_EQUAL(C[2], 24.0); + TEST_EQUAL(C[3], 12.0); + TEST_EQUAL(C[4], 30.0); + TEST_EQUAL(C[5], 48.0); + TEST_EQUAL(C[6], 18.0); + TEST_EQUAL(C[7], 45.0); + TEST_EQUAL(C[8], 72.0); + TEST_EQUAL(C[9], 24.0); + TEST_EQUAL(C[10], 60.0); + TEST_EQUAL(C[11], 96.0); + TEST_EQUAL(C[12], 30.0); + TEST_EQUAL(C[13], 75.0); + TEST_EQUAL(C[14], 120.0); + TEST_EQUAL(C[15], 36.0); + TEST_EQUAL(C[16], 90.0); + TEST_EQUAL(C[17], 144.0); + + return true; +} + +UTEST_CASE(matTransMat3x3_X_3xn_1) +{ + Scalar A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + Scalar B[] = {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}; + Scalar C[18]; + + miniFE::matTransMat3x3_X_3xn(A, 6, B, C); + + TEST_EQUAL(C[0], 6.0); + TEST_EQUAL(C[1], 15.0); + TEST_EQUAL(C[2], 24.0); + TEST_EQUAL(C[3], 12.0); + TEST_EQUAL(C[4], 30.0); + TEST_EQUAL(C[5], 48.0); + TEST_EQUAL(C[6], 18.0); + TEST_EQUAL(C[7], 45.0); + TEST_EQUAL(C[8], 72.0); + TEST_EQUAL(C[9], 24.0); + TEST_EQUAL(C[10], 60.0); + TEST_EQUAL(C[11], 96.0); + TEST_EQUAL(C[12], 30.0); + TEST_EQUAL(C[13], 75.0); + TEST_EQUAL(C[14], 120.0); + TEST_EQUAL(C[15], 36.0); + 
TEST_EQUAL(C[16], 90.0); + TEST_EQUAL(C[17], 144.0); + + return true; +} + +UTEST_CASE(BoxIterator1) +{ + int box1[3][2] = {{ 0, 2 }, { 0, 2 }, { 0, 2 } }; + miniFE::BoxIterator iter = miniFE::BoxIterator::begin(box1); + miniFE::BoxIterator end = miniFE::BoxIterator::end(box1); + + for(int iz=box1[2][0]; iz(nx,ny,nz,iter.x,iter.y,-iter.z); + int x, y, z; + miniFE::get_coords(elemID, nx,ny,nz, x,y,z); + TEST_EQUAL(x,iter.x); + TEST_EQUAL(y,iter.y); + TEST_EQUAL(z,-iter.z); + } + + return true; +} + +#endif + diff --git a/mkl/basic/verify_solution.hpp b/mkl/basic/verify_solution.hpp new file mode 100644 index 0000000..fb3bd3b --- /dev/null +++ b/mkl/basic/verify_solution.hpp @@ -0,0 +1,170 @@ +#ifndef _verify_solution_hpp_ +#define _verify_solution_hpp_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template +struct err_info { + Scalar err; + Scalar computed; + Scalar analytic; + Scalar coords[3]; +}; + +template +void +verify_solution(const simple_mesh_description& mesh, + const VectorType& x) +{ + typedef typename VectorType::GlobalOrdinalType GlobalOrdinal; + typedef typename VectorType::ScalarType Scalar; + + int global_nodes_x = mesh.global_box[0][1]+1; + int global_nodes_y = mesh.global_box[1][1]+1; + int global_nodes_z = mesh.global_box[2][1]+1; + Box box; + copy_box(mesh.local_box, box); + + //num-owned-nodes in each dimension is num-elems+1 + //only if num-elems > 0 in that dimension *and* + //we are at the high end of the global range in that dimension: + if (box[0][1] > box[0][0] && box[0][1] == mesh.global_box[0][1]) ++box[0][1]; + if (box[1][1] > box[1][0] && box[1][1] == mesh.global_box[1][1]) ++box[1][1]; + if (box[2][1] > box[2][0] && box[2][1] == mesh.global_box[2][1]) ++box[2][1]; + + GlobalOrdinal nrows = get_num_ids(box); + + std::vector rows(nrows); + std::vector row_coords(nrows*3); + + unsigned roffset = 0; + + for(int iz=box[2][0]; iz(global_nodes_x, global_nodes_y, global_nodes_z, + ix, iy, iz); + Scalar x, y, z; + get_coords(row_id, global_nodes_x, global_nodes_y, global_nodes_z, x, y, z); + + rows[roffset] = mesh.map_id_to_row(row_id); + row_coords[roffset*3] = x; + row_coords[roffset*3+1] = y; + row_coords[roffset*3+2] = z; + ++roffset; + } + } + } + + if (x.local_size != rows.size() || x.local_size != nrows) { + throw std::runtime_error("verify_solution ERROR, size mismatch"); + } + + const int num_terms = 300; + + err_info max_error; + max_error.err = 0.0; + + for(size_t i=0; i max_error.err) { + max_error.err = err; + max_error.computed = computed_soln; + 
max_error.analytic = analytic_soln; + max_error.coords[0] = x; + max_error.coords[1] = y; + max_error.coords[2] = z; + } + } + + Scalar local_max_err = max_error.err; + Scalar global_max_err = 0; +#ifdef HAVE_MPI + MPI_Allreduce(&local_max_err, &global_max_err, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); +#else + global_max_err = local_max_err; +#endif + + if (local_max_err == global_max_err) { + if (max_error.err > 1.e-6) { + std::cout << "max absolute error is "< + +namespace miniFE { + +template +struct ElemData { + ElemData() : nodes_per_elem(Hex8::numNodesPerElem) {} + ~ElemData(){} + + const size_t nodes_per_elem; + GlobalOrdinal elem_node_ids[Hex8::numNodesPerElem]; + Scalar grad_vals[Hex8::numGaussPointsPerDim * Hex8::numGaussPointsPerDim * Hex8::numGaussPointsPerDim * Hex8::numNodesPerElem * Hex8::spatialDim]; + Scalar elem_node_coords[Hex8::numNodesPerElem*Hex8::spatialDim]; + Scalar elem_diffusion_matrix[(Hex8::numNodesPerElem*(Hex8::numNodesPerElem+1))/2]; + Scalar elem_source_vector[Hex8::numNodesPerElem]; +}; + +template +struct ElemDataPtr { + ElemDataPtr() : nodes_per_elem(Hex8::numNodesPerElem) {} + ~ElemDataPtr(){} + + const size_t nodes_per_elem; + GlobalOrdinal elem_node_ids[Hex8::numNodesPerElem]; + Scalar grad_vals[Hex8::numGaussPointsPerDim * Hex8::numGaussPointsPerDim * Hex8::numGaussPointsPerDim * Hex8::numNodesPerElem * Hex8::spatialDim]; + Scalar elem_node_coords[(Hex8::numNodesPerElem*(Hex8::spatialDim+1))/2]; + Scalar* elem_diffusion_matrix; + Scalar* elem_source_vector; +}; + +}//namespace miniFE + +#endif + diff --git a/mkl/fem/Hex8.hpp b/mkl/fem/Hex8.hpp new file mode 100644 index 0000000..d2cd4f2 --- /dev/null +++ b/mkl/fem/Hex8.hpp @@ -0,0 +1,417 @@ +#ifndef _Hex8_hpp_ +#define _Hex8_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there 
is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#ifndef KERNEL_PREFIX +#define KERNEL_PREFIX +#endif + +#include +#include +#include + +namespace miniFE { + +namespace Hex8 { + +template +KERNEL_PREFIX void shape_fns(const Scalar* x, Scalar* values_at_nodes) +{ + //assumptions: values_at_nodes has length numNodesPerElem + // x has length 3 (hard-coded spatialDim) + + const Scalar u = 1.0 - x[0]; + const Scalar v = 1.0 - x[1]; + const Scalar w = 1.0 - x[2]; + + const Scalar up1 = 1.0 + x[0]; + const Scalar vp1 = 1.0 + x[1]; + const Scalar wp1 = 1.0 + x[2]; + + values_at_nodes[0] = 0.125 * u * v * w;//(1-x)*(1-y)*(1-z) + values_at_nodes[1] = 0.125 * up1 * v * w;//(1+x)*(1-y)*(1-z) + values_at_nodes[2] = 0.125 * up1 * vp1 * w;//(1+x)*(1+y)*(1-z) + values_at_nodes[3] = 0.125 * u * vp1 * w;//(1-x)*(1+y)*(1-z) + values_at_nodes[4] = 0.125 * u * v * wp1;//(1-x)*(1-y)*(1+z) + values_at_nodes[5] = 0.125 * up1 * v * wp1;//(1+x)*(1-y)*(1+z) + values_at_nodes[6] = 0.125 * up1 * vp1 * wp1;//(1+x)*(1+y)*(1+z) + values_at_nodes[7] = 0.125 * u * vp1 * wp1;//(1-x)*(1+y)*(1+z) +} + +template +KERNEL_PREFIX 
void gradients(const Scalar* x, Scalar* values_per_fn) +{ + //assumptions values_per_fn has length 24 (numNodesPerElem*spatialDim) + // spatialDim == 3 + + const Scalar u = 1.0 - x[0]; + const Scalar v = 1.0 - x[1]; + const Scalar w = 1.0 - x[2]; + + const Scalar up1 = 1.0 + x[0]; + const Scalar vp1 = 1.0 + x[1]; + const Scalar wp1 = 1.0 + x[2]; + +//fn 0 + values_per_fn[0] = -0.125 * v * w; + values_per_fn[1] = -0.125 * u * w; + values_per_fn[2] = -0.125 * u * v; +//fn 1 + values_per_fn[3] = 0.125 * v * w; + values_per_fn[4] = -0.125 * up1 * w; + values_per_fn[5] = -0.125 * up1 * v; +//fn 2 + values_per_fn[6] = 0.125 * vp1 * w; + values_per_fn[7] = 0.125 * up1 * w; + values_per_fn[8] = -0.125 * up1 * vp1; +//fn 3 + values_per_fn[9] = -0.125 * vp1 * w; + values_per_fn[10] = 0.125 * u * w; + values_per_fn[11] = -0.125 * u * vp1; +//fn 4 + values_per_fn[12] = -0.125 * v * wp1; + values_per_fn[13] = -0.125 * u * wp1; + values_per_fn[14] = 0.125 * u * v; +//fn 5 + values_per_fn[15] = 0.125 * v * wp1; + values_per_fn[16] = -0.125 * up1 * wp1; + values_per_fn[17] = 0.125 * up1 * v; +//fn 6 + values_per_fn[18] = 0.125 * vp1 * wp1; + values_per_fn[19] = 0.125 * up1 * wp1; + values_per_fn[20] = 0.125 * up1 * vp1; +//fn 7 + values_per_fn[21] = -0.125 * vp1 * wp1; + values_per_fn[22] = 0.125 * u * wp1; + values_per_fn[23] = 0.125 * u * vp1; +} + +template +KERNEL_PREFIX void gradients_and_detJ(const Scalar* elemNodeCoords, + const Scalar* grad_vals, + Scalar& detJ) +{ +/** + pt is the point at which the jacobian is to be computed. 
+*/ + + //assumptions on the lengths of input arguments: + //elemNodeCoords has length numNodesPerElem*spatialDim, + //grad_vals has length numNodesPerElem*spatialDim + + const Scalar zero = 0; + + Scalar J00 = zero; + Scalar J01 = zero; + Scalar J02 = zero; + + Scalar J10 = zero; + Scalar J11 = zero; + Scalar J12 = zero; + + Scalar J20 = zero; + Scalar J21 = zero; + Scalar J22 = zero; + + size_t i_X_spatialDim = 0; + for(size_t i=0; i +KERNEL_PREFIX void gradients_and_invJ_and_detJ(const Scalar* elemNodeCoords, + const Scalar* grad_vals, + Scalar* invJ, + Scalar& detJ) +{ +/** + pt is the point at which the jacobian is to be computed. +*/ + + //assumptions on the lengths of input arguments: + //pt has length spatialDim, + //elemNodeCoords has length numNodesPerElem*spatialDim, + //grad_vals has length numNodesPerElem*spatialDim, and + //J has length spatialDim*spatialDim + + const Scalar zero = 0; + + // + //First we compute the jacobian J: + // + Scalar J00 = zero; + Scalar J01 = zero; + Scalar J02 = zero; + + Scalar J10 = zero; + Scalar J11 = zero; + Scalar J12 = zero; + + Scalar J20 = zero; + Scalar J21 = zero; + Scalar J22 = zero; + + size_t i_X_spatialDim = 0; + for(size_t i=0; i +KERNEL_PREFIX void diffusionMatrix_symm(const Scalar* elemNodeCoords, + const Scalar* grad_vals, + Scalar* elem_mat) +{ + int len = (numNodesPerElem * (numNodesPerElem+1))/2; + const Scalar zero = 0; + miniFE::fill(elem_mat, elem_mat+len, zero); + + Scalar gpts[numGaussPointsPerDim]; + Scalar gwts[numGaussPointsPerDim]; + + gauss_pts(numGaussPointsPerDim, gpts, gwts); + + const Scalar k = 1.0; + Scalar detJ = 0.0; + + Scalar dpsidx[numNodesPerElem], dpsidy[numNodesPerElem], dpsidz[numNodesPerElem]; + + Scalar invJ[spatialDim*spatialDim]; + + //The following nested loop implements equations 3.4.5 and 3.4.7 on page 88 + //of Reddy & Gartling, "The Finite Element Method in Heat Transfer and Fluid + //Dynamics", 2nd edition, + //to compute the element diffusion matrix for the steady 
conduction equation. + + Scalar pt[spatialDim]; + +#ifdef MINIFE_DEBUG + Scalar volume = zero; +#endif + + size_t gv_offset = 0; + for(size_t ig=0; ig 1.e-7) { +// std::cout << "element volume is "< +KERNEL_PREFIX void sourceVector(const Scalar* elemNodeCoords, + const Scalar* grad_vals, + Scalar* elem_vec) +{ + int len = numNodesPerElem; + const Scalar zero = 0; + miniFE::fill(elem_vec, elem_vec+len, zero); + + Scalar gpts[numGaussPointsPerDim]; + Scalar gwts[numGaussPointsPerDim]; + + Scalar psi[numNodesPerElem]; + + gauss_pts(numGaussPointsPerDim, gpts, gwts); + + Scalar Q = 1.0; + + Scalar pt[spatialDim]; + + size_t gv_offset = 0; + for(size_t ig=0; ig +#include +#include + +namespace miniFE { + +template +void compute_gradient_values(Scalar* grad_vals) +{ + Scalar gpts[Hex8::numGaussPointsPerDim]; + Scalar gwts[Hex8::numGaussPointsPerDim]; + + gauss_pts(Hex8::numGaussPointsPerDim, gpts, gwts); + + Scalar pt[Hex8::spatialDim]; + + Scalar* grad_vals_ptr = grad_vals; + for(size_t ig=0; ig +void +compute_element_matrix_and_vector(ElemData& elem_data) +{ + Hex8::diffusionMatrix_symm(elem_data.elem_node_coords, elem_data.grad_vals, + elem_data.elem_diffusion_matrix); + Hex8::sourceVector(elem_data.elem_node_coords, elem_data.grad_vals, + elem_data.elem_source_vector); +} + +template +void +compute_element_matrix_and_vector(ElemDataPtr& elem_data) +{ + Hex8::diffusionMatrix_symm(elem_data.elem_node_coords, elem_data.grad_vals, + elem_data.elem_diffusion_matrix); + Hex8::sourceVector(elem_data.elem_node_coords, elem_data.grad_vals, + elem_data.elem_source_vector); +} + +}//namespace miniFE + +#endif + diff --git a/mkl/fem/Hex8_enums.hpp b/mkl/fem/Hex8_enums.hpp new file mode 100644 index 0000000..3dfac26 --- /dev/null +++ b/mkl/fem/Hex8_enums.hpp @@ -0,0 +1,52 @@ +#ifndef _Hex8_enums_hpp_ +#define _Hex8_enums_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// 
Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +namespace miniFE { + +namespace Hex8 { + +// !!!!!!! +//Important note: there are places in miniFE code where +//loops over spatialDim are unrolled (spatialDim is assumed to be 3). +//Thus, changing this enum is not enough to make miniFE code +//work for spatialDim values other than 3. +// !!!!!!! 
+enum { + spatialDim = 3, + numNodesPerElem = 8, + numGaussPointsPerDim = 2 +}; + +}//namespace Hex8 + +}//namespace miniFE + +#endif + diff --git a/mkl/fem/analytic_soln.hpp b/mkl/fem/analytic_soln.hpp new file mode 100644 index 0000000..2d130e2 --- /dev/null +++ b/mkl/fem/analytic_soln.hpp @@ -0,0 +1,116 @@ +#ifndef _analytic_soln_hpp_ +#define _analytic_soln_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include <cmath> + +#ifndef MINIFE_SCALAR +#define MINIFE_SCALAR double +#endif + +namespace miniFE { + +typedef MINIFE_SCALAR Scalar; + +// The 'soln' function below computes the analytic solution for +// steady state temperature in a brick-shaped domain (formally called +// a rectangular parallelepiped). 
The inputs to the function are +// the x,y,z coordinates of the point at which temperature is to be +// computed, and the number of terms p,q in the series expansion. +// +// The equations used for the temperature solution are equations 9 and 10 +// in section 6.2 of Carslaw & Jaeger, "Conduction of Heat in Solids". +// +// The paralellepiped being used is defined by this domain: +// 0 <= x <= 1.0 +// 0 <= y <= 1.0 +// 0 <= z <= 1.0 +// +// With boundary conditions prescribing the temperature to be 1.0 on +// the x==1.0 face, and 0.0 on all other faces. +// +// Thus, in the equations from Carslaw & Jaeger, the following constants +// are used: +// +// a == b == c == 1.0 (the extents of the domain) +// v1 == 0.0 (temperature at x == 0.0) +// v2 == 1.0 (temperature at x == 1.0) +// + +const Scalar PI = 3.141592653589793238462; +const Scalar PI_SQR = PI*PI; +const Scalar term0 = 16.0/(PI_SQR); + +inline Scalar fcn_l(int p, int q) +{ + return std::sqrt((2*p+1)*(2*p+1)*PI_SQR + (2*q+1)*(2*q+1)*PI_SQR); +} + +inline Scalar fcn(int n, Scalar u) +{ + return (2*n+1)*PI*u; +} + +inline Scalar soln(Scalar x, Scalar y, Scalar z, int max_p, int max_q) +{ + Scalar sum = 0; + for(int p=0; p<=max_p; ++p) { + const Scalar p21y = fcn(p, y); + const Scalar sin_py = std::sin(p21y)/(2*p+1); + for(int q=0; q<=max_q; ++q) { + const Scalar q21z = fcn(q, z); + const Scalar sin_qz = std::sin(q21z)/(2*q+1); + + const Scalar l = fcn_l(p, q); + + const Scalar sinh1 = std::sinh(l*x); + const Scalar sinh2 = std::sinh(l); + + const Scalar tmp = (sinh1*sin_py)*(sin_qz/sinh2); + + //if the scalar l gets too big, sinh(l) becomes inf. + //if that happens, tmp is a NaN. + //crude check for NaN: + //if tmp != tmp, tmp is NaN + if (tmp == tmp) { + sum += tmp; + } + else { + //if we got a NaN, break out of this inner loop and go to + //the next iteration of the outer loop. 
+ break; + } + } + } + return term0*sum; +} + +}//namespace miniFE + +#endif /* _analytic_soln_hpp_ */ diff --git a/mkl/fem/gauss_pts.hpp b/mkl/fem/gauss_pts.hpp new file mode 100644 index 0000000..7652839 --- /dev/null +++ b/mkl/fem/gauss_pts.hpp @@ -0,0 +1,67 @@ +#ifndef _gauss_pts_hpp_ +#define _gauss_pts_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#ifndef KERNEL_PREFIX +#define KERNEL_PREFIX +#endif + +namespace miniFE { + +template +inline +KERNEL_PREFIX void gauss_pts(int N, Scalar* pts, Scalar* wts) +{ + const Scalar x2 = 0.577350269; // 1.0/sqrt(3.0) + const Scalar x3 = 0.77459667; // sqrt(3.0/5.0) + const Scalar w1 = 0.55555556; // 5.0/9.0 + const Scalar w2 = 0.88888889; // 8.0/9.0 + + switch(N) { + case 1: + pts[0] = 0.0; wts[0] = 2.0; + break; + case 2: + pts[0] = -x2; wts[0] = 1.0; + pts[1] = x2; wts[1] = 1.0; + break; + case 3: + pts[0] = -x3; wts[0] = w1; + pts[1] = 0.0; wts[1] = w2; + pts[2] = x3; wts[2] = w1; + break; + default: + break; + } +} + +}//namespace miniFE + +#endif + diff --git a/mkl/fem/matrix_algebra_3x3.hpp b/mkl/fem/matrix_algebra_3x3.hpp new file mode 100644 index 0000000..012ae82 --- /dev/null +++ b/mkl/fem/matrix_algebra_3x3.hpp @@ -0,0 +1,166 @@ +#ifndef _matrix_algebra_3x3_hpp_ +#define _matrix_algebra_3x3_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. 
+// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#ifndef KERNEL_PREFIX +#define KERNEL_PREFIX +#endif + +namespace miniFE { + +template +#ifdef __CUDACC__ + __host__ __device__ +#endif +KERNEL_PREFIX void fill(Scalar* begin, Scalar* end, const Scalar& val) +{ + while(begin != end) {*begin++ = val;} +} + +template +KERNEL_PREFIX void inverse_and_determinant3x3(const Scalar* J, Scalar* invJ, Scalar& detJ) +{ + //hardwired "3x3" in function-name allows us to assume + //that J and invJ have length 9: + + Scalar J00 = J[0]; + Scalar J01 = J[1]; + Scalar J02 = J[2]; + + Scalar J10 = J[3]; + Scalar J11 = J[4]; + Scalar J12 = J[5]; + + Scalar J20 = J[6]; + Scalar J21 = J[7]; + Scalar J22 = J[8]; + + Scalar term0 = J22*J11 - J21*J12; + Scalar term1 = J22*J01 - J21*J02; + Scalar term2 = J12*J01 - J11*J02; + + detJ = J00*term0 - J10*term1 + J20*term2; + + Scalar inv_detJ = 1.0/detJ; + + invJ[0] = term0*inv_detJ; + invJ[1] = -term1*inv_detJ; + invJ[2] = term2*inv_detJ; + + invJ[3] = -(J22*J10 - J20*J12)*inv_detJ; + invJ[4] = (J22*J00 - J20*J02)*inv_detJ; + invJ[5] = -(J12*J00 - J10*J02)*inv_detJ; + + invJ[6] = (J21*J10 - J20*J11)*inv_detJ; + invJ[7] = -(J21*J00 - J20*J01)*inv_detJ; + invJ[8] = (J11*J00 - J10*J01)*inv_detJ; +} + +template +KERNEL_PREFIX void matmat3x3(const Scalar* A, const Scalar* B, Scalar* C) +{ + //hardwired "3x3" in function-name allows us to assume args have length 9: + //A,B,C are all assumed to be ordered such that columns are 
contiguous. + + const Scalar zero = 0; + miniFE::fill(C, C+9, zero); + + for(int i=0; i<3; ++i) { + for(int j=0; j<3; ++j) { + C[i+j*3] = A[i+0]*B[j*3+0] + + A[i+3]*B[j*3+1] + + A[i+6]*B[j*3+2]; + } + } +} + +template +KERNEL_PREFIX Scalar determinant3x3(const Scalar* J) +{ + //hardwired "3x3" in function-name allows us to assume that J has length 9: + + Scalar J00 = J[0]; + Scalar J01 = J[1]; + Scalar J02 = J[2]; + + Scalar J10 = J[3]; + Scalar J11 = J[4]; + Scalar J12 = J[5]; + + Scalar J20 = J[6]; + Scalar J21 = J[7]; + Scalar J22 = J[8]; + + Scalar term0 = J22*J11 - J21*J12; + Scalar term1 = J22*J01 - J21*J02; + Scalar term2 = J12*J01 - J11*J02; + + Scalar detJ = J00*term0 - J10*term1 + J20*term2; + + return detJ; +} + +template +KERNEL_PREFIX void matmat3x3_X_3xn(const Scalar* A, int n, const Scalar* B, Scalar* C) +{ + //A is 3x3, B is 3xn. So C is also 3xn. + //A,B,C are all assumed to be ordered such that columns are contiguous. + + Scalar* Cj = C; + const Scalar* Bj = B; + for(int j=0; j +KERNEL_PREFIX void matTransMat3x3_X_3xn(const Scalar* A, int n, const Scalar* B, Scalar* C) +{ + //A is 3x3, B is 3xn. So C is also 3xn. + //A,B,C are all assumed to be ordered such that columns are contiguous. 
+ + Scalar* Cj = C; + const Scalar* Bj = B; + for(int j=0; j +#include +#include +#include + +#include +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template +struct err_info { + Scalar err; + Scalar computed; + Scalar analytic; + Scalar coords[3]; +}; + +template +int +verify_solution(const simple_mesh_description& mesh, + const VectorType& x, double tolerance, bool verify_whole_domain = false) +{ + typedef typename VectorType::GlobalOrdinalType GlobalOrdinal; + typedef typename VectorType::ScalarType Scalar; + + int global_nodes_x = mesh.global_box[0][1]+1; + int global_nodes_y = mesh.global_box[1][1]+1; + int global_nodes_z = mesh.global_box[2][1]+1; + Box box; + copy_box(mesh.local_box, box); + + //num-owned-nodes in each dimension is num-elems+1 + //only if num-elems > 0 in that dimension *and* + //we are at the high end of the global range in that dimension: + if (box[0][1] > box[0][0] && box[0][1] == mesh.global_box[0][1]) ++box[0][1]; + if (box[1][1] > box[1][0] && box[1][1] == mesh.global_box[1][1]) ++box[1][1]; + if (box[2][1] > box[2][0] && box[2][1] == mesh.global_box[2][1]) ++box[2][1]; + + std::vector rows; + std::vector row_coords; + + int roffset = 0; + for(int iz=box[2][0]; iz(global_nodes_x, global_nodes_y, global_nodes_z, + ix, iy, iz); + Scalar x, y, z; + get_coords(row_id, global_nodes_x, global_nodes_y, global_nodes_z, x, y, z); + + bool verify_this_point = false; + if (verify_whole_domain) verify_this_point = true; + else if (std::abs(x - 0.5) < 0.05 && std::abs(y - 0.5) < 0.05 && std::abs(z - 0.5) < 0.05) { + verify_this_point = true; + } + + if (verify_this_point) { + rows.push_back(roffset); + row_coords.push_back(x); + row_coords.push_back(y); + row_coords.push_back(z); + } + + ++roffset; + } + } + } + + int return_code = 0; + + const int num_terms = 300; + + err_info max_error; + max_error.err = 0.0; + + for(size_t i=0; i max_error.err) { + max_error.err = err; + max_error.computed = 
computed_soln; + max_error.analytic = analytic_soln; + max_error.coords[0] = x; + max_error.coords[1] = y; + max_error.coords[2] = z; + } + } + + Scalar local_max_err = max_error.err; + Scalar global_max_err = 0; +#ifdef HAVE_MPI + MPI_Allreduce(&local_max_err, &global_max_err, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); +#else + global_max_err = local_max_err; +#endif + + if (local_max_err == global_max_err) { + if (max_error.err > tolerance) { + std::cout << "max absolute error is "< tolerance) { + return_code = 1; + } + + return return_code; +} + +}//namespace miniFE + +#endif + diff --git a/mkl/src/CSRMatrix.hpp b/mkl/src/CSRMatrix.hpp new file mode 100644 index 0000000..360d085 --- /dev/null +++ b/mkl/src/CSRMatrix.hpp @@ -0,0 +1,135 @@ +#ifndef _CSRMatrix_hpp_ +#define _CSRMatrix_hpp_ + +//@HEADER +// ************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template +struct +CSRMatrix { + CSRMatrix() + : has_local_indices(false), + rows(), row_offsets(), row_offsets_external(), + packed_cols(), packed_coefs(), + num_cols(0) +#ifdef HAVE_MPI + ,external_index(), external_local_index(), elements_to_send(), + neighbors(), recv_length(), send_length(), send_buffer(), request() +#endif + { + } + + ~CSRMatrix() + {} + + typedef Scalar ScalarType; + typedef LocalOrdinal LocalOrdinalType; + typedef GlobalOrdinal GlobalOrdinalType; + + bool has_local_indices; + std::vector rows; + std::vector row_offsets; + std::vector row_offsets_external; + std::vector packed_cols; + std::vector packed_coefs; + LocalOrdinal num_cols; + +#ifdef HAVE_MPI + std::vector external_index; + std::vector external_local_index; + std::vector elements_to_send; + std::vector neighbors; + std::vector recv_length; + std::vector send_length; + std::vector send_buffer; + std::vector request; +#endif + + size_t num_nonzeros() const + { + return row_offsets[row_offsets.size()-1]; + } + + void reserve_space(unsigned nrows, unsigned ncols_per_row) + { + rows.resize(nrows); + row_offsets.resize(nrows+1); + packed_cols.reserve(nrows * ncols_per_row); + packed_coefs.reserve(nrows * ncols_per_row); + } + + void get_row_pointers(GlobalOrdinalType row, size_t& row_length, + GlobalOrdinalType*& cols, + ScalarType*& coefs) + { + ptrdiff_t local_row = -1; + //first see if we can get the local-row index using fast direct lookup: + if (rows.size() >= 1) { + ptrdiff_t idx = row - rows[0]; + if (idx < rows.size() && rows[idx] == row) { + local_row = idx; + } + } + + //if we didn't get the local-row index using direct lookup, try a + //more expensive binary-search: + if (local_row == -1) { + typename std::vector::iterator row_iter = + std::lower_bound(rows.begin(), 
rows.end(), row); + + //if we still haven't found row, it's not local so jump out: + if (row_iter == rows.end() || *row_iter != row) { + row_length = 0; + return; + } + + local_row = row_iter - rows.begin(); + } + + LocalOrdinalType offset = row_offsets[local_row]; + row_length = row_offsets[local_row+1] - offset; + cols = &packed_cols[offset]; + coefs = &packed_coefs[offset]; + } +}; + +}//namespace miniFE + +#endif + diff --git a/mkl/src/ELLMatrix.hpp b/mkl/src/ELLMatrix.hpp new file mode 100644 index 0000000..71c0683 --- /dev/null +++ b/mkl/src/ELLMatrix.hpp @@ -0,0 +1,140 @@ +#ifndef _ELLMatrix_hpp_ +#define _ELLMatrix_hpp_ + +//@HEADER +// ************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template +struct +ELLMatrix { + ELLMatrix() + : has_local_indices(false), + rows(), + cols(), coefs(), + num_cols(0), + num_cols_per_row(0) +#ifdef HAVE_MPI + ,external_index(), external_local_index(), elements_to_send(), + neighbors(), recv_length(), send_length(), send_buffer(), request() +#endif + { + } + + ~ELLMatrix() + {} + + typedef Scalar ScalarType; + typedef LocalOrdinal LocalOrdinalType; + typedef GlobalOrdinal GlobalOrdinalType; + + bool has_local_indices; + std::vector rows; + std::vector cols; + std::vector coefs; + LocalOrdinal num_cols; + LocalOrdinal num_cols_per_row; + +#ifdef HAVE_MPI + std::vector external_index; + std::vector external_local_index; + std::vector elements_to_send; + std::vector neighbors; + std::vector recv_length; + std::vector send_length; + std::vector send_buffer; + std::vector request; +#endif + + size_t num_nonzeros() const + { + return rows.size()*num_cols_per_row; + } + + void reserve_space(unsigned nrows, unsigned ncols_per_row) + { + rows.resize(nrows); + cols.resize(nrows * ncols_per_row); + coefs.resize(nrows * ncols_per_row); + num_cols_per_row = ncols_per_row; + } + + void get_row_pointers(GlobalOrdinalType row, size_t& row_length, + GlobalOrdinalType*& cols_ptr, + ScalarType*& coefs_ptr) + { + ptrdiff_t local_row = -1; + //first see if we can get the local-row index using fast direct lookup: + if (rows.size() >= 1) { + ptrdiff_t idx = row - rows[0]; + if (idx < rows.size() && rows[idx] == row) { + local_row = idx; + } + } + + //if we didn't get the local-row index using direct lookup, try a + //more expensive binary-search: + if (local_row == -1) { + typename std::vector::iterator row_iter = + std::lower_bound(rows.begin(), rows.end(), row); + + //if we still haven't found row, it's not local so jump out: + 
if (row_iter == rows.end() || *row_iter != row) { + row_length = 0; + return; + } + + local_row = row_iter - rows.begin(); + } + + cols_ptr = &cols[local_row*num_cols_per_row]; + coefs_ptr = &coefs[local_row*num_cols_per_row]; + + int idx = num_cols_per_row-1; + while(idx>=0) { + if (cols_ptr[idx] != 0) break; + --idx; + } + row_length = idx+1; + } +}; + +}//namespace miniFE + +#endif + diff --git a/mkl/src/GetNodesCoords.hpp b/mkl/src/GetNodesCoords.hpp new file mode 100644 index 0000000..8d958ca --- /dev/null +++ b/mkl/src/GetNodesCoords.hpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef _GETNODESCOORDS_HPP_ +#define _GETNODESCOORDS_HPP_ + +#include +#include + +template +struct GetNodesCoords { + const miniFE::simple_mesh_description* mesh; + GlobalOrdinal* elemIDs; + GlobalOrdinal* node_ordinals; + Scalar* elem_node_coords; + +inline void operator()(int i) +{ + unsigned nnodes = miniFE::Hex8::numNodesPerElem; + GlobalOrdinal elemID = elemIDs[i]; + GlobalOrdinal* node_ords = node_ordinals+i*nnodes; + Scalar* node_coords = elem_node_coords+i*nnodes*miniFE::Hex8::spatialDim; + get_elem_nodes_and_coords(*mesh, elemID, node_ords, node_coords); +} +}; + +#endif diff --git a/mkl/src/Hex8_box_utils.hpp b/mkl/src/Hex8_box_utils.hpp new file mode 100644 index 0000000..82b6abd --- /dev/null +++ b/mkl/src/Hex8_box_utils.hpp @@ -0,0 +1,174 @@ +#ifndef _Hex8_box_utils_hpp_ +#define _Hex8_box_utils_hpp_ + +//@HEADER +// ************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include + +#include +#include +#include +#include + +namespace miniFE { + + +template +void get_hex8_node_ids(int nx, int ny, + GlobalOrdinal node0, + GlobalOrdinal* elem_node_ids) +{ +//Given box dimensions nx and ny, and a starting node +//(local-node-0 for a hex8), compute the other nodes +//of the hex8 using the exodus ordering convention. + elem_node_ids[0] = node0; + elem_node_ids[1] = node0 + 1; + elem_node_ids[2] = node0 + nx + 1; + elem_node_ids[3] = node0 + nx; + elem_node_ids[4] = node0 + nx*ny; + elem_node_ids[5] = node0 + 1 + nx*ny; + elem_node_ids[6] = node0 + nx + nx*ny + 1; + elem_node_ids[7] = node0 + nx + nx*ny; +} + +template +void get_hex8_node_coords_3d(Scalar x, Scalar y, Scalar z, + Scalar hx, Scalar hy, Scalar hz, + Scalar* elem_node_coords) +{ + //Input: x,y,z are the coordinates of local-node 0 for a Hex8. + //'hx', 'hy', 'hz' are the lengths of the sides of the element + //in each direction. 
+ + elem_node_coords[0] = x; + elem_node_coords[1] = y; + elem_node_coords[2] = z; + + elem_node_coords[3] = x + hx; + elem_node_coords[4] = y; + elem_node_coords[5] = z; + + elem_node_coords[6] = x + hx; + elem_node_coords[7] = y + hy; + elem_node_coords[8] = z; + + elem_node_coords[9] = x; + elem_node_coords[10] = y + hy; + elem_node_coords[11] = z; + + elem_node_coords[12] = x; + elem_node_coords[13] = y; + elem_node_coords[14] = z + hz; + + elem_node_coords[15] = x + hx; + elem_node_coords[16] = y; + elem_node_coords[17] = z + hz; + + elem_node_coords[18] = x + hx; + elem_node_coords[19] = y + hy; + elem_node_coords[20] = z + hz; + + elem_node_coords[21] = x; + elem_node_coords[22] = y + hy; + elem_node_coords[23] = z + hz; +} + +template +void +get_elem_nodes_and_coords(const simple_mesh_description& mesh, + GlobalOrdinal elemID, + GlobalOrdinal* node_ords, Scalar* node_coords) +{ + int global_nodes_x = mesh.global_box[0][1]+1; + int global_nodes_y = mesh.global_box[1][1]+1; + int global_nodes_z = mesh.global_box[2][1]+1; + + if (elemID < 0) { + //I don't think this can happen, but check for the sake of paranoia... 
+ throw std::runtime_error("get_elem_nodes_and_coords ERROR, negative elemID"); + } + + int elem_int_x, elem_int_y, elem_int_z; + get_int_coords(elemID, global_nodes_x-1, global_nodes_y-1, global_nodes_z-1, + elem_int_x, elem_int_y, elem_int_z); + GlobalOrdinal nodeID = get_id(global_nodes_x, global_nodes_y, global_nodes_z, elem_int_x, elem_int_y, elem_int_z); + +#ifdef MINIFE_DEBUG_VERBOSE + std::cout<<"\nelemID: "<(nodeID, global_nodes_x,global_nodes_y,global_nodes_z, + ix,iy,iz); + Scalar hx = 1.0/global_elems_x; + Scalar hy = 1.0/global_elems_y; + Scalar hz = 1.0/global_elems_z; + get_hex8_node_coords_3d(ix, iy, iz, hx, hy, hz, node_coords); +#ifdef MINIFE_DEBUG_VERBOSE + int offset = 0; + for(int i=0; i +void +get_elem_nodes_and_coords(const simple_mesh_description& mesh, + GlobalOrdinal elemID, + ElemData& elem_data) +{ + get_elem_nodes_and_coords(mesh, elemID, elem_data.elem_node_ids, elem_data.elem_node_coords); +} + +}//namespace miniFE + +#endif + diff --git a/mkl/src/Makefile b/mkl/src/Makefile new file mode 100644 index 0000000..d81bbcf --- /dev/null +++ b/mkl/src/Makefile @@ -0,0 +1,34 @@ +#----------------------------------------------------------------------- + +MINIFE_TYPES = \ + -DMINIFE_SCALAR=double \ + -DMINIFE_LOCAL_ORDINAL=int \ + -DMINIFE_GLOBAL_ORDINAL=int + +MINIFE_MATRIX_TYPE = -DMINIFE_CSR_MATRIX +# MINIFE_MATRIX_TYPE = -DMINIFE_ELL_MATRIX + +#----------------------------------------------------------------------- + +CFLAGS = -O3 -mkl -DMINIFE_MKL_DOUBLE -fopenmp -DUSE_MKL_DAXPBY -mavx +CXXFLAGS = $(CFLAGS) + +# For debugging, the macro MINIFE_DEBUG will cause miniFE to dump a log file +# from each proc containing various information. +# This macro will also enable a somewhat expensive range-check on indices in +# the exchange_externals function. + +# CPPFLAGS = -I. -I../utils -I../fem $(MINIFE_TYPES) -DMINIFE_DEBUG -DHAVE_MPI -DMPICH_IGNORE_CXX_SEEK +CPPFLAGS = -I. 
-I../utils -I../fem $(MINIFE_TYPES) $(MINIFE_MATRIX_TYPE) -DHAVE_MPI -DMPICH_IGNORE_CXX_SEEK + +LDFLAGS= +LIBS= + +# The MPICH_IGNORE_CXX_SEEK macro is required for some mpich versions, +# such as the one on my cygwin machine. + +CXX=mpiicpc +CC=mpiicc + +include make_targets + diff --git a/mkl/src/Makefile.intel.mpi b/mkl/src/Makefile.intel.mpi new file mode 100644 index 0000000..d81bbcf --- /dev/null +++ b/mkl/src/Makefile.intel.mpi @@ -0,0 +1,34 @@ +#----------------------------------------------------------------------- + +MINIFE_TYPES = \ + -DMINIFE_SCALAR=double \ + -DMINIFE_LOCAL_ORDINAL=int \ + -DMINIFE_GLOBAL_ORDINAL=int + +MINIFE_MATRIX_TYPE = -DMINIFE_CSR_MATRIX +# MINIFE_MATRIX_TYPE = -DMINIFE_ELL_MATRIX + +#----------------------------------------------------------------------- + +CFLAGS = -O3 -mkl -DMINIFE_MKL_DOUBLE -fopenmp -DUSE_MKL_DAXPBY -mavx +CXXFLAGS = $(CFLAGS) + +# For debugging, the macro MINIFE_DEBUG will cause miniFE to dump a log file +# from each proc containing various information. +# This macro will also enable a somewhat expensive range-check on indices in +# the exchange_externals function. + +# CPPFLAGS = -I. -I../utils -I../fem $(MINIFE_TYPES) -DMINIFE_DEBUG -DHAVE_MPI -DMPICH_IGNORE_CXX_SEEK +CPPFLAGS = -I. -I../utils -I../fem $(MINIFE_TYPES) $(MINIFE_MATRIX_TYPE) -DHAVE_MPI -DMPICH_IGNORE_CXX_SEEK + +LDFLAGS= +LIBS= + +# The MPICH_IGNORE_CXX_SEEK macro is required for some mpich versions, +# such as the one on my cygwin machine. 
+ +CXX=mpiicpc +CC=mpiicc + +include make_targets + diff --git a/mkl/src/Makefile.intel.nompi b/mkl/src/Makefile.intel.nompi new file mode 100644 index 0000000..3cc3904 --- /dev/null +++ b/mkl/src/Makefile.intel.nompi @@ -0,0 +1,27 @@ +#----------------------------------------------------------------------- + +MINIFE_TYPES = \ + -DMINIFE_SCALAR=double \ + -DMINIFE_LOCAL_ORDINAL=int \ + -DMINIFE_GLOBAL_ORDINAL=int + +MINIFE_MATRIX_TYPE = -DMINIFE_CSR_MATRIX +# MINIFE_MATRIX_TYPE = -DMINIFE_ELL_MATRIX + +#----------------------------------------------------------------------- + +CFLAGS = -O3 -mkl -DMINIFE_MKL_DOUBLE -fopenmp -DUSE_MKL_DAXPBY -mavx +CXXFLAGS = $(CFLAGS) +CPPFLAGS = -I. -I../utils -I../fem $(MINIFE_TYPES) $(MINIFE_MATRIX_TYPE) + +LDFLAGS= +LIBS= + +# The MPICH_IGNORE_CXX_SEEK macro is required for some mpich versions, +# such as the one on my cygwin machine. + +CXX=icpc +CC=icc + +include make_targets + diff --git a/mkl/src/MatrixCopyOp.hpp b/mkl/src/MatrixCopyOp.hpp new file mode 100644 index 0000000..473ff36 --- /dev/null +++ b/mkl/src/MatrixCopyOp.hpp @@ -0,0 +1,63 @@ +/* +//@HEADER +// ************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef _MatrixCopyOp_hpp_ +#define _MatrixCopyOp_hpp_ + +template +struct MatrixCopyOp { + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; + typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; + typedef typename MatrixType::ScalarType ScalarType; + + const GlobalOrdinalType* src_rows; + const LocalOrdinalType* src_rowoffsets; + const GlobalOrdinalType* src_cols; + const ScalarType* src_coefs; + + GlobalOrdinalType* dest_rows; + LocalOrdinalType* dest_rowoffsets; + GlobalOrdinalType* dest_cols; + ScalarType* dest_coefs; + int n; + + inline void operator()(int i) + { + dest_rows[i] = src_rows[i]; + dest_rowoffsets[i] = src_rowoffsets[i]; + for(int j=src_rowoffsets[i]; j +#include + +#include +#include + +#include + +template +void sort_if_needed(GlobalOrdinal* list, + GlobalOrdinal list_len) +{ + bool need_to_sort = false; + for(GlobalOrdinal i=list_len-1; i>=1; --i) { + if (list[i] < list[i-1]) { + need_to_sort = true; + break; + } + } + + if (need_to_sort) { + std::sort(list,list+list_len); + } +} + +template +struct MatrixInitOp { +}; + +template<> +struct MatrixInitOp > { + MatrixInitOp(const std::vector& rows_vec, + const std::vector& row_offsets_vec, + const std::vector& row_coords_vec, + int global_nx, int global_ny, int global_nz, + MINIFE_GLOBAL_ORDINAL global_n_rows, + const miniFE::simple_mesh_description& input_mesh, + miniFE::CSRMatrix& matrix) + : rows(&rows_vec[0]), + row_offsets(&row_offsets_vec[0]), + row_coords(&row_coords_vec[0]), + global_nodes_x(global_nx), + global_nodes_y(global_ny), + global_nodes_z(global_nz), + 
global_nrows(global_n_rows), + mesh(&input_mesh), + dest_rows(&matrix.rows[0]), + dest_rowoffsets(&matrix.row_offsets[0]), + dest_cols(&matrix.packed_cols[0]), + dest_coefs(&matrix.packed_coefs[0]), + n(matrix.rows.size()) + { + if (matrix.packed_cols.capacity() != matrix.packed_coefs.capacity()) { + std::cout<<"Warning, packed_cols.capacity ("<* mesh; + + inline void operator()(int i) + { + dest_rows[i] = rows[i]; + int offset = row_offsets[i]; + dest_rowoffsets[i] = offset; + int ix = row_coords[i*3]; + int iy = row_coords[i*3+1]; + int iz = row_coords[i*3+2]; + GlobalOrdinalType nnz = 0; + for(int sz=-1; sz<=1; ++sz) { + for(int sy=-1; sy<=1; ++sy) { + for(int sx=-1; sx<=1; ++sx) { + GlobalOrdinalType col_id = + miniFE::get_id(global_nodes_x, global_nodes_y, global_nodes_z, + ix+sx, iy+sy, iz+sz); + if (col_id >= 0 && col_id < global_nrows) { + GlobalOrdinalType col = mesh->map_id_to_row(col_id); + dest_cols[offset+nnz] = col; + dest_coefs[offset+nnz] = 0; + ++nnz; + } + } + } + } + + sort_if_needed(&dest_cols[offset], nnz); + } +}; + +template<> +struct MatrixInitOp > { + MatrixInitOp(const std::vector& rows_vec, + const std::vector& /*row_offsets_vec*/, + const std::vector& row_coords_vec, + int global_nx, int global_ny, int global_nz, + MINIFE_GLOBAL_ORDINAL global_n_rows, + const miniFE::simple_mesh_description& input_mesh, + miniFE::ELLMatrix& matrix) + : rows(&rows_vec[0]), + row_coords(&row_coords_vec[0]), + global_nodes_x(global_nx), + global_nodes_y(global_ny), + global_nodes_z(global_nz), + global_nrows(global_n_rows), + mesh(&input_mesh), + dest_rows(&matrix.rows[0]), + dest_cols(&matrix.cols[0]), + dest_coefs(&matrix.coefs[0]), + n(matrix.rows.size()), + ncols_per_row(matrix.num_cols_per_row) + { + } + + typedef MINIFE_GLOBAL_ORDINAL GlobalOrdinalType; + typedef MINIFE_LOCAL_ORDINAL LocalOrdinalType; + typedef MINIFE_SCALAR ScalarType; + + const GlobalOrdinalType* rows; + const int* row_coords; + + int global_nodes_x; + int global_nodes_y; + int 
global_nodes_z; + + GlobalOrdinalType global_nrows; + + GlobalOrdinalType* dest_rows; + GlobalOrdinalType* dest_cols; + ScalarType* dest_coefs; + int n; + int ncols_per_row; + + const miniFE::simple_mesh_description* mesh; + + inline void operator()(int i) + { + dest_rows[i] = rows[i]; + int offset = i*ncols_per_row; + int ix = row_coords[i*3]; + int iy = row_coords[i*3+1]; + int iz = row_coords[i*3+2]; + GlobalOrdinalType nnz = 0; + for(int sz=-1; sz<=1; ++sz) + for(int sy=-1; sy<=1; ++sy) + for(int sx=-1; sx<=1; ++sx) { + GlobalOrdinalType col_id = + miniFE::get_id(global_nodes_x, global_nodes_y, global_nodes_z, + ix+sx, iy+sy, iz+sz); + if (col_id >= 0 && col_id < global_nrows) { + GlobalOrdinalType col = mesh->map_id_to_row(col_id); + dest_cols[offset+nnz] = col; + dest_coefs[offset+nnz] = 0; + ++nnz; + } + } + + sort_if_needed(&dest_cols[offset], nnz); + } +}; + +#endif + diff --git a/mkl/src/MemInitOp.hpp b/mkl/src/MemInitOp.hpp new file mode 100644 index 0000000..23266b4 --- /dev/null +++ b/mkl/src/MemInitOp.hpp @@ -0,0 +1,44 @@ +/* +//@HEADER +// ************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef MEMINITOP_HPP_ +#define MEMINITOP_HPP_ + +template +struct MemInitOp { + Scalar* ptr; + size_t n; + inline void operator()(size_t i) + { + ptr[i] = 0; + } +}; + +#endif diff --git a/mkl/src/SparseMatrix_functions.hpp b/mkl/src/SparseMatrix_functions.hpp new file mode 100644 index 0000000..47178ef --- /dev/null +++ b/mkl/src/SparseMatrix_functions.hpp @@ -0,0 +1,655 @@ +#ifndef _SparseMatrix_functions_hpp_ +#define _SparseMatrix_functions_hpp_ + +//@HEADER +// ************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef MINIFE_HAVE_TBB +#include +#endif + +#ifdef HAVE_MPI +#include +#endif + +#include "mkl.h" + +namespace miniFE { + +template +void init_matrix(MatrixType& M, + const std::vector& rows, + const std::vector& row_offsets, + const std::vector& row_coords, + int global_nodes_x, + int global_nodes_y, + int global_nodes_z, + typename MatrixType::GlobalOrdinalType global_nrows, + const simple_mesh_description& mesh) +{ + MatrixInitOp mat_init(rows, row_offsets, row_coords, + global_nodes_x, global_nodes_y, global_nodes_z, + global_nrows, mesh, M); + + for(int i=0; i +void sort_with_companions(ptrdiff_t len, T* array, U* companions) +{ + ptrdiff_t i, j, index; + U companion; + + for (i=1; i < len; i++) { + index = array[i]; + companion = companions[i]; + j = i; + while ((j > 0) && (array[j-1] > index)) + { + array[j] = array[j-1]; + companions[j] = companions[j-1]; + j = j - 1; + } + array[j] = index; + companions[j] = companion; + } +} + +template +void write_matrix(const std::string& filename, + MatrixType& mat) +{ + typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; + typedef typename MatrixType::ScalarType ScalarType; + + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + std::ostringstream osstr; + osstr << filename << "." << numprocs << "." 
<< myproc; + std::string full_name = osstr.str(); + std::ofstream ofs(full_name.c_str()); + + size_t nrows = mat.rows.size(); + size_t nnz = mat.num_nonzeros(); + + for(int p=0; p +void +sum_into_row(int row_len, + GlobalOrdinal* row_indices, + Scalar* row_coefs, + int num_inputs, + const GlobalOrdinal* input_indices, + const Scalar* input_coefs) +{ + for(size_t i=0; i +void +sum_into_row(typename MatrixType::GlobalOrdinalType row, + size_t num_indices, + const typename MatrixType::GlobalOrdinalType* col_inds, + const typename MatrixType::ScalarType* coefs, + MatrixType& mat) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + size_t row_len = 0; + GlobalOrdinal* mat_row_cols = NULL; + Scalar* mat_row_coefs = NULL; + + mat.get_row_pointers(row, row_len, mat_row_cols, mat_row_coefs); + if (row_len == 0) return; + + sum_into_row(row_len, mat_row_cols, mat_row_coefs, num_indices, col_inds, coefs); +} + +template +void +sum_in_symm_elem_matrix(size_t num, + const typename MatrixType::GlobalOrdinalType* indices, + const typename MatrixType::ScalarType* coefs, + MatrixType& mat) +{ + typedef typename MatrixType::ScalarType Scalar; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + +//indices is length num (which should be nodes-per-elem) +//coefs is the upper triangle of the element diffusion matrix +//which should be length num*(num+1)/2 +//std::cout< +void +sum_in_elem_matrix(size_t num, + const typename MatrixType::GlobalOrdinalType* indices, + const typename MatrixType::ScalarType* coefs, + MatrixType& mat) +{ + size_t offset = 0; + + for(size_t i=0; i +void +sum_into_global_linear_system(ElemData& elem_data, + MatrixType& A, VectorType& b) +{ + sum_in_symm_elem_matrix(elem_data.nodes_per_elem, elem_data.elem_node_ids, + elem_data.elem_diffusion_matrix, A); + sum_into_vector(elem_data.nodes_per_elem, elem_data.elem_node_ids, + elem_data.elem_source_vector, b); +} + +#ifdef 
MINIFE_HAVE_TBB +template +void +sum_in_elem_matrix(size_t num, + const typename MatrixType::GlobalOrdinalType* indices, + const typename MatrixType::ScalarType* coefs, + LockingMatrix& mat) +{ + size_t offset = 0; + + for(size_t i=0; i +void +sum_into_global_linear_system(ElemData& elem_data, + LockingMatrix& A, LockingVector& b) +{ + sum_in_elem_matrix(elem_data.nodes_per_elem, elem_data.elem_node_ids, + elem_data.elem_diffusion_matrix, A); + sum_into_vector(elem_data.nodes_per_elem, elem_data.elem_node_ids, + elem_data.elem_source_vector, b); +} +#endif + +template +void +add_to_diagonal(typename MatrixType::ScalarType value, MatrixType& mat) +{ + for(size_t i=0; i +double +parallel_memory_overhead_MB(const MatrixType& A) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + double mem_MB = 0; + +#ifdef HAVE_MPI + double invMB = 1.0/(1024*1024); + mem_MB = invMB*A.external_index.size()*sizeof(GlobalOrdinal); + mem_MB += invMB*A.external_local_index.size()*sizeof(GlobalOrdinal); + mem_MB += invMB*A.elements_to_send.size()*sizeof(GlobalOrdinal); + mem_MB += invMB*A.neighbors.size()*sizeof(int); + mem_MB += invMB*A.recv_length.size()*sizeof(LocalOrdinal); + mem_MB += invMB*A.send_length.size()*sizeof(LocalOrdinal); + + double tmp = mem_MB; + MPI_Allreduce(&tmp, &mem_MB, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); +#endif + + return mem_MB; +} + +template +void rearrange_matrix_local_external(MatrixType& A) +{ + //This function will rearrange A so that local entries are contiguous at the front + //of A's memory, and external entries are contiguous at the back of A's memory. + // + //A.row_offsets will describe where the local entries occur, and + //A.row_offsets_external will describe where the external entries occur. 
+ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + size_t nrows = A.rows.size(); + std::vector tmp_row_offsets(nrows*2); + std::vector tmp_row_offsets_external(nrows*2); + + LocalOrdinal num_local_nz = 0; + LocalOrdinal num_extern_nz = 0; + + //First sort within each row of A, so that local entries come + //before external entries within each row. + //tmp_row_offsets describe the locations of the local entries, and + //tmp_row_offsets_external describe the locations of the external entries. + // + for(size_t i=0; i ext_cols(num_extern_nz); + std::vector ext_coefs(num_extern_nz); + std::vector ext_offsets(nrows+1); + LocalOrdinal offset = 0; + for(size_t i=0; i +void +zero_row_and_put_1_on_diagonal(MatrixType& A, typename MatrixType::GlobalOrdinalType row) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + size_t row_len = 0; + GlobalOrdinal* cols = NULL; + Scalar* coefs = NULL; + A.get_row_pointers(row, row_len, cols, coefs); + + for(size_t i=0; i +void +impose_dirichlet(typename MatrixType::ScalarType prescribed_value, + MatrixType& A, + VectorType& b, + int global_nx, + int global_ny, + int global_nz, + const std::set& bc_rows) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + GlobalOrdinal first_local_row = A.rows.size()>0 ? A.rows[0] : 0; + GlobalOrdinal last_local_row = A.rows.size()>0 ? 
A.rows[A.rows.size()-1] : -1; + + typename std::set::const_iterator + bc_iter = bc_rows.begin(), bc_end = bc_rows.end(); + for(; bc_iter!=bc_end; ++bc_iter) { + GlobalOrdinal row = *bc_iter; + if (row >= first_local_row && row <= last_local_row) { + size_t local_row = row - first_local_row; + b.coefs[local_row] = prescribed_value; + zero_row_and_put_1_on_diagonal(A, row); + } + } + + for(size_t i=0; i +typename TypeTraits::magnitude_type +matvec_and_dot(MatrixType& A, + VectorType& x, + VectorType& y) +{ + timer_type t0 = mytimer(); + exchange_externals(A, x); + exchtime += mytimer()-t0; + + typedef typename TypeTraits::magnitude_type magnitude; + typedef typename MatrixType::ScalarType ScalarType; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; + typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; + + int n = A.rows.size(); + const LocalOrdinalType* Arowoffsets = &A.row_offsets[0]; + const GlobalOrdinalType* Acols = &A.packed_cols[0]; + const ScalarType* Acoefs = &A.packed_coefs[0]; + const ScalarType* xcoefs = &x.coefs[0]; + ScalarType* ycoefs = &y.coefs[0]; + ScalarType beta = 0; + + magnitude result = 0; + + for(int row=0; row::mpi_type(); + MPI_Allreduce(&local_dot, &global_dot, 1, mpi_dtype, MPI_SUM, MPI_COMM_WORLD); + return global_dot; +#else + return result; +#endif +} + +//------------------------------------------------------------------------ +//Compute matrix vector product y = A*x where: +// +// A - input matrix +// x - input vector +// y - result vector +// +template +struct matvec_std { +void operator()(MatrixType& A, + VectorType& x, + VectorType& y) +{ + exchange_externals(A, x); + + typedef typename MatrixType::ScalarType ScalarType; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; + typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; + + int n = A.rows.size(); + LocalOrdinalType* Arowoffsets = &A.row_offsets[0]; + GlobalOrdinalType* Acols = &A.packed_cols[0]; + ScalarType* Acoefs = 
&A.packed_coefs[0]; + ScalarType* xcoefs = &x.coefs[0]; + ScalarType* ycoefs = &y.coefs[0]; + ScalarType beta = 0; + char transpose = 'n'; + +// for(int i = 0; i < n ; i++) { +// ycoefs[i] = 0; +// } + +#if defined(MINIFE_MKL_FLOAT) + mkl_cspblas_scsrgemv( +#elif defined(MINIFE_MKL_DOUBLE) + mkl_cspblas_dcsrgemv( +#else + #error Unknown MINIFE_SCALAR base type. +#endif + &transpose, + &n, + Acoefs, + Arowoffsets, + Acols, + xcoefs, + ycoefs); +} +}; + +template +void matvec(MatrixType& A, VectorType& x, VectorType& y) +{ + matvec_std mv; + mv(A, x, y); +} + +template +struct matvec_overlap { +void operator()(MatrixType& A, + VectorType& x, + VectorType& y) +{ +#ifdef HAVE_MPI + begin_exchange_externals(A, x); +#endif + + typedef typename MatrixType::ScalarType ScalarType; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinalType; + typedef typename MatrixType::LocalOrdinalType LocalOrdinalType; + + + int n = A.rows.size(); + const LocalOrdinalType* Arowoffsets = &A.row_offsets[0]; + const GlobalOrdinalType* Acols = &A.packed_cols[0]; + const ScalarType* Acoefs = &A.packed_coefs[0]; + const ScalarType* xcoefs = &x.coefs[0]; + ScalarType* ycoefs = &y.coefs[0]; + ScalarType beta = 0; + + for(int row=0; row + +#include + +namespace miniFE { + + +template +struct Vector { + typedef Scalar ScalarType; + typedef LocalOrdinal LocalOrdinalType; + typedef GlobalOrdinal GlobalOrdinalType; + + Vector(GlobalOrdinal startIdx, LocalOrdinal local_sz) + : startIndex(startIdx), + local_size(local_sz), + coefs(local_size) + { + MemInitOp mem_init; + mem_init.ptr = &coefs[0]; + mem_init.n = local_size; + for(size_t i=0; i coefs; +}; + + +}//namespace miniFE + +#endif + diff --git a/mkl/src/Vector_functions.hpp b/mkl/src/Vector_functions.hpp new file mode 100644 index 0000000..3add84c --- /dev/null +++ b/mkl/src/Vector_functions.hpp @@ -0,0 +1,269 @@ +#ifndef _Vector_functions_hpp_ +#define _Vector_functions_hpp_ + +//@HEADER +// 
************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#include +#include + +// Import the Intel MKL Library +#include "mkl.h" + +#define MINIFE_MIN(X, Y) ((X) < (Y) ? (X) : (Y)) + +namespace miniFE { + + +template +void write_vector(const std::string& filename, + const VectorType& vec) +{ + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + std::ostringstream osstr; + osstr << filename << "." << numprocs << "." 
<< myproc; + std::string full_name = osstr.str(); + std::ofstream ofs(full_name.c_str()); + + typedef typename VectorType::ScalarType ScalarType; + + const std::vector& coefs = vec.coefs; + for(int p=0; p +void sum_into_vector(size_t num_indices, + const typename VectorType::GlobalOrdinalType* indices, + const typename VectorType::ScalarType* coefs, + VectorType& vec) +{ + typedef typename VectorType::GlobalOrdinalType GlobalOrdinal; + typedef typename VectorType::ScalarType Scalar; + + GlobalOrdinal first = vec.startIndex; + GlobalOrdinal last = first + vec.local_size - 1; + + std::vector& vec_coefs = vec.coefs; + + for(size_t i=0; i last) continue; + size_t idx = indices[i] - first; + vec_coefs[idx] += coefs[i]; + } +} + +#ifdef MINIFE_HAVE_TBB +template +void sum_into_vector(size_t num_indices, + const typename VectorType::GlobalOrdinalType* indices, + const typename VectorType::ScalarType* coefs, + LockingVector& vec) +{ + vec.sum_in(num_indices, indices, coefs); +} +#endif + +//------------------------------------------------------------ +//Compute the update of a vector with the sum of two scaled vectors where: +// +// w = alpha*x + beta*y +// +// x,y - input vectors +// +// alpha,beta - scalars applied to x and y respectively +// +// w - output vector +// +template +void + waxpby(typename VectorType::ScalarType alpha, const VectorType& x, + typename VectorType::ScalarType beta, const VectorType& y, + VectorType& w) +{ + typedef typename VectorType::ScalarType ScalarType; + +#ifdef MINIFE_DEBUG + if (y.local_size < x.local_size || w.local_size < x.local_size) { + std::cerr << "miniFE::waxpby ERROR, y and w must be at least as long as x." 
<< std::endl; + return; + } +#endif + + int n = x.coefs.size(); + const ScalarType* xcoefs = &x.coefs[0]; + const ScalarType* ycoefs = &y.coefs[0]; + ScalarType* wcoefs = &w.coefs[0]; + + #pragma omp parallel for + for(int i=0; i +void + daxpby(typename VectorType::ScalarType alpha, const VectorType& x, + typename VectorType::ScalarType beta, VectorType& y) +{ + typedef typename VectorType::ScalarType ScalarType; + + const int n = MINIFE_MIN(x.coefs.size(), y.coefs.size()); + const MINIFE_SCALAR* xcoefs = &x.coefs[0]; + MINIFE_SCALAR* ycoefs = &y.coefs[0]; + + #if defined(MINIFE_MKL_DOUBLE) + cblas_daxpby( + #elif defined(MINIFE_MKL_FLOAT) + cblas_saxpby( + #else + #error "Unknown MINIFE MKL type" + #endif + (const MKL_INT) n, + alpha, + xcoefs, + (const MKL_INT) 1, + beta, + ycoefs, + (const MKL_INT) 1); +} + +//Like waxpby above, except operates on two sets of arguments. +//In other words, performs two waxpby operations in one loop. +template +void + fused_waxpby(typename VectorType::ScalarType alpha, const VectorType& x, + typename VectorType::ScalarType beta, const VectorType& y, + VectorType& w, + typename VectorType::ScalarType alpha2, const VectorType& x2, + typename VectorType::ScalarType beta2, const VectorType& y2, + VectorType& w2) +{ + typedef typename VectorType::ScalarType ScalarType; + +#ifdef MINIFE_DEBUG + if (y.local_size < x.local_size || w.local_size < x.local_size) { + std::cerr << "miniFE::waxpby ERROR, y and w must be at least as long as x." 
<< std::endl; + return; + } +#endif + + int n = x.coefs.size(); + const ScalarType* xcoefs = &x.coefs[0]; + const ScalarType* ycoefs = &y.coefs[0]; + ScalarType* wcoefs = &w.coefs[0]; + + const ScalarType* x2coefs = &x2.coefs[0]; + const ScalarType* y2coefs = &y2.coefs[0]; + ScalarType* w2coefs = &w2.coefs[0]; + + for(int i=0; i +typename TypeTraits::magnitude_type + dot(const Vector& x, + const Vector& y) +{ + int n = x.coefs.size(); + +#ifdef MINIFE_DEBUG + if (y.local_size < n) { + std::cerr << "miniFE::dot ERROR, y must be at least as long as x."<::magnitude_type magnitude; + + const Scalar* xcoefs = &x.coefs[0]; + const Scalar* ycoefs = &y.coefs[0]; +#if defined(MINIFE_MKL_DOUBLE) + magnitude result = cblas_ddot( +#elif defined(MINIFE_MKL_FLOAT) + magnitude result = cblas_sdot( +#else + #error Unknown MINIFE_SCALAR type. +#endif + (MKL_INT) n, + xcoefs, + (MKL_INT) 1, + ycoefs, + (MKL_INT) 1); + +#ifdef HAVE_MPI + magnitude local_dot = result, global_dot = 0; + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + MPI_Allreduce(&local_dot, &global_dot, 1, mpi_dtype, MPI_SUM, MPI_COMM_WORLD); + return global_dot; +#else + return result; +#endif +} + +}//namespace miniFE + +#endif + diff --git a/mkl/src/YAML_Doc.cpp b/mkl/src/YAML_Doc.cpp new file mode 100644 index 0000000..f79e6d9 --- /dev/null +++ b/mkl/src/YAML_Doc.cpp @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include +#include +#include +#ifdef REDSTORM +#include +#include +#include +#endif +#include "YAML_Doc.hpp" +using namespace std; + +//set the microapp_name and version which will become part of the YAML doc. 
+YAML_Doc::YAML_Doc(const std::string& miniApp_Name, const std::string& miniApp_Version, const std::string& destination_Directory, const std::string& destination_FileName){ + miniAppName = miniApp_Name; + miniAppVersion = miniApp_Version; + destinationDirectory = destination_Directory; + destinationFileName = destination_FileName; +} + +//inherits the destructor from YAML_Element +YAML_Doc::~YAML_Doc(void){ +} + +/* +* generates YAML from the elements of the document and saves it +* to a file +*/ +string YAML_Doc::generateYAML(){ + string yaml; + yaml = yaml + "Mini-Application Name: " + miniAppName + "\n"; + yaml = yaml + "Mini-Application Version: " + miniAppVersion + "\n"; + for(size_t i=0; iprintYAML(""); + } + + time_t rawtime; + tm * ptm; + time ( &rawtime ); + ptm = localtime(&rawtime); + char sdate[25]; + //use tm_mon+1 because tm_mon is 0 .. 11 instead of 1 .. 12 + sprintf (sdate,"%04d:%02d:%02d-%02d:%02d:%02d",ptm->tm_year + 1900, ptm->tm_mon+1, + ptm->tm_mday, ptm->tm_hour, ptm->tm_min,ptm->tm_sec); + + string filename; + if (destinationFileName=="") + filename = miniAppName + "-" + miniAppVersion + "_"; + else + filename = destinationFileName; + filename = filename + string(sdate) + ".yaml"; + if (destinationDirectory!="" && destinationDirectory!=".") { + string mkdir_cmd = "mkdir " + destinationDirectory; +#ifdef REDSTORM + mkdir(destinationDirectory.c_str(),0755); +#else + system(mkdir_cmd.c_str()); +#endif + filename = destinationDirectory + "/" + destinationFileName; + } + else + filename = "./" + filename; + + ofstream myfile; + myfile.open(filename.c_str()); + myfile << yaml; + myfile.close(); + return yaml; +} + + diff --git a/mkl/src/YAML_Doc.hpp b/mkl/src/YAML_Doc.hpp new file mode 100644 index 0000000..057bb44 --- /dev/null +++ b/mkl/src/YAML_Doc.hpp @@ -0,0 +1,122 @@ +//@HEADER +// ************************************************************************ +// +// Mantevo: A collection of mini-applications for HPC +// Copyright (2008) Sandia 
Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +// Changelog +// +// Version 0.1 +// - Initial version. +// +///////////////////////////////////////////////////////////////////////// + +#ifndef YAML_DOC_H +#define YAML_DOC_H +#include +#include +#include "YAML_Element.hpp" + +//! The Mantevo YAML_Doc class for the uniform collecting and reporting of performance data for mini-applications + +/*! + +The YAML_Doc class works in conjuction with the YAML_Element class to facilitate easy collecting and reporting of YAML-formatted +data that can be then registered with the Mantevo results collection website. 
+ +\code + +//EXAMPLE CODE FOR GENERATING YAML + + YAML_Doc doc("hpccg","1.0"); + doc.add("final_residual",1.4523e-13); + doc.add("time","4.893"); + +//note: the following line will remove the data (4.890) associated with "time" + doc.get("time")->add("total",4.243); + +//note: the following line will likewise remove the data (1.243) associated with "time" + doc.get("time")->get("total")->add("time",2.457); + doc.get("time")->get("total")->add("flops",4.88e5); + doc.get("time")->add("ddot",1.243); + doc.get("time")->add("sparsemv",""); + doc.get("time")->get("sparsemv")->add("time",0.3445); + doc.get("time")->get("sparsemv")->add("overhead",""); + doc.get("time")->get("sparsemv")->get("overhead")->add("time",0.0123); + doc.get("time")->get("sparsemv")->get("overhead")->add("percentage",0.034); + cout << doc.generateYAML() << endl; + return 0; + +\endcode + +Below is the output generated by the above code: + +\verbatim + +final_residual: 1.4523e-13 +time: + total: + time: 2.457 + flops: 4.88e5 + ddot: 1.243 + sparsemv: + time: 0.3445 + overhead: + time: 0.0123 + percentage: 0.034 + +\endverbatim + +\note {No value is allowed to be attached to a key that has children. If children are added to a key, the value is simply set to "".} + +*/ +class YAML_Doc: public YAML_Element { + public: + //! Constructor: accepts mini-application name and version as strings, optionally accepts directory and file name for printing results. + /*! + The sole constructor for this class accepts and name and version number for the mini-application as well as optional directory + and file name information for results that are generated by the generateYAML() method. + \param miniApp_Name (in) string containing name of the mini-application + \param miniApp_Version (in) string containing the version of the mini-application + \param destination_Directory (in, optional) path of diretory where results file will be stored, relative to current working directory. 
+ If this value is not supplied, the results file will be stored in the current working directory. If the directory does not exist + it will be created. + \param destination_FileName (in, optional) root name of the results file. A suffix of ".yaml" will be automatically appended. If no + file name is specified the filename will be constructed by concatenating the miniAppName + miniAppVersion + ".yaml" strings. + */ + YAML_Doc(const std::string& miniApp_Name, const std::string& miniApp_Version, const std::string& destination_Directory = "", const std::string& destination_FileName = ""); + //! Destructor + ~YAML_Doc(); + //! Generate YAML results to standard out and to a file using specified directory and filename, using current directory and miniAppName + miniAppVersion + ".yaml" by default + std::string generateYAML(); + +protected: + std::string miniAppName; + std::string miniAppVersion; + std::string destinationDirectory; + std::string destinationFileName; +}; +#endif /* YAML_DOC_H */ + diff --git a/mkl/src/YAML_Element.cpp b/mkl/src/YAML_Element.cpp new file mode 100644 index 0000000..114a612 --- /dev/null +++ b/mkl/src/YAML_Element.cpp @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include "YAML_Element.hpp" +using namespace std; +YAML_Element::YAML_Element(const std::string& key_arg, const std::string& value_arg){ + key = key_arg; + value = value_arg; +} + +YAML_Element::~YAML_Element(){ + for (size_t i=0; ivalue = ""; + string converted_value = convert_double_to_string(value_arg); + YAML_Element* element = new YAML_Element(key_arg,converted_value); + children.push_back(element); + return element; +} + +YAML_Element* YAML_Element::add(const std::string& key_arg, int value_arg) { + this->value = ""; + string converted_value = convert_int_to_string(value_arg); + YAML_Element* element = new YAML_Element(key_arg,converted_value); + children.push_back(element); + return element; +} + +#ifndef MINIFE_NO_LONG_LONG + +YAML_Element* YAML_Element::add(const 
std::string& key_arg, long long value_arg) { + this->value = ""; + string converted_value = convert_long_long_to_string(value_arg); + YAML_Element* element = new YAML_Element(key_arg,converted_value); + children.push_back(element); + return element; +} + +#endif + +YAML_Element* YAML_Element::add(const std::string& key_arg, size_t value_arg) { + this->value = ""; + string converted_value = convert_size_t_to_string(value_arg); + YAML_Element* element = new YAML_Element(key_arg,converted_value); + children.push_back(element); + return element; +} + +YAML_Element* YAML_Element::add(const std::string& key_arg, const std::string& value_arg) { + this->value = ""; + YAML_Element* element = new YAML_Element(key_arg, value_arg); + children.push_back(element); + return element; +} + +/* +* returns pointer to the YAML_Element for the given key. +* I, cam, believe an exception should be thrown if there is no +* element in the vector for the specified key +*/ +YAML_Element* YAML_Element::get(const std::string& key_arg) { + for (size_t i=0; igetKey() == key_arg){ + return children[i]; + } + } + return 0; +} + +/* +* prints a line of a YAML document. 
Correct YAML depends on +* correct spacing; the parameter space should be the proper +* amount of space for the parent element +*/ +string YAML_Element::printYAML(std::string space){ + string yaml_line = space + key + ": " + value + "\n"; + for(int i=0; i<2; i++) space = space + " "; + for(size_t i=0; iprintYAML(space); + } + return yaml_line; +} + +string YAML_Element::convert_double_to_string(double value_arg){ + stringstream strm; + strm << value_arg; + return strm.str(); +} +string YAML_Element::convert_int_to_string(int value_arg){ + stringstream strm; + strm << value_arg; + return strm.str(); +} + +#ifndef MINIFE_NO_LONG_LONG + +string YAML_Element::convert_long_long_to_string(long long value_arg){ + stringstream strm; + strm << value_arg; + return strm.str(); +} + +#endif + +string YAML_Element::convert_size_t_to_string(size_t value_arg){ + stringstream strm; + strm << value_arg; + return strm.str(); +} diff --git a/mkl/src/YAML_Element.hpp b/mkl/src/YAML_Element.hpp new file mode 100644 index 0000000..611dca1 --- /dev/null +++ b/mkl/src/YAML_Element.hpp @@ -0,0 +1,86 @@ +//@HEADER +// ************************************************************************ +// +// Mantevo: A collection of mini-applications for HPC +// Copyright (2008) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +// Changelog +// +// Version 0.1 +// - Initial version. +// +///////////////////////////////////////////////////////////////////////// + +#ifndef YAML_ELEMENT_H +#define YAML_ELEMENT_H +#include +#include +//! The Mantevo YAML_Element class for registering key-value pairs of performance data + +/*! + Mantevo mini-applications generate a collection of performance data for each run of the executable. YAML_Element, and + the related YAML_Doc class, provide a uniform facility for gathering and reporting this data using the YAML text format. +*/ +class YAML_Element { + public: + + //! Default constructor. + YAML_Element (){key="";value="";} + //! Construct with known key-value pair + YAML_Element (const std::string& key_arg, const std::string& value_arg); + //! Destructor + ~YAML_Element (); + //! Key accessor method + std::string getKey(){return key;} + //! Add a child element to an element list associated with this element, value of type double + YAML_Element* add(const std::string& key_arg, double value_arg); + //! Add a child element to an element list associated with this element, value of type int + YAML_Element* add(const std::string& key_arg, int value_arg); +#ifndef MINIFE_NO_LONG_LONG + //! Add a child element to an element list associated with this element, value of type long long + YAML_Element* add(const std::string& key_arg, long long value_arg); +#endif + //! Add a child element to an element list associated with this element, value of type size_t + YAML_Element* add(const std::string& key_arg, size_t value_arg); + //! 
Add a child element to an element list associated with this element, value of type string + YAML_Element* add(const std::string& key_arg, const std::string& value_arg); + //! get the element in the list with the given key + YAML_Element* get(const std::string& key_arg); + std::string printYAML(std::string space); + +protected: + std::string key; + std::string value; + std::vector children; + +private: + std::string convert_double_to_string(double value_arg); + std::string convert_int_to_string(int value_arg); +#ifndef MINIFE_NO_LONG_LONG + std::string convert_long_long_to_string(long long value_arg); +#endif + std::string convert_size_t_to_string(size_t value_arg); +}; +#endif /* YAML_ELEMENT_H */ diff --git a/mkl/src/assemble_FE_data.hpp b/mkl/src/assemble_FE_data.hpp new file mode 100644 index 0000000..b469808 --- /dev/null +++ b/mkl/src/assemble_FE_data.hpp @@ -0,0 +1,79 @@ +#ifndef _assemble_FE_data_hpp_ +#define _assemble_FE_data_hpp_ + +//@HEADER +// ************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include + +#include + +namespace miniFE { + +template +void +assemble_FE_data(const simple_mesh_description& mesh, + MatrixType& A, + VectorType& b, + Parameters& params) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + + int global_elems_x = mesh.global_box[0][1]; + int global_elems_y = mesh.global_box[1][1]; + int global_elems_z = mesh.global_box[2][1]; + + Box local_elem_box; + copy_box(mesh.local_box, local_elem_box); + + if (get_num_ids(local_elem_box) < 1) { + return; + } + + // + //We want the element-loop to loop over our (processor-local) domain plus a + //ghost layer, so we can assemble the complete linear-system without doing + //any communication. 
+ // + int ghost = 1; + if (local_elem_box[0][0] > 0) local_elem_box[0][0] -= ghost; + if (local_elem_box[1][0] > 0) local_elem_box[1][0] -= ghost; + if (local_elem_box[2][0] > 0) local_elem_box[2][0] -= ghost; + if (local_elem_box[0][1] < global_elems_x) local_elem_box[0][1] += ghost; + if (local_elem_box[1][1] < global_elems_y) local_elem_box[1][1] += ghost; + if (local_elem_box[2][1] < global_elems_z) local_elem_box[2][1] += ghost; + + perform_element_loop(mesh, local_elem_box, A, b, params); +} + +}//namespace miniFE + +#endif + diff --git a/mkl/src/cg_solve.hpp b/mkl/src/cg_solve.hpp new file mode 100644 index 0000000..1150d3e --- /dev/null +++ b/mkl/src/cg_solve.hpp @@ -0,0 +1,235 @@ +#ifndef _cg_solve_hpp_ +#define _cg_solve_hpp_ + +//@HEADER +// ************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include + +#include +#include + +#include + +namespace miniFE { + +template +void print_vec(const std::vector& vec, const std::string& name) +{ + for(size_t i=0; i +bool breakdown(typename VectorType::ScalarType inner, + const VectorType& v, + const VectorType& w) +{ + typedef typename VectorType::ScalarType Scalar; + typedef typename TypeTraits::magnitude_type magnitude; + +//This is code that was copied from Aztec, and originally written +//by my hero, Ray Tuminaro. +// +//Assuming that inner = (inner product of v and w), +//v and w are considered orthogonal if +// |inner| < 100 * ||v||_2 * ||w||_2 * epsilon + + magnitude vnorm = std::sqrt(dot(v,v)); + magnitude wnorm = std::sqrt(dot(w,w)); + return std::abs(inner) <= 100*vnorm*wnorm*std::numeric_limits::epsilon(); +} + +template +void +cg_solve(OperatorType& A, + const VectorType& b, + VectorType& x, + Matvec matvec, + typename OperatorType::LocalOrdinalType max_iter, + typename TypeTraits::magnitude_type& tolerance, + typename OperatorType::LocalOrdinalType& num_iters, + typename TypeTraits::magnitude_type& normr, + timer_type* my_cg_times) +{ + typedef typename OperatorType::ScalarType ScalarType; + typedef typename OperatorType::GlobalOrdinalType GlobalOrdinalType; + typedef typename OperatorType::LocalOrdinalType LocalOrdinalType; + typedef typename TypeTraits::magnitude_type magnitude_type; + + timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0, tMATVECDOT = 0; + timer_type total_time = mytimer(); + + int myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (!A.has_local_indices) { + std::cerr << "miniFE::cg_solve ERROR, A.has_local_indices is false, needs to be true. This probably means " + << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::cg_solve." 
+ << std::endl; + return; + } + + size_t nrows = A.rows.size(); + LocalOrdinalType ncols = A.num_cols; + + VectorType r(b.startIndex, nrows); + VectorType p(0, ncols); + VectorType Ap(b.startIndex, nrows); + + normr = 0; + magnitude_type rtrans = 0; + magnitude_type oldrtrans = 0; + + LocalOrdinalType print_freq = max_iter/10; + if (print_freq>50) print_freq = 50; + if (print_freq<1) print_freq = 1; + + ScalarType one = 1.0; + ScalarType zero = 0.0; + + TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY); + +// print_vec(p.coefs, "p"); + + TICK(); + matvec(A, p, Ap); + TOCK(tMATVEC); + + TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY); + + TICK(); rtrans = dot(r, r); TOCK(tDOT); + +//std::cout << "rtrans="< +int +driver(const Box& global_box, Box& my_box, + Parameters& params, YAML_Doc& ydoc) +{ + int global_nx = global_box[0][1]; + int global_ny = global_box[1][1]; + int global_nz = global_box[2][1]; + + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (params.load_imbalance > 0) { + add_imbalance(global_box, my_box, params.load_imbalance, ydoc); + } + + float largest_imbalance = 0, std_dev = 0; + compute_imbalance(global_box, my_box, largest_imbalance, + std_dev, ydoc, true); + + + //Create a representation of the mesh: + //Note that 'simple_mesh_description' is a virtual or conceptual + //mesh that doesn't actually store mesh data. + + if (myproc==0) { + std::cout.width(30); + std::cout << "creating/filling mesh..."; + std::cout.flush(); + } + + timer_type t_start = mytimer(); + timer_type t0 = mytimer(); + + simple_mesh_description mesh(global_box, my_box); + + timer_type mesh_fill = mytimer() - t0; + timer_type t_total = mytimer() - t_start; + + if (myproc==0) { + std::cout << mesh_fill << "s, total time: " << t_total << std::endl; + } + + //next we will generate the matrix structure. 
+ + //Declare matrix object: + +#if defined(MINIFE_ELL_MATRIX) + typedef ELLMatrix MatrixType; +#else + typedef CSRMatrix MatrixType; +#endif + + MatrixType A; + + timer_type gen_structure; + RUN_TIMED_FUNCTION("generating matrix structure...", + generate_matrix_structure(mesh, A), + gen_structure, t_total); + + GlobalOrdinal local_nrows = A.rows.size(); + GlobalOrdinal my_first_row = local_nrows > 0 ? A.rows[0] : -1; + + Vector b(my_first_row, local_nrows); + Vector x(my_first_row, local_nrows); + + //Assemble finite-element sub-matrices and sub-vectors into the global + //linear system: + + timer_type fe_assembly; + RUN_TIMED_FUNCTION("assembling FE data...", + assemble_FE_data(mesh, A, b, params), + fe_assembly, t_total); + + if (myproc == 0) { + ydoc.add("Matrix structure generation",""); + ydoc.get("Matrix structure generation")->add("Mat-struc-gen Time",gen_structure); + ydoc.add("FE assembly",""); + ydoc.get("FE assembly")->add("FE assembly Time",fe_assembly); + } + +#ifdef MINIFE_DEBUG + write_matrix("A_prebc.mtx", A); + write_vector("b_prebc.vec", b); +#endif + + //Now apply dirichlet boundary-conditions + //(Apply the 0-valued surfaces first, then the 1-valued surface last.) 
+ + timer_type dirbc_time; + RUN_TIMED_FUNCTION("imposing Dirichlet BC...", + impose_dirichlet(0.0, A, b, global_nx+1, global_ny+1, global_nz+1, mesh.bc_rows_0), dirbc_time, t_total); + RUN_TIMED_FUNCTION("imposing Dirichlet BC...", + impose_dirichlet(1.0, A, b, global_nx+1, global_ny+1, global_nz+1, mesh.bc_rows_1), dirbc_time, t_total); + +#ifdef MINIFE_DEBUG + write_matrix("A.mtx", A); + write_vector("b.vec", b); +#endif + + //Transform global indices to local, set up communication information: + + timer_type make_local_time; + RUN_TIMED_FUNCTION("making matrix indices local...", + make_local_matrix(A), + make_local_time, t_total); + +#ifdef MINIFE_DEBUG + write_matrix("A_local.mtx", A); + write_vector("b_local.vec", b); +#endif + + size_t global_nnz = compute_matrix_stats(A, myproc, numprocs, ydoc); + + //Prepare to perform conjugate gradient solve: + + LocalOrdinal max_iters = 200; + LocalOrdinal num_iters = 0; + typedef typename TypeTraits::magnitude_type magnitude; + magnitude rnorm = 0; + magnitude tol = std::numeric_limits::epsilon(); + + timer_type cg_times[NUM_TIMERS]; + + typedef Vector VectorType; + + t_total = mytimer() - t_start; + + bool matvec_with_comm_overlap = params.mv_overlap_comm_comp==1; + + int verify_result = 0; + +#if MINIFE_KERNELS != 0 + if (myproc==0) { + std::cout.width(30); + std::cout << "Starting kernel timing loops ..." << std::endl; + } + + max_iters = 500; + x.coefs[0] = 0.9; + if (matvec_with_comm_overlap) { + time_kernels(A, b, x, matvec_overlap(), max_iters, rnorm, cg_times); + } + else { + time_kernels(A, b, x, matvec_std(), max_iters, rnorm, cg_times); + } + num_iters = max_iters; + std::string title("Kernel timings"); +#else + if (myproc==0) { + std::cout << "Starting CG solver ... 
" << std::endl; + } + + if (matvec_with_comm_overlap) { +#ifdef MINIFE_CSR_MATRIX + rearrange_matrix_local_external(A); + cg_solve(A, b, x, matvec_overlap(), max_iters, tol, + num_iters, rnorm, cg_times); +#else + std::cout << "ERROR, matvec with overlapping comm/comp only works with CSR matrix."<(), max_iters, tol, + num_iters, rnorm, cg_times); + if (myproc == 0) { + std::cout << "Final Resid Norm: " << rnorm << std::endl; + } + + if (params.verify_solution > 0) { + double tolerance = 0.06; + bool verify_whole_domain = false; + #ifdef MINIFE_DEBUG + verify_whole_domain = true; + #endif + if (myproc == 0) { + if (verify_whole_domain) std::cout << "verifying solution..." << std::endl; + else std::cout << "verifying solution at ~ (0.5, 0.5, 0.5) ..." << std::endl; + } + verify_result = verify_solution(mesh, x, tolerance, verify_whole_domain); + } + } + +#ifdef MINIFE_DEBUG + write_vector("x.vec", x); +#endif + std::string title("CG solve"); +#endif + + if (myproc == 0) { + ydoc.get("Global Run Parameters")->add("ScalarType",TypeTraits::name()); + ydoc.get("Global Run Parameters")->add("GlobalOrdinalType",TypeTraits::name()); + ydoc.get("Global Run Parameters")->add("LocalOrdinalType",TypeTraits::name()); + ydoc.add(title,""); + ydoc.get(title)->add("Iterations",num_iters); + ydoc.get(title)->add("Final Resid Norm",rnorm); + + GlobalOrdinal global_nrows = global_nx; + global_nrows *= global_ny*global_nz; + + //flops-per-mv, flops-per-dot, flops-per-waxpy: + double mv_flops = global_nnz*2.0; + double dot_flops = global_nrows*2.0; + double waxpy_flops = global_nrows*3.0; + +#if MINIFE_KERNELS == 0 +//if MINIFE_KERNELS == 0 then we did a CG solve, and in that case +//there were num_iters+1 matvecs, num_iters*2 dots, and num_iters*3+2 waxpys. + mv_flops *= (num_iters+1); + dot_flops *= (2*num_iters); + waxpy_flops *= (3*num_iters+2); +#else +//if MINIFE_KERNELS then we did one of each operation per iteration. 
+ mv_flops *= num_iters; + dot_flops *= num_iters; + waxpy_flops *= num_iters; +#endif + + double total_flops = mv_flops + dot_flops + waxpy_flops; + + double mv_mflops = -1; + if (cg_times[MATVEC] > 1.e-4) + mv_mflops = 1.e-6 * (mv_flops/cg_times[MATVEC]); + + double dot_mflops = -1; + if (cg_times[DOT] > 1.e-4) + dot_mflops = 1.e-6 * (dot_flops/cg_times[DOT]); + + double waxpy_mflops = -1; + if (cg_times[WAXPY] > 1.e-4) + waxpy_mflops = 1.e-6 * (waxpy_flops/cg_times[WAXPY]); + + double total_mflops = -1; + if (cg_times[TOTAL] > 1.e-4) + total_mflops = 1.e-6 * (total_flops/cg_times[TOTAL]); + + ydoc.get(title)->add("WAXPY Time",cg_times[WAXPY]); + ydoc.get(title)->add("WAXPY Flops",waxpy_flops); + if (waxpy_mflops >= 0) + ydoc.get(title)->add("WAXPY Mflops",waxpy_mflops); + else + ydoc.get(title)->add("WAXPY Mflops","inf"); + + ydoc.get(title)->add("DOT Time",cg_times[DOT]); + ydoc.get(title)->add("DOT Flops",dot_flops); + if (dot_mflops >= 0) + ydoc.get(title)->add("DOT Mflops",dot_mflops); + else + ydoc.get(title)->add("DOT Mflops","inf"); + + ydoc.get(title)->add("MATVEC Time",cg_times[MATVEC]); + ydoc.get(title)->add("MATVEC Flops",mv_flops); + if (mv_mflops >= 0) + ydoc.get(title)->add("MATVEC Mflops",mv_mflops); + else + ydoc.get(title)->add("MATVEC Mflops","inf"); + +#ifdef MINIFE_FUSED + ydoc.get(title)->add("MATVECDOT Time",cg_times[MATVECDOT]); + ydoc.get(title)->add("MATVECDOT Flops",mv_flops); + if (mv_mflops >= 0) + ydoc.get(title)->add("MATVECDOT Mflops",mv_mflops); + else + ydoc.get(title)->add("MATVECDOT Mflops","inf"); +#endif + +#if MINIFE_KERNELS == 0 + ydoc.get(title)->add("Total",""); + ydoc.get(title)->get("Total")->add("Total CG Time",cg_times[TOTAL]); + ydoc.get(title)->get("Total")->add("Total CG Flops",total_flops); + if (total_mflops >= 0) + ydoc.get(title)->get("Total")->add("Total CG Mflops",total_mflops); + else + ydoc.get(title)->get("Total")->add("Total CG Mflops","inf"); + ydoc.get(title)->add("Time per 
iteration",cg_times[TOTAL]/num_iters); +#endif + } + + return verify_result; +} + +}//namespace miniFE + +#endif + diff --git a/mkl/src/exchange_externals.hpp b/mkl/src/exchange_externals.hpp new file mode 100644 index 0000000..4e01f2d --- /dev/null +++ b/mkl/src/exchange_externals.hpp @@ -0,0 +1,270 @@ +#ifndef _exchange_externals_hpp_ +#define _exchange_externals_hpp_ + +//@HEADER +// ************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#include + +#include + +namespace miniFE { + +template +void +exchange_externals(MatrixType& A, + VectorType& x) +{ +#ifdef HAVE_MPI +#ifdef MINIFE_DEBUG + std::ostream& os = outstream(); + os << "entering exchange_externals\n"; +#endif + + int numprocs = 1; + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + + if (numprocs < 2) return; + + typedef typename MatrixType::ScalarType Scalar; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + + // Extract Matrix pieces + + int local_nrow = A.rows.size(); + int num_neighbors = A.neighbors.size(); + const std::vector& recv_length = A.recv_length; + const std::vector& send_length = A.send_length; + const std::vector& neighbors = A.neighbors; + const std::vector& elements_to_send = A.elements_to_send; + + std::vector& send_buffer = A.send_buffer; + + // + // first post receives, these are immediate receives + // Do not wait for result to come, will do that at the + // wait call below. + // + + int MPI_MY_TAG = 99; + + std::vector& request = A.request; + + // + // Externals are at end of locals + // + + std::vector& x_coefs = x.coefs; + Scalar* x_external = &(x_coefs[local_nrow]); + + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + + // Post receives first + for(int i=0; i x.coefs.size()) { + os << "error, out-of-range. 
x.coefs.size()=="< exch_ext_requests; +#endif + +template +void +begin_exchange_externals(MatrixType& A, + VectorType& x) +{ +#ifdef HAVE_MPI + + int numprocs = 1, myproc = 0; + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + + if (numprocs < 2) return; + + typedef typename MatrixType::ScalarType Scalar; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + + // Extract Matrix pieces + + int local_nrow = A.rows.size(); + int num_neighbors = A.neighbors.size(); + const std::vector& recv_length = A.recv_length; + const std::vector& send_length = A.send_length; + const std::vector& neighbors = A.neighbors; + const std::vector& elements_to_send = A.elements_to_send; + + std::vector send_buffer(elements_to_send.size(), 0); + + // + // first post receives, these are immediate receives + // Do not wait for result to come, will do that at the + // wait call below. + // + + int MPI_MY_TAG = 99; + + exch_ext_requests.resize(num_neighbors); + + // + // Externals are at end of locals + // + + std::vector& x_coefs = x.coefs; + Scalar* x_external = &(x_coefs[local_nrow]); + + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + + // Post receives first + for(int i=0; i +# example: +# % generate_info_header g++ -O3 miniFE MINIFE +# this will cause the appropriate info to be put in a +# header named miniFE_info.hpp and the info will be in macros +# that start with MINIFE. +# +# an example of usage can be seen in miniFE/make_targets +# +if [ $# != 4 ] ; then +echo "error, need 4 arguments."; +exit 1; +fi + +cxx=`which ${1}` +errcode="$?" +if [ ${errcode} != "0" ] ; then +cxx="unknown"; +fi +echo "CXX: ${cxx}" + +cxx_ver=`${1} --version 2>&1` +errcode="$?" +if [ ${errcode} != "0" ] ; then +cxx_ver=`${1} -V 2>&1`; +errcode="$?" 
+if [ ${errcode} != "0" ] ; then +cxx_ver="unknown"; +fi +fi + +cxx_ver=${cxx_ver// /@} +cxx_version="" +for i in $(echo ${cxx_ver}); +do + if [ "$cxx_version" == "" ]; then + cxx_version=$i; + fi +done +cxx_version=${cxx_version//@/ } +echo "Compiler version: ${cxx_version}" + +cxxflags=${2} +hostname=`uname -n` +errcode="$?" +if [ ${errcode} != "0" ] ; then +hostname="unknown"; +fi + +kern_name=`uname -s` +errcode="$?" +if [ ${errcode} != "0" ] ; then +kern_name="unknown"; +fi + +kern_rel=`uname -r` +errcode="$?" +if [ ${errcode} != "0" ] ; then +kern_rel="unknown"; +fi + +proc=`uname -p` +errcode="$?" +if [ ${errcode} != "0" ] ; then +proc="unknown"; +fi + +header_prefix=${3} +macro_prefix=${4} + +cat << END_CAT > ${header_prefix}_info.hpp +#ifndef ${header_prefix}_info_hpp +#define ${header_prefix}_info_hpp + +#define ${macro_prefix}_HOSTNAME "${hostname}" +#define ${macro_prefix}_KERNEL_NAME "'${kern_name}'" +#define ${macro_prefix}_KERNEL_RELEASE "'${kern_rel}'" +#define ${macro_prefix}_PROCESSOR "'${proc}'" + +#define ${macro_prefix}_CXX "'${cxx}'" +#define ${macro_prefix}_CXX_VERSION "'${cxx_version}'" +#define ${macro_prefix}_CXXFLAGS "'${cxxflags}'" + +#endif +END_CAT diff --git a/mkl/src/generate_matrix_structure.hpp b/mkl/src/generate_matrix_structure.hpp new file mode 100644 index 0000000..245f107 --- /dev/null +++ b/mkl/src/generate_matrix_structure.hpp @@ -0,0 +1,155 @@ +#ifndef _generate_matrix_structure_hpp_ +#define _generate_matrix_structure_hpp_ + +//@HEADER +// ************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. 
+// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template +int +generate_matrix_structure(const simple_mesh_description& mesh, + MatrixType& A) +{ + int myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + int threw_exc = 0; + try { + + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + + int global_nodes_x = mesh.global_box[0][1]+1; + int global_nodes_y = mesh.global_box[1][1]+1; + int global_nodes_z = mesh.global_box[2][1]+1; + Box box; + copy_box(mesh.local_box, box); + + //num-owned-nodes in each dimension is num-elems+1 + //only if num-elems > 0 in that dimension *and* + //we are at the high end of the global range in that dimension: + if (box[0][1] > box[0][0] && box[0][1] == mesh.global_box[0][1]) ++box[0][1]; + if (box[1][1] > box[1][0] && box[1][1] == mesh.global_box[1][1]) ++box[1][1]; + if (box[2][1] > box[2][0] && box[2][1] == mesh.global_box[2][1]) ++box[2][1]; + 
+ GlobalOrdinal global_nrows = global_nodes_x; + global_nrows *= global_nodes_y*global_nodes_z; + + GlobalOrdinal nrows = get_num_ids(box); + try { + A.reserve_space(nrows, 27); + } + catch(std::exception& exc) { + std::ostringstream osstr; + osstr << "One of A.rows.resize, A.row_offsets.resize, A.packed_cols.reserve or A.packed_coefs.reserve: nrows=" < rows(nrows); + std::vector row_offsets(nrows+1); + std::vector row_coords(nrows*3); + + unsigned roffset = 0; + GlobalOrdinal nnz = 0; + + for(int iz=box[2][0]; iz(global_nodes_x, global_nodes_y, global_nodes_z, + ix, iy, iz); + rows[roffset] = mesh.map_id_to_row(row_id); + row_coords[roffset*3] = ix; + row_coords[roffset*3+1] = iy; + row_coords[roffset*3+2] = iz; + row_offsets[roffset++] = nnz; + + for(int sz=-1; sz<=1; ++sz) { + for(int sy=-1; sy<=1; ++sy) { + for(int sx=-1; sx<=1; ++sx) { + GlobalOrdinal col_id = + get_id(global_nodes_x, global_nodes_y, global_nodes_z, + ix+sx, iy+sy, iz+sz); + if (col_id >= 0 && col_id < global_nrows) { + ++nnz; + } + } + } + } + + } + } + } + + if (roffset != nrows) { + throw std::runtime_error("ERROR in generate_matrix_structure, roffset != nrows."); + } + row_offsets[roffset] = nnz; + + init_matrix(A, rows, row_offsets, row_coords, + global_nodes_x, global_nodes_y, global_nodes_z, global_nrows, mesh); + } + catch(...) { + std::cout << "proc " << myproc << " threw an exception in generate_matrix_structure, probably due to running out of memory." 
<< std::endl; + threw_exc = 1; + } +#ifdef HAVE_MPI + int global_throw = 0; + MPI_Allreduce(&threw_exc, &global_throw, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + threw_exc = global_throw; +#endif + if (threw_exc) { + return 1; + } + + return 0; +} + +}//namespace miniFE + +#endif + diff --git a/mkl/src/get_common_files b/mkl/src/get_common_files new file mode 100755 index 0000000..e2448e3 --- /dev/null +++ b/mkl/src/get_common_files @@ -0,0 +1,15 @@ +#!/bin/bash + +if [ -f ./YAML_Doc.cpp ] ; then + exit 0; +fi + +dir=../../common + +cp ${dir}/YAML_Doc.cpp . +cp ${dir}/YAML_Doc.hpp . +cp ${dir}/YAML_Element.cpp . +cp ${dir}/YAML_Element.hpp . + +cp ${dir}/generate_info_header . + diff --git a/mkl/src/main.cpp b/mkl/src/main.cpp new file mode 100644 index 0000000..16e27e1 --- /dev/null +++ b/mkl/src/main.cpp @@ -0,0 +1,214 @@ + +//@HEADER +// ************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER +#include +#include +#include +#include + +#include + +#include + +#ifdef HAVE_MPI +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#if MINIFE_INFO != 0 +#include +#else +#include +#endif + +//The following macros should be specified as compile-macros in the +//makefile. They are defaulted here just in case... +#ifndef MINIFE_SCALAR +#define MINIFE_SCALAR double +#endif +#ifndef MINIFE_LOCAL_ORDINAL +#define MINIFE_LOCAL_ORDINAL int +#endif +#ifndef MINIFE_GLOBAL_ORDINAL +#define MINIFE_GLOBAL_ORDINAL int +#endif + +// ************************************************************************ + +void add_params_to_yaml(YAML_Doc& doc, miniFE::Parameters& params); +void add_configuration_to_yaml(YAML_Doc& doc, int numprocs, int numthreads); +void add_timestring_to_yaml(YAML_Doc& doc); + +inline void print_box(int myproc, const char* name, const Box& box, + const char* name2, const Box& box2) +{ + std::cout << "proc " << myproc << " "< local_boxes(numprocs); + + box_partition(0, numprocs, 2, global_box, &local_boxes[0]); + + Box& my_box = local_boxes[myproc]; + +//print_box(myproc, "global-box", global_box, "local-box", my_box); + + std::ostringstream osstr; + osstr << "miniFE." << params.nx << "x" << params.ny << "x" << params.nz; +#ifdef HAVE_MPI + osstr << ".P"<. + //To run miniFE with float instead of double, or 'long long' instead of int, + //etc., change these template-parameters by changing the macro definitions in + //the makefile or on the make command-line. 
+ + int return_code = + miniFE::driver< MINIFE_SCALAR, MINIFE_LOCAL_ORDINAL, MINIFE_GLOBAL_ORDINAL>(global_box, my_box, params, doc); + + miniFE::timer_type total_time = miniFE::mytimer() - start_time; + + if (myproc == 0) { + doc.add("Total Program Time",total_time); + doc.generateYAML(); + } + + miniFE::finalize_mpi(); + + return return_code; +} + +void add_params_to_yaml(YAML_Doc& doc, miniFE::Parameters& params) +{ + doc.add("Global Run Parameters",""); + doc.get("Global Run Parameters")->add("dimensions",""); + doc.get("Global Run Parameters")->get("dimensions")->add("nx",params.nx); + doc.get("Global Run Parameters")->get("dimensions")->add("ny",params.ny); + doc.get("Global Run Parameters")->get("dimensions")->add("nz",params.nz); + doc.get("Global Run Parameters")->add("load_imbalance", params.load_imbalance); + if (params.mv_overlap_comm_comp == 1) { + std::string val("1 (yes)"); + doc.get("Global Run Parameters")->add("mv_overlap_comm_comp", val); + } + else { + std::string val("0 (no)"); + doc.get("Global Run Parameters")->add("mv_overlap_comm_comp", val); + } +} + +void add_configuration_to_yaml(YAML_Doc& doc, int numprocs, int numthreads) +{ + doc.get("Global Run Parameters")->add("number of processors", numprocs); + + doc.add("Platform",""); + doc.get("Platform")->add("hostname",MINIFE_HOSTNAME); + doc.get("Platform")->add("kernel name",MINIFE_KERNEL_NAME); + doc.get("Platform")->add("kernel release",MINIFE_KERNEL_RELEASE); + doc.get("Platform")->add("processor",MINIFE_PROCESSOR); + + doc.add("Build",""); + doc.get("Build")->add("CXX",MINIFE_CXX); +#if MINIFE_INFO != 0 + doc.get("Build")->add("compiler version",MINIFE_CXX_VERSION); +#endif + doc.get("Build")->add("CXXFLAGS",MINIFE_CXXFLAGS); + std::string using_mpi("no"); +#ifdef HAVE_MPI + using_mpi = "yes"; +#endif + doc.get("Build")->add("using MPI",using_mpi); +} + +void add_timestring_to_yaml(YAML_Doc& doc) +{ + std::time_t rawtime; + struct tm * timeinfo; + std::time(&rawtime); + timeinfo = 
std::localtime(&rawtime); + std::ostringstream osstr; + osstr.fill('0'); + osstr << timeinfo->tm_year+1900 << "-"; + osstr.width(2); osstr << timeinfo->tm_mon+1 << "-"; + osstr.width(2); osstr << timeinfo->tm_mday << ", "; + osstr.width(2); osstr << timeinfo->tm_hour << "-"; + osstr.width(2); osstr << timeinfo->tm_min << "-"; + osstr.width(2); osstr << timeinfo->tm_sec; + std::string timestring = osstr.str(); + doc.add("Run Date/Time",timestring); +} + diff --git a/mkl/src/make_local_matrix.hpp b/mkl/src/make_local_matrix.hpp new file mode 100644 index 0000000..8823119 --- /dev/null +++ b/mkl/src/make_local_matrix.hpp @@ -0,0 +1,449 @@ +#ifndef _make_local_matrix_hpp_ +#define _make_local_matrix_hpp_ +#include + +//@HEADER +// ************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include + +#include + +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +//The following function was converted from Mike's HPCCG code. +template +void +make_local_matrix(MatrixType& A) +{ +#ifdef HAVE_MPI + int numprocs = 1, myproc = 0; + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + + if (numprocs < 2) { + A.num_cols = A.rows.size(); + A.has_local_indices = true; + return; + } + + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + std::map externals; + LocalOrdinal num_external = 0; + + //Extract Matrix pieces + + size_t local_nrow = A.rows.size(); + GlobalOrdinal start_row = local_nrow>0 ? A.rows[0] : -1; + GlobalOrdinal stop_row = local_nrow>0 ? A.rows[local_nrow-1] : -1; + + // We need to convert the index values for the rows on this processor + // to a local index space. We need to: + // - Determine if each index reaches to a local value or external value + // - If local, subtract start_row from index value to get local index + // - If external, find out if it is already accounted for. + // - If so, then do nothing, + // - otherwise + // - add it to the list of external indices, + // - find out which processor owns the value. 
+ // - Set up communication for sparse MV operation + + /////////////////////////////////////////// + // Scan the indices and transform to local + /////////////////////////////////////////// + + std::vector& external_index = A.external_index; + + for(size_t i=0; i tmp_buffer(numprocs, 0); // Temp buffer space needed below + + // Build list of global index offset + + std::vector global_index_offsets(numprocs, 0); + + tmp_buffer[myproc] = start_row; // This is my start row + + // This call sends the start_row of each ith processor to the ith + // entry of global_index_offsets on all processors. + // Thus, each processor knows the range of indices owned by all + // other processors. + // Note: There might be a better algorithm for doing this, but this + // will work... + + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + MPI_Allreduce(&tmp_buffer[0], &global_index_offsets[0], numprocs, mpi_dtype, + MPI_SUM, MPI_COMM_WORLD); + + // Go through list of externals and find the processor that owns each + std::vector external_processor(num_external); + + for(LocalOrdinal i=0; i=0; --j) { + if (global_index_offsets[j] <= cur_ind && global_index_offsets[j] >= 0) { + external_processor[i] = j; + break; + } + } + } + + ///////////////////////////////////////////////////////////////////////// + // Sift through the external elements. For each newly encountered external + // point assign it the next index in the sequence. Then look for other + // external elements who are updated by the same node and assign them the next + // set of index numbers in the sequence (ie. elements updated by the same node + // have consecutive indices). 
+ ///////////////////////////////////////////////////////////////////////// + + size_t count = local_nrow; + std::vector& external_local_index = A.external_local_index; + external_local_index.assign(num_external, -1); + + for(LocalOrdinal i=0; i new_external_processor(num_external, 0); + + for(int i=0; i No external elements are updated by + // processor i. + // tmp_neighbors[i] = x ==> (x-1)/numprocs elements are updated from + // processor i. + /// + //////////////////////////////////////////////////////////////////////// + + std::vector tmp_neighbors(numprocs, 0); + + int num_recv_neighbors = 0; + int length = 1; + + for(LocalOrdinal i=0; i recv_list; + recv_list.push_back(new_external_processor[0]); + for(LocalOrdinal i=1; i send_list(num_send_neighbors, 0); + + // + // first post receives, these are immediate receives + // Do not wait for result to come, will do that at the + // wait call below. + // + int MPI_MY_TAG = 99; + + std::vector request(num_send_neighbors); + for(int i=0; i new_external(num_external); + for(LocalOrdinal i=0; i lengths(num_recv_neighbors); + + ++MPI_MY_TAG; + + // First post receives + + for(int i=0; i& neighbors = A.neighbors; + std::vector& recv_length = A.recv_length; + std::vector& send_length = A.send_length; + + neighbors.resize(num_recv_neighbors, 0); + A.request.resize(num_recv_neighbors); + recv_length.resize(num_recv_neighbors, 0); + send_length.resize(num_recv_neighbors, 0); + + LocalOrdinal j = 0; + for(int i=0; i= A.rows.size()) { +//std::cout<<"start_row: "< +#include +#include +#include +#include +#include + +namespace miniFE { + +template +void +perform_element_loop(const simple_mesh_description& mesh, + const Box& local_elem_box, + MatrixType& A, VectorType& b, + Parameters& /*params*/) +{ + typedef typename MatrixType::ScalarType Scalar; + + int global_elems_x = mesh.global_box[0][1]; + int global_elems_y = mesh.global_box[1][1]; + int global_elems_z = mesh.global_box[2][1]; + + //We will iterate the 
local-element-box (local portion of the mesh), and + //get element-IDs in preparation for later assembling the FE operators + //into the global sparse linear-system. + + GlobalOrdinal num_elems = get_num_ids(local_elem_box); + std::vector elemIDs(num_elems); + + BoxIterator iter = BoxIterator::begin(local_elem_box); + BoxIterator end = BoxIterator::end(local_elem_box); + + for(size_t i=0; iter != end; ++iter, ++i) { + elemIDs[i] = get_id(global_elems_x, global_elems_y, global_elems_z, + iter.x, iter.y, iter.z); +//#ifdef MINIFE_DEBUG +//std::cout << "elem ID " << elemIDs[i] << " ("< elem_data; + compute_gradient_values(elem_data.grad_vals); + //Given an element-id, populate elem_data with the + //element's node_ids and nodal-coords: + +// TICK(); + get_elem_nodes_and_coords(mesh, elemIDs[i], elem_data); +// TOCK(t_gn); + + //Next compute element-diffusion-matrix and element-source-vector: + +// TICK(); + compute_element_matrix_and_vector(elem_data); +// TOCK(t_ce); + + //Now assemble the (dense) element-matrix and element-vector into the + //global sparse linear system: + +// TICK(); + sum_into_global_linear_system(elem_data, A, b); +// TOCK(t_si); + } +//std::cout << std::endl<<"get-nodes: " << t_gn << std::endl; +//std::cout << "compute-elems: " << t_ce << std::endl; +//std::cout << "sum-in: " << t_si << std::endl; +} + +}//namespace miniFE + +#endif + diff --git a/mkl/src/run_test b/mkl/src/run_test new file mode 100755 index 0000000..d73f782 --- /dev/null +++ b/mkl/src/run_test @@ -0,0 +1,8 @@ +#!/bin/sh + +./miniFE.x nx=10 verify_solution=1 + +if [ $? 
!= 0 ] ; then \ +echo "test FAILED, verify_solution detected unacceptable error."; \ +fi + diff --git a/mkl/src/simple_mesh_description.hpp b/mkl/src/simple_mesh_description.hpp new file mode 100644 index 0000000..c7769e2 --- /dev/null +++ b/mkl/src/simple_mesh_description.hpp @@ -0,0 +1,243 @@ + +#ifndef _simple_mesh_description_hpp_ +#define _simple_mesh_description_hpp_ + +//@HEADER +// ************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include +#include +#include + +namespace miniFE { + +template +class simple_mesh_description { +public: + simple_mesh_description(const Box& global_box_in, const Box& local_box_in) + { + Box local_node_box; + for(int i=0; i<3; ++i) { + global_box[i][0] = global_box_in[i][0]; + global_box[i][1] = global_box_in[i][1]; + local_box[i][0] = local_box_in[i][0]; + local_box[i][1] = local_box_in[i][1]; + local_node_box[i][0] = local_box_in[i][0]; + local_node_box[i][1] = local_box_in[i][1]; + //num-owned-nodes == num-elems+1 in this dimension if the elem box is not empty + //and we are at the high end of the global range in that dimension: + if (local_box_in[i][1] > local_box_in[i][0] && local_box_in[i][1] == global_box[i][1]) local_node_box[i][1] += 1; + } + + int max_node_x = global_box[0][1]+1; + int max_node_y = global_box[1][1]+1; + int max_node_z = global_box[2][1]+1; + create_map_id_to_row(max_node_x, max_node_y, max_node_z, local_node_box, + map_ids_to_rows); + + //As described in analytic_soln.hpp, + //we will impose a 0 boundary-condition on faces x=0, y=0, z=0, y=1, z=1 + //we will impose a 1 boundary-condition on face x=1 + +#ifdef MINIFE_DEBUG +std::cout< 0) --miny; + if (local_node_box[Z][0] > 0) --minz; + if (local_node_box[Y][1] < max_node_y) ++maxy; + if (local_node_box[Z][1] < max_node_z) ++maxz; + + for(int iz=minz; iz(max_node_x, max_node_y, max_node_z, + 0, iy, iz); +#ifdef MINIFE_DEBUG +std::cout<<"x=0 BC, node "< 0) --minx; + if (local_node_box[Z][0] > 0) --minz; + if (local_node_box[X][1] < max_node_x) ++maxx; + if (local_node_box[Z][1] < max_node_z) ++maxz; + + for(int iz=minz; iz(max_node_x, max_node_y, max_node_z, + ix, 0, iz); +#ifdef MINIFE_DEBUG +std::cout<<"y=0 BC, node "< 0) --minz; + if (local_node_box[Y][0] > 0) --miny; + if (local_node_box[Z][1] < max_node_z) ++maxz; + if (local_node_box[Y][1] < max_node_y) ++maxy; + 
+ for(int iy=miny; iy(max_node_x, max_node_y, max_node_z, + x1, iy, iz); + GlobalOrdinal row = map_id_to_row(nodeID); +#ifdef MINIFE_DEBUG +std::cout<<"x=1 BC, node "< 0) --minz; + if (local_node_box[X][0] > 0) --minx; + if (local_node_box[Z][1] < max_node_z) ++maxz; + if (local_node_box[X][1] < max_node_x) ++maxx; + + for(int ix=minx; ix(max_node_x, max_node_y, max_node_z, + ix, y1, iz); +#ifdef MINIFE_DEBUG +std::cout<<"y=1 BC, node "< 0) --miny; + if (local_node_box[X][0] > 0) --minx; + if (local_node_box[Y][1] < max_node_y) ++maxy; + if (local_node_box[X][1] < max_node_x) ++maxx; + + for(int ix=minx; ix(max_node_x, max_node_y, max_node_z, + ix, iy, z1); +#ifdef MINIFE_DEBUG +std::cout<<"z=1 BC, node "< bc_rows_0; + std::set bc_rows_1; + std::map map_ids_to_rows; + Box global_box; + Box local_box; +};//class simple_mesh_description + +}//namespace miniFE + +#endif diff --git a/mkl/src/time_kernels.hpp b/mkl/src/time_kernels.hpp new file mode 100644 index 0000000..d6d959c --- /dev/null +++ b/mkl/src/time_kernels.hpp @@ -0,0 +1,140 @@ +#ifndef _time_kernels_hpp_ +#define _time_kernels_hpp_ + +//@HEADER +// ************************************************************************ +// +// HPCCG: Simple Conjugate Gradient Benchmark Code +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#include + +#include +#include + +#ifdef MINIFE_HAVE_CUDA +#include +#endif + +namespace miniFE { + +template +void +time_kernels(OperatorType& A, + const VectorType& b, + VectorType& x, + Matvec matvec, + typename OperatorType::LocalOrdinalType max_iter, + typename OperatorType::ScalarType& xdotp, + timer_type* my_kern_times) +{ + typedef typename OperatorType::ScalarType ScalarType; + typedef typename OperatorType::LocalOrdinalType OrdinalType; + typedef typename TypeTraits::magnitude_type magnitude_type; + + timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0; + + int myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (!A.has_local_indices) { + std::cerr << "miniFE::time_kernels ERROR, A.has_local_indices is false, needs to be true. This probably means " + << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::time_kernels." + << std::endl; + return; + } + + OrdinalType nrows = A.rows.size(); + OrdinalType ncols = A.num_cols; + + VectorType p(0, ncols, b.compute_node); + + ScalarType one = 1.0; + ScalarType zero = 0.0; + + typedef typename VectorType::ComputeNodeType ComputeNodeType; + ComputeNodeType& compute_node = x.compute_node; + + //The following lines that create and initialize buffers are no-ops in many + //cases, but perform actual allocations and copies if a off-cpu device such as + //a GPU is being used by compute_node. 
+ + //Do any required allocations for buffers that will be needed during CG: + ScalarType* d_x = compute_node.get_buffer(&x.coefs[0], x.coefs.size()); + ScalarType* d_p = compute_node.get_buffer(&p.coefs[0], p.coefs.size()); + ScalarType* d_b = compute_node.get_buffer(&b.coefs[0], b.coefs.size()); + OrdinalType* d_Arowoff = compute_node.get_buffer(&A.row_offsets[0], A.row_offsets.size()); + OrdinalType* d_Acols = compute_node.get_buffer(&A.packed_cols[0], A.packed_cols.size()); + ScalarType* d_Acoefs = compute_node.get_buffer(&A.packed_coefs[0], A.packed_coefs.size()); + + //Copy data to buffers that need to be initialized from input data: + compute_node.copy_to_buffer(&x.coefs[0], x.coefs.size(), d_x); + compute_node.copy_to_buffer(&b.coefs[0], b.coefs.size(), d_b); + compute_node.copy_to_buffer(&A.row_offsets[0], A.row_offsets.size(), d_Arowoff); + compute_node.copy_to_buffer(&A.packed_cols[0], A.packed_cols.size(), d_Acols); + compute_node.copy_to_buffer(&A.packed_coefs[0], A.packed_coefs.size(), d_Acoefs); + + TICK(); + for(OrdinalType i=0; i= box_[0][1]) { + x = box_[0][0]; + ++y; + if (y >= box_[1][1]) { + y = box_[1][0]; + ++z; + if (z >= box_[2][1]) { + z = box_[2][1]; + y = box_[1][1]; + x = box_[0][1]; + } + } + } + return *this; + } + + BoxIterator operator++(int) + { + BoxIterator temp = *this; + ++(*this); + return temp; + } + + bool operator==(const BoxIterator& rhs) const + { + return x == rhs.x && y == rhs.y && z == rhs.z; + } + + bool operator!=(const BoxIterator& rhs) const + { + return !(this->operator==(rhs)); + } + + int x; + int y; + int z; + +private: + BoxIterator(const Box& box, bool at_end = false) + : x(box[0][0]), + y(box[1][0]), + z(box[2][0]), + box_() + { + box_[0][0] = box[0][0]; box_[0][1] = box[0][1]; + box_[1][0] = box[1][0]; box_[1][1] = box[1][1]; + box_[2][0] = box[2][0]; box_[2][1] = box[2][1]; + if (at_end) { + x = box[0][1]; + y = box[1][1]; + z = box[2][1]; + } + } + + Box box_; +};//class BoxTraverser + +}//namespace 
miniFE + +#endif + diff --git a/mkl/utils/BoxPartition.cpp b/mkl/utils/BoxPartition.cpp new file mode 100644 index 0000000..cb167fb --- /dev/null +++ b/mkl/utils/BoxPartition.cpp @@ -0,0 +1,503 @@ +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include + +#include +#include + +/*--------------------------------------------------------------------*/ + +static int box_map_local_entry( const Box& box , + const int ghost , + int local_x , + int local_y , + int local_z ) +{ + const int nx = 2 * ghost + box[0][1] - box[0][0] ; + const int ny = 2 * ghost + box[1][1] - box[1][0] ; + const int nz = 2 * ghost + box[2][1] - box[2][0] ; + int result = -1 ; + + local_x += ghost ; + local_y += ghost ; + local_z += ghost ; + + if ( 0 <= local_x && local_x < nx && + 0 <= local_y && local_y < ny && + 0 <= local_z && local_z < nz ) { + + result = local_z * ny * nx + local_y * nx + local_x ; + } + return result ; +} + +int box_map_local( const Box& box_local, + const int ghost , + const int box_local_map[] , + const int local_x , + const int local_y , + const int local_z ) +{ + int result = box_map_local_entry(box_local,ghost,local_x,local_y,local_z); + + if ( 0 <= result ) { + result = box_local_map[ result ]; + } + + return result ; +} + +/*--------------------------------------------------------------------*/ +/* Recursively split a box into into (up-ip) sub-boxes */ + +void box_partition( int ip , int up , int axis , + const Box& box, + Box* p_box ) +{ + const int np = up - ip ; + if ( 1 == np ) { + p_box[ip][0][0] = box[0][0] ; p_box[ip][0][1] = box[0][1] ; + p_box[ip][1][0] = box[1][0] ; p_box[ip][1][1] = box[1][1] ; + p_box[ip][2][0] = box[2][0] ; p_box[ip][2][1] = box[2][1] ; + } + else { + const int n = box[ axis ][1] - box[ axis ][0] ; + const int np_low = np / 2 ; /* Rounded down */ + const int np_upp = np - np_low ; + + const int n_upp = (int) (((double) n) * ( ((double)np_upp) / 
((double)np))); + const int n_low = n - n_upp ; + const int next_axis = ( axis + 2 ) % 3 ; + + if ( np_low ) { /* P = [ip,ip+np_low) */ + Box dbox ; + dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; + dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; + dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ; + + dbox[ axis ][1] = dbox[ axis ][0] + n_low ; + + box_partition( ip, ip + np_low, next_axis, dbox, p_box ); + } + + if ( np_upp ) { /* P = [ip+np_low,ip+np_low+np_upp) */ + Box dbox; + dbox[0][0] = box[0][0] ; dbox[0][1] = box[0][1] ; + dbox[1][0] = box[1][0] ; dbox[1][1] = box[1][1] ; + dbox[2][0] = box[2][0] ; dbox[2][1] = box[2][1] ; + + ip += np_low ; + dbox[ axis ][0] += n_low ; + dbox[ axis ][1] = dbox[ axis ][0] + n_upp ; + + box_partition( ip, ip + np_upp, next_axis, dbox, p_box ); + } + } +} + +/*--------------------------------------------------------------------*/ + +static int box_disjoint( const Box& a , const Box& b) +{ + return a[0][1] <= b[0][0] || b[0][1] <= a[0][0] || + a[1][1] <= b[1][0] || b[1][1] <= a[1][0] || + a[2][1] <= b[2][0] || b[2][1] <= a[2][0] ; +} + +static void resize_int( int ** a , int * allocLen , int newLen ) +{ + int k = 32; + while ( k < newLen ) { k <<= 1 ; } + if ( NULL == *a ) + { *a = (int*)malloc( sizeof(int)*(*allocLen = k) ); } + else if ( *allocLen < k ) + { *a = (int*)realloc(*a , sizeof(int)*(*allocLen = k)); } +} + +static void box_partition_maps( + const int np , + const int my_p , + const Box* pbox, + const int ghost , + int ** map_local_id , + int ** map_recv_pc , + int ** map_send_pc , + int ** map_send_id ) +{ + const Box& my_box = pbox[my_p] ; + + const int my_ix = my_box[0][0] ; + const int my_iy = my_box[1][0] ; + const int my_iz = my_box[2][0] ; + const int my_nx = my_box[0][1] - my_box[0][0] ; + const int my_ny = my_box[1][1] - my_box[1][0] ; + const int my_nz = my_box[2][1] - my_box[2][0] ; + + const int my_use_nx = 2 * ghost + my_nx ; + const int my_use_ny = 2 * ghost + my_ny ; + const int my_use_nz = 2 
* ghost + my_nz ; + + const int id_length = my_use_nx * my_use_ny * my_use_nz ; + + int * local_id = (int *) malloc( id_length * sizeof(int) ); + int * recv_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); + int * send_pc = (int *) malloc( ( np + 1 ) * sizeof(int) ); + + int * send_id = NULL ; + int send_id_size = 0 ; + + int iLocal , iSend ; + int i ; + + Box my_use_box; + + my_use_box[0][0] = my_box[0][0] - ghost ; + my_use_box[0][1] = my_box[0][1] + ghost ; + my_use_box[1][0] = my_box[1][0] - ghost ; + my_use_box[1][1] = my_box[1][1] + ghost ; + my_use_box[2][0] = my_box[2][0] - ghost ; + my_use_box[2][1] = my_box[2][1] + ghost ; + + for ( i = 0 ; i < id_length ; ++i ) { local_id[i] = -1 ; } + + iSend = 0 ; + iLocal = 0 ; + + /* The vector space is partitioned by processors */ + + for ( i = 0 ; i < np ; ++i ) { + const int ip = ( i + my_p ) % np ; + recv_pc[i] = iLocal ; + send_pc[i] = iSend ; + + if ( ! box_disjoint( my_use_box , pbox[ip] ) ) { + const int p_ix = pbox[ip][0][0] ; + const int p_iy = pbox[ip][1][0] ; + const int p_iz = pbox[ip][2][0] ; + const int p_ex = pbox[ip][0][1] ; + const int p_ey = pbox[ip][1][1] ; + const int p_ez = pbox[ip][2][1] ; + + int local_x , local_y , local_z ; + + /* Run the span of global cells that my processor uses */ + + for ( local_z = -ghost ; local_z < my_nz + ghost ; ++local_z ) { + for ( local_y = -ghost ; local_y < my_ny + ghost ; ++local_y ) { + for ( local_x = -ghost ; local_x < my_nx + ghost ; ++local_x ) { + + const int global_z = local_z + my_iz ; + const int global_y = local_y + my_iy ; + const int global_x = local_x + my_ix ; + + const int entry = + box_map_local_entry(my_box,ghost,local_x,local_y,local_z); + + if ( entry < 0 ) { abort(); } + + if ( p_iz <= global_z && global_z < p_ez && + p_iy <= global_y && global_y < p_ey && + p_ix <= global_x && global_x < p_ex ) { + + /* This ordinal is owned by processor 'ip' */ + + local_id[ entry ] = iLocal++ ; + +#if defined(DEBUG_PRINT) +if ( my_p != ip ) { + 
fprintf(stdout," (%d,%d,%d) : P%d recv at local %d from P%d\n", + global_x,global_y,global_z,my_p,local_id[entry],ip); + fflush(stdout); +} +#endif + } + + /* If in my ownership and used by the other processor */ + if ( my_p != ip && + /* In my ownership: */ + ( 0 <= local_z && local_z < my_nz && + 0 <= local_y && local_y < my_ny && + 0 <= local_x && local_x < my_nx ) && + /* In other processors usage: */ + ( p_iz - ghost <= global_z && global_z < p_ez + ghost && + p_iy - ghost <= global_y && global_y < p_ey + ghost && + p_ix - ghost <= global_x && global_x < p_ex + ghost ) ) { + + resize_int( & send_id , & send_id_size , (iSend + 1) ); + send_id[ iSend ] = local_id[ entry ] ; + ++iSend ; + +#if defined(DEBUG_PRINT) +{ + fprintf(stdout," (%d,%d,%d) : P%d send at local %d to P%d\n", + global_x,global_y,global_z,my_p,local_id[entry],ip); + fflush(stdout); +} +#endif + } + } + } + } + } + } + recv_pc[np] = iLocal ; + send_pc[np] = iSend ; + + *map_local_id = local_id ; + *map_recv_pc = recv_pc ; + *map_send_pc = send_pc ; + *map_send_id = send_id ; +} + +void box_partition_rcb( const int np , + const int my_p , + const Box& root_box, + const int ghost , + Box** pbox, + int ** map_local_id , + int ** map_recv_pc , + int ** map_send_pc , + int ** map_send_id ) +{ + *pbox = new Box[ np ]; + + box_partition( 0 , np , 2 , root_box , *pbox ); + + box_partition_maps( np , my_p , *pbox , ghost , + map_local_id , map_recv_pc , + map_send_pc , map_send_id ); +} + +/*--------------------------------------------------------------------*/ + +#ifdef UNIT_TEST + +static int box_contain( const Box& a , const Box& b ) +{ + return a[0][0] <= b[0][0] && b[0][1] <= a[0][1] && + a[1][0] <= b[1][0] && b[1][1] <= a[1][1] && + a[2][0] <= b[2][0] && b[2][1] <= a[2][1] ; +} + +static void box_print( FILE * fp , const Box& a ) +{ + fprintf(fp,"{ [ %d , %d ) , [ %d , %d ) , [ %d , %d ) }", + a[0][0] , a[0][1] , + a[1][0] , a[1][1] , + a[2][0] , a[2][1] ); +} + +static void test_box( const Box& 
box , const int np ) +{ + const int ncell_box = box[0][1] * box[1][1] * box[2][1] ; + int ncell_total = 0 ; + int ncell_min = ncell_box ; + int ncell_max = 0 ; + std::vector pbox(np); + int i , j ; + + box_partition( 0 , np , 2 , box , &pbox[0] ); + + for ( i = 0 ; i < np ; ++i ) { + const int ncell = ( pbox[i][0][1] - pbox[i][0][0] ) * + ( pbox[i][1][1] - pbox[i][1][0] ) * + ( pbox[i][2][1] - pbox[i][2][0] ); + + if ( ! box_contain( box , pbox[i] ) ) { + fprintf(stdout," OUT OF BOUNDS pbox[%d/%d] = ",i,np); + box_print(stdout,pbox[i]); + fprintf(stdout,"\n"); + abort(); + } + + for ( j = i + 1 ; j < np ; ++j ) { + if ( ! box_disjoint( pbox[i] , pbox[j] ) ) { + fprintf(stdout," NOT DISJOINT pbox[%d/%d] = ",i,np); + box_print(stdout, pbox[i]); + fprintf(stdout,"\n"); + fprintf(stdout," pbox[%d/%d] = ",j,np); + box_print(stdout, pbox[j]); + fprintf(stdout,"\n"); + abort(); + } + } + ncell_total += ncell ; + + if ( ncell_max < ncell ) { ncell_max = ncell ; } + if ( ncell < ncell_min ) { ncell_min = ncell ; } + } + + if ( ncell_total != ncell_box ) { + fprintf(stdout," WRONG CELL COUNT NP = %d\n",np); + abort(); + } + fprintf(stdout,"NP = %d, total = %d, avg = %d, min = %d, max = %d\n", + np,ncell_box,ncell_box/np,ncell_min,ncell_max); +} + +/*--------------------------------------------------------------------*/ + +static void test_maps( const Box& root_box , const int np ) +{ + const int ghost = 1 ; + const int nx_global = root_box[0][1] - root_box[0][0] ; + const int ny_global = root_box[1][1] - root_box[1][0] ; + int ieq , i , j ; + std::vector pbox(np); + int **local_values ; + int **map_local_id ; + int **map_recv_pc ; + int **map_send_pc ; + int **map_send_id ; + + box_partition( 0 , np , 2 , root_box , &pbox[0] ); + + local_values = (int **) malloc( sizeof(int*) * np ); + map_local_id = (int **) malloc( sizeof(int*) * np ); + map_recv_pc = (int **) malloc( sizeof(int*) * np ); + map_send_pc = (int **) malloc( sizeof(int*) * np ); + map_send_id = (int **) 
malloc( sizeof(int*) * np ); + + /* Set each local value to the global equation number */ + + for ( ieq = i = 0 ; i < np ; ++i ) { + const Box& mybox = pbox[i] ; + const int nx = mybox[0][1] - mybox[0][0] ; + const int ny = mybox[1][1] - mybox[1][0] ; + const int nz = mybox[2][1] - mybox[2][0] ; + int ix , iy , iz ; + + /* Generate the partition maps for this rank */ + box_partition_maps( np , i , &pbox[0] , ghost , + & map_local_id[i] , & map_recv_pc[i] , + & map_send_pc[i] , & map_send_id[i] ); + + local_values[i] = (int *) malloc( sizeof(int) * map_recv_pc[i][np] ); + + for ( iz = -ghost ; iz < nz + ghost ; ++iz ) { + for ( iy = -ghost ; iy < ny + ghost ; ++iy ) { + for ( ix = -ghost ; ix < nx + ghost ; ++ix ) { + const int ieq = box_map_local(mybox,ghost,map_local_id[i],ix,iy,iz); + + if ( 0 <= ieq ) { + const int ix_global = ix + mybox[0][0] ; + const int iy_global = iy + mybox[1][0] ; + const int iz_global = iz + mybox[2][0] ; + + if ( root_box[0][0] <= ix_global && ix_global < root_box[0][1] && + root_box[1][0] <= iy_global && iy_global < root_box[1][1] && + root_box[2][0] <= iz_global && iz_global < root_box[2][1] ) { + + local_values[i][ ieq ] = ix_global + + iy_global * nx_global + + iz_global * nx_global * ny_global ; + } + else { + local_values[i][ ieq ] = -1 ; + } + } + } + } + } + } + + /* Pair-wise compare the local values */ + /* i == receiving processor rank */ + /* ip == sending processor rank */ + /* j == receiving processor data entry for message from 'ip' */ + /* jp == sending processor data entry for message to 'i' */ + + for ( i = 0 ; i < np ; ++i ) { + for ( j = 1 ; j < np ; ++j ) { + const int ip = ( i + j ) % np ; + const int jp = ( i + np - ip ) % np ; + const int nrecv = map_recv_pc[i] [j+1] - map_recv_pc[i] [j] ; + const int nsend = map_send_pc[ip][jp+1] - map_send_pc[ip][jp] ; + int k ; + if ( nrecv != nsend ) { + fprintf(stderr,"P%d recv %d from P%d\n",i,nrecv,ip); + fprintf(stderr,"P%d send %d to P%d\n",ip,nsend,i); + abort(); + } + 
for ( k = 0 ; k < nrecv ; ++k ) { + const int irecv = map_recv_pc[i][j] + k ; + const int isend = map_send_pc[ip][jp] + k ; + const int val_irecv = local_values[i][irecv] ; + const int val_isend = local_values[ip][ map_send_id[ip][isend] ] ; + if ( val_irecv != val_isend ) { + fprintf(stderr,"P%d recv[%d] = %d , from P%d\n",i,k,val_irecv,ip); + fprintf(stderr,"P%d send[%d] = %d , to P%d\n",ip,k,val_isend,i); + abort(); + } + } + } + } + + for ( i = 0 ; i < np ; ++i ) { + free( map_local_id[i] ); + free( map_recv_pc[i] ); + free( map_send_pc[i] ); + free( map_send_id[i] ); + free( local_values[i] ); + } + free( map_send_id ); + free( map_send_pc ); + free( map_recv_pc ); + free( map_local_id ); + free( local_values ); +} + +/*--------------------------------------------------------------------*/ + +int main( int argc , char * argv[] ) +{ + int np_max = 256 ; + Box box = { 0 , 64 , 0 , 64 , 0 , 64 }; + int np = 0 ; + + switch( argc ) { + case 3: + sscanf(argv[1],"%d",&np); + sscanf(argv[2],"%dx%dx%d",& box[0][1] , & box[1][1] , & box[2][1] ); + if ( 0 < np ) { test_box( box , np ); } + if ( 0 < np ) { test_maps( box , np ); } + break ; + default: + for ( np = 1 ; np <= np_max ; ++np ) { + test_box( box , np ); + test_maps( box , np ); + } + break ; + } + return 0 ; +} + +#endif + + diff --git a/mkl/utils/BoxPartition.hpp b/mkl/utils/BoxPartition.hpp new file mode 100644 index 0000000..14e4076 --- /dev/null +++ b/mkl/utils/BoxPartition.hpp @@ -0,0 +1,103 @@ +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. 
+// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#ifndef _BoxPartition_hpp_ +#define _BoxPartition_hpp_ + +#include + +/** \brief Recursively split a box into (up-ip) sub-boxes + */ +void box_partition( int ip , int up , int axis , + const Box& box , + Box* p_box ); + +/** \brief Partition a { [ix,jx) X [iy,jy) X [iz,jz) } box. + * + * Use recursive coordinate bisection to partition a box + * into np disjoint sub-boxes. Allocate (via malloc) and + * populate the sub-boxes, mapping the local (x,y,z) to + * a local ordinal, and mappings for the send-recv messages + * to update the ghost cells. 
+ * + * usage: + * + * my_nx = pbox[my_p][0][1] - pbox[my_p][0][0] ; + * my_ny = pbox[my_p][1][1] - pbox[my_p][1][0] ; + * my_nz = pbox[my_p][2][1] - pbox[my_p][2][0] ; + * + * for ( x = -ghost ; x < my_nx + ghost ; ++x ) { + * for ( y = -ghost ; y < my_ny + ghost ; ++y ) { + * for ( z = -ghost ; z < my_nz + ghost ; ++z ) { + * const int x_global = x + pbox[my_p][0][0] ; + * const int y_global = y + pbox[my_p][1][0] ; + * const int z_global = z + pbox[my_p][2][0] ; + * + * const int local_ordinal = + * box_map_local( pbox[my_p], ghost, map_local_id, x, y, z ); + * + * if ( 0 <= local_ordinal ) { + * } + * } + * + * for ( i = 1 ; i < np ; ++i ) { + * const int recv_processor = ( my_p + i ) % np ; + * const int recv_ordinal_begin = map_recv_pc[i]; + * const int recv_ordinal_end = map_recv_pc[i+1]; + * } + * + * for ( i = 1 ; i < np ; ++i ) { + * const int send_processor = ( my_p + i ) % np ; + * const int send_map_begin = map_send_pc[i]; + * const int send_map_end = map_send_pc[i+1]; + * for ( j = send_map_begin ; j < send_map_end ; ++j ) { + * send_ordinal = map_send_id[j] ; + * } + * } + */ +void box_partition_rcb( + const int np /**< [in] Number of partitions */ , + const int my_p /**< [in] My partition rank */ , + const Box& root_box /**< [in] 3D Box to partition */ , + const int ghost /**< [in] Ghost cell boundary */ , + Box* pbox /**< [out] Partition's 3D boxes */ , + int ** map_local_id /**< [out] Map local cells */ , + int ** map_recv_pc /**< [out] Receive spans per processor */ , + int ** map_send_pc /**< [out] Send prefix counts per processor */ , + int ** map_send_id /**< [out] Send message ordinals */ ); + +/* \brief Map a local (x,y,z) to a local ordinal. 
+ */ +int box_map_local( const Box& box_local , + const int ghost , + const int map_local_id[] , + const int local_x , + const int local_y , + const int local_z ); + +#endif + diff --git a/mkl/utils/Parameters.hpp b/mkl/utils/Parameters.hpp new file mode 100644 index 0000000..44841cf --- /dev/null +++ b/mkl/utils/Parameters.hpp @@ -0,0 +1,64 @@ +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#ifndef _parameters_hpp_ +#define _parameters_hpp_ + +#include + +namespace miniFE { + +struct Parameters { + Parameters() + : nx(5), ny(nx), nz(nx), numthreads(1), + mv_overlap_comm_comp(0), use_locking(0), + load_imbalance(0), name(), elem_group_size(1), + use_elem_mat_fields(1), verify_solution(0), + device(0),num_devices(2),skip_device(9999),numa(1) + {} + + int nx; + int ny; + int nz; + int numthreads; + int mv_overlap_comm_comp; + int use_locking; + float load_imbalance; + std::string name; + int elem_group_size; + int use_elem_mat_fields; + int verify_solution; + int device; + int num_devices; + int skip_device; + int numa; +};//struct Parameters + +}//namespace miniFE + +#endif + diff --git a/mkl/utils/TypeTraits.hpp b/mkl/utils/TypeTraits.hpp new file mode 100644 index 0000000..c7bcb44 --- /dev/null +++ b/mkl/utils/TypeTraits.hpp @@ -0,0 +1,136 @@ +#ifndef _TypeTraits_hpp_ +#define _TypeTraits_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. 
+// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include + +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +template struct TypeTraits {}; + +template<> +struct TypeTraits { + typedef float magnitude_type; + + static const char* name() {return "float";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_FLOAT;} +#endif +}; + +template<> +struct TypeTraits { + typedef double magnitude_type; + + static const char* name() {return "double";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_DOUBLE;} +#endif +}; + +template<> +struct TypeTraits { + typedef int magnitude_type; + + static const char* name() {return "int";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_INT;} +#endif +}; + +template<> +struct TypeTraits { + typedef long int magnitude_type; + + static const char* name() {return "long int";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_LONG;} +#endif +}; + +#ifndef MINIFE_NO_LONG_LONG + +template<> +struct TypeTraits { + typedef long long magnitude_type; + + static const char* name() {return "long long";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_LONG_LONG;} +#endif +}; + +#endif + +template<> +struct TypeTraits { + typedef unsigned magnitude_type; + + static const char* name() {return "unsigned";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_UNSIGNED;} +#endif +}; + +template<> +struct TypeTraits > { + typedef float 
magnitude_type; + + static const char* name() {return "std::complex";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_COMPLEX;} +#endif +}; + +template<> +struct TypeTraits > { + typedef double magnitude_type; + + static const char* name() {return "std::complex";} + +#ifdef HAVE_MPI + static MPI_Datatype mpi_type() {return MPI_DOUBLE_COMPLEX;} +#endif +}; + +}//namespace miniFE + +#endif + diff --git a/mkl/utils/box_utils.hpp b/mkl/utils/box_utils.hpp new file mode 100644 index 0000000..f5bdb40 --- /dev/null +++ b/mkl/utils/box_utils.hpp @@ -0,0 +1,320 @@ +#ifndef _box_utils_hpp_ +#define _box_utils_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#include +#include + +namespace miniFE { + +inline void copy_box(const Box& from_box, Box& to_box) +{ + for(int i=0; i<3; ++i) { + to_box[i][0] = from_box[i][0]; + to_box[i][1] = from_box[i][1]; + } +} + +template +#ifdef __CUDACC__ +__host__ __device__ __inline__ +#endif +void get_int_coords(GlobalOrdinal ID, int nx, int ny, int nz, + int& x, int& y, int& z) +{ + z = ID/(nx*ny); + y = (ID%(nx*ny))/nx; + x = ID%nx; +} + +template +#ifdef __CUDACC__ +__host__ __device__ __inline__ +#endif +void get_coords(GlobalOrdinal ID, int nx, int ny, int nz, + Scalar& x, Scalar& y, Scalar& z) +{ + const int xdiv = nx>1 ? nx-1 : 1; + const int ydiv = ny>1 ? ny-1 : 1; + const int zdiv = nz>1 ? nz-1 : 1; + +//This code assumes that ID is 0-based. +// +//compute coordinates that lie on (or in) the unit cube. 
+//that's why we're dividing by nz,ny,nx: + z = (1.0*(ID/(nx*ny)))/zdiv; + y = 1.0*((ID%(nx*ny))/nx)/ydiv; + x = 1.0*(ID%nx)/xdiv; +} + +template +GlobalOrdinal get_num_ids(const Box& box) +{ + int nx = box[0][1] - box[0][0]; + int ny = box[1][1] - box[1][0]; + int nz = box[2][1] - box[2][0]; + GlobalOrdinal tmp = nx*ny; + tmp *= nz; + return tmp; +} + +template +#ifdef __CUDACC__ +__host__ __device__ __inline__ +#endif +GlobalOrdinal get_id(int nx, int ny, int nz, + int x, int y, int z) +{ + if (x<0 || y<0 || z<0) return -1; + if (x>=nx || y>=ny || z>=nz) return -1; + + //form x + nx*y + nx*ny*z: + + GlobalOrdinal tmp = nx*ny; + tmp *= z; + tmp = x + nx * y + tmp; + return tmp; +} + +template +void get_ids(int nx, int ny, int nz, + const Box& box, + std::vector& ids, + bool include_ghost_layer=false) +{ + ids.clear(); + int minz = box[2][0]; + int maxz = box[2][1]; + int miny = box[1][0]; + int maxy = box[1][1]; + int minx = box[0][0]; + int maxx = box[0][1]; + + if (include_ghost_layer) { + if (minz > 0) minz--; + if (miny > 0) miny--; + if (minx > 0) minx--; + if (maxz < nz) maxz++; + if (maxy < ny) maxy++; + if (maxx < nx) maxx++; + } + + size_t ids_size = ((maxz - minz) * (maxy - miny)) * (maxx - minx); + ids.reserve(ids_size); + + for(int z=minz; z(nx, ny, nz, x, y, z)); + } + } + } +} + +template +void get_ghost_ids(int nx, int ny, int nz, + const Box& box, + std::vector& ids) +{ + ids.clear(); + int minz,maxz,miny,maxy,minx,maxx; + int orig_minz = minz = box[2][0]; + int orig_maxz = maxz = box[2][1]; + int orig_miny = miny = box[1][0]; + int orig_maxy = maxy = box[1][1]; + int orig_minx = minx = box[0][0]; + int orig_maxx = maxx = box[0][1]; + + if (minz > 0) minz--; + if (miny > 0) miny--; + if (minx > 0) minx--; + if (maxz < nz) maxz++; + if (maxy < ny) maxy++; + if (maxx < nx) maxx++; + + for(int z=minz; z= orig_maxx); + bool y_in_ghost_layer = (y < orig_miny) || (y >= orig_maxy); + bool z_in_ghost_layer = (z < orig_minz) || (z >= orig_maxz); + //we are 
in the ghost layer if any one of x,y,z are in the ghost layer + if (!x_in_ghost_layer && !y_in_ghost_layer && !z_in_ghost_layer) continue; + ids.push_back(get_id(nx, ny, nz, x, y, z)); + } + } + } +} + + inline void print_box(int myproc, const char* name, const Box& box, + const char* name2, const Box& box2) +{ + std::cout << "proc " << myproc << " "< box2[0][0] && box1[0][1] < box2[0][1]) || // range contains other + (box2[0][0] > box1[0][0] && box2[0][1] < box1[0][1]) || // range contains other + (box1[0][0] > box2[0][0] && box1[0][0] < box2[0][1]) || // min contained in rng + (box2[0][0] > box1[0][0] && box2[0][0] < box1[0][1]); // min contained in rng + if (!x_neighbor) { + x_neighbor = (box1[0][1] == box2[0][0]-1) || (box1[0][0] == box2[0][1]+1); + } + + bool y_neighbor = (box1[1][1] == box2[1][0]) || (box1[1][0] == box2[1][1]) || // min matches max + (box1[1][0] == box2[1][0]) || (box1[1][1] == box2[1][1]) || // mins or maxs match + (box1[1][0] > box2[1][0] && box1[1][1] < box2[1][1]) || // range contains other + (box2[1][0] > box1[1][0] && box2[1][1] < box1[1][1]) || // range contains other + (box1[1][0] > box2[1][0] && box1[1][0] < box2[1][1]) || // min contained in rng + (box2[1][0] > box1[1][0] && box2[1][0] < box1[1][1]); // min contained in rng + if (!y_neighbor) { + y_neighbor = (box1[1][1] == box2[1][0]-1) || (box1[1][0] == box2[1][1]+1); + } + + bool z_neighbor = (box1[2][1] == box2[2][0]) || (box1[2][0] == box2[2][1]) || // min matches max + (box1[2][0] == box2[2][0]) || (box1[2][1] == box2[2][1]) || // mins or maxs match + (box1[2][0] > box2[2][0] && box1[2][1] < box2[2][1]) || // range contains other + (box2[2][0] > box1[2][0] && box2[2][1] < box1[2][1]) || // range contains other + (box1[2][0] > box2[2][0] && box1[2][0] < box2[2][1]) || // min contained in rng + (box2[2][0] > box1[2][0] && box2[2][0] < box1[2][1]); // min contained in rng + if (!z_neighbor) { + z_neighbor = (box1[2][1] == box2[2][0]-1) || (box1[2][0] == box2[2][1]+1); + } + + 
return x_neighbor && y_neighbor && z_neighbor; +} + +template +void create_map_id_to_row(int global_nx, int global_ny, int global_nz, + const Box& box, + std::map& id_to_row) +{ + GlobalOrdinal num_my_ids = get_num_ids(box); + + typename std::vector all_ids; + bool include_ghost_layer = false; + get_ids(global_nx, global_ny, global_nz, box, all_ids, include_ghost_layer); + + GlobalOrdinal my_first_row = 0; + typename std::vector global_offsets; + std::vector all_boxes; + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + + GlobalOrdinal local_num_ids = num_my_ids; + global_offsets.resize(numprocs); + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + MPI_Allgather(&local_num_ids, 1, mpi_dtype, &global_offsets[0], 1, mpi_dtype, MPI_COMM_WORLD); + GlobalOrdinal offset = 0; + for(int i=0; i(&box.ranges[0]); + MPI_Allgather(local_box_ranges, 6, MPI_INT, &all_boxes[0], 6, MPI_INT, MPI_COMM_WORLD); +#endif + + if (all_ids.size() > 0) { + id_to_row.insert(std::make_pair(all_ids[0], my_first_row)); + } + + for(size_t i=1; i 0) { + id_to_row.insert(std::make_pair(all_ids[0], first_row)); + } + for(size_t j=1; j::iterator iter = id_to_row.begin(), end = id_to_row.end(); +//for(; iter!=end; ++iter) { +// std::cout<<"proc "<first<<" :: "<second< +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace miniFE { + +template +size_t +compute_matrix_stats(const MatrixType& A, int myproc, int numprocs, YAML_Doc& ydoc) +{ + typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal; + typedef typename MatrixType::LocalOrdinalType LocalOrdinal; + typedef typename MatrixType::ScalarType Scalar; + + GlobalOrdinal min_nrows = 0, max_nrows = 0, global_nrows = 0; + int min_proc = 0, max_proc = 0; + + GlobalOrdinal local_nrows = A.rows.size(); + + get_global_min_max(local_nrows, global_nrows, min_nrows, min_proc, + max_nrows, max_proc); + + //Gather stats on global, 
min/max matrix num-nonzeros: + + double local_nnz = A.num_nonzeros(); + double dglobal_nnz = 0, dmin_nnz = 0, dmax_nnz = 0; + + get_global_min_max(local_nnz, dglobal_nnz, dmin_nnz, min_proc, + dmax_nnz, max_proc); + + double avg_nrows = global_nrows; + avg_nrows /= numprocs; + double avg_nnz = dglobal_nnz; + avg_nnz /= numprocs; + + double mem_overhead_MB = parallel_memory_overhead_MB(A); + + size_t global_nnz = static_cast(std::ceil(dglobal_nnz)); + size_t min_nnz = static_cast(std::ceil(dmin_nnz)); + size_t max_nnz = static_cast(std::ceil(dmax_nnz)); + size_t global_num_rows = global_nrows; + + if (myproc == 0) { + ydoc.add("Matrix attributes",""); + ydoc.get("Matrix attributes")->add("Global Nrows",global_num_rows); + ydoc.get("Matrix attributes")->add("Global NNZ",global_nnz); + + //compute how much memory the matrix occupies: + //num-bytes = sizeof(GlobalOrdinal)*global_nrows for A.rows + // + sizeof(LocalOrdinal)*global_nrows for A.rows_offsets + // + sizeof(GlobalOrdinal)*global_nnz for A.packed_cols + // + sizeof(Scalar)*global_nnz for A.packed_coefs + + double invGB = 1.0/(1024*1024*1024); + double memGB = invGB*global_nrows*sizeof(GlobalOrdinal); + memGB += invGB*global_nrows*sizeof(LocalOrdinal); + memGB += invGB*global_nnz*sizeof(GlobalOrdinal); + memGB += invGB*global_nnz*sizeof(Scalar); + ydoc.get("Matrix attributes")->add("Global Memory (GB)",memGB); + + ydoc.get("Matrix attributes")->add("Pll Memory Overhead (MB)",mem_overhead_MB); + + size_t min_num_rows = min_nrows; + size_t max_num_rows = max_nrows; + ydoc.get("Matrix attributes")->add("Rows per proc MIN",min_num_rows); + ydoc.get("Matrix attributes")->add("Rows per proc MAX",max_num_rows); + ydoc.get("Matrix attributes")->add("Rows per proc AVG",avg_nrows); + ydoc.get("Matrix attributes")->add("NNZ per proc MIN",min_nnz); + ydoc.get("Matrix attributes")->add("NNZ per proc MAX",max_nnz); + ydoc.get("Matrix attributes")->add("NNZ per proc AVG",avg_nnz); + } + + return global_nnz; +} + 
+}//namespace miniFE + +#endif + diff --git a/mkl/utils/imbalance.hpp b/mkl/utils/imbalance.hpp new file mode 100644 index 0000000..bd6c6c8 --- /dev/null +++ b/mkl/utils/imbalance.hpp @@ -0,0 +1,298 @@ +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#ifndef _imbalance_hpp_ +#define _imbalance_hpp_ + +#include + +#ifdef HAVE_MPI +#include +#endif + +#include +#include +#include + +namespace miniFE { + +const int X = 0; +const int Y = 1; +const int Z = 2; +const int NONE = 3; + +const int LOWER = 0; +const int UPPER = 1; + +template +void +compute_imbalance(const Box& global_box, + const Box& local_box, + float& largest_imbalance, + float& std_dev, + YAML_Doc& doc, + bool record_in_doc) +{ + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + GlobalOrdinal local_nrows = get_num_ids(local_box); + GlobalOrdinal min_nrows = 0, max_nrows = 0, global_nrows = 0; + int min_proc = myproc, max_proc = myproc; + get_global_min_max(local_nrows, global_nrows, min_nrows, min_proc, + max_nrows, max_proc); + + float avg_nrows = global_nrows; + avg_nrows /= numprocs; + + //largest_imbalance will be the difference between the min (or max) + //rows-per-processor and avg_nrows, represented as a percentage: + largest_imbalance = percentage_difference(min_nrows, avg_nrows); + + float tmp = percentage_difference(max_nrows, avg_nrows); + if (tmp > largest_imbalance) largest_imbalance = tmp; + + std_dev = compute_std_dev_as_percentage(local_nrows, avg_nrows); + + if (myproc == 0 && record_in_doc) { + doc.add("Rows-per-proc Load Imbalance",""); + doc.get("Rows-per-proc Load Imbalance")->add("Largest (from avg, %)",largest_imbalance); + doc.get("Rows-per-proc Load Imbalance")->add("Std Dev (%)",std_dev); + } +} + +std::pair +decide_how_to_grow(const Box& global_box, const Box& local_box) +{ + std::pair result(NONE,UPPER); + + if 
(local_box[Z][UPPER] < global_box[Z][UPPER]) { + result.first = Z; + result.second = UPPER; + return result; + } + if (local_box[Z][LOWER] > global_box[Z][LOWER]) { + result.first = Z; + result.second = LOWER; + return result; + } + if (local_box[Y][UPPER] < global_box[Y][UPPER]) { + result.first = Y; + result.second = UPPER; + return result; + } + if (local_box[Y][LOWER] > global_box[Y][LOWER]) { + result.first = Y; + result.second = LOWER; + return result; + } + if (local_box[X][UPPER] < global_box[X][UPPER]) { + result.first = X; + result.second = UPPER; + return result; + } + if (local_box[X][LOWER] > global_box[X][LOWER]) { + result.first = X; + result.second = LOWER; + return result; + } + return result; +} + +std::pair +decide_how_to_shrink(const Box& global_box, const Box& local_box) +{ + std::pair result(NONE,UPPER); + + if (local_box[Z][UPPER] < global_box[Z][UPPER] && local_box[Z][UPPER]-local_box[Z][LOWER] > 2) { + result.first = Z; + result.second = UPPER; + return result; + } + if (local_box[Z][LOWER] > global_box[Z][LOWER] && local_box[Z][UPPER]-local_box[Z][LOWER] > 2) { + result.first = Z; + result.second = LOWER; + return result; + } + if (local_box[Y][UPPER] < global_box[Y][UPPER] && local_box[Y][UPPER]-local_box[Y][LOWER] > 2) { + result.first = Y; + result.second = UPPER; + return result; + } + if (local_box[Y][LOWER] > global_box[Y][LOWER] && local_box[Y][UPPER]-local_box[Y][LOWER] > 2) { + result.first = Y; + result.second = LOWER; + return result; + } + if (local_box[X][UPPER] < global_box[X][UPPER] && local_box[X][UPPER]-local_box[X][LOWER] > 2) { + result.first = X; + result.second = UPPER; + return result; + } + if (local_box[X][LOWER] > global_box[X][LOWER] && local_box[X][UPPER]-local_box[X][LOWER] > 2) { + result.first = X; + result.second = LOWER; + return result; + } + return result; +} + +template +void +add_imbalance(const Box& global_box, + Box& local_box, + float imbalance, + YAML_Doc& doc) +{ + int numprocs = 1, myproc = 0; 
+#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + if (numprocs == 1) { + return; + } + + float cur_imbalance = 0, cur_std_dev = 0; + compute_imbalance(global_box, local_box, + cur_imbalance, cur_std_dev, doc, false); + + while (cur_imbalance < imbalance) { + GlobalOrdinal local_nrows = get_num_ids(local_box); + GlobalOrdinal min_nrows = 0, max_nrows = 0, global_nrows = 0; + int min_proc = myproc, max_proc = myproc; + get_global_min_max(local_nrows, global_nrows, min_nrows, min_proc, + max_nrows, max_proc); + + std::pair grow(NONE,UPPER); + int grow_axis_val = -1; + std::pair shrink(NONE,UPPER); + int shrink_axis_val = -1; + + if (myproc == max_proc) { + grow = decide_how_to_grow(global_box, local_box); + if (grow.first != NONE) { + grow_axis_val = local_box[grow.first][grow.second]; + } + } + if (myproc == min_proc) { + shrink = decide_how_to_shrink(global_box, local_box); + if (shrink.first != NONE) { + shrink_axis_val = local_box[shrink.first][shrink.second]; + } + } + + int grow_info[8] = {grow.first, grow.second, + local_box[X][0], local_box[X][1], + local_box[Y][0], local_box[Y][1], + local_box[Z][0], local_box[Z][1]}; + + int shrink_info[8] = {shrink.first, shrink.second, + local_box[X][0], local_box[X][1], + local_box[Y][0], local_box[Y][1], + local_box[Z][0], local_box[Z][1]}; +#ifdef HAVE_MPI + MPI_Bcast(&grow_info[0], 8, MPI_INT, max_proc, MPI_COMM_WORLD); + MPI_Bcast(&shrink_info[0], 8, MPI_INT, min_proc, MPI_COMM_WORLD); +#endif + + int grow_axis = grow_info[0]; + int grow_end = grow_info[1]; + int shrink_axis = shrink_info[0]; + int shrink_end = shrink_info[1]; + int grow_incr = 1; + if (grow_end == LOWER) grow_incr = -1; + int shrink_incr = -1; + if (shrink_end == LOWER) shrink_incr = 1; + if (grow_axis != NONE) grow_axis_val = grow_info[2+grow_axis*2+grow_end]; + if (shrink_axis != NONE) shrink_axis_val = shrink_info[2+shrink_axis*2+shrink_end]; + + if (grow_axis == NONE && shrink_axis 
== NONE) break; + + bool grow_status = grow_axis==NONE ? false : true; + if (grow_axis != NONE) { + if ((grow_incr == 1 && local_box[grow_axis][0] == grow_axis_val) || + (grow_incr == -1 && local_box[grow_axis][1] == grow_axis_val)) { + if (local_box[grow_axis][1] - local_box[grow_axis][0] < 2) { + grow_status = false; + } + } + } + + bool shrink_status = shrink_axis==NONE ? false : true; + if (shrink_axis != NONE) { + if ((shrink_incr == 1 && local_box[shrink_axis][0] == shrink_axis_val) || + (shrink_incr == -1 && local_box[shrink_axis][1] == shrink_axis_val)) { + if (local_box[shrink_axis][1] - local_box[shrink_axis][0] < 2) { + shrink_status = false; + } + } + } + +#ifdef HAVE_MPI + int statusints[2] = { grow_status ? 0 : 1, shrink_status ? 0 : 1 }; + int globalstatus[2] = { 0, 0 }; + MPI_Allreduce(&statusints, &globalstatus, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + grow_status = globalstatus[0]>0 ? false : true; + shrink_status = globalstatus[1]>0 ? false : true; +#endif + + if (grow_status == false && shrink_status == false) break; + + if (grow_status && grow_axis != NONE) { + if (local_box[grow_axis][0] == grow_axis_val) { + local_box[grow_axis][0] += grow_incr; + } + + if (local_box[grow_axis][1] == grow_axis_val) { + local_box[grow_axis][1] += grow_incr; + } + } + + if (shrink_status && shrink_axis != NONE) { + if (local_box[shrink_axis][0] == shrink_axis_val) { + local_box[shrink_axis][0] += shrink_incr; + } + + if (local_box[shrink_axis][1] == shrink_axis_val) { + local_box[shrink_axis][1] += shrink_incr; + } + } + + compute_imbalance(global_box, local_box, + cur_imbalance, cur_std_dev, doc, false); + } +} + +}//namespace miniFE + +#endif + diff --git a/mkl/utils/miniFE_no_info.hpp b/mkl/utils/miniFE_no_info.hpp new file mode 100644 index 0000000..7dc5d6f --- /dev/null +++ b/mkl/utils/miniFE_no_info.hpp @@ -0,0 +1,39 @@ +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and 
Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#ifndef miniFE_no_info_hpp +#define miniFE_no_info_hpp + +#define MINIFE_HOSTNAME "unknown" +#define MINIFE_KERNEL_NAME "unknown" +#define MINIFE_KERNEL_RELEASE "unknown" +#define MINIFE_PROCESSOR "unknown" + +#define MINIFE_CXX "unknown" +#define MINIFE_CXXFLAGS "unknown" + +#endif diff --git a/mkl/utils/miniFE_version.h b/mkl/utils/miniFE_version.h new file mode 100644 index 0000000..6ae8398 --- /dev/null +++ b/mkl/utils/miniFE_version.h @@ -0,0 +1,35 @@ +#ifndef _minife_version_h_ +#define _minife_version_h_ + +//@HEADER +// ************************************************************************ +// +// miniFE: simple finite-element assembly and linear-solve +// Copyright (2006) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. 
+// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define MINIFE_VERSION "2.0" + +#endif + diff --git a/mkl/utils/mytimer.cpp b/mkl/utils/mytimer.cpp new file mode 100644 index 0000000..c896263 --- /dev/null +++ b/mkl/utils/mytimer.cpp @@ -0,0 +1,132 @@ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +namespace miniFE { + +///////////////////////////////////////////////////////////////////////// + +// Function to return time in seconds. +// If compiled with no flags, return CPU time (user and system). +// If compiled with -DWALL, returns elapsed time. + +///////////////////////////////////////////////////////////////////////// + +#if defined(HAVE_MPI) && defined(USE_MPI_WTIME) + +timer_type mytimer() +{ + return((timer_type) MPI_Wtime()); +} + + +#elif defined(UseClock) + +#include +timer_type mytimer(void) +{ + clock_t t1; + static clock_t t0=0; + static timer_type CPS = CLOCKS_PER_SEC; + timer_type d; + + if (t0 == 0) t0 = clock(); + t1 = clock() - t0; + d = t1 / CPS; + return(d); +} + +#elif defined(WALL) + +#include +#include +#include +timer_type mytimer(void) +{ + struct timeval tp; + static long start=0, startu; + if (!start) + { + gettimeofday(&tp, NULL); + start = tp.tv_sec; + startu = tp.tv_usec; + return(0.0); + } + gettimeofday(&tp, NULL); + return( ((timer_type) (tp.tv_sec - start)) + (tp.tv_usec-startu)/1000000.0 ); +} + +#elif defined(UseTimes) + +#include +#include +#include +timer_type mytimer(void) +{ + struct tms ts; + static timer_type ClockTick=0.0; + + if (ClockTick == 0.0) ClockTick = (timer_type) sysconf(_SC_CLK_TCK); + times(&ts); + return( (timer_type) ts.tms_utime / ClockTick ); +} + +#else + +#include +#include +#include +timer_type mytimer(void) +{ +//This function now uses gettimeofday instead of getrusage. See note below. 
+// + struct timeval tv; + struct timezone tz; + gettimeofday(&tv, &tz); + return ( (timer_type)tv.tv_sec + tv.tv_usec/1000000.0 ); + +//The below use of 'getrusage' is not used because it doesn't do the right thing +//for the case of using threads. It adds up the time spent in multiple threads, +//rather than giving elapsed time. +// +// struct rusage ruse; +// getrusage(RUSAGE_SELF, &ruse); +// return( (timer_type)(ruse.ru_utime.tv_sec+ruse.ru_utime.tv_usec / 1000000.0) ); +} + +#endif + +}//namespace miniFE + diff --git a/mkl/utils/mytimer.hpp b/mkl/utils/mytimer.hpp new file mode 100644 index 0000000..824dbee --- /dev/null +++ b/mkl/utils/mytimer.hpp @@ -0,0 +1,52 @@ +#ifndef _mytimer_hpp_ +#define _mytimer_hpp_ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +namespace miniFE { + +typedef double timer_type; + +timer_type mytimer(); + +enum CG_TIMES { + WAXPY = 0, + DOT = 1, + MATVEC = 2, + MATVECDOT = 3, + TOTAL = 4, + NUM_TIMERS = 5 +}; + +//Use TICK and TOCK to time a code section +#define TICK() t0 = mytimer(); +#define TOCK(t) t += mytimer() - t0; + +}//namespace miniFE + +#endif diff --git a/mkl/utils/outstream.hpp b/mkl/utils/outstream.hpp new file mode 100644 index 0000000..bff02cd --- /dev/null +++ b/mkl/utils/outstream.hpp @@ -0,0 +1,45 @@ +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#ifndef _outstream_hpp_ +#define _outstream_hpp_ + +#include +#include + +inline +std::ostream& outstream(int np=1, int p=0) +{ + static bool first = true; + static std::ostringstream oss; + if (first) oss << "minife_debug."< + +#include +#include + +namespace Mantevo { + +//------------------------------------------------------------- +void read_args_into_string(int argc, char** argv, std::string& arg_string) +{ + arg_string = argv[0]; + for(int i=1; i +#include + +//Parameter-parsing Utilities: +// +//The functions declared below are intended to assist with parsing +//input-parameters which may be command-line arguments and/or lines in a +//text file. +// +// Scenario: You want your program to accept parameters that are specified +// as command-line arguments and/or as lines in a text file (such +// as a YAML output file). i.e., your program can be run like this: +// % program.exe foo=3.14159 bar: 42 +// or +// % program.exe input_file=params.txt +// or +// % program.exe foo=3.14159 input_file = params.txt +// +//Example: +// Here is example code to obtain parameters using the 3 functions +// 'read_args_into_string', 'read_file_into_string' and 'parse_parameter': +// +// std::string arg_string; +// +// //put command-line-arguments into 'arg_string': +// read_args_into_string(argc, argv, arg_string); +// +// //do the command-line-arguments specify an 'input_file'? 
+// std::string filename = +// parse_parameter(arg_string,"input_file","none-specified"); +// +// if (filename != "none-specified") { +// std::string tmp; +// read_file_into_string(filename, tmp); +// arg_string += tmp; +// } +// +// //now parse the parameters: +// float foo = parse_parameter(arg_string, "foo", -9.9); +// int bar = parse_parameter(arg_string, "bar", -1); +// +//See the comments below for parse_parameter, for formatting requirements of +//named parameter-value pairs. +// + +namespace Mantevo { + +/** + * Concatenate command-line arguments into a single string. + * + * Note: this function is purely serial. If argc and argv have different + * values on different MPI processes, then you need to resolve that by + * broadcasting arg_string's contents. + */ +void read_args_into_string(int argc, char** argv, std::string& arg_string); + +/** + * Read the contents of a text-file into a single string. + * + * Note: this function is purely serial. If you want file_contents on multiple + * MPI processes, you need to broadcast it (or call this function on each + * MPI process...). + */ +void read_file_into_string(const std::string& filename, + std::string& file_contents); + +/** + * Parse a named parameter value from input 'arg_string'. + * + * Search 'arg_string' for an occurrence of param_name and attempt to parse + * a value into the return-type. If param_name is not found, then default_value + * is returned. + * + * Example: + * arg_string = "foo = 3.14159"; + * float foo = parse_parameter(arg_string, "foo", -999.9); + * //foo should now contain the value 3.14159; if 'foo' was not found in + * //arg_string, then -999.9 would have been returned. + * + * Other legal name-value separators are ':' and ' '. Extra spaces are also ok, + * e.g. "foo : 3.114159". + * + * Note that if a YAML file is read into a string, that would be a valid input + * string for this function. 
+ */ +template +T parse_parameter(const std::string& arg_string, + const std::string& param_name, + const T& default_value) +{ + std::string::size_type pos = arg_string.find(param_name); + if (pos == std::string::npos) { + //if param_name is not found in arg_string, return default_value: + return default_value; + } + + pos += param_name.size(); + + if (arg_string.size() <= pos) return default_value; + + //skip past ' ', '=' or ':': + while(pos < arg_string.size() && + (arg_string[pos] == ' ' || + arg_string[pos] == '=' || + arg_string[pos] == ':')) + { + ++pos; + } + + if (arg_string[pos] == '=' || arg_string[pos] == ':') ++pos; + + std::string str = arg_string.substr(pos); + + std::istringstream isstr(str); + + T return_val = default_value; + + //parse value into return_val: + isstr >> return_val; + + //if parse failed, return default_value: + if (!isstr) return default_value; + + return return_val; +} + +}//namespace Mantevo + +#endif + diff --git a/mkl/utils/utils.cpp b/mkl/utils/utils.cpp new file mode 100644 index 0000000..29fcc8f --- /dev/null +++ b/mkl/utils/utils.cpp @@ -0,0 +1,136 @@ + +//@HEADER +// ************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#ifdef MINIFE_HAVE_TPI +#include +#endif + +#ifdef MINIFE_HAVE_TBB +#include +#endif + +#include +#include +#include + +namespace miniFE { + +//------------------------------------------------------------- +void get_parameters(int argc, char** argv, Parameters& params) +{ + std::string argstring; + Mantevo::read_args_into_string(argc, argv, argstring); + + std::string garbage("garbage"); + std::string filename = + Mantevo::parse_parameter(argstring, "input_file", garbage); + + if (filename != garbage) { + Mantevo::read_file_into_string(filename, argstring); + } + + params.nx = Mantevo::parse_parameter(argstring, "nx", 10); + params.ny = Mantevo::parse_parameter(argstring, "ny", params.nx); + params.nz = Mantevo::parse_parameter(argstring, "nz", params.ny); + params.load_imbalance = + Mantevo::parse_parameter(argstring, "load_imbalance", 0); + params.numthreads = Mantevo::parse_parameter(argstring, "numthreads", 1); + params.mv_overlap_comm_comp = Mantevo::parse_parameter(argstring, "mv_overlap_comm_comp", 0); + params.use_locking = Mantevo::parse_parameter(argstring, "use_locking", 0); + params.name = Mantevo::parse_parameter(argstring, "name",""); + params.elem_group_size = Mantevo::parse_parameter(argstring, "elem_group_size", 1); + params.use_elem_mat_fields = Mantevo::parse_parameter(argstring, "use_elem_mat_fields", 1); + params.verify_solution = Mantevo::parse_parameter(argstring, "verify_solution", 0); + params.device = Mantevo::parse_parameter(argstring, "device", 0); + params.num_devices = Mantevo::parse_parameter(argstring, "num_devices", 2); + params.skip_device 
= Mantevo::parse_parameter(argstring, "skip_device", 9999); + params.numa = Mantevo::parse_parameter(argstring, "numa", 1); +} + +//------------------------------------------------------------- +void broadcast_parameters(Parameters& params) +{ +#ifdef HAVE_MPI + const int num_int_params = 13; + int iparams[num_int_params] = {params.nx, params.ny, params.nz, params.numthreads, params.mv_overlap_comm_comp, params.use_locking, + params.elem_group_size, params.use_elem_mat_fields, params.verify_solution, + params.device, params.num_devices,params.skip_device,params.numa}; + MPI_Bcast(&iparams[0], num_int_params, MPI_INT, 0, MPI_COMM_WORLD); + params.nx = iparams[0]; + params.ny = iparams[1]; + params.nz = iparams[2]; + params.numthreads = iparams[3]; + params.mv_overlap_comm_comp = iparams[4]; + params.use_locking = iparams[5]; + params.elem_group_size = iparams[6]; + params.use_elem_mat_fields = iparams[7]; + params.verify_solution = iparams[8]; + params.device = iparams[9]; + params.num_devices = iparams[10]; + params.skip_device = iparams[11]; + params.numa = iparams[12]; + + float fparams[1] = {params.load_imbalance}; + MPI_Bcast(&fparams[0], 1, MPI_FLOAT, 0, MPI_COMM_WORLD); + params.load_imbalance = fparams[0]; + +#endif +} + +//------------------------------------------------------------- +void initialize_mpi(int argc, char** argv, int& numprocs, int& myproc) +{ +#ifdef HAVE_MPI + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#else + numprocs = 1; + myproc = 0; +#endif +} + +//------------------------------------------------------------- +void finalize_mpi() +{ +#ifdef HAVE_MPI + MPI_Finalize(); +#endif +} + +}//namespace miniFE + diff --git a/mkl/utils/utils.hpp b/mkl/utils/utils.hpp new file mode 100644 index 0000000..263294d --- /dev/null +++ b/mkl/utils/utils.hpp @@ -0,0 +1,204 @@ +#ifndef _utils_hpp_ +#define _utils_hpp_ + +//@HEADER +// 
************************************************************************ +// +// MiniFE: Simple Finite Element Assembly and Solve +// Copyright (2006-2013) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +// USA +// +// ************************************************************************ +//@HEADER + +#include +#include +#include +#include + +#ifdef HAVE_MPI +#include +#endif + +#include +#include + +namespace miniFE { + +void get_parameters(int argc, char** argv, Parameters& params); + +void broadcast_parameters(Parameters& params); + +void initialize_mpi(int argc, char** argv, int& numprocs, int& myproc); + +void finalize_mpi(); + +template +Scalar percentage_difference(Scalar value, Scalar average) +{ + //result will be the difference between value and average, represented as + //a percentage of average. + //Examples: + // if value=100 and average=50, result is 100% + // if value=500 and average=400, result is 25% + + //Note: if average is 0, result is undefined. 
We'll return -1.0; + + Scalar result = std::abs(value-average); + if (std::abs(average) > 1.e-5) { + result /= average; + result *= 100; + } + else result = -1; + + return result; +} + +template +void get_global_min_max(GlobalOrdinal local_n, + GlobalOrdinal& global_n, + GlobalOrdinal& min_n, + int& min_proc, + GlobalOrdinal& max_n, + int& max_proc) +{ +//Given a local_n, compute global_n, min/max, etc. All computed results +//will be returned on all processors. +// + int numprocs = 1, myproc = 0; +#ifdef HAVE_MPI + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); +#endif + + std::vector all_n(numprocs, 0); + all_n[myproc] = local_n; +#ifdef HAVE_MPI + std::vector tmp(all_n); + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + MPI_Allreduce(&tmp[0], &all_n[0], numprocs, mpi_dtype, MPI_MAX, MPI_COMM_WORLD); +#endif + + global_n = 0; + min_n= 5*local_n; + min_proc = 0; + max_n= 0; + max_proc = 0; + + for(int i=0; i= max_n) { + max_n = all_n[i]; + max_proc = i; + } + } +} + +template +Scalar compute_std_dev_as_percentage(Scalar local_nrows, + Scalar avg_nrows) +{ +//compute and return a standard deviation for the deviation of local_nrows from the average. +//the std. dev. will be expressed as a percentage of avg_nrows. +// +//Input argument local_nrows is really a integer, but taking it as a floating-point scalar is +//harmless. +// +#ifdef HAVE_MPI + int numprocs = 1, myproc = 0; + MPI_Comm_size(MPI_COMM_WORLD, &numprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + MPI_Datatype mpi_dtype = TypeTraits::mpi_type(); + +//If it's significantly more efficient, we may consider using MPI_Gather below instead of +//MPI_Allgather. We really only need to compute std.dev. on proc 0... +// +//(But for now, use MPI_Allgather and compute on all procs.) 
+ + std::vector all_nrows(numprocs, 0); + MPI_Allgather(&local_nrows, 1, mpi_dtype, &all_nrows[0], 1, mpi_dtype, MPI_COMM_WORLD); + + //turn all_nrows contents into deviations, add to sum-of-squares-of-deviations: + Scalar sum_sqr_dev = 0; + for(size_t i=0; i1 ? std::sqrt(tmp1/(numprocs-1)) : 0; + + //std_dev is now the standard deviation of rows-per-processor with respect + //to avg_nrows. + //Next turn std_dev into a percentage of avg_nrows: + std_dev /= avg_nrows; + std_dev *= 100; + return std_dev; +#else + return 0; +#endif +} + +template +GlobalOrdinal find_row_for_id(GlobalOrdinal id, + const std::map& ids_to_rows) +{ + typename std::map::const_iterator + iter = ids_to_rows.lower_bound(id); + + if (iter == ids_to_rows.end() || iter->first != id) { + if (ids_to_rows.size() > 0) { + --iter; + } + else { + std::cout << "ERROR, failed to map id to row."<first == id) { + return iter->second; + } + + if (iter == ids_to_rows.begin() && iter->first > id) { + std::cout << "ERROR, id:" << id << ", ids_to_rows.begin(): " << iter->first<first; + + if (offset < 0) { + std::cout << "ERROR, negative offset in find_row_for_id for id="<second + offset; +} + +}//namespace miniFE + +#endif +