Add error callback to sm BTL. Call it when allocation of the initial circular
buffer fails. If cb is already allocated, but it is full and allocation of additional cb fails, we spin waiting for receiver to free space in existing cb. This commit was SVN r13635.
Этот коммит содержится в:
родитель
2e042c91cf
Коммит
4d4b0a022a
@ -338,7 +338,7 @@ static inline int ompi_fifo_write_to_head_same_base_addr(void *data,
|
||||
if(error_code != OMPI_CB_ERROR) {
|
||||
fifo->head->cb_overflow = false;
|
||||
opal_atomic_unlock(&(fifo->fifo_lock));
|
||||
return error_code;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* see if next queue is available - while the next queue
|
||||
@ -384,11 +384,11 @@ static inline int ompi_fifo_write_to_head_same_base_addr(void *data,
|
||||
error_code=ompi_cb_fifo_write_to_head_same_base_addr(data,
|
||||
(ompi_cb_fifo_t *)&(fifo->head->cb_fifo));
|
||||
if( OMPI_CB_ERROR == error_code ) {
|
||||
return error_code;
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
return error_code;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
@ -108,7 +108,7 @@ mca_btl_sm_t mca_btl_sm[2] = {
|
||||
NULL, /* get */
|
||||
mca_btl_base_dump,
|
||||
NULL, /* mpool */
|
||||
NULL /* register error */
|
||||
mca_btl_sm_register_error_cb /* register error */
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -136,7 +136,7 @@ mca_btl_sm_t mca_btl_sm[2] = {
|
||||
NULL, /* get function */
|
||||
mca_btl_base_dump,
|
||||
NULL, /* mpool */
|
||||
NULL /* register error */
|
||||
mca_btl_sm_register_error_cb /* register error */
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -770,6 +770,17 @@ int mca_btl_sm_register(
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Register callback function for error handling..
|
||||
*/
|
||||
int mca_btl_sm_register_error_cb(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_module_error_cb_fn_t cbfunc)
|
||||
{
|
||||
mca_btl_sm_t *sm_btl = (mca_btl_sm_t *)btl;
|
||||
sm_btl->error_cb = cbfunc;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate a segment.
|
||||
|
@ -179,11 +179,23 @@ struct mca_btl_sm_t {
|
||||
mca_btl_base_module_t super; /**< base BTL interface */
|
||||
bool btl_inited; /**< flag indicating if btl has been inited */
|
||||
mca_btl_sm_recv_reg_t sm_reg[256];
|
||||
mca_btl_base_module_error_cb_fn_t error_cb;
|
||||
};
|
||||
typedef struct mca_btl_sm_t mca_btl_sm_t;
|
||||
|
||||
extern mca_btl_sm_t mca_btl_sm[2];
|
||||
|
||||
/**
|
||||
* Register a callback function that is called on error..
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @return Status indicating if cleanup was successful
|
||||
*/
|
||||
|
||||
int mca_btl_sm_register_error_cb(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_module_error_cb_fn_t cbfunc
|
||||
);
|
||||
|
||||
/**
|
||||
* Cleanup any resources held by the BTL.
|
||||
|
@ -49,6 +49,7 @@
|
||||
#include "ompi/mca/pml/base/pml_base_module_exchange.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/common/sm/common_sm_mmap.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
#include "btl_sm.h"
|
||||
#include "btl_sm_frag.h"
|
||||
#include "btl_sm_fifo.h"
|
||||
@ -343,7 +344,7 @@ int mca_btl_sm_component_progress(void)
|
||||
ompi_fifo_t *fifo = NULL;
|
||||
int my_smp_rank=mca_btl_sm_component.my_smp_rank;
|
||||
int proc;
|
||||
int rc = 0;
|
||||
int rc = 0, btl = 0;
|
||||
|
||||
/* send progress is made by the PML */
|
||||
|
||||
@ -405,7 +406,7 @@ int mca_btl_sm_component_progress(void)
|
||||
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank],
|
||||
my_smp_rank, peer_smp_rank, frag, rc );
|
||||
if(OMPI_SUCCESS != rc)
|
||||
return rc;
|
||||
goto err;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -416,7 +417,7 @@ int mca_btl_sm_component_progress(void)
|
||||
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank],
|
||||
my_smp_rank, peer_smp_rank, frag, rc );
|
||||
if(OMPI_SUCCESS != rc)
|
||||
return rc;
|
||||
goto err;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -424,6 +425,7 @@ int mca_btl_sm_component_progress(void)
|
||||
} /* end peer_local_smp_rank loop */
|
||||
|
||||
|
||||
btl = 1;
|
||||
/* loop over fifo's - procs with different base shared memory
|
||||
* virtual address as this process */
|
||||
for( proc=0 ; proc < mca_btl_sm_component.num_smp_procs_different_base_addr
|
||||
@ -497,7 +499,7 @@ int mca_btl_sm_component_progress(void)
|
||||
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank],
|
||||
my_smp_rank, peer_smp_rank, frag, rc );
|
||||
if(OMPI_SUCCESS != rc)
|
||||
return rc;
|
||||
goto err;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -508,11 +510,15 @@ int mca_btl_sm_component_progress(void)
|
||||
MCA_BTL_SM_FIFO_WRITE( mca_btl_sm_component.sm_peers[peer_smp_rank],
|
||||
my_smp_rank, peer_smp_rank, frag, rc );
|
||||
if(OMPI_SUCCESS != rc)
|
||||
return rc;
|
||||
goto err;
|
||||
break;
|
||||
}
|
||||
}
|
||||
rc++;
|
||||
} /* end peer_local_smp_rank loop */
|
||||
return rc;
|
||||
err:
|
||||
BTL_ERROR(("SM faild to send message due to shortage of shared memory.\n"));
|
||||
mca_btl_sm[btl].error_cb(&mca_btl_sm[btl].super, MCA_BTL_ERROR_FLAGS_FATAL);
|
||||
return rc;
|
||||
}
|
||||
|
@ -29,12 +29,11 @@ do { \
|
||||
} \
|
||||
\
|
||||
/* post fragment */ \
|
||||
rc=ompi_fifo_write_to_head_same_base_addr(frag, fifo, \
|
||||
mca_btl_sm_component.sm_mpool); \
|
||||
if( 0 <= rc ) { \
|
||||
MCA_BTL_SM_SIGNAL_PEER(endpoint_peer); \
|
||||
rc=OMPI_SUCCESS; \
|
||||
} \
|
||||
while(ompi_fifo_write_to_head_same_base_addr(frag, fifo, \
|
||||
mca_btl_sm_component.sm_mpool) != OMPI_SUCCESS) \
|
||||
opal_progress(); \
|
||||
MCA_BTL_SM_SIGNAL_PEER(endpoint_peer); \
|
||||
rc=OMPI_SUCCESS; \
|
||||
if(opal_using_threads()) \
|
||||
opal_atomic_unlock(&fifo->head_lock); \
|
||||
} while(0)
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user