1
1
fix: do not fail on BML allocation error; wait for some puts to complete and retry

fixed by Roman, reviewed by Mike/Alex
cmr=v1.7.5:reviewer=ompi-rm1.7

This commit was SVN r30779.
Этот коммит содержится в:
Mike Dubman 2014-02-20 09:53:32 +00:00
родитель 63803f5e61
Коммит 684e78e669
3 изменённых файлов: 48 добавлений и 6 удалений

Просмотреть файл

@ -107,19 +107,57 @@ static inline void calc_nfrags(mca_bml_base_btl_t* bml_btl,
*nfrags = 1 + (size - 1) / (*frag_size);
}
/**
 * Block until a given number of outstanding puts have completed.
 *
 * @param puts_wait  Number of puts to wait for.
 *                   - 0 (or negative): wait for every active put.
 *                   - larger than the number of active puts: wait for all
 *                     of them.
 *
 * @return Always OSHMEM_SUCCESS.
 */
static int mca_spml_yoda_fence_internal(int puts_wait)
{
    /* How many puts may still be in flight when we stop waiting.
     * Zero means "drain everything". */
    int remaining_allowed = 0;

    if (puts_wait > 0) {
        remaining_allowed = mca_spml_yoda.n_active_puts - puts_wait;
        if (remaining_allowed < 0) {
            /* Asked to wait for more puts than are active: drain them all. */
            remaining_allowed = 0;
        }
    }

    /* n_active_puts is re-read on every iteration; presumably it is
     * decremented as requests complete - confirm against the completion
     * callbacks. */
    while (mca_spml_yoda.n_active_puts > remaining_allowed) {
        oshmem_request_wait_any_completion();
    }

    return OSHMEM_SUCCESS;
}
/**
 * Allocate a BML descriptor, retrying once if the first attempt fails.
 *
 * When the first allocation produces no descriptor (or a descriptor without
 * a source segment), wait through mca_spml_yoda_fence_internal() using the
 * configured bml_alloc_threshold, then attempt the allocation exactly one
 * more time.
 *
 * @param bml_btl   BML/BTL endpoint to allocate from.
 * @param des       [out] Receives the descriptor. It may still be NULL, or
 *                  have a NULL des_src, if the retry fails as well; callers
 *                  must check before use.
 * @param order     Not used by this function: MCA_BTL_NO_ORDER is always
 *                  requested.
 * @param size      Requested size. When use_send is non-zero and size is
 *                  non-zero, SPML_YODA_SEND_CONTEXT_SIZE is added to it.
 * @param flags     Passed unchanged to mca_bml_base_alloc().
 * @param use_send  Non-zero to account for the send context in the size.
 */
static inline void mca_spml_yoda_bml_alloc( mca_bml_base_btl_t* bml_btl,
                                            mca_btl_base_descriptor_t** des,
                                            uint8_t order, size_t size, uint32_t flags,
                                            int use_send)
{
    bool is_done;
    bool is_fence_complete;

    is_done = false;
    is_fence_complete = false;

    if (use_send) {
        /* A zero-sized request stays zero-sized. */
        size = (0 == size ? size : size + SPML_YODA_SEND_CONTEXT_SIZE);
    }

    do {
        mca_bml_base_alloc(bml_btl,
                           des,
                           MCA_BTL_NO_ORDER,
                           size,
                           flags);

        if (OPAL_UNLIKELY(!(*des) || !(*des)->des_src ) && !is_fence_complete) {
            /* First failure: wait on outstanding puts, then go around
             * once more. */
            mca_spml_yoda_fence_internal(mca_spml_yoda.bml_alloc_threshold);
            is_fence_complete = true;
        } else {
            /* Either the allocation succeeded or the single retry has
             * already been used; return whatever *des holds. */
            is_done = true;
        }
    } while (!is_done);
}
static inline void spml_yoda_prepare_for_put(void* buffer, size_t size, void* p_src, void* p_dst, int use_send)
@ -861,11 +899,7 @@ int mca_spml_yoda_put_nb(void* dst_addr,
/**
 * Wait until every outstanding put has completed.
 *
 * Delegates to mca_spml_yoda_fence_internal() with puts_wait == 0, which
 * waits while any put is still active.
 *
 * @return The result of mca_spml_yoda_fence_internal(0).
 */
int mca_spml_yoda_fence(void)
{
    return mca_spml_yoda_fence_internal(0);
}
int mca_spml_yoda_wait_gets(void)

Просмотреть файл

@ -66,6 +66,8 @@ struct mca_spml_yoda_t {
int free_list_num; /* initial size of free list */
int free_list_max; /* maximum size of free list */
int free_list_inc; /* number of elements to grow free list */
int bml_alloc_threshold; /* number of puts to wait for
when a put/get temporary buffer allocation fails */
/* lock queue access */
opal_mutex_t lock;

Просмотреть файл

@ -78,6 +78,12 @@ static int mca_spml_yoda_component_register(void)
mca_spml_yoda_param_register_int("free_list_max", 1024, 0);
mca_spml_yoda.free_list_inc =
mca_spml_yoda_param_register_int("free_list_inc", 16, 0);
mca_spml_yoda.bml_alloc_threshold =
mca_spml_yoda_param_register_int("bml_alloc_threshold",
3,
"number of puts to wait \
in case of put/get temporary buffer \
allocation failture");
mca_spml_yoda.priority =
mca_spml_yoda_param_register_int("priority",
10,