A couple of changes to improve shared memory behavior when resources get
constrained:

* Make sure we always have a number of eager fragments available that scales
  with the number of processes communicating with a given proc over shared
  memory
* Use FREE_LIST_GET instead of FREE_LIST_WAIT to return an error to the PML
  when resource exhaustion occurs
* Don't dereference the frag during alloc unless we're sure it's not NULL

Reviewed by: Galen
Refs trac:413

This commit was SVN r12053.

The following Trac tickets were found above:
  Ticket 413 --> https://svn.open-mpi.org/trac/ompi/ticket/413
Этот коммит содержится в:
родитель
b73d9ba3ca
Коммит
51b2a0fd3f
@ -242,3 +242,27 @@ int ompi_free_list_parse( ompi_free_list_t* list,
|
||||
position->last_memory = (unsigned char*)((opal_list_item_t*)position->last_memory)->opal_list_next;
|
||||
goto dig_for_the_requests;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
ompi_free_list_resize(ompi_free_list_t* flist, size_t size)
|
||||
{
|
||||
int ret = OMPI_SUCCESS;
|
||||
|
||||
if (flist->fl_num_allocated > size) {
|
||||
return OMPI_SUCCESS;
|
||||
} else {
|
||||
size_t inc_num;
|
||||
OPAL_THREAD_LOCK(&((fl)->fl_lock));
|
||||
inc_num = size - flist->fl_num_allocated +
|
||||
flist->fl_num_per_alloc - (inc_num % flist->fl_num_per_alloc);
|
||||
if (flist->fl_num_allocated + inc_num > flist->fl_max_to_alloc) {
|
||||
OPAL_THREAD_UNLOCK(&((fl)->fl_lock));
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
ret = ompi_free_list_grow(flist, inc_num);
|
||||
OPAL_THREAD_UNLOCK(&((fl)->fl_lock));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -100,6 +100,12 @@ static inline int ompi_free_list_init(
|
||||
|
||||
OMPI_DECLSPEC int ompi_free_list_grow(ompi_free_list_t* flist, size_t num_elements);
|
||||
|
||||
/* Grow the free list to be *at least* size elements. This function
|
||||
will not shrink the list if it is already larger than size and may
|
||||
grow it past size if necessary (it will grow in
|
||||
num_elements_per_alloc chunks) */
|
||||
OMPI_DECLSPEC int ompi_free_list_resize(ompi_free_list_t *flist, size_t size);
|
||||
|
||||
/* Allows walking through all of the allocated items. Not thread-safe, not
|
||||
* protected, this function should never be used except for debugging purposes.
|
||||
* The position should never be NULL, and it should be set to {NULL, NULL}
|
||||
|
@ -623,6 +623,13 @@ int mca_btl_sm_add_procs_same_base_addr(
|
||||
}
|
||||
}
|
||||
|
||||
/* make sure we have enough eager fragments for each process */
|
||||
return_code = ompi_free_list_resize(&mca_btl_sm_component.sm_frags1,
|
||||
(mca_btl_sm_component.num_smp_procs + n_local_procs) * 2);
|
||||
if (OMPI_SUCCESS != return_code) {
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
/* update the local smp process count */
|
||||
mca_btl_sm_component.num_smp_procs+=n_local_procs;
|
||||
|
||||
@ -778,10 +785,14 @@ extern mca_btl_base_descriptor_t* mca_btl_sm_alloc(
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
frag->segment.seg_len = size;
|
||||
|
||||
if (frag != NULL) {
|
||||
frag->segment.seg_len = size;
|
||||
}
|
||||
|
||||
return (mca_btl_base_descriptor_t*)frag;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return a segment allocated by this BTL.
|
||||
*
|
||||
|
@ -125,7 +125,7 @@ int mca_btl_sm_component_open(void)
|
||||
mca_btl_sm_component.sm_free_list_max =
|
||||
mca_btl_sm_param_register_int("free_list_max", -1);
|
||||
mca_btl_sm_component.sm_free_list_inc =
|
||||
mca_btl_sm_param_register_int("free_list_inc", 256);
|
||||
mca_btl_sm_param_register_int("free_list_inc", 64);
|
||||
mca_btl_sm_component.sm_exclusivity =
|
||||
mca_btl_sm_param_register_int("exclusivity", MCA_BTL_EXCLUSIVITY_HIGH-1);
|
||||
mca_btl_sm_component.sm_latency =
|
||||
|
@ -33,12 +33,14 @@ static inline void mca_btl_sm_frag_constructor(mca_btl_sm_frag_t* frag)
|
||||
/*
 * Constructor for fragments that live on the sm_frags1 free list.
 * Records the owning list in the fragment, sizes it to the component's
 * eager limit, and then runs the common fragment constructor.
 */
static void mca_btl_sm_frag1_constructor(mca_btl_sm_frag_t* frag)
{
    /* owning list, stored in the fragment itself */
    frag->my_list = &mca_btl_sm_component.sm_frags1;
    frag->size    = mca_btl_sm_component.eager_limit;

    /* both fields are set before the shared constructor runs */
    mca_btl_sm_frag_constructor(frag);
}
|
||||
|
||||
/*
 * Constructor for fragments that live on the sm_frags2 free list.
 * Records the owning list in the fragment, sizes it to the component's
 * maximum fragment size, and then runs the common fragment constructor.
 */
static void mca_btl_sm_frag2_constructor(mca_btl_sm_frag_t* frag)
{
    /* owning list, stored in the fragment itself */
    frag->my_list = &mca_btl_sm_component.sm_frags2;
    frag->size    = mca_btl_sm_component.max_frag_size;

    /* both fields are set before the shared constructor runs */
    mca_btl_sm_frag_constructor(frag);
}
|
||||
|
||||
|
@ -56,20 +56,18 @@ OBJ_CLASS_DECLARATION(mca_btl_sm_frag_t);
|
||||
OBJ_CLASS_DECLARATION(mca_btl_sm_frag1_t);
|
||||
OBJ_CLASS_DECLARATION(mca_btl_sm_frag2_t);
|
||||
|
||||
#define MCA_BTL_SM_FRAG_ALLOC1(frag, rc) \
|
||||
{ \
|
||||
ompi_free_list_item_t* item; \
|
||||
OMPI_FREE_LIST_WAIT(&mca_btl_sm_component.sm_frags1, item, rc); \
|
||||
frag = (mca_btl_sm_frag_t*)item; \
|
||||
frag->my_list = &mca_btl_sm_component.sm_frags1; \
|
||||
#define MCA_BTL_SM_FRAG_ALLOC1(frag, rc) \
|
||||
{ \
|
||||
ompi_free_list_item_t* item; \
|
||||
OMPI_FREE_LIST_GET(&mca_btl_sm_component.sm_frags1, item, rc); \
|
||||
frag = (mca_btl_sm_frag_t*)item; \
|
||||
}
|
||||
|
||||
#define MCA_BTL_SM_FRAG_ALLOC2(frag, rc) \
|
||||
{ \
|
||||
ompi_free_list_item_t* item; \
|
||||
OMPI_FREE_LIST_WAIT(&mca_btl_sm_component.sm_frags2, item, rc); \
|
||||
frag = (mca_btl_sm_frag_t*)item; \
|
||||
frag->my_list = &mca_btl_sm_component.sm_frags2; \
|
||||
#define MCA_BTL_SM_FRAG_ALLOC2(frag, rc) \
|
||||
{ \
|
||||
ompi_free_list_item_t* item; \
|
||||
OMPI_FREE_LIST_GET(&mca_btl_sm_component.sm_frags2, item, rc); \
|
||||
frag = (mca_btl_sm_frag_t*)item; \
|
||||
}
|
||||
|
||||
#define MCA_BTL_SM_FRAG_RETURN(frag) \
|
||||
@ -77,4 +75,3 @@ OBJ_CLASS_DECLARATION(mca_btl_sm_frag2_t);
|
||||
OMPI_FREE_LIST_RETURN(frag->my_list, (ompi_free_list_item_t*)(frag)); \
|
||||
}
|
||||
#endif
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user