b89f8fabc9
The project includes the following components and frameworks: - ML Collective component - NETPATTERNS and COMMPATTERNS common components - BCOL framework - SBGP framework Note: By default the ML collective component is disabled. In order to enable the new collectives, the user should bump up the priority of the ml component (coll_ml_priority). ============================================= Primary Contributors (in alphabetical order): Ishai Rabinovich (Mellanox) Joshua S. Ladd (ORNL / Mellanox) Manjunath Gorentla Venkata (ORNL) Mike Dubman (Mellanox) Noam Bloch (Mellanox) Pavel (Pasha) Shamis (ORNL / Mellanox) Richard Graham (ORNL / Mellanox) Vasily Filipov (Mellanox) This commit was SVN r27078.
321 строка
9.4 KiB
C
321 строка
9.4 KiB
C
/*
|
|
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
|
|
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "ompi_config.h"
|
|
#include "opal/class/opal_list.h"
|
|
#include "opal/threads/mutex.h"
|
|
#include "coll_ml.h"
|
|
#include "coll_ml_mca.h"
|
|
#include "coll_ml_lmngr.h"
|
|
|
|
/* Constructor for list memory manager */
|
|
static void construct_lmngr(mca_coll_ml_lmngr_t *lmngr)
|
|
{
|
|
mca_coll_ml_component_t *cm = &mca_coll_ml_component;
|
|
|
|
ML_VERBOSE(7, ("Constructing new list manager %p", (void *)lmngr));
|
|
|
|
/* No real memory is allocated, only basic init.
|
|
The real memory will be allocated on demand, on first block allocation */
|
|
|
|
/* I caching this block size, alignment and list size
|
|
since maybe in future we will want to define different parameters
|
|
for lists */
|
|
lmngr->list_block_size = cm->lmngr_block_size;
|
|
lmngr->list_alignment = cm->lmngr_alignment;
|
|
lmngr->list_size = cm->lmngr_size;
|
|
lmngr->base_addr = NULL; /* If the base addr is not null, the struct was initilized
|
|
and memory was allocated */
|
|
/* Not sure that lock is required */
|
|
OBJ_CONSTRUCT(&lmngr->mem_lock, opal_mutex_t);
|
|
|
|
/* Only construct the list, no memry initialisation */
|
|
OBJ_CONSTRUCT(&lmngr->blocks_list, opal_list_t);
|
|
}
|
|
|
|
/**
 * Destructor for the list memory manager.
 *
 * Releases the block descriptors that are still on the free list. If the
 * backing memory was allocated, it is deregistered from every attached
 * network context and then freed. Finally the cached list parameters are
 * reset and the lock is destructed.
 *
 * @param lmngr  list manager object being destroyed
 */
static void destruct_lmngr(mca_coll_ml_lmngr_t *lmngr)
{
    int max_nc = lmngr->n_resources;
    int rc, i;
    bcol_base_network_context_t *nc;
    opal_list_item_t *item;

    ML_VERBOSE(6, ("Destructing list manager %p", (void *)lmngr));

    /* The block descriptors are created with OBJ_NEW, so they have to be
     * released (destructed AND freed). OBJ_DESTRUCT alone only runs the
     * destructors and leaks the descriptor itself. */
    while (!opal_list_is_empty(&lmngr->blocks_list)) {
        item = opal_list_remove_first(&lmngr->blocks_list);
        OBJ_RELEASE(item);
    }

    OBJ_DESTRUCT(&lmngr->blocks_list);

    if (NULL != lmngr->base_addr) {
        /* Deregister the memory from every known network context.
         * A failure is reported but does not stop the teardown. */
        for (i = 0; i < max_nc; i++) {
            nc = lmngr->net_context[i];
            rc = nc->deregister_memory_fn(nc->context_data,
                    lmngr->reg_desc[nc->context_id]);
            if (rc != OMPI_SUCCESS) {
                ML_ERROR(("Failed to unregister , lmngr %p", (void *)lmngr));
            }
        }

        ML_VERBOSE(10, ("Release base addr %p", lmngr->base_addr));

        /* NOTE(review): when the memory was obtained through the
         * malloc + manual alignment path, base_addr may differ from the
         * pointer returned by malloc - confirm that freeing it is valid. */
        free(lmngr->base_addr);
        lmngr->base_addr = NULL;
    }

    lmngr->list_block_size = 0;
    lmngr->list_alignment = 0;
    lmngr->list_size = 0;

    OBJ_DESTRUCT(&lmngr->mem_lock);
}
|
|
|
|
/* Class descriptor of the list manager: it derives from opal_object_t and
 * is set up / torn down by construct_lmngr() / destruct_lmngr(). */
OBJ_CLASS_INSTANCE(mca_coll_ml_lmngr_t, opal_object_t,
                   construct_lmngr, destruct_lmngr);
|
|
|
|
/* Block descriptor constructor: a new descriptor points to no memory yet. */
static void lmngr_block_construct(mca_coll_ml_lmngr_block_t *block)
{
    block->base_addr = NULL;
}

/* Block descriptor destructor: intentionally empty, nothing to clean up. */
static void lmngr_block_destruct(mca_coll_ml_lmngr_block_t *block)
{
    (void) block;
}

/* Class descriptor of a memory block: a list item with the
 * constructor / destructor pair defined right above. */
OBJ_CLASS_INSTANCE(mca_coll_ml_lmngr_block_t, opal_list_item_t,
                   lmngr_block_construct, lmngr_block_destruct);
|
|
|
|
|
|
/**
 * Override the block size, list size and alignment of a list manager.
 *
 * Tuning is only allowed while the manager has not allocated its memory
 * yet (base_addr is still NULL); afterwards the layout is fixed.
 *
 * @param lmngr       list manager to tune
 * @param block_size  new size of a single block
 * @param list_size   new number of blocks in the list
 * @param alignment   new alignment of the backing memory
 *
 * @return OMPI_SUCCESS on success, OMPI_ERROR if the manager is already
 *         initialized.
 */
int mca_coll_ml_lmngr_tune(mca_coll_ml_lmngr_t *lmngr,
        size_t block_size, size_t list_size, size_t alignment)
{
    ML_VERBOSE(7, ("Tunning list manager"));

    /* A non-NULL base address means the memory was already allocated and
     * sliced with the old parameters, so they can not be changed anymore. */
    if (OPAL_UNLIKELY(NULL != lmngr->base_addr)) {
        ML_VERBOSE(7, ("The list manager is already initialized, you can not tune it"));
        return OMPI_ERROR;
    }

    lmngr->list_block_size = block_size;
    lmngr->list_alignment = alignment;
    lmngr->list_size = list_size;

    return OMPI_SUCCESS;
}
|
|
|
|
int mca_coll_ml_lmngr_reg(void)
|
|
{
|
|
int ival, tmp;
|
|
int ret = OMPI_SUCCESS;
|
|
|
|
mca_coll_ml_component_t *cm = &mca_coll_ml_component;
|
|
|
|
#define CHECK(expr) do {\
|
|
tmp = (expr); \
|
|
if (OMPI_SUCCESS != tmp) ret = tmp; \
|
|
} while (0)
|
|
|
|
ML_VERBOSE(7, ("Setting parameters for list manager"));
|
|
|
|
CHECK(reg_int("memory_manager_list_size", NULL,
|
|
"Memory manager list size", 8, &ival, 0));
|
|
cm->lmngr_size = ival;
|
|
|
|
/* The size list couldn't be less than possible max of ML modules,
|
|
it = max supported communicators by ML */
|
|
if (cm->lmngr_size < cm->max_comm) {
|
|
cm->lmngr_size = cm->max_comm;
|
|
}
|
|
|
|
CHECK(reg_int("memory_manager_block_size", NULL,
|
|
"Memory manager block size",
|
|
cm->payload_buffer_size *
|
|
cm->n_payload_buffs_per_bank *
|
|
cm->n_payload_mem_banks *
|
|
cm->lmngr_size, &ival, 0));
|
|
mca_coll_ml_component.lmngr_block_size = ival;
|
|
|
|
CHECK(reg_int("memory_manager_alignment", NULL,
|
|
"Memory manager alignment", 4 * 1024, &ival, 0));
|
|
cm->lmngr_alignment = ival;
|
|
|
|
return ret;
|
|
}
|
|
|
|
/**
 * Register the whole memory region of the list manager with one network
 * context.
 *
 * If the registration fails, the region is deregistered from the contexts
 * attached to the manager and the registration error is returned. If one
 * of those deregistrations fails, the unrolling stops and that error is
 * returned instead.
 *
 * @param lmngr  list manager owning the memory region
 * @param nc     network context to register the memory with
 *
 * @return OMPI_SUCCESS on success, an error code otherwise.
 */
static int lmngr_register(mca_coll_ml_lmngr_t *lmngr, bcol_base_network_context_t *nc)
{
    const int n_contexts = lmngr->n_resources;
    int reg_rc;
    int idx;

    reg_rc = nc->register_memory_fn(nc->context_data,
                                    lmngr->base_addr,
                                    lmngr->list_size * lmngr->list_block_size,
                                    &lmngr->reg_desc[nc->context_id]);
    if (OMPI_SUCCESS == reg_rc) {
        return OMPI_SUCCESS;
    }

    ML_VERBOSE(7, ("Failed to register [%d], unrolling the registration", reg_rc));

    /* Unroll: walk over every attached context and deregister the region.
     * NOTE(review): the loop also visits contexts that may never have been
     * registered (including the one that has just failed) - confirm that
     * deregister_memory_fn tolerates this. */
    for (idx = 0; idx < n_contexts; ++idx) {
        bcol_base_network_context_t *ctx = lmngr->net_context[idx];
        int dereg_rc = ctx->deregister_memory_fn(ctx->context_data,
                                                 lmngr->reg_desc[ctx->context_id]);

        if (OMPI_SUCCESS != dereg_rc) {
            return dereg_rc;
        }
    }

    return reg_rc;
}
|
|
|
|
/**
 * Allocate, register and slice the memory of the list manager.
 *
 * Allocates list_size * list_block_size bytes aligned to list_alignment,
 * registers the region with every attached network context and finally
 * cuts it into list_size blocks that are appended to the free list.
 *
 * @param lmngr  list manager to initialize
 *
 * @return OMPI_SUCCESS on success, OMPI_ERROR if the allocation failed, or
 *         the error code returned by the failed registration.
 */
static int mca_coll_ml_lmngr_init(mca_coll_ml_lmngr_t *lmngr)
{
    int i, num_blocks;
    int rc;
    unsigned char *addr;
    bcol_base_network_context_t *nc;

    ML_VERBOSE(7, ("List initialization"));

#ifdef HAVE_POSIX_MEMALIGN
    /* posix_memalign returns the error code instead of setting errno */
    if ((errno = posix_memalign(&lmngr->base_addr,
                    lmngr->list_alignment,
                    lmngr->list_size * lmngr->list_block_size)) != 0) {
        ML_ERROR(("Failed to allocate memory: %s [%d]", strerror(errno), errno));
        return OMPI_ERROR;
    }
#else
    /* Over-allocate by the alignment and align the address by hand */
    lmngr->base_addr =
        malloc(lmngr->list_size * lmngr->list_block_size + lmngr->list_alignment);
    if (NULL == lmngr->base_addr) {
        ML_ERROR(("Failed to allocate memory: %s [%d]", strerror(errno), errno));
        return OMPI_ERROR;
    }

    /* NOTE(review): the aligned address overwrites the pointer returned by
     * malloc, so the original pointer is no longer available for free() -
     * a dedicated field would be needed to keep it. */
    lmngr->base_addr = (void*)OPAL_ALIGN((uintptr_t)lmngr->base_addr,
            lmngr->list_alignment, uintptr_t);
#endif

    assert(lmngr->n_resources < MCA_COLL_ML_MAX_REG_INFO);

    /* Register the region with all the network contexts known so far */
    for (i = 0; i < lmngr->n_resources; i++) {
        nc = lmngr->net_context[i];
        ML_VERBOSE(7, ("Call registration for resource index %d", i));
        rc = lmngr_register(lmngr, nc);
        if (OMPI_SUCCESS != rc) {
            /* NOTE(review): base_addr stays allocated (non-NULL) on this
             * path while the free list is empty - confirm it is intended. */
            ML_ERROR(("Failed to lmngr register: %s [%d]", strerror(errno), errno));
            return rc;
        }
    }

    /* slice the memory to blocks */
    addr = (unsigned char *) lmngr->base_addr;
    for (num_blocks = 0; num_blocks < (int)lmngr->list_size; num_blocks++) {
        mca_coll_ml_lmngr_block_t *item = OBJ_NEW(mca_coll_ml_lmngr_block_t);
        item->base_addr = (void *)addr;
        item->lmngr = lmngr;
        /* ML_VERBOSE(10, ("Appending block # %d %p", num_blocks, (void *)addr)); */
        opal_list_append(&lmngr->blocks_list, (opal_list_item_t *)item);
        /* advance the address */
        addr += lmngr->list_block_size;
    }

    /* opal_list_get_size() returns a size_t; cast it to match "%d" */
    ML_VERBOSE(7, ("List initialization done %d",
                (int) opal_list_get_size(&lmngr->blocks_list)));
    return OMPI_SUCCESS;
}
|
|
|
|
mca_coll_ml_lmngr_block_t* mca_coll_ml_lmngr_alloc (
|
|
mca_coll_ml_lmngr_t *lmngr)
|
|
{
|
|
int rc;
|
|
opal_list_t *list = &lmngr->blocks_list;
|
|
|
|
/* Check if the list manager was initialized */
|
|
if(OPAL_UNLIKELY(NULL == lmngr->base_addr)) {
|
|
ML_VERBOSE(7 ,("Starting memory initialization\n"));
|
|
rc = mca_coll_ml_lmngr_init(lmngr);
|
|
if (OMPI_SUCCESS != rc) {
|
|
ML_ERROR(("Failed to init memory\n"));
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
if(OPAL_UNLIKELY(opal_list_is_empty(list))) {
|
|
/* Upper layer need to handle the NULL */
|
|
ML_ERROR(("List manager is empty.\n"));
|
|
return NULL;
|
|
}
|
|
|
|
return (mca_coll_ml_lmngr_block_t *)opal_list_remove_first(list);
|
|
}
|
|
|
|
/**
 * Give a block back to the list manager it was taken from.
 *
 * @param block  block to return; it is appended to the tail of the block
 *               list of the manager stored in block->lmngr
 */
void mca_coll_ml_lmngr_free(mca_coll_ml_lmngr_block_t *block)
{
    mca_coll_ml_lmngr_t *owner = block->lmngr;

    opal_list_append(&owner->blocks_list, (opal_list_item_t *) block);
}
|
|
|
|
/**
 * Attach a network context to the list manager.
 *
 * A context that is already attached is silently accepted. A new context
 * gets the next free context id and, if the memory of the manager is
 * already allocated, the memory is registered with it right away.
 *
 * @param lmngr  list manager to extend
 * @param nc     network context to attach
 *
 * @return OMPI_SUCCESS on success (or if the context was already known),
 *         OMPI_ERROR if nc is NULL or the manager is full, or the error
 *         code of the failed memory registration.
 */
int mca_coll_ml_lmngr_append_nc(mca_coll_ml_lmngr_t *lmngr, bcol_base_network_context_t *nc)
{
    int i, rc;

    ML_VERBOSE(7, ("Append new network context %p to list manager %p",
                (void *)nc, (void *)lmngr));

    if (NULL == nc) {
        return OMPI_ERROR;
    }

    /* Check if we already have the context on the list. If we do, there
     * is nothing to do, just return success. This lookup has to come
     * before the capacity check, otherwise re-appending a known context
     * to a full manager would be reported as an error. */
    for (i = 0; i < lmngr->n_resources; i++) {
        if (lmngr->net_context[i] == nc) {
            ML_VERBOSE(7, ("It is not new "));
            return OMPI_SUCCESS;
        }
    }

    if (OPAL_UNLIKELY(MCA_COLL_ML_MAX_REG_INFO == lmngr->n_resources)) {
        ML_ERROR(("MPI overflows maximum supported network contexts is %d",
                  (int) MCA_COLL_ML_MAX_REG_INFO));
        return OMPI_ERROR;
    }

    ML_VERBOSE(7, ("Adding new context"));

    /* Setting context id: it is the index of the context in the manager */
    nc->context_id = lmngr->n_resources;
    lmngr->net_context[lmngr->n_resources] = nc;

    lmngr->n_resources++;

    /* Register the memory with the new context, but only if the memory
     * was already allocated; otherwise the registration happens later,
     * when the manager is initialized. */
    if (NULL != lmngr->base_addr) {
        rc = lmngr_register(lmngr, nc);
        if (OMPI_SUCCESS != rc) {
            /* Report the registration failure instead of hiding it */
            return rc;
        }
    }

    return OMPI_SUCCESS;
}
|