Misc fixes; changes to support multiple mvapi BTLs
This commit was SVN r6890.
Этот коммит содержится в:
родитель
0dd5b5514c
Коммит
f248db3789
@ -97,10 +97,16 @@ int mca_bml_r2_progress( void ) {
|
||||
|
||||
static int mca_bml_r2_add_btls( void )
|
||||
{
|
||||
/* build an array of r2s and r2 modules */
|
||||
opal_list_t* btls = &mca_btl_base_modules_initialized;
|
||||
opal_list_t *btls = NULL;
|
||||
mca_btl_base_selected_module_t* selected_btl;
|
||||
size_t num_btls = opal_list_get_size(btls);
|
||||
size_t num_btls = 0;
|
||||
|
||||
if(mca_bml_r2.btls_added == true)
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
/* build an array of r2s and r2 modules */
|
||||
btls = &mca_btl_base_modules_initialized;
|
||||
num_btls = opal_list_get_size(btls);
|
||||
|
||||
if(mca_bml_r2.btls_added == true)
|
||||
return OMPI_SUCCESS;
|
||||
@ -151,20 +157,33 @@ int mca_bml_r2_add_procs(
|
||||
int rc;
|
||||
size_t p_index;
|
||||
struct mca_btl_base_endpoint_t ** btl_endpoints = NULL;
|
||||
|
||||
struct ompi_proc_t** new_procs = NULL;
|
||||
size_t n_new_procs = 0;
|
||||
|
||||
if(nprocs == 0)
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
|
||||
if(OMPI_SUCCESS != (rc = mca_bml_r2_add_btls()) )
|
||||
return rc;
|
||||
|
||||
new_procs = (struct ompi_proc_t **)
|
||||
malloc(nprocs * sizeof(struct ompi_proc_t *));
|
||||
|
||||
bml_endpoints = (struct mca_bml_base_endpoint_t **)
|
||||
malloc(nprocs * sizeof(struct mca_bml_base_endpoint_t*));
|
||||
memset(bml_endpoints, 0, nprocs * sizeof(struct mca_bml_base_endpoint_t*));
|
||||
|
||||
|
||||
for(p_index = 0; p_index < nprocs; p_index++) {
|
||||
struct ompi_proc_t* proc;
|
||||
proc = procs[p_index];
|
||||
OBJ_RETAIN(proc);
|
||||
|
||||
if(NULL != proc->proc_pml) {
|
||||
bml_endpoints[p] = (mca_bml_base_endpoint_t*) proc->proc_pml;
|
||||
|
||||
} else {
|
||||
new_procs[n_new_procs++] = proc;
|
||||
}
|
||||
}
|
||||
|
||||
@ -172,9 +191,6 @@ int mca_bml_r2_add_procs(
|
||||
/* attempt to add all procs to each r2 */
|
||||
btl_endpoints = (struct mca_btl_base_endpoint_t **)
|
||||
malloc(nprocs * sizeof(struct mca_btl_base_endpoint_t*));
|
||||
bml_endpoints = (struct mca_bml_base_endpoint_t **)
|
||||
malloc(nprocs * sizeof(struct mca_bml_base_endpoint_t*));
|
||||
memset(bml_endpoints, 0, nprocs * sizeof(struct mca_bml_base_endpoint_t*));
|
||||
|
||||
for(p_index = 0; p_index < mca_bml_r2.num_btl_modules; p_index++) {
|
||||
mca_btl_base_module_t* btl = mca_bml_r2.btl_modules[p_index];
|
||||
@ -188,19 +204,17 @@ int mca_bml_r2_add_procs(
|
||||
ompi_bitmap_clear_all_bits(reachable);
|
||||
memset(btl_endpoints, 0, nprocs *sizeof(struct mca_btl_base_endpoint_t*));
|
||||
|
||||
rc = btl->btl_add_procs(btl, nprocs, procs, btl_endpoints, reachable);
|
||||
rc = btl->btl_add_procs(btl, n_new_procs, new_procs, btl_endpoints, reachable);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
free(btl_endpoints);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* for each proc that is reachable - add the endpoint to the bml_endpoints array(s) */
|
||||
for(p=0; p<nprocs; p++) {
|
||||
for(p=0; p<n_new_procs; p++) {
|
||||
if(ompi_bitmap_is_set_bit(reachable, p)) {
|
||||
ompi_proc_t *proc = procs[p];
|
||||
|
||||
/* mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml; */
|
||||
mca_bml_base_endpoint_t * bml_endpoint = proc->proc_pml;
|
||||
ompi_proc_t *proc = new_procs[p];
|
||||
mca_bml_base_endpoint_t * bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
size_t size;
|
||||
|
||||
@ -222,7 +236,7 @@ int mca_bml_r2_add_procs(
|
||||
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send, mca_bml_r2.num_btl_modules);
|
||||
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma, mca_bml_r2.num_btl_modules);
|
||||
bml_endpoint->btl_proc = proc;
|
||||
proc->proc_pml = bml_endpoint;
|
||||
proc->proc_pml = (struct mca_pml_proc_t*) bml_endpoint;
|
||||
|
||||
}
|
||||
|
||||
@ -263,7 +277,7 @@ int mca_bml_r2_add_procs(
|
||||
bml_btl->btl_send = btl->btl_send;
|
||||
bml_btl->btl_put = btl->btl_put;
|
||||
bml_btl->btl_get = btl->btl_get;
|
||||
|
||||
bml_btl->btl_flags = btl->btl_flags;
|
||||
}
|
||||
}
|
||||
if(btl_inuse > 0 && NULL != btl->btl_component->btl_progress) {
|
||||
@ -285,8 +299,8 @@ int mca_bml_r2_add_procs(
|
||||
free(btl_endpoints);
|
||||
|
||||
/* iterate back through procs and compute metrics for registered r2s */
|
||||
for(p=0; p<nprocs; p++) {
|
||||
ompi_proc_t *proc = procs[p];
|
||||
for(p=0; p<n_new_procs; p++) {
|
||||
ompi_proc_t *proc = new_procs[p];
|
||||
mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
|
||||
double total_bandwidth = 0;
|
||||
uint32_t latency = 0;
|
||||
@ -349,6 +363,8 @@ int mca_bml_r2_add_procs(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(new_procs);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -362,8 +378,20 @@ int mca_bml_r2_del_procs(size_t nprocs,
|
||||
{
|
||||
size_t p;
|
||||
int rc;
|
||||
for(p = 0; p < nprocs; p++) {
|
||||
ompi_proc_t *proc = procs[p];
|
||||
struct ompi_proc_t** del_procs = (struct ompi_proc_t**)
|
||||
malloc(nprocs * sizeof(struct ompi_proc_t*));
|
||||
size_t n_del_procs = 0;
|
||||
|
||||
for(p =0; p < nprocs; p++) {
|
||||
ompi_proc_t *proc = procs[p];
|
||||
if(((opal_object_t*)proc)->obj_reference_count == 1) {
|
||||
del_procs[n_del_procs++] = proc;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for(p = 0; p < n_del_procs; p++) {
|
||||
ompi_proc_t *proc = del_procs[p];
|
||||
mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
|
||||
size_t f_index, f_size;
|
||||
size_t n_index, n_size;
|
||||
@ -404,6 +432,7 @@ int mca_bml_r2_del_procs(size_t nprocs,
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_RELEASE(proc);
|
||||
/* do any required cleanup */
|
||||
OBJ_RELEASE(bml_endpoint);
|
||||
|
||||
|
@ -36,6 +36,7 @@
|
||||
|
||||
#include "mca/btl/btl.h"
|
||||
#include "mca/btl/base/base.h"
|
||||
#include "btl_mvapi_endpoint.h"
|
||||
|
||||
#include <vapi.h>
|
||||
#include <mtl_common.h>
|
||||
@ -58,7 +59,7 @@ struct mca_btl_mvapi_component_t {
|
||||
uint32_t ib_num_btls;
|
||||
/**< number of hcas available to the IB component */
|
||||
|
||||
struct mca_btl_mvapi_module_t *mvapi_btls;
|
||||
struct mca_btl_mvapi_module_t *mvapi_btls;
|
||||
/**< array of available BTL modules */
|
||||
|
||||
int ib_free_list_num;
|
||||
@ -133,7 +134,8 @@ typedef mca_btl_base_recv_reg_t mca_btl_mvapi_recv_reg_t;
|
||||
struct mca_btl_mvapi_module_t {
|
||||
mca_btl_base_module_t super; /**< base PTL interface */
|
||||
bool btl_inited;
|
||||
mca_btl_mvapi_recv_reg_t ib_reg[256];
|
||||
mca_btl_mvapi_recv_reg_t ib_reg[256];
|
||||
mca_btl_mvapi_addr_t mvapi_addr; /* contains only the subnet right now */
|
||||
VAPI_hca_id_t hca_id; /**< ID of HCA */
|
||||
IB_port_t port_id; /**< ID of the PORT */
|
||||
VAPI_hca_port_t port; /**< IB port of this PTL */
|
||||
@ -174,6 +176,7 @@ struct mca_btl_mvapi_module_t {
|
||||
/**< an array to allow posting of rr in one swoop */
|
||||
size_t ib_inline_max; /**< max size of inline send*/
|
||||
|
||||
|
||||
}; typedef struct mca_btl_mvapi_module_t mca_btl_mvapi_module_t;
|
||||
|
||||
|
||||
|
@ -223,6 +223,37 @@ int mca_btl_mvapi_component_close(void)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Register GM component addressing information. The MCA framework
|
||||
* will make this available to all peers.
|
||||
*/
|
||||
|
||||
static int
|
||||
mca_btl_mvapi_modex_send(void)
|
||||
{
|
||||
int rc;
|
||||
size_t i;
|
||||
size_t size;
|
||||
mca_btl_mvapi_addr_t *addrs;
|
||||
|
||||
size = mca_btl_mvapi_component.ib_num_btls * sizeof (mca_btl_mvapi_addr_t);
|
||||
addrs = (mca_btl_mvapi_addr_t *)malloc (size);
|
||||
if (NULL == addrs) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
for (i = 0; i < mca_btl_mvapi_component.ib_num_btls; i++) {
|
||||
mca_btl_mvapi_module_t *btl = &mca_btl_mvapi_component.mvapi_btls[i];
|
||||
addrs[i] = btl->mvapi_addr;
|
||||
}
|
||||
rc = mca_pml_base_modex_send (&mca_btl_mvapi_component.super.btl_version, addrs, size);
|
||||
free (addrs);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* IB component initialization:
|
||||
* (1) read interface list from kernel and compare against component parameters
|
||||
@ -315,6 +346,8 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
||||
mvapi_btl->nic = hca_hndl;
|
||||
mvapi_btl->port_id = (IB_port_t) j;
|
||||
mvapi_btl->port = hca_port;
|
||||
mvapi_btl->mvapi_addr.subnet = hca_port.sm_lid;
|
||||
|
||||
opal_list_append(&btl_list, (opal_list_item_t*) ib_selected);
|
||||
mca_btl_mvapi_component.ib_num_btls ++;
|
||||
|
||||
@ -342,6 +375,11 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
||||
|
||||
|
||||
for(i = 0; i < mca_btl_mvapi_component.ib_num_btls; i++){
|
||||
|
||||
/* uint16_t tbl_len_in = 0; */
|
||||
/* uint16_t tbl_len_out = 0; */
|
||||
/* IB_gid_t *gid_tbl_p = NULL; */
|
||||
|
||||
item = opal_list_remove_first(&btl_list);
|
||||
ib_selected = (mca_btl_base_selected_module_t*)item;
|
||||
mvapi_btl = (mca_btl_mvapi_module_t*) ib_selected->btl_module;
|
||||
@ -374,7 +412,36 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
||||
free(hca_ids);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* vapi_ret = VAPI_query_hca_gid_tbl(mvapi_btl->nic, */
|
||||
/* mvapi_btl->port_id, */
|
||||
/* tbl_len_in, */
|
||||
/* &tbl_len_out, */
|
||||
/* gid_tbl_p); */
|
||||
/* if(OMPI_SUCCESS != vapi_ret) { */
|
||||
/* BTL_ERROR(("error querying gid table to obtain subnet mask")); */
|
||||
/* return NULL; */
|
||||
/* } */
|
||||
/* if(tbl_len_out == 0) { */
|
||||
/* BTL_ERROR(("error querying gid table, table length 0!")); */
|
||||
/* return NULL; */
|
||||
/* } */
|
||||
/* tbl_len_in = tbl_len_out; */
|
||||
/* gid_tbl_p = (IB_gid_t*) malloc(tbl_len_out * sizeof(IB_gid_t*)); */
|
||||
/* vapi_ret = VAPI_query_hca_gid_tbl(mvapi_btl->nic, */
|
||||
/* mvapi_btl->port_id, */
|
||||
/* tbl_len_in, */
|
||||
/* &tbl_len_out, */
|
||||
/* gid_tbl_p); */
|
||||
/* if(OMPI_SUCCESS != vapi_ret) { */
|
||||
/* BTL_ERROR(("error querying gid table to obtain subnet mask")); */
|
||||
/* return NULL; */
|
||||
/* } */
|
||||
/* /\* first 64 bits of the first gid entry should be the subnet mask *\/ */
|
||||
/* memcpy(&mvapi_btl->mvapi_addr.subnet, &gid_tbl_p[0], 8); */
|
||||
|
||||
|
||||
hca_pd.hca = mvapi_btl->nic;
|
||||
hca_pd.pd_tag = mvapi_btl->ptag;
|
||||
|
||||
@ -463,7 +530,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
||||
|
||||
/* Post OOB receive to support dynamic connection setup */
|
||||
mca_btl_mvapi_post_recv();
|
||||
|
||||
mca_btl_mvapi_modex_send();
|
||||
*num_btl_modules = mca_btl_mvapi_component.ib_num_btls;
|
||||
free(hca_ids);
|
||||
return btls;
|
||||
|
@ -23,12 +23,27 @@
|
||||
#include "mca/btl/btl.h"
|
||||
#include "btl_mvapi_frag.h"
|
||||
#include "btl_mvapi.h"
|
||||
|
||||
#include <vapi.h>
|
||||
#include <mtl_common.h>
|
||||
#include <vapi_common.h>
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
#define MAX_POST_RR (16)
|
||||
OBJ_CLASS_DECLARATION(mca_btl_mvapi_endpoint_t);
|
||||
|
||||
|
||||
struct mca_btl_mvapi_frag_t;
|
||||
|
||||
/**
 * Address record of one mvapi BTL module.
 *
 * mca_btl_mvapi_modex_send() publishes one of these per local module, and
 * mca_btl_mvapi_proc_insert() copies a peer's entry into the endpoint's
 * endpoint_addr. It currently carries only a subnet value.
 */
struct mca_btl_mvapi_addr_t {
|
||||
uint32_t subnet; /**< set from hca_port.sm_lid in mca_btl_mvapi_component_init() */
|
||||
};
|
||||
typedef struct mca_btl_mvapi_addr_t mca_btl_mvapi_addr_t;
|
||||
|
||||
|
||||
/**
|
||||
* State of IB endpoint connection.
|
||||
*/
|
||||
@ -113,6 +128,8 @@ struct mca_btl_base_endpoint_t {
|
||||
uint32_t rr_posted_high; /**< number of high priority rr posted to the nic*/
|
||||
uint32_t rr_posted_low; /**< number of low priority rr posted to the nic*/
|
||||
|
||||
mca_btl_mvapi_addr_t endpoint_addr;
|
||||
|
||||
};
|
||||
|
||||
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
|
||||
|
@ -98,50 +98,76 @@ static mca_btl_mvapi_proc_t* mca_btl_mvapi_proc_lookup_ompi(ompi_proc_t* ompi_pr
|
||||
|
||||
mca_btl_mvapi_proc_t* mca_btl_mvapi_proc_create(ompi_proc_t* ompi_proc)
|
||||
{
|
||||
mca_btl_mvapi_proc_t* module_proc = NULL;
|
||||
mca_btl_mvapi_proc_t* mvapi_proc = NULL;
|
||||
size_t size;
|
||||
int rc;
|
||||
|
||||
|
||||
/* Check if we have already created a IB proc
|
||||
* structure for this ompi process */
|
||||
module_proc = mca_btl_mvapi_proc_lookup_ompi(ompi_proc);
|
||||
mvapi_proc = mca_btl_mvapi_proc_lookup_ompi(ompi_proc);
|
||||
|
||||
if(module_proc != NULL) {
|
||||
if(mvapi_proc != NULL) {
|
||||
|
||||
/* Gotcha! */
|
||||
return module_proc;
|
||||
return mvapi_proc;
|
||||
}
|
||||
|
||||
/* Oops! First time, gotta create a new IB proc
|
||||
* out of the ompi_proc ... */
|
||||
|
||||
module_proc = OBJ_NEW(mca_btl_mvapi_proc_t);
|
||||
mvapi_proc = OBJ_NEW(mca_btl_mvapi_proc_t);
|
||||
|
||||
/* Initialize number of peer */
|
||||
module_proc->proc_endpoint_count = 0;
|
||||
mvapi_proc->proc_endpoint_count = 0;
|
||||
|
||||
module_proc->proc_ompi = ompi_proc;
|
||||
mvapi_proc->proc_ompi = ompi_proc;
|
||||
|
||||
/* build a unique identifier (of arbitrary
|
||||
* size) to represent the proc */
|
||||
module_proc->proc_guid = ompi_proc->proc_name;
|
||||
mvapi_proc->proc_guid = ompi_proc->proc_name;
|
||||
|
||||
/* IB module doesn't have addresses exported at
|
||||
* initialization, so the addr_count is set to one. */
|
||||
module_proc->proc_addr_count = 1;
|
||||
/* query for the peer address info */
|
||||
rc = mca_pml_base_modex_recv(
|
||||
&mca_btl_mvapi_component.super.btl_version,
|
||||
ompi_proc,
|
||||
(void*)&mvapi_proc->proc_addrs,
|
||||
&size
|
||||
);
|
||||
|
||||
|
||||
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
opal_output(0, "[%s:%d] mca_pml_base_modex_recv failed for peer [%d,%d,%d]",
|
||||
__FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name));
|
||||
OBJ_RELEASE(mvapi_proc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if((size % sizeof(mca_btl_mvapi_addr_t)) != 0) {
|
||||
opal_output(0, "[%s:%d] invalid mvapi address for peer [%d,%d,%d]",
|
||||
__FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name));
|
||||
OBJ_RELEASE(mvapi_proc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
mvapi_proc->proc_addr_count = size/sizeof(mca_btl_mvapi_addr_t);
|
||||
|
||||
|
||||
/* XXX: Right now, there can be only 1 peer associated
|
||||
* with a proc. Needs a little bit change in
|
||||
* mca_btl_mvapi_proc_t to allow on demand increasing of
|
||||
* number of endpoints for this proc */
|
||||
|
||||
module_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
|
||||
malloc(module_proc->proc_addr_count * sizeof(mca_btl_base_endpoint_t*));
|
||||
mvapi_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
|
||||
malloc(mvapi_proc->proc_addr_count * sizeof(mca_btl_base_endpoint_t*));
|
||||
|
||||
if(NULL == module_proc->proc_endpoints) {
|
||||
OBJ_RELEASE(module_proc);
|
||||
if(NULL == mvapi_proc->proc_endpoints) {
|
||||
OBJ_RELEASE(mvapi_proc);
|
||||
return NULL;
|
||||
}
|
||||
return module_proc;
|
||||
return mvapi_proc;
|
||||
}
|
||||
|
||||
|
||||
@ -150,12 +176,18 @@ mca_btl_mvapi_proc_t* mca_btl_mvapi_proc_create(ompi_proc_t* ompi_proc)
|
||||
* already held. Insert a btl instance into the proc array and assign
|
||||
* it an address.
|
||||
*/
|
||||
int mca_btl_mvapi_proc_insert(mca_btl_mvapi_proc_t* module_proc,
|
||||
mca_btl_base_endpoint_t* module_endpoint)
|
||||
int mca_btl_mvapi_proc_insert(mca_btl_mvapi_proc_t* mvapi_proc,
|
||||
mca_btl_base_endpoint_t* mvapi_endpoint)
|
||||
{
|
||||
mca_btl_mvapi_module_t* mvapi_btl = mvapi_endpoint->endpoint_btl;
|
||||
|
||||
/* insert into endpoint array */
|
||||
module_endpoint->endpoint_proc = module_proc;
|
||||
module_proc->proc_endpoints[module_proc->proc_endpoint_count++] = module_endpoint;
|
||||
|
||||
if(mvapi_proc->proc_addr_count <= mvapi_proc->proc_endpoint_count)
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
||||
mvapi_endpoint->endpoint_proc = mvapi_proc;
|
||||
mvapi_endpoint->endpoint_addr = mvapi_proc->proc_addrs[mvapi_proc->proc_endpoint_count];
|
||||
mvapi_proc->proc_endpoints[mvapi_proc->proc_endpoint_count++] = mvapi_endpoint;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -44,6 +44,7 @@ struct mca_btl_mvapi_proc_t {
|
||||
orte_process_name_t proc_guid;
|
||||
/**< globally unique identifier for the process */
|
||||
|
||||
struct mca_btl_mvapi_addr_t* proc_addrs;
|
||||
size_t proc_addr_count;
|
||||
/**< number of addresses published by endpoint */
|
||||
|
||||
@ -55,6 +56,7 @@ struct mca_btl_mvapi_proc_t {
|
||||
|
||||
opal_mutex_t proc_lock;
|
||||
/**< lock to protect against concurrent access to proc state */
|
||||
|
||||
};
|
||||
typedef struct mca_btl_mvapi_proc_t mca_btl_mvapi_proc_t;
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user