
misc fixes, changes to support multiple mvapi btl's

This commit was SVN r6890.
This commit is contained in:
Galen Shipman 2005-08-15 19:39:56 +00:00
parent 0dd5b5514c
commit f248db3789
6 changed files with 195 additions and 45 deletions

View file

@@ -97,10 +97,16 @@ int mca_bml_r2_progress( void ) {
static int mca_bml_r2_add_btls( void )
{
/* build an array of r2s and r2 modules */
opal_list_t* btls = &mca_btl_base_modules_initialized;
opal_list_t *btls = NULL;
mca_btl_base_selected_module_t* selected_btl;
size_t num_btls = opal_list_get_size(btls);
size_t num_btls = 0;
if(mca_bml_r2.btls_added == true)
return OMPI_SUCCESS;
/* build an array of r2s and r2 modules */
btls = &mca_btl_base_modules_initialized;
num_btls = opal_list_get_size(btls);
if(mca_bml_r2.btls_added == true)
return OMPI_SUCCESS;
@@ -151,20 +157,33 @@ int mca_bml_r2_add_procs(
int rc;
size_t p_index;
struct mca_btl_base_endpoint_t ** btl_endpoints = NULL;
struct ompi_proc_t** new_procs = NULL;
size_t n_new_procs = 0;
if(nprocs == 0)
return OMPI_SUCCESS;
if(OMPI_SUCCESS != (rc = mca_bml_r2_add_btls()) )
return rc;
new_procs = (struct ompi_proc_t **)
malloc(nprocs * sizeof(struct ompi_proc_t *));
bml_endpoints = (struct mca_bml_base_endpoint_t **)
malloc(nprocs * sizeof(struct mca_bml_base_endpoint_t*));
memset(bml_endpoints, 0, nprocs * sizeof(struct mca_bml_base_endpoint_t*));
for(p_index = 0; p_index < nprocs; p_index++) {
struct ompi_proc_t* proc;
proc = procs[p_index];
OBJ_RETAIN(proc);
if(NULL != proc->proc_pml) {
bml_endpoints[p_index] = (mca_bml_base_endpoint_t*) proc->proc_pml;
} else {
new_procs[n_new_procs++] = proc;
}
}
@@ -172,9 +191,6 @@ int mca_bml_r2_add_procs(
/* attempt to add all procs to each r2 */
btl_endpoints = (struct mca_btl_base_endpoint_t **)
malloc(nprocs * sizeof(struct mca_btl_base_endpoint_t*));
bml_endpoints = (struct mca_bml_base_endpoint_t **)
malloc(nprocs * sizeof(struct mca_bml_base_endpoint_t*));
memset(bml_endpoints, 0, nprocs * sizeof(struct mca_bml_base_endpoint_t*));
for(p_index = 0; p_index < mca_bml_r2.num_btl_modules; p_index++) {
mca_btl_base_module_t* btl = mca_bml_r2.btl_modules[p_index];
@@ -188,19 +204,17 @@ int mca_bml_r2_add_procs(
ompi_bitmap_clear_all_bits(reachable);
memset(btl_endpoints, 0, nprocs *sizeof(struct mca_btl_base_endpoint_t*));
rc = btl->btl_add_procs(btl, nprocs, procs, btl_endpoints, reachable);
rc = btl->btl_add_procs(btl, n_new_procs, new_procs, btl_endpoints, reachable);
if(OMPI_SUCCESS != rc) {
free(btl_endpoints);
return rc;
}
/* for each proc that is reachable - add the endpoint to the bml_endpoints array(s) */
for(p=0; p<nprocs; p++) {
for(p=0; p<n_new_procs; p++) {
if(ompi_bitmap_is_set_bit(reachable, p)) {
ompi_proc_t *proc = procs[p];
/* mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml; */
mca_bml_base_endpoint_t * bml_endpoint = proc->proc_pml;
ompi_proc_t *proc = new_procs[p];
mca_bml_base_endpoint_t * bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
mca_bml_base_btl_t* bml_btl;
size_t size;
@@ -222,7 +236,7 @@ int mca_bml_r2_add_procs(
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send, mca_bml_r2.num_btl_modules);
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma, mca_bml_r2.num_btl_modules);
bml_endpoint->btl_proc = proc;
proc->proc_pml = bml_endpoint;
proc->proc_pml = (struct mca_pml_proc_t*) bml_endpoint;
}
@@ -263,7 +277,7 @@ int mca_bml_r2_add_procs(
bml_btl->btl_send = btl->btl_send;
bml_btl->btl_put = btl->btl_put;
bml_btl->btl_get = btl->btl_get;
bml_btl->btl_flags = btl->btl_flags;
}
}
if(btl_inuse > 0 && NULL != btl->btl_component->btl_progress) {
@@ -285,8 +299,8 @@ int mca_bml_r2_add_procs(
free(btl_endpoints);
/* iterate back through procs and compute metrics for registered r2s */
for(p=0; p<nprocs; p++) {
ompi_proc_t *proc = procs[p];
for(p=0; p<n_new_procs; p++) {
ompi_proc_t *proc = new_procs[p];
mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
double total_bandwidth = 0;
uint32_t latency = 0;
@@ -349,6 +363,8 @@ int mca_bml_r2_add_procs(
}
}
}
free(new_procs);
return OMPI_SUCCESS;
}
@@ -362,8 +378,20 @@ int mca_bml_r2_del_procs(size_t nprocs,
{
size_t p;
int rc;
for(p = 0; p < nprocs; p++) {
ompi_proc_t *proc = procs[p];
struct ompi_proc_t** del_procs = (struct ompi_proc_t**)
malloc(nprocs * sizeof(struct ompi_proc_t*));
size_t n_del_procs = 0;
for(p = 0; p < nprocs; p++) {
ompi_proc_t *proc = procs[p];
if(((opal_object_t*)proc)->obj_reference_count == 1) {
del_procs[n_del_procs++] = proc;
}
}
for(p = 0; p < n_del_procs; p++) {
ompi_proc_t *proc = del_procs[p];
mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
size_t f_index, f_size;
size_t n_index, n_size;
@@ -404,6 +432,7 @@ int mca_bml_r2_del_procs(size_t nprocs,
}
}
OBJ_RELEASE(proc);
/* do any required cleanup */
OBJ_RELEASE(bml_endpoint);
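
The r2 changes above all serve one idiom: procs that already carry a proc_pml endpoint are reused as-is, and only genuinely new procs are handed to each BTL's btl_add_procs; symmetrically, del_procs now tears down only those procs whose reference count shows this is the last holder. A minimal sketch of the add-side partition, using illustrative stand-in types (struct proc and filter_new_procs are not OMPI names):

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-in for ompi_proc_t: endpoint plays the role of
 * proc->proc_pml, refcount the role of the opal_object_t refcount. */
struct proc { void *endpoint; int refcount; };

/* Partition the input array: bump every proc's refcount (OBJ_RETAIN in
 * the real code), reuse procs that already have an endpoint, and collect
 * the rest so the BTLs only ever see genuinely new procs. */
static size_t filter_new_procs(struct proc **procs, size_t nprocs,
                               struct proc **new_procs)
{
    size_t i, n_new = 0;
    for (i = 0; i < nprocs; i++) {
        procs[i]->refcount++;
        if (NULL == procs[i]->endpoint) {
            new_procs[n_new++] = procs[i];
        }
    }
    return n_new;
}

int main(void)
{
    struct proc known = { (void *)&known, 0 }, fresh = { NULL, 0 };
    struct proc *in[2] = { &known, &fresh };
    struct proc *out[2];
    printf("%zu of 2 procs are new\n", filter_new_procs(in, 2, out));
    return 0; /* prints: 1 of 2 procs are new */
}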

View file

@@ -36,6 +36,7 @@
#include "mca/btl/btl.h"
#include "mca/btl/base/base.h"
#include "btl_mvapi_endpoint.h"
#include <vapi.h>
#include <mtl_common.h>
@@ -58,7 +59,7 @@ struct mca_btl_mvapi_component_t {
uint32_t ib_num_btls;
/**< number of hcas available to the IB component */
struct mca_btl_mvapi_module_t *mvapi_btls;
/**< array of available PTLs */
int ib_free_list_num;
@@ -133,7 +134,8 @@ typedef mca_btl_base_recv_reg_t mca_btl_mvapi_recv_reg_t;
struct mca_btl_mvapi_module_t {
mca_btl_base_module_t super; /**< base PTL interface */
bool btl_inited;
mca_btl_mvapi_recv_reg_t ib_reg[256];
mca_btl_mvapi_addr_t mvapi_addr; /* contains only the subnet right now */
VAPI_hca_id_t hca_id; /**< ID of HCA */
IB_port_t port_id; /**< ID of the PORT */
VAPI_hca_port_t port; /**< IB port of this PTL */
@@ -174,6 +176,7 @@ struct mca_btl_mvapi_module_t {
/**< an array to allow posting of rr in one swoop */
size_t ib_inline_max; /**< max size of inline send*/
}; typedef struct mca_btl_mvapi_module_t mca_btl_mvapi_module_t;

View file

@@ -223,6 +223,37 @@ int mca_btl_mvapi_component_close(void)
return OMPI_SUCCESS;
}
/*
* Register MVAPI component addressing information. The MCA framework
* will make this available to all peers.
*/
static int
mca_btl_mvapi_modex_send(void)
{
int rc;
size_t i;
size_t size;
mca_btl_mvapi_addr_t *addrs;
size = mca_btl_mvapi_component.ib_num_btls * sizeof (mca_btl_mvapi_addr_t);
addrs = (mca_btl_mvapi_addr_t *)malloc (size);
if (NULL == addrs) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
for (i = 0; i < mca_btl_mvapi_component.ib_num_btls; i++) {
mca_btl_mvapi_module_t *btl = &mca_btl_mvapi_component.mvapi_btls[i];
addrs[i] = btl->mvapi_addr;
}
rc = mca_pml_base_modex_send (&mca_btl_mvapi_component.super.btl_version, addrs, size);
free (addrs);
return rc;
}
/*
* IB component initialization:
* (1) read interface list from kernel and compare against component parameters
@@ -315,6 +346,8 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
mvapi_btl->nic = hca_hndl;
mvapi_btl->port_id = (IB_port_t) j;
mvapi_btl->port = hca_port;
mvapi_btl->mvapi_addr.subnet = hca_port.sm_lid;
opal_list_append(&btl_list, (opal_list_item_t*) ib_selected);
mca_btl_mvapi_component.ib_num_btls ++;
@@ -342,6 +375,11 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
for(i = 0; i < mca_btl_mvapi_component.ib_num_btls; i++){
/* uint16_t tbl_len_in = 0; */
/* uint16_t tbl_len_out = 0; */
/* IB_gid_t *gid_tbl_p = NULL; */
item = opal_list_remove_first(&btl_list);
ib_selected = (mca_btl_base_selected_module_t*)item;
mvapi_btl = (mca_btl_mvapi_module_t*) ib_selected->btl_module;
@@ -374,7 +412,36 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
free(hca_ids);
return NULL;
}
/* vapi_ret = VAPI_query_hca_gid_tbl(mvapi_btl->nic, */
/* mvapi_btl->port_id, */
/* tbl_len_in, */
/* &tbl_len_out, */
/* gid_tbl_p); */
/* if(OMPI_SUCCESS != vapi_ret) { */
/* BTL_ERROR(("error querying gid table to obtain subnet mask")); */
/* return NULL; */
/* } */
/* if(tbl_len_out == 0) { */
/* BTL_ERROR(("error querying gid table, table length 0!")); */
/* return NULL; */
/* } */
/* tbl_len_in = tbl_len_out; */
/* gid_tbl_p = (IB_gid_t*) malloc(tbl_len_out * sizeof(IB_gid_t*)); */
/* vapi_ret = VAPI_query_hca_gid_tbl(mvapi_btl->nic, */
/* mvapi_btl->port_id, */
/* tbl_len_in, */
/* &tbl_len_out, */
/* gid_tbl_p); */
/* if(OMPI_SUCCESS != vapi_ret) { */
/* BTL_ERROR(("error querying gid table to obtain subnet mask")); */
/* return NULL; */
/* } */
/* /\* first 64 bits of the first gid entry should be the subnet mask *\/ */
/* memcpy(&mvapi_btl->mvapi_addr.subnet, &gid_tbl_p[0], 8); */
hca_pd.hca = mvapi_btl->nic;
hca_pd.pd_tag = mvapi_btl->ptag;
@@ -463,7 +530,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
/* Post OOB receive to support dynamic connection setup */
mca_btl_mvapi_post_recv();
mca_btl_mvapi_modex_send();
*num_btl_modules = mca_btl_mvapi_component.ib_num_btls;
free(hca_ids);
return btls;
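
The new mca_btl_mvapi_modex_send() above packs one mca_btl_mvapi_addr_t per local BTL module into a single contiguous blob; the peer side (btl_mvapi_proc.c, below) recovers the count from the blob's size. A minimal sketch of that pack/validate pairing, with addr_t standing in for mca_btl_mvapi_addr_t:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef struct { uint32_t subnet; } addr_t; /* stand-in for mca_btl_mvapi_addr_t */

/* Sender: pack one address per local BTL into a contiguous blob, as
 * mca_btl_mvapi_modex_send() does before calling mca_pml_base_modex_send(). */
static void *pack_addrs(const addr_t *addrs, size_t naddrs, size_t *size_out)
{
    size_t size = naddrs * sizeof(addr_t);
    void *blob = malloc(size);
    if (NULL == blob)
        return NULL;
    memcpy(blob, addrs, size);
    *size_out = size;
    return blob;
}

/* Receiver: the peer recovers the address count from the blob size --
 * the same size % sizeof() check mca_btl_mvapi_proc_create() now makes. */
static size_t unpack_count(size_t size)
{
    assert(0 == size % sizeof(addr_t));
    return size / sizeof(addr_t);
}

int main(void)
{
    addr_t local[2] = { { 0x11 }, { 0x22 } }; /* e.g. one subnet per HCA port */
    size_t size = 0;
    void *blob = pack_addrs(local, 2, &size);
    size_t n = (NULL != blob) ? unpack_count(size) : 0;
    free(blob);
    return (2 == n) ? 0 : 1;
}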

View file

@@ -23,12 +23,27 @@
#include "mca/btl/btl.h"
#include "btl_mvapi_frag.h"
#include "btl_mvapi.h"
#include <vapi.h>
#include <mtl_common.h>
#include <vapi_common.h>
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
#define MAX_POST_RR (16)
OBJ_CLASS_DECLARATION(mca_btl_mvapi_endpoint_t);
struct mca_btl_mvapi_frag_t;
struct mca_btl_mvapi_addr_t {
uint32_t subnet;
};
typedef struct mca_btl_mvapi_addr_t mca_btl_mvapi_addr_t;
/**
* State of IB endpoint connection.
*/
@@ -113,6 +128,8 @@ struct mca_btl_base_endpoint_t {
uint32_t rr_posted_high; /**< number of high priority rr posted to the nic*/
uint32_t rr_posted_low; /**< number of low priority rr posted to the nic*/
mca_btl_mvapi_addr_t endpoint_addr;
};
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
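
mca_btl_mvapi_addr_t carries only the IB subnet for now (seeded from sm_lid in component_init, above), and each endpoint records the peer address assigned to it. One plausible use of the subnet, sketched as a hypothetical match_by_subnet() helper that this commit does not implement, is deciding which of a peer's published addresses a local BTL can actually reach:

#include <stddef.h>
#include <stdint.h>

typedef struct { uint32_t subnet; } mvapi_addr_t; /* mirrors mca_btl_mvapi_addr_t */

/* Hypothetical: return the index of the first peer address whose subnet
 * matches the local BTL's, or -1 if the peer is unreachable this way. */
static int match_by_subnet(uint32_t local_subnet,
                           const mvapi_addr_t *peer, size_t n, size_t *idx)
{
    size_t i;
    for (i = 0; i < n; i++) {
        if (peer[i].subnet == local_subnet) {
            *idx = i;
            return 0;
        }
    }
    return -1;
}

int main(void)
{
    mvapi_addr_t peer[2] = { { 0x11 }, { 0x22 } };
    size_t idx = 0;
    return (0 == match_by_subnet(0x22, peer, 2, &idx) && 1 == idx) ? 0 : 1;
}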

View file

@@ -98,50 +98,76 @@ static mca_btl_mvapi_proc_t* mca_btl_mvapi_proc_lookup_ompi(ompi_proc_t* ompi_pr
mca_btl_mvapi_proc_t* mca_btl_mvapi_proc_create(ompi_proc_t* ompi_proc)
{
mca_btl_mvapi_proc_t* module_proc = NULL;
mca_btl_mvapi_proc_t* mvapi_proc = NULL;
size_t size;
int rc;
/* Check if we have already created an IB proc
* structure for this ompi process */
module_proc = mca_btl_mvapi_proc_lookup_ompi(ompi_proc);
mvapi_proc = mca_btl_mvapi_proc_lookup_ompi(ompi_proc);
if(module_proc != NULL) {
if(mvapi_proc != NULL) {
/* Gotcha! */
return module_proc;
return mvapi_proc;
}
/* Oops! First time, gotta create a new IB proc
* out of the ompi_proc ... */
module_proc = OBJ_NEW(mca_btl_mvapi_proc_t);
mvapi_proc = OBJ_NEW(mca_btl_mvapi_proc_t);
/* Initialize number of peer */
module_proc->proc_endpoint_count = 0;
mvapi_proc->proc_endpoint_count = 0;
module_proc->proc_ompi = ompi_proc;
mvapi_proc->proc_ompi = ompi_proc;
/* build a unique identifier (of arbitrary
* size) to represent the proc */
module_proc->proc_guid = ompi_proc->proc_name;
mvapi_proc->proc_guid = ompi_proc->proc_name;
/* IB module doesn't have addresses exported at
* initialization, so the addr_count is set to one. */
module_proc->proc_addr_count = 1;
/* query for the peer address info */
rc = mca_pml_base_modex_recv(
&mca_btl_mvapi_component.super.btl_version,
ompi_proc,
(void*)&mvapi_proc->proc_addrs,
&size
);
if(OMPI_SUCCESS != rc) {
opal_output(0, "[%s:%d] mca_pml_base_modex_recv failed for peer [%d,%d,%d]",
__FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name));
OBJ_RELEASE(mvapi_proc);
return NULL;
}
if((size % sizeof(mca_btl_mvapi_addr_t)) != 0) {
opal_output(0, "[%s:%d] invalid mvapi address for peer [%d,%d,%d]",
__FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name));
OBJ_RELEASE(mvapi_proc);
return NULL;
}
mvapi_proc->proc_addr_count = size/sizeof(mca_btl_mvapi_addr_t);
/* XXX: Right now, there can be only 1 peer associated
* with a proc. Needs a little bit change in
* mca_btl_mvapi_proc_t to allow on demand increasing of
* number of endpoints for this proc */
module_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
malloc(module_proc->proc_addr_count * sizeof(mca_btl_base_endpoint_t*));
mvapi_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
malloc(mvapi_proc->proc_addr_count * sizeof(mca_btl_base_endpoint_t*));
if(NULL == module_proc->proc_endpoints) {
OBJ_RELEASE(module_proc);
if(NULL == mvapi_proc->proc_endpoints) {
OBJ_RELEASE(mvapi_proc);
return NULL;
}
return module_proc;
return mvapi_proc;
}
@@ -150,12 +176,18 @@ mca_btl_mvapi_proc_t* mca_btl_mvapi_proc_create(ompi_proc_t* ompi_proc)
* already held. Insert a btl instance into the proc array and assign
* it an address.
*/
int mca_btl_mvapi_proc_insert(mca_btl_mvapi_proc_t* module_proc,
mca_btl_base_endpoint_t* module_endpoint)
int mca_btl_mvapi_proc_insert(mca_btl_mvapi_proc_t* mvapi_proc,
mca_btl_base_endpoint_t* mvapi_endpoint)
{
mca_btl_mvapi_module_t* mvapi_btl = mvapi_endpoint->endpoint_btl;
/* insert into endpoint array */
module_endpoint->endpoint_proc = module_proc;
module_proc->proc_endpoints[module_proc->proc_endpoint_count++] = module_endpoint;
if(mvapi_proc->proc_addr_count <= mvapi_proc->proc_endpoint_count)
return OMPI_ERR_OUT_OF_RESOURCE;
mvapi_endpoint->endpoint_proc = mvapi_proc;
mvapi_endpoint->endpoint_addr = mvapi_proc->proc_addrs[mvapi_proc->proc_endpoint_count];
mvapi_proc->proc_endpoints[mvapi_proc->proc_endpoint_count++] = mvapi_endpoint;
return OMPI_SUCCESS;
}
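
The reworked mca_btl_mvapi_proc_insert() above enforces that a proc can hold at most as many endpoints as its peer published addresses, and each new endpoint takes the address at its slot. A compressed sketch with illustrative types (proc_t fields and names here are stand-ins, not OMPI declarations):

#include <stdint.h>
#include <stdlib.h>

struct proc;
typedef struct { uint32_t subnet; } addr_t;
typedef struct endpoint { addr_t addr; struct proc *owner; } endpoint_t;
struct proc {
    addr_t *addrs;          /* one entry per BTL the peer published */
    size_t addr_count;
    endpoint_t **endpoints;
    size_t endpoint_count;
};

/* Bounded insert: fail once every published address is consumed, otherwise
 * bind the endpoint to the proc and hand it address number endpoint_count. */
static int proc_insert(struct proc *p, endpoint_t *ep)
{
    if (p->addr_count <= p->endpoint_count)
        return -1; /* plays the role of OMPI_ERR_OUT_OF_RESOURCE */
    ep->owner = p;
    ep->addr = p->addrs[p->endpoint_count];
    p->endpoints[p->endpoint_count++] = ep;
    return 0;
}

int main(void)
{
    addr_t addrs[1] = { { 0x11 } };
    endpoint_t *slots[1];
    endpoint_t e;
    struct proc p = { addrs, 1, slots, 0 };
    /* second insert must fail: only one address was published */
    return (0 == proc_insert(&p, &e) && 0 != proc_insert(&p, &e)) ? 0 : 1;
}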

View file

@@ -44,6 +44,7 @@ struct mca_btl_mvapi_proc_t {
orte_process_name_t proc_guid;
/**< globally unique identifier for the process */
struct mca_btl_mvapi_addr_t* proc_addrs;
size_t proc_addr_count;
/**< number of addresses published by endpoint */
@@ -55,6 +56,7 @@ struct mca_btl_mvapi_proc_t {
opal_mutex_t proc_lock;
/**< lock to protect against concurrent access to proc state */
};
typedef struct mca_btl_mvapi_proc_t mca_btl_mvapi_proc_t;