1
1
We are now looking at subnet IDs (subnet_id), and we are counting active ports per subnet ID.
Move the local subnet port count out of the procs loop; there is no need to do it there.

This commit was SVN r13105.
Этот коммит содержится в:
Galen Shipman 2007-01-12 22:42:20 +00:00
родитель 075161afa9
Коммит df099a4731
5 изменённых файлов: 43 добавлений и 42 удалений

Просмотреть файл

@ -85,9 +85,19 @@ int mca_btl_openib_add_procs(
{
mca_btl_openib_module_t* openib_btl = (mca_btl_openib_module_t*)btl;
int i,j, rc;
int remote_subnets;
int local_subnets;
int rem_subnet_id_port_cnt;
int lcl_subnet_id_port_cnt = 0;
int btl_rank = 0;
for(j=0; j < mca_btl_openib_component.ib_num_btls; j++){
if(mca_btl_openib_component.openib_btls[j].port_info.subnet_id
== openib_btl->port_info.subnet_id) {
lcl_subnet_id_port_cnt++;
}
if(openib_btl == &(mca_btl_openib_component.openib_btls[j])) {
btl_rank = j;
}
}
for(i = 0; i < (int) nprocs; i++) {
struct ompi_proc_t* ompi_proc = ompi_procs[i];
@ -99,42 +109,33 @@ int mca_btl_openib_add_procs(
return OMPI_ERR_OUT_OF_RESOURCE;
}
remote_subnets = 0;
rem_subnet_id_port_cnt = 0;
/* check if the remote proc has a reachable subnet first */
BTL_VERBOSE(("got %d port_infos \n", ib_proc->proc_port_count));
for(j = 0; j < (int) ib_proc->proc_port_count; j++){
BTL_VERBOSE(("got a subnet %016x\n",
ib_proc->proc_ports[j].subnet));
if(ib_proc->proc_ports[j].subnet ==
openib_btl->port_info.subnet) {
ib_proc->proc_ports[j].subnet_id));
if(ib_proc->proc_ports[j].subnet_id ==
openib_btl->port_info.subnet_id) {
BTL_VERBOSE(("Got a matching subnet!\n"));
remote_subnets++;
rem_subnet_id_port_cnt ++;
}
}
if(!remote_subnets) {
if(!rem_subnet_id_port_cnt ) {
/* no use trying to communicate with this endpoint later */
BTL_VERBOSE(("No matching subnet was found, moving on.. \n"));
BTL_VERBOSE(("No matching subnet id was found, moving on.. \n"));
continue;
}
local_subnets = 0;
for(j=0; j < mca_btl_openib_component.ib_num_btls; j++){
if(mca_btl_openib_component.openib_btls[j].port_info.subnet
== openib_btl->port_info.subnet) {
local_subnets++;
}
if(openib_btl == &(mca_btl_openib_component.openib_btls[j])) {
btl_rank = j;
}
}
#if 0
num_endpoints = remote_subnets / local_subnets +
(btl_rank < (remote_subnets / local_subnets)) ? 1:0;
num_endpoints = rem_subnet_id_port_cnt / lcl_subnet_id_port_cnt +
(btl_rank < (rem_subnet_id_port_cnt / lcl_subnet_id_port_cnt)) ? 1:0;
#endif
if(remote_subnets < local_subnets &&
btl_rank >= remote_subnets) {
BTL_VERBOSE(("Not enough remote subnets, moving on.. \n"));
if(rem_subnet_id_port_cnt < lcl_subnet_id_port_cnt &&
btl_rank >= rem_subnet_id_port_cnt ) {
BTL_VERBOSE(("Not enough remote ports on this subnet id, moving on.. \n"));
continue;
}
@ -151,7 +152,7 @@ int mca_btl_openib_add_procs(
}
endpoint->endpoint_btl = openib_btl;
endpoint->subnet = openib_btl->port_info.subnet;
endpoint->subnet_id = openib_btl->port_info.subnet_id;
rc = mca_btl_openib_proc_insert(ib_proc, endpoint);
if(rc != OMPI_SUCCESS) {
OBJ_RELEASE(endpoint);

Просмотреть файл

@ -148,7 +148,7 @@ struct mca_btl_openib_port_info_t {
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t padding[4];
#endif
uint64_t subnet;
uint64_t subnet_id;
};
typedef struct mca_btl_openib_port_info_t mca_btl_openib_port_info_t;
@ -178,7 +178,7 @@ struct mca_btl_openib_module_t {
mca_btl_base_module_t super; /**< base PTL interface */
bool btl_inited;
mca_btl_openib_recv_reg_t ib_reg[256];
mca_btl_openib_port_info_t port_info; /* contains only the subnet right now */
mca_btl_openib_port_info_t port_info; /* contains only the subnet id right now */
mca_btl_openib_hca_t *hca;
uint8_t port_num; /**< ID of the PORT */
struct ibv_cq *ib_cq[2];

Просмотреть файл

@ -277,14 +277,14 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
mca_btl_openib_module_t *openib_btl;
mca_btl_base_selected_module_t *ib_selected;
union ibv_gid gid;
uint64_t subnet;
uint64_t subnet_id;
ibv_query_gid(hca->ib_dev_context, port_num, 0, &gid);
subnet = ntoh64(gid.global.subnet_prefix);
BTL_VERBOSE(("my subnet is %016x\n", subnet));
subnet_id = ntoh64(gid.global.subnet_prefix);
BTL_VERBOSE(("my subnet_id is %016x\n", subnet_id));
if(mca_btl_openib_component.ib_num_btls > 0 &&
IB_DEFAULT_GID_PREFIX == subnet &&
IB_DEFAULT_GID_PREFIX == subnet_id &&
mca_btl_openib_component.warn_default_gid_prefix) {
opal_show_help("help-mpi-btl-openib.txt", "default subnet prefix",
true, orte_system_info.nodename);
@ -315,8 +315,8 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
openib_btl->port_num = (uint8_t) port_num;
openib_btl->lid = lid;
openib_btl->src_path_bits = lid - ib_port_attr->lid;
/* store the subnet for multi-nic support */
openib_btl->port_info.subnet = subnet;
/* store the subnet_id for multi-nic support */
openib_btl->port_info.subnet_id = subnet_id;
openib_btl->port_info.mtu = hca->mtu;
openib_btl->ib_reg[MCA_BTL_TAG_BTL].cbfunc = btl_openib_control;
openib_btl->ib_reg[MCA_BTL_TAG_BTL].cbdata = NULL;

Просмотреть файл

@ -278,7 +278,7 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
endpoint->rem_info.rem_lid = 0;
endpoint->rem_info.rem_psn_hp = 0;
endpoint->rem_info.rem_psn_lp = 0;
endpoint->rem_info.rem_subnet = 0;
endpoint->rem_info.rem_subnet_id = 0;
endpoint->rem_info.rem_mtu = 0;
}
@ -349,7 +349,7 @@ static int mca_btl_openib_endpoint_send_connect_data(mca_btl_base_endpoint_t* en
return rc;
}
rc = orte_dss.pack(buffer, &endpoint->subnet, 1, ORTE_UINT64);
rc = orte_dss.pack(buffer, &endpoint->subnet_id, 1, ORTE_UINT64);
if(rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
return rc;
@ -376,7 +376,7 @@ static int mca_btl_openib_endpoint_send_connect_data(mca_btl_base_endpoint_t* en
endpoint->lcl_qp[BTL_OPENIB_HP_QP]->qp_num,
endpoint->lcl_qp[BTL_OPENIB_LP_QP]->qp_num,
endpoint->endpoint_btl->lid,
endpoint->subnet));
endpoint->subnet_id));
@ -617,7 +617,7 @@ static void mca_btl_openib_endpoint_recv(
ORTE_ERROR_LOG(rc);
return;
}
rc = orte_dss.unpack(buffer, &rem_info.rem_subnet, &cnt, ORTE_UINT64);
rc = orte_dss.unpack(buffer, &rem_info.rem_subnet_id, &cnt, ORTE_UINT64);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return;
@ -637,7 +637,7 @@ static void mca_btl_openib_endpoint_recv(
rem_info.rem_qp_num_hp,
rem_info.rem_qp_num_lp,
rem_info.rem_lid,
rem_info.rem_subnet));
rem_info.rem_subnet_id));
for(ib_proc = (mca_btl_openib_proc_t*)
opal_list_get_first(&mca_btl_openib_component.ib_procs);
@ -669,7 +669,7 @@ static void mca_btl_openib_endpoint_recv(
port_info = ib_proc->proc_ports[i];
ib_endpoint = ib_proc->proc_endpoints[i];
if(!ib_endpoint->rem_info.rem_lid &&
ib_endpoint->subnet == rem_info.rem_subnet) {
ib_endpoint->subnet_id == rem_info.rem_subnet_id) {
/* found a match based on subnet! */
found = true;
break;

Просмотреть файл

@ -78,8 +78,8 @@ struct mca_btl_openib_rem_info_t {
uint32_t rem_psn_lp;
/* Remote processes port sequence number (Low and High) */
uint64_t rem_subnet;
/* subnet of remote process */
uint64_t rem_subnet_id;
/* subnet id of remote process */
/* MTU of remote process */
uint32_t rem_mtu;
@ -146,7 +146,7 @@ struct mca_btl_base_endpoint_t {
int32_t sd_credits[2]; /**< number of send wqe entries being used to return credits */
int32_t sd_wqe[2]; /**< number of available send wqe entries */
uint64_t subnet; /**< subnet of this endpoint*/
uint64_t subnet_id; /**< subnet id of this endpoint*/
int32_t eager_recv_count; /**< number of eager received */
mca_btl_openib_eager_rdma_remote_t eager_rdma_remote;