* Fix problem caused by r15848: the text parser was looking for
semicolons but the new specification string used colons. The text parser now looks for colons. * Changed all opal_output() error messages to much-more-helpful/descriptive opal_show_help() messages. * A few minor style/indenting fixes This commit was SVN r15850. The following SVN revision numbers were found above: r15848 --> open-mpi/ompi@dd30597f39
Этот коммит содержится в:
родитель
dd30597f39
Коммит
d7c5fea096
@ -472,7 +472,8 @@ static int32_t atoi_param(char *param, int32_t dflt)
|
|||||||
return atoi(param);
|
return atoi(param);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mca_btl_openib_mca_setup_qps(void) {
|
static int mca_btl_openib_mca_setup_qps(void)
|
||||||
|
{
|
||||||
/* All the multi-qp stuff.. */
|
/* All the multi-qp stuff.. */
|
||||||
char *str;
|
char *str;
|
||||||
char **queues, **params = NULL;
|
char **queues, **params = NULL;
|
||||||
@ -481,25 +482,26 @@ static int mca_btl_openib_mca_setup_qps(void) {
|
|||||||
uint32_t max_qp_size, max_size_needed;
|
uint32_t max_qp_size, max_size_needed;
|
||||||
|
|
||||||
reg_string("receive_queues",
|
reg_string("receive_queues",
|
||||||
"Colon-delimited, coma delimited list of receive queues: P,4096,8,6,4;P,32768,8,6,4",
|
"Colon-delimited, coma delimited list of receive queues: P,4096,8,6,4:P,32768,8,6,4",
|
||||||
default_qps, &str, 0);
|
default_qps, &str, 0);
|
||||||
queues = opal_argv_split(str, ';');
|
queues = opal_argv_split(str, ':');
|
||||||
|
|
||||||
if(opal_argv_count(queues) == 0) {
|
if (0 == opal_argv_count(queues)) {
|
||||||
opal_output(0, "At least one QP has to be specified in"
|
opal_show_help("help-mpi-btl-openib.txt",
|
||||||
" btl_openib_receive_queues\n");
|
"no qps in receive_queues", true,
|
||||||
|
orte_system_info.nodename, str);
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
while(queues[qp] != NULL) {
|
while (queues[qp] != NULL) {
|
||||||
if(strncmp("P,", queues[qp], 2) == 0) {
|
if (0 == strncmp("P,", queues[qp], 2)) {
|
||||||
num_pp_qps++;
|
num_pp_qps++;
|
||||||
} else if(strncmp("S,", queues[qp], 2) == 0) {
|
} else if (0 == strncmp("S,", queues[qp], 2)) {
|
||||||
num_srq_qps++;
|
num_srq_qps++;
|
||||||
} else {
|
} else {
|
||||||
opal_output(0, "Unknown QP type \"%s\" is specified in "
|
opal_show_help("help-mpi-btl-openib.txt",
|
||||||
"btl_openib_receive_queues. Only 'S' - shared or "
|
"invalid qp type in receive_queues", true,
|
||||||
"'P' - point-to-point are supported\n", queues[qp]);
|
orte_system_info.nodename, str, queues[qp]);
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
qp++;
|
qp++;
|
||||||
@ -520,10 +522,11 @@ static int mca_btl_openib_mca_setup_qps(void) {
|
|||||||
params = opal_argv_split_with_empty(queues[qp], ',');
|
params = opal_argv_split_with_empty(queues[qp], ',');
|
||||||
count = opal_argv_count(params);
|
count = opal_argv_count(params);
|
||||||
|
|
||||||
if(params[0][0] == 'P') {
|
if ('P' == params[0][0]) {
|
||||||
if(count < 2 || count > 6) {
|
if (count < 3 || count > 6) {
|
||||||
opal_output(0, "Wrong QP specification (QP %d \"%s\"). "
|
opal_show_help("help-mpi-btl-openib.txt",
|
||||||
"Point-to-point QP get 1-5 parameters\n", qp, queues[qp]);
|
"invalid pp qp specification", true,
|
||||||
|
orte_system_info.nodename, queues[qp]);
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
mca_btl_openib_component.qp_infos[qp].size = atoi_param(P(1), 0);
|
mca_btl_openib_component.qp_infos[qp].size = atoi_param(P(1), 0);
|
||||||
@ -547,9 +550,10 @@ static int mca_btl_openib_mca_setup_qps(void) {
|
|||||||
|
|
||||||
mca_btl_openib_component.qp_infos[qp].type = MCA_BTL_OPENIB_PP_QP;
|
mca_btl_openib_component.qp_infos[qp].type = MCA_BTL_OPENIB_PP_QP;
|
||||||
} else if(params[0][0] =='S') {
|
} else if(params[0][0] =='S') {
|
||||||
if(count < 2 || count > 5) {
|
if(count < 3 || count > 5) {
|
||||||
opal_output(0, "Wrong QP specification (QP %d \"%s\"). "
|
opal_show_help("help-mpi-btl-openib.txt",
|
||||||
"Shared QP get 1-4 parameters\n", qp, queues[qp]);
|
"invalid srq specification", true,
|
||||||
|
orte_system_info.nodename, queues[qp]);
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
mca_btl_openib_component.qp_infos[qp].size = atoi_param(P(1), 0);
|
mca_btl_openib_component.qp_infos[qp].size = atoi_param(P(1), 0);
|
||||||
@ -567,15 +571,16 @@ static int mca_btl_openib_mca_setup_qps(void) {
|
|||||||
mca_btl_openib_component.qp_infos[qp].type = MCA_BTL_OPENIB_SRQ_QP;
|
mca_btl_openib_component.qp_infos[qp].type = MCA_BTL_OPENIB_SRQ_QP;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(mca_btl_openib_component.qp_infos[qp].rd_num <=
|
if (mca_btl_openib_component.qp_infos[qp].rd_num <=
|
||||||
mca_btl_openib_component.qp_infos[qp].rd_low) {
|
mca_btl_openib_component.qp_infos[qp].rd_low) {
|
||||||
opal_output(0, "Wrong QP specification (QP %d \"%s\"). "
|
opal_show_help("help-mpi-btl-openib.txt",
|
||||||
"rd_num should be bigger than rd_low\n", qp,
|
"rd_num must be > rd_low", true,
|
||||||
queues[qp]);
|
orte_system_info.nodename, queues[qp]);
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
while(params[i] != NULL)
|
while (NULL != params[i]) {
|
||||||
free(params[i++]);
|
free(params[i++]);
|
||||||
|
}
|
||||||
free(params);
|
free(params);
|
||||||
qp++;
|
qp++;
|
||||||
}
|
}
|
||||||
@ -584,17 +589,22 @@ static int mca_btl_openib_mca_setup_qps(void) {
|
|||||||
max_qp_size = mca_btl_openib_component.qp_infos[mca_btl_openib_component.num_qps - 1].size;
|
max_qp_size = mca_btl_openib_component.qp_infos[mca_btl_openib_component.num_qps - 1].size;
|
||||||
|
|
||||||
max_size_needed = (mca_btl_openib_module.super.btl_eager_limit >
|
max_size_needed = (mca_btl_openib_module.super.btl_eager_limit >
|
||||||
mca_btl_openib_module.super.btl_max_send_size) ?
|
mca_btl_openib_module.super.btl_max_send_size) ?
|
||||||
mca_btl_openib_module.super.btl_eager_limit :
|
mca_btl_openib_module.super.btl_eager_limit :
|
||||||
mca_btl_openib_module.super.btl_max_send_size;
|
mca_btl_openib_module.super.btl_max_send_size;
|
||||||
|
|
||||||
if(max_qp_size < max_size_needed) {
|
if (max_qp_size < max_size_needed) {
|
||||||
opal_output(0, "The biggest QP is not big enough. "
|
opal_show_help("help-mpi-btl-openib.txt",
|
||||||
"%d bytes configured, but maximum send size may be %d\n",
|
"biggest qp size is too small", true,
|
||||||
max_qp_size, max_size_needed);
|
orte_system_info.nodename, max_qp_size,
|
||||||
|
max_size_needed);
|
||||||
ret = OMPI_ERROR;
|
ret = OMPI_ERROR;
|
||||||
goto error;
|
goto error;
|
||||||
} else if(max_qp_size > max_size_needed) {
|
} else if (max_qp_size > max_size_needed) {
|
||||||
|
opal_show_help("help-mpi-btl-openib.txt",
|
||||||
|
"biggest qp size is too big", true,
|
||||||
|
orte_system_info.nodename, max_qp_size,
|
||||||
|
max_size_needed);
|
||||||
opal_output(0, "The biggest QP size is bigger than maximum send size. "
|
opal_output(0, "The biggest QP size is bigger than maximum send size. "
|
||||||
"This is not optimal configuration as memory will be waisted.\n");
|
"This is not optimal configuration as memory will be waisted.\n");
|
||||||
}
|
}
|
||||||
|
@ -248,3 +248,107 @@ WARNING: An invalid MCA parameter value was found for the OpenFabrics
|
|||||||
|
|
||||||
Problem: %s
|
Problem: %s
|
||||||
Resolution: %s
|
Resolution: %s
|
||||||
|
#
|
||||||
|
[no qps in receive_queues]
|
||||||
|
WARNING: No queue pairs were defined in the btl_openib_receive_queues
|
||||||
|
MCA parameter. At least one queue pair must be defined. The openib
|
||||||
|
BTL will therefore be deactivated for this run.
|
||||||
|
|
||||||
|
Host: %s
|
||||||
|
#
|
||||||
|
[invalid qp type in receive_queues]
|
||||||
|
WARNING: An invalid queue pair type was specified in the
|
||||||
|
btl_openib_receive_queues MCA parameter. The openib BTL will be
|
||||||
|
deactivated for this run.
|
||||||
|
|
||||||
|
Valid queue pair types are "P" for per-peer and "S" for shared receive
|
||||||
|
queue.
|
||||||
|
|
||||||
|
Host: %s
|
||||||
|
btl_openib_receive_queues: %s
|
||||||
|
Bad specification: %s
|
||||||
|
#
|
||||||
|
[invalid pp qp specification]
|
||||||
|
WARNING: An invalid per-peer receive queue specification was detected
|
||||||
|
as part of the btl_openib_receive_queues MCA parameter. The openib
|
||||||
|
BTL will therefore be deactivated for this run.
|
||||||
|
|
||||||
|
Per-peer receive queues require between 1 and 5 parameters:
|
||||||
|
|
||||||
|
1. Buffer size in bytes (mandatory)
|
||||||
|
2. Number of buffers (optional; defaults to 8)
|
||||||
|
3. Low buffer count watermark (optional; defaults to (num_buffers / 2))
|
||||||
|
4. Credit window size (optional; defaults to (low_watermark / 2))
|
||||||
|
5. Number of buffers reserved for credit messages (optional;
|
||||||
|
defaults to (num_buffers*2-1)/credit_window)
|
||||||
|
|
||||||
|
Example: P,128,256,128,16
|
||||||
|
- 128 byte buffers
|
||||||
|
- 256 buffers to receive incoming MPI messages
|
||||||
|
- When the number of available buffers reaches 128, re-post 128 more
|
||||||
|
buffers to reach a total of 256
|
||||||
|
- If the number of available credits reaches 16, send an explicit
|
||||||
|
credit message to the sender
|
||||||
|
- Defaulting to ((256 * 2) - 1) / 16 = 31; this many buffers are
|
||||||
|
reserved for explicit credit messages
|
||||||
|
|
||||||
|
Host: %s
|
||||||
|
Bad queue specification: %s
|
||||||
|
#
|
||||||
|
[invalid srq specification]
|
||||||
|
WARNING: An invalid shared receive queue specification was detected as
|
||||||
|
part of the btl_openib_receive_queues MCA parameter. The openib BTL
|
||||||
|
will therefore be deactivated for this run.
|
||||||
|
|
||||||
|
Shared receive queues can take between 2 and 4 parameters:
|
||||||
|
|
||||||
|
1. Buffer size in bytes (mandatory)
|
||||||
|
2. Number of buffers (optional; defaults to 16)
|
||||||
|
3. Low buffer count watermark (optional; defaults to (num_buffers / 2))
|
||||||
|
4. Maximum number of outstanding sends a sender can have (optional;
|
||||||
|
defaults to (low_watermark / 4))
|
||||||
|
|
||||||
|
Example: S,1024,256,128,32
|
||||||
|
- 1024 byte buffers
|
||||||
|
- 256 buffers to receive incoming MPI messages
|
||||||
|
- When the number of available buffers reaches 128, re-post 128 more
|
||||||
|
buffers to reach a total of 256
|
||||||
|
- A sender will not send to a peer unless it has fewer than 32
|
||||||
|
outstanding sends to that peer.
|
||||||
|
|
||||||
|
Host: %s
|
||||||
|
Bad queue specification: %s
|
||||||
|
#
|
||||||
|
[rd_num must be > rd_low]
|
||||||
|
WARNING: The number of buffers for a queue pair specified via the
|
||||||
|
btl_openib_receive_queues MCA parameter must be greater than the low
|
||||||
|
buffer count watermark. The openib BTL will therefore be deactivated
|
||||||
|
for this run.
|
||||||
|
|
||||||
|
Host: %s
|
||||||
|
Bad queue specification: %s
|
||||||
|
#
|
||||||
|
[biggest qp size is too small]
|
||||||
|
WARNING: The largest queue pair buffer size specified in the
|
||||||
|
btl_openib_receive_queues MCA parameter is smaller than the maximum
|
||||||
|
send size (i.e., the btl_openib_max_send_size MCA parameter), meaning
|
||||||
|
that no queue is large enough to receive the largest possible incoming
|
||||||
|
message fragment. The openib BTL will therefore be deactivated for
|
||||||
|
this run.
|
||||||
|
|
||||||
|
Host: %s
|
||||||
|
Largest buffer size: %d
|
||||||
|
Maximum send fragment size: %d
|
||||||
|
#
|
||||||
|
[biggest qp size is too big]
|
||||||
|
WARNING: The largest queue pair buffer size specified in the
|
||||||
|
btl_openib_receive_queues MCA parameter is larger than the maximum
|
||||||
|
send size (i.e., the btl_openib_max_send_size MCA parameter). This
|
||||||
|
means that memory will be wasted because the largest possible incoming
|
||||||
|
message fragment will not fill a buffer allocated for incoming
|
||||||
|
fragments.
|
||||||
|
|
||||||
|
Host: %s
|
||||||
|
Largest buffer size: %d
|
||||||
|
Maximum send fragment size: %d
|
||||||
|
#
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user