1
1
openmpi/orte/util/nidmap.c

1116 строки
34 KiB
C
Исходник Обычный вид История

Move from the use of regex to compression We've been fighting the battle of trying to create a regex generator and parser that can handle arbitrary hostname schemes - without long-term success. The worst of it is that there is no way of checking to see if the computed regex is correct short of parsing it and doing a character-by-character comparison with the original string. Ugh...there has to be a better solution. One option is to investigate using 3rd-party regex libraries as those are coming from communities whose sole focus is resolving that problem. However, someone would need to spend the time to investigate it, and we'd have to find a license-friendly implementation. Another option is to quit beating our heads against the wall and just compress the information. It won't be as much of a reduction, but we also won't keep hitting scenarios where things break. In this case, it seems that "perfection" is definitely the enemy of "good enough". This PR implements the compression option while retaining the possibility of people adding regex-generating components. The compression code used in ORTE is consolidated into the opal/compress framework. That framework currently held bzip and gzip components for use in compressing checkpoint files - since we no longer support C/R, I have .opal_ignore'd those components. However, I have left the original framework APIs alone in case someone ever decides to redo C/R. The APIs of interest here are added to the framework - specifically, the "compress_block" and "decompress_block" functions. I then moved the ORTE zlib compression code into a new component in this framework. Unfortunately, the framework currently is a single-select one - i.e., only one active component at a time. Since I .opal_ignore'd the other two and made the priority of zlib high, this isn't a problem. However, if someone wants to re-enable bzip/gzip or add another component, they might need to transition opal/compress to a multi-select framework. Included changes: * Consolidate the compression code into the opal/compress framework * Move the ORTE zlib compression code into a new opal/compress/zlib component * Ignore the bzip and gzip components in opal/compress framework * Add a "compress_base_limit" MCA param to set the threshold above which we compress data - defaults to 4096 bytes * Delete stale brucks and rcd components from orte/grpcomm framework * Delete the orte/regx framework * Update the launch system to use opal/compress instead of string regex * Provide a default module if no zlib is available * Fix some misc multi-node issues * Properly generate the nidmap in response to a "connection warmup" message so the remote daemon knows the children it needs to launch. * Remove stale references to orte_node_regex * opal_byte_object_t's are not OPAL objects - properly release allocated memory. * Set the topology * Currently only handling homogeneous case * Update the compress framework files to conform * Consolidate open/close into one "frame" file. Ensure we open/close the framework Signed-off-by: Ralph Castain <rhc@pmix.org>
2019-01-30 03:02:21 +03:00
/*
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2018-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
Move from the use of regex to compression We've been fighting the battle of trying to create a regex generator and parser that can handle arbitrary hostname schemes - without long-term success. The worst of it is that there is no way of checking to see if the computed regex is correct short of parsing it and doing a character-by-character comparison with the original string. Ugh...there has to be a better solution. One option is to investigate using 3rd-party regex libraries as those are coming from communities whose sole focus is resolving that problem. However, someone would need to spend the time to investigate it, and we'd have to find a license-friendly implementation. Another option is to quit beating our heads against the wall and just compress the information. It won't be as much of a reduction, but we also won't keep hitting scenarios where things break. In this case, it seems that "perfection" is definitely the enemy of "good enough". This PR implements the compression option while retaining the possibility of people adding regex-generating components. The compression code used in ORTE is consolidated into the opal/compress framework. That framework currently held bzip and gzip components for use in compressing checkpoint files - since we no longer support C/R, I have .opal_ignore'd those components. However, I have left the original framework APIs alone in case someone ever decides to redo C/R. The APIs of interest here are added to the framework - specifically, the "compress_block" and "decompress_block" functions. I then moved the ORTE zlib compression code into a new component in this framework. Unfortunately, the framework currently is a single-select one - i.e., only one active component at a time. Since I .opal_ignore'd the other two and made the priority of zlib high, this isn't a problem. However, if someone wants to re-enable bzip/gzip or add another component, they might need to transition opal/compress to a multi-select framework. Included changes: * Consolidate the compression code into the opal/compress framework * Move the ORTE zlib compression code into a new opal/compress/zlib component * Ignore the bzip and gzip components in opal/compress framework * Add a "compress_base_limit" MCA param to set the threshold above which we compress data - defaults to 4096 bytes * Delete stale brucks and rcd components from orte/grpcomm framework * Delete the orte/regx framework * Update the launch system to use opal/compress instead of string regex * Provide a default module if no zlib is available * Fix some misc multi-node issues * Properly generate the nidmap in response to a "connection warmup" message so the remote daemon knows the children it needs to launch. * Remove stale references to orte_node_regex * opal_byte_object_t's are not OPAL objects - properly release allocated memory. * Set the topology * Currently only handling homogeneous case * Update the compress framework files to conform * Consolidate open/close into one "frame" file. Ensure we open/close the framework Signed-off-by: Ralph Castain <rhc@pmix.org>
2019-01-30 03:02:21 +03:00
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "orte_config.h"
#include "orte/types.h"
#include "opal/types.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <ctype.h>
#include "opal/dss/dss_types.h"
#include "opal/mca/compress/compress.h"
#include "opal/util/argv.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/base/base.h"
Move from the use of regex to compression We've been fighting the battle of trying to create a regex generator and parser that can handle arbitrary hostname schemes - without long-term success. The worst of it is that there is no way of checking to see if the computed regex is correct short of parsing it and doing a character-by-character comparison with the original string. Ugh...there has to be a better solution. One option is to investigate using 3rd-party regex libraries as those are coming from communities whose sole focus is resolving that problem. However, someone would need to spend the time to investigate it, and we'd have to find a license-friendly implementation. Another option is to quit beating our heads against the wall and just compress the information. It won't be as much of a reduction, but we also won't keep hitting scenarios where things break. In this case, it seems that "perfection" is definitely the enemy of "good enough". This PR implements the compression option while retaining the possibility of people adding regex-generating components. The compression code used in ORTE is consolidated into the opal/compress framework. That framework currently held bzip and gzip components for use in compressing checkpoint files - since we no longer support C/R, I have .opal_ignore'd those components. However, I have left the original framework APIs alone in case someone ever decides to redo C/R. The APIs of interest here are added to the framework - specifically, the "compress_block" and "decompress_block" functions. I then moved the ORTE zlib compression code into a new component in this framework. Unfortunately, the framework currently is a single-select one - i.e., only one active component at a time. Since I .opal_ignore'd the other two and made the priority of zlib high, this isn't a problem. However, if someone wants to re-enable bzip/gzip or add another component, they might need to transition opal/compress to a multi-select framework. Included changes: * Consolidate the compression code into the opal/compress framework * Move the ORTE zlib compression code into a new opal/compress/zlib component * Ignore the bzip and gzip components in opal/compress framework * Add a "compress_base_limit" MCA param to set the threshold above which we compress data - defaults to 4096 bytes * Delete stale brucks and rcd components from orte/grpcomm framework * Delete the orte/regx framework * Update the launch system to use opal/compress instead of string regex * Provide a default module if no zlib is available * Fix some misc multi-node issues * Properly generate the nidmap in response to a "connection warmup" message so the remote daemon knows the children it needs to launch. * Remove stale references to orte_node_regex * opal_byte_object_t's are not OPAL objects - properly release allocated memory. * Set the topology * Currently only handling homogeneous case * Update the compress framework files to conform * Consolidate open/close into one "frame" file. Ensure we open/close the framework Signed-off-by: Ralph Castain <rhc@pmix.org>
2019-01-30 03:02:21 +03:00
#include "orte/mca/routed/routed.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/nidmap.h"
int orte_util_nidmap_create(opal_pointer_array_t *pool,
opal_buffer_t *buffer)
{
char *raw = NULL;
uint8_t *vpids=NULL, u8;
Move from the use of regex to compression We've been fighting the battle of trying to create a regex generator and parser that can handle arbitrary hostname schemes - without long-term success. The worst of it is that there is no way of checking to see if the computed regex is correct short of parsing it and doing a character-by-character comparison with the original string. Ugh...there has to be a better solution. One option is to investigate using 3rd-party regex libraries as those are coming from communities whose sole focus is resolving that problem. However, someone would need to spend the time to investigate it, and we'd have to find a license-friendly implementation. Another option is to quit beating our heads against the wall and just compress the information. It won't be as much of a reduction, but we also won't keep hitting scenarios where things break. In this case, it seems that "perfection" is definitely the enemy of "good enough". This PR implements the compression option while retaining the possibility of people adding regex-generating components. The compression code used in ORTE is consolidated into the opal/compress framework. That framework currently held bzip and gzip components for use in compressing checkpoint files - since we no longer support C/R, I have .opal_ignore'd those components. However, I have left the original framework APIs alone in case someone ever decides to redo C/R. The APIs of interest here are added to the framework - specifically, the "compress_block" and "decompress_block" functions. I then moved the ORTE zlib compression code into a new component in this framework. Unfortunately, the framework currently is a single-select one - i.e., only one active component at a time. Since I .opal_ignore'd the other two and made the priority of zlib high, this isn't a problem. However, if someone wants to re-enable bzip/gzip or add another component, they might need to transition opal/compress to a multi-select framework. Included changes: * Consolidate the compression code into the opal/compress framework * Move the ORTE zlib compression code into a new opal/compress/zlib component * Ignore the bzip and gzip components in opal/compress framework * Add a "compress_base_limit" MCA param to set the threshold above which we compress data - defaults to 4096 bytes * Delete stale brucks and rcd components from orte/grpcomm framework * Delete the orte/regx framework * Update the launch system to use opal/compress instead of string regex * Provide a default module if no zlib is available * Fix some misc multi-node issues * Properly generate the nidmap in response to a "connection warmup" message so the remote daemon knows the children it needs to launch. * Remove stale references to orte_node_regex * opal_byte_object_t's are not OPAL objects - properly release allocated memory. * Set the topology * Currently only handling homogeneous case * Update the compress framework files to conform * Consolidate open/close into one "frame" file. Ensure we open/close the framework Signed-off-by: Ralph Castain <rhc@pmix.org>
2019-01-30 03:02:21 +03:00
uint16_t u16;
uint32_t u32;
int n, ndaemons, rc, nbytes;
Move from the use of regex to compression We've been fighting the battle of trying to create a regex generator and parser that can handle arbitrary hostname schemes - without long-term success. The worst of it is that there is no way of checking to see if the computed regex is correct short of parsing it and doing a character-by-character comparison with the original string. Ugh...there has to be a better solution. One option is to investigate using 3rd-party regex libraries as those are coming from communities whose sole focus is resolving that problem. However, someone would need to spend the time to investigate it, and we'd have to find a license-friendly implementation. Another option is to quit beating our heads against the wall and just compress the information. It won't be as much of a reduction, but we also won't keep hitting scenarios where things break. In this case, it seems that "perfection" is definitely the enemy of "good enough". This PR implements the compression option while retaining the possibility of people adding regex-generating components. The compression code used in ORTE is consolidated into the opal/compress framework. That framework currently held bzip and gzip components for use in compressing checkpoint files - since we no longer support C/R, I have .opal_ignore'd those components. However, I have left the original framework APIs alone in case someone ever decides to redo C/R. The APIs of interest here are added to the framework - specifically, the "compress_block" and "decompress_block" functions. I then moved the ORTE zlib compression code into a new component in this framework. Unfortunately, the framework currently is a single-select one - i.e., only one active component at a time. Since I .opal_ignore'd the other two and made the priority of zlib high, this isn't a problem. However, if someone wants to re-enable bzip/gzip or add another component, they might need to transition opal/compress to a multi-select framework. Included changes: * Consolidate the compression code into the opal/compress framework * Move the ORTE zlib compression code into a new opal/compress/zlib component * Ignore the bzip and gzip components in opal/compress framework * Add a "compress_base_limit" MCA param to set the threshold above which we compress data - defaults to 4096 bytes * Delete stale brucks and rcd components from orte/grpcomm framework * Delete the orte/regx framework * Update the launch system to use opal/compress instead of string regex * Provide a default module if no zlib is available * Fix some misc multi-node issues * Properly generate the nidmap in response to a "connection warmup" message so the remote daemon knows the children it needs to launch. * Remove stale references to orte_node_regex * opal_byte_object_t's are not OPAL objects - properly release allocated memory. * Set the topology * Currently only handling homogeneous case * Update the compress framework files to conform * Consolidate open/close into one "frame" file. Ensure we open/close the framework Signed-off-by: Ralph Castain <rhc@pmix.org>
2019-01-30 03:02:21 +03:00
bool compressed;
char **names = NULL, **ranks = NULL;
orte_node_t *nptr;
opal_byte_object_t bo, *boptr;
size_t sz;
/* pack a flag indicating if the HNP was included in the allocation */
if (orte_hnp_is_allocated) {
u8 = 1;
} else {
u8 = 0;
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &u8, 1, OPAL_UINT8))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack a flag indicating if we are in a managed allocation */
if (orte_managed_allocation) {
u8 = 1;
} else {
u8 = 0;
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &u8, 1, OPAL_UINT8))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* daemon vpids start from 0 and increase linearly by one
* up to the number of nodes in the system. The vpid is
* a 32-bit value. We don't know how many of the nodes
* in the system have daemons - we may not be using them
* all just yet. However, even the largest systems won't
* have more than a million nodes for quite some time,
* so for now we'll just allocate enough space to hold
* them all. Someone can optimize this further later */
if (256 >= pool->size) {
nbytes = 1;
} else if (65536 >= pool->size) {
nbytes = 2;
} else {
nbytes = 4;
}
vpids = (uint8_t*)malloc(nbytes * pool->size);
ndaemons = 0;
for (n=0; n < pool->size; n++) {
if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(pool, n))) {
continue;
}
/* add the hostname to the argv */
opal_argv_append_nosize(&names, nptr->name);
/* store the vpid */
if (1 == nbytes) {
if (NULL == nptr->daemon) {
vpids[ndaemons] = UINT8_MAX;
} else {
vpids[ndaemons] = nptr->daemon->name.vpid;
}
} else if (2 == nbytes) {
if (NULL == nptr->daemon) {
u16 = UINT16_MAX;
} else {
u16 = nptr->daemon->name.vpid;
}
memcpy(&vpids[nbytes*ndaemons], &u16, 2);
} else {
if (NULL == nptr->daemon) {
u32 = UINT32_MAX;
} else {
u32 = nptr->daemon->name.vpid;
}
memcpy(&vpids[nbytes*ndaemons], &u32, 4);
}
++ndaemons;
}
/* construct the string of node names for compression */
raw = opal_argv_join(names, ',');
if (opal_compress.compress_block((uint8_t*)raw, strlen(raw)+1,
(uint8_t**)&bo.bytes, &sz)) {
/* mark that this was compressed */
compressed = true;
bo.size = sz;
} else {
/* mark that this was not compressed */
compressed = false;
bo.bytes = (uint8_t*)raw;
bo.size = strlen(raw)+1;
}
/* indicate compression */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &compressed, 1, OPAL_BOOL))) {
if (compressed) {
free(bo.bytes);
}
goto cleanup;
}
/* if compressed, provide the uncompressed size */
if (compressed) {
sz = strlen(raw)+1;
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &sz, 1, OPAL_SIZE))) {
free(bo.bytes);
goto cleanup;
}
}
/* add the object */
boptr = &bo;
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT))) {
if (compressed) {
free(bo.bytes);
}
goto cleanup;
}
if (compressed) {
free(bo.bytes);
}
/* compress the vpids */
if (opal_compress.compress_block(vpids, nbytes*ndaemons,
(uint8_t**)&bo.bytes, &sz)) {
/* mark that this was compressed */
compressed = true;
bo.size = sz;
} else {
/* mark that this was not compressed */
compressed = false;
bo.bytes = vpids;
bo.size = nbytes*ndaemons;
}
/* indicate compression */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &compressed, 1, OPAL_BOOL))) {
if (compressed) {
free(bo.bytes);
}
goto cleanup;
}
/* provide the #bytes/vpid */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &nbytes, 1, OPAL_INT))) {
if (compressed) {
free(bo.bytes);
}
goto cleanup;
}
/* if compressed, provide the uncompressed size */
if (compressed) {
sz = nbytes*ndaemons;
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &sz, 1, OPAL_SIZE))) {
free(bo.bytes);
goto cleanup;
}
}
/* add the object */
boptr = &bo;
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT))) {
if (compressed) {
free(bo.bytes);
}
goto cleanup;
}
if (compressed) {
free(bo.bytes);
}
cleanup:
if (NULL != names) {
opal_argv_free(names);
}
if (NULL != raw) {
free(raw);
}
if (NULL != ranks) {
opal_argv_free(ranks);
}
if (NULL != vpids) {
free(vpids);
}
return rc;
}
int orte_util_decode_nidmap(opal_buffer_t *buf)
{
uint8_t u8, *vp8 = NULL;
uint16_t *vp16 = NULL;
Move from the use of regex to compression We've been fighting the battle of trying to create a regex generator and parser that can handle arbitrary hostname schemes - without long-term success. The worst of it is that there is no way of checking to see if the computed regex is correct short of parsing it and doing a character-by-character comparison with the original string. Ugh...there has to be a better solution. One option is to investigate using 3rd-party regex libraries as those are coming from communities whose sole focus is resolving that problem. However, someone would need to spend the time to investigate it, and we'd have to find a license-friendly implementation. Another option is to quit beating our heads against the wall and just compress the information. It won't be as much of a reduction, but we also won't keep hitting scenarios where things break. In this case, it seems that "perfection" is definitely the enemy of "good enough". This PR implements the compression option while retaining the possibility of people adding regex-generating components. The compression code used in ORTE is consolidated into the opal/compress framework. That framework currently held bzip and gzip components for use in compressing checkpoint files - since we no longer support C/R, I have .opal_ignore'd those components. However, I have left the original framework APIs alone in case someone ever decides to redo C/R. The APIs of interest here are added to the framework - specifically, the "compress_block" and "decompress_block" functions. I then moved the ORTE zlib compression code into a new component in this framework. Unfortunately, the framework currently is a single-select one - i.e., only one active component at a time. Since I .opal_ignore'd the other two and made the priority of zlib high, this isn't a problem. However, if someone wants to re-enable bzip/gzip or add another component, they might need to transition opal/compress to a multi-select framework. Included changes: * Consolidate the compression code into the opal/compress framework * Move the ORTE zlib compression code into a new opal/compress/zlib component * Ignore the bzip and gzip components in opal/compress framework * Add a "compress_base_limit" MCA param to set the threshold above which we compress data - defaults to 4096 bytes * Delete stale brucks and rcd components from orte/grpcomm framework * Delete the orte/regx framework * Update the launch system to use opal/compress instead of string regex * Provide a default module if no zlib is available * Fix some misc multi-node issues * Properly generate the nidmap in response to a "connection warmup" message so the remote daemon knows the children it needs to launch. * Remove stale references to orte_node_regex * opal_byte_object_t's are not OPAL objects - properly release allocated memory. * Set the topology * Currently only handling homogeneous case * Update the compress framework files to conform * Consolidate open/close into one "frame" file. Ensure we open/close the framework Signed-off-by: Ralph Castain <rhc@pmix.org>
2019-01-30 03:02:21 +03:00
uint32_t *vp32 = NULL, vpid;
int cnt, rc, nbytes, n;
bool compressed;
size_t sz;
opal_byte_object_t *boptr;
char *raw = NULL, **names = NULL;
orte_node_t *nd;
orte_job_t *daemons;
orte_proc_t *proc;
orte_topology_t *t;
/* unpack the flag indicating if HNP is in allocation */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &u8, &cnt, OPAL_UINT8))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (1 == u8) {
orte_hnp_is_allocated = true;
} else {
orte_hnp_is_allocated = false;
}
/* unpack the flag indicating if we are in managed allocation */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &u8, &cnt, OPAL_UINT8))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (1 == u8) {
orte_managed_allocation = true;
} else {
orte_managed_allocation = false;
}
/* unpack compression flag for node names */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &compressed, &cnt, OPAL_BOOL))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* if compressed, get the uncompressed size */
if (compressed) {
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
}
/* unpack the nodename object */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* if compressed, decompress */
if (compressed) {
if (!opal_compress.decompress_block((uint8_t**)&raw, sz,
boptr->bytes, boptr->size)) {
ORTE_ERROR_LOG(ORTE_ERROR);
if (NULL != boptr->bytes) {
free(boptr->bytes);
}
free(boptr);
rc = ORTE_ERROR;
goto cleanup;
}
} else {
raw = (char*)boptr->bytes;
boptr->bytes = NULL;
boptr->size = 0;
}
if (NULL != boptr->bytes) {
free(boptr->bytes);
}
free(boptr);
names = opal_argv_split(raw, ',');
free(raw);
/* unpack compression flag for daemon vpids */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &compressed, &cnt, OPAL_BOOL))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* unpack the #bytes/vpid */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &nbytes, &cnt, OPAL_INT))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* if compressed, get the uncompressed size */
if (compressed) {
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
}
/* unpack the vpid object */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* if compressed, decompress */
if (compressed) {
if (!opal_compress.decompress_block((uint8_t**)&vp8, sz,
boptr->bytes, boptr->size)) {
ORTE_ERROR_LOG(ORTE_ERROR);
if (NULL != boptr->bytes) {
free(boptr->bytes);
}
free(boptr);
rc = ORTE_ERROR;
goto cleanup;
}
} else {
vp8 = (uint8_t*)boptr->bytes;
boptr->bytes = NULL;
boptr->size = 0;
}
if (NULL != boptr->bytes) {
free(boptr->bytes);
}
free(boptr);
if (2 == nbytes) {
vp16 = (uint16_t*)vp8;
vp8 = NULL;
} else if (4 == nbytes) {
vp32 = (uint32_t*)vp8;
vp8 = NULL;
}
/* if we are the HNP, we don't need any of this stuff */
if (ORTE_PROC_IS_HNP) {
goto cleanup;
}
/* get the daemon job object */
daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
/* get our topology */
for (n=0; n < orte_node_topologies->size; n++) {
if (NULL != (t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, n))) {
break;
}
}
/* create the node pool array - this will include
* _all_ nodes known to the allocation */
for (n=0; NULL != names[n]; n++) {
/* add this name to the pool */
nd = OBJ_NEW(orte_node_t);
nd->name = strdup(names[n]);
Move from the use of regex to compression We've been fighting the battle of trying to create a regex generator and parser that can handle arbitrary hostname schemes - without long-term success. The worst of it is that there is no way of checking to see if the computed regex is correct short of parsing it and doing a character-by-character comparison with the original string. Ugh...there has to be a better solution. One option is to investigate using 3rd-party regex libraries as those are coming from communities whose sole focus is resolving that problem. However, someone would need to spend the time to investigate it, and we'd have to find a license-friendly implementation. Another option is to quit beating our heads against the wall and just compress the information. It won't be as much of a reduction, but we also won't keep hitting scenarios where things break. In this case, it seems that "perfection" is definitely the enemy of "good enough". This PR implements the compression option while retaining the possibility of people adding regex-generating components. The compression code used in ORTE is consolidated into the opal/compress framework. That framework currently held bzip and gzip components for use in compressing checkpoint files - since we no longer support C/R, I have .opal_ignore'd those components. However, I have left the original framework APIs alone in case someone ever decides to redo C/R. The APIs of interest here are added to the framework - specifically, the "compress_block" and "decompress_block" functions. I then moved the ORTE zlib compression code into a new component in this framework. Unfortunately, the framework currently is a single-select one - i.e., only one active component at a time. Since I .opal_ignore'd the other two and made the priority of zlib high, this isn't a problem. However, if someone wants to re-enable bzip/gzip or add another component, they might need to transition opal/compress to a multi-select framework. Included changes: * Consolidate the compression code into the opal/compress framework * Move the ORTE zlib compression code into a new opal/compress/zlib component * Ignore the bzip and gzip components in opal/compress framework * Add a "compress_base_limit" MCA param to set the threshold above which we compress data - defaults to 4096 bytes * Delete stale brucks and rcd components from orte/grpcomm framework * Delete the orte/regx framework * Update the launch system to use opal/compress instead of string regex * Provide a default module if no zlib is available * Fix some misc multi-node issues * Properly generate the nidmap in response to a "connection warmup" message so the remote daemon knows the children it needs to launch. * Remove stale references to orte_node_regex * opal_byte_object_t's are not OPAL objects - properly release allocated memory. * Set the topology * Currently only handling homogeneous case * Update the compress framework files to conform * Consolidate open/close into one "frame" file. Ensure we open/close the framework Signed-off-by: Ralph Castain <rhc@pmix.org>
2019-01-30 03:02:21 +03:00
opal_pointer_array_set_item(orte_node_pool, n, nd);
/* set the topology - always default to homogeneous
* as that is the most common scenario */
Move from the use of regex to compression We've been fighting the battle of trying to create a regex generator and parser that can handle arbitrary hostname schemes - without long-term success. The worst of it is that there is no way of checking to see if the computed regex is correct short of parsing it and doing a character-by-character comparison with the original string. Ugh...there has to be a better solution. One option is to investigate using 3rd-party regex libraries as those are coming from communities whose sole focus is resolving that problem. However, someone would need to spend the time to investigate it, and we'd have to find a license-friendly implementation. Another option is to quit beating our heads against the wall and just compress the information. It won't be as much of a reduction, but we also won't keep hitting scenarios where things break. In this case, it seems that "perfection" is definitely the enemy of "good enough". This PR implements the compression option while retaining the possibility of people adding regex-generating components. The compression code used in ORTE is consolidated into the opal/compress framework. That framework currently held bzip and gzip components for use in compressing checkpoint files - since we no longer support C/R, I have .opal_ignore'd those components. However, I have left the original framework APIs alone in case someone ever decides to redo C/R. The APIs of interest here are added to the framework - specifically, the "compress_block" and "decompress_block" functions. I then moved the ORTE zlib compression code into a new component in this framework. Unfortunately, the framework currently is a single-select one - i.e., only one active component at a time. Since I .opal_ignore'd the other two and made the priority of zlib high, this isn't a problem. However, if someone wants to re-enable bzip/gzip or add another component, they might need to transition opal/compress to a multi-select framework. Included changes: * Consolidate the compression code into the opal/compress framework * Move the ORTE zlib compression code into a new opal/compress/zlib component * Ignore the bzip and gzip components in opal/compress framework * Add a "compress_base_limit" MCA param to set the threshold above which we compress data - defaults to 4096 bytes * Delete stale brucks and rcd components from orte/grpcomm framework * Delete the orte/regx framework * Update the launch system to use opal/compress instead of string regex * Provide a default module if no zlib is available * Fix some misc multi-node issues * Properly generate the nidmap in response to a "connection warmup" message so the remote daemon knows the children it needs to launch. * Remove stale references to orte_node_regex * opal_byte_object_t's are not OPAL objects - properly release allocated memory. * Set the topology * Currently only handling homogeneous case * Update the compress framework files to conform * Consolidate open/close into one "frame" file. Ensure we open/close the framework Signed-off-by: Ralph Castain <rhc@pmix.org>
2019-01-30 03:02:21 +03:00
nd->topology = t;
/* see if it has a daemon on it */
if (1 == nbytes && UINT8_MAX != vp8[n]) {
vpid = vp8[n];
} else if (2 == nbytes && UINT16_MAX != vp16[n]) {
vpid = vp16[n];
} else if (4 == nbytes && UINT32_MAX != vp32[n]) {
vpid = vp32[n];
} else {
vpid = UINT32_MAX;
}
if (UINT32_MAX != vpid) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, vpid))) {
proc = OBJ_NEW(orte_proc_t);
proc->name.jobid = ORTE_PROC_MY_NAME->jobid;
proc->name.vpid = vpid;
proc->state = ORTE_PROC_STATE_RUNNING;
ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE);
daemons->num_procs++;
opal_pointer_array_set_item(daemons->procs, proc->name.vpid, proc);
}
nd->index = proc->name.vpid;
OBJ_RETAIN(nd);
proc->node = nd;
OBJ_RETAIN(proc);
nd->daemon = proc;
Move from the use of regex to compression We've been fighting the battle of trying to create a regex generator and parser that can handle arbitrary hostname schemes - without long-term success. The worst of it is that there is no way of checking to see if the computed regex is correct short of parsing it and doing a character-by-character comparison with the original string. Ugh...there has to be a better solution. One option is to investigate using 3rd-party regex libraries as those are coming from communities whose sole focus is resolving that problem. However, someone would need to spend the time to investigate it, and we'd have to find a license-friendly implementation. Another option is to quit beating our heads against the wall and just compress the information. It won't be as much of a reduction, but we also won't keep hitting scenarios where things break. In this case, it seems that "perfection" is definitely the enemy of "good enough". This PR implements the compression option while retaining the possibility of people adding regex-generating components. The compression code used in ORTE is consolidated into the opal/compress framework. That framework currently held bzip and gzip components for use in compressing checkpoint files - since we no longer support C/R, I have .opal_ignore'd those components. However, I have left the original framework APIs alone in case someone ever decides to redo C/R. The APIs of interest here are added to the framework - specifically, the "compress_block" and "decompress_block" functions. I then moved the ORTE zlib compression code into a new component in this framework. Unfortunately, the framework currently is a single-select one - i.e., only one active component at a time. Since I .opal_ignore'd the other two and made the priority of zlib high, this isn't a problem. However, if someone wants to re-enable bzip/gzip or add another component, they might need to transition opal/compress to a multi-select framework. Included changes: * Consolidate the compression code into the opal/compress framework * Move the ORTE zlib compression code into a new opal/compress/zlib component * Ignore the bzip and gzip components in opal/compress framework * Add a "compress_base_limit" MCA param to set the threshold above which we compress data - defaults to 4096 bytes * Delete stale brucks and rcd components from orte/grpcomm framework * Delete the orte/regx framework * Update the launch system to use opal/compress instead of string regex * Provide a default module if no zlib is available * Fix some misc multi-node issues * Properly generate the nidmap in response to a "connection warmup" message so the remote daemon knows the children it needs to launch. * Remove stale references to orte_node_regex * opal_byte_object_t's are not OPAL objects - properly release allocated memory. * Set the topology * Currently only handling homogeneous case * Update the compress framework files to conform * Consolidate open/close into one "frame" file. Ensure we open/close the framework Signed-off-by: Ralph Castain <rhc@pmix.org>
2019-01-30 03:02:21 +03:00
}
}
/* update num procs */
if (orte_process_info.num_procs != daemons->num_procs) {
orte_process_info.num_procs = daemons->num_procs;
}
/* need to update the routing plan */
orte_routed.update_routing_plan();
Move from the use of regex to compression We've been fighting the battle of trying to create a regex generator and parser that can handle arbitrary hostname schemes - without long-term success. The worst of it is that there is no way of checking to see if the computed regex is correct short of parsing it and doing a character-by-character comparison with the original string. Ugh...there has to be a better solution. One option is to investigate using 3rd-party regex libraries as those are coming from communities whose sole focus is resolving that problem. However, someone would need to spend the time to investigate it, and we'd have to find a license-friendly implementation. Another option is to quit beating our heads against the wall and just compress the information. It won't be as much of a reduction, but we also won't keep hitting scenarios where things break. In this case, it seems that "perfection" is definitely the enemy of "good enough". This PR implements the compression option while retaining the possibility of people adding regex-generating components. The compression code used in ORTE is consolidated into the opal/compress framework. That framework currently held bzip and gzip components for use in compressing checkpoint files - since we no longer support C/R, I have .opal_ignore'd those components. However, I have left the original framework APIs alone in case someone ever decides to redo C/R. The APIs of interest here are added to the framework - specifically, the "compress_block" and "decompress_block" functions. I then moved the ORTE zlib compression code into a new component in this framework. Unfortunately, the framework currently is a single-select one - i.e., only one active component at a time. Since I .opal_ignore'd the other two and made the priority of zlib high, this isn't a problem. However, if someone wants to re-enable bzip/gzip or add another component, they might need to transition opal/compress to a multi-select framework. Included changes: * Consolidate the compression code into the opal/compress framework * Move the ORTE zlib compression code into a new opal/compress/zlib component * Ignore the bzip and gzip components in opal/compress framework * Add a "compress_base_limit" MCA param to set the threshold above which we compress data - defaults to 4096 bytes * Delete stale brucks and rcd components from orte/grpcomm framework * Delete the orte/regx framework * Update the launch system to use opal/compress instead of string regex * Provide a default module if no zlib is available * Fix some misc multi-node issues * Properly generate the nidmap in response to a "connection warmup" message so the remote daemon knows the children it needs to launch. * Remove stale references to orte_node_regex * opal_byte_object_t's are not OPAL objects - properly release allocated memory. * Set the topology * Currently only handling homogeneous case * Update the compress framework files to conform * Consolidate open/close into one "frame" file. Ensure we open/close the framework Signed-off-by: Ralph Castain <rhc@pmix.org>
2019-01-30 03:02:21 +03:00
if (orte_process_info.max_procs < orte_process_info.num_procs) {
orte_process_info.max_procs = orte_process_info.num_procs;
}
cleanup:
if (NULL != vp8) {
free(vp8);
}
if (NULL != vp16) {
free(vp16);
}
if (NULL != vp32) {
free(vp32);
}
if (NULL != names) {
opal_argv_free(names);
}
Move from the use of regex to compression We've been fighting the battle of trying to create a regex generator and parser that can handle arbitrary hostname schemes - without long-term success. The worst of it is that there is no way of checking to see if the computed regex is correct short of parsing it and doing a character-by-character comparison with the original string. Ugh...there has to be a better solution. One option is to investigate using 3rd-party regex libraries as those are coming from communities whose sole focus is resolving that problem. However, someone would need to spend the time to investigate it, and we'd have to find a license-friendly implementation. Another option is to quit beating our heads against the wall and just compress the information. It won't be as much of a reduction, but we also won't keep hitting scenarios where things break. In this case, it seems that "perfection" is definitely the enemy of "good enough". This PR implements the compression option while retaining the possibility of people adding regex-generating components. The compression code used in ORTE is consolidated into the opal/compress framework. That framework currently held bzip and gzip components for use in compressing checkpoint files - since we no longer support C/R, I have .opal_ignore'd those components. However, I have left the original framework APIs alone in case someone ever decides to redo C/R. The APIs of interest here are added to the framework - specifically, the "compress_block" and "decompress_block" functions. I then moved the ORTE zlib compression code into a new component in this framework. Unfortunately, the framework currently is a single-select one - i.e., only one active component at a time. Since I .opal_ignore'd the other two and made the priority of zlib high, this isn't a problem. However, if someone wants to re-enable bzip/gzip or add another component, they might need to transition opal/compress to a multi-select framework. Included changes: * Consolidate the compression code into the opal/compress framework * Move the ORTE zlib compression code into a new opal/compress/zlib component * Ignore the bzip and gzip components in opal/compress framework * Add a "compress_base_limit" MCA param to set the threshold above which we compress data - defaults to 4096 bytes * Delete stale brucks and rcd components from orte/grpcomm framework * Delete the orte/regx framework * Update the launch system to use opal/compress instead of string regex * Provide a default module if no zlib is available * Fix some misc multi-node issues * Properly generate the nidmap in response to a "connection warmup" message so the remote daemon knows the children it needs to launch. * Remove stale references to orte_node_regex * opal_byte_object_t's are not OPAL objects - properly release allocated memory. * Set the topology * Currently only handling homogeneous case * Update the compress framework files to conform * Consolidate open/close into one "frame" file. Ensure we open/close the framework Signed-off-by: Ralph Castain <rhc@pmix.org>
2019-01-30 03:02:21 +03:00
return rc;
}
typedef struct {
opal_list_item_t super;
orte_topology_t *t;
} orte_tptr_trk_t;
static OBJ_CLASS_INSTANCE(orte_tptr_trk_t,
opal_list_item_t,
NULL, NULL);
int orte_util_pass_node_info(opal_buffer_t *buffer)
{
uint16_t *slots=NULL, slot = UINT16_MAX;
uint8_t *flags=NULL, flag = UINT8_MAX, *topologies = NULL;
int8_t i8, ntopos;
int rc, n, nbitmap, nstart;
bool compressed, unislots = true, uniflags = true, unitopos = true;
orte_node_t *nptr;
opal_byte_object_t bo, *boptr;
size_t sz, nslots;
opal_buffer_t bucket;
orte_tptr_trk_t *trk;
opal_list_t topos;
orte_topology_t *t;
/* make room for the number of slots on each node */
nslots = sizeof(uint16_t) * orte_node_pool->size;
slots = (uint16_t*)malloc(nslots);
/* and for the flags for each node - only need one bit/node */
nbitmap = (orte_node_pool->size / 8) + 1;
flags = (uint8_t*)calloc(1, nbitmap);
/* handle the topologies - as the most common case by far
* is to have homogeneous topologies, we only send them
* if something is different. We know that the HNP is
* the first topology, and that any differing topology
* on the compute nodes must follow. So send the topologies
* if and only if:
*
* (a) the HNP is being used to house application procs and
* there is more than one topology in our array; or
*
* (b) the HNP is not being used, but there are more than
* two topologies in our array, thus indicating that
* there are multiple topologies on the compute nodes
*/
if (!orte_hnp_is_allocated || (ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) {
nstart = 1;
} else {
nstart = 0;
}
OBJ_CONSTRUCT(&topos, opal_list_t);
OBJ_CONSTRUCT(&bucket, opal_buffer_t);
for (n=nstart; n < orte_node_topologies->size; n++) {
if (NULL == (t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, n))) {
continue;
}
trk = OBJ_NEW(orte_tptr_trk_t);
trk->t = t;
opal_list_append(&topos, &trk->super);
/* pack this topology string */
if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &t->sig, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&bucket);
goto cleanup;
}
/* pack the topology itself */
if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &t->topo, 1, OPAL_HWLOC_TOPO))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&bucket);
goto cleanup;
}
}
/* pack the number of topologies in allocation */
ntopos = opal_list_get_size(&topos);
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &ntopos, 1, OPAL_INT8))) {
goto cleanup;
}
if (1 < ntopos) {
/* need to send them along */
opal_dss.copy_payload(buffer, &bucket);
/* allocate space to report them */
ntopos = orte_node_pool->size;
topologies = (uint8_t*)malloc(ntopos);
unitopos = false;
}
OBJ_DESTRUCT(&bucket);
for (n=0; n < orte_node_pool->size; n++) {
if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
continue;
}
/* store the topology, if required */
if (!unitopos) {
topologies[n] = 0;
if (0 == nstart || 0 < n) {
OPAL_LIST_FOREACH(trk, &topos, orte_tptr_trk_t) {
if (trk->t == nptr->topology) {
break;
}
topologies[n]++;
}
}
}
/* store the number of slots */
slots[n] = nptr->slots;
if (UINT16_MAX == slot) {
slot = nptr->slots;
} else if (slot != nptr->slots) {
unislots = false;
}
/* store the flag */
if (ORTE_FLAG_TEST(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
flags[n/8] |= (1 << (7 - (n % 8)));
if (UINT8_MAX == flag) {
flag = 1;
} else if (1 != flag) {
uniflags = false;
}
} else {
if (UINT8_MAX == flag) {
flag = 0;
} else if (0 != flag) {
uniflags = false;
}
}
}
/* deal with the topology assignments */
if (!unitopos) {
if (opal_compress.compress_block((uint8_t*)topologies, ntopos,
(uint8_t**)&bo.bytes, &sz)) {
/* mark that this was compressed */
i8 = 1;
compressed = true;
bo.size = sz;
} else {
/* mark that this was not compressed */
i8 = 0;
compressed = false;
bo.bytes = topologies;
bo.size = nbitmap;
}
/* indicate compression */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &i8, 1, OPAL_INT8))) {
if (compressed) {
free(bo.bytes);
}
goto cleanup;
}
/* if compressed, provide the uncompressed size */
if (compressed) {
sz = nslots;
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &sz, 1, OPAL_SIZE))) {
free(bo.bytes);
goto cleanup;
}
}
/* add the object */
boptr = &bo;
rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT);
if (compressed) {
free(bo.bytes);
}
}
/* if we have uniform #slots, then just flag it - no
* need to pass anything */
if (unislots) {
i8 = -1 * slot;
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &i8, 1, OPAL_INT8))) {
goto cleanup;
}
} else {
if (opal_compress.compress_block((uint8_t*)slots, nslots,
(uint8_t**)&bo.bytes, &sz)) {
/* mark that this was compressed */
i8 = 1;
compressed = true;
bo.size = sz;
} else {
/* mark that this was not compressed */
i8 = 0;
compressed = false;
bo.bytes = flags;
bo.size = nbitmap;
}
/* indicate compression */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &i8, 1, OPAL_INT8))) {
if (compressed) {
free(bo.bytes);
}
goto cleanup;
}
/* if compressed, provide the uncompressed size */
if (compressed) {
sz = nslots;
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &sz, 1, OPAL_SIZE))) {
free(bo.bytes);
goto cleanup;
}
}
/* add the object */
boptr = &bo;
rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT);
if (compressed) {
free(bo.bytes);
}
}
/* if we have uniform flags, then just flag it - no
* need to pass anything */
if (uniflags) {
if (1 == flag) {
i8 = -1;
} else {
i8 = -2;
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &i8, 1, OPAL_INT8))) {
goto cleanup;
}
} else {
if (opal_compress.compress_block(flags, nbitmap,
(uint8_t**)&bo.bytes, &sz)) {
/* mark that this was compressed */
i8 = 2;
compressed = true;
bo.size = sz;
} else {
/* mark that this was not compressed */
i8 = 3;
compressed = false;
bo.bytes = flags;
bo.size = nbitmap;
}
/* indicate compression */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &i8, 1, OPAL_INT8))) {
if (compressed) {
free(bo.bytes);
}
goto cleanup;
}
/* if compressed, provide the uncompressed size */
if (compressed) {
sz = nbitmap;
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &sz, 1, OPAL_SIZE))) {
free(bo.bytes);
goto cleanup;
}
}
/* add the object */
boptr = &bo;
rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT);
if (compressed) {
free(bo.bytes);
}
}
cleanup:
if (NULL != slots) {
free(slots);
}
if (NULL != flags) {
free(flags);
}
return rc;
}
int orte_util_parse_node_info(opal_buffer_t *buf)
{
int8_t i8;
int rc = ORTE_SUCCESS, cnt, n, m;
orte_node_t *nptr;
size_t sz;
opal_byte_object_t *boptr;
uint16_t *slots = NULL;
uint8_t *flags = NULL;
uint8_t *topologies = NULL;
orte_topology_t *t2, **tps = NULL;
hwloc_topology_t topo;
char *sig;
/* check to see if we have uniform topologies */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &i8, &cnt, OPAL_INT8))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* we already defaulted to uniform topology, so only need to
* process this if it is non-uniform */
if (1 < i8) {
/* create an array to cache these */
tps = (orte_topology_t**)malloc(sizeof(orte_topology_t*));
for (n=0; n < i8; n++) {
cnt = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buf, &sig, &cnt, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
cnt = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buf, &topo, &cnt, OPAL_HWLOC_TOPO))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* new topology - record it */
t2 = OBJ_NEW(orte_topology_t);
t2->sig = sig;
t2->topo = topo;
opal_pointer_array_add(orte_node_topologies, t2);
/* keep a cached copy */
tps[n] = t2;
}
/* now get the array of assigned topologies */
/* if compressed, get the uncompressed size */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* unpack the topologies object */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* if compressed, decompress */
if (1 == i8) {
if (!opal_compress.decompress_block((uint8_t**)&topologies, sz,
boptr->bytes, boptr->size)) {
ORTE_ERROR_LOG(ORTE_ERROR);
if (NULL != boptr->bytes) {
free(boptr->bytes);
}
free(boptr);
rc = ORTE_ERROR;
goto cleanup;
}
} else {
topologies = (uint8_t*)boptr->bytes;
boptr->bytes = NULL;
boptr->size = 0;
}
if (NULL != boptr->bytes) {
free(boptr->bytes);
}
free(boptr);
/* cycle across the node pool and assign the values */
for (n=0, m=0; n < orte_node_pool->size; n++) {
if (NULL != (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
nptr->topology = tps[topologies[m]];
++m;
}
}
}
/* check to see if we have uniform slot assignments */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &i8, &cnt, OPAL_INT8))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* if so, then make every node the same */
if (0 > i8) {
i8 = -1 * i8;
for (n=0; n < orte_node_pool->size; n++) {
if (NULL != (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
nptr->slots = i8;
}
}
} else {
/* if compressed, get the uncompressed size */
if (1 == i8) {
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
}
/* unpack the slots object */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* if compressed, decompress */
if (1 == i8) {
if (!opal_compress.decompress_block((uint8_t**)&slots, sz,
boptr->bytes, boptr->size)) {
ORTE_ERROR_LOG(ORTE_ERROR);
if (NULL != boptr->bytes) {
free(boptr->bytes);
}
free(boptr);
rc = ORTE_ERROR;
goto cleanup;
}
} else {
slots = (uint16_t*)boptr->bytes;
boptr->bytes = NULL;
boptr->size = 0;
}
if (NULL != boptr->bytes) {
free(boptr->bytes);
}
free(boptr);
/* cycle across the node pool and assign the values */
for (n=0, m=0; n < orte_node_pool->size; n++) {
if (NULL != (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
nptr->slots = slots[m];
++m;
}
}
}
/* check to see if we have uniform flag assignments */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &i8, &cnt, OPAL_INT8))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* if so, then make every node the same */
if (0 > i8) {
i8 += 2;
for (n=0; n < orte_node_pool->size; n++) {
if (NULL != (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
if (i8) {
ORTE_FLAG_SET(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN);
} else {
ORTE_FLAG_UNSET(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN);
}
}
}
} else {
/* if compressed, get the uncompressed size */
if (1 == i8) {
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
}
/* unpack the slots object */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* if compressed, decompress */
if (1 == i8) {
if (!opal_compress.decompress_block((uint8_t**)&flags, sz,
boptr->bytes, boptr->size)) {
ORTE_ERROR_LOG(ORTE_ERROR);
if (NULL != boptr->bytes) {
free(boptr->bytes);
}
free(boptr);
rc = ORTE_ERROR;
goto cleanup;
}
} else {
flags = (uint8_t*)boptr->bytes;
boptr->bytes = NULL;
boptr->size = 0;
}
if (NULL != boptr->bytes) {
free(boptr->bytes);
}
free(boptr);
/* cycle across the node pool and assign the values */
for (n=0, m=0; n < orte_node_pool->size; n++) {
if (NULL != (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
if (flags[m]) {
ORTE_FLAG_SET(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN);
} else {
ORTE_FLAG_UNSET(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN);
}
++m;
}
}
}
cleanup:
if (NULL != slots) {
free(slots);
}
if (NULL != flags) {
free(flags);
}
if (NULL != tps) {
free(tps);
}
if (NULL != topologies) {
free(topologies);
}
return rc;
}
Move from the use of regex to compression We've been fighting the battle of trying to create a regex generator and parser that can handle arbitrary hostname schemes - without long-term success. The worst of it is that there is no way of checking to see if the computed regex is correct short of parsing it and doing a character-by-character comparison with the original string. Ugh...there has to be a better solution. One option is to investigate using 3rd-party regex libraries as those are coming from communities whose sole focus is resolving that problem. However, someone would need to spend the time to investigate it, and we'd have to find a license-friendly implementation. Another option is to quit beating our heads against the wall and just compress the information. It won't be as much of a reduction, but we also won't keep hitting scenarios where things break. In this case, it seems that "perfection" is definitely the enemy of "good enough". This PR implements the compression option while retaining the possibility of people adding regex-generating components. The compression code used in ORTE is consolidated into the opal/compress framework. That framework currently held bzip and gzip components for use in compressing checkpoint files - since we no longer support C/R, I have .opal_ignore'd those components. However, I have left the original framework APIs alone in case someone ever decides to redo C/R. The APIs of interest here are added to the framework - specifically, the "compress_block" and "decompress_block" functions. I then moved the ORTE zlib compression code into a new component in this framework. Unfortunately, the framework currently is a single-select one - i.e., only one active component at a time. Since I .opal_ignore'd the other two and made the priority of zlib high, this isn't a problem. However, if someone wants to re-enable bzip/gzip or add another component, they might need to transition opal/compress to a multi-select framework. Included changes: * Consolidate the compression code into the opal/compress framework * Move the ORTE zlib compression code into a new opal/compress/zlib component * Ignore the bzip and gzip components in opal/compress framework * Add a "compress_base_limit" MCA param to set the threshold above which we compress data - defaults to 4096 bytes * Delete stale brucks and rcd components from orte/grpcomm framework * Delete the orte/regx framework * Update the launch system to use opal/compress instead of string regex * Provide a default module if no zlib is available * Fix some misc multi-node issues * Properly generate the nidmap in response to a "connection warmup" message so the remote daemon knows the children it needs to launch. * Remove stale references to orte_node_regex * opal_byte_object_t's are not OPAL objects - properly release allocated memory. * Set the topology * Currently only handling homogeneous case * Update the compress framework files to conform * Consolidate open/close into one "frame" file. Ensure we open/close the framework Signed-off-by: Ralph Castain <rhc@pmix.org>
2019-01-30 03:02:21 +03:00
int orte_util_generate_ppn(orte_job_t *jdata,
opal_buffer_t *buf)
{
uint16_t *ppn=NULL;
size_t nbytes;
int rc = ORTE_SUCCESS;
orte_app_idx_t i;
int j, k;
opal_byte_object_t bo, *boptr;
bool compressed;
orte_node_t *nptr;
orte_proc_t *proc;
size_t sz;
/* make room for the number of procs on each node */
nbytes = sizeof(uint16_t) * orte_node_pool->size;
ppn = (uint16_t*)malloc(nbytes);
for (i=0; i < jdata->num_apps; i++) {
/* reset the #procs */
memset(ppn, 0, nbytes);
/* for each app_context, compute the #procs on
* each node of the allocation */
for (j=0; j < orte_node_pool->size; j++) {
if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, j))) {
continue;
}
if (NULL == nptr->daemon) {
continue;
}
for (k=0; k < nptr->procs->size; k++) {
if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(nptr->procs, k))) {
if (proc->name.jobid == jdata->jobid) {
++ppn[j];
}
}
}
}
if (opal_compress.compress_block((uint8_t*)ppn, nbytes,
(uint8_t**)&bo.bytes, &sz)) {
/* mark that this was compressed */
compressed = true;
bo.size = sz;
} else {
/* mark that this was not compressed */
compressed = false;
bo.bytes = (uint8_t*)ppn;
bo.size = nbytes;
}
/* indicate compression */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &compressed, 1, OPAL_BOOL))) {
if (compressed) {
free(bo.bytes);
}
goto cleanup;
}
/* if compressed, provide the uncompressed size */
if (compressed) {
sz = nbytes;
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &sz, 1, OPAL_SIZE))) {
free(bo.bytes);
goto cleanup;
}
}
/* add the object */
boptr = &bo;
rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT);
if (OPAL_SUCCESS != rc) {
break;
}
}
cleanup:
free(ppn);
return rc;
}
int orte_util_decode_ppn(orte_job_t *jdata,
opal_buffer_t *buf)
{
orte_app_idx_t n;
int m, cnt, rc;
opal_byte_object_t *boptr;
bool compressed;
size_t sz;
uint16_t *ppn, k;
orte_node_t *node;
orte_proc_t *proc;
for (n=0; n < jdata->num_apps; n++) {
/* unpack the compression flag */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &compressed, &cnt, OPAL_BOOL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* if compressed, unpack the raw size */
if (compressed) {
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
/* unpack the byte object describing this app */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_PROC_IS_HNP) {
/* just discard it */
free(boptr->bytes);
free(boptr);
continue;
}
/* decompress if required */
if (compressed) {
if (!opal_compress.decompress_block((uint8_t**)&ppn, sz,
boptr->bytes, boptr->size)) {
ORTE_ERROR_LOG(ORTE_ERROR);
OBJ_RELEASE(boptr);
return ORTE_ERROR;
}
} else {
ppn = (uint16_t*)boptr->bytes;
boptr->bytes = NULL;
boptr->size = 0;
}
if (NULL != boptr->bytes) {
free(boptr->bytes);
}
free(boptr);
/* cycle thru the node pool */
for (m=0; m < orte_node_pool->size; m++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, m))) {
continue;
}
if (0 < ppn[m]) {
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
OBJ_RETAIN(node);
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
opal_pointer_array_add(jdata->map->nodes, node);
}
/* create a proc object for each one */
for (k=0; k < ppn[m]; k++) {
proc = OBJ_NEW(orte_proc_t);
proc->name.jobid = jdata->jobid;
/* leave the vpid undefined as this will be determined
* later when we do the overall ranking */
proc->app_idx = n;
proc->parent = node->daemon->name.vpid;
OBJ_RETAIN(node);
proc->node = node;
/* flag the proc as ready for launch */
proc->state = ORTE_PROC_STATE_INIT;
opal_pointer_array_add(node->procs, proc);
/* we will add the proc to the jdata array when we
* compute its rank */
}
node->num_procs += ppn[m];
}
}
free(ppn);
}
return ORTE_SUCCESS;
}