e10f476c87
and implementation. This has shown a drastic performance benefit when transferring many files at roughly the same time. I tested this for many different filem operations and everything was working fine. Let me know if you have any problems with this functionality. Some Notes: - opal-checkpoint now has a 'quiet' flag to keep it from being too verbose. - FileM RSH component is fully non-blocking. - FileM RSH component has incoming connection throttling, since by default ssh only allows 10 concurrent scp connections to any single host. This default can be adjusted via an MCA parameter. {{{-mca filem_rsh_max_incomming 10}}} - There is an MCA parameter for max outgoing connections, but it is currently not implemented. If someone needs it then it should not be hard to implement. {{{-mca filem_rsh_max_outgoing 10}}} - Changed the FileM request structure so that it is a bit more explicit and flexible. - Moved the 'preload-binary' and 'preload-files' functionality into odls/base, allowing for code reuse in the 'process' and 'default' ODLS components. - Fixed a bug in the process name resolution which broke the 'preload-*' functionality due to GPR table structure changes. - The FileM RSH component might be able to see even more speedup from using a thread pool to operate on the work_pool structures, but that is for future work. - Added an 'opal-show-help' file to ODLS Base. This commit was SVN r16252.
379 строки
11 KiB
C
379 строки
11 KiB
C
/*
|
|
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
/**
|
|
* @file
|
|
*
|
|
* Remote File Management (FileM) Interface
|
|
*
|
|
*/
|
|
|
|
#ifndef MCA_FILEM_H
|
|
#define MCA_FILEM_H
|
|
|
|
#include "orte_config.h"
|
|
#include "orte/orte_constants.h"
|
|
#include "orte/orte_types.h"
|
|
|
|
#include "opal/mca/mca.h"
|
|
#include "opal/mca/base/base.h"
|
|
#include "orte/mca/ns/ns.h"
|
|
|
|
#include "opal/class/opal_object.h"
|
|
|
|
#if defined(c_plusplus) || defined(__cplusplus)
|
|
extern "C" {
|
|
#endif
|
|
|
|
/**
 * A set of flags that determine the type of the file
 * in question
 */
#define ORTE_FILEM_TYPE_FILE      0
#define ORTE_FILEM_TYPE_DIR       1
#define ORTE_FILEM_TYPE_UNKNOWN   2
|
|
|
|
/**
 * Type of movement
 */
#define ORTE_FILEM_MOVE_TYPE_PUT      0
#define ORTE_FILEM_MOVE_TYPE_GET      1
#define ORTE_FILEM_MOVE_TYPE_RM       2
#define ORTE_FILEM_MOVE_TYPE_UNKNOWN  3
|
|
|
|
/**
|
|
* Define a Process Set
|
|
*
|
|
* Source: A single source of the operation.
|
|
* Sink: Desitination of the operation.
|
|
*/
|
|
struct orte_filem_base_process_set_1_0_0_t {
|
|
/** This is an object, so must have a super */
|
|
opal_list_item_t super;
|
|
|
|
/** Source Process */
|
|
orte_process_name_t source;
|
|
|
|
/** Sink Process */
|
|
orte_process_name_t sink;
|
|
};
|
|
typedef struct orte_filem_base_process_set_1_0_0_t orte_filem_base_process_set_1_0_0_t;
|
|
typedef struct orte_filem_base_process_set_1_0_0_t orte_filem_base_process_set_t;
|
|
|
|
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_filem_base_process_set_t);
|
|
|
|
/**
|
|
* Define a File Pair
|
|
*
|
|
* Local: Local file reference
|
|
* Remove: Remote file reference
|
|
*
|
|
* Note: If multiple process sinks are used it is assumed that the
|
|
* file reference is the same for each of the sinks. If this is not
|
|
* true then more than one filem request needs to be created.
|
|
*/
|
|
struct orte_filem_base_file_set_1_0_0_t {
|
|
/** This is an object, so must have a super */
|
|
opal_list_item_t super;
|
|
|
|
/* Local file reference */
|
|
char * local_target;
|
|
|
|
/* Remove file reference */
|
|
char * remote_target;
|
|
|
|
/* Type of file to move */
|
|
int target_flag;
|
|
};
|
|
typedef struct orte_filem_base_file_set_1_0_0_t orte_filem_base_file_set_1_0_0_t;
|
|
typedef struct orte_filem_base_file_set_1_0_0_t orte_filem_base_file_set_t;
|
|
|
|
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_filem_base_file_set_t);
|
|
|
|
/**
|
|
* Definition of a file movement request
|
|
* This will allow:
|
|
* - The movement of one or more files
|
|
* - to/from one or more processes
|
|
* in a single call of the API function. Allowing the implementation
|
|
* to optimize the sending/receiving of data.
|
|
* Used for the following:
|
|
*
|
|
*/
|
|
struct orte_filem_base_request_1_0_0_t {
|
|
/** This is an object, so must have a super */
|
|
opal_list_item_t super;
|
|
|
|
/*
|
|
* A list of process sets
|
|
*/
|
|
opal_list_t process_sets;
|
|
|
|
/*
|
|
* A list of file pairings
|
|
*/
|
|
opal_list_t file_sets;
|
|
|
|
/*
|
|
* Internal use:
|
|
* Number of movements
|
|
*/
|
|
int num_mv;
|
|
|
|
/*
|
|
* Internal use:
|
|
* Boolean to indianate if transfer is complete
|
|
*/
|
|
bool *is_done;
|
|
|
|
/*
|
|
* Internal use:
|
|
* Boolean to indianate if transfer is active
|
|
*/
|
|
bool *is_active;
|
|
|
|
/*
|
|
* Internal use:
|
|
* Exit status of the copy command
|
|
*/
|
|
int32_t *exit_status;
|
|
|
|
/*
|
|
* Internal use:
|
|
* Movement type
|
|
*/
|
|
int movement_type;
|
|
};
|
|
typedef struct orte_filem_base_request_1_0_0_t orte_filem_base_request_1_0_0_t;
|
|
typedef struct orte_filem_base_request_1_0_0_t orte_filem_base_request_t;
|
|
|
|
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_filem_base_request_t);
|
|
|
|
/**
 * Query function for FILEM components.
 * Returns a priority to rank it against other available FILEM components.
 *
 * @param priority Output location for the component's priority.
 *
 * @return Pointer to a FILEM module structure.
 */
typedef struct orte_filem_base_module_1_0_0_t *
        (*orte_filem_base_component_query_1_0_0_fn_t)
        (int *priority);
|
|
|
|
/**
 * Module initialization function.
 * Returns ORTE_SUCCESS
 */
typedef int (*orte_filem_base_module_init_fn_t)
     (void);
|
|
|
|
/**
 * Module finalization function.
 * Returns ORTE_SUCCESS
 */
typedef int (*orte_filem_base_module_finalize_fn_t)
     (void);
|
|
|
|
/**
|
|
* Put a file or directory on the remote machine
|
|
*
|
|
* Note: By using a relative path for the remote file/directory, the filem
|
|
* component will negotiate the correct absolute path for that file/directory
|
|
* for the remote machine.
|
|
*
|
|
* @param request FileM request describing the files/directories to send,
|
|
* the remote files/directories to use, and the processes to see the change.
|
|
*
|
|
* @return ORTE_SUCCESS on successful file transer
|
|
* @return ORTE_ERROR on failed file transfer
|
|
*/
|
|
typedef int (*orte_filem_base_put_fn_t)
|
|
(orte_filem_base_request_t *request);
|
|
|
|
/**
|
|
* Put a file or directory on the remote machine (Async)
|
|
*
|
|
* Note: By using a relative path for the remote file/directory, the filem
|
|
* component will negotiate the correct absolute path for that file/directory
|
|
* for the remote machine.
|
|
*
|
|
* @param request FileM request describing the files/directories to send,
|
|
* the remote files/directories to use, and the processes to see the change.
|
|
*
|
|
* @return ORTE_SUCCESS on successful file transer
|
|
* @return ORTE_ERROR on failed file transfer
|
|
*/
|
|
typedef int (*orte_filem_base_put_nb_fn_t)
|
|
(orte_filem_base_request_t *request);
|
|
|
|
/**
|
|
* Get a file from the remote machine
|
|
*
|
|
* Note: By using a relative path for the remote file/directory, the filem
|
|
* component will negotiate the correct absolute path for that file/directory
|
|
* for the remote machine.
|
|
*
|
|
* @param request FileM request describing the files/directories to receive,
|
|
* the remote files/directories to use, and the processes to see the change.
|
|
*
|
|
* @return ORTE_SUCCESS on successful file transer
|
|
* @return ORTE_ERROR on failed file transfer
|
|
*/
|
|
typedef int (*orte_filem_base_get_fn_t)
|
|
(orte_filem_base_request_t *request);
|
|
|
|
/**
|
|
* Get a file from the remote machine (Async)
|
|
*
|
|
* Note: By using a relative path for the remote file/directory, the filem
|
|
* component will negotiate the correct absolute path for that file/directory
|
|
* for the remote machine.
|
|
*
|
|
* @param request FileM request describing the files/directories to receive,
|
|
* the remote files/directories to use, and the processes to see the change.
|
|
*
|
|
* @return ORTE_SUCCESS on successful file transer
|
|
* @return ORTE_ERROR on failed file transfer
|
|
*/
|
|
typedef int (*orte_filem_base_get_nb_fn_t)
|
|
(orte_filem_base_request_t *request);
|
|
|
|
/**
|
|
* Remove a file from the remote machine
|
|
*
|
|
* Note: By using a relative path for the remote file/directory, the filem
|
|
* component will negotiate the correct absolute path for that file/directory
|
|
* for the remote machine.
|
|
*
|
|
* @param request FileM request describing the remote files/directories to remove,
|
|
* the processes to see the change.
|
|
*
|
|
* @return ORTE_SUCCESS on success
|
|
* @return ORTE_ERROR on fail
|
|
*/
|
|
typedef int (*orte_filem_base_rm_fn_t)
|
|
(orte_filem_base_request_t *request);
|
|
|
|
/**
|
|
* Remove a file from the remote machine (Async)
|
|
*
|
|
* Note: By using a relative path for the remote file/directory, the filem
|
|
* component will negotiate the correct absolute path for that file/directory
|
|
* for the remote machine.
|
|
*
|
|
* @param request FileM request describing the remote files/directories to remove,
|
|
* the processes to see the change.
|
|
*
|
|
* @return ORTE_SUCCESS on success
|
|
* @return ORTE_ERROR on fail
|
|
*/
|
|
typedef int (*orte_filem_base_rm_nb_fn_t)
|
|
(orte_filem_base_request_t *request);
|
|
|
|
/**
|
|
* Wait for a single file movement request to finish
|
|
*
|
|
* @param request FileM request describing the remote files/directories.
|
|
*
|
|
* The request must have been passed through one of the non-blocking functions
|
|
* before calling wait or wait_all otherwise ORTE_ERROR will be returned.
|
|
*
|
|
* @return ORTE_SUCCESS on success
|
|
* @return ORTE_ERROR on fail
|
|
*/
|
|
typedef int (*orte_filem_base_wait_fn_t)
|
|
(orte_filem_base_request_t *request);
|
|
|
|
/**
|
|
* Wait for a multiple file movement requests to finish
|
|
*
|
|
* @param request_list opal_list_t of FileM requests describing the remote files/directories.
|
|
*
|
|
* The request must have been passed through one of the non-blocking functions
|
|
* before calling wait or wait_all otherwise ORTE_ERROR will be returned.
|
|
*
|
|
* @return ORTE_SUCCESS on success
|
|
* @return ORTE_ERROR on fail
|
|
*/
|
|
typedef int (*orte_filem_base_wait_all_fn_t)
|
|
(opal_list_t *request_list);
|
|
|
|
/**
|
|
* Structure for FILEM v1.0.0 components.
|
|
*/
|
|
struct orte_filem_base_component_1_0_0_t {
|
|
/** MCA base component */
|
|
mca_base_component_t filem_version;
|
|
/** MCA base data */
|
|
mca_base_component_data_1_0_0_t filem_data;
|
|
|
|
/** Component Query for Selection Function */
|
|
orte_filem_base_component_query_1_0_0_fn_t filem_query;
|
|
|
|
/** Verbosity Level */
|
|
int verbose;
|
|
/** Output Handle for opal_output */
|
|
int output_handle;
|
|
/** Default Priority */
|
|
int priority;
|
|
};
|
|
typedef struct orte_filem_base_component_1_0_0_t orte_filem_base_component_1_0_0_t;
|
|
typedef struct orte_filem_base_component_1_0_0_t orte_filem_base_component_t;
|
|
|
|
/**
|
|
* Structure for FILEM v1.0.0 modules
|
|
*/
|
|
struct orte_filem_base_module_1_0_0_t {
|
|
/** Initialization Function */
|
|
orte_filem_base_module_init_fn_t filem_init;
|
|
/** Finalization Function */
|
|
orte_filem_base_module_finalize_fn_t filem_finalize;
|
|
|
|
/** Put a file on the remote machine */
|
|
orte_filem_base_put_fn_t put;
|
|
orte_filem_base_put_nb_fn_t put_nb;
|
|
/** Get a file from the remote machine */
|
|
orte_filem_base_get_fn_t get;
|
|
orte_filem_base_get_nb_fn_t get_nb;
|
|
|
|
/** Remove a file on the remote machine */
|
|
orte_filem_base_rm_fn_t rm;
|
|
orte_filem_base_rm_nb_fn_t rm_nb;
|
|
|
|
/** Test functions for the non-blocking versions */
|
|
orte_filem_base_wait_fn_t wait;
|
|
orte_filem_base_wait_all_fn_t wait_all;
|
|
|
|
};
|
|
typedef struct orte_filem_base_module_1_0_0_t orte_filem_base_module_1_0_0_t;
|
|
typedef struct orte_filem_base_module_1_0_0_t orte_filem_base_module_t;
|
|
|
|
ORTE_DECLSPEC extern orte_filem_base_module_t orte_filem;
|
|
|
|
/**
 * Macro for use in components that are of type FILEM v1.0.0
 *
 * Every line of the definition except the last must end in a
 * backslash with nothing after it, otherwise the macro is truncated.
 */
#define ORTE_FILEM_BASE_VERSION_1_0_0 \
    /* FILEM v1.0 is chained to MCA v1.0 */ \
    MCA_BASE_VERSION_1_0_0, \
    /* FILEM v1.0 */ \
    "filem", 1, 0, 0
|
|
|
|
#if defined(c_plusplus) || defined(__cplusplus)
|
|
}
|
|
#endif
|
|
|
|
#endif /* MCA_FILEM_H */
|
|
|