1
1

hint processing should not open files

move opening of files from hint processing and into open routines.

This is MPICH commit 92f1c69f0de8 and 22a77dceda11

see https://trac.mpich.org/projects/mpich/ticket/2261
Ref: https://github.com/open-mpi/ompi/issues/158

Signed-off-by: Pavan Balaji <balaji@anl.gov>
Этот коммит содержится в:
Rob Latham 2015-06-03 11:37:59 -05:00
родитель 377bad18bd
Коммит 6e9cbe397f
2 изменённых файлов: 123 добавлений и 106 удалений

Просмотреть файл

@ -20,8 +20,7 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
char *value; char *value;
int flag; int flag;
ADIO_Offset stripe_val[3], str_factor = -1, str_unit=0, start_iodev=-1; ADIO_Offset stripe_val[3], str_factor = -1, str_unit=0, start_iodev=-1;
struct lov_user_md lum = { 0 }; int err, myrank;
int err, myrank, fd_sys, perm, amode, old_mask;
static char myname[] = "ADIOI_LUSTRE_SETINFO"; static char myname[] = "ADIOI_LUSTRE_SETINFO";
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
@ -47,18 +46,25 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
/* striping information */ /* striping information */
ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
value, &flag); value, &flag);
if (flag) if (flag) {
ADIOI_Info_set(fd->info, "striping_unit", value);
str_unit=atoll(value); str_unit=atoll(value);
}
ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
value, &flag); value, &flag);
if (flag) if (flag) {
ADIOI_Info_set(fd->info, "striping_factor", value);
str_factor=atoll(value); str_factor=atoll(value);
}
ADIOI_Info_get(users_info, "romio_lustre_start_iodevice", ADIOI_Info_get(users_info, "romio_lustre_start_iodevice",
MPI_MAX_INFO_VAL, value, &flag); MPI_MAX_INFO_VAL, value, &flag);
if (flag) if (flag) {
ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
start_iodev=atoll(value); start_iodev=atoll(value);
}
/* direct read and write */ /* direct read and write */
ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL, ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
@ -84,72 +90,20 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
} }
MPI_Bcast(stripe_val, 3, MPI_OFFSET, 0, fd->comm); MPI_Bcast(stripe_val, 3, MPI_OFFSET, 0, fd->comm);
/* do not open file in hint processing. Open file in open routines,
* where we can better deal with EXCL flag . Continue to check the
* "all processors set a value" condition holds. */
if (stripe_val[0] != str_factor if (stripe_val[0] != str_factor
|| stripe_val[1] != str_unit || stripe_val[1] != str_unit
|| stripe_val[2] != start_iodev) { || stripe_val[2] != start_iodev) {
FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys" MPIO_ERR_CREATE_CODE_INFO_NOT_SAME("ADIOI_LUSTRE_SetInfo",
"-striping_factor:striping_unit:start_iodevice " "str_factor or str_unit or start_iodev",
"need to be identical across all processes\n"); error_code);
MPI_Abort(MPI_COMM_WORLD, 1); ADIOI_Free(value);
} else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) { return;
/* if user has specified striping info, process 0 tries to set it */
if (!myrank) {
if (fd->perm == ADIO_PERM_NULL) {
old_mask = umask(022);
umask(old_mask);
perm = old_mask ^ 0666;
} }
else perm = fd->perm;
amode = 0;
if (fd->access_mode & ADIO_CREATE)
amode = amode | O_CREAT;
if (fd->access_mode & ADIO_RDONLY)
amode = amode | O_RDONLY;
if (fd->access_mode & ADIO_WRONLY)
amode = amode | O_WRONLY;
if (fd->access_mode & ADIO_RDWR)
amode = amode | O_RDWR;
if (fd->access_mode & ADIO_EXCL)
amode = amode | O_EXCL;
/* we need to create file so ensure this is set */
amode = amode | O_LOV_DELAY_CREATE | O_CREAT;
fd_sys = open(fd->filename, amode, perm);
if (fd_sys == -1) {
if (errno != EEXIST)
fprintf(stderr,
"Failure to open file %s %d %d\n",strerror(errno), amode, perm);
} else {
lum.lmm_magic = LOV_USER_MAGIC;
lum.lmm_pattern = 0;
lum.lmm_stripe_size = str_unit;
/* crude check for overflow of lustre internal datatypes.
* Silently cap to large value if user provides a value
* larger than lustre supports */
if (lum.lmm_stripe_size != str_unit) {
lum.lmm_stripe_size = UINT_MAX;
}
lum.lmm_stripe_count = str_factor;
if ( lum.lmm_stripe_count != str_factor) {
lum.lmm_stripe_count = USHRT_MAX;
}
lum.lmm_stripe_offset = start_iodev;
if (lum.lmm_stripe_offset != start_iodev) {
lum.lmm_stripe_offset = USHRT_MAX;
} }
err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum);
if (err == -1 && errno != EEXIST) {
fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
}
close(fd_sys);
}
} /* End of striping parameters validation */
}
MPI_Barrier(fd->comm);
}
/* get other hint */ /* get other hint */
if (users_info != MPI_INFO_NULL) { if (users_info != MPI_INFO_NULL) {
/* CO: IO Clients/OST, /* CO: IO Clients/OST,

Просмотреть файл

@ -18,14 +18,17 @@
void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code) void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
{ {
int perm, old_mask, amode, amode_direct; int perm, old_mask, amode, amode_direct;
int lumlen; int lumlen, myrank, flag, set_layout=0, err;
struct lov_user_md *lum = NULL; struct lov_user_md *lum = NULL;
char *value; char *value;
ADIO_Offset str_factor = -1, str_unit=0, start_iodev=-1;
#if defined(MPICH) || !defined(PRINT_ERR_MSG) #if defined(MPICH) || !defined(PRINT_ERR_MSG)
static char myname[] = "ADIOI_LUSTRE_OPEN"; static char myname[] = "ADIOI_LUSTRE_OPEN";
#endif #endif
MPI_Comm_rank(fd->comm, &myrank);
if (fd->perm == ADIO_PERM_NULL) { if (fd->perm == ADIO_PERM_NULL) {
old_mask = umask(022); old_mask = umask(022);
umask(old_mask); umask(old_mask);
@ -47,26 +50,86 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
amode_direct = amode | O_DIRECT; amode_direct = amode | O_DIRECT;
fd->fd_sys = open(fd->filename, amode|O_CREAT, perm); }
/* odd length here because lov_user_md contains some fixed data and
if (fd->fd_sys != -1) {
int err;
/* get file striping information and set it in info */
/* odd malloc here because lov_user_md contains some fixed data and
* then a list of 'lmm_objects' representing stripe */ * then a list of 'lmm_objects' representing stripe */
lumlen = sizeof(struct lov_user_md) + lumlen = sizeof(struct lov_user_md) +
MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data); MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data);
/* furthermore, Pascal Deveze reports that, even though we pass a lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen);
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
/* we already validated in LUSTRE_SetInfo that these are going to be the same */
if (fd->info != MPI_INFO_NULL) {
/* striping information */
ADIOI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL,
value, &flag);
if (flag)
str_unit=atoll(value);
ADIOI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL,
value, &flag);
if (flag)
str_factor=atoll(value);
ADIOI_Info_get(fd->info, "romio_lustre_start_iodevice",
MPI_MAX_INFO_VAL, value, &flag);
if (flag)
start_iodev=atoll(value);
}
if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0))
set_layout = 1;
/* if hints were set, we need to delay creation of any lustre objects.
* However, if we open the file with O_LOV_DELAY_CREATE and don't call the
* follow-up ioctl, subsequent writes will fail */
if (myrank == 0 && set_layout)
amode = amode | O_LOV_DELAY_CREATE;
fd->fd_sys = open(fd->filename, amode, perm);
if (fd->fd_sys == -1) goto fn_exit;
/* we can only set these hints on new files */
/* It was strange and buggy to open the file in the hint path. Instead,
* we'll apply the file tunings at open time */
if ((amode & O_CREAT) && set_layout ) {
/* if user has specified striping info, process 0 tries to set it */
if (!myrank) {
lum->lmm_magic = LOV_USER_MAGIC;
lum->lmm_pattern = 0;
/* crude check for overflow of lustre internal datatypes.
* Silently cap to large value if user provides a value
* larger than lustre supports */
if (str_unit > UINT_MAX)
lum->lmm_stripe_size = UINT_MAX;
else
lum->lmm_stripe_size = str_unit;
if (str_factor > USHRT_MAX)
lum->lmm_stripe_count = USHRT_MAX;
else
lum->lmm_stripe_count = str_factor;
if (start_iodev > USHRT_MAX)
lum->lmm_stripe_offset = USHRT_MAX;
else
lum->lmm_stripe_offset = start_iodev;
err = ioctl(fd->fd_sys, LL_IOC_LOV_SETSTRIPE, lum);
if (err == -1 && errno != EEXIST) {
fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
/* not a fatal error, but user might care to know */
}
} /* End of striping parameters validation */
}
/* Pascal Deveze reports that, even though we pass a
* "GETSTRIPE" (read) flag to the ioctl, if some of the values of this * "GETSTRIPE" (read) flag to the ioctl, if some of the values of this
* struct are uninitialzed, the call can give an error. calloc in case * struct are uninitialzed, the call can give an error. zero it out in case
* there are other members that must be initialized and in case * there are other members that must be initialized and in case
* lov_user_md struct changes in future */ * lov_user_md struct changes in future */
lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen); memset(lum, 0, lumlen);
lum->lmm_magic = LOV_USER_MAGIC; lum->lmm_magic = LOV_USER_MAGIC;
err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum); err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum);
if (!err) { if (!err) {
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
fd->hints->striping_unit = lum->lmm_stripe_size; fd->hints->striping_unit = lum->lmm_stripe_size;
sprintf(value, "%d", lum->lmm_stripe_size); sprintf(value, "%d", lum->lmm_stripe_size);
@ -80,13 +143,10 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
sprintf(value, "%d", lum->lmm_stripe_offset); sprintf(value, "%d", lum->lmm_stripe_offset);
ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value); ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
ADIOI_Free(value);
} }
ADIOI_Free(lum);
if (fd->access_mode & ADIO_APPEND) if (fd->access_mode & ADIO_APPEND)
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END); fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
}
if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND)) if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END); fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
@ -101,6 +161,9 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
fd->direct_write = fd->direct_read = 0; fd->direct_write = fd->direct_read = 0;
} }
} }
fn_exit:
ADIOI_Free(lum);
ADIOI_Free(value);
/* --BEGIN ERROR HANDLING-- */ /* --BEGIN ERROR HANDLING-- */
if (fd->fd_sys == -1 || ((fd->fd_direct == -1) && if (fd->fd_sys == -1 || ((fd->fd_direct == -1) &&