1
1

Add support for optional "extended" information for each file/dir entry

Unfortunately, there wasn't a single bit free in struct dir.flags, so I
had to increase its size to 16 bits. This commit is just the initial
preparation; there are still a few things to do:

- Add "extended information" cli flag to enable/disable this
  functionality.
- Export and import extended information when requested
- Do something with the data.

I also did a few memory measurements on a file list with 12769842 items:

  before this commit:    1.239 GiB
  without extended info: 1.318 GiB
  with extended info:    1.698 GiB

It's surprising what adding a single byte to a struct can do to the
memory usage. :(
Этот коммит содержится в:
Yorhel 2018-01-23 13:17:06 +01:00
родитель 7338454322
Коммит 77aca35fce
9 изменённых файлов: 149 добавлений и 139 удалений

Просмотреть файл

@ -60,15 +60,16 @@ struct dir_output {
* scanned directory.
*
* The *item struct has the following fields set when item() is called:
* size, asize, ino, dev, flags (only DIR,FILE,ERR,OTHFS,EXL,HLNKC) and name.
* size, asize, ino, dev, flags (only DIR,FILE,ERR,OTHFS,EXL,HLNKC).
* All other fields/flags should be initialized to NULL or 0.
* *item may be overwritten or freed in subsequent calls, so this function
* should make a copy if necessary.
* The name and dir_ext fields are given separately.
* All pointers may be overwritten or freed in subsequent calls, so this
* function should make a copy if necessary.
*
* The function should return non-zero on error, at which point errno is
* assumed to be set to something sensible.
*/
int (*item)(struct dir *);
int (*item)(struct dir *, const char *, struct dir_ext *);
/* Finalizes the output to go to the next program state or exit ncdu. Called
* after item(NULL) has been called for the root item or before any item()
@ -129,11 +130,6 @@ void dir_setlasterr(const char *);
extern char *dir_fatalerr;
void dir_seterr(const char *, ...);
/* Return an empty struct dir with the given name, for use with
* dir_output.item(). Returned memory may be freed/overwritten on a subsequent
* call. */
struct dir *dir_createstruct(const char *);
extern int dir_ui;
int dir_key(int);
void dir_draw();

Просмотреть файл

@ -35,7 +35,7 @@ char *dir_curpath; /* Full path of the last seen item. */
struct dir_output dir_output;
char *dir_fatalerr; /* Error message on a fatal error. (NULL if there was no fatal error) */
int dir_ui; /* User interface to use */
int confirm_quit_while_scanning_stage_1_passed; /* Additional check before quitting */
static int confirm_quit_while_scanning_stage_1_passed; /* Additional check before quitting */
static char *lasterr; /* Path where the last error occurred. */
static int curpathl; /* Allocated length of dir_curpath */
static int lasterrl; /* Allocated length of lasterr */
@ -106,20 +106,6 @@ void dir_seterr(const char *fmt, ...) {
}
struct dir *dir_createstruct(const char *name) {
static struct dir *d = NULL;
static size_t len = 0;
size_t req = SDIRSIZE+strlen(name);
if(len < req) {
len = req < SDIRSIZE+256 ? SDIRSIZE+256 : req < len*2 ? len*2 : req;
d = realloc(d, len);
}
memset(d, 0, SDIRSIZE);
strcpy(d->name, name);
return d;
}
static void draw_progress() {
static const char scantext[] = "Scanning...";
static const char loadtext[] = "Loading...";

Просмотреть файл

@ -74,9 +74,9 @@ static void output_int(uint64_t n) {
}
static void output_info(struct dir *d) {
static void output_info(struct dir *d, const char *name, struct dir_ext *e) {
fputs("{\"name\":\"", stream);
output_string(d->name);
output_string(name);
fputc('"', stream);
/* No need for asize/dsize if they're 0 (which happens with excluded or failed-to-stat files) */
@ -109,6 +109,8 @@ static void output_info(struct dir *d) {
else if(d->flags & FF_OTHFS)
fputs(",\"excluded\":\"othfs\"", stream);
/* TODO: Output extended info if -e is given */
fputc('}', stream);
}
@ -118,7 +120,7 @@ static void output_info(struct dir *d) {
* item() call do we check for ferror(). This greatly simplifies the code, but
* assumes that calls to fwrite()/fput./etc don't do any weird stuff when
* called with a stream that's in an error state. */
static int item(struct dir *item) {
static int item(struct dir *item, const char *name, struct dir_ext *ext) {
if(!item) {
nstack_pop(&stack);
if(!stack.top) { /* closing of the root item */
@ -143,7 +145,7 @@ static int item(struct dir *item) {
if(item->flags & FF_DIR)
fputc('[', stream);
output_info(item);
output_info(item, name, ext);
if(item->flags & FF_DIR)
nstack_push(&stack, item->dev);

Просмотреть файл

@ -69,6 +69,11 @@ struct ctx {
char *buf; /* points into readbuf, always zero-terminated. */
char *lastfill; /* points into readbuf, location of the zero terminator. */
/* scratch space */
struct dir *buf_dir;
struct dir_ext buf_ext[1];
char buf_name[MAX_VAL];
char val[MAX_VAL];
char readbuf[READ_BUF_SIZE];
} *ctx;
@ -415,18 +420,11 @@ static int itemdir(uint64_t dev) {
}
static int iteminfo(struct dir **item, uint64_t dev, int isdir) {
static struct dir *dirbuf;
struct dir *tmp, *d;
/* Reads a JSON object representing a struct dir/dir_ext item. Writes to
* ctx->buf_dir, ctx->buf_ext and ctx->buf_name. */
static int iteminfo() {
uint64_t iv;
if(!dirbuf)
dirbuf = malloc(sizeof(struct dir));
d = dirbuf;
memset(d, 0, sizeof(struct dir));
d->flags |= isdir ? FF_DIR : FF_FILE;
d->dev = dev;
E(*ctx->buf != '{', "Expected JSON object");
con(1);
@ -437,47 +435,46 @@ static int iteminfo(struct dir **item, uint64_t dev, int isdir) {
ctx->val[MAX_VAL-1] = 1;
C(rstring(ctx->val, MAX_VAL));
E(ctx->val[MAX_VAL-1] != 1, "Too large string value");
tmp = dir_createstruct(ctx->val);
memcpy(tmp, d, SDIRSIZE-1);
d = tmp;
strcpy(ctx->buf_name, ctx->val);
} else if(strcmp(ctx->val, "asize") == 0) { /* asize */
C(rint64(&iv, INT64_MAX));
d->asize = iv;
ctx->buf_dir->asize = iv;
} else if(strcmp(ctx->val, "dsize") == 0) { /* dsize */
C(rint64(&iv, INT64_MAX));
d->size = iv;
ctx->buf_dir->size = iv;
} else if(strcmp(ctx->val, "dev") == 0) { /* dev */
C(rint64(&iv, UINT64_MAX));
d->dev = iv;
ctx->buf_dir->dev = iv;
} else if(strcmp(ctx->val, "ino") == 0) { /* ino */
C(rint64(&iv, UINT64_MAX));
d->ino = iv;
ctx->buf_dir->ino = iv;
} else if(strcmp(ctx->val, "hlnkc") == 0) { /* hlnkc */
if(*ctx->buf == 't') {
C(rlit("true", 4));
d->flags |= FF_HLNKC;
ctx->buf_dir->flags |= FF_HLNKC;
} else
C(rlit("false", 5));
} else if(strcmp(ctx->val, "read_error") == 0) { /* read_error */
if(*ctx->buf == 't') {
C(rlit("true", 4));
d->flags |= FF_ERR;
ctx->buf_dir->flags |= FF_ERR;
} else
C(rlit("false", 5));
} else if(strcmp(ctx->val, "excluded") == 0) { /* excluded */
C(rstring(ctx->val, 8));
if(strcmp(ctx->val, "otherfs") == 0)
d->flags |= FF_OTHFS;
ctx->buf_dir->flags |= FF_OTHFS;
else
d->flags |= FF_EXL;
ctx->buf_dir->flags |= FF_EXL;
} else if(strcmp(ctx->val, "notreg") == 0) { /* notreg */
if(*ctx->buf == 't') {
C(rlit("true", 4));
d->flags &= ~FF_FILE;
ctx->buf_dir->flags &= ~FF_FILE;
} else
C(rlit("false", 5));
} else
C(rval());
/* TODO: Extended attributes */
C(cons());
if(*ctx->buf == '}')
@ -487,8 +484,7 @@ static int iteminfo(struct dir **item, uint64_t dev, int isdir) {
}
con(1);
E(!*d->name, "No name field present in item information object");
*item = d;
E(!*ctx->buf_name, "No name field present in item information object");
ctx->items++;
/* Only call input_handle() once for every 32 items. Importing items is so
* fast that the time spent in input_handle() dominates when called every
@ -502,7 +498,6 @@ static int iteminfo(struct dir **item, uint64_t dev, int isdir) {
static int item(uint64_t dev) {
int isdir = 0;
int isroot = ctx->items == 0;
struct dir *d = NULL;
if(*ctx->buf == '[') {
isdir = 1;
@ -510,25 +505,31 @@ static int item(uint64_t dev) {
C(cons());
}
C(iteminfo(&d, dev, isdir));
dev = d->dev;
memset(ctx->buf_dir, 0, offsetof(struct dir, name));
memset(ctx->buf_ext, 0, sizeof(struct dir_ext));
*ctx->buf_name = 0;
ctx->buf_dir->flags |= isdir ? FF_DIR : FF_FILE;
ctx->buf_dir->dev = dev;
C(iteminfo());
dev = ctx->buf_dir->dev;
if(isroot)
dir_curpath_set(d->name);
dir_curpath_set(ctx->buf_name);
else
dir_curpath_enter(d->name);
dir_curpath_enter(ctx->buf_name);
if(isdir) {
if(dir_output.item(d)) {
if(dir_output.item(ctx->buf_dir, ctx->buf_name, ctx->buf_ext)) {
dir_seterr("Output error: %s", strerror(errno));
return 1;
}
C(itemdir(dev));
if(dir_output.item(NULL)) {
if(dir_output.item(NULL, 0, NULL)) {
dir_seterr("Output error: %s", strerror(errno));
return 1;
}
} else if(dir_output.item(d)) {
} else if(dir_output.item(ctx->buf_dir, ctx->buf_name, ctx->buf_ext)) {
dir_seterr("Output error: %s", strerror(errno));
return 1;
}
@ -563,6 +564,7 @@ static int process() {
if(fclose(ctx->stream) && !dir_fatalerr && !fail)
dir_seterr("Error closing file: %s", strerror(errno));
free(ctx->buf_dir);
free(ctx);
while(dir_fatalerr && !input_handle(0))
@ -583,6 +585,7 @@ int dir_import_init(const char *fn) {
ctx->line = 1;
ctx->byte = ctx->eof = ctx->items = 0;
ctx->buf = ctx->lastfill = ctx->readbuf;
ctx->buf_dir = malloc(dir_memsize(""));
ctx->readbuf[0] = 0;
dir_curpath_set(fn);

Просмотреть файл

@ -92,24 +92,6 @@ static void hlink_check(struct dir *d) {
}
/* Make a copy of *item so that we'll keep it in memory. In the special case
* of !root && orig, we need to copy over the name of *orig instead of *item.
*/
static struct dir *item_copy(struct dir *item) {
struct dir *t;
if(!root && orig) {
t = malloc(SDIRSIZE+strlen(orig->name));
memcpy(t, item, SDIRSIZE);
strcpy(t->name, orig->name);
} else {
t = malloc(SDIRSIZE+strlen(item->name));
memcpy(t, item, SDIRSIZE+strlen(item->name));
}
return t;
}
/* Add item to the correct place in the memory structure */
static void item_add(struct dir *item) {
if(!root) {
@ -129,16 +111,25 @@ static void item_add(struct dir *item) {
}
static int item(struct dir *item) {
struct dir *t;
static int item(struct dir *dir, const char *name, struct dir_ext *ext) {
struct dir *t, *item;
/* Go back to parent dir */
if(!item) {
if(!dir) {
curdir = curdir->parent;
return 0;
}
item = item_copy(item);
if(!root && orig)
name = orig->name;
/* TODO: Don't allocate ext if -e flag is not given */
item = malloc(dir->flags & FF_EXT ? dir_ext_memsize(name) : dir_memsize(name));
memcpy(item, dir, offsetof(struct dir, name));
strcpy(item->name, name);
if(dir->flags & FF_EXT)
memcpy(dir_ext_ptr(item), ext, sizeof(struct dir_ext));
item_add(item);
/* Ensure that any next items will go to this directory */

Просмотреть файл

@ -45,28 +45,39 @@ int dir_scan_smfs; /* Stay on the same filesystem */
static uint64_t curdev; /* current device we're scanning on */
/* scratch space */
static struct dir *buf_dir;
static struct dir_ext buf_ext[1];
/* Populates the struct dir item with information from the stat struct. Sets
* everything necessary for output_dir.item() except FF_ERR and FF_EXL. */
static void stat_to_dir(struct dir *d, struct stat *fs) {
d->ino = (uint64_t)fs->st_ino;
d->dev = (uint64_t)fs->st_dev;
/* Populates the buf_dir and buf_ext with information from the stat struct.
* Sets everything necessary for output_dir.item() except FF_ERR and FF_EXL. */
static void stat_to_dir(struct stat *fs) {
buf_dir->flags |= FF_EXT; /* We always read extended data because it doesn't have an additional cost */
buf_dir->ino = (uint64_t)fs->st_ino;
buf_dir->dev = (uint64_t)fs->st_dev;
if(S_ISREG(fs->st_mode))
d->flags |= FF_FILE;
buf_dir->flags |= FF_FILE;
else if(S_ISDIR(fs->st_mode))
d->flags |= FF_DIR;
buf_dir->flags |= FF_DIR;
if(!S_ISDIR(fs->st_mode) && fs->st_nlink > 1)
d->flags |= FF_HLNKC;
buf_dir->flags |= FF_HLNKC;
if(dir_scan_smfs && curdev != d->dev)
d->flags |= FF_OTHFS;
if(dir_scan_smfs && curdev != buf_dir->dev)
buf_dir->flags |= FF_OTHFS;
if(!(d->flags & (FF_OTHFS|FF_EXL))) {
d->size = fs->st_blocks * S_BLKSIZE;
d->asize = fs->st_size;
if(!(buf_dir->flags & (FF_OTHFS|FF_EXL))) {
buf_dir->size = fs->st_blocks * S_BLKSIZE;
buf_dir->asize = fs->st_size;
}
buf_ext->mode = fs->st_mode;
buf_ext->mtime = fs->st_mtim;
buf_ext->uid = (int)fs->st_uid;
buf_ext->gid = (int)fs->st_gid;
}
@ -117,15 +128,15 @@ static char *dir_read(int *err) {
static int dir_walk(char *);
/* Tries to recurse into the given directory item */
static int dir_scan_recurse(struct dir *d) {
/* Tries to recurse into the current directory item (buf_dir is assumed to be the current dir) */
static int dir_scan_recurse(const char *name) {
int fail = 0;
char *dir;
if(chdir(d->name)) {
if(chdir(name)) {
dir_setlasterr(dir_curpath);
d->flags |= FF_ERR;
if(dir_output.item(d) || dir_output.item(NULL)) {
buf_dir->flags |= FF_ERR;
if(dir_output.item(buf_dir, name, buf_ext) || dir_output.item(NULL, 0, NULL)) {
dir_seterr("Output error: %s", strerror(errno));
return 1;
}
@ -134,8 +145,8 @@ static int dir_scan_recurse(struct dir *d) {
if((dir = dir_read(&fail)) == NULL) {
dir_setlasterr(dir_curpath);
d->flags |= FF_ERR;
if(dir_output.item(d) || dir_output.item(NULL)) {
buf_dir->flags |= FF_ERR;
if(dir_output.item(buf_dir, name, buf_ext) || dir_output.item(NULL, 0, NULL)) {
dir_seterr("Output error: %s", strerror(errno));
return 1;
}
@ -148,14 +159,14 @@ static int dir_scan_recurse(struct dir *d) {
/* readdir() failed halfway, not fatal. */
if(fail)
d->flags |= FF_ERR;
buf_dir->flags |= FF_ERR;
if(dir_output.item(d)) {
if(dir_output.item(buf_dir, name, buf_ext)) {
dir_seterr("Output error: %s", strerror(errno));
return 1;
}
fail = dir_walk(dir);
if(dir_output.item(NULL)) {
if(dir_output.item(NULL, 0, NULL)) {
dir_seterr("Output error: %s", strerror(errno));
return 1;
}
@ -172,45 +183,45 @@ static int dir_scan_recurse(struct dir *d) {
/* Scans and adds a single item. Recurses into dir_walk() again if this is a
* directory. Assumes we're chdir'ed in the directory in which this item
* resides, i.e. d->name is a valid relative path to the item. */
static int dir_scan_item(struct dir *d) {
* resides. */
static int dir_scan_item(const char *name) {
struct stat st;
int fail = 0;
#ifdef __CYGWIN__
/* /proc/registry names may contain slashes */
if(strchr(d->name, '/') || strchr(d->name, '\\')) {
d->flags |= FF_ERR;
if(strchr(name, '/') || strchr(name, '\\')) {
buf_dir->flags |= FF_ERR;
dir_setlasterr(dir_curpath);
}
#endif
if(exclude_match(dir_curpath))
d->flags |= FF_EXL;
buf_dir->flags |= FF_EXL;
if(!(d->flags & (FF_ERR|FF_EXL)) && lstat(d->name, &st)) {
d->flags |= FF_ERR;
if(!(buf_dir->flags & (FF_ERR|FF_EXL)) && lstat(name, &st)) {
buf_dir->flags |= FF_ERR;
dir_setlasterr(dir_curpath);
}
if(!(d->flags & (FF_ERR|FF_EXL)))
stat_to_dir(d, &st);
if(!(buf_dir->flags & (FF_ERR|FF_EXL)))
stat_to_dir(&st);
if(cachedir_tags && (d->flags & FF_DIR) && !(d->flags & (FF_ERR|FF_EXL|FF_OTHFS)))
if(has_cachedir_tag(d->name)) {
d->flags |= FF_EXL;
d->size = d->asize = 0;
/* Directories that carry a cache directory tag are treated as excluded and
 * contribute no size. The entry name is handed to this function separately;
 * the name field of the scratch buf_dir is not filled in by the scanner, so
 * the tag lookup has to use `name` rather than buf_dir->name. */
if(cachedir_tags && (buf_dir->flags & FF_DIR) && !(buf_dir->flags & (FF_ERR|FF_EXL|FF_OTHFS)))
  if(has_cachedir_tag(name)) {
    buf_dir->flags |= FF_EXL;
    buf_dir->size = buf_dir->asize = 0;
  }
/* Recurse into the dir or output the item */
if(d->flags & FF_DIR && !(d->flags & (FF_ERR|FF_EXL|FF_OTHFS)))
fail = dir_scan_recurse(d);
else if(d->flags & FF_DIR) {
if(dir_output.item(d) || dir_output.item(NULL)) {
if(buf_dir->flags & FF_DIR && !(buf_dir->flags & (FF_ERR|FF_EXL|FF_OTHFS)))
fail = dir_scan_recurse(name);
else if(buf_dir->flags & FF_DIR) {
if(dir_output.item(buf_dir, name, buf_ext) || dir_output.item(NULL, 0, NULL)) {
dir_seterr("Output error: %s", strerror(errno));
fail = 1;
}
} else if(dir_output.item(d)) {
} else if(dir_output.item(buf_dir, name, buf_ext)) {
dir_seterr("Output error: %s", strerror(errno));
fail = 1;
}
@ -223,15 +234,15 @@ static int dir_scan_item(struct dir *d) {
* the filenames as returned by dir_read(), and will be freed automatically by
* this function. */
static int dir_walk(char *dir) {
struct dir *d;
int fail = 0;
char *cur;
fail = 0;
for(cur=dir; !fail&&cur&&*cur; cur+=strlen(cur)+1) {
dir_curpath_enter(cur);
d = dir_createstruct(cur);
fail = dir_scan_item(d);
memset(buf_dir, 0, offsetof(struct dir, name));
memset(buf_ext, 0, sizeof(struct dir_ext));
fail = dir_scan_item(cur);
dir_curpath_leave();
}
@ -245,7 +256,9 @@ static int process() {
char *dir;
int fail = 0;
struct stat fs;
struct dir *d;
memset(buf_dir, 0, offsetof(struct dir, name));
memset(buf_ext, 0, sizeof(struct dir_ext));
if((path = path_real(dir_curpath)) == NULL)
dir_seterr("Error obtaining full path: %s", strerror(errno));
@ -268,18 +281,17 @@ static int process() {
if(!dir_fatalerr) {
curdev = (uint64_t)fs.st_dev;
d = dir_createstruct(dir_curpath);
if(fail)
d->flags |= FF_ERR;
stat_to_dir(d, &fs);
buf_dir->flags |= FF_ERR;
stat_to_dir(&fs);
if(dir_output.item(d)) {
if(dir_output.item(buf_dir, dir_curpath, buf_ext)) {
dir_seterr("Output error: %s", strerror(errno));
fail = 1;
}
if(!fail)
fail = dir_walk(dir);
if(!fail && dir_output.item(NULL)) {
if(!fail && dir_output.item(NULL, 0, NULL)) {
dir_seterr("Output error: %s", strerror(errno));
fail = 1;
}
@ -291,13 +303,11 @@ static int process() {
}
extern int confirm_quit_while_scanning_stage_1_passed;
void dir_scan_init(const char *path) {
dir_curpath_set(path);
dir_setlasterr(NULL);
dir_seterr(NULL);
dir_process = process;
buf_dir = malloc(dir_memsize(""));
pstate = ST_CALC;
confirm_quit_while_scanning_stage_1_passed = 0;
}

Просмотреть файл

@ -207,7 +207,7 @@ void dirlist_open(struct dir *d) {
/* set the reference to the parent dir */
if(d->parent) {
if(!parent_alloc)
parent_alloc = calloc(1, SDIRSIZE + 3);
parent_alloc = calloc(1, dir_memsize(".."));
dirlist_parent = parent_alloc;
strcpy(dirlist_parent->name, "..");
dirlist_parent->next = head;

Просмотреть файл

@ -30,6 +30,7 @@
#include <stdio.h>
#include <stddef.h>
#include <limits.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
@ -49,6 +50,7 @@
#define FF_SERR 0x20 /* error in subdirectory */
#define FF_HLNKC 0x40 /* hard link candidate (file with st_nlink > 1) */
#define FF_BSEL 0x80 /* selected */
#define FF_EXT 0x100 /* extended struct available */
/* Program states */
#define ST_CALC 0
@ -65,11 +67,9 @@ struct dir {
uint64_t ino, dev;
struct dir *parent, *next, *prev, *sub, *hlnk;
int items;
unsigned char flags;
unsigned short flags;
char name[];
};
/* sizeof(total dir) = SDIRSIZE + strlen(name) = offsetof(struct dir, name) + strlen(name) + 1 */
#define SDIRSIZE (offsetof(struct dir, name)+1)
/* A note on the ino and dev fields above: ino is usually represented as ino_t,
* which POSIX specifies to be an unsigned integer. dev is usually represented
@ -81,6 +81,15 @@ struct dir {
* information is lost in this conversion, and the semantics remain the same.
*/
/* Extended information for a struct dir. This struct is stored in the same
 * memory region as struct dir, placed after the name field, and is only
 * present when the FF_EXT flag is set on that struct dir. See util.h for
 * macros to help manage this.
 *
 * The scanner fills these fields straight from the item's stat struct. */
struct dir_ext {
struct timespec mtime; /* copied from st_mtim */
int uid, gid; /* st_uid and st_gid, cast to int */
unsigned short mode; /* st_mode, stored as an unsigned short */
};
/* program state */
extern int pstate;

Просмотреть файл

@ -74,6 +74,19 @@ extern int subwinr, subwinc;
extern int si;
/* Macros/functions for managing struct dir and struct dir_ext */
/* Number of bytes needed for a struct dir whose name is the string n,
 * including the terminating zero byte of the name. */
#define dir_memsize(n) (offsetof(struct dir, name)+1+strlen(n))
/* Byte offset, from the start of a struct dir with name n, at which its
 * struct dir_ext is placed: dir_memsize(n) rounded up to a multiple of 8. */
#define dir_ext_offset(n) ((dir_memsize(n) + 7) & ~7)
/* Number of bytes needed for a struct dir with name n followed by its
 * struct dir_ext. */
#define dir_ext_memsize(n) (dir_ext_offset(n) + sizeof(struct dir_ext))
/* Returns the struct dir_ext that is stored behind the name of *d, or NULL
 * when d does not have the FF_EXT flag set. */
static inline struct dir_ext *dir_ext_ptr(struct dir *d) {
  char *base = (char *)d;

  if(!(d->flags & FF_EXT))
    return NULL;
  return (struct dir_ext *)(base + dir_ext_offset(d->name));
}
/* Instead of using several ncurses windows, we only draw to stdscr.
* the functions nccreate, ncprint and the macros ncaddstr and ncaddch
* mimic the behaviour of ncurses windows.