From dean@arctic.org Sat Nov 11 16:35:39 2006 Date: Sat, 11 Nov 2006 16:35:39 -0800 (PST) From: dean gaudet To: James Youngman Cc: bug-findutils@gnu.org Subject: Re: [patch] -sparse predicate On Sat, 11 Nov 2006, James Youngman wrote: > Actually, the units in which st_blocks is measured are not st_blksize. > All that st_blksize tells you is a "preferred" IO request size for > efficient I/O. See the RATIONALE section of > http://www.opengroup.org/onlinepubs/009695399/basedefs/sys/stat.h.html oh yeah, i should have known that. > Anyway, that problem is easily fixed I think, since gnulib already has > a way of figuring out the system's block size. ok... grabbed cvs findutils and cvs gnulib... hmm... are you thinking of the get_fs_usage() operation? that's the only one i see using statfs-related calls. > Thanks again for the contribution. At a guess, I would say that it's > likely to make it into findutils 4.3.2. It's unlikely to appear in > 4.2.x because I'm pretty much trying to limit changes in those > releases to just bugfixes. that's cool! so, i've updated the patch using get_fs_usage. i left the disk parameter NULL, which means it'll fail on ultrix... it seems maybe gnulib needs a get_fs_blocksize or something which isn't so heavyweight. i noticed there was already precedent for a simple one filesystem cache in the filesystem_type code... so i've done that here as well. thanks -dean diff -pru -xCVS -xgnulib -xautom4te.cache -xMakefile.in findutils.orig/doc/find.texi findutils/doc/find.texi --- findutils.orig/doc/find.texi 2006-08-20 13:39:48.000000000 -0700 +++ findutils/doc/find.texi 2006-11-11 15:49:43.268004118 -0800 @@ -924,6 +924,12 @@ useful with @samp{-depth} (@pxref{Direct (@pxref{Single File}). @end deffn +@deffn Test -sparse +True if the file is a regular file and has a size larger than the +number of blocks allocated multiplied by the blocksize. This generally +occurs if the file is sparse. 
+@end deffn + @node Type @section Type diff -pru -xCVS -xgnulib -xautom4te.cache -xMakefile.in findutils.orig/find/defs.h findutils/find/defs.h --- findutils.orig/find/defs.h 2006-08-20 11:18:42.000000000 -0700 +++ findutils/find/defs.h 2006-11-11 15:49:43.260001599 -0800 @@ -513,6 +513,7 @@ boolean pred_readable PARAMS((char *path boolean pred_regex PARAMS((char *pathname, struct stat *stat_buf, struct predicate *pred_ptr)); boolean pred_samefile PARAMS((char *pathname, struct stat *stat_buf, struct predicate *pred_ptr)); boolean pred_size PARAMS((char *pathname, struct stat *stat_buf, struct predicate *pred_ptr)); +boolean pred_sparse PARAMS((char *pathname, struct stat *stat_buf, struct predicate *pred_ptr)); boolean pred_true PARAMS((char *pathname, struct stat *stat_buf, struct predicate *pred_ptr)); boolean pred_type PARAMS((char *pathname, struct stat *stat_buf, struct predicate *pred_ptr)); boolean pred_uid PARAMS((char *pathname, struct stat *stat_buf, struct predicate *pred_ptr)); diff -pru -xCVS -xgnulib -xautom4te.cache -xMakefile.in findutils.orig/find/find.1 findutils/find/find.1 --- findutils.orig/find/find.1 2006-08-20 13:39:48.000000000 -0700 +++ findutils/find/find.1 2006-11-11 15:49:43.272005379 -0800 @@ -520,6 +520,10 @@ sparse files that are not actually alloc differently. The `b' suffix always denotes 512-byte blocks and never 1 Kilobyte blocks, which is different to the behaviour of \-ls. +.IP \-sparse +True if the file is a regular file and has a size larger than the +number of blocks allocated multiplied by the blocksize. This generally +occurs if the file is sparse. .IP \-true Always true. 
.IP "\-type \fIc\fR" diff -pru -xCVS -xgnulib -xautom4te.cache -xMakefile.in findutils.orig/find/parser.c findutils/find/parser.c --- findutils.orig/find/parser.c 2006-08-21 15:35:13.000000000 -0700 +++ findutils/find/parser.c 2006-11-11 15:49:57.932623506 -0800 @@ -136,6 +136,7 @@ static boolean parse_samefile PARAM static boolean parse_show_control_chars PARAMS((const struct parser_table*, char *argv[], int *arg_ptr)); #endif static boolean parse_size PARAMS((const struct parser_table*, char *argv[], int *arg_ptr)); +static boolean parse_sparse PARAMS((const struct parser_table*, char *argv[], int *arg_ptr)); static boolean parse_time PARAMS((const struct parser_table*, char *argv[], int *arg_ptr)); static boolean parse_true PARAMS((const struct parser_table*, char *argv[], int *arg_ptr)); static boolean parse_type PARAMS((const struct parser_table*, char *argv[], int *arg_ptr)); @@ -270,6 +271,7 @@ static struct parser_table const parse_t PARSE_OPTION ("show-control-chars", show_control_chars), /* GNU, 4.3.0+ */ #endif PARSE_TEST ("size", size), + PARSE_TEST ("sparse", sparse), /* GNU */ PARSE_TEST ("type", type), PARSE_TEST ("uid", uid), /* GNU */ PARSE_TEST ("used", used), /* GNU */ @@ -1609,6 +1611,15 @@ parse_regextype (const struct parser_tab return parse_noop(entry, argv, arg_ptr); } +static boolean +parse_sparse (const struct parser_table* entry, char **argv, int *arg_ptr) +{ + (void) argv; + (void) arg_ptr; + + insert_primary (entry); + return true; +} static boolean parse_regex (const struct parser_table* entry, char **argv, int *arg_ptr) diff -pru -xCVS -xgnulib -xautom4te.cache -xMakefile.in findutils.orig/find/pred.c findutils/find/pred.c --- findutils.orig/find/pred.c 2006-11-07 23:28:50.000000000 -0800 +++ findutils/find/pred.c 2006-11-11 16:28:36.193040437 -0800 @@ -38,6 +38,7 @@ #include "buildcmd.h" #include "yesno.h" #include "listfile.h" +#include "fsusage.h" #if ENABLE_NLS # include @@ -213,6 +214,7 @@ struct pred_assoc pred_table[] = 
{pred_regex, "regex "}, {pred_samefile,"samefile "}, {pred_size, "size "}, + {pred_sparse, "sparse "}, {pred_true, "true "}, {pred_type, "type "}, {pred_uid, "uid "}, @@ -1355,6 +1357,31 @@ pred_size (char *pathname, struct stat * } boolean +pred_sparse (char *pathname, struct stat *stat_buf, struct predicate *pred_ptr) +{ + static dev_t cur_dev = -1; + static struct fs_usage fsu; + + (void) pathname; + (void) pred_ptr; + + if (!S_ISREG (stat_buf->st_mode)) + return false; + + if (stat_buf->st_dev != cur_dev) { + if (get_fs_usage(state.rel_pathname, NULL, &fsu)) { + // could produce error here, but it's likely to occur every attempt + // on the same filesystem, and that would be quite spammy. + //error (0, errno, "get_fs_usage(%s)", pathname); + return false; + } + cur_dev = stat_buf->st_dev; + } + + return stat_buf->st_size > stat_buf->st_blocks * fsu.fsu_blocksize; +} + +boolean pred_samefile (char *pathname, struct stat *stat_buf, struct predicate *pred_ptr) { /* Potential optimisation: because of the loop protection, we always diff -pru -xCVS -xgnulib -xautom4te.cache -xMakefile.in findutils.orig/find/tree.c findutils/find/tree.c --- findutils.orig/find/tree.c 2006-01-04 11:22:38.000000000 -0800 +++ findutils/find/tree.c 2006-11-11 16:04:27.645317216 -0800 @@ -1033,6 +1033,7 @@ static struct pred_cost_lookup costlooku { pred_regex , NeedsNothing }, { pred_samefile , NeedsStatInfo }, { pred_size , NeedsStatInfo }, + { pred_sparse , NeedsStatInfo }, { pred_true , NeedsNothing }, { pred_type , NeedsType }, { pred_uid , NeedsStatInfo }, diff -pru -xCVS -xgnulib -xautom4te.cache -xMakefile.in findutils.orig/import-gnulib.sh findutils/import-gnulib.sh --- findutils.orig/import-gnulib.sh 2006-08-20 11:18:42.000000000 -0700 +++ findutils/import-gnulib.sh 2006-11-11 15:48:53.084196480 -0800 @@ -53,7 +53,7 @@ build-aux/texinfo.tex # Modules needed for findutils. 
findutils_modules="\ -alloca argmatch dirname error fileblocks fnmatch-gnu fopen-safer fts \ +alloca argmatch dirname error fileblocks fnmatch-gnu fopen-safer fsusage fts \ getline getopt human idcache lstat malloc memcmp memset mktime \ modechange pathmax quotearg realloc regex rpmatch savedir \ stpcpy strdup strftime strstr strtol strtoul strtoull strtoumax \ diff -Npru -xCVS -xgnulib -xautom4te.cache -xMakefile.in findutils.orig/find/testsuite/find.gnu/sparse.exp findutils/find/testsuite/find.gnu/sparse.exp --- findutils.orig/find/testsuite/find.gnu/sparse.exp 1969-12-31 16:00:00.000000000 -0800 +++ findutils/find/testsuite/find.gnu/sparse.exp 2006-11-11 15:49:43.272005379 -0800 @@ -0,0 +1,9 @@ +# tests for -sparse +exec rm -rf tmp +exec mkdir tmp +exec echo hi > tmp/notsparse +# note that some filesystems (such as XFS) won't create a sparse file when the +# "holes" are too small. hopefully this hole is large enough. +exec dd if=/dev/zero of=tmp/sparse seek=10000 bs=4096 count=1 2>/dev/null +find_start p { tmp -sparse } +exec rm -rf tmp diff -Npru -xCVS -xgnulib -xautom4te.cache -xMakefile.in findutils.orig/find/testsuite/find.gnu/sparse.xo findutils/find/testsuite/find.gnu/sparse.xo --- findutils.orig/find/testsuite/find.gnu/sparse.xo 1969-12-31 16:00:00.000000000 -0800 +++ findutils/find/testsuite/find.gnu/sparse.xo 2006-11-11 15:49:43.272005379 -0800 @@ -0,0 +1 @@ +tmp/sparse