stat: Add option to list holes

Add a new -h option that causes stat to print a list of holes for each
file argument.

Sponsored by:	Klara, Inc.
Reviewed by:	markj
Differential Revision:	https://reviews.freebsd.org/D52481
This commit is contained in:
Dag-Erling Smørgrav 2025-09-16 15:37:57 +02:00
parent e13b5298ec
commit 1a7a067da4
3 changed files with 250 additions and 43 deletions

View file

@ -6,6 +6,8 @@
.\" This code is derived from software contributed to The NetBSD Foundation
.\" by Andrew Brown and Jan Schaumann.
.\"
.\" Copyright (c) 2025 Klara, Inc.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
@ -27,7 +29,7 @@
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.Dd June 22, 2017
.Dd September 9, 2025
.Dt STAT 1
.Os
.Sh NAME
@ -36,7 +38,7 @@
.Nd display file status
.Sh SYNOPSIS
.Nm
.Op Fl FHLnq
.Op Fl FHhLnq
.Op Fl f Ar format | Fl l | r | s | x
.Op Fl t Ar timefmt
.Op Ar
@ -129,6 +131,45 @@ and use
instead of
.Xr lstat 2 .
This requires root privileges.
.It Fl h
For each file argument, print a line consisting of a comma-separated
list of holes, a space, and the file name.
Each hole is reported as its starting offset as a decimal number
followed by a hyphen and the ending offset (one less than the starting
offset of the data region that follows the hole) as a decimal number.
If the file ends in a hole, the ending offset of the final hole will
be one less than the size of the file.
Otherwise, the final entry in the list (indeed, the only entry in the
list, if the file is not sparse) is a single decimal number
corresponding to the size of the file, representing the virtual hole
at the end of the file.
.Pp
If the argument is a directory, instead of a list of holes, a single
number is printed, corresponding to the minimum hole size for that
directory as reported by
.Xr pathconf 2 ,
followed by a space and the directory name.
.Pp
Please note that the only way to retrieve information about the holes
in a file is to open it and walk the list of holes and data regions
using
.Xr lseek 2 .
If the file is being modified by another process at the same time as
.Nm
is inspecting it, the result may be inconsistent.
.Pp
This option cannot be combined with the
.Fl F ,
.Fl f ,
.Fl H ,
.Fl L ,
.Fl l ,
.Fl r ,
.Fl s ,
.Fl t ,
or
.Fl x
options.
.It Fl L
Use
.Xr stat 2

View file

@ -7,6 +7,8 @@
* This code is derived from software contributed to The NetBSD Foundation
* by Andrew Brown.
*
* Copyright (c) 2025 Klara, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -47,18 +49,19 @@ __RCSID("$NetBSD: stat.c,v 1.33 2011/01/15 22:54:10 njoly Exp $"
#endif /* HAVE_CONFIG_H */
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <grp.h>
#include <limits.h>
#include <locale.h>
#include <paths.h>
#include <pwd.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -178,22 +181,24 @@ __RCSID("$NetBSD: stat.c,v 1.33 2011/01/15 22:54:10 njoly Exp $"
#define SHOW_filename 'N'
#define SHOW_sizerdev 'Z'
void usage(const char *);
void output(const struct stat *, const char *,
const char *, int, int);
int format1(const struct stat *, /* stat info */
static void usage(const char *);
static void output(const struct stat *, const char *, const char *, int);
static int format1(const struct stat *, /* stat info */
const char *, /* the file name */
const char *, int, /* the format string itself */
char *, size_t, /* a place to put the output */
int, int, int, int, /* the parsed format */
int, int);
int hex2byte(const char [2]);
static int hex2byte(const char [2]);
#if HAVE_STRUCT_STAT_ST_FLAGS
char *xfflagstostr(unsigned long);
static char *xfflagstostr(unsigned long);
#endif
static int fdlistholes(int, const char *);
static int listholes(const char *);
static const char *timefmt;
static int linkfail;
static bool nonl;
#define addchar(s, c, nl) \
do { \
@ -205,20 +210,22 @@ int
main(int argc, char *argv[])
{
struct stat st;
int ch, rc, errs, am_readlink;
int lsF, fmtchar, usestat, nfs_handle, fn, nonl, quiet;
const char *statfmt, *options, *synopsis;
char dname[sizeof _PATH_DEV + SPECNAMELEN] = _PATH_DEV;
fhandle_t fhnd;
const char *statfmt, *options, *synopsis;
const char *file;
fhandle_t fhnd;
int ch, rc, errs, am_readlink, fn, fmtchar;
bool lsF, holes, usestat, nfs_handle, quiet;
am_readlink = 0;
lsF = 0;
errs = 0;
lsF = false;
fmtchar = '\0';
usestat = 0;
nfs_handle = 0;
nonl = 0;
quiet = 0;
holes = false;
usestat = false;
nfs_handle = false;
nonl = false;
quiet = false;
linkfail = 0;
statfmt = NULL;
timefmt = NULL;
@ -231,28 +238,35 @@ main(int argc, char *argv[])
fmtchar = 'f';
quiet = 1;
} else {
options = "f:FHlLnqrst:x";
synopsis = "[-FLnq] [-f format | -l | -r | -s | -x] "
options = "Ff:HhLlnqrst:x";
synopsis = "[-FHhLnq] [-f format | -l | -r | -s | -x] "
"[-t timefmt] [file|handle ...]";
}
while ((ch = getopt(argc, argv, options)) != -1)
switch (ch) {
case 'F':
lsF = 1;
lsF = true;
break;
case 'H':
nfs_handle = 1;
nfs_handle = true;
break;
case 'h':
holes = true;
break;
case 'L':
usestat = 1;
usestat = true;
break;
case 'n':
nonl = 1;
nonl = true;
break;
case 't':
timefmt = optarg;
break;
case 'q':
quiet = 1;
quiet = true;
break;
/* remaining cases are purposefully out of order */
case 'f':
if (am_readlink) {
statfmt = "%R";
@ -269,9 +283,6 @@ main(int argc, char *argv[])
fmtchar, ch);
fmtchar = ch;
break;
case 't':
timefmt = optarg;
break;
default:
usage(synopsis);
}
@ -280,6 +291,28 @@ main(int argc, char *argv[])
argv += optind;
fn = 1;
if (holes) {
if (fmtchar || lsF || nfs_handle || usestat || timefmt)
usage(synopsis);
if (argc > 0) {
while (argc-- > 0) {
if (listholes(*argv) != 0) {
if (!quiet)
warn("%s", *argv);
errs++;
}
argv++;
}
} else {
if (fdlistholes(STDIN_FILENO, "stdin") != 0) {
if (!quiet)
warn("stdin");
errs++;
}
}
exit(errs ? 1 : 0);
}
if (fmtchar == '\0') {
if (lsF)
fmtchar = 'l';
@ -318,7 +351,6 @@ main(int argc, char *argv[])
if (timefmt == NULL)
timefmt = TIME_FORMAT;
errs = 0;
do {
if (argc == 0) {
if (fdevname_r(STDIN_FILENO, dname +
@ -361,8 +393,7 @@ main(int argc, char *argv[])
errno == ENOENT &&
(rc = lstat(file, &st)) == -1)
errno = ENOENT;
}
else
} else
rc = lstat(file, &st);
}
@ -371,9 +402,8 @@ main(int argc, char *argv[])
linkfail = 1;
if (!quiet)
warn("%s", file);
}
else
output(&st, file, statfmt, fn, nonl);
} else
output(&st, file, statfmt, fn);
argv++;
argc--;
@ -387,7 +417,7 @@ main(int argc, char *argv[])
/*
* fflagstostr() wrapper that leaks only once
*/
char *
static char *
xfflagstostr(unsigned long fflags)
{
static char *str = NULL;
@ -402,10 +432,9 @@ xfflagstostr(unsigned long fflags)
}
#endif /* HAVE_STRUCT_STAT_ST_FLAGS */
void
static void
usage(const char *synopsis)
{
/*
 * Write a one-line usage message to stderr, built from the program
 * name reported by getprogname() and the caller-supplied synopsis
 * string, then terminate the process with exit status 1.  This
 * function does not return.
 */
(void)fprintf(stderr, "usage: %s %s\n", getprogname(), synopsis);
exit(1);
}
@ -413,9 +442,8 @@ usage(const char *synopsis)
/*
* Parses a format string.
*/
void
output(const struct stat *st, const char *file,
const char *statfmt, int fn, int nonl)
static void
output(const struct stat *st, const char *file, const char *statfmt, int fn)
{
int flags, size, prec, ofmt, hilo, what;
char buf[PATH_MAX + 4 + 1];
@ -606,7 +634,7 @@ output(const struct stat *st, const char *file,
/*
* Arranges output according to a single parsed format substring.
*/
int
static int
format1(const struct stat *st,
const char *file,
const char *fmt, int flen,
@ -1073,7 +1101,7 @@ format1(const struct stat *st,
(void)strcat(lfmt, "ll");
switch (ofmt) {
case FMTF_DECIMAL: (void)strcat(lfmt, "d"); break;
case FMTF_OCTAL: (void)strcat(lfmt, "o"); break;
case FMTF_OCTAL: (void)strcat(lfmt, "o"); break;
case FMTF_UNSIGNED: (void)strcat(lfmt, "u"); break;
case FMTF_HEX: (void)strcat(lfmt, "x"); break;
}
@ -1083,9 +1111,75 @@ format1(const struct stat *st,
#define hex2nibble(c) (c <= '9' ? c - '0' : toupper(c) - 'A' + 10)
int
/*
 * Converts the two hexadecimal digits c[0] (high nibble) and c[1] (low
 * nibble) into the byte value they encode.
 *
 * Returns the value (0-255) on success, or -1 if either character is
 * not a hexadecimal digit.
 */
static int
hex2byte(const char c[2])
{
	unsigned char ch;
	int i, value;

	value = 0;
	for (i = 0; i < 2; i++) {
		/*
		 * The <ctype.h> functions are only defined for values
		 * representable as unsigned char (or EOF), so convert
		 * before classifying; a plain char may be negative.
		 */
		ch = (unsigned char)c[i];
		/*
		 * isxdigit() accepts exactly 0-9, a-f and A-F, which is
		 * what the arithmetic below relies on.
		 */
		if (!isxdigit(ch))
			return (-1);
		value <<= 4;
		value += (ch <= '9') ? ch - '0' : toupper(ch) - 'A' + 10;
	}
	return (value);
}
/*
 * Walks the regular file open on fd with lseek(2), alternating between
 * SEEK_HOLE and SEEK_DATA, and prints the holes found as a
 * comma-separated list of "start-end" ranges.  A final entry without an
 * end offset is printed when no data region follows and the end of the
 * file is not beyond that position.
 *
 * Returns 0 on success, or -1 if lseek(2) fails with an error other
 * than ENXIO.
 */
static int
scanholes(int fd)
{
	off_t hole, data, end;

	data = 0;
	for (;;) {
		hole = lseek(fd, data, SEEK_HOLE);
		if (hole < 0) {
			if (errno != ENXIO)
				return (-1);
			/*
			 * Either the file is empty (first iteration) or
			 * it shrank underneath us.  Treat the current
			 * position as the start of the virtual hole at
			 * the end of the file.
			 */
			hole = data;
		}
		printf("%jd", (intmax_t)hole);

		data = lseek(fd, hole, SEEK_DATA);
		if (data < 0) {
			if (errno != ENXIO)
				return (-1);
			/*
			 * No further data regions (or the file was
			 * truncated).  The hole we are in may still
			 * extend up to the end of the file; if so,
			 * print where it ends.
			 */
			end = lseek(fd, 0, SEEK_END);
			if (end > hole)
				printf("-%jd", (intmax_t)end - 1);
			return (0);
		}
		/* The hole ends one byte before the next data region. */
		printf("-%jd,", (intmax_t)data - 1);
	}
}

/*
 * Prints one line of hole information for the object open on fd,
 * labelled with the name fn.
 *
 * For a directory, the value of fpathconf(2) for _PC_MIN_HOLE_SIZE is
 * printed.  For a regular file, the list of holes is printed.  Any
 * other file type is rejected with errno set to ESPIPE.  The output is
 * followed by a space, the name, and a newline unless the global nonl
 * flag is set.
 *
 * Returns 0 on success and -1 on failure; callers report the failure
 * using errno.
 */
static int
fdlistholes(int fd, const char *fn)
{
	struct stat sb;
	long minhole;

	if (fstat(fd, &sb) < 0)
		return (-1);

	if (S_ISDIR(sb.st_mode)) {
		minhole = fpathconf(fd, _PC_MIN_HOLE_SIZE);
		if (minhole < 0)
			return (-1);
		printf("%ld", minhole);
	} else if (S_ISREG(sb.st_mode)) {
		if (scanholes(fd) != 0)
			return (-1);
	} else {
		/* Neither a directory nor a regular file. */
		errno = ESPIPE;
		return (-1);
	}

	printf(" %s", fn);
	if (!nonl)
		printf("\n");
	return (0);
}
/*
 * Opens the file named fn read-only and prints its hole information via
 * fdlistholes().
 *
 * Returns 0 on success and -1 on failure.  On failure, errno describes
 * the error from open(2) or from fdlistholes(), so that the caller can
 * report it with warn().
 */
static int
listholes(const char *fn)
{
	int fd, ret, serrno;

	if ((fd = open(fn, O_RDONLY)) < 0)
		return (-1);
	ret = fdlistholes(fd, fn);
	/*
	 * The caller reports failures using errno, so do not let close()
	 * overwrite the value left by fdlistholes().
	 */
	serrno = errno;
	(void)close(fd);
	errno = serrno;
	return (ret);
}

View file

@ -1,6 +1,7 @@
#
# Copyright (c) 2017 Dell EMC
# All rights reserved.
# Copyright (c) 2025 Klara, Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@ -45,6 +46,76 @@ F_flag_body()
atf_check -o match:'.* f\|' stat -Fn f
}
atf_test_case h_flag cleanup
h_flag_head()
{
# Test metadata: a description, plus a requirement to run as root
# because the test body mounts a tmpfs.
atf_set "descr" "Verify the output format for -h"
atf_set "require.user" "root"
}
h_flag_body()
{
# POSIX defines a hole as “[a] contiguous region of bytes
# within a file, all having the value of zero” and requires
# that “all seekable files shall have a virtual hole starting
# at the current size of the file” but says “it is up to the
# implementation to define when sparse files can be created
# and with what granularity for the size of holes”. It also
# defines a sparse file as “[a] file that contains more holes
# than just the virtual hole at the end of the file”. That's
# pretty much the extent of its discussion of holes, apart
# from the description of SEEK_HOLE and SEEK_DATA in the lseek
# manual page. In other words, there is no portable way to
# reliably create a hole in a file on any given file system.
#
# On FreeBSD, this test is likely to run on either tmpfs, ufs
# (ffs2), or zfs. Of those three, only tmpfs has predictable
# semantics and supports all possible configurations (the
# minimum hole size on zfs is variable for small files, and
# ufs will not allow a file to end in a hole).
#
# Therefore, run the whole test inside a freshly mounted tmpfs.
atf_check mkdir mnt
atf_check mount -t tmpfs tmpfs mnt
cd mnt
# For a directory, prints the minimum hole size, which on
# tmpfs is the system page size.
ps=$(sysctl -n hw.pagesize)
atf_check -o inline:"$((ps)) .\n" stat -h .
# Same, but with -n the trailing newline is omitted.
atf_check -o inline:"$((ps)) ." stat -hn .
# For a file, prints a list of holes.
# Empty file: only the virtual hole at offset 0.
atf_check truncate -s 0 foo
atf_check -o inline:"0 foo" \
stat -hn foo
# One page with nothing written: a single hole covering the file.
atf_check truncate -s "$((ps))" foo
atf_check -o inline:"0-$((ps-1)) foo" \
stat -hn foo
# Write one byte right after the first page: a leading hole
# followed by the virtual hole at the new file size.
atf_check dd status=none if=/COPYRIGHT of=foo \
oseek="$((ps))" bs=1 count=1
atf_check -o inline:"0-$((ps-1)),$((ps+1)) foo" \
stat -hn foo
# Extend the file to three pages: a leading hole, one page of
# data, and a trailing hole that runs to the end of the file.
atf_check truncate -s "$((ps*3))" foo
atf_check -o inline:"0-$((ps-1)),$((ps*2))-$((ps*3-1)) foo" \
stat -hn foo
# Test multiple files.
# The second file is a plain copy, for which only its size is
# expected; each file is reported on its own line.
atf_check dd status=none if=/COPYRIGHT of=bar
sz=$(stat -f%z bar)
atf_check -o inline:"0-$((ps-1)),$((ps*2))-$((ps*3-1)) foo
$((sz)) bar
" \
stat -h foo bar
# For a device, fail.
atf_check -s exit:1 -e match:"/dev/null: Illegal seek" \
stat -h /dev/null
}
h_flag_cleanup()
{
	# Nothing to undo if the mount point does not exist.
	[ -d mnt ] || return 0
	# Best effort: a failed umount must not fail the cleanup.
	umount mnt || true
}
atf_test_case l_flag
l_flag_head()
{
@ -233,6 +304,7 @@ atf_init_test_cases()
{
atf_add_test_case F_flag
#atf_add_test_case H_flag
atf_add_test_case h_flag
#atf_add_test_case L_flag
#atf_add_test_case f_flag
atf_add_test_case l_flag