Skip to content

Commit

Permalink
Merge branch 'master' into NAS-130821-2
Browse files Browse the repository at this point in the history
Signed-off-by: Umer Saleem <[email protected]>
  • Loading branch information
usaleem-ix committed Sep 10, 2024
2 parents 927f6ea + 63253db commit 80110e7
Show file tree
Hide file tree
Showing 37 changed files with 1,419 additions and 160 deletions.
55 changes: 45 additions & 10 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -2045,7 +2045,7 @@ dump_all_ddts(spa_t *spa)

for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
ddt_t *ddt = spa->spa_ddt[c];
if (!ddt)
if (!ddt || ddt->ddt_version == DDT_VERSION_UNCONFIGURED)
continue;
for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
for (ddt_class_t class = 0; class < DDT_CLASSES;
Expand All @@ -2072,6 +2072,32 @@ dump_all_ddts(spa_t *spa)
}

dump_dedup_ratio(&dds_total);

/*
* Dump a histogram of unique class entry age
*/
if (dump_opt['D'] == 3 && getenv("ZDB_DDT_UNIQUE_AGE_HIST") != NULL) {
ddt_age_histo_t histogram;

(void) printf("DDT walk unique, building age histogram...\n");
ddt_prune_walk(spa, 0, &histogram);

/*
* print out histogram for unique entry class birth
*/
if (histogram.dah_entries > 0) {
(void) printf("%5s %9s %4s\n",
"age", "blocks", "amnt");
(void) printf("%5s %9s %4s\n",
"-----", "---------", "----");
for (int i = 0; i < HIST_BINS; i++) {
(void) printf("%5d %9d %4d%%\n", 1 << i,
(int)histogram.dah_age_histo[i],
(int)((histogram.dah_age_histo[i] * 100) /
histogram.dah_entries));
}
}
}
}

static void
Expand Down Expand Up @@ -5749,12 +5775,17 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
ddt_entry_t *dde = ddt_lookup(ddt, bp);

/*
* ddt_lookup() can only return NULL if this block didn't exist
* ddt_lookup() can return NULL if this block didn't exist
* in the DDT and creating it would take the DDT over its
* quota. Since we got the block from disk, it must exist in
* the DDT, so this can't happen.
* the DDT, so this can't happen. However, when unique entries
* are pruned, the dedup bit can be set with no corresponding
* entry in the DDT.
*/
VERIFY3P(dde, !=, NULL);
if (dde == NULL) {
ddt_exit(ddt);
goto skipped;
}

/* Get the phys for this variant */
ddt_phys_variant_t v = ddt_phys_select(ddt, dde, bp);
Expand All @@ -5774,8 +5805,8 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
(void *)(((uintptr_t)dde->dde_io) | (1 << v));

/* Consume a reference for this block. */
VERIFY3U(ddt_phys_total_refcnt(ddt, dde->dde_phys), >, 0);
ddt_phys_decref(dde->dde_phys, v);
if (ddt_phys_total_refcnt(ddt, dde->dde_phys) > 0)
ddt_phys_decref(dde->dde_phys, v);

/*
* If this entry has a single flat phys, it may have been
Expand Down Expand Up @@ -5864,6 +5895,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
}
}

skipped:
for (i = 0; i < 4; i++) {
int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
int t = (i & 1) ? type : ZDB_OT_TOTAL;
Expand Down Expand Up @@ -8138,7 +8170,7 @@ dump_mos_leaks(spa_t *spa)

for (uint64_t c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
ddt_t *ddt = spa->spa_ddt[c];
if (!ddt)
if (!ddt || ddt->ddt_version == DDT_VERSION_UNCONFIGURED)
continue;

/* DDT store objects */
Expand All @@ -8150,11 +8182,14 @@ dump_mos_leaks(spa_t *spa)
}

/* FDT container */
mos_obj_refd(ddt->ddt_dir_object);
if (ddt->ddt_version == DDT_VERSION_FDT)
mos_obj_refd(ddt->ddt_dir_object);

/* FDT log objects */
mos_obj_refd(ddt->ddt_log[0].ddl_object);
mos_obj_refd(ddt->ddt_log[1].ddl_object);
if (ddt->ddt_flags & DDT_FLAG_LOG) {
mos_obj_refd(ddt->ddt_log[0].ddl_object);
mos_obj_refd(ddt->ddt_log[1].ddl_object);
}
}

if (spa->spa_brt != NULL) {
Expand Down
113 changes: 112 additions & 1 deletion cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
#include "zpool_util.h"
#include "zfs_comutil.h"
#include "zfeature_common.h"
#include "zfs_valstr.h"

#include "statcommon.h"

Expand Down Expand Up @@ -130,6 +131,8 @@ static int zpool_do_version(int, char **);

static int zpool_do_wait(int, char **);

static int zpool_do_ddt_prune(int, char **);

static int zpool_do_help(int argc, char **argv);

static zpool_compat_status_t zpool_do_load_compat(
Expand Down Expand Up @@ -170,6 +173,7 @@ typedef enum {
HELP_CLEAR,
HELP_CREATE,
HELP_CHECKPOINT,
HELP_DDT_PRUNE,
HELP_DESTROY,
HELP_DETACH,
HELP_EXPORT,
Expand Down Expand Up @@ -426,6 +430,8 @@ static zpool_command_t command_table[] = {
{ "sync", zpool_do_sync, HELP_SYNC },
{ NULL },
{ "wait", zpool_do_wait, HELP_WAIT },
{ NULL },
{ "ddtprune", zpool_do_ddt_prune, HELP_DDT_PRUNE },
};

#define NCOMMAND (ARRAY_SIZE(command_table))
Expand Down Expand Up @@ -545,6 +551,8 @@ get_usage(zpool_help_t idx)
case HELP_WAIT:
return (gettext("\twait [-Hp] [-T d|u] [-t <activity>[,...]] "
"<pool> [interval]\n"));
case HELP_DDT_PRUNE:
return (gettext("\tddtprune -d|-p <amount> <pool>\n"));
default:
__builtin_unreachable();
}
Expand Down Expand Up @@ -11929,6 +11937,7 @@ static void
zpool_do_events_nvprint(nvlist_t *nvl, int depth)
{
nvpair_t *nvp;
static char flagstr[256];

for (nvp = nvlist_next_nvpair(nvl, NULL);
nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {
Expand Down Expand Up @@ -11988,7 +11997,21 @@ zpool_do_events_nvprint(nvlist_t *nvl, int depth)

case DATA_TYPE_UINT32:
(void) nvpair_value_uint32(nvp, &i32);
printf(gettext("0x%x"), i32);
if (strcmp(name,
FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE) == 0 ||
strcmp(name,
FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE) == 0) {
zfs_valstr_zio_stage(i32, flagstr,
sizeof (flagstr));
printf(gettext("0x%x [%s]"), i32, flagstr);
} else if (strcmp(name,
FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY) == 0) {
zfs_valstr_zio_priority(i32, flagstr,
sizeof (flagstr));
printf(gettext("0x%x [%s]"), i32, flagstr);
} else {
printf(gettext("0x%x"), i32);
}
break;

case DATA_TYPE_INT64:
Expand All @@ -12009,6 +12032,12 @@ zpool_do_events_nvprint(nvlist_t *nvl, int depth)
printf(gettext("\"%s\" (0x%llx)"),
zpool_state_to_name(i64, VDEV_AUX_NONE),
(u_longlong_t)i64);
} else if (strcmp(name,
FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS) == 0) {
zfs_valstr_zio_flag(i64, flagstr,
sizeof (flagstr));
printf(gettext("0x%llx [%s]"),
(u_longlong_t)i64, flagstr);
} else {
printf(gettext("0x%llx"), (u_longlong_t)i64);
}
Expand Down Expand Up @@ -13342,6 +13371,88 @@ found:;
return (error);
}

/*
 * zpool ddtprune -d|-p <amount> <pool>
 *
 *	-d <days>	Prune entries <days> old and older
 *	-p <percent>	Prune <percent> amount of entries
 *
 * Prune single reference entries from DDT to satisfy the amount specified.
 *
 * One of -d or -p is required and the two cannot be combined.  The -d
 * value is converted to seconds before it is handed to zpool_ddt_prune(),
 * so day counts that cannot be represented in seconds in a uint64_t are
 * rejected as invalid.  The -p value must be in the range 1-100.
 *
 * Returns -1 if the pool cannot be opened, otherwise the value returned
 * by zpool_ddt_prune().
 */
int
zpool_do_ddt_prune(int argc, char **argv)
{
	const uint64_t secs_per_day = 86400;
	zpool_ddt_prune_unit_t unit = ZPOOL_DDT_PRUNE_NONE;
	uint64_t amount = 0;
	zpool_handle_t *zhp;
	char *endptr;
	int c;

	while ((c = getopt(argc, argv, "d:p:")) != -1) {
		switch (c) {
		case 'd':
			if (unit == ZPOOL_DDT_PRUNE_PERCENTAGE) {
				(void) fprintf(stderr, gettext("-d cannot be "
				    "combined with -p option\n"));
				usage(B_FALSE);
			}
			errno = 0;
			amount = strtoull(optarg, &endptr, 0);
			/*
			 * The upper bound keeps the days-to-seconds
			 * conversion below from wrapping.  It also catches
			 * negative input, which strtoull() converts to a
			 * very large unsigned value without setting errno.
			 */
			if (errno != 0 || *endptr != '\0' || amount == 0 ||
			    amount > UINT64_MAX / secs_per_day) {
				(void) fprintf(stderr,
				    gettext("invalid days value\n"));
				usage(B_FALSE);
			}
			amount *= secs_per_day;	/* convert days to seconds */
			unit = ZPOOL_DDT_PRUNE_AGE;
			break;
		case 'p':
			if (unit == ZPOOL_DDT_PRUNE_AGE) {
				(void) fprintf(stderr, gettext("-p cannot be "
				    "combined with -d option\n"));
				usage(B_FALSE);
			}
			errno = 0;
			amount = strtoull(optarg, &endptr, 0);
			if (errno != 0 || *endptr != '\0' ||
			    amount == 0 || amount > 100) {
				(void) fprintf(stderr,
				    gettext("invalid percentage value\n"));
				usage(B_FALSE);
			}
			unit = ZPOOL_DDT_PRUNE_PERCENTAGE;
			break;
		case '?':
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
		}
	}
	argc -= optind;
	argv += optind;

	/* After option parsing exactly one operand, the pool name, remains */
	if (unit == ZPOOL_DDT_PRUNE_NONE) {
		(void) fprintf(stderr,
		    gettext("missing amount option (-d|-p <value>)\n"));
		usage(B_FALSE);
	} else if (argc < 1) {
		(void) fprintf(stderr, gettext("missing pool argument\n"));
		usage(B_FALSE);
	} else if (argc > 1) {
		(void) fprintf(stderr, gettext("too many arguments\n"));
		usage(B_FALSE);
	}
	zhp = zpool_open(g_zfs, argv[0]);
	if (zhp == NULL)
		return (-1);

	int error = zpool_ddt_prune(zhp, unit, amount);

	zpool_close(zhp);

	return (error);
}

static int
find_command_idx(const char *command, int *idx)
{
Expand Down
33 changes: 31 additions & 2 deletions cmd/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ extern unsigned long zio_decompress_fail_fraction;
extern unsigned long zfs_reconstruct_indirect_damage_fraction;
extern uint64_t raidz_expand_max_reflow_bytes;
extern uint_t raidz_expand_pause_point;
extern boolean_t ddt_prune_artificial_age;
extern boolean_t ddt_dump_prune_histogram;


static ztest_shared_opts_t *ztest_shared_opts;
Expand Down Expand Up @@ -446,6 +448,7 @@ ztest_func_t ztest_fletcher;
ztest_func_t ztest_fletcher_incr;
ztest_func_t ztest_verify_dnode_bt;
ztest_func_t ztest_pool_prefetch_ddt;
ztest_func_t ztest_ddt_prune;

static uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */
static uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */
Expand Down Expand Up @@ -502,6 +505,7 @@ static ztest_info_t ztest_info[] = {
ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
ZTI_INIT(ztest_pool_prefetch_ddt, 1, &zopt_rarely),
ZTI_INIT(ztest_ddt_prune, 1, &zopt_rarely),
};

#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
Expand Down Expand Up @@ -6211,13 +6215,14 @@ void
ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
{
(void) zd, (void) id;
nvlist_t *props = NULL;

(void) pthread_rwlock_rdlock(&ztest_name_lock);

(void) ztest_spa_prop_set_uint64(ZPOOL_PROP_AUTOTRIM, ztest_random(2));

VERIFY0(spa_prop_get(ztest_spa, &props));
nvlist_t *props = fnvlist_alloc();

VERIFY0(spa_prop_get(ztest_spa, props));

if (ztest_opts.zo_verbose >= 6)
dump_nvlist(props, 4);
Expand Down Expand Up @@ -7288,6 +7293,17 @@ ztest_trim(ztest_ds_t *zd, uint64_t id)
mutex_exit(&ztest_vdev_lock);
}

/*
 * Request a prune of unique DDT entries on the test pool, using a
 * randomly chosen percentage as the amount.  The result of the prune
 * request is deliberately ignored.
 */
void
ztest_ddt_prune(ztest_ds_t *zd, uint64_t id)
{
	(void) zd, (void) id;

	/* Offset by one so the requested percentage is never zero */
	uint64_t percent = 1 + ztest_random(15);

	(void) ddt_prune_unique_entries(ztest_spa,
	    ZPOOL_DDT_PRUNE_PERCENTAGE, percent);
}

/*
* Verify pool integrity by running zdb.
*/
Expand Down Expand Up @@ -7469,6 +7485,13 @@ ztest_resume_thread(void *arg)
{
spa_t *spa = arg;

/*
* Synthesize aged DDT entries for ddt prune testing
*/
ddt_prune_artificial_age = B_TRUE;
if (ztest_opts.zo_verbose >= 3)
ddt_dump_prune_histogram = B_TRUE;

while (!ztest_exiting) {
if (spa_suspended(spa))
ztest_resume(spa);
Expand Down Expand Up @@ -8587,6 +8610,12 @@ ztest_init(ztest_shared_t *zs)
if (i == SPA_FEATURE_LOG_SPACEMAP && ztest_random(4) == 0)
continue;

/*
* split 50/50 between legacy and fast dedup
*/
if (i == SPA_FEATURE_FAST_DEDUP && ztest_random(2) != 0)
continue;

VERIFY3S(-1, !=, asprintf(&buf, "feature@%s",
spa_feature_table[i].fi_uname));
fnvlist_add_uint64(props, buf, 0);
Expand Down
1 change: 1 addition & 0 deletions contrib/debian/openzfs-zfsutils.install
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ usr/share/man/man8/zpool-clear.8
usr/share/man/man8/zpool-create.8
usr/share/man/man8/zpool-destroy.8
usr/share/man/man8/zpool-detach.8
usr/share/man/man8/zpool-ddtprune.8
usr/share/man/man8/zpool-events.8
usr/share/man/man8/zpool-export.8
usr/share/man/man8/zpool-get.8
Expand Down
1 change: 1 addition & 0 deletions include/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ COMMON_H = \
zfs_fletcher.h \
zfs_namecheck.h \
zfs_prop.h \
zfs_valstr.h \
\
sys/abd.h \
sys/abd_impl.h \
Expand Down
3 changes: 3 additions & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,9 @@ _LIBZFS_H int zpool_reopen_one(zpool_handle_t *, void *);

_LIBZFS_H int zpool_sync_one(zpool_handle_t *, void *);

_LIBZFS_H int zpool_ddt_prune(zpool_handle_t *, zpool_ddt_prune_unit_t,
uint64_t);

_LIBZFS_H int zpool_vdev_online(zpool_handle_t *, const char *, int,
vdev_state_t *);
_LIBZFS_H int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t);
Expand Down
Loading

0 comments on commit 80110e7

Please sign in to comment.