Commit ce486b5

Author: Paul Dagnelie (committed)
FDT dedup log sync -- remove incremental
This PR condenses the FDT dedup log syncing into a single sync pass. This reduces the overhead of modifying indirect blocks for the dedup table multiple times per txg. In addition, changes were made to the formula for how much to sync per txg: we now also consider the backlog we have to clear, to prevent it from growing too large, or remaining large on an idle system.

Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Authored-by: Don Brady <[email protected]>
Authored-by: Paul Dagnelie <[email protected]>
Signed-off-by: Paul Dagnelie <[email protected]>
1 parent 2cccbac commit ce486b5

File tree

11 files changed: +351 −198 lines


include/sys/ddt.h

Lines changed: 2 additions & 5 deletions
@@ -285,14 +285,11 @@ typedef struct {
 	ddt_log_t *ddt_log_active;	/* pointers into ddt_log */
 	ddt_log_t *ddt_log_flushing;	/* swapped when flush starts */
 
-	hrtime_t ddt_flush_start;	/* log flush start this txg */
-	uint32_t ddt_flush_pass;	/* log flush pass this txg */
-
-	int32_t ddt_flush_count;	/* entries flushed this txg */
-	int32_t ddt_flush_min;		/* min rem entries to flush */
 	int32_t ddt_log_ingest_rate;	/* rolling log ingest rate */
 	int32_t ddt_log_flush_rate;	/* rolling log flush rate */
 	int32_t ddt_log_flush_time_rate; /* avg time spent flushing */
+	uint32_t ddt_log_flush_pressure; /* pressure to apply for cap */
+	uint32_t ddt_log_flush_prev_backlog; /* prev backlog size */
 
 	uint64_t ddt_flush_force_txg;	/* flush hard before this txg */

include/sys/vdev.h

Lines changed: 1 addition & 0 deletions
@@ -171,6 +171,7 @@ extern void vdev_queue_change_io_priority(zio_t *zio, zio_priority_t priority);
 extern uint32_t vdev_queue_length(vdev_t *vd);
 extern uint64_t vdev_queue_last_offset(vdev_t *vd);
 extern uint64_t vdev_queue_class_length(vdev_t *vq, zio_priority_t p);
+extern boolean_t vdev_queue_pool_busy(spa_t *spa);
 
 extern void vdev_config_dirty(vdev_t *vd);
 extern void vdev_config_clean(vdev_t *vd);

include/sys/zfs_debug.h

Lines changed: 1 addition & 0 deletions
@@ -59,6 +59,7 @@ extern int zfs_dbgmsg_enable;
 #define	ZFS_DEBUG_METASLAB_ALLOC	(1 << 13)
 #define	ZFS_DEBUG_BRT			(1 << 14)
 #define	ZFS_DEBUG_RAIDZ_RECONSTRUCT	(1 << 15)
+#define	ZFS_DEBUG_DDT			(1 << 16)
 
 extern void __set_error(const char *file, const char *func, int line, int err);
 extern void __zfs_dbgmsg(char *buf);

man/man4/zfs.4

Lines changed: 43 additions & 28 deletions
@@ -1026,27 +1026,6 @@ milliseconds until the operation completes.
 .It Sy zfs_dedup_prefetch Ns = Ns Sy 0 Ns | Ns 1 Pq int
 Enable prefetching dedup-ed blocks which are going to be freed.
 .
-.It Sy zfs_dedup_log_flush_passes_max Ns = Ns Sy 8 Ns Pq uint
-Maximum number of dedup log flush passes (iterations) each transaction.
-.Pp
-At the start of each transaction, OpenZFS will estimate how many entries it
-needs to flush out to keep up with the change rate, taking the amount and time
-taken to flush on previous txgs into account (see
-.Sy zfs_dedup_log_flush_flow_rate_txgs ) .
-It will spread this amount into a number of passes.
-At each pass, it will use the amount already flushed and the total time taken
-by flushing and by other IO to recompute how much it should do for the remainder
-of the txg.
-.Pp
-Reducing the max number of passes will make flushing more aggressive, flushing
-out more entries on each pass.
-This can be faster, but also more likely to compete with other IO.
-Increasing the max number of passes will put fewer entries onto each pass,
-keeping the overhead of dedup changes to a minimum but possibly causing a large
-number of changes to be dumped on the last pass, which can blow out the txg
-sync time beyond
-.Sy zfs_txg_timeout .
-.
 .It Sy zfs_dedup_log_flush_min_time_ms Ns = Ns Sy 1000 Ns Pq uint
 Minimum time to spend on dedup log flush each transaction.
 .Pp
@@ -1056,22 +1035,58 @@ up to
 This occurs even if doing so would delay the transaction, that is, other IO
 completes under this time.
 .
-.It Sy zfs_dedup_log_flush_entries_min Ns = Ns Sy 1000 Ns Pq uint
+.It Sy zfs_dedup_log_flush_entries_min Ns = Ns Sy 100 Ns Pq uint
 Flush at least this many entries each transaction.
 .Pp
-OpenZFS will estimate how many entries it needs to flush each transaction to
-keep up with the ingest rate (see
-.Sy zfs_dedup_log_flush_flow_rate_txgs ) .
-This sets the minimum for that estimate.
+OpenZFS will flush a fraction of the log every TXG, to keep the size
+proportional to the ingest rate (see
+.Sy zfs_dedup_log_flush_txgs ) .
+This sets the minimum for that estimate, which prevents the backlog from
+never completely draining if the ingest rate falls.
 Raising it can force OpenZFS to flush more aggressively, keeping the log small
 and so reducing pool import times, but can make it less able to back off if
 log flushing would compete with other IO too much.
 .
+.It Sy zfs_dedup_log_flush_entries_max Ns = Ns Sy UINT_MAX Ns Pq uint
+Flush at most this many entries each transaction.
+.Pp
+Mostly used for debugging purposes.
+.It Sy zfs_dedup_log_flush_txgs Ns = Ns Sy 100 Ns Pq uint
+Target number of TXGs over which to process the whole dedup log.
+.Pp
+Every TXG, OpenZFS will process the inverse of this number times the size
+of the DDT backlog.
+This will keep the backlog at a size roughly equal to the ingest rate
+times this value.
+This offers a balance between a more efficient DDT log, with better
+aggregation, and shorter import times, which increase as the size of the
+DDT log increases.
+Increasing this value will result in a more efficient DDT log, but longer
+import times.
+.It Sy zfs_dedup_log_cap Ns = Ns Sy UINT_MAX Ns Pq uint
+Soft cap for the size of the current dedup log.
+.Pp
+If the log is larger than this size, we increase the aggressiveness of
+the flushing to try to bring it back down to the soft cap.
+Setting it will reduce import times, but will reduce the efficiency of
+the DDT log, increasing the expected number of IOs required to flush the same
+amount of data.
+.It Sy zfs_dedup_log_hard_cap Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Whether to treat the log cap as a hard cap or not.
+.Pp
+The default is 0.
+If this is set to 1, the
+.Sy zfs_dedup_log_cap
+acts more like a hard cap than a soft cap.
+When set to 0, the soft cap will increase the maximum number of log entries
+we flush in a given txg.
+When set to 1, it will also increase the minimum number of log entries we
+flush.
+Enabling it will reduce worst-case import times, at the cost of increased TXG
+sync times.
 .It Sy zfs_dedup_log_flush_flow_rate_txgs Ns = Ns Sy 10 Ns Pq uint
 Number of transactions to use to compute the flow rate.
 .Pp
-OpenZFS will estimate how many entries it needs to flush each transaction by
-monitoring the number of entries changed (ingest rate), number of entries
+OpenZFS will estimate the number of entries changed (ingest rate), number of entries
 flushed (flush rate) and time spent flushing (flush time rate) and combining
 these into an overall "flow rate".
 It will use an exponential weighted moving average over some number of recent
