Skip to content

Commit b412026

Browse files
Fix an N^2 in batch hydration (#172)
This hadn't been noticed previously since N is generally small, eg. 50 or 60 at most. Working with a Metabase dashboard that loaded 10000 fields (100 each from 100 tables) in a single `select` + `hydrate`, I found this was consuming about 3 seconds. The slow path was effectively a very slow `conj`: taking the `acc` vector, and then for each `annotated-instances` doing: ```clojure (recur (vec (concat acc nil [(first hydrated-instances)])) ...) ``` which is pouring the whole vector into a lazy sequence and back into a vector, once for each instance in the list. The new version first creates an "index" of instances that need hydration, and later uses that index to map the instances in the hydrated instances batch to their places in the initial collection. Co-authored-by: Oleksandr Yakushev <[email protected]>
1 parent 4fa533b commit b412026

File tree

2 files changed

+36
-63
lines changed

2 files changed

+36
-63
lines changed

src/toucan2/tools/hydrate.clj

Lines changed: 20 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -430,48 +430,29 @@
430430
;;; `:needs-hydration?`, replace it with the first hydrated instance; repeat this process until we have spliced all
431431
;;; the hydrated instances back in.
432432

433-
(defn- merge-hydrated-instances
434-
"Merge the annotated instances as returned by [[annotate-instances]] and the hydrated instances as returned
435-
by [[hydrate-annotated-instances]] back into a single un-annotated sequence.
436-
437-
(merge-hydrated-instances
438-
[{:needs-hydration? false, :instance {:x 1, :y 1}}
439-
{:needs-hydration? false, :instance {:x 2, :y 2}}
440-
{:needs-hydration? true, :instance {:x 3}}
441-
{:needs-hydration? true, :instance {:x 4}}
442-
{:needs-hydration? false, :instance {:x 5, :y 5}}
443-
{:needs-hydration? true, :instance {:x 6}}]
444-
[{:x 3, :y 3} {:x 4, :y 4} {:x 6, :y 6}])
445-
=>
446-
[{:x 1, :y 1}
447-
{:x 2, :y 2}
448-
{:x 3, :y 3}
449-
{:x 4, :y 4}
450-
{:x 5, :y 5}
451-
{:x 6, :y 6}]"
452-
[annotated-instances hydrated-instances]
453-
(loop [acc [], annotated-instances annotated-instances, hydrated-instances hydrated-instances]
454-
(if (empty? hydrated-instances)
455-
(concat acc (map :instance annotated-instances))
456-
(let [[not-hydrated [_needed-hydration & more]] (split-with (complement :needs-hydration?) annotated-instances)]
457-
(recur (vec (concat acc (map :instance not-hydrated) [(first hydrated-instances)]))
458-
more
459-
(rest hydrated-instances))))))
460-
461-
(defn- annotate-instances [model k instances]
462-
(for [instance instances]
463-
{:needs-hydration? (needs-hydration? model k instance)
464-
:instance instance}))
465-
466-
(defn- hydrate-annotated-instances [model k annotated-instances]
467-
(when-let [instances-that-need-hydration (not-empty (map :instance (filter :needs-hydration? annotated-instances)))]
468-
(batched-hydrate model k instances-that-need-hydration)))
433+
(defn- index-instances-needing-hydration
434+
"Given a list of instances, return a list of the same length and order where each element is a tuple `[i instance]`.
435+
For instances that need hydration, `i` represents the serial number of instances needing hydration. For instances that
436+
don't need hydration, `i` is nil."
437+
[model k instances]
438+
(let [idx (volatile! -1)]
439+
(mapv (fn [instance]
440+
[(when (needs-hydration? model k instance)
441+
(vswap! idx inc))
442+
instance])
443+
instances)))
469444

470445
(m/defmethod hydrate-with-strategy ::multimethod-batched
471446
[model _strategy k instances]
472-
(let [annotated-instances (annotate-instances model k instances)
473-
hydrated-instances (hydrate-annotated-instances model k annotated-instances)]
474-
(merge-hydrated-instances annotated-instances hydrated-instances)))
447+
(let [indexed (index-instances-needing-hydration model k instances)
448+
instances-to-hydrate (not-empty (map second (filter first indexed))) ;; Important: keep doesn't work here.
449+
hydrated-instances (when instances-to-hydrate
450+
(vec (batched-hydrate model k instances-to-hydrate)))]
451+
(mapv (fn [[i unhydrated-instance]]
452+
(if i
453+
(nth hydrated-instances i)
454+
unhydrated-instance))
455+
indexed)))
475456

476457

477458
;;; Method-Based Simple Hydration (using impls of [[simple-hydrate]])

test/toucan2/tools/hydrate_test.clj

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
(ns toucan2.tools.hydrate-test
22
(:require
3-
[clojure.math.combinatorics :as math.combo]
43
[clojure.test :refer :all]
54
[methodical.core :as m]
65
[toucan2.connection :as conn]
@@ -352,29 +351,22 @@
352351
(is (= {:f [:a 100]}
353352
(hydrate/hydrate {:f [:a 100]} :p)))))
354353

355-
(deftest ^:parallel merge-hydrated-instances-test
356-
(doseq [[a b c d e f :as order] (take 1 (math.combo/permutations (range 1 7)))
357-
:let [annotated-instances (map (fn [i]
358-
(case (long i)
359-
1 {:needs-hydration? false, :instance {:x 1, :y 1}}
360-
2 {:needs-hydration? false, :instance {:x 2, :y 2}}
361-
3 {:needs-hydration? true, :instance {:x 3}}
362-
4 {:needs-hydration? true, :instance {:x 4}}
363-
5 {:needs-hydration? false, :instance {:x 5, :y 5}}
364-
6 {:needs-hydration? true, :instance {:x 6}}))
365-
order)
366-
hydrated-instances (->> order
367-
(filter #{3 4 6})
368-
(map (fn [i]
369-
{:x i, :y i})))]]
370-
(testing (pr-str order)
371-
(is (= [{:x a, :y a}
372-
{:x b, :y b}
373-
{:x c, :y c}
374-
{:x d, :y d}
375-
{:x e, :y e}
376-
{:x f, :y f}]
377-
(#'hydrate/merge-hydrated-instances annotated-instances hydrated-instances))))))
354+
(deftest ^:synchronized index-instances-needing-hydration-test
355+
(let [instances [{:x 1, :y 1}
356+
{:x 2, :y 2}
357+
{:x 3}
358+
{:x 4}
359+
{:x 5, :y 5}
360+
{:x 6}]]
361+
(with-redefs [hydrate/needs-hydration? (fn [_ _ instance]
362+
(= (count instance) 1))]
363+
(is (= [[nil {:x 1, :y 1}]
364+
[nil {:x 2, :y 2}]
365+
[0 {:x 3}]
366+
[1 {:x 4}]
367+
[nil {:x 5, :y 5}]
368+
[2 {:x 6}]]
369+
(#'hydrate/index-instances-needing-hydration nil nil instances))))))
378370

379371
(m/defmethod hydrate/batched-hydrate [:default ::is-bird?]
380372
[_model _k rows]

0 commit comments

Comments
 (0)