1 | 1 | from collections import defaultdict
2 | 2 | from contextlib import asynccontextmanager
3 | 3 | from json import loads as json_loads
  | 4 | +from math import ceil
4 | 5 | from typing import (
5 | 6 |     Any,
6 | 7 |     Literal,

33 | 34 |
34 | 35 | TableType = TypeVar("TableType", bound=TableBase)
35 | 36 |
   | 37 | +# PostgreSQL has a limit of 65535 parameters per query
   | 38 | +PG_MAX_PARAMETERS = 65535
   | 39 | +
36 | 40 |
37 | 41 | class DBConnection:
38 | 42 |     """
@@ -235,37 +239,98 @@ async def insert(self, objects: Sequence[TableBase]):
235 | 239 |         if not objects:
236 | 240 |             return
237 | 241 |
238 | | -        for model, model_objects in self._aggregate_models_by_table(objects):
239 | | -            table_name = QueryIdentifier(model.get_table_name())
240 | | -            fields = {
241 | | -                field: info
242 | | -                for field, info in model.model_fields.items()
243 | | -                if (not info.exclude and not info.autoincrement)
244 | | -            }
245 | | -            field_string = ", ".join(f'"{field}"' for field in fields)
246 | | -            primary_key = self._get_primary_key(model)
247 | | -
248 | | -            placeholders = ", ".join(f"${i}" for i in range(1, len(fields) + 1))
249 | | -            query = f"INSERT INTO {table_name} ({field_string}) VALUES ({placeholders})"
250 | | -            if primary_key:
251 | | -                query += f" RETURNING {primary_key}"
| 242 | +        # Reuse a single transaction for all inserts
| 243 | +        async with self._ensure_transaction():
| 244 | +            for model, model_objects in self._aggregate_models_by_table(objects):
| 245 | +                # For each table, build batched insert queries
| 246 | +                table_name = QueryIdentifier(model.get_table_name())
| 247 | +                fields = {
| 248 | +                    field: info
| 249 | +                    for field, info in model.model_fields.items()
| 250 | +                    if (not info.exclude and not info.autoincrement)
| 251 | +                }
| 252 | +                primary_key = self._get_primary_key(model)
| 253 | +                field_names = list(
| 254 | +                    fields.keys()
| 255 | +                )  # Iterate over these in order for each row
| 256 | +                field_identifiers = ", ".join(f'"{f}"' for f in field_names)
| 257 | +
| 258 | +                # Calculate max batch size based on number of fields
| 259 | +                # Each row uses len(fields) parameters, so max_batch_size * len(fields) <= PG_MAX_PARAMETERS
| 260 | +                max_batch_size = PG_MAX_PARAMETERS // len(fields)
| 261 | +                # Cap at 5000 rows per batch to avoid excessive memory usage
| 262 | +                max_batch_size = min(max_batch_size, 5000)
| 263 | +
| 264 | +                total = len(model_objects)
| 265 | +                num_batches = ceil(total / max_batch_size)
| 266 | +
| 267 | +                for batch_idx in range(num_batches):
| 268 | +                    start_idx = batch_idx * max_batch_size
| 269 | +                    end_idx = (batch_idx + 1) * max_batch_size
| 270 | +                    batch_objects = model_objects[start_idx:end_idx]
| 271 | +
| 272 | +                    # Build the multi-row VALUES clause
| 273 | +                    # e.g. for 3 rows with 2 columns, we'd want:
| 274 | +                    # VALUES ($1, $2), ($3, $4), ($5, $6)
| 275 | +                    num_rows = len(batch_objects)
| 276 | +                    if not num_rows:
| 277 | +                        continue
252 | 278 |
253 | | -            async with self._ensure_transaction():
254 | | -                for obj in model_objects:
255 | | -                    obj_values = obj.model_dump()
256 | | -                    values = [
257 | | -                        info.to_db_value(obj_values[field])
258 | | -                        for field, info in fields.items()
259 | | -                    ]
260 | | -                    result = await self.conn.fetchrow(query, *values)
| 279 | +                    # placeholders per row: ($1, $2, ...)
| 280 | +                    # but we have to shift the placeholder index for each row
| 281 | +                    placeholders: list[str] = []
| 282 | +                    values: list[Any] = []
| 283 | +                    param_index = 1
| 284 | +
| 285 | +                    for obj in batch_objects:
| 286 | +                        obj_values = obj.model_dump()
| 287 | +                        row_values = []
| 288 | +                        for field in field_names:
| 289 | +                            info = fields[field]
| 290 | +                            row_values.append(info.to_db_value(obj_values[field]))
| 291 | +                        values.extend(row_values)
| 292 | +                        row_placeholder = (
| 293 | +                            "("
| 294 | +                            + ", ".join(
| 295 | +                                f"${p}"
| 296 | +                                for p in range(
| 297 | +                                    param_index, param_index + len(field_names)
| 298 | +                                )
| 299 | +                            )
| 300 | +                            + ")"
| 301 | +                        )
| 302 | +                        placeholders.append(row_placeholder)
| 303 | +                        param_index += len(field_names)
| 304 | +
| 305 | +                    placeholders_clause = ", ".join(placeholders)
| 306 | +
| 307 | +                    query = f"""
| 308 | +                        INSERT INTO {table_name} ({field_identifiers})
| 309 | +                        VALUES {placeholders_clause}
| 310 | +                    """
| 311 | +                    if primary_key:
| 312 | +                        query += f" RETURNING {primary_key}"
| 313 | +
| 314 | +                    # Insert them in one go
| 315 | +                    if primary_key:
| 316 | +                        rows = await self.conn.fetch(query, *values)
| 317 | +                        # 'rows' should be a list of Record objects, one per inserted row
| 318 | +                        # Update each object in the same order
| 319 | +                        for obj, row in zip(batch_objects, rows):
| 320 | +                            setattr(obj, primary_key, row[primary_key])
| 321 | +                    else:
| 322 | +                        # No need to fetch anything if there's no primary key
| 323 | +                        await self.conn.execute(query, *values)
261 | 324 |
262 | | -                    if primary_key and result:
263 | | -                        setattr(obj, primary_key, result[primary_key])
264 | | -                    obj.clear_modified_attributes()
| 325 | +                    # Mark as unmodified
| 326 | +                    for obj in batch_objects:
| 327 | +                        obj.clear_modified_attributes()
265 | 328 |
| 329 | +        # Register modification callbacks outside the main insert loop
266 | 330 |         for obj in objects:
267 | 331 |             obj.register_modified_callback(self.modification_tracker.track_modification)
268 | 332 |
| 333 | +        # Clear modification status
269 | 334 |         self.modification_tracker.clear_status(objects)
270 | 335 |
271 | 336 |     @overload
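
As a side note on the parameter-budget arithmetic in this change (illustrative only, not part of the commit): with asyncpg-style numbered placeholders, every row consumes one parameter per column, so the rows-per-statement budget is PostgreSQL's 65535-parameter limit divided by the column count, further capped here at 5000 rows. A minimal standalone sketch, using a hypothetical plan_batches helper and example numbers:

from math import ceil

PG_MAX_PARAMETERS = 65535  # PostgreSQL's per-query parameter limit

def plan_batches(num_rows: int, num_fields: int, cap: int = 5000) -> list[int]:
    # Hypothetical helper, not in the commit: compute the row count of each batch.
    max_batch_size = min(PG_MAX_PARAMETERS // num_fields, cap)
    num_batches = ceil(num_rows / max_batch_size)
    return [min(max_batch_size, num_rows - i * max_batch_size) for i in range(num_batches)]

# e.g. 12,000 rows with 8 insertable columns: 65535 // 8 = 8191, capped to 5000,
# so the insert runs as three statements of 5000, 5000, and 2000 rows.
print(plan_batches(12_000, 8))  # [5000, 5000, 2000]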
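
Similarly, a small sketch of the multi-row VALUES clause the batching loop builds (again illustrative, with a hypothetical values_clause helper): each row contributes one parenthesised group of $N placeholders, and the running parameter index shifts by the column count per row.

def values_clause(num_rows: int, num_fields: int) -> str:
    # Hypothetical helper mirroring the placeholder construction above.
    groups: list[str] = []
    param_index = 1
    for _ in range(num_rows):
        group = ", ".join(f"${p}" for p in range(param_index, param_index + num_fields))
        groups.append(f"({group})")
        param_index += num_fields
    return ", ".join(groups)

print(values_clause(3, 2))  # ($1, $2), ($3, $4), ($5, $6)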