Skip to content

Commit

Permalink
Parallelize categorization calls
Browse files (browse the repository at this point in the history)
GitOrigin-RevId: 010f5b66dcb80114e2a15c9268619ac6ea24681a
  • Loading branch information
alyssachvasta authored and copybara-github committed Jan 28, 2025
1 parent 7d3d947 commit 44600ff
Showing 1 changed file with 19 additions and 10 deletions.
29 changes: 19 additions & 10 deletions src/sensemaker.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,6 +33,7 @@ import { ModelSettings, Model } from "./models/model";
import { groundSummary, parseStringIntoSummary } from "./validation/grounding";
import { GroupedSummaryStats, SummaryStats } from "./stats_util";
import { summaryContainsStats } from "./validation/stats_checker";
import { resolvePromisesInParallel } from "./tasks/summarization_subtasks/recursive_summarization";

// Class to make sense of conversation data. Uses LLMs to learn what topics were discussed and
// categorize comments. Then these categorized comments can be used with optional Vote data to
Expand Down Expand Up @@ -219,8 +220,10 @@ export class Sensemaker {

const instructions = generateCategorizationPrompt(topics, includeSubtopics);

// Call the model in batches, validate results and retry if needed.
const categorized: CommentRecord[] = [];
// TODO: Consider the effects of smaller batch sizes. 1 comment per batch was much faster, but
// the distribution was significantly different from what we're currently seeing. More testing
// is needed to determine the ideal size and distribution.
const batchesToCategorize: Promise<CommentRecord[]>[] = [];
for (
let i = 0;
i < comments.length;
Expand All @@ -230,17 +233,23 @@ export class Sensemaker {
i,
i + this.modelSettings.defaultModel.categorizationBatchSize
);
const categorizedBatch = await categorizeWithRetry(
this.modelSettings.defaultModel,
instructions,
uncategorizedBatch,
includeSubtopics,
topics,
additionalContext
batchesToCategorize.push(
categorizeWithRetry(
this.modelSettings.defaultModel,
instructions,
uncategorizedBatch,
includeSubtopics,
topics,
additionalContext
)
);
categorized.push(...categorizedBatch);
}

const categorized: CommentRecord[] = [];
await resolvePromisesInParallel(batchesToCategorize).then((results: CommentRecord[][]) => {
results.forEach((batch) => categorized.push(...batch));
});

const categorizedComments = hydrateCommentRecord(categorized, comments);
console.log(`Categorization took ${(performance.now() - startTime) / (1000 * 60)} minutes.`);
return categorizedComments;
Expand Down

0 comments on commit 44600ff

Please sign in to comment.