Skip to content

FEATURE: add inferred concepts system #1330

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .claude/settings.local.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"permissions": {
"allow": [
"Bash(bundle exec rails g migration:*)"
],
"deny": []
}
}
69 changes: 69 additions & 0 deletions app/jobs/regular/generate_inferred_concepts.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# frozen_string_literal: true

module Jobs
  # Runs inferred-concept processing for an explicit list of topics or posts.
  #
  # Items are loaded and processed in slices; a failure on one item is logged
  # and does not stop the remaining items from being processed.
  class GenerateInferredConcepts < ::Jobs::Base
    sidekiq_options queue: "low"

    VALID_ITEM_TYPES = %w[topics posts].freeze
    DEFAULT_BATCH_SIZE = 100

    # Process items to generate new concepts
    #
    # @param args [Hash] Contains job arguments
    # @option args [String] :item_type Required - Type of items to process ('topics' or 'posts')
    # @option args [Array<Integer>] :item_ids Required - List of item IDs to process
    # @option args [Integer] :batch_size (100) Number of items to process in each batch;
    #   missing, zero or negative values fall back to the default
    # @option args [Boolean] :match_only (false) Only match against existing concepts without generating new ones
    # @return [void]
    def execute(args = {})
      return if args[:item_ids].blank? || args[:item_type].blank?

      if VALID_ITEM_TYPES.exclude?(args[:item_type])
        Rails.logger.error("Invalid item_type for GenerateInferredConcepts: #{args[:item_type]}")
        return
      end

      # Array() lets a single ID be passed without breaking each_slice.
      item_ids = Array(args[:item_ids])
      match_only = args[:match_only] || false

      # Slice the IDs so only one batch of records is loaded at a time.
      item_ids.each_slice(normalized_batch_size(args[:batch_size])) do |batch_item_ids|
        process_batch(batch_item_ids, args[:item_type], match_only)
      end
    end

    private

    # Enumerable#each_slice raises ArgumentError for a size below 1, which
    # would fail the whole job; fall back to the default instead.
    def normalized_batch_size(value)
      size = value.to_i
      size.positive? ? size : DEFAULT_BATCH_SIZE
    end

    # Loads the records for one slice of IDs and processes them one by one.
    # IDs with no matching record are simply absent from the query result.
    def process_batch(item_ids, item_type, match_only)
      # item_type has already been restricted to VALID_ITEM_TYPES by #execute,
      # so constantize only ever sees "Topic" or "Post".
      klass = item_type.singularize.classify.constantize

      klass
        .where(id: item_ids)
        .each do |item|
          begin
            process_item(item, item_type, match_only)
          rescue => e
            # Best effort: log and continue with the next item.
            Rails.logger.error(
              "Error generating concepts from #{item_type.singularize} #{item.id}: #{e.message}\n#{e.backtrace&.join("\n")}",
            )
          end
        end
    end

    # Dispatches a single record to the Manager entry point that corresponds
    # to its type and to the match_only flag.
    def process_item(item, item_type, match_only)
      manager = DiscourseAi::InferredConcepts::Manager
      topic = item_type == "topics"

      if match_only
        topic ? manager.match_topic_to_concepts(item) : manager.match_post_to_concepts(item)
      else
        topic ? manager.analyze_topic(item) : manager.analyze_post(item)
      end
    end
  end
end
81 changes: 81 additions & 0 deletions app/jobs/scheduled/generate_concepts_from_popular_items.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# frozen_string_literal: true

module Jobs
  # Daily scheduled job: looks up candidate topics and posts through the
  # inferred-concepts Manager and enqueues GenerateInferredConcepts jobs for
  # them. Does nothing unless SiteSetting.inferred_concepts_enabled is set.
  class GenerateConceptsFromPopularItems < ::Jobs::Scheduled
    every 1.day

    # Batch size handed to every enqueued GenerateInferredConcepts job.
    BATCH_SIZE = 10
    # Delay before the follow-up match-only job runs.
    MATCH_DELAY = 1.hour

    # @param args [Hash] unused
    def execute(args = {})
      return unless SiteSetting.inferred_concepts_enabled

      process_popular_topics
      process_popular_posts
    end

    private

    # Finds candidate topics using the topic-related site settings and
    # enqueues concept jobs for them.
    def process_popular_topics
      topics =
        DiscourseAi::InferredConcepts::Manager.find_candidate_topics(
          limit: SiteSetting.inferred_concepts_daily_topics_limit || 20,
          min_posts: SiteSetting.inferred_concepts_min_posts || 5,
          min_likes: SiteSetting.inferred_concepts_min_likes || 10,
          min_views: SiteSetting.inferred_concepts_min_views || 100,
          created_after: lookback_cutoff,
        )

      enqueue_concept_jobs("topics", topics)
    end

    # Finds candidate posts (first posts excluded) using the post-related
    # site settings and enqueues concept jobs for them.
    def process_popular_posts
      posts =
        DiscourseAi::InferredConcepts::Manager.find_candidate_posts(
          limit: SiteSetting.inferred_concepts_daily_posts_limit || 30,
          min_likes: SiteSetting.inferred_concepts_post_min_likes || 5,
          exclude_first_posts: true,
          created_after: lookback_cutoff,
        )

      enqueue_concept_jobs("posts", posts)
    end

    # Earliest creation time an item may have to be considered.
    def lookback_cutoff
      SiteSetting.inferred_concepts_lookback_days.days.ago
    end

    # Enqueues two jobs for the candidates: an immediate one that generates
    # concepts, and a delayed match-only one over the same IDs.
    def enqueue_concept_jobs(item_type, candidates)
      return if candidates.blank?

      ids = candidates.map(&:id)

      Jobs.enqueue(
        :generate_inferred_concepts,
        item_type: item_type,
        item_ids: ids,
        batch_size: BATCH_SIZE,
      )

      Jobs.enqueue_in(
        MATCH_DELAY,
        :generate_inferred_concepts,
        item_type: item_type,
        item_ids: ids,
        batch_size: BATCH_SIZE,
        match_only: true,
      )
    end
  end
end
22 changes: 22 additions & 0 deletions app/models/inferred_concept.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# frozen_string_literal: true

# A named concept that can be associated with many topics and many posts.
class InferredConcept < ActiveRecord::Base
  # has_and_belongs_to_many derives its default join table by combining the
  # two table names in lexical order, which here would be
  # inferred_concepts_topics / inferred_concepts_posts. The migrations create
  # topics_inferred_concepts and posts_inferred_concepts, so the join tables
  # must be named explicitly.
  has_and_belongs_to_many :topics, join_table: :topics_inferred_concepts
  has_and_belongs_to_many :posts, join_table: :posts_inferred_concepts

  # Mirrors the NOT NULL constraint and unique index on inferred_concepts.name.
  validates :name, presence: true, uniqueness: true
end

# == Schema Information
#
# Table name: inferred_concepts
#
# id :bigint not null, primary key
# name :string not null
# created_at :datetime not null
# updated_at :datetime not null
#
# Indexes
#
# index_inferred_concepts_on_name (name) UNIQUE
#
34 changes: 34 additions & 0 deletions app/serializers/ai_inferred_concept_post_serializer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# frozen_string_literal: true

# Serializes a post-like object together with its inferred concepts.
#
# Attributes without a method below (id, post_number, topic_id, topic_title,
# username, created_at, updated_at) are not defined in this class.
# NOTE(review): topic_title and uploaded_avatar_id are presumably supplied by
# the query that loads the object — confirm against the caller.
class AiInferredConceptPostSerializer < ApplicationSerializer
  attributes(
    :id,
    :post_number,
    :topic_id,
    :topic_title,
    :username,
    :avatar_template,
    :created_at,
    :updated_at,
    :excerpt,
    :truncated,
    :inferred_concepts,
  )

  # Concepts attached to the object, each rendered with InferredConceptSerializer.
  def inferred_concepts
    concepts = object.inferred_concepts
    ActiveModel::ArraySerializer.new(concepts, each_serializer: InferredConceptSerializer)
  end

  # True when the cooked HTML is longer than SiteSetting.post_excerpt_maxlength.
  def truncated
    cooked_length = object.cooked.length
    cooked_length > SiteSetting.post_excerpt_maxlength
  end

  # Excerpt built from the cooked HTML via Post.excerpt.
  def excerpt
    cooked_html = object.cooked
    Post.excerpt(cooked_html)
  end

  # Avatar template built from the object's username and uploaded avatar id.
  def avatar_template
    post = object
    User.avatar_template(post.username, post.uploaded_avatar_id)
  end
end
5 changes: 5 additions & 0 deletions app/serializers/inferred_concept_serializer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# frozen_string_literal: true

# Exposes the id, name and timestamps of the serialized object.
class InferredConceptSerializer < ApplicationSerializer
  attributes(*%i[id name created_at updated_at])
end
6 changes: 6 additions & 0 deletions config/locales/server.en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,12 @@ en:
short_summarizer:
name: "Summarizer (short form)"
description: "Default persona used to power AI short summaries for topic lists' items"
concept_finder:
name: "Concept Finder"
description: "AI Bot specialized in identifying concepts and themes in content"
concept_matcher:
name: "Concept Matcher"
description: "AI Bot specialized in matching content against existing concepts"
topic_not_found: "Summary unavailable, topic not found!"
summarizing: "Summarizing topic"
searching: "Searching for: '%{query}'"
Expand Down
33 changes: 33 additions & 0 deletions config/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -391,3 +391,36 @@ discourse_ai:
default: true
client: true

inferred_concepts_enabled:
default: false
client: true
description: "Enable the inferred concepts system that automatically generates and applies concepts to topics and posts"
inferred_concepts_daily_topics_limit:
default: 20
client: false
description: "Maximum number of topics to process each day for concept generation"
inferred_concepts_min_posts:
default: 5
client: false
description: "Minimum number of posts a topic must have to be considered for concept generation"
inferred_concepts_min_likes:
default: 10
client: false
description: "Minimum number of likes a topic must have to be considered for concept generation"
inferred_concepts_min_views:
default: 100
client: false
description: "Minimum number of views a topic must have to be considered for concept generation"
inferred_concepts_lookback_days:
default: 30
client: false
description: "Only consider topics and posts created within this many days for concept generation"
inferred_concepts_daily_posts_limit:
default: 30
client: false
description: "Maximum number of posts to process each day for concept generation"
inferred_concepts_post_min_likes:
default: 5
client: false
description: "Minimum number of likes a post must have to be considered for concept generation"

5 changes: 4 additions & 1 deletion db/fixtures/personas/603_ai_personas.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# frozen_string_literal: true

summarization_personas = [DiscourseAi::Personas::Summarizer, DiscourseAi::Personas::ShortSummarizer]
concepts_personas = [DiscourseAi::Personas::ConceptFinder, DiscourseAi::Personas::ConceptMatcher]

disabled_personas = summarization_personas + concepts_personas

def from_setting(setting_name)
DB.query_single(
Expand Down Expand Up @@ -33,7 +36,7 @@ def from_setting(setting_name)
persona.allowed_group_ids = [Group::AUTO_GROUPS[:trust_level_0]]
end

persona.enabled = !summarization_personas.include?(persona_class)
persona.enabled = disabled_personas.exclude?(persona_class)
persona.priority = true if persona_class == DiscourseAi::Personas::General
end

Expand Down
11 changes: 11 additions & 0 deletions db/migrate/20250508182047_create_inferred_concepts_table.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# frozen_string_literal: true
# Creates the inferred_concepts table: a required name, timestamps, and a
# unique index on name.
class CreateInferredConceptsTable < ActiveRecord::Migration[7.2]
  def change
    create_table :inferred_concepts do |table|
      table.string :name, null: false
      table.timestamps

      # Declared inside the table definition; the index is still named
      # index_inferred_concepts_on_name.
      table.index :name, unique: true
    end
  end
end
15 changes: 15 additions & 0 deletions db/migrate/20250508183456_create_topics_inferred_concepts.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# frozen_string_literal: true

# Creates the join table linking topics to inferred concepts.
class CreateTopicsInferredConcepts < ActiveRecord::Migration[7.0]
  def change
    create_table :topics_inferred_concepts do |t|
      t.integer :topic_id, null: false
      # inferred_concepts.id is a bigint primary key, so the referencing
      # column uses the same width.
      t.bigint :inferred_concept_id, null: false
      t.timestamps
    end

    # Each (topic, concept) pair may appear only once. This composite index
    # also serves lookups by topic_id alone (its leading column), so no
    # separate single-column index on topic_id is needed.
    add_index :topics_inferred_concepts,
              %i[topic_id inferred_concept_id],
              unique: true,
              name: "idx_unique_topic_inferred_concept"

    # Lookups from the concept side are not covered by the composite index.
    add_index :topics_inferred_concepts, :inferred_concept_id
  end
end
15 changes: 15 additions & 0 deletions db/migrate/20250509000001_create_posts_inferred_concepts.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# frozen_string_literal: true

# Creates the join table linking posts to inferred concepts.
class CreatePostsInferredConcepts < ActiveRecord::Migration[7.0]
  def change
    create_table :posts_inferred_concepts do |t|
      t.integer :post_id, null: false
      # inferred_concepts.id is a bigint primary key, so the referencing
      # column uses the same width.
      t.bigint :inferred_concept_id, null: false
      t.timestamps
    end

    # Each (post, concept) pair may appear only once. This composite index
    # also serves lookups by post_id alone (its leading column), so no
    # separate single-column index on post_id is needed.
    add_index :posts_inferred_concepts,
              %i[post_id inferred_concept_id],
              unique: true,
              name: "idx_unique_post_inferred_concept"

    # Lookups from the concept side are not covered by the composite index.
    add_index :posts_inferred_concepts, :inferred_concept_id
  end
end
Loading
Loading