diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 000000000..650de3e30 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,8 @@ +{ + "permissions": { + "allow": [ + "Bash(bundle exec rails g migration:*)" + ], + "deny": [] + } +} \ No newline at end of file diff --git a/app/jobs/regular/generate_inferred_concepts.rb b/app/jobs/regular/generate_inferred_concepts.rb new file mode 100644 index 000000000..06b0a7500 --- /dev/null +++ b/app/jobs/regular/generate_inferred_concepts.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true + +module Jobs + class GenerateInferredConcepts < ::Jobs::Base + sidekiq_options queue: "low" + + # Process items to generate new concepts + # + # @param args [Hash] Contains job arguments + # @option args [String] :item_type Required - Type of items to process ('topics' or 'posts') + # @option args [Array] :item_ids Required - List of item IDs to process + # @option args [Integer] :batch_size (100) Number of items to process in each batch + # @option args [Boolean] :match_only (false) Only match against existing concepts without generating new ones + def execute(args = {}) + return if args[:item_ids].blank? || args[:item_type].blank? + + if %w[topics posts].exclude?(args[:item_type]) + Rails.logger.error("Invalid item_type for GenerateInferredConcepts: #{args[:item_type]}") + return + end + + # Process items in smaller batches to avoid memory issues + batch_size = args[:batch_size] || 100 + + # Get the list of item IDs + item_ids = args[:item_ids] + match_only = args[:match_only] || false + + # Process items in batches + item_ids.each_slice(batch_size) do |batch_item_ids| + process_batch(batch_item_ids, args[:item_type], match_only) + end + end + + private + + def process_batch(item_ids, item_type, match_only) + klass = item_type.singularize.classify.constantize + items = klass.where(id: item_ids) + + items.each do |item| + begin + process_item(item, item_type, match_only) + rescue => e + Rails.logger.error( + "Error generating concepts from #{item_type.singularize} #{item.id}: #{e.message}\n#{e.backtrace.join("\n")}", + ) + end + end + end + + def process_item(item, item_type, match_only) + # Use the Manager method that handles both identifying and creating concepts + if match_only + if item_type == "topics" + DiscourseAi::InferredConcepts::Manager.match_topic_to_concepts(item) + else # posts + DiscourseAi::InferredConcepts::Manager.match_post_to_concepts(item) + end + else + if item_type == "topics" + DiscourseAi::InferredConcepts::Manager.analyze_topic(item) + else # posts + DiscourseAi::InferredConcepts::Manager.analyze_post(item) + end + end + end + end +end diff --git a/app/jobs/scheduled/generate_concepts_from_popular_items.rb b/app/jobs/scheduled/generate_concepts_from_popular_items.rb new file mode 100644 index 000000000..a9a034935 --- /dev/null +++ b/app/jobs/scheduled/generate_concepts_from_popular_items.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +module Jobs + class GenerateConceptsFromPopularItems < ::Jobs::Scheduled + every 1.day + + # This job runs daily and generates new concepts from popular topics and posts + # It selects items based on engagement metrics and generates concepts from their content + def execute(args = {}) + return unless SiteSetting.inferred_concepts_enabled + + process_popular_topics + process_popular_posts + end + + private + + def process_popular_topics + + # Find candidate topics that are popular and don't have concepts yet + candidates = DiscourseAi::InferredConcepts::Manager.find_candidate_topics( + limit: SiteSetting.inferred_concepts_daily_topics_limit || 20, + min_posts: SiteSetting.inferred_concepts_min_posts || 5, + min_likes: SiteSetting.inferred_concepts_min_likes || 10, + min_views: SiteSetting.inferred_concepts_min_views || 100, + created_after: SiteSetting.inferred_concepts_lookback_days.days.ago + ) + + return if candidates.blank? + + # Process candidate topics - first generate concepts, then match + Jobs.enqueue( + :generate_inferred_concepts, + item_type: 'topics', + item_ids: candidates.map(&:id), + batch_size: 10 + ) + + # Schedule a follow-up job to match existing concepts + Jobs.enqueue_in( + 1.hour, + :generate_inferred_concepts, + item_type: 'topics', + item_ids: candidates.map(&:id), + batch_size: 10, + match_only: true + ) + end + + def process_popular_posts + + # Find candidate posts that are popular and don't have concepts yet + candidates = DiscourseAi::InferredConcepts::Manager.find_candidate_posts( + limit: SiteSetting.inferred_concepts_daily_posts_limit || 30, + min_likes: SiteSetting.inferred_concepts_post_min_likes || 5, + exclude_first_posts: true, + created_after: SiteSetting.inferred_concepts_lookback_days.days.ago + ) + + return if candidates.blank? + + # Process candidate posts - first generate concepts, then match + Jobs.enqueue( + :generate_inferred_concepts, + item_type: 'posts', + item_ids: candidates.map(&:id), + batch_size: 10 + ) + + # Schedule a follow-up job to match against existing concepts + Jobs.enqueue_in( + 1.hour, + :generate_inferred_concepts, + item_type: 'posts', + item_ids: candidates.map(&:id), + batch_size: 10, + match_only: true + ) + end + end +end \ No newline at end of file diff --git a/app/models/inferred_concept.rb b/app/models/inferred_concept.rb new file mode 100644 index 000000000..0248277fa --- /dev/null +++ b/app/models/inferred_concept.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +class InferredConcept < ActiveRecord::Base + has_and_belongs_to_many :topics + has_and_belongs_to_many :posts + + validates :name, presence: true, uniqueness: true +end + +# == Schema Information +# +# Table name: inferred_concepts +# +# id :bigint not null, primary key +# name :string not null +# created_at :datetime not null +# updated_at :datetime not null +# +# Indexes +# +# index_inferred_concepts_on_name (name) UNIQUE +# \ No newline at end of file diff --git a/app/serializers/ai_inferred_concept_post_serializer.rb b/app/serializers/ai_inferred_concept_post_serializer.rb new file mode 100644 index 000000000..d4bfcd628 --- /dev/null +++ b/app/serializers/ai_inferred_concept_post_serializer.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +class AiInferredConceptPostSerializer < ApplicationSerializer + attributes :id, + :post_number, + :topic_id, + :topic_title, + :username, + :avatar_template, + :created_at, + :updated_at, + :excerpt, + :truncated, + :inferred_concepts + + def avatar_template + User.avatar_template(object.username, object.uploaded_avatar_id) + end + + def excerpt + Post.excerpt(object.cooked) + end + + def truncated + object.cooked.length > SiteSetting.post_excerpt_maxlength + end + + def inferred_concepts + ActiveModel::ArraySerializer.new( + object.inferred_concepts, + each_serializer: InferredConceptSerializer + ) + end +end \ No newline at end of file diff --git a/app/serializers/inferred_concept_serializer.rb b/app/serializers/inferred_concept_serializer.rb new file mode 100644 index 000000000..265fe858c --- /dev/null +++ b/app/serializers/inferred_concept_serializer.rb @@ -0,0 +1,5 @@ +# frozen_string_literal: true + +class InferredConceptSerializer < ApplicationSerializer + attributes :id, :name, :created_at, :updated_at +end \ No newline at end of file diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 1c3351c9f..aec473beb 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -323,6 +323,12 @@ en: short_summarizer: name: "Summarizer (short form)" description: "Default persona used to power AI short summaries for topic lists' items" + concept_finder: + name: "Concept Finder" + description: "AI Bot specialized in identifying concepts and themes in content" + concept_matcher: + name: "Concept Matcher" + description: "AI Bot specialized in matching content against existing concepts" topic_not_found: "Summary unavailable, topic not found!" summarizing: "Summarizing topic" searching: "Searching for: '%{query}'" diff --git a/config/settings.yml b/config/settings.yml index 8e8b9e682..cdd7b6ab8 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -391,3 +391,36 @@ discourse_ai: default: true client: true + inferred_concepts_enabled: + default: false + client: true + description: "Enable the inferred concepts system that automatically generates and applies concepts to topics" + inferred_concepts_daily_topics_limit: + default: 20 + client: false + description: "Maximum number of topics to process each day for concept generation" + inferred_concepts_min_posts: + default: 5 + client: false + description: "Minimum number of posts a topic must have to be considered for concept generation" + inferred_concepts_min_likes: + default: 10 + client: false + description: "Minimum number of likes a topic must have to be considered for concept generation" + inferred_concepts_min_views: + default: 100 + client: false + description: "Minimum number of views a topic must have to be considered for concept generation" + inferred_concepts_lookback_days: + default: 30 + client: false + description: "Only consider topics created within this many days for concept generation" + inferred_concepts_daily_posts_limit: + default: 30 + client: false + description: "Maximum number of posts to process each day for concept generation" + inferred_concepts_post_min_likes: + default: 5 + client: false + description: "Minimum number of likes a post must have to be considered for concept generation" + diff --git a/db/fixtures/personas/603_ai_personas.rb b/db/fixtures/personas/603_ai_personas.rb index 7d52e8a9e..65dd31bce 100644 --- a/db/fixtures/personas/603_ai_personas.rb +++ b/db/fixtures/personas/603_ai_personas.rb @@ -1,6 +1,9 @@ # frozen_string_literal: true summarization_personas = [DiscourseAi::Personas::Summarizer, DiscourseAi::Personas::ShortSummarizer] +concepts_personas = [DiscourseAi::Personas::ConceptFinder, DiscourseAi::Personas::ConceptMatcher] + +disabled_personas = summarization_personas + concepts_personas def from_setting(setting_name) DB.query_single( @@ -33,7 +36,7 @@ def from_setting(setting_name) persona.allowed_group_ids = [Group::AUTO_GROUPS[:trust_level_0]] end - persona.enabled = !summarization_personas.include?(persona_class) + persona.enabled = disabled_personas.exclude?(persona_class) persona.priority = true if persona_class == DiscourseAi::Personas::General end diff --git a/db/migrate/20250508182047_create_inferred_concepts_table.rb b/db/migrate/20250508182047_create_inferred_concepts_table.rb new file mode 100644 index 000000000..6686c040d --- /dev/null +++ b/db/migrate/20250508182047_create_inferred_concepts_table.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true +class CreateInferredConceptsTable < ActiveRecord::Migration[7.2] + def change + create_table :inferred_concepts do |t| + t.string :name, null: false + t.timestamps + end + + add_index :inferred_concepts, :name, unique: true + end +end diff --git a/db/migrate/20250508183456_create_topics_inferred_concepts.rb b/db/migrate/20250508183456_create_topics_inferred_concepts.rb new file mode 100644 index 000000000..6066bfbbd --- /dev/null +++ b/db/migrate/20250508183456_create_topics_inferred_concepts.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class CreateTopicsInferredConcepts < ActiveRecord::Migration[7.0] + def change + create_table :topics_inferred_concepts do |t| + t.integer :topic_id, null: false + t.integer :inferred_concept_id, null: false + t.timestamps + end + + add_index :topics_inferred_concepts, [:topic_id, :inferred_concept_id], unique: true, name: 'idx_unique_topic_inferred_concept' + add_index :topics_inferred_concepts, :topic_id + add_index :topics_inferred_concepts, :inferred_concept_id + end +end \ No newline at end of file diff --git a/db/migrate/20250509000001_create_posts_inferred_concepts.rb b/db/migrate/20250509000001_create_posts_inferred_concepts.rb new file mode 100644 index 000000000..258d0f144 --- /dev/null +++ b/db/migrate/20250509000001_create_posts_inferred_concepts.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class CreatePostsInferredConcepts < ActiveRecord::Migration[7.0] + def change + create_table :posts_inferred_concepts do |t| + t.integer :post_id, null: false + t.integer :inferred_concept_id, null: false + t.timestamps + end + + add_index :posts_inferred_concepts, [:post_id, :inferred_concept_id], unique: true, name: 'idx_unique_post_inferred_concept' + add_index :posts_inferred_concepts, :post_id + add_index :posts_inferred_concepts, :inferred_concept_id + end +end \ No newline at end of file diff --git a/lib/inferred_concepts/applier.rb b/lib/inferred_concepts/applier.rb new file mode 100644 index 000000000..9976f2474 --- /dev/null +++ b/lib/inferred_concepts/applier.rb @@ -0,0 +1,179 @@ +# frozen_string_literal: true + +module DiscourseAi + module InferredConcepts + class Applier + # Associates the provided concepts with a topic + # topic: a Topic instance + # concepts: an array of InferredConcept instances + def self.apply_to_topic(topic, concepts) + return if topic.blank? || concepts.blank? + + concepts.each do |concept| + # Use the join table to associate the concept with the topic + # Avoid duplicates by using find_or_create_by + ActiveRecord::Base.connection.execute(<<~SQL) + INSERT INTO topics_inferred_concepts (topic_id, inferred_concept_id, created_at, updated_at) + VALUES (#{topic.id}, #{concept.id}, NOW(), NOW()) + ON CONFLICT (topic_id, inferred_concept_id) DO NOTHING + SQL + end + end + + # Associates the provided concepts with a post + # post: a Post instance + # concepts: an array of InferredConcept instances + def self.apply_to_post(post, concepts) + return if post.blank? || concepts.blank? + + concepts.each do |concept| + # Use the join table to associate the concept with the post + # Avoid duplicates by using find_or_create_by + ActiveRecord::Base.connection.execute(<<~SQL) + INSERT INTO posts_inferred_concepts (post_id, inferred_concept_id, created_at, updated_at) + VALUES (#{post.id}, #{concept.id}, NOW(), NOW()) + ON CONFLICT (post_id, inferred_concept_id) DO NOTHING + SQL + end + end + + # Extracts content from a topic for concept analysis + # Returns a string with the topic title and first few posts + def self.topic_content_for_analysis(topic) + return "" if topic.blank? + + # Combine title and first few posts for analysis + posts = Post.where(topic_id: topic.id).order(:post_number).limit(10) + + content = "Title: #{topic.title}\n\n" + content += posts.map { |p| "#{p.post_number}) #{p.user.username}: #{p.raw}" }.join("\n\n") + + content + end + + # Extracts content from a post for concept analysis + # Returns a string with the post content + def self.post_content_for_analysis(post) + return "" if post.blank? + + # Get the topic title for context + topic_title = post.topic&.title || "" + + content = "Topic: #{topic_title}\n\n" + content += "Post by #{post.user.username}:\n#{post.raw}" + + content + end + + # Comprehensive method to analyze a topic and apply concepts + def self.analyze_and_apply(topic) + return if topic.blank? + + # Get content to analyze + content = topic_content_for_analysis(topic) + + # Identify concepts + concept_names = Finder.identify_concepts(content) + + # Create or find concepts in the database + concepts = Finder.create_or_find_concepts(concept_names) + + # Apply concepts to the topic + apply_to_topic(topic, concepts) + + concepts + end + + # Comprehensive method to analyze a post and apply concepts + def self.analyze_and_apply_post(post) + return if post.blank? + + # Get content to analyze + content = post_content_for_analysis(post) + + # Identify concepts + concept_names = Finder.identify_concepts(content) + + # Create or find concepts in the database + concepts = Finder.create_or_find_concepts(concept_names) + + # Apply concepts to the post + apply_to_post(post, concepts) + + concepts + end + + # Match a topic with existing concepts + def self.match_existing_concepts(topic) + return [] if topic.blank? + + # Get content to analyze + content = topic_content_for_analysis(topic) + + # Get all existing concepts + existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts + return [] if existing_concepts.empty? + + # Use the ConceptMatcher persona to match concepts + matched_concept_names = match_concepts_to_content(content, existing_concepts) + + # Find concepts in the database + matched_concepts = InferredConcept.where(name: matched_concept_names) + + # Apply concepts to the topic + apply_to_topic(topic, matched_concepts) + + matched_concepts + end + + # Match a post with existing concepts + def self.match_existing_concepts_for_post(post) + return [] if post.blank? + + # Get content to analyze + content = post_content_for_analysis(post) + + # Get all existing concepts + existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts + return [] if existing_concepts.empty? + + # Use the ConceptMatcher persona to match concepts + matched_concept_names = match_concepts_to_content(content, existing_concepts) + + # Find concepts in the database + matched_concepts = InferredConcept.where(name: matched_concept_names) + + # Apply concepts to the post + apply_to_post(post, matched_concepts) + + matched_concepts + end + + # Use ConceptMatcher persona to match content against provided concepts + def self.match_concepts_to_content(content, concept_list) + return [] if content.blank? || concept_list.blank? + + # Prepare user message with only the content + user_message = content + + # Use the ConceptMatcher persona to match concepts + llm = DiscourseAi::Completions::Llm.default_llm + persona = DiscourseAi::Personas::ConceptMatcher.new(concept_list: concept_list) + context = + DiscourseAi::Personas::BotContext.new( + messages: [{ type: :user, content: user_message }], + user: Discourse.system_user, + inferred_concepts: DiscourseAi::InferredConcepts::Manager.list_concepts, + ) + + prompt = persona.craft_prompt(context) + response = llm.completion(prompt, extract_json: true) + + return [] unless response.success? + + matching_concepts = response.parsed_output["matching_concepts"] + matching_concepts || [] + end + end + end +end diff --git a/lib/inferred_concepts/finder.rb b/lib/inferred_concepts/finder.rb new file mode 100644 index 000000000..37e2c625e --- /dev/null +++ b/lib/inferred_concepts/finder.rb @@ -0,0 +1,139 @@ +# frozen_string_literal: true + +module DiscourseAi + module InferredConcepts + class Finder + # Identifies potential concepts from provided content + # Returns an array of concept names (strings) + def self.identify_concepts(content) + return [] if content.blank? + + # Use the ConceptFinder persona to identify concepts + llm = DiscourseAi::Completions::Llm.default_llm + persona = DiscourseAi::Personas::ConceptFinder.new + context = + DiscourseAi::Personas::BotContext.new( + messages: [{ type: :user, content: content }], + user: Discourse.system_user, + inferred_concepts: DiscourseAi::InferredConcepts::Manager.list_concepts, + ) + + prompt = persona.craft_prompt(context) + response = llm.completion(prompt, extract_json: true) + + return [] unless response.success? + + concepts = response.parsed_output["concepts"] + concepts || [] + end + + # Creates or finds concepts in the database from provided names + # Returns an array of InferredConcept instances + def self.create_or_find_concepts(concept_names) + return [] if concept_names.blank? + + concept_names.map { |name| InferredConcept.find_or_create_by(name: name) } + end + + # Finds candidate topics to use for concept generation + # + # @param limit [Integer] Maximum number of topics to return + # @param min_posts [Integer] Minimum number of posts in topic + # @param min_likes [Integer] Minimum number of likes across all posts + # @param min_views [Integer] Minimum number of views + # @param exclude_topic_ids [Array] Topic IDs to exclude + # @param category_ids [Array] Only include topics from these categories (optional) + # @param created_after [DateTime] Only include topics created after this time (optional) + # @return [Array] Array of Topic objects that are good candidates + def self.find_candidate_topics( + limit: 100, + min_posts: 5, + min_likes: 10, + min_views: 100, + exclude_topic_ids: [], + category_ids: nil, + created_after: 30.days.ago + ) + query = + Topic.where( + "topics.posts_count >= ? AND topics.views >= ? AND topics.like_count >= ?", + min_posts, + min_views, + min_likes, + ) + + # Apply additional filters + query = query.where("topics.id NOT IN (?)", exclude_topic_ids) if exclude_topic_ids.present? + query = query.where("topics.category_id IN (?)", category_ids) if category_ids.present? + query = query.where("topics.created_at >= ?", created_after) if created_after.present? + + # Exclude PM topics (if they exist in Discourse) + query = query.where(archetype: Topic.public_archetype) + + # Exclude topics that already have concepts + topics_with_concepts = <<~SQL + SELECT DISTINCT topic_id + FROM topics_inferred_concepts + SQL + + query = query.where("topics.id NOT IN (#{topics_with_concepts})") + + # Score and order topics by engagement (combination of views, likes, and posts) + query = + query.select( + "topics.*, + (topics.like_count * 2 + topics.posts_count * 3 + topics.views * 0.1) AS engagement_score", + ).order("engagement_score DESC") + + # Return limited number of topics + query.limit(limit) + end + + # Find candidate posts that are good for concept generation + # + # @param limit [Integer] Maximum number of posts to return + # @param min_likes [Integer] Minimum number of likes + # @param exclude_first_posts [Boolean] Exclude first posts in topics + # @param exclude_post_ids [Array] Post IDs to exclude + # @param category_ids [Array] Only include posts from topics in these categories + # @param created_after [DateTime] Only include posts created after this time + # @return [Array] Array of Post objects that are good candidates + def self.find_candidate_posts( + limit: 100, + min_likes: 5, + exclude_first_posts: true, + exclude_post_ids: [], + category_ids: nil, + created_after: 30.days.ago + ) + query = Post.where("posts.like_count >= ?", min_likes) + + # Exclude first posts if specified + query = query.where("posts.post_number > 1") if exclude_first_posts + + # Apply additional filters + query = query.where("posts.id NOT IN (?)", exclude_post_ids) if exclude_post_ids.present? + query = query.where("posts.created_at >= ?", created_after) if created_after.present? + + # Filter by category if specified + if category_ids.present? + query = query.joins(:topic).where("topics.category_id IN (?)", category_ids) + end + + # Exclude posts that already have concepts + posts_with_concepts = <<~SQL + SELECT DISTINCT post_id + FROM posts_inferred_concepts + SQL + + query = query.where("posts.id NOT IN (#{posts_with_concepts})") + + # Order by engagement (likes) + query = query.order(like_count: :desc) + + # Return limited number of posts + query.limit(limit) + end + end + end +end diff --git a/lib/inferred_concepts/manager.rb b/lib/inferred_concepts/manager.rb new file mode 100644 index 000000000..3037cd153 --- /dev/null +++ b/lib/inferred_concepts/manager.rb @@ -0,0 +1,160 @@ +# frozen_string_literal: true + +module DiscourseAi + module InferredConcepts + class Manager + # Get a list of existing concepts + # @param limit [Integer, nil] Optional maximum number of concepts to return + # @return [Array] Array of InferredConcept objects + def self.list_concepts(limit: nil) + query = InferredConcept.all.order("name ASC") + + # Apply limit if provided + query = query.limit(limit) if limit.present? + + query.pluck(:name) + end + # Generate new concepts for a topic and apply them + # @param topic [Topic] A Topic instance + # @return [Array] The concepts that were applied + def self.analyze_topic(topic) + return [] if topic.blank? + + Applier.analyze_and_apply(topic) + end + + # Generate new concepts for a post and apply them + # @param post [Post] A Post instance + # @return [Array] The concepts that were applied + def self.analyze_post(post) + return [] if post.blank? + + Applier.analyze_and_apply_post(post) + end + + # Extract new concepts from arbitrary content + # @param content [String] The content to analyze + # @return [Array] The identified concept names + def self.identify_concepts(content) + Finder.identify_concepts(content) + end + + # Identify and create concepts from content without applying them to any topic + # @param content [String] The content to analyze + # @return [Array] The created or found concepts + def self.generate_concepts_from_content(content) + return [] if content.blank? + + # Identify concepts + concept_names = Finder.identify_concepts(content) + return [] if concept_names.blank? + + # Create or find concepts in the database + Finder.create_or_find_concepts(concept_names) + end + + # Generate concepts from a topic's content without applying them to the topic + # @param topic [Topic] A Topic instance + # @return [Array] The created or found concepts + def self.generate_concepts_from_topic(topic) + return [] if topic.blank? + + # Get content to analyze + content = Applier.topic_content_for_analysis(topic) + return [] if content.blank? + + # Generate concepts from the content + generate_concepts_from_content(content) + end + + # Generate concepts from a post's content without applying them to the post + # @param post [Post] A Post instance + # @return [Array] The created or found concepts + def self.generate_concepts_from_post(post) + return [] if post.blank? + + # Get content to analyze + content = Applier.post_content_for_analysis(post) + return [] if content.blank? + + # Generate concepts from the content + generate_concepts_from_content(content) + end + + # Match a topic against existing concepts + # @param topic [Topic] A Topic instance + # @return [Array] The concepts that were applied + def self.match_topic_to_concepts(topic) + return [] if topic.blank? + + Applier.match_existing_concepts(topic) + end + + # Match a post against existing concepts + # @param post [Post] A Post instance + # @return [Array] The concepts that were applied + def self.match_post_to_concepts(post) + return [] if post.blank? + + Applier.match_existing_concepts_for_post(post) + end + + # Find topics that have a specific concept + # @param concept_name [String] The name of the concept to search for + # @return [Array] Topics that have the specified concept + def self.search_topics_by_concept(concept_name) + concept = ::InferredConcept.find_by(name: concept_name) + return [] unless concept + concept.topics + end + + # Find posts that have a specific concept + # @param concept_name [String] The name of the concept to search for + # @return [Array] Posts that have the specified concept + def self.search_posts_by_concept(concept_name) + concept = ::InferredConcept.find_by(name: concept_name) + return [] unless concept + concept.posts + end + + # Match arbitrary content against existing concepts + # @param content [String] The content to analyze + # @return [Array] Names of matching concepts + def self.match_content_to_concepts(content) + existing_concepts = InferredConcept.all.pluck(:name) + return [] if existing_concepts.empty? + + Applier.match_concepts_to_content(content, existing_concepts) + end + + # Find candidate topics that are good for concept generation + # + # @param opts [Hash] Options to pass to the finder + # @option opts [Integer] :limit (100) Maximum number of topics to return + # @option opts [Integer] :min_posts (5) Minimum number of posts in topic + # @option opts [Integer] :min_likes (10) Minimum number of likes across all posts + # @option opts [Integer] :min_views (100) Minimum number of views + # @option opts [Array] :exclude_topic_ids ([]) Topic IDs to exclude + # @option opts [Array] :category_ids (nil) Only include topics from these categories + # @option opts [DateTime] :created_after (30.days.ago) Only include topics created after this time + # @return [Array] Array of Topic objects that are good candidates + def self.find_candidate_topics(opts = {}) + Finder.find_candidate_topics(opts) + end + + # Find candidate posts that are good for concept generation + # @param opts [Hash] Options to pass to the finder + # @return [Array] Array of Post objects that are good candidates + def self.find_candidate_posts(opts = {}) + Finder.find_candidate_posts( + limit: opts[:limit], + min_likes: opts[:min_likes], + exclude_first_posts: opts[:exclude_first_posts], + exclude_post_ids: opts[:exclude_post_ids], + category_ids: opts[:category_ids], + created_after: opts[:created_after], + ) + end + end + end +end diff --git a/lib/personas/bot_context.rb b/lib/personas/bot_context.rb index 5f7dd99ef..dc1d86b72 100644 --- a/lib/personas/bot_context.rb +++ b/lib/personas/bot_context.rb @@ -16,7 +16,8 @@ class BotContext :channel_id, :context_post_ids, :feature_name, - :resource_url + :resource_url, + :inferred_concepts def initialize( post: nil, @@ -33,7 +34,8 @@ def initialize( channel_id: nil, context_post_ids: nil, feature_name: "bot", - resource_url: nil + resource_url: nil, + inferred_concepts: [] ) @participants = participants @user = user @@ -52,7 +54,7 @@ def initialize( @resource_url = resource_url @feature_name = feature_name - @resource_url = resource_url + @inferred_concepts = inferred_concepts if post @post_id = post.id @@ -64,7 +66,15 @@ def initialize( end # these are strings that can be safely interpolated into templates - TEMPLATE_PARAMS = %w[time site_url site_title site_description participants resource_url] + TEMPLATE_PARAMS = %w[ + time + site_url + site_title + site_description + participants + resource_url + inferred_concepts + ] def lookup_template_param(key) public_send(key.to_sym) if TEMPLATE_PARAMS.include?(key) @@ -110,6 +120,7 @@ def to_json skip_tool_details: @skip_tool_details, feature_name: @feature_name, resource_url: @resource_url, + inferred_concepts: @inferred_concepts, } end end diff --git a/lib/personas/concept_finder.rb b/lib/personas/concept_finder.rb new file mode 100644 index 000000000..a713e8b86 --- /dev/null +++ b/lib/personas/concept_finder.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module DiscourseAi + module Personas + class ConceptFinder < Persona + def system_prompt + existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts(limit: 100) + existing_concepts_text = "" + + existing_concepts_text = <<~CONCEPTS if existing_concepts.present? + The following concepts already exist in the system: + #{existing_concepts.join(", ")} + + You can reuse these existing concepts if they apply to the content, or suggest new concepts. + CONCEPTS + + <<~PROMPT.strip + You are an advanced concept tagging system that identifies key concepts, themes, and topics from provided text. + Your job is to extract meaningful labels that can be used to categorize content. + + Guidelines for generating concepts: + - Extract up to 7 concepts from the provided content + - Concepts should be single words or short phrases (1-3 words maximum) + - Focus on substantive topics, themes, technologies, methodologies, or domains + - Avoid overly general terms like "discussion" or "question" + - Ensure concepts are relevant to the core content + - Do not include proper nouns unless they represent key technologies or methodologies + - Maintain the original language of the text being analyzed + #{existing_concepts_text} + Format your response as a JSON object with a single key named "concepts", which has an array of concept strings as the value. + Your output should be in the following format: + + {"concepts": ["concept1", "concept2", "concept3"]} + + + Where the concepts are replaced by the actual concepts you've identified. + PROMPT + end + + def response_format + [{ key: "concepts", type: "array" }] + end + end + end +end diff --git a/lib/personas/concept_matcher.rb b/lib/personas/concept_matcher.rb new file mode 100644 index 000000000..5099196b8 --- /dev/null +++ b/lib/personas/concept_matcher.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module DiscourseAi + module Personas + class ConceptMatcher < Persona + def system_prompt + <<~PROMPT.strip + You are an advanced concept matching system that determines which concepts from a provided list are relevant to a piece of content. + Your job is to analyze the content and determine which concepts from the list apply to it. + + Guidelines for matching concepts: + - Only select concepts that are clearly relevant to the content + - The content must substantially discuss or relate to the concept + - Superficial mentions are not enough to consider a concept relevant + - Be precise and selective - don't match concepts that are only tangentially related + - Consider both explicit mentions and implicit discussions of concepts + - Maintain the original language of the text being analyzed + - IMPORTANT: Only select from the exact concepts in the provided list - do not add new concepts + - If no concepts from the list match the content, return an empty array + + The list of available concepts is: + {inferred_concepts} + + Format your response as a JSON object with a single key named "matching_concepts", which has an array of concept strings from the provided list. + Your output should be in the following format: + + {"matching_concepts": ["concept1", "concept3", "concept5"]} + + + Only include concepts from the provided list that match the content. If no concepts match, return an empty array. + PROMPT + end + + def response_format + [{ key: "matching_concepts", type: "array" }] + end + end + end +end diff --git a/lib/personas/persona.rb b/lib/personas/persona.rb index a8b087850..791e250b5 100644 --- a/lib/personas/persona.rb +++ b/lib/personas/persona.rb @@ -47,6 +47,8 @@ def system_personas Summarizer => -11, ShortSummarizer => -12, Designer => -13, + ConceptFinder => -14, + ConceptMatcher => -15, } end diff --git a/lib/post_extensions.rb b/lib/post_extensions.rb index 04a28a156..3a06495f6 100644 --- a/lib/post_extensions.rb +++ b/lib/post_extensions.rb @@ -11,6 +11,8 @@ module PostExtensions -> { where(classification_type: "sentiment") }, class_name: "ClassificationResult", as: :target + + has_and_belongs_to_many :inferred_concepts end end end diff --git a/lib/topic_extensions.rb b/lib/topic_extensions.rb index 7ab36493d..659a33923 100644 --- a/lib/topic_extensions.rb +++ b/lib/topic_extensions.rb @@ -11,6 +11,8 @@ module TopicExtensions -> { where(summary_type: AiSummary.summary_types[:gist]) }, class_name: "AiSummary", as: :target + + has_and_belongs_to_many :inferred_concepts end end end diff --git a/spec/lib/personas/persona_spec.rb b/spec/lib/personas/persona_spec.rb index 25f12914b..856b70cc8 100644 --- a/spec/lib/personas/persona_spec.rb +++ b/spec/lib/personas/persona_spec.rb @@ -16,6 +16,7 @@ def system_prompt {participants} {time} {resource_url} + {inferred_concepts} PROMPT end end @@ -37,6 +38,7 @@ def system_prompt end let(:resource_url) { "https://path-to-resource" } + let(:inferred_concepts) { %w[bulbassaur charmander squirtle].join(", ") } let(:context) do DiscourseAi::Personas::BotContext.new( @@ -46,6 +48,7 @@ def system_prompt time: Time.zone.now, participants: topic_with_users.allowed_users.map(&:username).join(", "), resource_url: resource_url, + inferred_concepts: inferred_concepts, ) end @@ -65,6 +68,7 @@ def system_prompt expect(system_message).to include("joe, jane") expect(system_message).to include(Time.zone.now.to_s) expect(system_message).to include(resource_url) + expect(system_message).to include(inferred_concepts) tools = rendered.tools