diff --git a/.env b/.env
index 95017b4654..2c68a10be4 100644
--- a/.env
+++ b/.env
@@ -113,7 +113,7 @@ ADMIN_API_SECRET=# secret to admin API calls, like computing usage stats or expo
 
 PARQUET_EXPORT_SECRET=#DEPRECATED, use ADMIN_API_SECRET instead
 
-RATE_LIMIT= # requests per minute
+RATE_LIMIT= # /!\ Legacy definition of messages per minute. Use USAGE_LIMITS.messagesPerMinute instead
 MESSAGES_BEFORE_LOGIN=# how many messages a user can send in a conversation before having to login. set to 0 to force login right away
 
 APP_BASE="" # base path of the app, e.g. /chat, left blank as default
@@ -140,4 +140,7 @@ ALTERNATIVE_REDIRECT_URLS=`[]` #valide alternative redirect URL for OAuth
 
 WEBHOOK_URL_REPORT_ASSISTANT=#provide webhook url to get notified when an assistant gets reported
 
-ALLOWED_USER_EMAILS=`[]` # if it's defined, only these emails will be allowed to use the app
\ No newline at end of file
+ALLOWED_USER_EMAILS=`[]` # if it's defined, only these emails will be allowed to use the app
+
+USAGE_LIMITS=`{}` # JSON5 object, all keys optional: conversations, messages, assistants, messageLength, messagesPerMinute
+
diff --git a/.env.template b/.env.template
index 889c6526ff..68735c98ac 100644
--- a/.env.template
+++ b/.env.template
@@ -269,9 +269,6 @@ PUBLIC_APP_DISCLAIMER_MESSAGE="Disclaimer: AI is an area of active research with
 PUBLIC_APP_DATA_SHARING=1
 PUBLIC_APP_DISCLAIMER=1
 
-RATE_LIMIT=16
-MESSAGES_BEFORE_LOGIN=5# how many messages a user can send in a conversation before having to login. set to 0 to force login right away
-
 PUBLIC_GOOGLE_ANALYTICS_ID=G-8Q63TH4CSL
 PUBLIC_PLAUSIBLE_SCRIPT_URL="/js/script.js"
 
diff --git a/.github/workflows/deploy-release.yml b/.github/workflows/deploy-release.yml
index 2c3553fa4c..c3ff7d17b7 100644
--- a/.github/workflows/deploy-release.yml
+++ b/.github/workflows/deploy-release.yml
@@ -27,6 +27,8 @@ jobs:
           HF_DEPLOYMENT_TOKEN: ${{ secrets.HF_DEPLOYMENT_TOKEN }}
           WEBHOOK_URL_REPORT_ASSISTANT: ${{ secrets.WEBHOOK_URL_REPORT_ASSISTANT }}
           ADMIN_API_SECRET: ${{ secrets.ADMIN_API_SECRET }}
+          USAGE_LIMITS: ${{ secrets.USAGE_LIMITS }}
+          MESSAGES_BEFORE_LOGIN: ${{ secrets.MESSAGES_BEFORE_LOGIN }}
         run: npm run updateProdEnv
   sync-to-hub:
     runs-on: ubuntu-latest
diff --git a/scripts/updateProdEnv.ts b/scripts/updateProdEnv.ts
index ab1f0d0bc3..b66267bbfc 100644
--- a/scripts/updateProdEnv.ts
+++ b/scripts/updateProdEnv.ts
@@ -8,6 +8,8 @@ const MONGODB_URL = process.env.MONGODB_URL;
 const HF_TOKEN = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN; // token used for API requests in prod
 const WEBHOOK_URL_REPORT_ASSISTANT = process.env.WEBHOOK_URL_REPORT_ASSISTANT; // slack webhook url used to get "report assistant" events
 const ADMIN_API_SECRET = process.env.ADMIN_API_SECRET;
+const USAGE_LIMITS = process.env.USAGE_LIMITS;
+const MESSAGES_BEFORE_LOGIN = process.env.MESSAGES_BEFORE_LOGIN;
 
 // Read the content of the file .env.template
 const PUBLIC_CONFIG = fs.readFileSync(".env.template", "utf8");
@@ -20,6 +22,8 @@ SERPER_API_KEY=${SERPER_API_KEY}
 HF_TOKEN=${HF_TOKEN}
 WEBHOOK_URL_REPORT_ASSISTANT=${WEBHOOK_URL_REPORT_ASSISTANT}
 ADMIN_API_SECRET=${ADMIN_API_SECRET}
+USAGE_LIMITS=${USAGE_LIMITS}
+MESSAGES_BEFORE_LOGIN=${MESSAGES_BEFORE_LOGIN}
 `;
 
 // Make an HTTP POST request to add the space secrets
diff --git a/src/lib/server/usageLimits.ts b/src/lib/server/usageLimits.ts
new file mode 100644
index 0000000000..0323e83fb5
--- /dev/null
+++ b/src/lib/server/usageLimits.ts
@@ -0,0 +1,25 @@
+import { z } from "zod";
+import { USAGE_LIMITS, RATE_LIMIT } from "$env/static/private";
+import JSON5 from "json5";
+
+/**
+ * Schema of the USAGE_LIMITS env variable (a JSON5 object). Every limit is optional.
+ * RATE_LIMIT is the legacy way to define messages per minute limit.
+ */
+export const usageLimitsSchema = z
+	.object({
+		conversations: z.coerce.number().optional(), // how many conversations
+		messages: z.coerce.number().optional(), // how many messages in a conversation
+		assistants: z.coerce.number().optional(), // how many assistants
+		messageLength: z.coerce.number().optional(), // how long can a message be before we cut it off
+		// No outer `.optional()` here: ZodOptional returns early on `undefined`, which would
+		// skip the preprocess step and silently ignore the legacy RATE_LIMIT fallback.
+		messagesPerMinute: z.preprocess(
+			(val) => (val === undefined ? RATE_LIMIT : val),
+			z.coerce.number().optional()
+		), // how many messages per minute
+	})
+	.optional();
+
+// JSON5.parse throws on an empty string, so an empty USAGE_LIMITS means "no limits"
+export const usageLimits = usageLimitsSchema.parse(JSON5.parse(USAGE_LIMITS || "{}"));
diff --git a/src/routes/+page.svelte b/src/routes/+page.svelte
index 78d93c4a3f..d0410802aa 100644
--- a/src/routes/+page.svelte
+++ b/src/routes/+page.svelte
@@ -47,8 +47,9 @@
 			});
 
 			if (!res.ok) {
-				error.set("Error while creating conversation, try again.");
-				console.error("Error while creating conversation: " + (await res.text()));
+				const errorMessage = (await res.json()).message || ERROR_MESSAGES.default;
+				error.set(errorMessage);
+				console.error("Error while creating conversation: ", errorMessage);
 				return;
 			}
 
@@ -63,7 +64,7 @@
 			// invalidateAll to update list of conversations
 			await goto(`${base}/conversation/${conversationId}`, { invalidateAll: true });
 		} catch (err) {
-			error.set(ERROR_MESSAGES.default);
+			error.set((err as Error).message || ERROR_MESSAGES.default);
 			console.error(err);
 		} finally {
 			loading = false;
diff --git a/src/routes/conversation/+server.ts b/src/routes/conversation/+server.ts
index df3787b25d..06e964c3ef 100644
--- a/src/routes/conversation/+server.ts
+++ b/src/routes/conversation/+server.ts
@@ -8,6 +8,8 @@ import type { Message } from "$lib/types/Message";
 import { models, validateModel } from "$lib/server/models";
 import { defaultEmbeddingModel } from "$lib/server/embeddingModels";
 import { v4 } from "uuid";
+import { authCondition } from "$lib/server/auth";
+import { usageLimits } from "$lib/server/usageLimits";
 
 export const POST: RequestHandler = async ({ locals, request }) => {
 	const body = await request.text();
@@ -23,6 +25,16 @@ export const POST: RequestHandler = async ({ locals, request }) => {
 		})
 		.parse(JSON.parse(body));
 
+	const convCount = await collections.conversations.countDocuments(authCondition(locals));
+
+	// `>=` (not `>`): a user who already owns the maximum must not be able to create one more
+	if (usageLimits?.conversations && convCount >= usageLimits.conversations) {
+		throw error(
+			429,
+			"You have reached the maximum number of conversations. Delete some to continue."
+		);
+	}
+
 	let messages: Message[] = [
 		{
 			id: v4(),
diff --git a/src/routes/conversation/[id]/+page.svelte b/src/routes/conversation/[id]/+page.svelte
index 97b730ebf0..6bfe63485e 100644
--- a/src/routes/conversation/[id]/+page.svelte
+++ b/src/routes/conversation/[id]/+page.svelte
@@ -43,7 +43,9 @@
 			});
 
 			if (!res.ok) {
-				error.set("Error while creating conversation, try again.");
-				console.error("Error while creating conversation: " + (await res.text()));
+				// a response body can only be consumed once, so read it a single time
+				const errorMessage = await res.text();
+				error.set(errorMessage);
+				console.error("Error while creating conversation: " + errorMessage);
 				return;
 			}
diff --git a/src/routes/conversation/[id]/+server.ts b/src/routes/conversation/[id]/+server.ts
index 7aeda860c8..e9da299b94 100644
--- a/src/routes/conversation/[id]/+server.ts
+++ b/src/routes/conversation/[id]/+server.ts
@@ -1,4 +1,4 @@
-import { MESSAGES_BEFORE_LOGIN, RATE_LIMIT } from "$env/static/private";
+import { MESSAGES_BEFORE_LOGIN } from "$env/static/private";
 import { authCondition, requiresUser } from "$lib/server/auth";
 import { collections } from "$lib/server/database";
 import { models } from "$lib/server/models";
@@ -19,6 +19,7 @@ import { buildSubtree } from "$lib/utils/tree/buildSubtree.js";
 import { addChildren } from "$lib/utils/tree/addChildren.js";
 import { addSibling } from "$lib/utils/tree/addSibling.js";
 import { preprocessMessages } from "$lib/server/preprocessMessages.js";
+import { usageLimits } from "$lib/server/usageLimits";
 
 export async function POST({ request, locals, params, getClientAddress }) {
 	const id = z.string().parse(params.id);
@@ -95,14 +96,22 @@ export async function POST({ request, locals, params, getClientAddress }) {
 		}
 	}
 
-	// check if the user is rate limited
-	const nEvents = Math.max(
-		await collections.messageEvents.countDocuments({ userId }),
-		await collections.messageEvents.countDocuments({ ip: getClientAddress() })
-	);
+	if (usageLimits?.messagesPerMinute) {
+		// check if the user is rate limited
+		const nEvents = Math.max(
+			await collections.messageEvents.countDocuments({ userId }),
+			await collections.messageEvents.countDocuments({ ip: getClientAddress() })
+		);
+		if (nEvents > usageLimits.messagesPerMinute) {
+			throw error(429, ERROR_MESSAGES.rateLimited);
+		}
+	}
 
-	if (RATE_LIMIT != "" && nEvents > parseInt(RATE_LIMIT)) {
-		throw error(429, ERROR_MESSAGES.rateLimited);
+	if (usageLimits?.messages && conv.messages.length > usageLimits.messages) {
+		throw error(
+			429,
+			`This conversation has more than ${usageLimits.messages} messages. Start a new one to continue`
+		);
 	}
 
 	// fetch the model
@@ -125,7 +134,13 @@ export async function POST({ request, locals, params, getClientAddress }) {
 	} = z
 		.object({
 			id: z.string().uuid().refine(isMessageId).optional(), // parent message id to append to for a normal message, or the message id for a retry/continue
-			inputs: z.optional(z.string().trim().min(1)),
+			inputs: z.optional(
+				z
+					.string()
+					.trim()
+					.min(1)
+					.transform((s) => s.replace(/\r\n/g, "\n"))
+			),
 			is_retry: z.optional(z.boolean()),
 			is_continue: z.optional(z.boolean()),
 			web_search: z.optional(z.boolean()),
@@ -133,6 +148,9 @@ export async function POST({ request, locals, params, getClientAddress }) {
 		})
 		.parse(json);
 
+	if (usageLimits?.messageLength && (newPrompt?.length ?? 0) > usageLimits.messageLength) {
+		throw error(400, "Message too long.");
+	}
 	// files is an array of base64 strings encoding Blob objects
 	// we need to convert this array to an array of File objects
 
diff --git a/src/routes/settings/assistants/new/+page.server.ts b/src/routes/settings/assistants/new/+page.server.ts
index 678c52457c..938bb4a806 100644
--- a/src/routes/settings/assistants/new/+page.server.ts
+++ b/src/routes/settings/assistants/new/+page.server.ts
@@ -7,6 +7,7 @@ import { ObjectId } from "mongodb";
 import { z } from "zod";
 import { sha256 } from "$lib/utils/sha256";
 import sharp from "sharp";
+import { usageLimits } from "$lib/server/usageLimits";
 import { generateSearchTokens } from "$lib/utils/searchTokens";
 
 const newAsssistantSchema = z.object({
@@ -62,6 +63,19 @@ export const actions: Actions = {
 			return fail(400, { error: true, errors });
 		}
 
+		const assistantsCount = await collections.assistants.countDocuments(authCondition(locals));
+
+		// `>=` (not `>`): a user who already owns the maximum must not be able to create one more
+		if (usageLimits?.assistants && assistantsCount >= usageLimits.assistants) {
+			const errors = [
+				{
+					field: "preprompt",
+					message: "You have reached the maximum number of assistants. Delete some to continue.",
+				},
+			];
+			return fail(400, { error: true, errors });
+		}
+
 		const createdById = locals.user?._id ?? locals.sessionId;
 
 		const newAssistantId = new ObjectId();