Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add limits on API endpoints #886

Merged
merged 14 commits into from Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 5 additions & 2 deletions .env
Expand Up @@ -113,7 +113,7 @@ ADMIN_API_SECRET=# secret to admin API calls, like computing usage stats or expo

PARQUET_EXPORT_SECRET=#DEPRECATED, use ADMIN_API_SECRET instead

RATE_LIMIT= # requests per minute
RATE_LIMIT= # /!\ Legacy definition of messages per minute. Use USAGE_LIMITS.messagesPerMinute instead
MESSAGES_BEFORE_LOGIN=# how many messages a user can send in a conversation before having to login. set to 0 to force login right away

APP_BASE="" # base path of the app, e.g. /chat, left blank as default
Expand All @@ -140,4 +140,7 @@ ALTERNATIVE_REDIRECT_URLS=`[]` #valid alternative redirect URL for OAuth

WEBHOOK_URL_REPORT_ASSISTANT=#provide webhook url to get notified when an assistant gets reported

ALLOWED_USER_EMAILS=`[]` # if it's defined, only these emails will be allowed to use the app
ALLOWED_USER_EMAILS=`[]` # if it's defined, only these emails will be allowed to use the app

# JSON5 object of optional numeric limits. Supported keys: conversations, messages,
# assistants, messageLength, messagesPerMinute (falls back to the legacy RATE_LIMIT when unset).
USAGE_LIMITS=`{}`

3 changes: 0 additions & 3 deletions .env.template
Expand Up @@ -269,9 +269,6 @@ PUBLIC_APP_DISCLAIMER_MESSAGE="Disclaimer: AI is an area of active research with
PUBLIC_APP_DATA_SHARING=1
PUBLIC_APP_DISCLAIMER=1

RATE_LIMIT=16
MESSAGES_BEFORE_LOGIN=5# how many messages a user can send in a conversation before having to login. set to 0 to force login right away

PUBLIC_GOOGLE_ANALYTICS_ID=G-8Q63TH4CSL
PUBLIC_PLAUSIBLE_SCRIPT_URL="/js/script.js"

Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/deploy-release.yml
Expand Up @@ -27,6 +27,8 @@ jobs:
HF_DEPLOYMENT_TOKEN: ${{ secrets.HF_DEPLOYMENT_TOKEN }}
WEBHOOK_URL_REPORT_ASSISTANT: ${{ secrets.WEBHOOK_URL_REPORT_ASSISTANT }}
ADMIN_API_SECRET: ${{ secrets.ADMIN_API_SECRET }}
USAGE_LIMITS: ${{ secrets.USAGE_LIMITS }}
MESSAGES_BEFORE_LOGIN: ${{ secrets.MESSAGES_BEFORE_LOGIN }}
run: npm run updateProdEnv
sync-to-hub:
runs-on: ubuntu-latest
Expand Down
4 changes: 4 additions & 0 deletions scripts/updateProdEnv.ts
Expand Up @@ -8,6 +8,8 @@ const MONGODB_URL = process.env.MONGODB_URL;
const HF_TOKEN = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN; // token used for API requests in prod
const WEBHOOK_URL_REPORT_ASSISTANT = process.env.WEBHOOK_URL_REPORT_ASSISTANT; // slack webhook url used to get "report assistant" events
const ADMIN_API_SECRET = process.env.ADMIN_API_SECRET;
const USAGE_LIMITS = process.env.USAGE_LIMITS;
const MESSAGES_BEFORE_LOGIN = process.env.MESSAGES_BEFORE_LOGIN;

// Read the content of the file .env.template
const PUBLIC_CONFIG = fs.readFileSync(".env.template", "utf8");
Expand All @@ -20,6 +22,8 @@ SERPER_API_KEY=${SERPER_API_KEY}
HF_TOKEN=${HF_TOKEN}
WEBHOOK_URL_REPORT_ASSISTANT=${WEBHOOK_URL_REPORT_ASSISTANT}
ADMIN_API_SECRET=${ADMIN_API_SECRET}
USAGE_LIMITS=${USAGE_LIMITS}
MESSAGES_BEFORE_LOGIN=${MESSAGES_BEFORE_LOGIN}
`;

// Make an HTTP POST request to add the space secrets
Expand Down
23 changes: 23 additions & 0 deletions src/lib/server/usageLimits.ts
@@ -0,0 +1,23 @@
import { z } from "zod";
import { USAGE_LIMITS, RATE_LIMIT } from "$env/static/private";
import JSON5 from "json5";

// Builds a fresh "optional number" field; input is coerced, so numeric strings are accepted.
const optionalLimit = () => z.coerce.number().optional();

// RATE_LIMIT is the legacy way to define the messages-per-minute limit:
// it is substituted only when `messagesPerMinute` itself is undefined.
const withLegacyRateLimit = (value: unknown) => (value === undefined ? RATE_LIMIT : value);

/**
 * Validation schema for the `USAGE_LIMITS` configuration object.
 *
 * Every field is optional and coerced to a number; the object as a whole is
 * optional too.
 */
export const usageLimitsSchema = z
	.object({
		// how many conversations
		conversations: optionalLimit(),
		// how many messages in a conversation
		messages: optionalLimit(),
		// how many assistants
		assistants: optionalLimit(),
		// maximum length of a single message
		messageLength: optionalLimit(),
		// how many messages per minute (legacy RATE_LIMIT is used when unset)
		messagesPerMinute: z.preprocess(withLegacyRateLimit, optionalLimit()).optional(),
	})
	.optional();

/**
 * Usage limits parsed from the `USAGE_LIMITS` environment variable (JSON5 syntax)
 * and validated against `usageLimitsSchema`.
 *
 * JSON5.parse throws on empty input, which would crash the server at module load
 * when the variable is left blank (e.g. `USAGE_LIMITS=`). A blank value is therefore
 * treated as an empty object, which also keeps the legacy RATE_LIMIT fallback for
 * `messagesPerMinute` in effect.
 */
export const usageLimits = usageLimitsSchema.parse(JSON5.parse(USAGE_LIMITS.trim() || "{}"));
7 changes: 4 additions & 3 deletions src/routes/+page.svelte
Expand Up @@ -47,8 +47,9 @@
});

if (!res.ok) {
error.set("Error while creating conversation, try again.");
console.error("Error while creating conversation: " + (await res.text()));
const errorMessage = (await res.json()).message || ERROR_MESSAGES.default;
error.set(errorMessage);
console.error("Error while creating conversation: ", errorMessage);
return;
}

Expand All @@ -63,7 +64,7 @@
// invalidateAll to update list of conversations
await goto(`${base}/conversation/${conversationId}`, { invalidateAll: true });
} catch (err) {
error.set(ERROR_MESSAGES.default);
error.set((err as Error).message || ERROR_MESSAGES.default);
console.error(err);
} finally {
loading = false;
Expand Down
11 changes: 11 additions & 0 deletions src/routes/conversation/+server.ts
Expand Up @@ -8,6 +8,8 @@ import type { Message } from "$lib/types/Message";
import { models, validateModel } from "$lib/server/models";
import { defaultEmbeddingModel } from "$lib/server/embeddingModels";
import { v4 } from "uuid";
import { authCondition } from "$lib/server/auth";
import { usageLimits } from "$lib/server/usageLimits";

export const POST: RequestHandler = async ({ locals, request }) => {
const body = await request.text();
Expand All @@ -23,6 +25,15 @@ export const POST: RequestHandler = async ({ locals, request }) => {
})
.parse(JSON.parse(body));

// Number of existing conversations matching authCondition(locals)
// (presumably those belonging to the current user/session — confirm against auth helper).
const convCount = await collections.conversations.countDocuments(authCondition(locals));

// Use `>=`: the count is taken before the new conversation is added, so a strict `>`
// would let the caller end up with one conversation more than the configured limit.
// An unset limit (or a limit of 0, which is falsy) disables the check.
if (usageLimits?.conversations && convCount >= usageLimits.conversations) {
	throw error(
		429,
		"You have reached the maximum number of conversations. Delete some to continue."
	);
}

let messages: Message[] = [
{
id: v4(),
Expand Down
2 changes: 1 addition & 1 deletion src/routes/conversation/[id]/+page.svelte
Expand Up @@ -43,7 +43,7 @@
});

if (!res.ok) {
error.set("Error while creating conversation, try again.");
// A response body can only be consumed once: a second `res.text()` call rejects
// because the body was already read. Read it a single time and reuse the text
// for both the UI error store and the console log.
const errorMessage = await res.text();
error.set(errorMessage);
console.error("Error while creating conversation: " + errorMessage);
return;
}
Expand Down
28 changes: 20 additions & 8 deletions src/routes/conversation/[id]/+server.ts
@@ -1,4 +1,4 @@
import { MESSAGES_BEFORE_LOGIN, RATE_LIMIT } from "$env/static/private";
import { MESSAGES_BEFORE_LOGIN } from "$env/static/private";
import { authCondition, requiresUser } from "$lib/server/auth";
import { collections } from "$lib/server/database";
import { models } from "$lib/server/models";
Expand All @@ -19,6 +19,7 @@ import { buildSubtree } from "$lib/utils/tree/buildSubtree.js";
import { addChildren } from "$lib/utils/tree/addChildren.js";
import { addSibling } from "$lib/utils/tree/addSibling.js";
import { preprocessMessages } from "$lib/server/preprocessMessages.js";
import { usageLimits } from "$lib/server/usageLimits";

export async function POST({ request, locals, params, getClientAddress }) {
const id = z.string().parse(params.id);
Expand Down Expand Up @@ -95,14 +96,22 @@ export async function POST({ request, locals, params, getClientAddress }) {
}
}

// check if the user is rate limited
const nEvents = Math.max(
await collections.messageEvents.countDocuments({ userId }),
await collections.messageEvents.countDocuments({ ip: getClientAddress() })
);
if (usageLimits?.messagesPerMinute) {
// check if the user is rate limited
const nEvents = Math.max(
await collections.messageEvents.countDocuments({ userId }),
await collections.messageEvents.countDocuments({ ip: getClientAddress() })
);
if (nEvents > usageLimits.messagesPerMinute) {
throw error(429, ERROR_MESSAGES.rateLimited);
}
}

if (RATE_LIMIT != "" && nEvents > parseInt(RATE_LIMIT)) {
throw error(429, ERROR_MESSAGES.rateLimited);
if (usageLimits?.messages && conv.messages.length > usageLimits.messages) {
throw error(
429,
`This conversation has more than ${usageLimits.messages} messages. Start a new one to continue`
);
}

// fetch the model
Expand Down Expand Up @@ -133,6 +142,9 @@ export async function POST({ request, locals, params, getClientAddress }) {
})
.parse(json);

if (usageLimits?.messageLength && (newPrompt?.length ?? 0) > usageLimits.messageLength) {
throw error(400, "Message too long.");
}
// files is an array of base64 strings encoding Blob objects
nsarrazin marked this conversation as resolved.
Show resolved Hide resolved
// we need to convert this array to an array of File objects

Expand Down
13 changes: 13 additions & 0 deletions src/routes/settings/assistants/new/+page.server.ts
Expand Up @@ -7,6 +7,7 @@ import { ObjectId } from "mongodb";
import { z } from "zod";
import { sha256 } from "$lib/utils/sha256";
import sharp from "sharp";
import { usageLimits } from "$lib/server/usageLimits";

const newAsssistantSchema = z.object({
name: z.string().min(1),
Expand Down Expand Up @@ -61,6 +62,18 @@ export const actions: Actions = {
return fail(400, { error: true, errors });
}

// Number of existing assistants matching authCondition(locals)
// (presumably those created by the current user/session — confirm against auth helper).
const assistantsCount = await collections.assistants.countDocuments(authCondition(locals));

// Use `>=`: the count is taken before the new assistant is added, so a strict `>`
// would allow one assistant more than the configured limit.
// An unset limit (or a limit of 0, which is falsy) disables the check.
if (usageLimits?.assistants && assistantsCount >= usageLimits.assistants) {
	// Reported through the same { field, message } error shape as the form validation errors.
	const errors = [
		{
			field: "preprompt",
			message: "You have reached the maximum number of assistants. Delete some to continue.",
		},
	];
	return fail(400, { error: true, errors });
}

const createdById = locals.user?._id ?? locals.sessionId;

const newAssistantId = new ObjectId();
Expand Down