Skip to content

Commit 73467f7

Browse files
committed
feat: implement graceful shutdowns
This patch introduces graceful shutdowns in the form of a ready flag that the health endpoints use when determining whether the readiness probe returns healthy or unhealthy. When a shutdown signal is detected, the ready flag is set to false and a timeout for the actual shutdown starts. The shutdown process will wait until either all active requests have finished processing or the specified timeout is reached, whichever happens first.
1 parent babd3fb commit 73467f7

File tree

5 files changed

+128
-14
lines changed

5 files changed

+128
-14
lines changed

src/config.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,4 @@ export const uptimeRepo = getEnvironmentVariabel("UPTIME_REPO", "oppetid");
7272
export const uptimeToken = getEnvironmentVariabel("UPTIME_API_TOKEN", undefined);
7373
export const googleApiKey = getEnvironmentVariabel("NDLA_GOOGLE_API_KEY", undefined);
7474
export const slowLogTimeout = getEnvironmentVariabel("SLOW_LOG_TIMEOUT", "500");
75+
/**
 * Number of seconds to keep serving after a shutdown signal, read from the
 * READINESS_PROBE_DETECTION_SECONDS environment variable (default 7).
 *
 * Falls back to the default when the variable is not a non-negative integer:
 * a NaN or negative delay would make the shutdown timer fire almost
 * immediately and silently skip the grace period.
 */
export const gracePeriodSeconds = ((): number => {
  const defaultSeconds = 7;
  const parsed = Number.parseInt(
    getEnvironmentVariabel("READINESS_PROBE_DETECTION_SECONDS", String(defaultSeconds)),
    10,
  );
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : defaultSeconds;
})();

src/server.ts

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,30 @@
88

99
import compression from "compression";
1010
import cors from "cors";
11-
import express, { json, Request, Response } from "express";
11+
import express, { json } from "express";
1212
import promBundle from "express-prom-bundle";
1313
import { ApolloServer } from "@apollo/server";
1414
import { expressMiddleware } from "@apollo/server/express4";
1515
import { port } from "./config";
1616
import { resolvers } from "./resolvers";
1717
import { typeDefs } from "./schema";
1818
import correlationIdMiddleware from "./utils/correlationIdMiddleware";
19-
import { logError } from "./utils/logger";
19+
import { getLogger, logError } from "./utils/logger";
2020
import loggerMiddleware from "./utils/loggerMiddleware";
2121
import { contextExpressMiddleware } from "./utils/context/contextMiddleware";
2222
import { getContextOrThrow } from "./utils/context/contextStore";
23+
import { Server } from "http";
24+
import { healthRouter } from "./utils/healthRouter";
25+
import { activeRequestsMiddleware } from "./utils/activeRequestsMiddleware";
26+
import { gracefulShutdown } from "./utils/gracefulShutdown";
2327

2428
const GRAPHQL_PORT = port;
2529

2630
const app = express();
2731

32+
let server: Server;
33+
let apolloServer: ApolloServer<ContextWithLoaders>;
34+
2835
const metricsMiddleware = promBundle({
2936
includeMethod: true,
3037
includePath: false,
@@ -37,12 +44,11 @@ app.use(metricsMiddleware);
3744
app.use(compression());
3845
app.use(express.json({ limit: "1mb" }));
3946

40-
app.get("/health", (_: Request, res: Response) => {
41-
res.status(200).json({ status: 200, text: "Health check ok" });
42-
});
47+
app.use(healthRouter);
48+
app.use(activeRequestsMiddleware);
4349

44-
async function startApolloServer() {
45-
const server = new ApolloServer({
50+
async function startApolloServer(): Promise<void> {
51+
apolloServer = new ApolloServer({
4652
typeDefs,
4753
resolvers,
4854
introspection: true,
@@ -60,21 +66,21 @@ async function startApolloServer() {
6066
};
6167
},
6268
});
63-
await server.start();
69+
await apolloServer.start();
6470
app.use(
6571
"/graphql-api/graphql",
6672
cors(),
6773
json(),
6874
correlationIdMiddleware,
6975
contextExpressMiddleware,
7076
loggerMiddleware,
71-
expressMiddleware(server, { context: async () => getContextOrThrow() }),
77+
expressMiddleware(apolloServer, { context: async () => getContextOrThrow() }),
78+
);
79+
server = app.listen(GRAPHQL_PORT, () =>
80+
getLogger().info(`GraphQL Playground is now running on http://localhost:${GRAPHQL_PORT}/graphql-api/graphql`),
7281
);
7382
}
7483

75-
startApolloServer();
84+
process.on("SIGTERM", () => gracefulShutdown(server, apolloServer));
7685

77-
app.listen(GRAPHQL_PORT, () =>
78-
// eslint-disable-next-line no-console
79-
console.log(`GraphQL Playground is now running on http://localhost:${GRAPHQL_PORT}/graphql-api/graphql`),
80-
);
86+
startApolloServer();

src/utils/activeRequestsMiddleware.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/**
2+
* Copyright (c) 2025-present, NDLA.
3+
*
4+
* This source code is licensed under the GPLv3 license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*
7+
*/
8+
9+
import { NextFunction, Request, Response } from "express";
10+
11+
/** Number of requests that have entered the middleware and whose response has not yet ended. */
let activeRequests = 0;

/**
 * Express middleware that tracks how many requests are currently in flight.
 *
 * The counter is incremented when a request enters the middleware and is
 * decremented exactly once when its response ends, whether it completed
 * normally or the connection was dropped.
 *
 * @param _ - The incoming request (unused).
 * @param res - The response whose lifecycle events drive the counter.
 * @param next - Passes control to the next middleware.
 */
export const activeRequestsMiddleware = (_: Request, res: Response, next: NextFunction): void => {
  activeRequests++;

  let released = false;
  const release = (): void => {
    // Both events fire for a normal response, so only count the first one.
    if (released) return;
    released = true;
    activeRequests--;
  };

  // "finish" is emitted when the response has been fully sent. "close" is also
  // emitted when the client disconnects before the response completes; without
  // it an aborted request would keep the counter above zero forever.
  res.on("finish", release);
  res.on("close", release);

  next();
};

/**
 * Returns the number of requests currently being processed.
 */
export const getActiveRequests = (): number => activeRequests;

src/utils/gracefulShutdown.ts

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/**
2+
* Copyright (c) 2025-present, NDLA.
3+
*
4+
* This source code is licensed under the GPLv3 license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*
7+
*/
8+
9+
import { ApolloServer } from "@apollo/server";
10+
import { gracePeriodSeconds } from "../config";
11+
import { getActiveRequests } from "./activeRequestsMiddleware";
12+
import { getIsShuttingDown, setIsShuttingDown } from "./healthRouter";
13+
import { Server } from "http";
14+
import getLogger from "./logger";
15+
16+
/**
 * Polls the active-request counter until it reaches zero or the timeout
 * elapses, whichever happens first, and logs the outcome.
 *
 * @param timeoutMs - Maximum time to wait, in milliseconds.
 * @param pollIntervalMs - Delay between checks of the counter, in milliseconds.
 */
async function waitForActiveRequests(timeoutMs = 30000, pollIntervalMs = 250): Promise<void> {
  const deadline = Date.now() + timeoutMs;

  getLogger().info(`Waiting for ${getActiveRequests()} active requests to finish...`);
  while (getActiveRequests() > 0 && Date.now() < deadline) {
    await new Promise<void>((resolve) => setTimeout(resolve, pollIntervalMs));
  }

  const remaining = getActiveRequests();
  if (remaining > 0) {
    getLogger().warn(`Timeout reached while waiting for active requests to finish. Active requests: ${remaining}`);
  } else {
    getLogger().info("All active requests have finished processing.");
  }
}
35+
36+
/**
 * Starts a graceful shutdown of the process.
 *
 * Marks the service as shutting down, waits `gracePeriodSeconds` before doing
 * anything else, then waits for active requests, closes the HTTP server, stops
 * the Apollo server and exits the process. Calls made while a shutdown is
 * already in progress are ignored.
 *
 * @param server - The HTTP server to close; skipped when not set.
 * @param apolloServer - The Apollo server to stop; skipped when not set.
 * @returns A promise that resolves as soon as the shutdown has been scheduled,
 *          not when it has completed.
 */
export async function gracefulShutdown(
  server: Server,
  apolloServer: ApolloServer<ContextWithLoaders>,
): Promise<void> {
  const logger = getLogger();
  if (getIsShuttingDown()) return;
  setIsShuttingDown();
  logger.info(
    `Received shutdown signal, waiting ${gracePeriodSeconds} seconds for shutdown to be detected before stopping...`,
  );
  setTimeout(async () => {
    let exitCode = 0;
    try {
      logger.info("Shutting down gracefully...");
      await waitForActiveRequests();
      if (server) server.close();
      if (apolloServer) await apolloServer.stop();
    } catch (err: unknown) {
      // A failure while stopping must not leave the process running after a
      // shutdown signal, so record it and still exit below.
      exitCode = 1;
      logger.warn(`Graceful shutdown failed: ${err instanceof Error ? err.message : String(err)}`);
    } finally {
      process.exit(exitCode);
    }
  }, gracePeriodSeconds * 1000);
}

src/utils/healthRouter.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/**
2+
* Copyright (c) 2025-present, NDLA.
3+
*
4+
* This source code is licensed under the GPLv3 license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*
7+
*/
8+
9+
import { Router, Request, Response } from "express";
10+
11+
/** Set to true once a shutdown has been requested; never reset in this module. */
let isShuttingDown = false;

/**
 * Reports whether a shutdown has been requested.
 */
export const getIsShuttingDown = (): boolean => isShuttingDown;

/**
 * Records that a shutdown has been requested.
 */
export const setIsShuttingDown = (): void => {
  isShuttingDown = true;
};
20+
21+
/**
 * Router exposing the health endpoints:
 * - `/health` and `/health/liveness` always answer 200.
 * - `/health/readiness` answers 200 until shutdown has been requested, then 503.
 */
export const healthRouter = Router();

/** Liveness: answers 200 whenever the process is able to handle the request. */
const livenessHandler = (_: Request, res: Response) => {
  res.status(200).json({ status: 200, text: "Health check ok" });
};

/**
 * Readiness: answers 200 while the service accepts traffic. Once shutdown has
 * been requested it answers 503 Service Unavailable, since the service is
 * deliberately going away rather than failing internally.
 */
const readinessHandler = (_: Request, res: Response) => {
  if (isShuttingDown) {
    res.status(503).json({ status: 503, text: "Service shutting down" });
    return;
  }
  res.status(200).json({ status: 200, text: "Health check ok" });
};

healthRouter.get("/health", livenessHandler);
healthRouter.get("/health/liveness", livenessHandler);
healthRouter.get("/health/readiness", readinessHandler);

0 commit comments

Comments
 (0)