diff --git a/04-manual-instrumentation.md b/04-manual-instrumentation.md index e407adc..17ab23a 100644 --- a/04-manual-instrumentation.md +++ b/04-manual-instrumentation.md @@ -6,7 +6,7 @@ As a basis for the instrumentation we use [backend4](./app/backend4/main.go). To # Initialize OpenTelemetry-go-sdk -### TODO +Before we start instrumenting our application, we should create a standard tracer and register it globally. This ensures that the same exporter is used throughout the application and that the same process steps are performed. ```diff func main() { @@ -21,9 +21,9 @@ func main() { ``` -## Create and register a global trace provider +### Create and register a global trace provider -### TODO +First we have to create a tracer that identifies our specific service. ```diff +var tracer = otel.GetTracerProvider().Tracer("github.com/kubecon-eu-2024/backend") @@ -42,14 +42,12 @@ Keep in mind that any Span that is created `MUST` also be ended. This is the res When defining a span name, it's important to choose descriptive and meaningful names that accurately reflect the operation being performed. ```diff - mux.HandleFunc("GET /rolldice", func(w http.ResponseWriter, r *http.Request) { -+ var span trace.Span -+ ctx, span := tracer.Start(r.Context(), "rolldice") -+ defer span.End() - player := "Anonymous player" - if p := r.URL.Query().Get("player"); p != "" { - player = p - } + mux := http.NewServeMux() + ++ registerHandleFunc := func(pattern string, h http.HandlerFunc) { ++ route := strings.Split(pattern, " ") ++ mux.Handle(pattern, otelhttp.NewHandler(otelhttp.WithRouteTag(route[len(route)-1], h), pattern)) ++ } ``` To simulate a more complex behaviour, we find a `causeError` function in the `/rolldice` handler source code of the backend4 application. Since there is a defined probability that errors will occur, it makes sense to take this part into account as well. @@ -62,14 +60,13 @@ RecordError will record err as an exception span event for this span. 
An additio ```diff func causeError(ctx context.Context, rate int) error { -+ var span trace.Span -+ _, span = tracer.Start(ctx, "causeError") ++ _, span := tracer.Start(ctx, "causeError") + defer span.End() randomNumber := rand.Intn(100) -+ span.AddEvent(fmt.Sprintf("random nr: %d", randomNumber)) ++ span.AddEvent("roll", trace.WithAttributes(attribute.Int("number", randomNumber))) if randomNumber < rate { - err := fmt.Errorf("internal server error") + err := fmt.Errorf("number(%d)) < rate(%d)", randomNumber, rate) + span.RecordError(err) + span.SetStatus(codes.Error, "some error occured") return err @@ -88,11 +85,10 @@ AddEvent adds an event with the provided name and optionsAddEvent adds an event ```diff func causeDelay(ctx context.Context, rate int) { -+ var span trace.Span -+ _, span = tracer.Start(ctx, "causeDelay") ++ _, span := tracer.Start(ctx, "causeDelay") + defer span.End() randomNumber := rand.Intn(100) -+ span.AddEvent(fmt.Sprintf("random nr: %d", randomNumber)) ++ span.AddEvent("roll", trace.WithAttributes(attribute.Int("number", randomNumber))) if randomNumber < rate { time.Sleep(time.Duration(2+rand.Intn(3)) * time.Second) } @@ -103,22 +99,6 @@ func causeDelay(ctx context.Context, rate int) { Once the code has been instrumented, we can use `go mod tidy` to update the existing `go.mod` file and start testing our application. -## Configuring an OTLP exporter and setting the endpoint - -To get quick feedback, we can run a Jaeger instance locally and point our application at it. Jaeger all-in-one will make this easy. - -```bash -docker run --rm -it -p 127.0.0.1:4317:4317 -p 127.0.0.1:16686:16686 -e COLLECTOR_OTLP_ENABLED=true -e LOG_LEVEL=debug jaegertracing/all-in-one:latest -``` - -Now we can configure our application with a specific `RATE_ERROR` and `RATE_DELAY` in `%`. This indicates how many traces should be delayed and/or cause an error. - -Finally we need to configure the OpenTelemetry-SDK, by default we can use common environment variables. 
[Documentation](https://opentelemetry.io/docs/languages/sdk-configuration/) - -```bash -OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 OTEL_SERVICE_NAME=go-backend RATE_ERROR=20 RATE_HIGH_DELAY=20 go run app/backend4/main.go -``` - ## Apply Backend4 to the Kubernetes test cluster Now that we have instrumentalised `backend4`, we can use it as a drop-in replacement for `backend2`. @@ -126,7 +106,7 @@ Now that we have instrumentalised `backend4`, we can use it as a drop-in replace For this we need to build and provide a new container image or use the prepared `backend4:with-instr` version. ```bash -kubectl apply -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/backend/04-backend.yaml +kubectl apply -f https://raw.githubusercontent.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/main/app/instrumentation-replace-backend2.yaml ``` When using `kubectl diff` we should see something similar to this. @@ -153,8 +133,40 @@ spec: - name: backend2 - image: ghcr.io/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial-backend2:latest + image: ghcr.io/frzifus/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial-backend4:with-instr + env: ++ - name: RATE_ERROR ++ value: 20 ++ - name: RATE_HIGH_DELAY ++ value: 20 +``` + +> [!NOTE] +> This is an optional section. + +
+ +Run and test backend 4 locally (shorter development cycle) + +## Configuring an OTLP exporter and setting the endpoint + +To get quick feedback, we can run a Jaeger instance locally and point our application at it. Jaeger all-in-one will make this easy. + +```bash +docker run --rm -it -p 127.0.0.1:4317:4317 -p 127.0.0.1:16686:16686 -e COLLECTOR_OTLP_ENABLED=true -e LOG_LEVEL=debug jaegertracing/all-in-one:latest ``` +Now we can configure our application with a specific `RATE_ERROR` and `RATE_HIGH_DELAY`, both given in percent (`%`). These values indicate how many traces should cause an error and/or be delayed. + +Finally, we need to configure the OpenTelemetry SDK. By default, we can use the common environment variables. [Documentation](https://opentelemetry.io/docs/languages/sdk-configuration/) + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 OTEL_SERVICE_NAME=go-backend RATE_ERROR=20 RATE_HIGH_DELAY=20 go run main.go +``` + +
+ +--- + By instrumenting our applications, whether manually or automatically, we get more telemetry data to help us understand our system. However, since a large amount of telemetry data also generates costs, in the next chapter we will discuss how we can utilise this amount in a meaningful way. --- diff --git a/05-sampling.md b/05-sampling.md index e2e60fd..fe98af2 100644 --- a/05-sampling.md +++ b/05-sampling.md @@ -4,9 +4,9 @@ This tutorial step covers the basic usage of the OpenTelemetry Collector on Kube ## Overview -![tracing setup](images/tracing-setup.png) +In chapter 3 we saw the [schematic structure of the dice game application](https://github.com/pavolloffay/kubecon-eu-2024-opentelemetry-kubernetes-tracing-tutorial/blob/main/03-auto-instrumentation.md#application-description). The following diagram illustrates how the telemetry data collected there is exported and stored. [excalidraw](https://excalidraw.com/#json=15BrdSOMEkc9RA5cxeqwz,urTmfk01mbx7V-PpQI7KgA) -[excalidraw](https://excalidraw.com/#json=15BrdSOMEkc9RA5cxeqwz,urTmfk01mbx7V-PpQI7KgA) +![tracing setup](images/tracing-setup.png) ## Sampling, what does it mean and why is it important? 
diff --git a/app/backend4/main.go b/app/backend4/main.go index cfeaa7f..0921530 100644 --- a/app/backend4/main.go +++ b/app/backend4/main.go @@ -8,6 +8,7 @@ import ( "net/http" "os" "strconv" + "strings" "time" "github.com/prometheus/client_golang/prometheus" @@ -74,8 +75,12 @@ func main() { mux := http.NewServeMux() - const path = "GET /rolldice" - mux.Handle(path, otelhttp.NewMiddleware(path)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + registerHandleFunc := func(pattern string, h http.HandlerFunc) { + route := strings.Split(pattern, " ") + mux.Handle(pattern, otelhttp.NewHandler(otelhttp.WithRouteTag(route[len(route)-1], h), pattern)) + } + + registerHandleFunc("GET /rolldice", func(w http.ResponseWriter, r *http.Request) { player := "Anonymous player" if p := r.URL.Query().Get("player"); p != "" { player = p @@ -101,9 +106,9 @@ func main() { w.WriteHeader(http.StatusInternalServerError) } - }))) + }) - mux.HandleFunc("GET /metrics", promhttp.Handler().ServeHTTP) + registerHandleFunc("GET /metrics", promhttp.Handler().ServeHTTP) srv := &http.Server{ Addr: "0.0.0.0:5165", Handler: mux, diff --git a/backend/04-backend.yaml b/app/instrumentation-replace-backend2.yaml similarity index 82% rename from backend/04-backend.yaml rename to app/instrumentation-replace-backend2.yaml index 259e10b..3924f62 100644 --- a/backend/04-backend.yaml +++ b/app/instrumentation-replace-backend2.yaml @@ -32,7 +32,7 @@ spec: - name: RATE_HIGH_DELAY value: 20 # NOTE: alternative to instrumentation annotation - # - name: OTEL_EXPORTER_OTLP_ENDPOINT - # value: http://otel-collector.observability-backend.svc.cluster.local:4317 - # - name: OTEL_SERVICE_NAME - # value: go-backend + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: http://otel-collector.observability-backend.svc.cluster.local:4317 + - name: OTEL_SERVICE_NAME + value: go-backend