Skip to content

Commit ab2eb81

Browse files
naiming-zededa authored and eriknordmark committed
Nested App Stats Collection and Publications
- Docker-Compose 'app' operation goes through the Patch-Envelope mechanism and is not known to the pillar side - in zedrouter/appcontainer.go, extend the 'Docker' deployment type to query the list of 'Docker-Compose' apps and their stats - periodically query the runtime agent of Docker-Compose - publish the NestedAppDomainStatus, at least for 'newlogd' to consume - publish the AppContainerStats to be included in the App stats upload Signed-off-by: Naiming Shen <[email protected]>
1 parent 6205a6b commit ab2eb81

File tree

12 files changed

+284
-21
lines changed

12 files changed

+284
-21
lines changed

pkg/pillar/cmd/domainmgr/domainmgr.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1372,6 +1372,7 @@ func handleCreate(ctx *domainContext, key string, config *types.DomainConfig) {
13721372
VmConfig: config.VmConfig,
13731373
Service: config.Service,
13741374
NodeName: ctx.nodeName,
1375+
DeploymentType: config.DeploymentType,
13751376
}
13761377

13771378
status.VmConfig.CPUs = make([]int, 0)

pkg/pillar/cmd/zedagent/parseconfig.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,7 +716,14 @@ func parseAppInstanceConfig(getconfigCtx *getconfigContext,
716716
parseVolumeRefList(appInstance.VolumeRefConfigList, cfgApp.GetVolumeRefList(), appInstance.UUIDandVersion.UUID)
717717

718718
// fill in the collect stats IP address of the App
719+
// Get the runtime deployment type of the App
719720
appInstance.CollectStatsIPAddr = net.ParseIP(cfgApp.GetCollectStatsIPAddr())
721+
switch cfgApp.GetRuntimeType() {
722+
case zconfig.AppRuntimeType_APP_RUNTIME_TYPE_DOCKER:
723+
appInstance.DeploymentType = types.AppRuntimeTypeDocker
724+
default:
725+
appInstance.DeploymentType = types.AppRuntimeTypeUnSpecified
726+
}
720727

721728
// fill the app adapter config
722729
parseAppNetworkConfig(&appInstance, cfgApp, config.Networks,

pkg/pillar/cmd/zedmanager/handledomainmgr.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ func MaybeAddDomainConfig(ctx *zedmanagerContext,
6464
// This isDNiDnode will be set to true even if the App is not in cluster mode,
6565
// This will be set in zedagent parseConfig for the case of single node/device App case.
6666
IsDNidNode: aiConfig.IsDesignatedNodeID,
67+
// DeploymentType is set to the value of the DeploymentType of the AppInstanceConfig
68+
DeploymentType: aiConfig.DeploymentType,
6769
}
6870

6971
dc.DiskConfigList = make([]types.DiskConfig, 0, len(aiStatus.VolumeRefStatusList))

pkg/pillar/cmd/zedmanager/handlezedrouter.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ func MaybeAddAppNetworkConfig(ctx *zedmanagerContext,
7575
CloudInitUserData: aiConfig.CloudInitUserData,
7676
CipherBlockStatus: aiConfig.CipherBlockStatus,
7777
MetaDataType: aiConfig.MetaDataType,
78+
DeploymentType: aiConfig.DeploymentType, // can not be dynamically changed
7879
}
7980
nc.AppNetAdapterList = make([]types.AppNetAdapterConfig,
8081
len(aiConfig.AppNetAdapterList))

pkg/pillar/cmd/zedrouter/appcontainer.go

Lines changed: 211 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2020 Zededa, Inc.
1+
// Copyright (c) 2020-2025 Zededa, Inc.
22
// SPDX-License-Identifier: Apache-2.0
33

44
package zedrouter
@@ -7,6 +7,7 @@ import (
77
"bytes"
88
"context"
99
"encoding/json"
10+
"fmt"
1011
"io"
1112
"net/http"
1213
"strconv"
@@ -16,10 +17,14 @@ import (
1617
apitypes "github.com/docker/docker/api/types"
1718
"github.com/docker/docker/client"
1819
"github.com/docker/docker/pkg/stdcopy"
20+
"github.com/google/go-cmp/cmp"
21+
nestedapp "github.com/lf-edge/eve-tools/runtimemetrics/go/nestedappinstancemetrics"
1922
"github.com/lf-edge/eve/pkg/pillar/agentlog"
2023
"github.com/lf-edge/eve/pkg/pillar/types"
2124
"github.com/lf-edge/eve/pkg/pillar/utils"
25+
uuid "github.com/satori/go.uuid"
2226
"github.com/sirupsen/logrus"
27+
"google.golang.org/protobuf/encoding/protojson"
2328
)
2429

2530
// dockerAPIPort - unencrypted docker socket for remote password-less access
@@ -28,6 +33,19 @@ const dockerAPIPort int = 2375
2833
// dockerAPIVersion - docker API version used
2934
const dockerAPIVersion string = "1.40"
3035

36+
const (
37+
// Runtime agent API details: https://github.com/lf-edge/eve-tools/blob/master/runtimemetrics/README.md
38+
39+
// nestedAppDomainAppPort - TCP port on which the runtime serves the nested domain app list and metrics.
40+
// It is used as http://<runtime-ip>:57475, where <runtime-ip> is the 'GetStatsIPAddr' of the AppNetworkStatus
41+
// and the deployment type is 'Docker'.
42+
nestedAppDomainAppPort int = 57475
43+
// nestedAppDomainAppListURL - URL path used to fetch the list of nested domain apps
44+
nestedAppDomainAppListURL = "/api/v1/inventory/nested-app-id"
45+
// nestedAppDomainAppMetricsURL - URL path prefix used to fetch nested domain app metrics; the nested-app uuid is appended to it
46+
nestedAppDomainAppMetricsURL = "/api/v1/metrics/nested-app-id/"
47+
)
48+
3149
// check if we need to launch the goroutine to collect App container stats
3250
func (z *zedrouter) checkAppContainerStatsCollecting(config *types.AppNetworkConfig,
3351
status *types.AppNetworkStatus) {
@@ -36,11 +54,13 @@ func (z *zedrouter) checkAppContainerStatsCollecting(config *types.AppNetworkCon
3654
if config != nil {
3755
if !status.GetStatsIPAddr.Equal(config.GetStatsIPAddr) {
3856
status.GetStatsIPAddr = config.GetStatsIPAddr
57+
status.DeploymentType = config.DeploymentType
3958
changed = true
4059
}
4160
} else {
4261
if status.GetStatsIPAddr != nil {
4362
status.GetStatsIPAddr = nil
63+
status.DeploymentType = types.AppRuntimeTypeUnSpecified
4464
changed = true
4565
}
4666
}
@@ -72,28 +92,16 @@ func (z *zedrouter) collectAppContainerStats() {
7292
collectTime := time.Now() // all apps collection assign the same timestamp
7393
for _, st := range items {
7494
status := st.(types.AppNetworkStatus)
95+
// When the GetStatsIPAddr is configured, we need to handle collecting stats
96+
// for various deployment types, defined by the DeploymentType. At this moment,
97+
// we have two types of deployment for stats collection: Docker-Compose and IoT-Edge.
7598
if status.GetStatsIPAddr != nil {
76-
// get a list of containers and client handle
77-
cli, containers, err := z.getAppContainers(status)
78-
if err != nil {
79-
z.log.Errorf(
80-
"collectAppContainerStats: can't get App Containers %s on %s, %v",
81-
status.UUIDandVersion.UUID.String(), status.GetStatsIPAddr.String(),
82-
err)
83-
continue
84-
}
85-
acNum += len(containers)
86-
87-
// collect container stats, and publish to zedclient
88-
acMetrics := z.getAppContainerStats(cli, containers)
89-
if len(acMetrics.StatsList) > 0 {
90-
acMetrics.UUIDandVersion = status.UUIDandVersion
91-
acMetrics.CollectTime = collectTime
92-
z.pubAppContainerStats.Publish(acMetrics.Key(), acMetrics)
99+
switch status.DeploymentType {
100+
case types.AppRuntimeTypeDocker:
101+
z.getNestedDomainAppMetrics(status, &acNum)
102+
default:
103+
z.getIotEdgeMetricsAndLogs(status, collectTime, lastLogTime, &acNum, &numlogs)
93104
}
94-
95-
// collect container logs and send through the logging system
96-
numlogs += z.getAppContainerLogs(status, lastLogTime, cli, containers)
97105
}
98106
}
99107
// log output every 5 min, see this goroutine running status and number
@@ -328,3 +336,185 @@ func (z *zedrouter) getAppContainers(status types.AppNetworkStatus) (
328336

329337
return cli, containers, nil
330338
}
339+
340+
// getIotEdgeMetricsAndLogs collects the metrics and logs for IoT-Edge
341+
func (z *zedrouter) getIotEdgeMetricsAndLogs(status types.AppNetworkStatus,
342+
collectTime time.Time, lastLogTime map[string]time.Time, acNum, numlogs *int) {
343+
// get a list of containers and client handle
344+
cli, containers, err := z.getAppContainers(status)
345+
if err != nil {
346+
z.log.Errorf(
347+
"getIotEdgeMetricsAndLogs: can't get App Containers %s on %s, %v",
348+
status.UUIDandVersion.UUID.String(), status.GetStatsIPAddr.String(),
349+
err)
350+
return
351+
}
352+
*acNum += len(containers)
353+
354+
// collect container stats, and publish to zedclient
355+
acMetrics := z.getAppContainerStats(cli, containers)
356+
if len(acMetrics.StatsList) > 0 {
357+
acMetrics.UUIDandVersion = status.UUIDandVersion
358+
acMetrics.CollectTime = collectTime
359+
z.pubAppContainerStats.Publish(acMetrics.Key(), acMetrics)
360+
}
361+
362+
// collect container logs and send through the logging system
363+
*numlogs += z.getAppContainerLogs(status, lastLogTime, cli, containers)
364+
}
365+
366+
// Helper function to construct the URL for nested app operations
367+
func buildNestedAppURL(status types.AppNetworkStatus, endpoint string, appID string) string {
368+
baseURL := fmt.Sprintf("http://%s:%d%s", status.GetStatsIPAddr.String(), nestedAppDomainAppPort, endpoint)
369+
if appID != "" {
370+
return baseURL + appID
371+
}
372+
return baseURL
373+
}
374+
375+
// getNestedDomainAppMetrics collects the metrics for nested domain apps
376+
// this does several tasks:
377+
// - http request to runtime agent to get the list of nested domain apps
378+
// - publish the nested domain apps, currently it can be used by 'newlogd'
379+
// - http request to runtime agent to get the metrics for each nested domain app
380+
// - publish the metrics to zedagent w/ types.AppContainerStats
381+
func (z *zedrouter) getNestedDomainAppMetrics(status types.AppNetworkStatus, acNum *int) {
382+
// first get the list of nested domain apps
383+
nestedApps, err := z.getNestedDomainAppList(status)
384+
if err != nil {
385+
z.log.Errorf("getNestedDomainAppMetrics: failed to get nested app list, error: %v", err)
386+
return
387+
}
388+
389+
*acNum += len(nestedApps)
390+
var acMetrics types.AppContainerMetrics
391+
acMetrics.UUIDandVersion = status.UUIDandVersion
392+
acMetrics.CollectTime = time.Now()
393+
394+
// for each nested domain app, get the metrics
395+
// this list of nested app metrics is published to zedclient
396+
// and to be uploaded to the controller along with the runtime or parent app metrics
397+
for _, nestedApp := range nestedApps {
398+
url := buildNestedAppURL(status, nestedAppDomainAppMetricsURL, nestedApp.UUIDandVersion.UUID.String())
399+
400+
data, err := fetchHTTPData(url)
401+
if err != nil {
402+
z.log.Errorf("getNestedDomainAppMetrics: %v", err)
403+
continue
404+
}
405+
406+
var nastat nestedapp.NestedAppMetrics
407+
if err := protojson.Unmarshal(data, &nastat); err != nil {
408+
z.log.Errorf("getNestedDomainAppMetrics: failed to decode JSON data, error: %v", err)
409+
continue
410+
}
411+
412+
acStats := types.AppContainerStats{
413+
ContainerName: nastat.Id,
414+
Status: nastat.Status,
415+
Pids: nastat.Pids,
416+
Uptime: nastat.Uptime,
417+
CPUTotal: nastat.CPUTotal,
418+
SystemCPUTotal: nastat.SystemCPUTotal,
419+
UsedMem: nastat.UsedMem,
420+
AllocatedMem: nastat.AllocatedMem,
421+
TxBytes: nastat.TxBytes,
422+
RxBytes: nastat.RxBytes,
423+
ReadBytes: nastat.ReadBytes,
424+
WriteBytes: nastat.WriteBytes,
425+
}
426+
acMetrics.StatsList = append(acMetrics.StatsList, acStats)
427+
}
428+
// send for zedagent to pack w/ parent app metrics
429+
z.pubAppContainerStats.Publish(acMetrics.Key(), acMetrics)
430+
431+
z.log.Functionf("getNestedDomainAppMetrics: collected metrics %+v", acMetrics)
432+
}
433+
434+
// getNestedDomainAppList gets the list of nested domain apps
435+
func (z *zedrouter) getNestedDomainAppList(status types.AppNetworkStatus) ([]types.NestedAppDomainStatus, error) {
436+
pub := z.pubNestedAppDomainStatus
437+
existingItems := pub.GetAll()
438+
existingNestedApps := make(map[string]types.NestedAppDomainStatus)
439+
440+
// Save existing items for later comparison
441+
for _, st := range existingItems {
442+
nestedApp := st.(types.NestedAppDomainStatus)
443+
existingNestedApps[nestedApp.UUIDandVersion.UUID.String()] = nestedApp
444+
}
445+
446+
// Get the JSON data from the Runtime endpoint
447+
url := buildNestedAppURL(status, nestedAppDomainAppListURL, "")
448+
449+
data, err := fetchHTTPData(url)
450+
if err != nil {
451+
z.log.Errorf("getNestedDomainAppMetrics: %v", err)
452+
return nil, err
453+
}
454+
455+
var nestedAppInventory nestedapp.NestedAppInventory
456+
// Decode the JSON data into the protobuf struct
457+
if err := protojson.Unmarshal(data, &nestedAppInventory); err != nil {
458+
z.log.Errorf("getNestedAppListAndMetrics: failed to decode JSON data using protojson, error: %v", err)
459+
return nil, err
460+
}
461+
462+
// Process the nested app IDs
463+
var nestedapps []types.NestedAppDomainStatus
464+
newNestedApps := make(map[string]types.NestedAppDomainStatus)
465+
for _, nestedAppID := range nestedAppInventory.Apps {
466+
nestedAppUUID, err := uuid.FromString(nestedAppID.AppId)
467+
if err != nil {
468+
z.log.Errorf("getNestedAppListAndMetrics: invalid UUID %s, error: %v", nestedAppID.AppId, err)
469+
continue
470+
}
471+
472+
nestedApp := types.NestedAppDomainStatus{
473+
UUIDandVersion: types.UUIDandVersion{UUID: nestedAppUUID},
474+
DisplayName: nestedAppID.AppName,
475+
DisableLogs: nestedAppID.DisableLogs,
476+
ParentAppUUID: status.UUIDandVersion.UUID,
477+
}
478+
479+
newNestedApps[nestedAppID.AppId] = nestedApp
480+
nestedapps = append(nestedapps, nestedApp)
481+
}
482+
483+
// Compare old and new sets of nested apps and publish if different
484+
for uuidStr, newNestedApp := range newNestedApps {
485+
if existingNestedApp, exists := existingNestedApps[uuidStr]; !exists || !cmp.Equal(existingNestedApp, newNestedApp) {
486+
z.log.Functionf("getNestedAppListAndMetrics: publish nestedApp %+v", newNestedApp)
487+
z.pubNestedAppDomainStatus.Publish(newNestedApp.Key(), newNestedApp)
488+
}
489+
}
490+
491+
// handle removed nested apps
492+
for uuidStr := range existingNestedApps {
493+
if _, exists := newNestedApps[uuidStr]; !exists {
494+
z.log.Functionf("getNestedAppListAndMetrics: remove nestedApp with UUID %s", uuidStr)
495+
z.pubNestedAppDomainStatus.Unpublish(uuidStr)
496+
}
497+
}
498+
499+
return nestedapps, nil
500+
}
501+
502+
// fetchHTTPData issues an HTTP GET for url and returns the response body.
//
// An error is returned when the request fails, when the response status is
// not 200 OK, or when the body cannot be read. The whole exchange (connect,
// headers and body read) is bounded by a timeout so that an unresponsive
// peer cannot block the caller indefinitely.
func fetchHTTPData(url string) ([]byte, error) {
	// http.Get would use http.DefaultClient, which has no timeout at all.
	const fetchTimeout = 5 * time.Second
	client := http.Client{Timeout: fetchTimeout}

	resp, err := client.Get(url)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch data from %s: %w", url, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status code %d from %s", resp.StatusCode, url)
	}

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response body from %s: %w", url, err)
	}

	return data, nil
}

pkg/pillar/cmd/zedrouter/pubsubhandlers.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,11 @@ func (z *zedrouter) handleAppNetworkDelete(ctxArg interface{}, key string,
660660
// Write out what we modified to AppNetworkStatus aka delete
661661
z.unpublishAppNetworkStatus(status)
662662

663+
// Unpublish AppContainerStats
664+
if config.GetStatsIPAddr != nil {
665+
z.pubAppContainerStats.Unpublish(status.Key())
666+
}
667+
663668
// Free all numbers allocated for this app network.
664669
appNumKey := types.UuidToNumKey{UUID: status.UUIDandVersion.UUID}
665670
err := z.appNumAllocator.Free(appNumKey, false)

pkg/pillar/cmd/zedrouter/zedrouter.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,9 @@ type zedrouter struct {
154154
// Kubernetes networking
155155
withKubeNetworking bool
156156
cniRequests chan *rpcRequest
157+
158+
// publish nested App Status
159+
pubNestedAppDomainStatus pubsub.Publication
157160
}
158161

159162
// AddAgentSpecificCLIFlags adds CLI options
@@ -578,6 +581,15 @@ func (z *zedrouter) initPublications() (err error) {
578581
return err
579582
}
580583

584+
z.pubNestedAppDomainStatus, err = z.pubSub.NewPublication(
585+
pubsub.PublicationOptions{
586+
AgentName: agentName,
587+
TopicType: types.NestedAppDomainStatus{},
588+
})
589+
if err != nil {
590+
return err
591+
}
592+
581593
return nil
582594
}
583595

pkg/pillar/go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ require (
3434
github.com/lf-edge/edge-containers v0.0.0-20240207093504-5dfda0619b80
3535
github.com/lf-edge/eve-api/go v0.0.0-20250310225738-c77ab6f8c73a
3636
github.com/lf-edge/eve-libs v0.0.0-20250313200311-28f858e8e99b
37+
github.com/lf-edge/eve-tools/runtimemetrics/go v0.0.0-20250320220227-713ea9d6c6d2
3738
github.com/lf-edge/eve/pkg/kube/cnirpc v0.0.0-20240315102754-0f6d1f182e0d
3839
github.com/lf-edge/go-qemu v0.0.0-20231121152149-4c467eda0c56
3940
github.com/linuxkit/linuxkit/src/cmd/linuxkit v0.0.0-20240507172735-6d37353ca1ee

pkg/pillar/go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1461,6 +1461,8 @@ github.com/lf-edge/eve-api/go v0.0.0-20250310225738-c77ab6f8c73a h1:AI1EbTLgXv+v
14611461
github.com/lf-edge/eve-api/go v0.0.0-20250310225738-c77ab6f8c73a/go.mod h1:ot6MhAhBXapUDl/hXklaX4kY88T3uC4PTg0D2wD8DzA=
14621462
github.com/lf-edge/eve-libs v0.0.0-20250313200311-28f858e8e99b h1:sprMvkZ9Ogls4WDo2rgtRwRFPpdykjYtUqntzwy3NKI=
14631463
github.com/lf-edge/eve-libs v0.0.0-20250313200311-28f858e8e99b/go.mod h1:We4FeQWOAxUiB8ZH+eIfRdn2QG9Hfr1W5/IoufU1t64=
1464+
github.com/lf-edge/eve-tools/runtimemetrics/go v0.0.0-20250320220227-713ea9d6c6d2 h1:rCZhnv3q+g8bcNUHp9wf0NlIB7k4ORckSnoZdHGcMtU=
1465+
github.com/lf-edge/eve-tools/runtimemetrics/go v0.0.0-20250320220227-713ea9d6c6d2/go.mod h1:l5jh2deQzJnZ7gW4F7dvzJcLRlkYBgFjcHs+eBnBUxo=
14641466
github.com/lf-edge/eve/pkg/kube/cnirpc v0.0.0-20240315102754-0f6d1f182e0d h1:tUBb9M6u42LXwHAYHyh22wJeUUQlTpDkXwRXalpRqbo=
14651467
github.com/lf-edge/eve/pkg/kube/cnirpc v0.0.0-20240315102754-0f6d1f182e0d/go.mod h1:Nn3juMJJ1G8dyHOebdZyS4jOB/fuxAd5fIajBaWjHr8=
14661468
github.com/lf-edge/go-qemu v0.0.0-20231121152149-4c467eda0c56 h1:LmFp0jbNSwPLuxJA+nQ+mMQrQ53ESkvHP4CVMqR0zrY=

pkg/pillar/types/domainmgrtypes.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ type DomainConfig struct {
6666
// OemWindowsLicenseKeyInfo provides the information required to propagate the OEM license key
6767
// to the VM.
6868
OemWindowsLicenseKeyInfo OemWindowsLicenseKeyInfo
69+
70+
DeploymentType AppRuntimeType
6971
}
7072

7173
// MetaDataType of metadata service for app
@@ -340,6 +342,8 @@ type DomainStatus struct {
340342
// PassthroughWindowsLicenseKey is true if eveything it available to propagate
341343
// the OEM license key to the VM.
342344
PassthroughWindowsLicenseKey bool
345+
// DeploymentType is the type of deployment for the app
346+
DeploymentType AppRuntimeType
343347
}
344348

345349
func (status DomainStatus) Key() string {

0 commit comments

Comments
 (0)