Skip to content

Commit 910d2d0

Browse files
authored
Integrate category classifier model in tracker (#2756)
Signed-off-by: Sergio Castaño Arteaga <[email protected]>
1 parent 815d95f commit 910d2d0

File tree

31 files changed

+268
-31
lines changed

31 files changed

+268
-31
lines changed

.gitattributes

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ web/build/** linguist-generated
33
__fixtures__/** linguist-generated
44
*.sql linguist-detectable=true
55
*.sql linguist-language=sql
6-
ml/categories/model/** linguist-generated
6+
ml/category/model/** linguist-generated

.github/workflows/ci.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@ jobs:
1313
uses: actions/setup-go@v3
1414
with:
1515
go-version: 1.19
16+
- name: Install TensorFlow C library
17+
run: |
18+
FILENAME=libtensorflow-cpu-linux-x86_64-2.11.0.tar.gz
19+
wget -q --no-check-certificate https://storage.googleapis.com/tensorflow/libtensorflow/${FILENAME}
20+
sudo tar -C /usr/local -xzf ${FILENAME}
21+
sudo ldconfig /usr/local/lib
1622
- name: Run golangci-lint
1723
uses: golangci/golangci-lint-action@v3
1824
with:
@@ -98,6 +104,12 @@ jobs:
98104
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
99105
restore-keys: |
100106
${{ runner.os }}-go-
107+
- name: Install TensorFlow C library
108+
run: |
109+
FILENAME=libtensorflow-cpu-linux-x86_64-2.11.0.tar.gz
110+
wget -q --no-check-certificate https://storage.googleapis.com/tensorflow/libtensorflow/${FILENAME}
111+
sudo tar -C /usr/local -xzf ${FILENAME}
112+
sudo ldconfig /usr/local/lib
101113
- name: Run backend tests
102114
run: go test -cover -race -v -mod=readonly ./...
103115

@@ -171,6 +183,12 @@ jobs:
171183
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
172184
restore-keys: |
173185
${{ runner.os }}-go-
186+
- name: Install TensorFlow C library
187+
run: |
188+
FILENAME=libtensorflow-cpu-linux-x86_64-2.11.0.tar.gz
189+
wget -q --no-check-certificate https://storage.googleapis.com/tensorflow/libtensorflow/${FILENAME}
190+
sudo tar -C /usr/local -xzf ${FILENAME}
191+
sudo ldconfig /usr/local/lib
174192
- name: Build hub
175193
working-directory: ./cmd/hub
176194
run: go build -v

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@ docs/www/content/topics/*
1919
docs/www/content/topics/annotations/*
2020
!docs/www/content/topics/annotations/_index.md
2121
dist
22-
ml/categories/data/generated
22+
ml/category/data/generated

charts/artifact-hub/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ apiVersion: v2
22
name: artifact-hub
33
description: Artifact Hub is a web-based application that enables finding, installing, and publishing Kubernetes packages.
44
type: application
5-
version: 1.12.1-3
5+
version: 1.12.1-4
66
appVersion: 1.12.0
77
kubeVersion: ">= 1.19.0-0"
88
home: https://artifacthub.io

charts/artifact-hub/templates/tracker_secret.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,4 @@ stringData:
2828
repositoriesNames: {{ .Values.tracker.repositoriesNames }}
2929
repositoriesKinds: {{ .Values.tracker.repositoriesKinds }}
3030
bypassDigestCheck: {{ .Values.tracker.bypassDigestCheck }}
31+
categoryModelPath: ./ml/category/model

cmd/scanner/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /scanner .
1010
# Trivy installer
1111
FROM alpine:3.17.1 AS trivy-installer
1212
RUN apk --no-cache add curl
13-
RUN curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/master/contrib/install.sh | sh -s -- -b /usr/local/bin v0.36.1
13+
RUN curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin v0.36.1
1414

1515
# Final stage
1616
FROM alpine:3.17.1

cmd/tracker/Dockerfile

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,39 @@
11
# Build tracker
2-
FROM golang:1.19.5-alpine3.17 AS builder
2+
FROM golang:1.19.5-bullseye AS builder
3+
WORKDIR /tmp
4+
ENV LIBTENSORFLOW_TGZ libtensorflow-cpu-linux-x86_64-2.11.0.tar.gz
5+
RUN wget -q --no-check-certificate https://storage.googleapis.com/tensorflow/libtensorflow/$LIBTENSORFLOW_TGZ
6+
RUN tar -C /usr/local -xzf $LIBTENSORFLOW_TGZ
7+
RUN ldconfig /usr/local/lib
38
WORKDIR /go/src/github.com/artifacthub/hub
49
COPY go.* ./
510
COPY cmd/tracker cmd/tracker
611
COPY internal internal
712
WORKDIR /go/src/github.com/artifacthub/hub/cmd/tracker
8-
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /tracker .
13+
RUN CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o /tracker .
914

1015
# OPM installer
11-
FROM golang:1.17-alpine3.16 AS opm-installer
12-
RUN apk --no-cache add build-base
16+
FROM golang:1.17-bullseye AS opm-installer
1317
RUN GO111MODULE=on go get github.com/operator-framework/operator-registry/cmd/[email protected]
1418

1519
# Final stage
16-
FROM alpine:3.17.1
17-
RUN apk --no-cache add ca-certificates && addgroup -S tracker -g 1000 && adduser -S tracker -u 1000 -G tracker
20+
FROM debian:bullseye-slim
21+
RUN apt-get update \
22+
&& apt-get install -y ca-certificates \
23+
&& groupadd -g 1000 tracker \
24+
&& useradd -u 1000 -g tracker tracker
25+
WORKDIR /tmp
26+
ENV LIBTENSORFLOW_TGZ libtensorflow-cpu-linux-x86_64-2.11.0.tar.gz
27+
RUN apt-get install -y wget \
28+
&& export LIBTENSORFLOW_TGZ=libtensorflow-cpu-linux-x86_64-2.11.0.tar.gz \
29+
&& wget -q --no-check-certificate https://storage.googleapis.com/tensorflow/libtensorflow/$LIBTENSORFLOW_TGZ \
30+
&& tar -C /usr/local -xzf $LIBTENSORFLOW_TGZ \
31+
&& rm $LIBTENSORFLOW_TGZ \
32+
&& apt-get remove -y wget \
33+
&& ldconfig /usr/local/lib
1834
USER 1000
1935
WORKDIR /home/tracker
36+
COPY ml ./ml
2037
COPY --from=builder /tracker ./
2138
COPY --from=opm-installer /go/bin/opm /usr/local/bin
2239
CMD ["./tracker"]

cmd/tracker/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ func main() {
7070
}
7171
ec := repo.NewErrorsCollector(rm, repo.Tracker)
7272
op := oci.NewPuller(cfg)
73+
pcc := tracker.NewPackageCategoryClassifierML(cfg.GetString("tracker.categoryModelPath"))
7374
svc := &hub.TrackerServices{
7475
Ctx: ctx,
7576
Cfg: cfg,
@@ -82,6 +83,7 @@ func main() {
8283
Op: op,
8384
Is: is,
8485
Sc: oci.NewSignatureChecker(cfg, op),
86+
Pcc: pcc,
8587
SetupTrackerSource: tracker.SetupSource,
8688
}
8789

database/migrations/functions/packages/register_package.sql

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ begin
6666
is_operator,
6767
channels,
6868
default_channel,
69+
package_category_id,
6970
repository_id
7071
) values (
7172
v_name,
@@ -75,6 +76,7 @@ begin
7576
(p_pkg->>'is_operator')::boolean,
7677
nullif(p_pkg->'channels', 'null'),
7778
nullif(p_pkg->>'default_channel', ''),
79+
nullif((p_pkg->>'category')::int, 0),
7880
v_repository_id
7981
)
8082
on conflict (repository_id, name) do update
@@ -85,7 +87,8 @@ begin
8587
tsdoc = generate_package_tsdoc(v_name, v_alternative_name, v_display_name, v_description, v_keywords, v_ts_repository, v_ts_publisher),
8688
is_operator = excluded.is_operator,
8789
channels = excluded.channels,
88-
default_channel = excluded.default_channel
90+
default_channel = excluded.default_channel,
91+
package_category_id = excluded.package_category_id
8992
where is_latest(
9093
v_repository_kind_id,
9194
v_version,
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
-- Lookup table with the fixed set of categories a package can be assigned.
create table if not exists package_category (
    package_category_id integer primary key,
    name text not null check (name <> ''),
    display_name text not null check (display_name <> '')
);

-- Seed the known categories. Columns are named explicitly so the statement
-- does not depend on the table's column order, and `on conflict do nothing`
-- keeps the seed re-runnable, in line with `create table if not exists` above.
insert into package_category (package_category_id, name, display_name) values
    (1, 'ai-machine-learning', 'AI / Machine learning'),
    (2, 'database', 'Database'),
    (3, 'integration-delivery', 'Integration and delivery'),
    (4, 'monitoring-logging', 'Monitoring and logging'),
    (5, 'networking', 'Networking'),
    (6, 'security', 'Security'),
    (7, 'storage', 'Storage'),
    (8, 'streaming-messaging', 'Streaming and messaging')
on conflict (package_category_id) do nothing;

-- Category assigned to a package. The column is nullable, so existing rows
-- and packages without a category keep it null.
-- NOTE(review): no foreign key to package_category is declared here; confirm
-- whether one is wanted before adding it, as it would reject unknown ids.
alter table package add column if not exists package_category_id integer;

---- create above / drop below ----

alter table package drop column if exists package_category_id;
drop table if exists package_category;

0 commit comments

Comments
 (0)