-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
315 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.env | ||
.randomnotes/ | ||
.git/ | ||
*.pyc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Build arguments declared before FROM are only visible to FROM lines.
ARG REGISTRY=
ARG MANIFEST_REGISTRY=ghcr.io/
ARG BASE_IMAGE=cccs/assemblyline-v4-service-base:stable
FROM ${BASE_IMAGE}

# Python path of the service class.
ENV SERVICE_PATH=service.al_run.AssemblylineService

USER root
RUN apt-get update && apt-get upgrade -y

USER assemblyline
COPY requirements.txt requirements.txt

RUN pip install --no-cache-dir --user --requirement requirements.txt && rm -rf ~/.cache/pip

WORKDIR /opt/al_service
COPY . .

USER root
# Re-declare MANIFEST_REGISTRY inside the build stage: an ARG declared before
# FROM is out of scope here, so without this line the sed below would replace
# the registry part of the image name with an empty string.
ARG MANIFEST_REGISTRY
ARG BASE_TAG=4.5.0.stable
# Rewrite the registry prefix of the image names in the manifest and replace
# the $SERVICE_TAG placeholder with BASE_TAG followed by the VERSION file.
RUN sed -i "s|\(image: \${REGISTRY}\).*\(kam193/.*\)|\1$MANIFEST_REGISTRY\2|g" service_manifest.yml && \
    sed -i "s/\$SERVICE_TAG/$BASE_TAG$(cat VERSION)/g" service_manifest.yml

USER assemblyline
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
include ../common.mk | ||
|
||
AL_SERVICE_NAME=Semgrep | ||
# SERVICE_NAME=assemblyline-service-template |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Semgrep | ||
|
||
Service using [Semgrep](https://semgrep.dev) to analyze code for malicious activity. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
assemblyline-v4-service | ||
semgrep |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
rules: | ||
- id: exec-call | ||
pattern: exec(...) | ||
message: Executing code dynamically | ||
severity: WARNING | ||
languages: | ||
- python |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
import hashlib | ||
import json | ||
import subprocess | ||
import tempfile | ||
from collections import defaultdict | ||
from typing import Iterable | ||
|
||
from assemblyline_v4_service.common.base import ServiceBase | ||
from assemblyline_v4_service.common.request import ServiceRequest | ||
from assemblyline_v4_service.common.result import ( | ||
Result, | ||
ResultMemoryDumpSection, | ||
ResultMultiSection, | ||
ResultTextSection, | ||
) | ||
|
||
# Rule file handed to Semgrep through the ``--config`` option.
RULES = "sample_rules/exec-rule.yaml"

# Command-line flags included in every Semgrep invocation.
BASE_CONFIG = [
    "--metrics=off",
    "--quiet",
    "--error",
    "--no-autofix",
    "--no-git-ignore",
    "--scan-unknown-extensions",
    "--disable-version-check",
    "--disable-nosem",
    "--json",
]

# Semgrep severity name -> heuristic ID attached to the result section.
SEVERITY_TO_HEURISTIC = dict(INFO=3, WARNING=1, ERROR=2)
|
||
|
||
class AssemblylineService(ServiceBase):
    """Assemblyline service that scans the submitted file with the Semgrep CLI.

    Matches reported by Semgrep are grouped by rule; every rule becomes one
    result section carrying a heuristic, and every match becomes a child
    section showing the matched lines.
    """

    def __init__(self, config=None):
        super().__init__(config)

    def _load_config(self):
        """Read the Semgrep limits from the service configuration.

        ``SEMGREP_RULE_TIMEOUT`` (default 10) and ``SEMGREP_RAM_LIMIT_MB``
        (default 400) are stored as strings and later passed to the CLI as
        ``--timeout`` and ``--max-memory``. ``SEMGREP_CLI_TIMEOUT``
        (default 60) bounds the whole subprocess call.
        """
        self._semgrep_config = {
            "timeout": str(self.config.get("SEMGREP_RULE_TIMEOUT", 10)),
            "max-memory": str(self.config.get("SEMGREP_RAM_LIMIT_MB", 400)),
        }
        self._cli_timeout = int(self.config.get("SEMGREP_CLI_TIMEOUT", 60))

    def start(self):
        """Load the configuration when the service starts."""
        self.log.info(f"start() from {self.service_attributes.name} service called")
        self._load_config()

        self.log.info(f"{self.service_attributes.name} service started")

    # def _load_rules(self) -> None:
    #     pass

    def _execute_semgrep(self, file_path: str) -> dict:
        """Run Semgrep on ``file_path`` and return its parsed JSON output.

        Args:
            file_path: Path of the file to scan.

        Returns:
            The decoded JSON document when Semgrep exits with code 1
            (findings present), otherwise an empty dict. Any exit code other
            than 0 or 1 is logged as an error.

        Raises:
            subprocess.TimeoutExpired: The CLI ran longer than the configured
                ``SEMGREP_CLI_TIMEOUT``.
            json.JSONDecodeError: Semgrep exited with code 1 but its stdout
                was not valid JSON.
        """
        cmd = ["semgrep", *BASE_CONFIG]
        for option, value in self._semgrep_config.items():
            cmd.extend((f"--{option}", value))
        cmd.extend(("--config", RULES, file_path))

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=self._cli_timeout,
        )

        self.log.debug("Semgrep result: %s", result.stdout)

        # Something was found
        if result.returncode == 1:
            return json.loads(result.stdout)
        if result.returncode != 0:
            self.log.error("Error running semgrep (%d) %s", result.returncode, result.stderr)
        return {}

    def _get_code_hash(self, code: str) -> str:
        """Return ``"code.<sha256>"`` of the stripped code, or ``""`` if blank."""
        code = (code or "").strip()
        if not code:
            return ""
        code_hash = hashlib.sha256(code.encode()).hexdigest()
        return f"code.{code_hash}"

    def _process_results(self, results: list[dict]) -> Iterable[ResultMultiSection]:
        """Convert Semgrep matches into result sections, one per rule.

        Args:
            results: Entries of the ``results`` list from Semgrep's JSON
                output. Each entry must provide ``check_id``,
                ``start.line`` and ``extra.lines``.

        Yields:
            One section per rule. The message, severity and metadata are
            taken from the first match of that rule.
        """
        result_by_rule = defaultdict(list)
        for result in results:
            result_by_rule[result["check_id"]].append(result)

        # Unknown severities previously mapped to heuristic ID 0, which the
        # service manifest does not declare; report them as informative.
        # NOTE(review): assumes an undeclared heuristic ID is rejected by the
        # framework - confirm against the Assemblyline service documentation.
        fallback_heuristic = SEVERITY_TO_HEURISTIC["INFO"]

        for rule_id, matches in result_by_rule.items():
            extra = matches[0].get("extra", {})
            message = extra.get("message", "")
            severity = extra.get("severity", "INFO")
            heuristic = SEVERITY_TO_HEURISTIC.get(severity.upper(), fallback_heuristic)
            metadata = extra.get("metadata", {})
            # Prefer an explicit title/name; otherwise use the message start.
            title = metadata.get("title", metadata.get("name", message[:50]))
            attack_id = metadata.get("attack_id")
            section = ResultTextSection(
                title,
                zeroize_on_tag_safe=True,
            )
            section.add_line(message)
            section.set_heuristic(heuristic, signature=rule_id, attack_id=attack_id)
            for match in matches:
                matched_lines = match["extra"]["lines"]
                code_hash = self._get_code_hash(matched_lines)
                # Blank matches have no hash; do not emit an empty tag value.
                tag_values = [value for value in (code_hash, rule_id) if value]
                ResultMemoryDumpSection(
                    f"Match at line {match['start']['line']}",
                    body=matched_lines,
                    parent=section,
                    zeroize_on_tag_safe=True,
                    tags={"file.rule.semgrep": tag_values},
                )
                if code_hash:
                    section.add_tag("file.rule.semgrep", code_hash)
                # subsection.set_heuristic(heuristic, signature=rule_id, attack_id=attack_id)
            yield section

    def execute(self, request: ServiceRequest) -> None:
        """Scan the request's file and attach the findings to the request.

        When Semgrep reports results, one section per rule is added and the
        full JSON output is attached as ``semgrep_results.json``.
        """
        result = Result()
        request.result = result

        results = self._execute_semgrep(request.file_path)
        request.set_service_context(f"Semgrep™ OSS {results.get('version', '')}")
        if res_list := results.get("results", []):
            # main_section = ResultTextSection("Results from Semgrep™ OSS Engine")
            # result.add_section(main_section)
            for result_section in self._process_results(res_list):
                result.add_section(result_section)

            with tempfile.NamedTemporaryFile("w", delete=False) as f:
                json.dump(results, f, indent=2)
            # Register the file only after it is closed, so the buffered JSON
            # has been written to disk before the file is read.
            request.add_supplementary(f.name, "semgrep_results.json", "Semgrep™ OSS Results")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import os | ||
import pathlib | ||
|
||
import yaml | ||
from assemblyline.odm.models.signature import Signature | ||
from assemblyline_v4_service.updater.updater import ServiceUpdater | ||
|
||
|
||
class AssemblylineServiceUpdater(ServiceUpdater):
    """Updater that imports Semgrep rule files as Assemblyline signatures."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Directory from UPDATER_DIR, defaulting to /tmp/updater.
        self.persistent_dir = pathlib.Path(os.getenv("UPDATER_DIR", "/tmp/updater"))
        # NOTE(review): bare attribute access kept from the original; it has
        # no visible effect here - confirm whether it can be removed.
        self.client

    # def do_source_update(
    #     self, service: Service, specific_sources: list[str] = []
    # ) -> None:
    #     pass

    # def is_valid(self, file_path) -> bool:
    #     return True

    def import_update(self, files_sha256, source, default_classification) -> None:
        """Create one signature per rule found in the given YAML files.

        Args:
            files_sha256: Iterable of ``(file_path, sha256)`` pairs; only the
                path is used.
            source: Name of the signature source the rules belong to.
            default_classification: Classification assigned to every
                signature.

        Files that are empty, whose top level is not a mapping, or that have
        no ``rules`` entries contribute no signatures instead of raising.
        """
        # output_dir = os.path.join(self.latest_updates_dir, source)
        # os.makedirs(os.path.join(self.latest_updates_dir, source), exist_ok=True)
        signatures: list[Signature] = []
        for file, _ in files_sha256:
            with open(file, "r") as f:
                document = yaml.safe_load(f)

            # safe_load returns None for an empty file and may return a
            # non-mapping; "rules:" without a value also loads as None.
            rules = document.get("rules") if isinstance(document, dict) else None

            for rule in rules or []:
                signature = Signature(
                    dict(
                        classification=default_classification,
                        data=yaml.dump(rule),
                        name=rule["id"],
                        source=source,
                        status="DEPLOYED",
                        type="semgrep",
                        revision=1,
                        signature_id=rule["id"],
                    )
                )
                signatures.append(signature)

        self.client.signature.add_update_many(source, "semgrep", signatures)
|
||
# def prepare_output_directory(self) -> str: | ||
# tempdir = tempfile.mkdtemp() | ||
# shutil.copytree(self.latest_updates_dir, tempdir, dirs_exist_ok=True) | ||
# return tempdir | ||
|
||
|
||
if __name__ == "__main__":
    # Start the updater and keep serving until the process is stopped.
    with AssemblylineServiceUpdater(default_pattern=".*") as updater:
        updater.serve_forever()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
name: Semgrep | ||
version: $SERVICE_TAG | ||
description: Uses Semgrep to analyze code for malicious activity. | ||
enabled: true | ||
|
||
accepts: code/* | ||
rejects: empty | ||
stage: CORE | ||
category: Static Analysis | ||
uses_tags: false | ||
file_required: true | ||
timeout: 90 | ||
is_external: false | ||
|
||
config: | ||
SEMGREP_RAM_LIMIT_MB: "400" | ||
SEMGREP_RULE_TIMEOUT: "10" | ||
SEMGREP_CLI_TIMEOUT: 60 | ||
|
||
# submission_params: | ||
# - default: "auto" | ||
# name: platform | ||
# type: list | ||
# value: "auto" | ||
# list: ["auto", "linux"] | ||
|
||
# -1000: safe | ||
# 0 - 299: informative | ||
# 300 - 699: suspicious | ||
# 700 - 999: highly suspicious | ||
# >= 1000: malicious | ||
|
||
heuristics: | ||
- description: Suspicious code pattern | ||
filetype: "*" | ||
heur_id: 1 | ||
name: Score | ||
score: 300 | ||
max_score: 500 | ||
- description: Malicious code pattern | ||
filetype: "*" | ||
heur_id: 2 | ||
name: Score | ||
score: 1000 | ||
max_score: 2000 | ||
- description: Informative code pattern | ||
filetype: "*" | ||
heur_id: 3 | ||
name: Score | ||
score: 10 | ||
max_score: 50 | ||
|
||
docker_config: | ||
image: ${REGISTRY}ghcr.io/kam193/assemblyline-service-semgrep:$SERVICE_TAG | ||
cpu_cores: 1.0 | ||
ram_mb: 512 | ||
ram_mb_min: 256 | ||
allow_internet_access: false | ||
|
||
update_config: | ||
update_interval_seconds: 86400 # 1 day | ||
generates_signatures: true | ||
wait_for_update: true | ||
sources: | ||
- uri: https://gist.githubusercontent.com/kam193/474547be4a37bb7990caa4e26ee542e4/raw/a0d3783f38e156cfda5df63f426bfa8d36cc4027/example_rule.yaml | ||
name: example_rules | ||
|
||
dependencies: | ||
updates: | ||
container: | ||
ram_mb: 512 | ||
ram_mb_min: 128 | ||
allow_internet_access: true | ||
command: ["python", "-m", "service.updater"] | ||
image: ${REGISTRY}ghcr.io/kam193/assemblyline-service-semgrep:$SERVICE_TAG | ||
ports: ["5003"] | ||
run_as_core: True |