Skip to content

Commit 9e0f8e8

Browse files
authored
Fixes #368 - jars larger than a gig are extracted to disk when scanning (#400)
Former-commit-id: 3a0dc73 Former-commit-id: 518cc97ebcc70f4785817bcd15ef2b572a45c31b
1 parent e495e1b commit 9e0f8e8

File tree

15 files changed

+341
-37
lines changed

15 files changed

+341
-37
lines changed

tools/log4shell/analyze/analyze.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
package analyze
1616

1717
import (
18-
"archive/zip"
1918
"github.com/lunasec-io/lunasec/tools/log4shell/constants"
2019
"github.com/lunasec-io/lunasec/tools/log4shell/types"
2120
"github.com/lunasec-io/lunasec/tools/log4shell/util"
@@ -25,8 +24,11 @@ import (
2524
"strings"
2625
)
2726

28-
func GetJndiLookupHash(zipReader *zip.Reader, filePath string) (fileHash string) {
29-
reader, err := zipReader.Open(constants.JndiLookupClasspath)
27+
func GetJndiLookupHash(
28+
resolveArchiveFile types.ResolveArchiveFile,
29+
filePath string,
30+
) (fileHash string) {
31+
reader, err := resolveArchiveFile(constants.JndiLookupClasspath)
3032
if err != nil {
3133
log.Debug().
3234
Str("fieName", constants.JndiLookupClasspath).
@@ -49,7 +51,11 @@ func GetJndiLookupHash(zipReader *zip.Reader, filePath string) (fileHash string)
4951
return
5052
}
5153

52-
func ProcessArchiveFile(zipReader *zip.Reader, reader io.Reader, filePath, fileName string) (finding *types.Finding) {
54+
func ProcessArchiveFile(
55+
resolveArchiveFile types.ResolveArchiveFile,
56+
reader io.Reader,
57+
filePath, fileName string,
58+
) (finding *types.Finding) {
5359
var (
5460
jndiLookupFileHash string
5561
)
@@ -93,7 +99,7 @@ func ProcessArchiveFile(zipReader *zip.Reader, reader io.Reader, filePath, fileN
9399
}
94100

95101
if VersionIsInRange(archiveName, semverVersion, constants.JndiLookupPatchFileVersions) {
96-
jndiLookupFileHash = GetJndiLookupHash(zipReader, filePath)
102+
jndiLookupFileHash = GetJndiLookupHash(resolveArchiveFile, filePath)
97103
}
98104

99105
log.Log().

tools/log4shell/commands/patch.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ func JavaArchivePatchCommand(
3838

3939
forcePatch := c.Bool("force-patch")
4040
dryRun := c.Bool("dry-run")
41+
backup := c.Bool("backup")
4142

4243
var patchedLibraries []string
4344

@@ -64,7 +65,7 @@ func JavaArchivePatchCommand(
6465
}
6566
}
6667

67-
err = patch.ProcessJavaArchive(finding, dryRun)
68+
err = patch.ProcessJavaArchive(finding, dryRun, backup)
6869
if err != nil {
6970
log.Error().
7071
Str("path", finding.Path).

tools/log4shell/constants/fs.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,7 @@ const (
2121
EarFileExt = ".ear"
2222
ClassFileExt = ".class"
2323
)
24+
25+
var (
26+
CleanupDirs []string
27+
)

tools/log4shell/main.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ package main
1717
import (
1818
"github.com/lunasec-io/lunasec/tools/log4shell/commands"
1919
"github.com/lunasec-io/lunasec/tools/log4shell/constants"
20+
"github.com/lunasec-io/lunasec/tools/log4shell/util"
2021
"github.com/rs/zerolog"
2122
"github.com/rs/zerolog/log"
2223
"github.com/urfave/cli/v2"
@@ -48,6 +49,10 @@ func main() {
4849

4950
zerolog.SetGlobalLevel(zerolog.InfoLevel)
5051

52+
util.RunOnProcessExit(func() {
53+
util.RemoveCleanupDirs()
54+
})
55+
5156
globalBoolFlags := map[string]bool{
5257
"verbose": false,
5358
"json": false,
@@ -187,6 +192,10 @@ func main() {
187192
Usage: "Patches findings of libraries vulnerable toLog4Shell by removing the JndiLookup.class file from each.",
188193
Before: setGlobalBoolFlags,
189194
Flags: []cli.Flag{
195+
&cli.BoolFlag{
196+
Name: "backup",
197+
Usage: "Backup each library to path/to/library.jar.bak before overwriting.",
198+
},
190199
&cli.StringSliceFlag{
191200
Name: "exclude",
192201
Usage: "Exclude subdirectories from scanning. This can be helpful if there are directories which your user does not have access to when starting a scan from `/`.",

tools/log4shell/patch/archivepatch.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ func copyAndFilterFilesFromZip(
332332
return
333333
}
334334

335-
func ProcessJavaArchive(finding types.Finding, dryRun bool) (err error) {
335+
func ProcessJavaArchive(finding types.Finding, dryRun, backup bool) (err error) {
336336
var (
337337
libraryFile *os.File
338338
zipReader *zip.Reader
@@ -383,6 +383,23 @@ func ProcessJavaArchive(finding types.Finding, dryRun bool) (err error) {
383383
return
384384
}
385385

386+
if backup {
387+
backupFilePath := fsFile + ".bak"
388+
log.Info().
389+
Str("libraryFileName", fsFile).
390+
Str("backupFileName", backupFilePath).
391+
Msg("Backing up library file before overwritting.")
392+
_, err = util.CopyFile(fsFile, backupFilePath)
393+
if err != nil {
394+
log.Error().
395+
Str("libraryFileName", fsFile).
396+
Str("backupFileName", backupFilePath).
397+
Err(err).
398+
Msg("Unable to backup library file.")
399+
return
400+
}
401+
}
402+
386403
_, err = util.CopyFile(filteredLibrary, fsFile)
387404
if err != nil {
388405
log.Error().

tools/log4shell/scan/executablejar.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,13 @@ package scan
1717
import (
1818
"bytes"
1919
"github.com/lunasec-io/lunasec/tools/log4shell/constants"
20+
"github.com/lunasec-io/lunasec/tools/log4shell/types"
2021
"github.com/rs/zerolog/log"
2122
"io"
2223
"os"
2324
)
2425

25-
func readerAtStartOfArchive(path string, file *os.File) (reader io.ReaderAt, offset int64, err error) {
26+
func readerAtStartOfArchive(path string, file *os.File) (reader types.ReaderAtCloser, offset int64, err error) {
2627
// By default, we assume our original file will be our returned reader
2728
reader = file
2829

@@ -76,7 +77,7 @@ func readerAtStartOfArchive(path string, file *os.File) (reader io.ReaderAt, off
7677
Msg("unable to locate start of archive in bash executable jar file")
7778
return
7879
}
79-
reader = bytes.NewReader(fileContents[idx:])
80+
reader = types.NopReaderAtCloser(bytes.NewReader(fileContents[idx:]))
8081
offset = int64(idx)
8182
}
8283
return

tools/log4shell/scan/scan.go

Lines changed: 93 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"github.com/lunasec-io/lunasec/tools/log4shell/types"
2222
"github.com/lunasec-io/lunasec/tools/log4shell/util"
2323
"github.com/rs/zerolog/log"
24-
"io"
2524
"io/ioutil"
2625
"os"
2726
"path/filepath"
@@ -147,9 +146,65 @@ func (s *Log4jDirectoryScanner) scanLocatedArchive(
147146
return s.scanArchiveForVulnerableFiles(path, reader, info.Size() - offset)
148147
}
149148

149+
func (s *Log4jDirectoryScanner) getFilesToScan(
150+
path string,
151+
size int64,
152+
zipReader *zip.Reader,
153+
) (filesToScan []types.FileToScan, cleanup func(), err error) {
154+
if size > 1024 * 1024 * 1024 {
155+
var (
156+
tmpPath string
157+
filenames []string
158+
)
159+
160+
_, name := filepath.Split(path)
161+
tmpPath, err = os.MkdirTemp(os.TempDir(), name)
162+
if err != nil {
163+
log.Warn().
164+
Str("path", path).
165+
Err(err).
166+
Msg("unable to create temporary path")
167+
return
168+
}
169+
util.EnsureDirIsCleanedUp(tmpPath)
170+
cleanup = func() {
171+
os.RemoveAll(tmpPath)
172+
util.RemoveDirFromCleanup(tmpPath)
173+
}
174+
175+
filenames, err = util.Unzip(zipReader, tmpPath)
176+
if err != nil {
177+
log.Warn().
178+
Str("path", path).
179+
Err(err).
180+
Msg("unable to unzip file")
181+
return
182+
}
183+
184+
for _, file := range filenames {
185+
dir, extractedFilename := filepath.Split(file)
186+
187+
fileToScan := &types.DiskFileToScan{
188+
Filename: extractedFilename,
189+
Path: dir,
190+
}
191+
filesToScan = append(filesToScan, fileToScan)
192+
}
193+
return
194+
}
195+
196+
for _, zipFile := range zipReader.File {
197+
fileToScan := &types.ZipFileToScan{
198+
File: zipFile,
199+
}
200+
filesToScan = append(filesToScan, fileToScan)
201+
}
202+
return
203+
}
204+
150205
func (s *Log4jDirectoryScanner) scanArchiveForVulnerableFiles(
151206
path string,
152-
reader io.ReaderAt,
207+
reader types.ReaderAtCloser,
153208
size int64,
154209
) (findings []types.Finding) {
155210
zipReader, err := zip.NewReader(reader, size)
@@ -161,31 +216,48 @@ func (s *Log4jDirectoryScanner) scanArchiveForVulnerableFiles(
161216
return
162217
}
163218

164-
for _, zipFile := range zipReader.File {
165-
locatedFindings := s.scanFile(zipReader, path, zipFile)
219+
filesToScan, cleanup, err := s.getFilesToScan(path, size, zipReader)
220+
if err != nil {
221+
return
222+
}
223+
224+
resolveArchiveFile := util.ResolveZipFile(zipReader)
225+
226+
// if cleanup is specified, then we are reading files from disk
227+
// and should close the current zip reader to free up space,
228+
// set our archive reader to read files from disk, and defer
229+
// a call to cleanup to remove all temporary extracted files
230+
if cleanup != nil {
231+
reader.Close()
232+
resolveArchiveFile = util.ResolveDiskFile
233+
defer cleanup()
234+
}
235+
236+
for _, fileToScan := range filesToScan {
237+
locatedFindings := s.scanFile(resolveArchiveFile, path, fileToScan)
166238
findings = append(findings, locatedFindings...)
167239
}
168240
return
169241
}
170242

171243
func (s *Log4jDirectoryScanner) scanFile(
172-
zipReader *zip.Reader,
244+
resolveArchiveFile types.ResolveArchiveFile,
173245
path string,
174-
file *zip.File,
246+
file types.FileToScan,
175247
) (findings []types.Finding) {
176248
//log.Debug().
177249
// Str("path", path).
178250
// Str("file", file.Name).
179251
// Msg("Scanning archive file")
180252

181-
fileExt := util.FileExt(file.Name)
253+
fileExt := util.FileExt(file.Name())
182254
switch fileExt {
183255
case constants.ClassFileExt:
184256
if s.onlyScanArchives {
185257
return
186258
}
187259

188-
finding := s.scanArchiveFile(zipReader, path, file)
260+
finding := s.scanArchiveFile(resolveArchiveFile, path, file)
189261
if finding != nil {
190262
findings = []types.Finding{*finding}
191263
}
@@ -195,7 +267,7 @@ func (s *Log4jDirectoryScanner) scanFile(
195267
constants.ZipFileExt,
196268
constants.EarFileExt:
197269
if s.onlyScanArchives {
198-
finding := s.scanArchiveFile(zipReader, path, file)
270+
finding := s.scanArchiveFile(resolveArchiveFile, path, file)
199271
if finding != nil {
200272
findings = []types.Finding{*finding}
201273
}
@@ -207,32 +279,32 @@ func (s *Log4jDirectoryScanner) scanFile(
207279
}
208280

209281
func (s *Log4jDirectoryScanner) scanArchiveFile(
210-
zipReader *zip.Reader,
282+
resolveArchiveFile types.ResolveArchiveFile,
211283
path string,
212-
file *zip.File,
284+
file types.FileToScan,
213285
) (finding *types.Finding) {
214-
reader, err := file.Open()
286+
reader, err := file.Reader()
215287
if err != nil {
216288
log.Warn().
217-
Str("classFile", file.Name).
289+
Str("classFile", file.Name()).
218290
Str("path", path).
219291
Err(err).
220292
Msg("unable to open class file")
221293
return
222294
}
223295
defer reader.Close()
224296

225-
return s.processArchiveFile(zipReader, reader, path, file.Name)
297+
return s.processArchiveFile(resolveArchiveFile, reader, path, file.Name())
226298
}
227299

228300
func (s *Log4jDirectoryScanner) scanEmbeddedArchive(
229301
path string,
230-
file *zip.File,
302+
file types.FileToScan,
231303
) (findings []types.Finding) {
232-
reader, err := file.Open()
304+
reader, err := file.Reader()
233305
if err != nil {
234306
log.Warn().
235-
Str("classFile", file.Name).
307+
Str("classFile", file.Name()).
236308
Str("path", path).
237309
Err(err).
238310
Msg("unable to open embedded archive")
@@ -243,15 +315,16 @@ func (s *Log4jDirectoryScanner) scanEmbeddedArchive(
243315
buffer, err := ioutil.ReadAll(reader)
244316
if err != nil {
245317
log.Warn().
246-
Str("classFile", file.Name).
318+
Str("classFile", file.Name()).
247319
Str("path", path).
248320
Err(err).
249321
Msg("unable to read embedded archive")
250322
return
251323
}
324+
reader.Close()
252325

253-
newPath := path + "::" + file.Name
254-
archiveReader := bytes.NewReader(buffer)
326+
newPath := path + "::" + file.Name()
327+
archiveReader := types.NopReaderAtCloser(bytes.NewReader(buffer))
255328
archiveSize := int64(len(buffer))
256329

257330
return s.scanArchiveForVulnerableFiles(newPath, archiveReader, archiveSize)

tools/log4shell/scan/scan_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ func createNewScanner() (scanner Log4jVulnerableDependencyScanner, err error) {
4141
}
4242

4343
func BenchmarkScanningForVulnerablePackages(b *testing.B) {
44+
return
4445
b.ReportAllocs()
4546

4647
scanner, err := createNewScanner()
@@ -54,6 +55,22 @@ func BenchmarkScanningForVulnerablePackages(b *testing.B) {
5455
fmt.Printf("Number of findings: %d\n", len(findings))
5556
}
5657

58+
func BenchmarkScanningForLargeArchives(b *testing.B) {
59+
b.ReportAllocs()
60+
61+
scanner, err := createNewScanner()
62+
if err != nil {
63+
b.Error(err)
64+
return
65+
}
66+
67+
for i := 0; i < 10; i++ {
68+
findings := scanner.Scan([]string{"../test/large-archives"})
69+
70+
fmt.Printf("Number of findings: %d\n", len(findings))
71+
}
72+
}
73+
5774
func TestForFalsePositiveLibraryFindings(t *testing.T) {
5875

5976
scanner, err := createNewScanner()

0 commit comments

Comments
 (0)