Skip to content

Commit b885946

Browse files
authored
Merge pull request #2 from PotatoesFall/memory
Reduce memory usage
2 parents 3a163cc + 8d198c9 commit b885946

File tree

3 files changed

+82
-95
lines changed

3 files changed

+82
-95
lines changed

Dockerfile

+4-1
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@ ARG TARGETARCH
44

55
WORKDIR /go/src
66

7-
COPY . .
7+
# Copy only the module files ahead of `go mod download`. With more than one
# source, the destination must be a directory path ending in "/".
COPY go.mod go.sum ./
88

99
RUN go mod download
10+
11+
COPY . .
12+
1013
RUN mkdir /data
1114

1215
RUN GOOS=linux GOARCH=${TARGETARCH} go build -ldflags "-s -w" -o entropy .

main.go

+49-35
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,38 @@ type Entropy struct {
3535
Line string // Line with high entropy
3636
}
3737

38+
// Entropies should be created with a size n using make()
39+
// it should not be written to manually, instead use Entropies.Add
40+
type Entropies struct {
41+
sync.Mutex
42+
Entropies []Entropy
43+
}
44+
45+
// Add assumes that es contains an ordered set of entropies.
46+
// It preserves ordering, and inserts an additional value e, if it has high enough entropy.
47+
// In that case, the entry with lowest entropy is rejected.
48+
func (es *Entropies) Add(e Entropy) {
49+
es.Lock()
50+
defer es.Unlock()
51+
52+
if es.Entropies[len(es.Entropies)-1].Entropy >= e.Entropy {
53+
return
54+
}
55+
56+
i, _ := slices.BinarySearchFunc(es.Entropies, e, func(a, b Entropy) int {
57+
if b.Entropy > a.Entropy {
58+
return 1
59+
}
60+
if a.Entropy > b.Entropy {
61+
return -1
62+
}
63+
return 0
64+
})
65+
66+
copy(es.Entropies[i+1:], es.Entropies[i:])
67+
es.Entropies[i] = e
68+
}
69+
3870
func main() {
3971
minCharactersFlag := flag.Int("min", minCharactersDefault, "Minimum number of characters in the line to consider computing entropy")
4072
resultCountFlag := flag.Int("top", resultCountDefault, "Number of results to display")
@@ -64,17 +96,16 @@ func main() {
6496
fmt.Println("No files provided, defaults to current folder.")
6597
fileNames = []string{"."}
6698
}
67-
entropies := make([]Entropy, 0, 10*len(fileNames))
99+
entropies := &Entropies{
100+
Entropies: make([]Entropy, resultCount),
101+
}
68102
for _, fileName := range fileNames {
69-
fileEntropies, err := readFile(fileName)
103+
err := readFile(entropies, fileName)
70104
if err != nil {
71105
fmt.Fprintf(os.Stderr, "Error reading file %s: %v\n", fileName, err)
72106
}
73-
entropies = append(entropies, fileEntropies...)
74107
}
75108

76-
entropies = sortAndCutTop(entropies)
77-
78109
redMark := "\033[31m"
79110
resetMark := "\033[0m"
80111
if !term.IsTerminal(int(os.Stdout.Fd())) {
@@ -83,59 +114,54 @@ func main() {
83114
resetMark = ""
84115
}
85116

86-
for _, entropy := range entropies {
117+
for _, entropy := range entropies.Entropies {
118+
if entropy == (Entropy{}) {
119+
return
120+
}
87121
fmt.Printf("%.2f: %s%s:%d%s %s\n", entropy.Entropy, redMark, entropy.File, entropy.LineNum, resetMark, entropy.Line)
88122
}
89123
}
90124

91-
func readFile(fileName string) ([]Entropy, error) {
125+
func readFile(entropies *Entropies, fileName string) error {
92126
// If file is a folder, walk inside the folder
93127
fileInfo, err := os.Stat(fileName)
94128
if err != nil {
95-
return nil, err
129+
return err
96130
}
97131

98132
if isFileHidden(fileInfo.Name()) && !exploreHidden {
99-
return nil, nil
133+
return nil
100134
}
101135

102-
entropies := make([]Entropy, 0, 10)
103136
if fileInfo.IsDir() {
104137
// Walk through the folder and read all files
105138
dir, err := os.ReadDir(fileName)
106139
if err != nil {
107-
return nil, err
140+
return err
108141
}
109142

110-
entropiies := make([][]Entropy, len(dir))
111-
112143
var wg sync.WaitGroup
113144
for i, file := range dir {
114145
wg.Add(1)
115146
go func(i int, file os.DirEntry) {
116147
defer wg.Done()
117-
fileEntropies, err := readFile(fileName + "/" + file.Name())
148+
err := readFile(entropies, fileName+"/"+file.Name())
118149
if err != nil {
119150
fmt.Fprintf(os.Stderr, "Error reading file %s: %v\n", file.Name(), err)
120151
}
121-
entropiies[i] = fileEntropies
122152
}(i, file)
123153
}
124154

125155
wg.Wait()
126-
127-
for _, fileEntropies := range entropiies {
128-
entropies = append(entropies, fileEntropies...)
129-
}
130156
}
131157

132158
if !isFileIncluded(fileInfo.Name()) {
133-
return sortAndCutTop(entropies), nil
159+
return nil
134160
}
135161

136162
file, err := os.Open(fileName)
137163
if err != nil {
138-
return nil, err
164+
return err
139165
}
140166
defer file.Close()
141167

@@ -150,7 +176,7 @@ func readFile(fileName string) ([]Entropy, error) {
150176
continue
151177
}
152178

153-
entropies = append(entropies, Entropy{
179+
entropies.Add(Entropy{
154180
Entropy: entropy(field),
155181
File: fileName,
156182
LineNum: i,
@@ -159,7 +185,7 @@ func readFile(fileName string) ([]Entropy, error) {
159185
}
160186
}
161187

162-
return sortAndCutTop(entropies), nil
188+
return nil
163189
}
164190

165191
func entropy(text string) float64 {
@@ -210,15 +236,3 @@ func isFileIncluded(filename string) bool {
210236

211237
return false
212238
}
213-
214-
func sortAndCutTop(entropies []Entropy) []Entropy {
215-
slices.SortFunc(entropies, func(a, b Entropy) int {
216-
return int((b.Entropy - a.Entropy) * 10000)
217-
})
218-
219-
if len(entropies) > resultCount {
220-
return entropies[:resultCount]
221-
}
222-
223-
return entropies
224-
}

main_test.go

+29-59
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@ import (
55
)
66

77
func BenchmarkFile(b *testing.B) {
8+
entropies := &Entropies{Entropies: make([]Entropy, 10)}
89
for range b.N {
9-
readFile("testdata")
10+
_ = readFile(entropies, "testdata")
1011
}
1112
}
1213

@@ -46,82 +47,38 @@ func TestEntropy(t *testing.T) {
4647

4748
func TestReadFile(t *testing.T) {
4849
t.Run("random.js", func(t *testing.T) {
49-
res, err := readFile("testdata/random.js")
50+
res := &Entropies{Entropies: make([]Entropy, 10)}
51+
err := readFile(res, "testdata/random.js")
5052
if err != nil {
5153
t.Errorf("expected nil, got %v", err)
5254
}
5355

54-
Expect(t, len(res), 10)
55-
ExpectFloat(t, res[0].Entropy, 5.53614242151549)
56-
Expect(t, res[0].LineNum, 7) // The token is hidden here
57-
ExpectFloat(t, res[4].Entropy, 3.321928094887362)
56+
ExpectFloat(t, res.Entropies[0].Entropy, 5.53614242151549)
57+
Expect(t, res.Entropies[0].LineNum, 7) // The token is hidden here
58+
ExpectFloat(t, res.Entropies[4].Entropy, 3.321928094887362)
5859
})
5960

6061
t.Run("testdata/folder", func(t *testing.T) {
61-
res, err := readFile("testdata/folder")
62+
res := &Entropies{Entropies: make([]Entropy, 10)}
63+
err := readFile(res, "testdata/folder")
6264
if err != nil {
6365
t.Errorf("expected nil, got %v", err)
6466
}
6567

66-
Expect(t, len(res), 10)
67-
ExpectFloat(t, res[0].Entropy, 3.7667029194153567)
68-
Expect(t, res[0].LineNum, 7) // The token is hidden here
69-
ExpectFloat(t, res[6].Entropy, 2.8553885422075336)
68+
Expect(t, len(res.Entropies), 10)
69+
ExpectFloat(t, res.Entropies[0].Entropy, 3.7667029194153567)
70+
Expect(t, res.Entropies[0].LineNum, 7) // The token is hidden here
71+
ExpectFloat(t, res.Entropies[6].Entropy, 2.8553885422075336)
7072
})
7173

7274
t.Run("dangling symlink in testdata folder", func(t *testing.T) {
73-
res, err := readFile("testdata")
75+
res := &Entropies{Entropies: make([]Entropy, 10)}
76+
err := readFile(res, "testdata")
7477
if err != nil {
7578
t.Errorf("expected nil, got %v", err)
7679
}
7780

78-
Expect(t, len(res), 10)
79-
})
80-
}
81-
82-
func TestSortAndCutTop(t *testing.T) {
83-
resultCount = 5
84-
85-
t.Run("nil", func(t *testing.T) {
86-
res := sortAndCutTop(nil)
87-
if len(res) != 0 {
88-
t.Errorf("expected 0, got %d", len(res))
89-
}
90-
})
91-
92-
t.Run("empty", func(t *testing.T) {
93-
res := sortAndCutTop([]Entropy{})
94-
if len(res) != 0 {
95-
t.Errorf("expected 0, got %d", len(res))
96-
}
97-
})
98-
99-
t.Run("less than resultCount", func(t *testing.T) {
100-
res := sortAndCutTop([]Entropy{
101-
{Entropy: 0.1},
102-
{Entropy: 0.6},
103-
{Entropy: 0.3},
104-
})
105-
106-
Expect(t, len(res), 3)
107-
Expect(t, res[0].Entropy, 0.6)
108-
Expect(t, res[2].Entropy, 0.1)
109-
})
110-
111-
t.Run("more than resultCount", func(t *testing.T) {
112-
res := sortAndCutTop([]Entropy{
113-
{Entropy: 0.1},
114-
{Entropy: 0.6},
115-
{Entropy: 0.3},
116-
{Entropy: 0.7},
117-
{Entropy: 0.4},
118-
{Entropy: 0.5},
119-
{Entropy: 0.2},
120-
})
121-
122-
Expect(t, len(res), 5)
123-
Expect(t, res[0].Entropy, 0.7)
124-
Expect(t, res[4].Entropy, 0.3)
81+
Expect(t, len(res.Entropies), 10)
12582
})
12683
}
12784

@@ -164,6 +121,19 @@ func TestIsFileHidden(t *testing.T) {
164121
Expect(t, isFileHidden(".env"), true)
165122
}
166123

124+
func TestEntropies(t *testing.T) {
125+
res := &Entropies{Entropies: make([]Entropy, 5)}
126+
for _, i := range []float64{1, 3, 5, 7, 2, 4, 6, 8} {
127+
res.Add(Entropy{Entropy: i})
128+
}
129+
130+
Expect(t, res.Entropies[0].Entropy, 8)
131+
Expect(t, res.Entropies[1].Entropy, 7)
132+
Expect(t, res.Entropies[2].Entropy, 6)
133+
Expect(t, res.Entropies[3].Entropy, 5)
134+
Expect(t, res.Entropies[4].Entropy, 4)
135+
}
136+
167137
func Expect[T comparable](t *testing.T, got, expected T) {
168138
t.Helper()
169139
if got != expected {

0 commit comments

Comments
 (0)