Skip to content

Commit 5d28b16

Browse files
committed
Add title similarity score for sorting entries of same quality. Priority over seed count
Set torrent rename default as true
1 parent ab8e870 commit 5d28b16

File tree

7 files changed

+105
-36
lines changed

7 files changed

+105
-36
lines changed

cmd/service/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func main() {
6363
Sources: config.Sources,
6464
Qualitites: config.Qualities,
6565
Category: config.Category,
66-
RenameTorrent: config.RenameTorrent,
66+
RenameTorrent: *utils.Coalesce(config.RenameTorrent, utils.Pointer(true)),
6767
DownloadPath: config.DownloadPath,
6868
CreateShowFolder: config.CreateShowFolder,
6969
PollFrequency: config.PollFrequency,

internal/configs/configs.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"time"
77

88
"github.com/rs/zerolog/log"
9+
"github.com/sonalys/animeman/internal/utils"
910
"gopkg.in/yaml.v3"
1011
)
1112

@@ -90,7 +91,7 @@ type TorrentConfig struct {
9091
Category string `yaml:"category"`
9192
DownloadPath string `yaml:"downloadPath"`
9293
CreateShowFolder bool `yaml:"createShowFolder"`
93-
RenameTorrent bool `yaml:"renameTorrent"`
94+
RenameTorrent *bool `yaml:"renameTorrent,omitempty"`
9495
}
9596

9697
func (c TorrentConfig) Validate() error {
@@ -144,7 +145,7 @@ func GenerateBoilerplateConfig() {
144145
Username: "admin",
145146
Password: "adminadmin",
146147
CreateShowFolder: true,
147-
RenameTorrent: true,
148+
RenameTorrent: utils.Pointer(true),
148149
Type: TorrentClientTypeQBittorrent,
149150
},
150151
})

internal/discovery/run.go

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ import (
44
"context"
55
"errors"
66
"fmt"
7+
"regexp"
78
"sort"
9+
"strings"
810

911
"github.com/rs/zerolog/log"
1012
"github.com/sonalys/animeman/integrations/nyaa"
@@ -60,31 +62,80 @@ func episodeFilterNew(list []parser.ParsedNyaa, latestTag string, excludeBatch b
6062
return out
6163
}
6264

65+
// notWordDigitOrSpace matches every character that is not a letter, a digit or a space.
// Unicode classes are used so that non-Latin titles are not erased entirely,
// which would make unrelated titles compare as equal.
var notWordDigitOrSpace = regexp.MustCompile(`[^\p{L}\p{N} ]`)

// calculateTitleSimilarityScore returns a value between 0 and 1 for how similar the titles are.
//
// Both titles are lower-cased, stripped of punctuation and split into words.
// The score is the fraction of the words of title that are also present in originalTitle;
// every word of originalTitle can be consumed by at most one word of title.
// A title that contains no word at all scores 0.
func calculateTitleSimilarityScore(originalTitle, title string) float64 {
	// strings.Fields is used instead of strings.Split so that consecutive, leading or
	// trailing spaces (often left behind by stripped punctuation) never produce empty words.
	normalize := func(s string) []string {
		return strings.Fields(notWordDigitOrSpace.ReplaceAllString(strings.ToLower(s), ""))
	}

	titleWords := normalize(title)
	if len(titleWords) == 0 {
		// Nothing to compare; also avoids dividing by zero below.
		return 0
	}

	// remaining counts how many times each word of the original title is still available for matching.
	remaining := make(map[string]int)
	for _, word := range normalize(originalTitle) {
		remaining[word]++
	}

	var match int
	for _, word := range titleWords {
		if remaining[word] > 0 {
			match++
			remaining[word]--
		}
	}
	return float64(match) / float64(len(titleWords))
}
91+
6392
// parseAndSort will digest the raw data from Nyaa into a parsed metadata struct `ParsedNyaa`.
6493
// it will also sort the response by season and episode.
6594
// It is important that it returns an ascending season/episode list, so that you don't download a recent episode
6695
// while skipping older ones you are still missing since your latestTag.
67-
func parseAndSort(entries []nyaa.Entry) []parser.ParsedNyaa {
96+
func parseAndSort(animeListEntry animelist.Entry, entries []nyaa.Entry) []parser.ParsedNyaa {
6897
resp := utils.Map(entries, func(entry nyaa.Entry) parser.ParsedNyaa { return parser.NewParsedNyaa(entry) })
6998
sort.Slice(resp, func(i, j int) bool {
7099
cmp := tagCompare(resp[i].SeasonEpisodeTag, resp[j].SeasonEpisodeTag)
100+
if cmp != 0 {
101+
return cmp < 0
102+
}
71103
// For same tag, we compare vertical resolution, prioritizing better quality.
72-
if cmp == 0 {
73-
cmp = resp[j].Meta.VerticalResolution - resp[i].Meta.VerticalResolution
74-
if cmp == 0 {
75-
cmp = resp[j].Entry.Seeders - resp[i].Entry.Seeders
104+
cmp = resp[j].Meta.VerticalResolution - resp[i].Meta.VerticalResolution
105+
if cmp != 0 {
106+
return cmp < 0
107+
}
108+
var scoreI, scoreJ float64
109+
// Then we prioritize by title proximity score.
110+
for _, title := range animeListEntry.Titles {
111+
curScoreI := calculateTitleSimilarityScore(title, resp[i].Meta.Title)
112+
curScoreJ := calculateTitleSimilarityScore(title, resp[j].Meta.Title)
113+
if curScoreI > scoreI {
114+
scoreI = curScoreI
76115
}
116+
if curScoreJ > scoreJ {
117+
scoreJ = curScoreJ
118+
}
119+
}
120+
cmp = int((scoreJ - scoreI) * 100)
121+
if cmp != 0 {
122+
return cmp < 0
123+
}
124+
// Then prioritize number of seeds
125+
cmp = resp[j].Entry.Seeders - resp[i].Entry.Seeders
126+
if cmp != 0 {
127+
return cmp < 0
77128
}
78129
return cmp < 0
79130
})
80131
return resp
81132
}
82133

83134
// getDownloadableEntries is responsible for filtering and ordering the raw Nyaa feed into valid downloadable torrents.
84-
func getDownloadableEntries(entries []nyaa.Entry, latestTag string, animeStatus animelist.AiringStatus) []parser.ParsedNyaa {
135+
func getDownloadableEntries(animeListEntry animelist.Entry, entries []nyaa.Entry, latestTag string, animeStatus animelist.AiringStatus) []parser.ParsedNyaa {
85136
// If we don't have any episodes, and show is released, try to find a batch for all episodes.
86137
useBatch := latestTag == "" && animeStatus == animelist.AiringStatusAired
87-
parsedEntries := parseAndSort(entries)
138+
parsedEntries := parseAndSort(animeListEntry, entries)
88139
if useBatch {
89140
return utils.Filter(parsedEntries, filterBatchEntries)
90141
}
@@ -94,7 +145,7 @@ func getDownloadableEntries(entries []nyaa.Entry, latestTag string, animeStatus
94145
func (c *Controller) NyaaSearch(ctx context.Context, entry animelist.Entry) ([]nyaa.Entry, error) {
95146
// Build search query for Nyaa.
96147
// For the title, we filter for English and original titles.
97-
strippedTitles := utils.Map(entry.Titles, parser.TitleStrip)
148+
strippedTitles := utils.Map(entry.Titles, func(title string) string { return parser.TitleStrip(title, true) })
98149
titleQuery := nyaa.QueryOr(strippedTitles)
99150
sourceQuery := nyaa.QueryOr(c.dep.Config.Sources)
100151
qualityQuery := nyaa.QueryOr(c.dep.Config.Qualitites)
@@ -118,7 +169,7 @@ func (c *Controller) DigestAnimeListEntry(ctx context.Context, entry animelist.E
118169
if err != nil {
119170
return fmt.Errorf("getting latest tag: %w", err)
120171
}
121-
for _, nyaaEntry := range getDownloadableEntries(nyaaEntries, latestTag, entry.AiringStatus) {
172+
for _, nyaaEntry := range getDownloadableEntries(entry, nyaaEntries, latestTag, entry.AiringStatus) {
122173
if err := c.TorrentDigestNyaa(ctx, entry, nyaaEntry); err != nil {
123174
log.Error().Msgf("failed to digest nyaa entry: %s", err)
124175
continue

internal/discovery/run_test.go

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ func Test_filterEpisodes(t *testing.T) {
8686

8787
func Test_buildTaggedNyaaList(t *testing.T) {
8888
t.Run("empty", func(t *testing.T) {
89-
got := parseAndSort([]nyaa.Entry{})
89+
got := parseAndSort(animelist.Entry{}, []nyaa.Entry{})
9090
require.Empty(t, got)
9191
})
9292
t.Run("sort by tag", func(t *testing.T) {
@@ -96,7 +96,7 @@ func Test_buildTaggedNyaaList(t *testing.T) {
9696
{Title: "Show3: S03E01"},
9797
{Title: "Show3: S03"},
9898
}
99-
got := parseAndSort(input)
99+
got := parseAndSort(animelist.Entry{}, input)
100100
require.Len(t, got, len(input))
101101
for i := 1; i < len(got); i++ {
102102
require.True(t, tagCompare(got[i-1].SeasonEpisodeTag, got[i].SeasonEpisodeTag) <= 0)
@@ -106,7 +106,7 @@ func Test_buildTaggedNyaaList(t *testing.T) {
106106

107107
func Test_filterNyaaFeed(t *testing.T) {
108108
t.Run("empty", func(t *testing.T) {
109-
got := getDownloadableEntries([]nyaa.Entry{}, "", animelist.AiringStatusAiring)
109+
got := getDownloadableEntries(animelist.Entry{}, []nyaa.Entry{}, "", animelist.AiringStatusAiring)
110110
require.Empty(t, got)
111111
})
112112
t.Run("airing: no latestTag", func(t *testing.T) {
@@ -115,7 +115,7 @@ func Test_filterNyaaFeed(t *testing.T) {
115115
{Title: "Show3: S03E02"},
116116
{Title: "Show3: S03E01"},
117117
}
118-
got := getDownloadableEntries(input, "", animelist.AiringStatusAiring)
118+
got := getDownloadableEntries(animelist.Entry{}, input, "", animelist.AiringStatusAiring)
119119
require.Len(t, got, len(input))
120120
for i := 1; i < len(got); i++ {
121121
require.True(t, tagCompare(got[i-1].SeasonEpisodeTag, got[i].SeasonEpisodeTag) <= 0)
@@ -127,19 +127,19 @@ func Test_filterNyaaFeed(t *testing.T) {
127127
{Title: "Show3: S03E02"},
128128
{Title: "Show3: S03E01"},
129129
}
130-
got := getDownloadableEntries(input, "Show3 S03E02", animelist.AiringStatusAiring)
130+
got := getDownloadableEntries(animelist.Entry{}, input, "Show3 S03E02", animelist.AiringStatusAiring)
131131
require.Len(t, got, 1)
132-
require.Equal(t, parseAndSort(input[:1]), got)
132+
require.Equal(t, parseAndSort(animelist.Entry{}, input[:1]), got)
133133
})
134134
t.Run("airing: with repeated tag", func(t *testing.T) {
135135
input := []nyaa.Entry{
136136
{Title: "Show3: S03E02"},
137137
{Title: "Show3: S03E02"},
138138
{Title: "Show3: S03E01"},
139139
}
140-
got := getDownloadableEntries(input, "Show3 S03E01", animelist.AiringStatusAiring)
140+
got := getDownloadableEntries(animelist.Entry{}, input, "Show3 S03E01", animelist.AiringStatusAiring)
141141
require.Len(t, got, 1)
142-
require.Equal(t, parseAndSort(input[0:1]), got)
142+
require.Equal(t, parseAndSort(animelist.Entry{}, input[0:1]), got)
143143
})
144144

145145
t.Run("airing: with latestTag and quality", func(t *testing.T) {
@@ -149,57 +149,71 @@ func Test_filterNyaaFeed(t *testing.T) {
149149
{Title: "Show3: S03E02"},
150150
{Title: "Show3: S03E01"},
151151
}
152-
got := getDownloadableEntries(input, "Show3 S03E02", animelist.AiringStatusAiring)
152+
got := getDownloadableEntries(animelist.Entry{}, input, "Show3 S03E02", animelist.AiringStatusAiring)
153153
require.Len(t, got, 1)
154-
require.Equal(t, parseAndSort(input[1:2]), got)
154+
require.Equal(t, parseAndSort(animelist.Entry{}, input[1:2]), got)
155155
})
156156
t.Run("aired: with latestTag", func(t *testing.T) {
157157
input := []nyaa.Entry{
158158
{Title: "Show3: S03E03"},
159159
{Title: "Show3: S03E02"},
160160
{Title: "Show3: S03E01"},
161161
}
162-
got := getDownloadableEntries(input, "Show3 S03E02", animelist.AiringStatusAired)
162+
got := getDownloadableEntries(animelist.Entry{}, input, "Show3 S03E02", animelist.AiringStatusAired)
163163
require.Len(t, got, 1)
164-
require.Equal(t, parseAndSort(input[:1]), got)
164+
require.Equal(t, parseAndSort(animelist.Entry{}, input[:1]), got)
165165
})
166166
t.Run("aired: with batch, no latestTag", func(t *testing.T) {
167167
input := []nyaa.Entry{
168168
{Title: "Show3: S03E03"},
169169
{Title: "Show3: S03E02"},
170170
{Title: "Show3: S03"},
171171
}
172-
got := getDownloadableEntries(input, "", animelist.AiringStatusAired)
172+
got := getDownloadableEntries(animelist.Entry{}, input, "", animelist.AiringStatusAired)
173173
require.Len(t, got, 1)
174-
require.Equal(t, parseAndSort(input[2:]), got)
174+
require.Equal(t, parseAndSort(animelist.Entry{}, input[2:]), got)
175175
})
176176
t.Run("aired: with batch, different qualities", func(t *testing.T) {
177177
input := []nyaa.Entry{
178178
{Title: "Show3: S03 1220x760"},
179179
{Title: "Show3: S03 1080p"},
180180
}
181-
got := getDownloadableEntries(input, "", animelist.AiringStatusAired)
181+
got := getDownloadableEntries(animelist.Entry{}, input, "", animelist.AiringStatusAired)
182182
require.Len(t, got, 1)
183-
require.Equal(t, parseAndSort(input[1:]), got)
183+
require.Equal(t, parseAndSort(animelist.Entry{}, input[1:]), got)
184184
})
185185
t.Run("aired: with batch, with latestTag", func(t *testing.T) {
186186
input := []nyaa.Entry{
187187
{Title: "Show3: S03E03"},
188188
{Title: "Show3: S03E02"},
189189
{Title: "Show3: S03"},
190190
}
191-
got := getDownloadableEntries(input, "Show3 S03E02", animelist.AiringStatusAired)
191+
got := getDownloadableEntries(animelist.Entry{}, input, "Show3 S03E02", animelist.AiringStatusAired)
192192
require.Len(t, got, 1)
193-
require.Equal(t, parseAndSort(input[:1]), got)
193+
require.Equal(t, parseAndSort(animelist.Entry{}, input[:1]), got)
194194
})
195195
t.Run("same tag and quality, different seeders", func(t *testing.T) {
196196
input := []nyaa.Entry{
197197
{Title: "Show3: S03E03", Seeders: 1},
198198
{Title: "Show3: S03E03", Seeders: 10},
199199
{Title: "Show3: S03"},
200200
}
201-
got := getDownloadableEntries(input, "Show3 S03E02", animelist.AiringStatusAired)
201+
got := getDownloadableEntries(animelist.Entry{}, input, "Show3 S03E02", animelist.AiringStatusAired)
202202
require.Len(t, got, 1)
203-
require.Equal(t, parseAndSort(input[1:2]), got)
203+
require.Equal(t, parseAndSort(animelist.Entry{}, input[1:2]), got)
204+
})
205+
}
206+
207+
func Test_calculateTitleSimilarityScore(t *testing.T) {
208+
t.Run("exact match in lower case", func(t *testing.T) {
209+
score := calculateTitleSimilarityScore("My pony academy: the story continues", "My Pony Academy the story continues")
210+
require.EqualValues(t, score, 1)
211+
})
212+
213+
t.Run("closer match should have higher score", func(t *testing.T) {
214+
originalTitle := "My pony academy: the battle continues"
215+
scoreA := calculateTitleSimilarityScore(originalTitle, "My Pony Academy")
216+
scoreB := calculateTitleSimilarityScore(originalTitle, "My Pony Academy 2: second battle")
217+
require.Greater(t, scoreA, scoreB)
204218
})
205219
}

internal/discovery/torrent.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ func (c *Controller) buildTorrentName(entry animelist.Entry, parsedNyaa parser.P
5858
// It will configure all necessary metadata and send it to your torrent client.
5959
func (c *Controller) TorrentDigestNyaa(ctx context.Context, entry animelist.Entry, parsedNyaa parser.ParsedNyaa) error {
6060
savePath := c.TorrentGetDownloadPath(entry.Titles[0])
61+
parsedNyaa.Meta.Title = parser.TitleStrip(parsedNyaa.Meta.Title, true)
6162
tags := parsedNyaa.Meta.TagsBuildTorrent()
6263
req := &torrentclient.AddTorrentConfig{
6364
Tags: tags,

internal/parser/title.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,17 @@ func TitleStripSubtitle(title string) string {
2121

2222
// TitleStrip cleans title from tags and season / episode information; sub-titles are also removed when cleanSubtitle is true.
2323
// Example: [Source] Show: another story - S03E02 [1080p].mkv -> Show.
24-
func TitleStrip(title string) string {
24+
func TitleStrip(title string, cleanSubtitle bool) string {
2525
if index := seasonIndexMatch(title); index != -1 {
2626
title = title[:index]
2727
}
2828
if index := episodeIndexMatch(title); index != -1 {
2929
title = title[:index]
3030
}
3131
title = regexp.MustCompile(`\s{2,}`).ReplaceAllString(title, " ")
32-
title = TitleStripSubtitle(title)
32+
if cleanSubtitle {
33+
title = TitleStripSubtitle(title)
34+
}
3335
title = strings.ReplaceAll(title, ".", " ")
3436
title = removeTags(title)
3537
return strings.TrimSpace(title)
@@ -45,7 +47,7 @@ func removeTags(title string) string {
4547
// TitleParse will parse a title into a Metadata, extracting stripped title, tags, season and episode information.
4648
func TitleParse(title string) Metadata {
4749
resp := Metadata{
48-
Title: TitleStrip(title),
50+
Title: TitleStrip(title, false),
4951
VerticalResolution: qualityMatch(title),
5052
}
5153
if tags := tagsExpr.FindAllStringSubmatch(title, -1); len(tags) > 0 {

internal/parser/title_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ func TestTitleStrip(t *testing.T) {
1717
}
1818
for _, tt := range tests {
1919
t.Run(tt.name, func(t *testing.T) {
20-
if got := TitleStrip(tt.title); got != tt.want {
20+
if got := TitleStrip(tt.title, false); got != tt.want {
2121
t.Errorf("TitleStrip() = %v, want %v", got, tt.want)
2222
}
2323
})

0 commit comments

Comments
 (0)