Skip to content

Commit 00d2999

Browse files
committed
init
0 parents  commit 00d2999

13 files changed

+1430
-0
lines changed

.gitignore

+405
Large diffs are not rendered by default.

.vscode/launch.json

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
// Use IntelliSense to learn about possible attributes.
3+
// Hover to view descriptions of existing attributes.
4+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5+
"version": "0.2.0",
6+
"configurations": [
7+
{
8+
"name": ".NET Core Launch (console)",
9+
"type": "coreclr",
10+
"request": "launch",
11+
"preLaunchTask": "build",
12+
"program": "${workspaceFolder}/WebReaper.Tests/bin/Debug/net6.0/WebReaper.Tests.dll",
13+
"args": [],
14+
"cwd": "${workspaceFolder}/WebReaper.Tests",
15+
"console": "internalConsole",
16+
"stopAtEntry": false
17+
},
18+
{
19+
"name": ".NET Core Attach",
20+
"type": "coreclr",
21+
"request": "attach"
22+
}
23+
]
24+
}

.vscode/tasks.json

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
{
2+
// See https://go.microsoft.com/fwlink/?LinkId=733558
3+
// for the documentation about the tasks.json format
4+
"version": "2.0.0",
5+
"tasks": [
6+
{
7+
"label": "build",
8+
"command": "dotnet",
9+
"type": "process",
10+
"args": [
11+
"build",
12+
"${workspaceFolder}/WebReaper.Tests/WebReaper.Tests.csproj",
13+
"/property:GenerateFullPaths=true",
14+
"/consoleloggerparameters:NoSummary"
15+
],
16+
"problemMatcher": "$msCompile"
17+
},
18+
{
19+
"label": "publish",
20+
"command": "dotnet",
21+
"type": "process",
22+
"args": [
23+
"publish",
24+
"${workspaceFolder}/WebReaper.Tests/WebReaper.Tests.csproj",
25+
"/property:GenerateFullPaths=true",
26+
"/consoleloggerparameters:NoSummary"
27+
],
28+
"problemMatcher": "$msCompile"
29+
},
30+
{
31+
"label": "watch",
32+
"command": "dotnet",
33+
"type": "process",
34+
"args": [
35+
"watch",
36+
"run",
37+
"--project",
38+
"${workspaceFolder}/WebReaper.Tests/WebReaper.Tests.csproj"
39+
],
40+
"problemMatcher": "$msCompile"
41+
}
42+
]
43+
}

WebReaper.Tests/Program.cs

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
using WebReaper;
2+
3+
// await new Scraper("https://rutracker.org/forum/viewforum.php?f=402")
4+
// .FollowLinks(".torTopic.bold.tt-text")
5+
// .Paginate(".pg:contains('След.')")
6+
// .WithScheme(new WebEl[] {
7+
// new("title", "span[style='font-size: 24px; line-height: normal;']"),
8+
// new("image", ".postImg", JsonType.Image)
9+
// })
10+
// .Limit(100)
11+
// .To("output.json")
12+
// .Run();
13+
14+
// Entry point: configure a Scraper for one forum listing and run it.
// The literals below are passed through unchanged; what each builder call
// does with them is defined in the WebReaper library, not in this file.
const string startUrl = "https://rutracker.org/forum/viewforum.php?f=402";
const string linkSelector = ".torTopic.bold.tt-text";
const string paginationSelector = ".pg";
const string outputPath = "output.json";
const int limit = 10000;

// Fields to extract: a "title" (default JsonType) and an "image" typed as JsonType.Image.
var schema = new WebEl[]
{
    new("title", "span[style='font-size: 24px; line-height: normal;']"),
    new("image", ".postImg", JsonType.Image),
};

var scraper = new Scraper(startUrl)
    .FollowLinks(linkSelector)
    .Paginate(paginationSelector)
    .WithScheme(schema)
    .Limit(limit)
    .To(outputPath);

await scraper.Run();
+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net6.0</TargetFramework>
6+
<ImplicitUsings>enable</ImplicitUsings>
7+
<Nullable>enable</Nullable>
8+
</PropertyGroup>
9+
10+
<ItemGroup>
11+
<ProjectReference Include="..\WebReaper\WebReaper.csproj" />
12+
</ItemGroup>
13+
14+
<ItemGroup>
15+
</ItemGroup>
16+
</Project>

WebReaper.Tests/output.json

+582
Large diffs are not rendered by default.

WebReaper.sln

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
2+
Microsoft Visual Studio Solution File, Format Version 12.00
3+
# Visual Studio Version 16
4+
VisualStudioVersion = 25.0.1700.0
5+
MinimumVisualStudioVersion = 10.0.40219.1
6+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "WebReaper", "WebReaper\WebReaper.csproj", "{72A004F0-3B30-4149-B33F-8C8220135A8B}"
7+
EndProject
8+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "WebReaper.Tests", "WebReaper.Tests\WebReaper.Tests.csproj", "{6A3C1998-9015-4AC8-8701-FBD2E2210EB1}"
9+
EndProject
10+
Global
11+
GlobalSection(SolutionConfigurationPlatforms) = preSolution
12+
Debug|Any CPU = Debug|Any CPU
13+
Release|Any CPU = Release|Any CPU
14+
EndGlobalSection
15+
GlobalSection(ProjectConfigurationPlatforms) = postSolution
16+
{72A004F0-3B30-4149-B33F-8C8220135A8B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
17+
{72A004F0-3B30-4149-B33F-8C8220135A8B}.Debug|Any CPU.Build.0 = Debug|Any CPU
18+
{72A004F0-3B30-4149-B33F-8C8220135A8B}.Release|Any CPU.ActiveCfg = Release|Any CPU
19+
{72A004F0-3B30-4149-B33F-8C8220135A8B}.Release|Any CPU.Build.0 = Release|Any CPU
20+
{6A3C1998-9015-4AC8-8701-FBD2E2210EB1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
21+
{6A3C1998-9015-4AC8-8701-FBD2E2210EB1}.Debug|Any CPU.Build.0 = Debug|Any CPU
22+
{6A3C1998-9015-4AC8-8701-FBD2E2210EB1}.Release|Any CPU.ActiveCfg = Release|Any CPU
23+
{6A3C1998-9015-4AC8-8701-FBD2E2210EB1}.Release|Any CPU.Build.0 = Release|Any CPU
24+
EndGlobalSection
25+
GlobalSection(SolutionProperties) = preSolution
26+
HideSolutionNode = FALSE
27+
EndGlobalSection
28+
GlobalSection(ExtensibilityGlobals) = postSolution
29+
SolutionGuid = {8413198B-8D26-4E30-A5E2-E0FBC4DCEA59}
30+
EndGlobalSection
31+
EndGlobal

WebReaper/JsonType.cs

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
namespace WebReaper;
2+
3+
/// <summary>
/// Kind of value a schema element is declared to produce.
/// </summary>
/// <remarks>
/// Numeric values are written out explicitly and match the implicit
/// zero-based numbering of the declaration order.
/// </remarks>
public enum JsonType
{
    /// <summary>Textual value.</summary>
    String = 0,

    /// <summary>Numeric value.</summary>
    Number = 1,

    /// <summary>Boolean value.</summary>
    Boolean = 2,

    /// <summary>Collection of values.</summary>
    Array = 3,

    /// <summary>HTML content (exact handling is defined by the consumer of this enum).</summary>
    Html = 4,

    /// <summary>Image (exact handling is defined by the consumer of this enum).</summary>
    Image = 5,
}

WebReaper/OutputElement.cs

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
namespace WebReaper;
2+
3+
/// <summary>
/// One element of a scraping schema: either a leaf field located by a selector,
/// or a named group that holds child elements.
/// </summary>
public class WebEl {

    /// <summary>Child elements; set only by the children constructor, otherwise <c>null</c>.</summary>
    public WebEl[]? Children { get; set; }

    /// <summary>Name of the field this element describes.</summary>
    public string Field { get; set; }

    /// <summary>Selector used to locate the element; <c>null</c> when built from children.</summary>
    public string? Selector { get; set; }

    /// <summary>Declared value kind of this element.</summary>
    public JsonType? Type { get; set; }

    /// <summary>
    /// Selectors passed as <c>excludeSelectors</c> to the leaf constructor, or <c>null</c>
    /// when none were given. How consumers apply them is not defined in this class.
    /// </summary>
    public string[]? ExcludeSelectors { get; set; }

    /// <summary>
    /// Creates a leaf element identified by a selector.
    /// </summary>
    /// <param name="field">Name of the field.</param>
    /// <param name="selector">Selector locating the element.</param>
    /// <param name="type">Declared value kind; defaults to <see cref="JsonType.String"/>.</param>
    /// <param name="excludeSelectors">Optional selectors to exclude; stored in <see cref="ExcludeSelectors"/>.</param>
    public WebEl(
        string field,
        string selector,
        JsonType type = JsonType.String,
        string[]? excludeSelectors = null)
    {
        this.Field = field;
        this.Selector = selector;
        this.Type = type;
        // Previously this argument was accepted but silently dropped; keep it so callers' input is not lost.
        this.ExcludeSelectors = excludeSelectors;
    }

    /// <summary>
    /// Creates a group element that contains child elements; its type is <see cref="JsonType.Array"/>.
    /// </summary>
    /// <param name="field">Name of the field.</param>
    /// <param name="children">Child elements of the group.</param>
    public WebEl(
        string field,
        params WebEl[] children)
    {
        this.Field = field;
        this.Children = children;
        this.Type = JsonType.Array;
    }
}

0 commit comments

Comments
 (0)