Skip to content

Commit 810c8e7

Browse files
Updates PCSO Scraping procedure (#25)
1 parent cf96674 commit 810c8e7

File tree

3 files changed

+60 additions, -85 deletions (lines changed)

3 files changed

+60 additions, -85 deletions (lines changed)

Includes/Classes/Scraping/LottoPCSOScraper.cs

Lines changed: 34 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -100,13 +100,40 @@ internal async Task<IHtmlDocument> GetWebsiteDOMAsync(Dictionary<string, string>
100100
{
101101
var encodedContent = new FormUrlEncodedContent(parameters);
102102
CancellationTokenSource cancellationToken = new CancellationTokenSource();
103-
HttpResponseMessage request = await httpClient.PostAsync(webUrlToScrape, encodedContent);
104-
cancellationToken.Token.ThrowIfCancellationRequested();
105-
Stream response = await request.Content.ReadAsStreamAsync();
103+
cancellationToken.CancelAfter(TimeSpan.FromMilliseconds(Timeout.Infinite));
104+
105+
HttpResponseMessage response = null;
106+
httpClient.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0");
107+
httpClient.DefaultRequestHeaders.Add("Accept", "text/html");
108+
httpClient.DefaultRequestHeaders.Add("Accept-Language", "en-US,en;q=0.5");
109+
httpClient.DefaultRequestHeaders.Add("Accept-Encoding", "deflate");
110+
httpClient.DefaultRequestHeaders.Add("Connection", "keep-alive");
111+
httpClient.DefaultRequestHeaders.Add("Upgrade-Insecure-Requests", "1");
112+
httpClient.DefaultRequestHeaders.Add("Sec-Fetch-Dest", "document");
113+
httpClient.DefaultRequestHeaders.Add("Sec-Fetch-Mode", "navigate");
114+
httpClient.DefaultRequestHeaders.Add("Sec-Fetch-Site", "none");
115+
httpClient.DefaultRequestHeaders.Add("Sec-Fetch-User", "?1");
116+
httpClient.DefaultRequestHeaders.Add("Pragma", "no-cache");
117+
httpClient.DefaultRequestHeaders.Add("Cache-Control", "no-cache");
118+
httpClient.DefaultRequestHeaders.Add("TE", "trailers");
119+
120+
if (parameters.Count <=0)
121+
{
122+
response = await httpClient.GetAsync(webUrlToScrape);
123+
}
124+
else
125+
{
126+
response = await httpClient.PostAsync(webUrlToScrape, encodedContent);
127+
}
128+
response.EnsureSuccessStatusCode();
106129
cancellationToken.Token.ThrowIfCancellationRequested();
107-
HtmlParser parser = new HtmlParser();
108-
IHtmlDocument document = parser.ParseDocument(response);
109-
return document;
130+
using (var stream = response.Content.ReadAsStreamAsync().Result)
131+
{
132+
cancellationToken.Token.ThrowIfCancellationRequested();
133+
HtmlParser parser = new HtmlParser();
134+
IHtmlDocument document = parser.ParseDocument(stream);
135+
return document;
136+
}
110137
}
111138
}
112139

@@ -115,7 +142,7 @@ internal async void ScrapeWebsite(LotteryDetails lotteryDetails, Dictionary<stri
115142
try
116143
{
117144
RaiseEvent(LottoWebScrapingStages.CONNECTING);
118-
IHtmlDocument documentForSession = await GetWebsiteDOMAsync(GenerateParameters(lotteryDetails));
145+
IHtmlDocument documentForSession = await GetWebsiteDOMAsync(new Dictionary<string, string>());
119146
RaiseEvent(LottoWebScrapingStages.SESSION_CREATION);
120147
Dictionary<string, string> sessionParam = GetSessionBasedParameters(lotteryDetails, documentForSession);
121148
RaiseEvent(LottoWebScrapingStages.SEARCHING_DATA);

LottoDataManager.csproj

Lines changed: 26 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,5 @@
11
<?xml version="1.0" encoding="utf-8"?>
22
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3-
<Import Project="packages\LightGBM.2.2.3\build\LightGBM.props" Condition="Exists('packages\LightGBM.2.2.3\build\LightGBM.props')" />
4-
<Import Project="packages\Microsoft.ML.FastTree.1.5.5\build\netstandard2.0\Microsoft.ML.FastTree.props" Condition="Exists('packages\Microsoft.ML.FastTree.1.5.5\build\netstandard2.0\Microsoft.ML.FastTree.props')" />
5-
<Import Project="packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.props" Condition="Exists('packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.props')" />
6-
<Import Project="packages\Microsoft.ML.CpuMath.1.5.5\build\netstandard2.0\Microsoft.ML.CpuMath.props" Condition="Exists('packages\Microsoft.ML.CpuMath.1.5.5\build\netstandard2.0\Microsoft.ML.CpuMath.props')" />
73
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
84
<PropertyGroup>
95
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
@@ -45,36 +41,9 @@
4541
<StartupObject />
4642
</PropertyGroup>
4743
<ItemGroup>
48-
<Reference Include="Itenso.TimePeriod, Version=2.1.1.0, Culture=neutral, PublicKeyToken=8cb6db75b4e81486, processorArchitecture=MSIL">
49-
<HintPath>packages\TimePeriodLibrary.NET.2.1.1\lib\net46\Itenso.TimePeriod.dll</HintPath>
50-
</Reference>
51-
<Reference Include="Microsoft.Bcl.AsyncInterfaces, Version=5.0.0.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
52-
<HintPath>packages\Microsoft.Bcl.AsyncInterfaces.5.0.0\lib\net461\Microsoft.Bcl.AsyncInterfaces.dll</HintPath>
53-
</Reference>
54-
<Reference Include="Microsoft.Extensions.DependencyModel, Version=3.1.6.0, Culture=neutral, PublicKeyToken=adb9793829ddae60, processorArchitecture=MSIL">
55-
<HintPath>packages\Microsoft.Extensions.DependencyModel.3.1.6\lib\net451\Microsoft.Extensions.DependencyModel.dll</HintPath>
56-
</Reference>
57-
<Reference Include="Microsoft.ML, Version=1.0.0.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
58-
<HintPath>packages\Microsoft.ML.1.5.5\lib\netstandard2.0\Microsoft.ML.dll</HintPath>
59-
</Reference>
60-
<Reference Include="Microsoft.ML.LightGbm, Version=1.0.0.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
61-
<HintPath>packages\Microsoft.ML.LightGbm.1.5.5\lib\netstandard2.0\Microsoft.ML.LightGbm.dll</HintPath>
62-
</Reference>
6344
<Reference Include="System" />
64-
<Reference Include="System.Buffers, Version=4.0.3.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
65-
<HintPath>packages\System.Buffers.4.5.1\lib\net461\System.Buffers.dll</HintPath>
66-
</Reference>
67-
<Reference Include="System.Collections.NonGeneric, Version=4.0.2.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
68-
<HintPath>packages\System.Collections.NonGeneric.4.3.0\lib\net46\System.Collections.NonGeneric.dll</HintPath>
69-
<Private>True</Private>
70-
<Private>True</Private>
71-
</Reference>
7245
<Reference Include="System.Core" />
7346
<Reference Include="System.Numerics" />
74-
<Reference Include="System.Numerics.Vectors, Version=4.1.3.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
75-
<HintPath>packages\System.Numerics.Vectors.4.4.0\lib\net46\System.Numerics.Vectors.dll</HintPath>
76-
<Private>True</Private>
77-
</Reference>
7847
<Reference Include="System.Runtime.Caching" />
7948
<Reference Include="System.Runtime.InteropServices.RuntimeInformation, Version=4.0.1.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
8049
<HintPath>packages\System.Runtime.InteropServices.RuntimeInformation.4.3.0\lib\net45\System.Runtime.InteropServices.RuntimeInformation.dll</HintPath>
@@ -474,7 +443,6 @@
474443
</Compile>
475444
<None Include="DatabaseMain\MLModel_14_3H_Fast_Tree.zip" />
476445
<None Include="DatabaseMain\MLModel_16_3H_SdcaRegression.zip" />
477-
<None Include="packages.config" />
478446
<None Include="Properties\Settings.settings">
479447
<Generator>SettingsSingleFileGenerator</Generator>
480448
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
@@ -612,18 +580,31 @@
612580
<Name>AngelsRepositoryLib</Name>
613581
</ProjectReference>
614582
</ItemGroup>
583+
<ItemGroup>
584+
<PackageReference Include="AngleSharp">
585+
<Version>1.1.0</Version>
586+
</PackageReference>
587+
<PackageReference Include="FSharp.Core">
588+
<Version>8.0.101</Version>
589+
</PackageReference>
590+
<PackageReference Include="HtmlAgilityPack">
591+
<Version>1.11.57</Version>
592+
</PackageReference>
593+
<PackageReference Include="Microsoft.ML.LightGbm">
594+
<Version>3.0.1</Version>
595+
</PackageReference>
596+
<PackageReference Include="ObjectListView.Official">
597+
<Version>2.9.1</Version>
598+
</PackageReference>
599+
<PackageReference Include="System.Collections.NonGeneric">
600+
<Version>4.3.0</Version>
601+
</PackageReference>
602+
<PackageReference Include="System.Runtime.Caching">
603+
<Version>8.0.0</Version>
604+
</PackageReference>
605+
<PackageReference Include="TimePeriodLibrary.NET">
606+
<Version>2.1.5</Version>
607+
</PackageReference>
608+
</ItemGroup>
615609
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
616-
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
617-
<PropertyGroup>
618-
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
619-
</PropertyGroup>
620-
<Error Condition="!Exists('packages\Microsoft.ML.CpuMath.1.5.5\build\netstandard2.0\Microsoft.ML.CpuMath.props')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.ML.CpuMath.1.5.5\build\netstandard2.0\Microsoft.ML.CpuMath.props'))" />
621-
<Error Condition="!Exists('packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.props')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.props'))" />
622-
<Error Condition="!Exists('packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.targets')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.targets'))" />
623-
<Error Condition="!Exists('packages\Microsoft.ML.FastTree.1.5.5\build\netstandard2.0\Microsoft.ML.FastTree.props')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.ML.FastTree.1.5.5\build\netstandard2.0\Microsoft.ML.FastTree.props'))" />
624-
<Error Condition="!Exists('packages\LightGBM.2.2.3\build\LightGBM.props')" Text="$([System.String]::Format('$(ErrorText)', 'packages\LightGBM.2.2.3\build\LightGBM.props'))" />
625-
<Error Condition="!Exists('packages\LightGBM.2.2.3\build\LightGBM.targets')" Text="$([System.String]::Format('$(ErrorText)', 'packages\LightGBM.2.2.3\build\LightGBM.targets'))" />
626-
</Target>
627-
<Import Project="packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.targets" Condition="Exists('packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.targets')" />
628-
<Import Project="packages\LightGBM.2.2.3\build\LightGBM.targets" Condition="Exists('packages\LightGBM.2.2.3\build\LightGBM.targets')" />
629610
</Project>

packages.config

Lines changed: 0 additions & 33 deletions
This file was deleted.

0 commit comments

Comments
 (0)