Skip to content

Commit

Permalink
Updates PCSO Scraping procedure (#25)
Browse files: browse the repository at this point in the history
  • Loading branch information
angelsburger90 authored Jan 23, 2024
1 parent cf96674 commit 810c8e7
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 85 deletions.
41 changes: 34 additions & 7 deletions Includes/Classes/Scraping/LottoPCSOScraper.cs
Original file line number | Diff line number | Diff line change
Expand Up @@ -100,13 +100,40 @@ internal async Task<IHtmlDocument> GetWebsiteDOMAsync(Dictionary<string, string>
{
var encodedContent = new FormUrlEncodedContent(parameters);
CancellationTokenSource cancellationToken = new CancellationTokenSource();
HttpResponseMessage request = await httpClient.PostAsync(webUrlToScrape, encodedContent);
cancellationToken.Token.ThrowIfCancellationRequested();
Stream response = await request.Content.ReadAsStreamAsync();
cancellationToken.CancelAfter(TimeSpan.FromMilliseconds(Timeout.Infinite));

HttpResponseMessage response = null;
httpClient.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0");
httpClient.DefaultRequestHeaders.Add("Accept", "text/html");
httpClient.DefaultRequestHeaders.Add("Accept-Language", "en-US,en;q=0.5");
httpClient.DefaultRequestHeaders.Add("Accept-Encoding", "deflate");
httpClient.DefaultRequestHeaders.Add("Connection", "keep-alive");
httpClient.DefaultRequestHeaders.Add("Upgrade-Insecure-Requests", "1");
httpClient.DefaultRequestHeaders.Add("Sec-Fetch-Dest", "document");
httpClient.DefaultRequestHeaders.Add("Sec-Fetch-Mode", "navigate");
httpClient.DefaultRequestHeaders.Add("Sec-Fetch-Site", "none");
httpClient.DefaultRequestHeaders.Add("Sec-Fetch-User", "?1");
httpClient.DefaultRequestHeaders.Add("Pragma", "no-cache");
httpClient.DefaultRequestHeaders.Add("Cache-Control", "no-cache");
httpClient.DefaultRequestHeaders.Add("TE", "trailers");

if (parameters.Count <=0)
{
response = await httpClient.GetAsync(webUrlToScrape);
}
else
{
response = await httpClient.PostAsync(webUrlToScrape, encodedContent);
}
response.EnsureSuccessStatusCode();
cancellationToken.Token.ThrowIfCancellationRequested();
HtmlParser parser = new HtmlParser();
IHtmlDocument document = parser.ParseDocument(response);
return document;
using (var stream = response.Content.ReadAsStreamAsync().Result)
{
cancellationToken.Token.ThrowIfCancellationRequested();
HtmlParser parser = new HtmlParser();
IHtmlDocument document = parser.ParseDocument(stream);
return document;
}
}
}

Expand All @@ -115,7 +142,7 @@ internal async void ScrapeWebsite(LotteryDetails lotteryDetails, Dictionary<stri
try
{
RaiseEvent(LottoWebScrapingStages.CONNECTING);
IHtmlDocument documentForSession = await GetWebsiteDOMAsync(GenerateParameters(lotteryDetails));
IHtmlDocument documentForSession = await GetWebsiteDOMAsync(new Dictionary<string, string>());
RaiseEvent(LottoWebScrapingStages.SESSION_CREATION);
Dictionary<string, string> sessionParam = GetSessionBasedParameters(lotteryDetails, documentForSession);
RaiseEvent(LottoWebScrapingStages.SEARCHING_DATA);
Expand Down
71 changes: 26 additions & 45 deletions LottoDataManager.csproj
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="packages\LightGBM.2.2.3\build\LightGBM.props" Condition="Exists('packages\LightGBM.2.2.3\build\LightGBM.props')" />
<Import Project="packages\Microsoft.ML.FastTree.1.5.5\build\netstandard2.0\Microsoft.ML.FastTree.props" Condition="Exists('packages\Microsoft.ML.FastTree.1.5.5\build\netstandard2.0\Microsoft.ML.FastTree.props')" />
<Import Project="packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.props" Condition="Exists('packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.props')" />
<Import Project="packages\Microsoft.ML.CpuMath.1.5.5\build\netstandard2.0\Microsoft.ML.CpuMath.props" Condition="Exists('packages\Microsoft.ML.CpuMath.1.5.5\build\netstandard2.0\Microsoft.ML.CpuMath.props')" />
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
Expand Down Expand Up @@ -45,36 +41,9 @@
<StartupObject />
</PropertyGroup>
<ItemGroup>
<Reference Include="Itenso.TimePeriod, Version=2.1.1.0, Culture=neutral, PublicKeyToken=8cb6db75b4e81486, processorArchitecture=MSIL">
<HintPath>packages\TimePeriodLibrary.NET.2.1.1\lib\net46\Itenso.TimePeriod.dll</HintPath>
</Reference>
<Reference Include="Microsoft.Bcl.AsyncInterfaces, Version=5.0.0.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
<HintPath>packages\Microsoft.Bcl.AsyncInterfaces.5.0.0\lib\net461\Microsoft.Bcl.AsyncInterfaces.dll</HintPath>
</Reference>
<Reference Include="Microsoft.Extensions.DependencyModel, Version=3.1.6.0, Culture=neutral, PublicKeyToken=adb9793829ddae60, processorArchitecture=MSIL">
<HintPath>packages\Microsoft.Extensions.DependencyModel.3.1.6\lib\net451\Microsoft.Extensions.DependencyModel.dll</HintPath>
</Reference>
<Reference Include="Microsoft.ML, Version=1.0.0.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
<HintPath>packages\Microsoft.ML.1.5.5\lib\netstandard2.0\Microsoft.ML.dll</HintPath>
</Reference>
<Reference Include="Microsoft.ML.LightGbm, Version=1.0.0.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
<HintPath>packages\Microsoft.ML.LightGbm.1.5.5\lib\netstandard2.0\Microsoft.ML.LightGbm.dll</HintPath>
</Reference>
<Reference Include="System" />
<Reference Include="System.Buffers, Version=4.0.3.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
<HintPath>packages\System.Buffers.4.5.1\lib\net461\System.Buffers.dll</HintPath>
</Reference>
<Reference Include="System.Collections.NonGeneric, Version=4.0.2.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
<HintPath>packages\System.Collections.NonGeneric.4.3.0\lib\net46\System.Collections.NonGeneric.dll</HintPath>
<Private>True</Private>
<Private>True</Private>
</Reference>
<Reference Include="System.Core" />
<Reference Include="System.Numerics" />
<Reference Include="System.Numerics.Vectors, Version=4.1.3.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
<HintPath>packages\System.Numerics.Vectors.4.4.0\lib\net46\System.Numerics.Vectors.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System.Runtime.Caching" />
<Reference Include="System.Runtime.InteropServices.RuntimeInformation, Version=4.0.1.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
<HintPath>packages\System.Runtime.InteropServices.RuntimeInformation.4.3.0\lib\net45\System.Runtime.InteropServices.RuntimeInformation.dll</HintPath>
Expand Down Expand Up @@ -474,7 +443,6 @@
</Compile>
<None Include="DatabaseMain\MLModel_14_3H_Fast_Tree.zip" />
<None Include="DatabaseMain\MLModel_16_3H_SdcaRegression.zip" />
<None Include="packages.config" />
<None Include="Properties\Settings.settings">
<Generator>SettingsSingleFileGenerator</Generator>
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
Expand Down Expand Up @@ -612,18 +580,31 @@
<Name>AngelsRepositoryLib</Name>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<PackageReference Include="AngleSharp">
<Version>1.1.0</Version>
</PackageReference>
<PackageReference Include="FSharp.Core">
<Version>8.0.101</Version>
</PackageReference>
<PackageReference Include="HtmlAgilityPack">
<Version>1.11.57</Version>
</PackageReference>
<PackageReference Include="Microsoft.ML.LightGbm">
<Version>3.0.1</Version>
</PackageReference>
<PackageReference Include="ObjectListView.Official">
<Version>2.9.1</Version>
</PackageReference>
<PackageReference Include="System.Collections.NonGeneric">
<Version>4.3.0</Version>
</PackageReference>
<PackageReference Include="System.Runtime.Caching">
<Version>8.0.0</Version>
</PackageReference>
<PackageReference Include="TimePeriodLibrary.NET">
<Version>2.1.5</Version>
</PackageReference>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
</PropertyGroup>
<Error Condition="!Exists('packages\Microsoft.ML.CpuMath.1.5.5\build\netstandard2.0\Microsoft.ML.CpuMath.props')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.ML.CpuMath.1.5.5\build\netstandard2.0\Microsoft.ML.CpuMath.props'))" />
<Error Condition="!Exists('packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.props')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.props'))" />
<Error Condition="!Exists('packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.targets')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.targets'))" />
<Error Condition="!Exists('packages\Microsoft.ML.FastTree.1.5.5\build\netstandard2.0\Microsoft.ML.FastTree.props')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.ML.FastTree.1.5.5\build\netstandard2.0\Microsoft.ML.FastTree.props'))" />
<Error Condition="!Exists('packages\LightGBM.2.2.3\build\LightGBM.props')" Text="$([System.String]::Format('$(ErrorText)', 'packages\LightGBM.2.2.3\build\LightGBM.props'))" />
<Error Condition="!Exists('packages\LightGBM.2.2.3\build\LightGBM.targets')" Text="$([System.String]::Format('$(ErrorText)', 'packages\LightGBM.2.2.3\build\LightGBM.targets'))" />
</Target>
<Import Project="packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.targets" Condition="Exists('packages\Microsoft.ML.1.5.5\build\netstandard2.0\Microsoft.ML.targets')" />
<Import Project="packages\LightGBM.2.2.3\build\LightGBM.targets" Condition="Exists('packages\LightGBM.2.2.3\build\LightGBM.targets')" />
</Project>
33 changes: 0 additions & 33 deletions packages.config

This file was deleted.

0 comments on commit 810c8e7

Please sign in to comment.