Skip to content

Commit

Permalink
Add support for HTML entities in content docs
Browse files (browse the repository at this point in the history)
Leverage HtmlDocument for parsing.

Fixes #3
  • Loading branch information
kzu committed Nov 30, 2023
1 parent d883e68 commit f07ea35
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 4 deletions.
6 changes: 2 additions & 4 deletions src/Epub/Content.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System.IO.Compression;
using System.Xml;
using System.Xml.Linq;
using Devlooped.Web;

namespace Devlooped.Epub;

Expand Down Expand Up @@ -35,11 +36,8 @@ public XDocument Open()
throw new InvalidArchiveException(ThisAssembly.Strings.MissingContent(Href));

using var stream = entry.Open();
using var reader = XmlReader
.Create(stream, new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore })
.IgnoreNamespaces();

return XDocument.Load(reader);
return HtmlDocument.Load(stream);
}

/// <summary>
Expand Down
13 changes: 13 additions & 0 deletions src/Tests/PublicationTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@ public void CanOpen()
Assert.NotNull(pub);
}

/// <summary>
/// Opens the sample publication and verifies that every navigation entry
/// can be opened and produces a document.
/// </summary>
/// <remarks>
/// NOTE(review): the sample EPUB presumably contains HTML entities in its
/// content documents (the scenario this commit targets) — confirm against
/// the sample file.
/// </remarks>
[Fact]
public void CanReadContentFragments()
{
    using var pub = Publication.Open(@"Samples/CCyC_Comentado_Tomo_II.epub");

    Assert.NotNull(pub.Package.Navigation);

    foreach (var nav in pub.Package.Navigation)
    {
        // Assert on the result instead of discarding it into an unused local,
        // so the test checks the opened content and not just the absence of
        // an exception while parsing.
        Assert.NotNull(nav.Open());
    }
}

[Fact]
public void CanAccessPackageInformation()
{
Expand Down
Binary file added src/Tests/Samples/CCyC_Comentado_Tomo_II.epub
Binary file not shown.

0 comments on commit f07ea35

Please sign in to comment.