Skip to content

Commit 4a2d955

Browse files
authored
SVG tag support (#164)
* Support of `svg` tag element * Detect Xml/Svg image and read its preferred size * Rationalise object declaration * Support of `svg` tag element * Detect Xml/Svg image and read its preferred size * Rationalise object declaration * Fix after rebase * Minor fixup * Improve testing * Handle SVG images in a unified code path
1 parent 1666f5b commit 4a2d955

File tree

11 files changed

+228
-37
lines changed

11 files changed

+228
-37
lines changed

examples/Demo/Program.cs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ static class Program
1515
static async Task Main(string[] args)
1616
{
1717
const string filename = "test.docx";
18-
string html = ResourceHelper.GetString("Resources.Document.html");
18+
string html = ResourceHelper.GetString("Resources.AdvancedTable.html");
1919
if (File.Exists(filename)) File.Delete(filename);
2020

2121
using (MemoryStream generatedDocument = new MemoryStream())
@@ -39,14 +39,9 @@ static async Task Main(string[] args)
3939
}
4040

4141
HtmlConverter converter = new HtmlConverter(mainPart);
42-
// HeaderPart headerPart = mainPart.AddNewPart<HeaderPart>();
43-
//FooterPart footerPart = mainPart.AddNewPart<FooterPart>();
4442
converter.RenderPreAsTable = true;
4543
Body body = mainPart.Document.Body;
4644

47-
await converter.ParseHeader(@"<a href=""www.github.com"">
48-
<img src=""data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg=="" alt=""Red dot"" /> Red Dot</a>");
49-
5045
await converter.ParseBody(html);
5146
mainPart.Document.Save();
5247

src/Html2OpenXml/Expressions/HtmlDomExpression.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ private static Dictionary<string, Func<IElement, HtmlDomExpression>> InitKnownTa
6767
{ TagNames.Strong, el => new PhrasingElementExpression((IHtmlElement) el, new Bold()) },
6868
{ TagNames.Sub, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Subscript }) },
6969
{ TagNames.Sup, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Superscript }) },
70+
{ TagNames.Svg, el => new SvgExpression((AngleSharp.Svg.Dom.ISvgSvgElement) el) },
7071
{ TagNames.Table, el => new TableExpression((IHtmlTableElement) el) },
7172
{ TagNames.Time, el => new PhrasingElementExpression((IHtmlElement) el) },
7273
{ TagNames.U, el => new PhrasingElementExpression((IHtmlElement) el, new Underline() { Val = UnderlineValues.Single }) },

src/Html2OpenXml/Expressions/Image/ImageExpression.cs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,11 @@
1111
*/
1212
using System;
1313
using System.Threading;
14+
using AngleSharp.Dom;
1415
using AngleSharp.Html.Dom;
16+
using AngleSharp.Svg.Dom;
1517
using DocumentFormat.OpenXml;
18+
using DocumentFormat.OpenXml.Packaging;
1619
using DocumentFormat.OpenXml.Wordprocessing;
1720
using HtmlToOpenXml.IO;
1821

@@ -57,14 +60,24 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node)
5760
preferredSize.Height = imgNode.DisplayHeight;
5861
}
5962

60-
var (imageObjId, drawingObjId) = IncrementDrawingObjId(context);
61-
6263
HtmlImageInfo? iinfo = context.ImageLoader.Download(src, CancellationToken.None)
6364
.ConfigureAwait(false).GetAwaiter().GetResult();
6465

6566
if (iinfo == null)
6667
return null;
6768

69+
if (iinfo.TypeInfo == ImagePartType.Svg)
70+
{
71+
var imagePart = context.HostingPart.GetPartById(iinfo.ImagePartId);
72+
using var stream = imagePart.GetStream(System.IO.FileMode.Open);
73+
using var sreader = new System.IO.StreamReader(stream);
74+
imgNode.Insert(AdjacentPosition.AfterBegin, sreader.ReadToEnd());
75+
76+
var svgNode = imgNode.FindChild<ISvgSvgElement>();
77+
if (svgNode is null) return null;
78+
return SvgExpression.CreateSvgDrawing(context, svgNode, iinfo.ImagePartId, preferredSize);
79+
}
80+
6881
if (preferredSize.IsEmpty)
6982
{
7083
preferredSize = iinfo.Size;
@@ -78,6 +91,7 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node)
7891
long widthInEmus = new Unit(UnitMetric.Pixel, preferredSize.Width).ValueInEmus;
7992
long heightInEmus = new Unit(UnitMetric.Pixel, preferredSize.Height).ValueInEmus;
8093

94+
var (imageObjId, drawingObjId) = IncrementDrawingObjId(context);
8195
var img = new Drawing(
8296
new wp.Inline(
8397
new wp.Extent() { Cx = widthInEmus, Cy = heightInEmus },
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
2+
*
3+
* This source is subject to the Microsoft Permissive License.
4+
* Please see the License.txt file for more information.
5+
* All other rights reserved.
6+
*
7+
* THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
8+
* KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
9+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
10+
* PARTICULAR PURPOSE.
11+
*/
12+
using AngleSharp.Svg.Dom;
13+
using DocumentFormat.OpenXml;
14+
using DocumentFormat.OpenXml.Packaging;
15+
using DocumentFormat.OpenXml.Wordprocessing;
16+
using DocumentFormat.OpenXml.Office2019.Drawing.SVG;
17+
using System.Text;
18+
19+
using a = DocumentFormat.OpenXml.Drawing;
20+
using pic = DocumentFormat.OpenXml.Drawing.Pictures;
21+
using wp = DocumentFormat.OpenXml.Drawing.Wordprocessing;
22+
using AngleSharp.Text;
23+
24+
namespace HtmlToOpenXml.Expressions;
25+
26+
/// <summary>
27+
/// Process the parsing of a <c>svg</c> element.
28+
/// </summary>
29+
sealed class SvgExpression(ISvgSvgElement node) : ImageExpressionBase(node)
30+
{
31+
// The <svg> element this expression renders; same instance that is handed to the base class.
private readonly ISvgSvgElement svgNode = node;
32+
33+
34+
/// <summary>
/// Stores the markup of the inline <c>svg</c> element as a new SVG image part
/// of the main document part and builds the drawing that references it.
/// </summary>
/// <param name="context">The parsing context giving access to the main document part.</param>
/// <returns>The drawing embedding the newly created SVG image part.</returns>
protected override Drawing? CreateDrawing(ParsingContext context)
35+
{
36+
var imgPart = context.MainPart.AddImagePart(ImagePartType.Svg);
37+
// The serialized element (OuterHtml) is written as UTF-8 into the image part.
using var stream = new System.IO.MemoryStream(Encoding.UTF8.GetBytes(svgNode.OuterHtml), writable: false);
38+
imgPart.FeedData(stream);
39+
var imagePartId = context.MainPart.GetIdOfPart(imgPart);
40+
// No preferred size is supplied here: only the svg's own width/height attributes can size the drawing.
// NOTE(review): if those attributes are missing or invalid, the fallback below uses Size.Empty,
// which presumably yields a zero-sized extent - confirm this is acceptable.
return CreateSvgDrawing(context, svgNode, imagePartId, Size.Empty);
41+
}
42+
43+
/// <summary>
/// Builds an inline drawing that displays the SVG image part identified by <paramref name="imagePartId"/>.
/// </summary>
/// <param name="context">The parsing context, passed to <c>IncrementDrawingObjId</c> to obtain the object ids.</param>
/// <param name="svgNode">The svg element whose <c>width</c>/<c>height</c> attributes and
/// <c>title</c>/<c>desc</c> children are read.</param>
/// <param name="imagePartId">The relationship id of the image part holding the SVG content.</param>
/// <param name="preferredSize">Size in pixels used only when the svg element does not carry
/// both a valid <c>width</c> and a valid <c>height</c> attribute.</param>
/// <returns>The drawing element wrapping the picture.</returns>
internal static Drawing CreateSvgDrawing(ParsingContext context, ISvgSvgElement svgNode, string imagePartId, Size preferredSize)
44+
{
45+
var width = Unit.Parse(svgNode.GetAttribute("width"));
46+
var height = Unit.Parse(svgNode.GetAttribute("height"));
47+
long widthInEmus, heightInEmus;
48+
// The svg's own dimensions win, but only when both of them parse successfully.
if (width.IsValid && height.IsValid)
49+
{
50+
widthInEmus = width.ValueInEmus;
51+
heightInEmus = height.ValueInEmus;
52+
}
53+
else
54+
{
55+
// Otherwise fall back to the caller-provided size, interpreted as pixels.
widthInEmus = new Unit(UnitMetric.Pixel, preferredSize.Width).ValueInEmus;
56+
heightInEmus = new Unit(UnitMetric.Pixel, preferredSize.Height).ValueInEmus;
57+
}
58+
59+
var (imageObjId, drawingObjId) = IncrementDrawingObjId(context);
60+
61+
// Name comes from the first <title> descendant, or "Picture <imageObjId>" when there is none.
string? title = svgNode.QuerySelector("title")?.TextContent?.CollapseAndStrip() ?? "Picture " + imageObjId;
62+
// Description comes from the first <desc> descendant, or is left empty.
string? description = svgNode.QuerySelector("desc")?.TextContent?.CollapseAndStrip() ?? string.Empty;
63+
64+
var img = new Drawing(
65+
new wp.Inline(
66+
new wp.Extent() { Cx = widthInEmus, Cy = heightInEmus },
67+
new wp.EffectExtent() { LeftEdge = 0L, TopEdge = 0L, RightEdge = 0L, BottomEdge = 0L },
68+
new wp.DocProperties() { Id = drawingObjId, Name = title, Description = description },
69+
new wp.NonVisualGraphicFrameDrawingProperties {
70+
GraphicFrameLocks = new a.GraphicFrameLocks() { NoChangeAspect = true }
71+
},
72+
new a.Graphic(
73+
new a.GraphicData(
74+
new pic.Picture(
75+
new pic.NonVisualPictureProperties {
76+
NonVisualDrawingProperties = new pic.NonVisualDrawingProperties() {
77+
Id = imageObjId, Name = title
78+
},
79+
NonVisualPictureDrawingProperties = new()
80+
},
81+
new pic.BlipFill(
82+
new a.Blip(
83+
new a.BlipExtensionList(
84+
// The SVG part is referenced through an SVGBlip placed inside a blip extension.
new a.BlipExtension(new SVGBlip { Embed = imagePartId }) {
85+
Uri = "{96DAC541-7B7A-43D3-8B79-37D633B846F1}"
86+
})
87+
// The blip itself also embeds the same SVG part, as no raster version is produced here.
) { Embed = imagePartId /* ideally, that should be a png representation of the svg */ },
88+
new a.Stretch(
89+
new a.FillRectangle())
90+
),
91+
new pic.ShapeProperties(
92+
new a.Transform2D(
93+
new a.Offset() { X = 0L, Y = 0L },
94+
// Same extent as the inline container above.
new a.Extents() { Cx = widthInEmus, Cy = heightInEmus }),
95+
new a.PresetGeometry(
96+
new a.AdjustValueList()
97+
) { Preset = a.ShapeTypeValues.Rectangle })
98+
)
99+
) { Uri = "http://schemas.openxmlformats.org/drawingml/2006/picture" })
100+
) { DistanceFromTop = (UInt32Value)0U, DistanceFromBottom = (UInt32Value)0U, DistanceFromLeft = (UInt32Value)0U, DistanceFromRight = (UInt32Value)0U }
101+
);
102+
103+
return img;
104+
}
105+
}

src/Html2OpenXml/IO/ImageHeader.cs

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
using System.IO;
1919
using System.Linq;
2020
using System.Text;
21+
using System.Xml.XPath;
2122

2223
namespace HtmlToOpenXml.IO;
2324

@@ -29,7 +30,7 @@ public static class ImageHeader
2930
// https://en.wikipedia.org/wiki/List_of_file_signatures
3031

3132
#pragma warning disable CS1591 // Missing XML comment for publicly visible type or member
32-
public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf }
33+
public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf, Xml }
3334
#pragma warning restore CS1591 // Missing XML comment for publicly visible type or member
3435

3536
private static readonly byte[] pngSignatureBytes = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
@@ -41,7 +42,8 @@ public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf }
4142
{ Encoding.UTF8.GetBytes("GIF89a"), FileType.Gif }, // animated gif
4243
{ pngSignatureBytes, FileType.Png },
4344
{ new byte[] { 0xff, 0xd8 }, FileType.Jpeg },
44-
{ new byte[] { 0x1, 0, 0, 0 }, FileType.Emf }
45+
{ new byte[] { 0x1, 0, 0, 0 }, FileType.Emf },
46+
{ Encoding.UTF8.GetBytes("<?xml "), FileType.Xml }, // Xml so potentially Svg
4547
};
4648

4749
private static readonly int MaxMagicBytesLength = imageFormatDecoders
@@ -83,6 +85,7 @@ public static Size GetDimensions(Stream stream)
8385
case FileType.Jpeg: return DecodeJfif(reader);
8486
case FileType.Png: return DecodePng(reader);
8587
case FileType.Emf: return DecodeEmf(reader);
88+
case FileType.Xml: return DecodeXml(stream);
8689
default: return Size.Empty;
8790
}
8891
}
@@ -279,5 +282,27 @@ private static Size DecodeEmf(SequentialBinaryReader reader)
279282

280283
return new Size(widthInPixel, heightInPixel);
281284
}
282-
}
283285

286+
/// <summary>
/// Reads the dimensions of an SVG image from an XML stream.
/// </summary>
/// <param name="stream">The stream holding the XML document.</param>
/// <returns>The size in pixels taken from the <c>width</c> and <c>height</c> attributes of the
/// root <c>svg</c> element, or <see cref="Size.Empty"/> when the root is not an <c>svg</c> element,
/// when either attribute does not parse to a valid unit, or when reading the document fails.</returns>
private static Size DecodeXml(Stream stream)
287+
{
288+
try
289+
{
290+
// NOTE(review): the document is loaded with XPathDocument's default reader settings; for
// untrusted SVG input, confirm that DTD/external entity processing is disabled on every
// target framework. Also confirm the stream is positioned at the start of the document here.
var nav = new XPathDocument(stream).CreateNavigator();
291+
// use local-name() to ignore any xml namespace
292+
// Only the document's root element is matched: a nested svg element is not considered.
nav = nav.SelectSingleNode("/*[local-name() = 'svg']");
293+
if (nav is not null)
294+
{
295+
var width = Unit.Parse(nav.GetAttribute("width", string.Empty));
296+
var height = Unit.Parse(nav.GetAttribute("height", string.Empty));
297+
// Both dimensions are required; a single valid one is not enough to report a size.
if (width.IsValid && height.IsValid)
298+
return new Size(width.ValueInPx, height.ValueInPx);
299+
}
300+
}
301+
// Best effort: any SystemException raised while loading or querying the document
// (this includes XML parsing errors) is reported as an unknown size instead of propagating.
catch (SystemException)
302+
{
303+
return Size.Empty;
304+
}
305+
306+
// Root element is not svg, or its width/height could not be parsed.
return Size.Empty;
307+
}
308+
}

src/Html2OpenXml/IO/ImagePrefetcher.cs

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,6 @@ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader)
110110
if (response?.Content == null)
111111
return null;
112112

113-
HtmlImageInfo info = new HtmlImageInfo(src);
114113
using (response)
115114
{
116115
// For requested url with no filename, we need to read the media mime type if provided
@@ -123,16 +122,19 @@ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader)
123122
}
124123

125124
var ipart = hostingPart.AddImagePart(type);
125+
Size originalSize;
126126
using (var outputStream = ipart.GetStream(FileMode.Create))
127127
{
128128
response.Content.CopyTo(outputStream);
129129

130130
outputStream.Seek(0L, SeekOrigin.Begin);
131-
info.Size = GetImageSize(outputStream);
131+
originalSize = GetImageSize(outputStream);
132132
}
133133

134-
info.ImagePartId = hostingPart.GetIdOfPart(ipart);
135-
return info;
134+
return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) {
135+
TypeInfo = type,
136+
Size = originalSize
137+
};
136138
}
137139
}
138140

@@ -143,20 +145,20 @@ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader)
143145
{
144146
if (DataUri.TryCreate(src, out var dataUri))
145147
{
146-
Size size;
148+
Size originalSize;
147149
knownContentType.TryGetValue(dataUri!.Mime, out PartTypeInfo type);
148150
var ipart = hostingPart.AddImagePart(type);
149151
using (var outputStream = ipart.GetStream(FileMode.Create))
150152
{
151153
outputStream.Write(dataUri.Data, 0, dataUri.Data.Length);
152154

153155
outputStream.Seek(0L, SeekOrigin.Begin);
154-
size = GetImageSize(outputStream);
156+
originalSize = GetImageSize(outputStream);
155157
}
156158

157-
return new HtmlImageInfo(src) {
158-
ImagePartId = hostingPart.GetIdOfPart(ipart),
159-
Size = size
159+
return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) {
160+
TypeInfo = type,
161+
Size = originalSize
160162
};
161163
}
162164

src/Html2OpenXml/Primitives/HtmlImageInfo.cs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ namespace HtmlToOpenXml;
1616
/// <summary>
1717
/// Represents an image and its metadata.
1818
/// </summary>
19-
sealed class HtmlImageInfo(string source)
19+
sealed class HtmlImageInfo(string source, string partId)
2020
{
2121
/// <summary>
2222
/// The URI identifying this cached image information.
@@ -26,12 +26,17 @@ sealed class HtmlImageInfo(string source)
2626
/// <summary>
2727
/// The Unique identifier of the ImagePart in the <see cref="MainDocumentPart"/>.
2828
/// </summary>
29-
public string? ImagePartId { get; set; }
29+
public string ImagePartId { get; set; } = partId;
3030

3131
/// <summary>
32-
/// Gets or sets the size of the image
32+
/// Gets or sets the original size of the image.
3333
/// </summary>
3434
public Size Size { get; set; }
35+
36+
/// <summary>
37+
/// Gets or sets the content type of the image.
38+
/// </summary>
39+
public PartTypeInfo TypeInfo { get; set; }
3540
}
3641

3742
/// <summary>

test/HtmlToOpenXml.Tests/BodyTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ public async Task WithGoBackBookmark_ShouldBeAfterAppendedOutput()
7272
Assert.That(goBackBookmark, Is.Not.Null);
7373

7474
HtmlConverter converter = new HtmlConverter(mainPart);
75-
await converter.ParseHtml("<p>Placeholder</p>");
75+
await converter.ParseBody("<p>Placeholder</p>");
7676

7777
Assert.That(mainPart.Document.Body!.LastChild, Is.TypeOf<SectionProperties>());
7878
var paragrahs = mainPart.Document.Body!.Elements<Paragraph>();

test/HtmlToOpenXml.Tests/ImageFormats/ImageHeaderTests.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ public void GuessFormat_ReturnsImageSize((string resourceName, Size expectedSize
2828
yield return ("Resources.html2openxml.emf", new Size(100, 100));
2929
// animated gif:
3030
yield return ("Resources.stan.gif", new Size(252, 318));
31+
yield return ("Resources.kiwi.svg", new Size(612, 502));
3132
}
3233

3334
/// <summary>
@@ -53,6 +54,7 @@ public void PngSof2_ReturnsImageSize()
5354
[TestCase("Resources.html2openxml.gif", ExpectedResult = ImageHeader.FileType.Gif)]
5455
[TestCase("Resources.html2openxml.jpg", ExpectedResult = ImageHeader.FileType.Jpeg)]
5556
[TestCase("Resources.html2openxml.png", ExpectedResult = ImageHeader.FileType.Png)]
57+
[TestCase("Resources.kiwi.svg", ExpectedResult = ImageHeader.FileType.Xml)]
5658
public ImageHeader.FileType GuessFormat_ReturnsFileType(string resourceName)
5759
{
5860
using var imageStream = ResourceHelper.GetStream(resourceName);

0 commit comments

Comments
 (0)