Skip to content

Commit

Permalink
Reduce memory requirements when processing templates + template formu…
Browse files Browse the repository at this point in the history
…las (#638)

* fix for issue 606

* fix formatting
  • Loading branch information
meld-cp authored Jul 20, 2024
1 parent 9de96ab commit 92170f1
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 47 deletions.
Binary file added samples/xlsx/TestIssue606_Template.xlsx
Binary file not shown.
1 change: 1 addition & 0 deletions src/MiniExcel/OpenXml/Config.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ internal class Config
public const string SpreadsheetmlXmlStrictns = "http://purl.oclc.org/ooxml/spreadsheetml/main";
public const string SpreadsheetmlXmlRelationshipns = "http://schemas.openxmlformats.org/officeDocument/2006/relationships";
public const string SpreadsheetmlXmlStrictRelationshipns = "http://purl.oclc.org/ooxml/officeDocument/relationships";
public const string SpreadsheetmlXml_x14ac = "http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac";
}
}
91 changes: 64 additions & 27 deletions src/MiniExcel/OpenXml/ExcelOpenXmlTemplate.Impl.cs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,9 @@ public string ToXmlString(string prefix)
}
}

private List<XRowInfo> XRowInfos { get; set; }
private List<XRowInfo> XRowInfos { get; set; }

private readonly List<string> CalcChainCellRefs = new List<string>();

private Dictionary<string, XMergeCell> XMergeCellInfos { get; set; }
public List<XMergeCell> NewXMergeCellInfos { get; private set; }
Expand Down Expand Up @@ -688,7 +690,10 @@ private void WriteSheetXml(Stream stream, XmlDocument doc, XmlNode sheetData, bo

var mergeBaseRowIndex = newRowIndex;
newRowIndex += rowInfo.IEnumerableMercell?.Height ?? 1;
writer.Write(CleanXml(rowXml, endPrefix)); // pass StringBuilder for netcoreapp3.0 or above

// replace formulas
ProcessFormulas( rowXml, newRowIndex );
writer.Write(CleanXml( rowXml, endPrefix)); // pass StringBuilder for netcoreapp3.0 or above

//mergecells
if (rowInfo.RowMercells != null)
Expand Down Expand Up @@ -743,30 +748,6 @@ private void WriteSheetXml(Stream stream, XmlDocument doc, XmlNode sheetData, bo
else
{

// convert cells starting with '$=' into formulas
var cs = row.SelectNodes($"x:c", _ns);
foreach (XmlElement c in cs)
{
/* Target:
<c r="C8" s="3">
<f>SUM(C2:C7)</f>
</c>
*/
var vs = c.SelectNodes($"x:v", _ns);
foreach (XmlElement v in vs)
{
if (!v.InnerText.StartsWith("$="))
{
continue;
}
var fNode = c.OwnerDocument.CreateElement("f", Config.SpreadsheetmlXmlns);
fNode.InnerText = v.InnerText.Substring(2);
c.InsertBefore(fNode, v);
c.RemoveChild(v);
}
}
innerXml = row.InnerXml;

rowXml.Clear()
.Append(outerXmlOpen)
.AppendFormat(@" r=""{0}"">", newRowIndex)
Expand All @@ -775,7 +756,10 @@ private void WriteSheetXml(Stream stream, XmlDocument doc, XmlNode sheetData, bo
.Replace($"{{{{$enumrowstart}}}}", enumrowstart.ToString())
.Replace($"{{{{$enumrowend}}}}", enumrowend.ToString())
.AppendFormat("</{0}>", row.Name);
writer.Write(CleanXml(rowXml, endPrefix)); // pass StringBuilder for netcoreapp3.0 or above

ProcessFormulas( rowXml, newRowIndex );

writer.Write(CleanXml( rowXml, endPrefix)); // pass StringBuilder for netcoreapp3.0 or above

//mergecells
if (rowInfo.RowMercells != null)
Expand Down Expand Up @@ -810,6 +794,59 @@ private void WriteSheetXml(Stream stream, XmlDocument doc, XmlNode sheetData, bo
writer.Write(contents[1]);
}
}

/// <summary>
/// Converts every cell value in <paramref name="rowXml"/> that starts with "$=" into a
/// formula (&lt;f&gt;) element and records the cell's reference in <c>CalcChainCellRefs</c>.
/// </summary>
/// <param name="rowXml">
/// Serialized row element. It is parsed and rewritten in place only when it contains
/// the "$=" marker; otherwise it is left untouched.
/// </param>
/// <param name="rowIndex">
/// Row number used to build a cell reference when a converted cell carries no
/// <c>r</c> attribute of its own.
/// </param>
private void ProcessFormulas( StringBuilder rowXml, int rowIndex )
{
    var rowXmlString = rowXml.ToString();

    // exit early if possible: parsing the row is only worthwhile when a formula marker is present
    if ( !rowXmlString.Contains( "$=" ) )
    {
        return;
    }

    // Parse with the shared namespace manager so prefixes declared outside the row fragment still resolve.
    var settings = new XmlReaderSettings { NameTable = _ns.NameTable };
    var context = new XmlParserContext( null, _ns, "", XmlSpace.Default );

    var d = new XmlDocument();
    using ( var text = new StringReader( rowXmlString ) )
    using ( var reader = XmlReader.Create( text, settings, context ) )
    {
        d.Load( reader );
    }

    // DocumentElement is the root element even if a declaration or comment precedes it.
    var row = d.DocumentElement;
    if ( row == null )
    {
        return;
    }

    // convert cells starting with '$=' into formulas
    var cs = row.SelectNodes( "x:c", _ns );
    for ( var ci = 0; ci < cs.Count; ci++ )
    {
        var c = cs.Item( ci ) as XmlElement;
        if ( c == null )
        {
            continue;
        }
        /* Target:
        <c r="C8" s="3">
        <f>SUM(C2:C7)</f>
        </c>
        */
        var vs = c.SelectNodes( "x:v", _ns );
        foreach ( XmlElement v in vs )
        {
            if ( !v.InnerText.StartsWith( "$=", System.StringComparison.Ordinal ) )
            {
                continue;
            }

            var fNode = c.OwnerDocument.CreateElement( "f", Config.SpreadsheetmlXmlns );
            fNode.InnerText = v.InnerText.Substring( 2 );
            c.InsertBefore( fNode, v );
            c.RemoveChild( v );

            // Prefer the reference that is actually serialized with the cell: the cell's
            // position among its siblings does not equal its column when the row is sparse.
            // Fall back to the computed reference only when the cell has no r attribute.
            var celRef = c.GetAttribute( "r" );
            if ( string.IsNullOrEmpty( celRef ) )
            {
                celRef = ExcelOpenXmlUtils.ConvertXyToCell( ci + 1, rowIndex );
            }
            CalcChainCellRefs.Add( celRef );

            // The value node was just detached; stop instead of continuing to enumerate
            // the node list, and avoid registering the same cell twice.
            break;
        }
    }

    rowXml.Clear();
    rowXml.Append( row.OuterXml );
}

private static string ConvertToDateTimeString(KeyValuePair<string, PropInfo> propInfo, object cellValue)
{
Expand Down
8 changes: 3 additions & 5 deletions src/MiniExcel/OpenXml/ExcelOpenXmlTemplate.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ static ExcelOpenXmlTemplate()
_isExpressionRegex = new Regex("(?<={{).*?(?=}})");
_ns = new XmlNamespaceManager(new NameTable());
_ns.AddNamespace("x", Config.SpreadsheetmlXmlns);
}
_ns.AddNamespace( "x14ac", Config.SpreadsheetmlXml_x14ac );
}

private readonly Stream _stream;
private readonly OpenXmlConfiguration _configuration;
Expand Down Expand Up @@ -118,10 +119,7 @@ public void SaveAsByTemplateImpl(Stream templateStream, object value)
using (var filledStream = entry.Open())
{
sheetIdx++;
var filledDoc = new XmlDocument();
filledDoc.Load(filledStream);
var filledSheetData = filledDoc.SelectSingleNode("/x:worksheet/x:sheetData", _ns);
_calcChainContent.Append(CalcChainHelper.GetCalcChainContentFromSheet(filledSheetData, _ns, sheetIdx));
_calcChainContent.Append( CalcChainHelper.GetCalcChainContent( CalcChainCellRefs, sheetIdx ) );
}
}

Expand Down
24 changes: 10 additions & 14 deletions src/MiniExcel/Utils/calChainHelper.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using System.IO;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Xml;

namespace MiniExcelLibs.Utils
{
Expand All @@ -13,22 +13,18 @@ internal static class CalcChainHelper
// Each <c> element should have an r attribute that specifies the cell's address (e.g., "A1" or "B2").
// The <c> element should also have an i attribute that identifies the sheet containing the cell (the code below writes the sheet index into it).
// https://learn.microsoft.com/en-us/dotnet/api/documentformat.openxml.spreadsheet.calculationchain?view=openxml-2.8.1
public static string GetCalcChainContentFromSheet(in XmlNode sheetData, XmlNamespaceManager ns, int sheetIndex)
{
public static string GetCalcChainContent( List<string> cellRefs, int sheetIndex ) {

StringBuilder calcChainContent = new StringBuilder();
StringBuilder calcChainContent = new StringBuilder();

// each c having f nodes
var cs = sheetData.SelectNodes($"x:row/x:c[./x:f]", ns);
foreach (XmlElement c in cs)
{
calcChainContent.Append($@"<c r=""{c.GetAttribute("r")}"" i=""{sheetIndex}""/>");
}
foreach ( string cr in cellRefs ) {
calcChainContent.Append( $@"<c r=""{cr}"" i=""{sheetIndex}""/>" );
}

return calcChainContent.ToString();
}
return calcChainContent.ToString();
}

public static void GenerateCalcChainSheet(Stream calcChainStream, string calcChainContent)
public static void GenerateCalcChainSheet(Stream calcChainStream, string calcChainContent)
{
using (var writer = new StreamWriter(calcChainStream, Encoding.UTF8))
{
Expand Down
53 changes: 52 additions & 1 deletion tests/MiniExcelTests/MiniExcelIssueTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using Dapper;
using Dapper;
using MiniExcelLibs.Attributes;
using MiniExcelLibs.Csv;
using MiniExcelLibs.Exceptions;
Expand Down Expand Up @@ -3641,5 +3641,56 @@ public void Issue507_3_MismatchedQuoteCsv() {
Assert.Equal(2, getRowsInfo.Length );

}


/// <summary>
/// Regression test for issue 606: filling a template from a collection must complete
/// and produce a non-empty workbook.
/// </summary>
[Fact]
public void Issue606_1()
{
    // excel max rows: 1,048,576
    // before changes: 1999999 => 25.8 GB mem
    // after changes: 1999999 => peaks at 3.2 GB mem (10:20 min)
    // after changes: 100000 => peaks at 222 MB mem (34 sec)
    // The row count is kept small here so the test stays fast.

    var value = new
    {
        Title = "My Title",
        OrderInfo = Enumerable
            .Range( 1, 100 )
            .Select( x => new
            {
                Standard = "standard",
                RegionName = "region",
                DealerName = "department",
                SalesPointName = "region",
                CustomerName = "customer",
                IdentityType = "aaaaaa",
                IdentitySeries = "ssssss",
                IdentityNumber = "nnnnn",
                BirthDate = "date",
                TariffPlanName = "plan",
                PhoneNumber = "num",
                SimCardIcc = "sss",
                BisContractNumber = "eee",
                CreatedAt = "dd.mm.yyyy",
                UserDescription = "fhtyrhthrthrt",
                UserName = "dfsfsdfds",
                PaymentsAmount = "dfhgdfgadfgdfg",
                OrderState = "agafgdafgadfgd",
            })
    };

    var path = Path.Combine
    (
        Path.GetTempPath(),
        string.Concat( nameof( MiniExcelIssueTests ), "_", nameof( Issue606_1 ), ".xlsx" )
    );

    var templateFileName = @"../../../../../samples/xlsx/TestIssue606_Template.xlsx";

    try
    {
        MiniExcel.SaveAsByTemplate( path, Path.GetFullPath( templateFileName ), value );

        // Without these checks the test could only fail by throwing.
        Assert.True( File.Exists( path ) );
        Assert.True( new FileInfo( path ).Length > 0 );
    }
    finally
    {
        // Do not leave the generated workbook behind in the temp directory.
        File.Delete( path );
    }
}

}
}

0 comments on commit 92170f1

Please sign in to comment.