Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[rel/3.8] Handle if RootNamespace contains invalid identifier characters #5069

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

namespace Microsoft.Testing.Platform.MSBuild;

/// <summary>
/// Helpers for turning arbitrary text into a usable namespace name.
/// </summary>
internal static class NamespaceHelpers
{
    /// <summary>
    /// Converts <paramref name="value"/> into a dot-separated sequence of valid identifiers.
    /// </summary>
    /// <remarks>
    /// <para>The input is trimmed, then processed one UTF-16 code unit at a time:</para>
    /// <list type="bullet">
    /// <item><description>A surrogate pair is replaced by a single underscore.</description></item>
    /// <item><description>A character that is valid inside an identifier but not at its start
    /// (for example a digit) is prefixed with an underscore when it begins a segment.</description></item>
    /// <item><description>A '.' is kept as a segment separator only when it follows at least one
    /// identifier character and is not the last character of the input.</description></item>
    /// <item><description>Every other character is replaced by an underscore.</description></item>
    /// </list>
    /// </remarks>
    /// <param name="value">The raw namespace text. Must not be <see langword="null"/>.</param>
    /// <returns>The sanitized namespace; an empty string when the trimmed input is empty.</returns>
    internal static string ToSafeNamespace(string value)
    {
        const char invalidCharacterReplacement = '_';

        value = value.Trim();

        StringBuilder safeValueStr = new(value.Length);

        for (int i = 0; i < value.Length; i++)
        {
            if (i < value.Length - 1 && char.IsSurrogatePair(value[i], value[i + 1]))
            {
                safeValueStr.Append(invalidCharacterReplacement);

                // Skip both chars that make up this symbol.
                i++;
                continue;
            }

            char current = value[i];

            // A new identifier segment starts at the very beginning and right after every emitted '.'.
            bool isFirstCharacterOfIdentifier = safeValueStr.Length == 0 || safeValueStr[safeValueStr.Length - 1] == '.';
            bool isValidFirstCharacter = UnicodeCharacterUtilities.IsIdentifierStartCharacter(current);
            bool isValidPartCharacter = UnicodeCharacterUtilities.IsIdentifierPartCharacter(current);

            // A dot may only separate two segments: it must follow an identifier character
            // (so no leading dot and no ".." producing an empty segment) and must not be the
            // last character of the input (so no trailing dot).
            bool isSegmentSeparator = current == '.' && !isFirstCharacterOfIdentifier && i < value.Length - 1;

            if (isFirstCharacterOfIdentifier && !isValidFirstCharacter && isValidPartCharacter)
            {
                // This character cannot be at the beginning, but is good otherwise. Prefix it with something valid.
                safeValueStr.Append(invalidCharacterReplacement);
                safeValueStr.Append(current);
            }
            else if ((isFirstCharacterOfIdentifier && isValidFirstCharacter) ||
                (!isFirstCharacterOfIdentifier && isValidPartCharacter) ||
                isSegmentSeparator)
            {
                // This character is allowed to be where it is.
                safeValueStr.Append(current);
            }
            else
            {
                safeValueStr.Append(invalidCharacterReplacement);
            }
        }

        return safeValueStr.ToString();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,11 @@ private static void GenerateCode(string language, string rootNamespace, ITaskIte

private static string GetSourceCode(string language, string rootNamespace, string extensionsFragments)
{
if (language != VBLanguageSymbol && !string.IsNullOrEmpty(rootNamespace))
{
rootNamespace = NamespaceHelpers.ToSafeNamespace(rootNamespace);
}

if (language == CSharpLanguageSymbol)
{
return string.IsNullOrEmpty(rootNamespace)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ private static void GenerateEntryPoint(string language, string rootNamespace, IT

private static string GetEntryPointSourceCode(string language, string rootNamespace)
{
if (language != VBLanguageSymbol && !string.IsNullOrEmpty(rootNamespace))
{
rootNamespace = NamespaceHelpers.ToSafeNamespace(rootNamespace);
}

if (language == CSharpLanguageSymbol)
{
return string.IsNullOrEmpty(rootNamespace)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

namespace Microsoft.Testing.Platform.MSBuild;

/// <summary>
/// Classifies Unicode characters according to the identifier grammar
/// (identifier-start-character and identifier-part-character).
/// </summary>
internal static partial class UnicodeCharacterUtilities
{
    /// <summary>
    /// Returns true if the Unicode character can be the first character of an identifier.
    /// </summary>
    /// <param name="ch">The Unicode character.</param>
    public static bool IsIdentifierStartCharacter(char ch)
    {
        // identifier-start-character:
        //   letter-character
        //   _ (the underscore character U+005F)
        if (ch <= '\u007F') // max ASCII
        {
            // Within ASCII only a-z, A-Z and the underscore may start an identifier.
            return ch is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z') or '_';
        }

        // Outside ASCII, anything classified as a letter is accepted.
        UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(ch);
        return IsLetterChar(category);
    }

    /// <summary>
    /// Returns true if the Unicode character can appear inside an identifier.
    /// </summary>
    /// <param name="ch">The Unicode character.</param>
    public static bool IsIdentifierPartCharacter(char ch)
    {
        // identifier-part-character:
        //   letter-character
        //   decimal-digit-character
        //   connecting-character
        //   combining-character
        //   formatting-character
        if (ch <= '\u007F') // max ASCII
        {
            // Within ASCII only a-z, A-Z, 0-9 and the underscore are identifier parts.
            return ch is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z') or (>= '0' and <= '9') or '_';
        }

        UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(ch);

        if (IsLetterChar(category) || IsDecimalDigitChar(category))
        {
            return true;
        }

        return IsConnectingChar(category)
            || IsCombiningChar(category)
            || IsFormattingChar(category);
    }

    /// <summary>
    /// Checks whether the name is a valid Unicode identifier: a start character
    /// followed by zero or more part characters.
    /// </summary>
    /// <param name="name">The candidate identifier; null and empty are rejected.</param>
    public static bool IsValidIdentifier(string? name)
    {
        if (name is null || name.Length == 0)
        {
            return false;
        }

        if (!IsIdentifierStartCharacter(name[0]))
        {
            return false;
        }

        // The first character was validated above, so continue from the second one.
        int position = 1;
        while (position < name.Length)
        {
            if (!IsIdentifierPartCharacter(name[position]))
            {
                return false;
            }

            position++;
        }

        return true;
    }

    /// <summary>
    /// Returns true if the Unicode character is a formatting character (Unicode class Cf).
    /// </summary>
    /// <param name="ch">The Unicode character.</param>
    internal static bool IsFormattingChar(char ch)
    {
        // There are no formatting characters in the ASCII range.
        if (ch <= 127)
        {
            return false;
        }

        return IsFormattingChar(CharUnicodeInfo.GetUnicodeCategory(ch));
    }

    // letter-character:
    //   A Unicode character of classes Lu, Ll, Lt, Lm, Lo, or Nl
    //   A Unicode-escape-sequence representing a character of classes Lu, Ll, Lt, Lm, Lo, or Nl
    private static bool IsLetterChar(UnicodeCategory cat)
        => cat is UnicodeCategory.UppercaseLetter
            or UnicodeCategory.LowercaseLetter
            or UnicodeCategory.TitlecaseLetter
            or UnicodeCategory.ModifierLetter
            or UnicodeCategory.OtherLetter
            or UnicodeCategory.LetterNumber;

    // combining-character:
    //   A Unicode character of classes Mn or Mc
    //   A Unicode-escape-sequence representing a character of classes Mn or Mc
    private static bool IsCombiningChar(UnicodeCategory cat)
        => cat is UnicodeCategory.NonSpacingMark or UnicodeCategory.SpacingCombiningMark;

    // decimal-digit-character:
    //   A Unicode character of the class Nd
    //   A unicode-escape-sequence representing a character of the class Nd
    private static bool IsDecimalDigitChar(UnicodeCategory cat)
        => cat is UnicodeCategory.DecimalDigitNumber;

    // connecting-character:
    //   A Unicode character of the class Pc
    //   A unicode-escape-sequence representing a character of the class Pc
    private static bool IsConnectingChar(UnicodeCategory cat)
        => cat is UnicodeCategory.ConnectorPunctuation;

    /// <summary>
    /// Returns true if the Unicode category is the formatting class (Unicode class Cf).
    /// </summary>
    /// <param name="cat">The Unicode category.</param>
    private static bool IsFormattingChar(UnicodeCategory cat)
        // formatting-character:
        //   A Unicode character of the class Cf
        //   A unicode-escape-sequence representing a character of the class Cf
        => cat is UnicodeCategory.Format;
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ public async Task Microsoft_Testing_Platform_Extensions_ShouldBe_Correctly_Regis
<OutputType>Exe</OutputType>
<!-- Do not warn about package downgrade. NuGet uses alphabetical sort as ordering so -dev or -ci are considered downgrades of -preview. -->
<NoWarn>$(NoWarn);NETSDK1201</NoWarn>
<RootNamespace>(MSBuild Tests)</RootNamespace>
</PropertyGroup>
<ItemGroup>
Expand Down