Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix a few issues with infinite recursions if the content contains cyclic curation #898

Merged
merged 15 commits into from
Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ public struct GeneratedCurationWriter {
for (usr, reference) in context.documentationCache.referencesBySymbolID {
// Filter out symbols that aren't in the specified sub hierarchy.
if symbolLink != nil || depthLimit != nil {
guard reference == curationCrawlRoot || context.pathsTo(reference).contains(where: { path in path.suffix(depthLimit ?? .max).contains(curationCrawlRoot)}) else {
guard reference == curationCrawlRoot || context.finitePaths(to: reference).contains(where: { path in path.suffix(depthLimit ?? .max).contains(curationCrawlRoot)}) else {
continue
}
}
Expand Down
3 changes: 1 addition & 2 deletions Sources/SwiftDocC/Indexing/Navigator/NavigatorIndex.swift
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,7 @@ public class NavigatorIndex {
}

/// Indicates the page type of a given item inside the tree.
/// - Note: This information is stored as UInt8 to decrease the required size to store it and make
/// the comparision faster between types.
/// - Note: This information is stored as `UInt8` to decrease the required size to store it and make the comparison faster between types.
public enum PageType: UInt8 {
case root = 0
case article = 1
Expand Down
50 changes: 42 additions & 8 deletions Sources/SwiftDocC/Indexing/Navigator/NavigatorTree.swift
Original file line number Diff line number Diff line change
Expand Up @@ -480,20 +480,54 @@ public class NavigatorTree {

/// Copy the current node and children to new instances preserving the node item.
public func copy() -> NavigatorTree.Node {
// DocC detects cyclic curation and avoids adding it to the topic graph.
// However, the navigator is based on the render node's topic sections which still has the cycle.
//
// Because the navigator nodes are shared instances, the cycle needs to be broken at each occurrence to correctly unroll it.
// For example, consider this hierarchy:
//
// 1 2
// │ │
// ▼ ▼
// 3 ────▶ 4
// ▲ │
// └── 5 ◀─┘
//
// When approaching the cycle from `1`, it unrolls as `1, 3, 4, 5` but when approaching it from `2` it unrolls as `2, 4, 5, 3`.
// This ensures a consistent structure for the navigation tree, no matter which render node was indexed first.
//
// Alternatively we could process the entire graph and break the cycle based on which node has the shortest path from the root.
// However, because DocC already warns about the cyclic curation, it seem sufficient to create a consistent navigation tree,
// even if it's not as small as it could be if we were using a more sophisticated graph algorithm.

return _copy([])
}

/// Private version of the logic to copy the current node and children to new instances preserving the node item.
/// - Parameter hierarchy: The set containing the parent items to avoid entering in an infinite loop.
private func _copy(_ hierarchy: Set<NavigatorItem>) -> NavigatorTree.Node {
/// The private copy implementation which tracks the already encountered items to avoid an infinite recursion.
private func _copy(_ seen: Set<NavigatorItem>) -> NavigatorTree.Node {
let mirror = NavigatorTree.Node(item: item, bundleIdentifier: bundleIdentifier)
guard !hierarchy.contains(item) else { return mirror } // Avoid to enter in an infity loop.
var updatedHierarchy = hierarchy
if let parentItem = parent?.item { updatedHierarchy.insert(parentItem) }
children.forEach { (child) in
let childCopy = child._copy(updatedHierarchy)
guard !seen.contains(item) else { return mirror } // Avoid an infinite recursion.
var seen = seen
seen.insert(item)

// Avoid repeating children that have already occurred in this path through the navigation hierarchy.
let childrenNotAlreadyEncountered = children.filter { !seen.contains($0.item) }
for (child, indexAfter) in zip(childrenNotAlreadyEncountered, 1...) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't there a Swift function enumerated() you could use here, maybe off by 1? Why the explicit call to zip with the range?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The enumerated() function works very similar to this but it starts at 0. I could have used enumerated and then done let indexAfter = index + 1 in the loop body but that would introduce a slight risk of using index instead of indexAfter which is avoided by only having one index variable in the loop body.

// If the child is a group marker, ensure that the group is not empty by checking that it's followed by a non-group.
// If we didn't do this, we could end up with empty groups if all the children had already been encountered higher up in the hierarchy.
if child.item.pageType == NavigatorIndex.PageType.groupMarker.rawValue {
guard indexAfter < childrenNotAlreadyEncountered.endIndex,
childrenNotAlreadyEncountered[indexAfter].item.pageType != NavigatorIndex.PageType.groupMarker.rawValue
else {
// This group is empty. Skip copying it.
continue
}
// This group is not empty. Include a copy of it
}
let childCopy = child._copy(seen)
mirror.add(child: childCopy)
}

return mirror
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
This source file is part of the Swift.org open source project

Copyright (c) 2024 Apple Inc. and the Swift project authors
Licensed under Apache License v2.0 with Runtime Library Exception

See https://swift.org/LICENSE.txt for license information
See https://swift.org/CONTRIBUTORS.txt for Swift project authors
*/

extension DocumentationContext {
/// Options that configure how the context produces node breadcrumbs.
struct PathOptions: OptionSet {
let rawValue: Int

/// Prefer a technology as the canonical path over a shorter path.
static let preferTechnologyRoot = PathOptions(rawValue: 1 << 0)
}

/// Finds all finite paths (breadcrumbs) to the given reference.
///
/// Each path is a list of references for the nodes traversed while walking the topic graph from a root node to, but not including, the given `reference`.
///
/// The first path is the canonical path to the node. The other paths are sorted by increasing length (number of components).
///
/// - Parameters:
/// - reference: The reference to find paths to.
/// - options: Options for how the context produces node breadcrumbs.
/// - Returns: A list of finite paths to the given reference in the topic graph.
func finitePaths(to reference: ResolvedTopicReference, options: PathOptions = []) -> [[ResolvedTopicReference]] {
reverseEdgesGraph
.allFinitePaths(from: reference)
.map { $0.dropFirst().reversed() }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see this pattern in a few places... map, drop first, reversed. Should we consider changing the way paths are returned by DirectedGraph? Or maybe create a utility function inside of DirectedGraph+Paths.swift that does this?

Understanding why we need this here without reading the entire implementation of DirectedGraph is a bit difficult.

Or maybe you just need one of your ascii art examples here in a commit to make it clear what this code expects from DirectedGraph.

.sorted { (lhs, rhs) -> Bool in
// Order a path rooted in a technology as the canonical one.
if options.contains(.preferTechnologyRoot), let first = lhs.first {
return try! entity(with: first).semantic is Technology
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tangential: Is it really safe here to use try! ? I know this was existing code you didn't change here. But are we sure that the first element in a (or in a path that was reversed above) will always be a technology node? Might be worth an assertion in case we have a bug in DirectedGraph somewhere.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But are we sure that the first element in a (or in a path that was reversed above) will always be a technology node?

The first element doesn't have to be a "technology" (tutorials table of contents page), it just needs to exist in the context. The only portion of this line that raises an error is entity(with: first). There's nothing the graph algorithm can do wrong to make it return a node that wasn't in the edges it was created with because the DirectedGraph/Node is just any Hashable, so the graph doesn't know how to create a new value.

This means that every element in every path is guaranteed to appear in topicGraph.reverseEdges.

Unless the topic graph node is virtual and doesn't represent a page, there should always exist a documentation node for that reference in the context.

In theory it's possible that there's either a bug in the graph traversal or in the topic graph reverse edges data so that the start of the path is a virtual node but that would be a really bad bug where it's probably better to crash.

}

return breadcrumbsAreInIncreasingOrder(lhs, rhs)
}
}

/// Finds the shortest finite path (breadcrumb) to the given reference.
///
/// - Parameter reference: The reference to find the shortest path to.
/// - Returns: The shortest path to the given reference, or `nil` if all paths to the reference are infinite (contain cycles).
func shortestFinitePath(to reference: ResolvedTopicReference) -> [ResolvedTopicReference]? {
reverseEdgesGraph
.shortestFinitePaths(from: reference)
.map { $0.dropFirst().reversed() }
.min(by: breadcrumbsAreInIncreasingOrder)
}

/// Finds all the reachable root node references from the given reference.
///
/// > Note:
/// If all paths from the given reference are infinite (contain cycles) then it can't reach any roots and will return an empty set.
///
/// - Parameter reference: The reference to find reachable root node references from.
/// - Returns: The references of the root nodes that are reachable fro the given reference, or `[]` if all paths from the reference are infinite (contain cycles).
func reachableRoots(from reference: ResolvedTopicReference) -> Set<ResolvedTopicReference> {
reverseEdgesGraph.reachableLeafNodes(from: reference)
}

/// The directed graph of reverse edges in the topic graph.
private var reverseEdgesGraph: DirectedGraph<ResolvedTopicReference> {
DirectedGraph(neighbors: topicGraph.reverseEdges)
}
}

/// Compares two breadcrumbs for sorting so that the breadcrumb with fewer components come first and breadcrumbs with the same number of components are sorter alphabetically.
private func breadcrumbsAreInIncreasingOrder(_ lhs: [ResolvedTopicReference], _ rhs: [ResolvedTopicReference]) -> Bool {
// If the breadcrumbs have the same number of components, sort alphabetically to produce stable results.
guard lhs.count != rhs.count else {
return lhs.map({ $0.path }).joined(separator: ",") < rhs.map({ $0.path }).joined(separator: ",")
}
// Otherwise, sort by the number of breadcrumb components.
return lhs.count < rhs.count
}

119 changes: 41 additions & 78 deletions Sources/SwiftDocC/Infrastructure/DocumentationContext.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2218,7 +2218,7 @@ public class DocumentationContext: DocumentationContextDataProviderDelegate {
let automaticallyCurated = autoCurateSymbolsInTopicGraph()

// Crawl the rest of the symbols that haven't been crawled so far in hierarchy pre-order.
allCuratedReferences = try crawlSymbolCuration(in: automaticallyCurated.map(\.child), bundle: bundle, initial: allCuratedReferences)
allCuratedReferences = try crawlSymbolCuration(in: automaticallyCurated.map(\.symbol), bundle: bundle, initial: allCuratedReferences)

// Remove curation paths that have been created automatically above
// but we've found manual curation for in the second crawl pass.
Expand Down Expand Up @@ -2277,19 +2277,18 @@ public class DocumentationContext: DocumentationContextDataProviderDelegate {
/// call `removeUnneededAutomaticCuration(_:)` which walks the list of automatic curations and removes
/// the parent <-> child topic graph relationships that have been obsoleted.
///
/// - Parameter automaticallyCurated: A list of topics that have been automatically curated.
func removeUnneededAutomaticCuration(_ automaticallyCurated: [(child: ResolvedTopicReference, parent: ResolvedTopicReference)]) {
for pair in automaticallyCurated {
let paths = pathsTo(pair.child)

// Collect all current unique parents of the child.
let parents = Set(paths.map({ $0.last?.path }))

// Check if the topic has multiple curation paths
guard parents.count > 1 else { continue }

// The topic has been manually curated, remove the automatic curation now.
topicGraph.removeEdge(fromReference: pair.parent, toReference: pair.child)
/// - Parameter automaticallyCurated: A list of automatic curation records.
func removeUnneededAutomaticCuration(_ automaticallyCurated: [AutoCuratedSymbolRecord]) {
// It might look like it would be correct to check `topicGraph.nodes[symbol]?.isManuallyCurated` here,
// but that would incorrectly remove the only parent if the manual curation and the automatic curation was the same.
//
// Similarly, it might look like it would be correct to only check `parents(of: symbol).count > 1` here,
// but that would incorrectly remove the automatic curation for symbols with different language representations with different parents.
for (symbol, parent, counterpartParent) in automaticallyCurated where parents(of: symbol).count > (counterpartParent != nil ? 2 : 1) {
topicGraph.removeEdge(fromReference: parent, toReference: symbol)
if let counterpartParent {
topicGraph.removeEdge(fromReference: counterpartParent, toReference: symbol)
}
}
}

Expand Down Expand Up @@ -2330,20 +2329,39 @@ public class DocumentationContext: DocumentationContextDataProviderDelegate {
}
}

typealias AutoCuratedSymbolRecord = (symbol: ResolvedTopicReference, parent: ResolvedTopicReference, counterpartParent: ResolvedTopicReference?)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for naming this tuple to simplify all the type annotations below...


/// Curate all remaining uncurated symbols under their natural parent from the symbol graph.
///
/// This will include all symbols that were not manually curated by the documentation author.
/// - Returns: An ordered list of symbol references that have been added to the topic graph automatically.
private func autoCurateSymbolsInTopicGraph() -> [(child: ResolvedTopicReference, parent: ResolvedTopicReference)] {
var automaticallyCuratedSymbols = [(ResolvedTopicReference, ResolvedTopicReference)]()
linkResolver.localResolver.traverseSymbolAndParentPairs { reference, parentReference in
private func autoCurateSymbolsInTopicGraph() -> [AutoCuratedSymbolRecord] {
var automaticallyCuratedSymbols = [AutoCuratedSymbolRecord]()
linkResolver.localResolver.traverseSymbolAndParents { reference, parentReference, counterpartParentReference in
guard let topicGraphNode = topicGraph.nodeWithReference(reference),
let topicGraphParentNode = topicGraph.nodeWithReference(parentReference),
// Check that the node hasn't got any parents from manual curation
// Check that the node isn't already manually curated
!topicGraphNode.isManuallyCurated
else { return }

// Check that the symbol doesn't already have parent's that aren't either language representation's hierarchical parent.
// This for example happens for default implementation and symbols that are requirements of protocol conformances.
guard parents(of: reference).allSatisfy({ $0 == parentReference || $0 == counterpartParentReference }) else {
return
}

guard let topicGraphParentNode = topicGraph.nodeWithReference(parentReference) else { return }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should checks like this be an assertion instead? If we have a parent reference which does not exist in the topic graph, is that a programming error? Or a valid state somehow?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possibly... 🤔

I believe that we expect that every symbol reference known to the PathHierarchyBasedLinkResolver exist in the context and that every symbol reference in the context also exist in the topic graph.

topicGraph.addEdge(from: topicGraphParentNode, to: topicGraphNode)
automaticallyCuratedSymbols.append((child: reference, parent: parentReference))

if let counterpartParentReference {
guard let topicGraphCounterpartParentNode = topicGraph.nodeWithReference(counterpartParentReference) else {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here - why would we have a parent reference to a node that is not in the topic graph?

// If the counterpart parent doesn't exist. Return the auto curation record without the it.
automaticallyCuratedSymbols.append((reference, parentReference, nil))
return
}
topicGraph.addEdge(from: topicGraphCounterpartParentNode, to: topicGraphNode)
}
// Collect a single automatic curation record for both language representation parents.
automaticallyCuratedSymbols.append((reference, parentReference, counterpartParentReference))
}
return automaticallyCuratedSymbols
}
Expand Down Expand Up @@ -2707,15 +2725,9 @@ public class DocumentationContext: DocumentationContextDataProviderDelegate {
return topicGraph.nodes[reference]?.title
}

/**
Traverse the Topic Graph breadth-first, starting at the given reference.
*/
func traverseBreadthFirst(from reference: ResolvedTopicReference, _ observe: (TopicGraph.Node) -> TopicGraph.Traversal) {
guard let node = topicGraph.nodeWithReference(reference) else {
return
}

topicGraph.traverseBreadthFirst(from: node, observe)
/// Returns a sequence that traverses the topic graph in breadth first order from a given reference, without visiting the same node more than once.
func breadthFirstSearch(from reference: ResolvedTopicReference) -> some Sequence<TopicGraph.Node> {
topicGraph.breadthFirstSearch(from: reference)
}

/**
Expand Down Expand Up @@ -2837,55 +2849,6 @@ public class DocumentationContext: DocumentationContextDataProviderDelegate {
.map { $0.reference }
}

/// Options to consider when producing node breadcrumbs.
struct PathOptions: OptionSet {
let rawValue: Int

/// The node is a technology page; sort the path to a technology as canonical.
static let preferTechnologyRoot = PathOptions(rawValue: 1 << 0)
}

/// Finds all paths (breadcrumbs) to the given node reference.
///
/// Each path is an array of references to the symbols from the module symbol to the current one.
/// The first path in the array is always the canonical path to the symbol.
///
/// - Parameters:
/// - reference: The reference to build that paths to.
/// - currentPathToNode: Used for recursion - an accumulated path to "continue" working on.
/// - Returns: A list of paths to the current reference in the topic graph.
func pathsTo(_ reference: ResolvedTopicReference, currentPathToNode: [ResolvedTopicReference] = [], options: PathOptions = []) -> [[ResolvedTopicReference]] {
let nodeParents = parents(of: reference)
guard !nodeParents.isEmpty else {
// The path ends with this node
return [currentPathToNode]
}
var results = [[ResolvedTopicReference]]()
for parentReference in nodeParents {
let parentPaths = pathsTo(parentReference, currentPathToNode: [parentReference] + currentPathToNode)
results.append(contentsOf: parentPaths)
}

// We are sorting the breadcrumbs by the path distance to the documentation root
// so that the first element is the shortest path that we are using as canonical.
results.sort { (lhs, rhs) -> Bool in
// Order a path rooted in a technology as the canonical one.
if options.contains(.preferTechnologyRoot), let first = lhs.first {
return try! entity(with: first).semantic is Technology
}

// If the breadcrumbs have equal amount of components
// sort alphabetically to produce stable paths order.
guard lhs.count != rhs.count else {
return lhs.map({ $0.path }).joined(separator: ",") < rhs.map({ $0.path }).joined(separator: ",")
}
// Order by the length of the breadcrumb.
return lhs.count < rhs.count
}

return results
}

func dumpGraph() -> String {
return topicGraph.nodes.values
.filter { parents(of: $0.reference).isEmpty }
Expand Down