9
9
ConfluencePagesLoader,
10
10
} = require ( "langchain/document_loaders/web/confluence" ) ;
11
11
12
- function validSpaceUrl ( spaceUrl = "" ) {
13
- // Atlassian default URL match
14
- const atlassianPattern = new UrlPattern (
15
- "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*"
16
- ) ;
17
- const atlassianMatch = atlassianPattern . match ( spaceUrl ) ;
18
- if ( atlassianMatch ) {
19
- return { valid : true , result : atlassianMatch } ;
20
- }
21
-
22
- let customMatch = null ;
23
- [
24
- "https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)*" , // Custom Confluence space
25
- "https\\://(:subdomain.):domain.:tld/display/(:spaceKey)*" , // Custom Confluence space + Human-readable space tag.
26
- ] . forEach ( ( matchPattern ) => {
27
- if ( ! ! customMatch ) return ;
28
- const pattern = new UrlPattern ( matchPattern ) ;
29
- customMatch = pattern . match ( spaceUrl ) ;
30
- } ) ;
31
-
32
- if ( customMatch ) {
33
- customMatch . customDomain =
34
- ( customMatch . subdomain ? `${ customMatch . subdomain } .` : "" ) + //
35
- `${ customMatch . domain } .${ customMatch . tld } ` ;
36
- return { valid : true , result : customMatch , custom : true } ;
37
- }
38
-
39
- // No match
40
- return { valid : false , result : null } ;
41
- }
42
-
43
12
async function loadConfluence ( { pageUrl, username, accessToken } ) {
44
13
if ( ! pageUrl || ! username || ! accessToken ) {
45
14
return {
@@ -49,21 +18,16 @@ async function loadConfluence({ pageUrl, username, accessToken }) {
49
18
} ;
50
19
}
51
20
52
- const validSpace = validSpaceUrl ( pageUrl ) ;
53
- if ( ! validSpace . result ) {
21
+ const { valid , result } = validSpaceUrl ( pageUrl ) ;
22
+ if ( ! valid ) {
54
23
return {
55
24
success : false ,
56
25
reason :
57
- "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/*" ,
26
+ "Confluence space URL is not in the expected format of one of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/* or https://customDomain/display /~SPACEID/*" ,
58
27
} ;
59
28
}
60
29
61
- const { subdomain, customDomain, spaceKey } = validSpace . result ;
62
- let baseUrl = `https://${ subdomain } .atlassian.net/wiki` ;
63
- if ( customDomain ) {
64
- baseUrl = `https://${ customDomain } /wiki` ;
65
- }
66
-
30
+ const { apiBase : baseUrl , spaceKey, subdomain } = result ;
67
31
console . log ( `-- Working Confluence ${ baseUrl } --` ) ;
68
32
const loader = new ConfluencePagesLoader ( {
69
33
baseUrl,
@@ -142,4 +106,93 @@ async function loadConfluence({ pageUrl, username, accessToken }) {
142
106
} ;
143
107
}
144
108
109
+ /**
110
+ * A match result for a url-pattern of a Confluence URL
111
+ * @typedef {Object } ConfluenceMatchResult
112
+ * @property {string } subdomain - the subdomain of an organization's Confluence space
113
+ * @property {string } spaceKey - the spaceKey of an organization that determines the documents to collect.
114
+ * @property {string } apiBase - the correct REST API url to use for loader.
115
+ */
116
+
117
/**
 * Generates the correct API base URL for interfacing with the Confluence REST API
 * depending on the URL pattern being used, since there are various ways to
 * host/access a Confluence space.
 *
 * The host is chosen from what the match actually captured:
 *  - `domain` and `tld` present -> self-hosted instance, host is
 *    `[subdomain.]domain.tld`.
 *  - otherwise -> Atlassian cloud, host is `subdomain.atlassian.net`.
 *
 * @param {ConfluenceMatchResult} matchResult - result from `url-pattern`.match.
 *   Matches of the self-hosted patterns additionally carry `domain` and `tld`.
 * @param {boolean} isCustomDomain - when true the `/wiki` subpath is dropped and
 *   the API base is the bare host (used for `/display/` style URLs). Only
 *   meaningful for self-hosted matches.
 * @returns {string} - the resulting REST API URL
 * @throws {Error} if `isCustomDomain` is true but the match has no `domain`/`tld`
 *   to build a host from.
 */
function generateAPIBaseUrl(matchResult = {}, isCustomDomain = false) {
  const { subdomain, domain, tld } = matchResult;
  const subpath = isCustomDomain ? "" : "/wiki";

  // A captured domain + tld means the URL matched one of the self-hosted
  // patterns, so the API lives on that host and not on atlassian.net.
  if (domain && tld) {
    const hostPrefix = subdomain ? `${subdomain}.` : "";
    const customDomain = `${hostPrefix}${domain}.${tld}`;
    return `https://${customDomain}${subpath}`;
  }

  // Previously this path referenced an undeclared `customDomain` and crashed
  // with a ReferenceError; fail with an explicit message instead.
  if (isCustomDomain) {
    throw new Error(
      "Cannot build a custom domain Confluence API base URL without a domain and tld."
    );
  }

  return `https://${subdomain}.atlassian.net${subpath}`;
}
131
+
132
/**
 * Validates a Confluence space URL and extracts the pieces needed to load it.
 *
 * The URL is tried against the known Confluence URL layouts in order and the
 * first layout that matches decides the result. The returned `result` is the
 * raw `url-pattern` match with an extra `apiBase` property produced by
 * `generateAPIBaseUrl`.
 *
 * @param {string} spaceUrl - The organization's Confluence URL to parse
 * @returns {{
 *   valid: boolean,
 *   result: (ConfluenceMatchResult|null),
 * }} `valid` is true only when the match carries its own `spaceKey` property;
 *   `result` is null when no layout matched at all.
 */
function validSpaceUrl(spaceUrl = "") {
  // Order matters: the Atlassian-hosted layout is checked before the generic
  // host layouts.
  const candidates = [
    {
      // Default Atlassian-hosted Confluence URL.
      pattern: new UrlPattern(
        "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*"
      ),
      isCustomDomain: false,
    },
    {
      // Custom host that still exposes the /wiki/spaces/ path.
      // NOTE(review): this layout is handed to generateAPIBaseUrl without the
      // custom-domain flag - confirm the API base it yields targets this host.
      pattern: new UrlPattern(
        "https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)*"
      ),
      isCustomDomain: false,
    },
    {
      // Custom host using the human-readable /display/ path.
      pattern: new UrlPattern(
        "https\\://(:subdomain.):domain.:tld/display/(:spaceKey)*"
      ),
      isCustomDomain: true,
    },
  ];

  for (const { pattern, isCustomDomain } of candidates) {
    const match = pattern.match(spaceUrl);
    if (!match) continue;

    // The first matching layout wins, even if it did not capture a spaceKey.
    const valid = match.hasOwnProperty("spaceKey");
    const apiBase = generateAPIBaseUrl(match, isCustomDomain);
    return { valid, result: { ...match, apiBase } };
  }

  // No match
  return { valid: false, result: null };
}
197
+
145
198
module . exports = loadConfluence ;
0 commit comments