Skip to content

Commit a598c8e

Browse files
timothycarambatcopePredrag Stojadinovic
authored
1347 human readable confluence url (Mintplex-Labs#1706)
* chore: confluence data connector can now handle custom urls, in addition to default {subdomain}.atlassian.net ones * chore: formatting as per yarn lint * chore: fixing the human readable confluence url fetch baseUrl * chore: fixing the human readable confluence url fetch baseUrl * chore: fixing the human readable confluence url fetch baseUrl * chore: fixing the human readable confluence url fetch baseUrl * chore: fixing the human readable confluence url fetch baseUrl * refactor implementation of various types of Confluence URL patterns --------- Co-authored-by: Predrag Stojadinovic <[email protected]> Co-authored-by: Predrag Stojadinović <[email protected]> Co-authored-by: Predrag Stojadinovic <[email protected]>
1 parent c8c6181 commit a598c8e

File tree

1 file changed

+93
-40
lines changed
  • collector/utils/extensions/Confluence

1 file changed

+93
-40
lines changed

collector/utils/extensions/Confluence/index.js

Lines changed: 93 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -9,37 +9,6 @@ const {
99
ConfluencePagesLoader,
1010
} = require("langchain/document_loaders/web/confluence");
1111

12-
function validSpaceUrl(spaceUrl = "") {
13-
// Atlassian default URL match
14-
const atlassianPattern = new UrlPattern(
15-
"https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*"
16-
);
17-
const atlassianMatch = atlassianPattern.match(spaceUrl);
18-
if (atlassianMatch) {
19-
return { valid: true, result: atlassianMatch };
20-
}
21-
22-
let customMatch = null;
23-
[
24-
"https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)*", // Custom Confluence space
25-
"https\\://(:subdomain.):domain.:tld/display/(:spaceKey)*", // Custom Confluence space + Human-readable space tag.
26-
].forEach((matchPattern) => {
27-
if (!!customMatch) return;
28-
const pattern = new UrlPattern(matchPattern);
29-
customMatch = pattern.match(spaceUrl);
30-
});
31-
32-
if (customMatch) {
33-
customMatch.customDomain =
34-
(customMatch.subdomain ? `${customMatch.subdomain}.` : "") + //
35-
`${customMatch.domain}.${customMatch.tld}`;
36-
return { valid: true, result: customMatch, custom: true };
37-
}
38-
39-
// No match
40-
return { valid: false, result: null };
41-
}
42-
4312
async function loadConfluence({ pageUrl, username, accessToken }) {
4413
if (!pageUrl || !username || !accessToken) {
4514
return {
@@ -49,21 +18,16 @@ async function loadConfluence({ pageUrl, username, accessToken }) {
4918
};
5019
}
5120

52-
const validSpace = validSpaceUrl(pageUrl);
53-
if (!validSpace.result) {
21+
const { valid, result } = validSpaceUrl(pageUrl);
22+
if (!valid) {
5423
return {
5524
success: false,
5625
reason:
57-
"Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/*",
26+
"Confluence space URL is not in the expected format of one of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/* or https://customDomain/display/~SPACEID/*",
5827
};
5928
}
6029

61-
const { subdomain, customDomain, spaceKey } = validSpace.result;
62-
let baseUrl = `https://${subdomain}.atlassian.net/wiki`;
63-
if (customDomain) {
64-
baseUrl = `https://${customDomain}/wiki`;
65-
}
66-
30+
const { apiBase: baseUrl, spaceKey, subdomain } = result;
6731
console.log(`-- Working Confluence ${baseUrl} --`);
6832
const loader = new ConfluencePagesLoader({
6933
baseUrl,
@@ -142,4 +106,93 @@ async function loadConfluence({ pageUrl, username, accessToken }) {
142106
};
143107
}
144108

109+
/**
110+
* A match result for a url-pattern of a Confluence URL
111+
* @typedef {Object} ConfluenceMatchResult
112+
* @property {string} subdomain - the subdomain of an organization's Confluence space
113+
* @property {string} spaceKey - the spaceKey of an organization that determines the documents to collect.
114+
* @property {string} apiBase - the correct REST API url to use for loader.
115+
*/
116+
117+
/**
 * Generates the correct API base URL for interfacing with the Confluence REST API
 * depending on the URL pattern being used, since there are various ways to host/access a
 * Confluence space.
 *
 * Host selection:
 *  - When the match carries `domain` and `tld` (captured only by the self-hosted URL patterns),
 *    the host is rebuilt from the match as `[subdomain.]domain.tld`.
 *  - Otherwise the Atlassian-hosted form `{subdomain}.atlassian.net` is used.
 *
 * Subpath selection:
 *  - `isCustomDomain === true` returns the bare origin (no `/wiki` suffix).
 *  - Otherwise `/wiki` is appended.
 *
 * @param {ConfluenceMatchResult} matchResult - result from `url-pattern`.match
 * @param {boolean} isCustomDomain - determines if we need to coerce the subpath of the provided URL
 * @returns {string} - the resulting REST API URL
 * @throws {Error} when `isCustomDomain` is true but the match has no `domain`/`tld` to build a host from
 */
function generateAPIBaseUrl(matchResult = {}, isCustomDomain = false) {
  const { subdomain, domain, tld } = matchResult;
  const subpath = isCustomDomain ? "" : "/wiki";
  const hasCustomHost = Boolean(domain && tld);

  if (isCustomDomain && !hasCustomHost) {
    // Without domain + tld there is no host to talk to; fail loudly instead of
    // emitting a malformed URL.
    throw new Error(
      "Cannot build a custom Confluence API base URL without a domain and tld."
    );
  }

  if (hasCustomHost) {
    // The subdomain is optional in the self-hosted patterns, so drop it when absent
    // rather than rendering "undefined." into the host.
    const customDomain = [subdomain, domain, tld].filter(Boolean).join(".");
    return `https://${customDomain}${subpath}`;
  }

  return `https://${subdomain}.atlassian.net${subpath}`;
}
131+
132+
/**
 * Validates and parses the correct information from a given Confluence URL.
 *
 * Three URL shapes are tried in order and the first one that matches decides the outcome:
 *  1. Atlassian-hosted:  https://{subdomain}.atlassian.net/wiki/spaces/{spaceKey}...
 *  2. Self-hosted wiki:  https://[subdomain.]domain.tld/wiki/spaces/{spaceKey}...
 *  3. Self-hosted mask:  https://[subdomain.]domain.tld/display/{spaceKey}...
 *
 * A URL that matches a shape but carries no space key is reported as `valid: false`
 * while still returning the parsed result; later shapes are not tried in that case.
 *
 * @param {string} spaceUrl - The organization's Confluence URL to parse
 * @returns {{
 *  valid: boolean,
 *  result: (ConfluenceMatchResult|null),
 * }}
 */
function validSpaceUrl(spaceUrl = "") {
  // `isCustomDomain` is forwarded verbatim to generateAPIBaseUrl; only the /display/
  // shape sets it.
  const candidates = [
    {
      template: "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*",
      isCustomDomain: false,
    },
    {
      template: "https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)*",
      isCustomDomain: false,
    },
    {
      template: "https\\://(:subdomain.):domain.:tld/display/(:spaceKey)*",
      isCustomDomain: true,
    },
  ];

  for (const { template, isCustomDomain } of candidates) {
    const parsed = new UrlPattern(template).match(spaceUrl);
    if (!parsed) continue;

    // spaceKey is an optional segment in every template, so a match alone is not
    // enough to call the URL usable.
    const hasSpaceKey = Object.prototype.hasOwnProperty.call(
      parsed,
      "spaceKey"
    );
    return {
      valid: hasSpaceKey,
      result: {
        ...parsed,
        apiBase: generateAPIBaseUrl(parsed, isCustomDomain),
      },
    };
  }

  // No match
  return { valid: false, result: null };
}
197+
145198
module.exports = loadConfluence;

0 commit comments

Comments
 (0)