Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proposal to add Gitlab functionalities #1094

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions config/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,22 @@
"updateTemplateId": 7
}
},
"reporter": {
"githubIssues": {
"repositories": {
"declarations": "OpenTermsArchive/contrib-declarations"
}
},
"gitlabIssues": {
"repositories": {
"declarations": "p2b/contrib-declarations"
}
}
},
"dataset": {
"title": "sandbox",
"versionsRepositoryURL": "https://github.com/OpenTermsArchive/sandbox",
"versionsRepositoryURLGitLab": "https://gitlab.com/p2b/contrib-versions",
"publishingSchedule": "30 8 * * MON"
}
}
Expand Down
3 changes: 3 additions & 0 deletions env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
GITHUB_TOKEN=
GITLAB_TOKEN=
GITLAB_RELEASES_TOKEN=
65 changes: 65 additions & 0 deletions scripts/dataset/assets/README.templateGitLab.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import config from 'config';

// Locale tag passed to `toLocaleDateString` by `title()` and `body()` when rendering dates.
// NOTE(review): "EN" is not an ISO 3166-1 region code; presumably the runtime falls back
// to generic English formatting — confirm whether 'en-GB' or 'en-US' was intended.
const LOCALE = 'en-EN';
// Date formatting options used together with LOCALE: numeric year, spelled-out month, numeric day.
const DATE_OPTIONS = { year: 'numeric', month: 'long', day: 'numeric' };

/**
 * Assembles the complete README of a dataset release: a level-one heading
 * built from `title()`, a blank line, then the text produced by `body()`.
 *
 * @param {object} params
 * @param {Date} params.releaseDate - Date of the release, forwarded to `title()`.
 * @param {number} params.servicesCount - Number of services, forwarded to `body()`.
 * @param {Date} params.firstVersionDate - Date of the oldest version, forwarded to `body()`.
 * @param {Date} params.lastVersionDate - Date of the newest version, forwarded to `body()`.
 * @returns {string} The README content as Markdown.
 */
export default function readme({ releaseDate, servicesCount, firstVersionDate, lastVersionDate }) {
  const heading = `# Open Terms Archive — ${title({ releaseDate })}`;
  const content = body({ servicesCount, firstVersionDate, lastVersionDate });

  return `${heading}\n\n${content}`;
}

/**
 * Builds the human-readable title of a dataset release, combining the
 * configured dataset title with the formatted release date.
 *
 * @param {object} params
 * @param {Date} params.releaseDate - Date of the release.
 * @returns {string} A title of the form "<dataset title> — <date> dataset".
 */
export function title({ releaseDate }) {
  // Format the date before reading the configuration, so failures surface in the same order as before.
  const formattedReleaseDate = releaseDate.toLocaleDateString(LOCALE, DATE_OPTIONS);
  const datasetTitle = config.get('dataset.title');

  return `${datasetTitle} — ${formattedReleaseDate} dataset`;
}

/**
 * Builds the Markdown body of the dataset README: a summary of what the
 * dataset contains, a link to the GitLab versions repository, a description
 * of the on-disk layout and the license notice.
 *
 * @param {object} params
 * @param {number} params.servicesCount - Number of service providers covered by the dataset.
 * @param {Date} params.firstVersionDate - Date of the oldest version in the dataset.
 * @param {Date} params.lastVersionDate - Date of the most recent version in the dataset.
 * @returns {string} The README body as Markdown.
 */
export function body({ servicesCount, firstVersionDate, lastVersionDate }) {
  const firstVersionLabel = firstVersionDate.toLocaleDateString(LOCALE, DATE_OPTIONS);
  const lastVersionLabel = lastVersionDate.toLocaleDateString(LOCALE, DATE_OPTIONS);

  const versionsRepositoryURLGitLab = config.get('dataset.versionsRepositoryURLGitLab');

  // File names follow the ISO 8601 order year-month-day (see the examples, e.g. 2021-11-14),
  // so the placeholder is YYYY-MM-DD. Nested entries of the last provider are indented by
  // one column per level so that the tree reads correctly inside the code fence.
  return `This dataset consolidates the contractual documents of ${servicesCount} service providers, in all their versions that were accessible online between ${firstVersionLabel} and ${lastVersionLabel}.

This dataset is tailored for datascientists and other analysts. You can also explore all these versions interactively on [${versionsRepositoryURLGitLab}](${versionsRepositoryURLGitLab}).

It has been generated with [Open Terms Archive](https://opentermsarchive.org).

### Dataset format

This dataset represents each version of a document as a separate [Markdown](https://spec.commonmark.org/0.30/) file, nested in a directory with the name of the service provider and in a directory with the name of the terms type. The filesystem layout will look like below.

\`\`\`
├ README.md
├┬ Service provider 1 (e.g. Facebook)
│├┬ Terms type 1 (e.g. Terms of Service)
││├ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2021-08-01T01-03-12Z.md)
┆┆┆
││└ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2021-10-03T08-12-25Z.md)
┆┆
│└┬ Terms type X (e.g. Privacy Policy)
│ ├ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2021-05-02T03-02-15Z.md)
┆ ┆
│ └ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2021-11-14T12-36-45Z.md)
└┬ Service provider Y (e.g. Google)
 ├┬ Terms type 1 (e.g. Developer Terms)
 │├ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2019-03-12T04-18-22Z.md)
 ┆┆
 │└ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2021-12-04T22-47-05Z.md)
 └┬ Terms type Z (e.g. Privacy Policy)
  ├ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2021-05-02T03-02-15Z.md)
  └ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2021-11-14T12-36-45Z.md)
\`\`\`

### License

This dataset is made available under an [Open Database (OdBL) License](https://opendatacommons.org/licenses/odbl/1.0/) by Open Terms Archive Contributors.
`;
}
25 changes: 18 additions & 7 deletions scripts/dataset/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ import config from 'config';
import generateRelease from './export/index.js';
import logger from './logger/index.js';
import publishRelease from './publish/index.js';
import publishReleaseGitLab from './publishGitLab/index.js';

export async function release({ shouldPublish, shouldRemoveLocalCopy, fileName }) {
const releaseDate = new Date();
const archiveName = fileName || `dataset-${config.get('@opentermsarchive/engine.dataset.title')}-${releaseDate.toISOString().replace(/T.*/, '')}`;
const archiveName = fileName || `dataset-${config.get('dataset.title')}-${releaseDate.toISOString().replace(/T.*/, '')}`;
const archivePath = `${path.basename(archiveName, '.zip')}.zip`; // allow to pass filename or filename.zip as the archive name and have filename.zip as the result name

logger.info('Start exporting dataset…');
Expand All @@ -24,13 +25,23 @@ export async function release({ shouldPublish, shouldRemoveLocalCopy, fileName }

logger.info('Start publishing dataset…');

const releaseUrl = await publishRelease({
archivePath,
releaseDate,
stats,
});
if (typeof process.env.GITHUB_TOKEN !== 'undefined') {
const releaseUrl = await publishRelease({
archivePath,
releaseDate,
stats,
});
logger.info(`Dataset published to ${releaseUrl}`);
}

logger.info(`Dataset published to ${releaseUrl}`);
if (typeof process.env.GITLAB_RELEASES_TOKEN !== 'undefined') {
const releaseUrl = await publishReleaseGitLab({
archivePath,
releaseDate,
stats,
});
logger.info(`Dataset published to ${releaseUrl}`);
}

if (!shouldRemoveLocalCopy) {
return;
Expand Down
81 changes: 81 additions & 0 deletions scripts/dataset/publishGitLab/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import fsApi from 'fs';
import path from 'path';
import url from 'url';

import config from 'config';
import dotenv from 'dotenv';
import { Octokit } from 'octokit';

import axios from 'axios';
import FormData from 'form-data';

import * as readme from '../assets/README.templateGitLab.js';

// Load variables from a local `.env` file into `process.env`; GITLAB_RELEASES_TOKEN is read from there below.
dotenv.config();

// Base URL of the GitLab instance that hosts the releases, and of its REST API (v4).
const gitlabUrl = 'https://gitlab.com';
const gitlabAPIUrl = `${gitlabUrl}/api/v4`;

/**
 * Publishes a dataset archive as a release of the GitLab versions repository
 * configured under `dataset.versionsRepositoryURLGitLab`.
 *
 * Steps: resolve the numeric project id from the repository path, create a
 * release tagged after the archive file name, then register the archive as a
 * release asset link.
 *
 * Requests are authenticated with the `GITLAB_RELEASES_TOKEN` environment variable.
 *
 * @param {object} params
 * @param {string} params.archivePath - Path to the ZIP archive to publish.
 * @param {Date} params.releaseDate - Release date, used to build the release name.
 * @param {object} params.stats - Dataset statistics, passed to `readme.body()`.
 * @returns {Promise<string>} The `direct_asset_url` returned by GitLab for the asset link.
 * @throws {Error} If the project cannot be resolved (original error in `cause`), or if
 *   creating the release or the asset link fails (original error rethrown).
 */
export default async function publishReleaseGitLab({ archivePath, releaseDate, stats }) {
  const authorizationHeader = { Authorization: `Bearer ${process.env.GITLAB_RELEASES_TOKEN}` };

  // Keep the whole path rather than only its first two components, so that
  // projects nested in subgroups ("group/subgroup/project") are resolved too.
  const repositoryPath = new URL(config.get('dataset.versionsRepositoryURLGitLab')).pathname
    .split('/')
    .filter(component => component)
    .join('/');

  if (!repositoryPath) {
    throw new Error('Configuration key "dataset.versionsRepositoryURLGitLab" does not contain a project path');
  }

  let projectId;

  try {
    const response = await axios.get(`${gitlabAPIUrl}/projects/${encodeURIComponent(repositoryPath)}`, { headers: authorizationHeader });

    projectId = response.data.id;
  } catch (error) {
    // Fail fast: without a project id, the following requests would target "/projects/null/…".
    throw new Error(`Could not retrieve the GitLab project id of "${repositoryPath}"`, { cause: error });
  }

  const tagName = path.basename(archivePath, path.extname(archivePath)); // use archive filename as Git tag

  try {
    // First, create the release
    await axios.post(`${gitlabAPIUrl}/projects/${projectId}/releases`, {
      ref: 'main',
      tag_name: tagName,
      name: readme.title({ releaseDate }),
      description: readme.body(stats),
    }, {
      headers: {
        ...authorizationHeader,
        'Content-Type': 'application/json',
      },
    });

    // Then, attach the ZIP file to the release as an asset link.
    // NOTE(review): this endpoint registers a link (name + url); confirm that GitLab actually
    // stores the attached file. If it does not, the archive has to be uploaded separately
    // and the link pointed at that upload.
    const formData = new FormData();

    formData.append('name', archivePath);
    formData.append('url', `${gitlabUrl}/${repositoryPath}/-/archive/${tagName}/${archivePath}`);
    formData.append('file', fsApi.createReadStream(archivePath), { filename: path.basename(archivePath) });

    const uploadResponse = await axios.post(`${gitlabAPIUrl}/projects/${projectId}/releases/${encodeURIComponent(tagName)}/assets/links`, formData, {
      headers: {
        ...formData.getHeaders(),
        ...authorizationHeader,
      },
    });

    return uploadResponse.data.direct_asset_url;
  } catch (error) {
    console.error('Failed to create release or upload ZIP file:', error);
    throw error;
  }
}
65 changes: 29 additions & 36 deletions src/index.js
Original file line number Diff line number Diff line change
@@ -1,29 +1,24 @@
import { createRequire } from 'module';

import config from 'config';
import cron from 'croner';
import cronstrue from 'cronstrue';

import Archivist from './archivist/index.js';
import logger from './logger/index.js';
import Notifier from './notifier/index.js';
import Reporter from './reporter/index.js';

const require = createRequire(import.meta.url);
import ReporterGitlab from './reporterGitlab/index.js';

export default async function track({ services, types, extractOnly, schedule }) {
const archivist = new Archivist({
recorderConfig: config.get('@opentermsarchive/engine.recorder'),
fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
recorderConfig: config.get('recorder'),
fetcherConfig: config.get('fetcher'),
});

archivist.attach(logger);

await archivist.initialize();

const { version } = require('../package.json');

logger.info(`Start Open Terms Archive engine v${version}\n`);
console.log('Running from src');
logger.info('Start Open Terms Archive\n');

if (services?.length) {
services = services.filter(serviceId => {
Expand All @@ -45,48 +40,46 @@ export default async function track({ services, types, extractOnly, schedule })
return;
}

if (process.env.OTA_ENGINE_SENDINBLUE_API_KEY) {
try {
archivist.attach(new Notifier(archivist.services));
} catch (error) {
logger.error('Cannot instantiate the Notifier module; it will be ignored:', error);
}
if (process.env.SENDINBLUE_API_KEY) {
archivist.attach(new Notifier(archivist.services));
} else {
logger.warn('Environment variable "OTA_ENGINE_SENDINBLUE_API_KEY" was not found; the Notifier module will be ignored');
logger.warn('Environment variable "SENDINBLUE_API_KEY" was not found; the Notifier module will be ignored');
}

if (process.env.OTA_ENGINE_GITHUB_TOKEN) {
if (config.has('@opentermsarchive/engine.reporter.githubIssues.repositories.declarations')) {
try {
const reporter = new Reporter(config.get('@opentermsarchive/engine.reporter'));
if (process.env.GITHUB_TOKEN) {
if (config.has('reporter.githubIssues.repositories.declarations')) {
const reporter = new Reporter(config.get('reporter'));

await reporter.initialize();
archivist.attach(reporter);
} catch (error) {
logger.error('Cannot instantiate the Reporter module; it will be ignored:', error);
}
await reporter.initialize();
archivist.attach(reporter);
} else {
logger.warn('Configuration key "reporter.githubIssues.repositories.declarations" was not found; issues on the declarations repository cannot be created');
}
} else {
logger.warn('Environment variable "OTA_ENGINE_GITHUB_TOKEN" was not found; the Reporter module will be ignored');
logger.warn('Environment variable "GITHUB_TOKEN" was not found; the Reporter module will be ignored');
}

if (process.env.GITLAB_TOKEN) {
if (config.has('reporter.gitlabIssues.repositories.declarations')) {
const reporter = new ReporterGitlab(config.get('reporter'));

await reporter.initialize();
archivist.attach(reporter);
} else {
logger.warn('Configuration key "reporter.gitlabIssues.repositories.declarations" was not found; issues on the declarations repository cannot be created');
}
} else {
logger.warn('Environment variable "GITLAB_TOKEN" was not found; the ReporterGitlab module will be ignored');
}

if (!schedule) {
await archivist.track({ services, types });

return;
}

const trackingSchedule = config.get('@opentermsarchive/engine.trackingSchedule');
const humanReadableSchedule = cronstrue.toString(trackingSchedule);

logger.info('The scheduler is running…');
logger.info(`Terms will be tracked ${humanReadableSchedule.toLowerCase()} in the timezone of this machine`);
logger.info('Terms will be tracked every six hours starting at half past midnight');

cron(
trackingSchedule,
{ protect: job => logger.warn(`Tracking scheduled at ${new Date().toISOString()} were blocked by an unfinished tracking started at ${job.currentRun().toISOString()}`) },
() => archivist.track({ services, types }),
);
cron('30 */6 * * *', () => archivist.track({ services, types }));
}
Loading
Loading