Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Gitlab functionalities #1095

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions config/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,22 @@
"updateTemplateId": 7
}
},
"reporter": {
"githubIssues": {
"repositories": {
"declarations": "OpenTermsArchive/contrib-declarations"
}
},
"gitlabIssues": {
"repositories": {
"declarations": "p2b/contrib-declarations"
}
}
},
"dataset": {
"title": "sandbox",
"versionsRepositoryURL": "https://github.com/OpenTermsArchive/sandbox",
"versionsRepositoryURLGitLab": "https://gitlab.com/p2b/contrib-versions",
"publishingSchedule": "30 8 * * MON"
}
}
Expand Down
3 changes: 3 additions & 0 deletions env.example
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This probably should be .env.example 😉

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
GITHUB_TOKEN=
GITLAB_TOKEN=
GITLAB_RELEASES_TOKEN=
33 changes: 26 additions & 7 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@
"swagger-jsdoc": "^6.2.8",
"swagger-ui-express": "^5.0.0",
"winston": "^3.3.3",
"winston-mail": "^2.0.0"
"winston-mail": "^2.0.0",
"axios": "^1.7.2"
},
"devDependencies": {
"@commitlint/cli": "^19.0.3",
Expand Down
65 changes: 65 additions & 0 deletions scripts/dataset/assets/README.templateGitLab.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import config from 'config';

// Locale tag passed to `Date.prototype.toLocaleDateString` when formatting the dates shown in the README.
// NOTE(review): `EN` is not an ISO 3166 region code; `en-GB` or `en-US` may have been intended — confirm.
const LOCALE = 'en-EN';
// Date formatting options: numeric year, full month name and numeric day.
const DATE_OPTIONS = { year: 'numeric', month: 'long', day: 'numeric' };

/**
 * Builds the complete README content of a dataset release: a level-one
 * heading made of the release title, a blank line, then the descriptive body.
 *
 * @param {object} params
 * @param {Date} params.releaseDate - Date of the release, forwarded to `title`.
 * @param {number} params.servicesCount - Number of services, forwarded to `body`.
 * @param {Date} params.firstVersionDate - Date of the oldest version, forwarded to `body`.
 * @param {Date} params.lastVersionDate - Date of the newest version, forwarded to `body`.
 * @returns {string} The README as Markdown text.
 */
export default function readme({ releaseDate, servicesCount, firstVersionDate, lastVersionDate }) {
  const heading = title({ releaseDate });
  const description = body({ servicesCount, firstVersionDate, lastVersionDate });

  return `# Open Terms Archive — ${heading}

${description}`;
}

/**
 * Builds the title of a dataset release from the configured dataset title
 * (`dataset.title`) and the release date formatted with `LOCALE` and
 * `DATE_OPTIONS`.
 *
 * @param {object} params
 * @param {Date} params.releaseDate - Date of the release.
 * @returns {string} A title of the form "<dataset title> — <formatted date> dataset".
 */
export function title({ releaseDate }) {
  const formattedReleaseDate = releaseDate.toLocaleDateString(LOCALE, DATE_OPTIONS);
  const datasetTitle = config.get('dataset.title');

  return `${datasetTitle} — ${formattedReleaseDate} dataset`;
}

/**
 * Builds the descriptive body of the dataset README: scope of the dataset,
 * link to the GitLab versions repository (`dataset.versionsRepositoryURLGitLab`),
 * filesystem layout and license.
 *
 * Dates are formatted with `LOCALE` and `DATE_OPTIONS`.
 *
 * @param {object} params
 * @param {number} params.servicesCount - Number of service providers in the dataset.
 * @param {Date} params.firstVersionDate - Date of the oldest version in the dataset.
 * @param {Date} params.lastVersionDate - Date of the newest version in the dataset.
 * @returns {string} The README body as Markdown text.
 */
export function body({ servicesCount, firstVersionDate, lastVersionDate }) {
  const firstDate = firstVersionDate.toLocaleDateString(LOCALE, DATE_OPTIONS);
  const lastDate = lastVersionDate.toLocaleDateString(LOCALE, DATE_OPTIONS);

  const versionsRepositoryURLGitLab = config.get('dataset.versionsRepositoryURLGitLab');

  // File names follow year-month-day order, as the examples show (e.g. 2021-11-14T12-36-45Z),
  // hence the `YYYY-MM-DD` placeholder. The leading spaces inside the tree are significant:
  // they keep the entries of the last service provider nested under it.
  return `This dataset consolidates the contractual documents of ${servicesCount} service providers, in all their versions that were accessible online between ${firstDate} and ${lastDate}.

This dataset is tailored for datascientists and other analysts. You can also explore all these versions interactively on [${versionsRepositoryURLGitLab}](${versionsRepositoryURLGitLab}).

It has been generated with [Open Terms Archive](https://opentermsarchive.org).

### Dataset format

This dataset represents each version of a document as a separate [Markdown](https://spec.commonmark.org/0.30/) file, nested in a directory with the name of the service provider and in a directory with the name of the terms type. The filesystem layout will look like below.

\`\`\`
├ README.md
├┬ Service provider 1 (e.g. Facebook)
│├┬ Terms type 1 (e.g. Terms of Service)
││├ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2021-08-01T01-03-12Z.md)
┆┆┆
││└ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2021-10-03T08-12-25Z.md)
┆┆
│└┬ Terms type X (e.g. Privacy Policy)
│ ├ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2021-05-02T03-02-15Z.md)
┆ ┆
│ └ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2021-11-14T12-36-45Z.md)
└┬ Service provider Y (e.g. Google)
 ├┬ Terms type 1 (e.g. Developer Terms)
 │├ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2019-03-12T04-18-22Z.md)
 ┆┆
 │└ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2021-12-04T22-47-05Z.md)
 └┬ Terms type Z (e.g. Privacy Policy)
  ├ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2021-05-02T03-02-15Z.md)
  └ YYYY-MM-DDTHH-MM-SSZ.md (e.g. 2021-11-14T12-36-45Z.md)
\`\`\`

### License

This dataset is made available under an [Open Database (OdBL) License](https://opendatacommons.org/licenses/odbl/1.0/) by Open Terms Archive Contributors.
`;
}
27 changes: 20 additions & 7 deletions scripts/dataset/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ import config from 'config';
import generateRelease from './export/index.js';
import logger from './logger/index.js';
import publishRelease from './publish/index.js';
import publishReleaseGitLab from './publishGitLab/index.js';

export async function release({ shouldPublish, shouldRemoveLocalCopy, fileName }) {
const releaseDate = new Date();
const archiveName = fileName || `dataset-${config.get('@opentermsarchive/engine.dataset.title')}-${releaseDate.toISOString().replace(/T.*/, '')}`;
const archiveName = fileName || `dataset-${config.get('dataset.title')}-${releaseDate.toISOString().replace(/T.*/, '')}`;
const archivePath = `${path.basename(archiveName, '.zip')}.zip`; // allow to pass filename or filename.zip as the archive name and have filename.zip as the result name

logger.info('Start exporting dataset…');
Expand All @@ -24,13 +25,25 @@ export async function release({ shouldPublish, shouldRemoveLocalCopy, fileName }

logger.info('Start publishing dataset…');

const releaseUrl = await publishRelease({
archivePath,
releaseDate,
stats,
});
if (typeof process.env.GITHUB_TOKEN !== 'undefined') {
const releaseUrl = await publishRelease({
archivePath,
releaseDate,
stats,
});

logger.info(`Dataset published to ${releaseUrl}`);
logger.info(`Dataset published to ${releaseUrl}`);
}

if (typeof process.env.GITLAB_RELEASES_TOKEN !== 'undefined') {
const releaseUrl = await publishReleaseGitLab({
archivePath,
releaseDate,
stats,
});

logger.info(`Dataset published to ${releaseUrl}`);
}

if (!shouldRemoveLocalCopy) {
return;
Expand Down
102 changes: 102 additions & 0 deletions scripts/dataset/publishGitLab/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import fsApi from 'fs';
import path from 'path';
import url from 'url';

import axios from 'axios';

import config from 'config';
import dotenv from 'dotenv';
//import { Octokit } from 'octokit';

import FormData from 'form-data';

import * as readme from '../assets/README.templateGitLab.js';

// Load variables from a local `.env` file, if any, so that `process.env.GITLAB_RELEASES_TOKEN` can be read below.
dotenv.config();

// Base URL of the GitLab REST API (v4) used for project lookup, release creation and asset links.
const gitlabAPIUrl = "https://gitlab.com/api/v4";
// Base URL of the GitLab web interface, used to build the URL of the release asset.
const gitlabUrl = "https://gitlab.com";

/**
 * Publishes a dataset archive as a release of the GitLab project configured in
 * `dataset.versionsRepositoryURLGitLab`.
 *
 * Steps:
 * 1. resolve the numeric project ID from the repository path;
 * 2. create a release tagged with the archive file name (without extension) on `main`;
 * 3. attach the archive to the release through the release links endpoint.
 *
 * Requests are authenticated with the `GITLAB_RELEASES_TOKEN` environment variable.
 *
 * @param {object} params
 * @param {string} params.archivePath - Path of the archive file to publish.
 * @param {Date} params.releaseDate - Date of the release, used in the release name.
 * @param {object} params.stats - Dataset statistics, passed as is to `readme.body`.
 * @returns {Promise<string>} The `direct_asset_url` returned by GitLab for the attached asset.
 * @throws {Error} If the project path cannot be extracted, if the project ID cannot be
 *   obtained (original error in `cause`), or if creating the release or attaching the asset fails.
 */
export default async function publishReleaseGitLab({
  archivePath,
  releaseDate,
  stats,
}) {
  const repositoryURL = config.get('dataset.versionsRepositoryURLGitLab');

  // Keep every path segment: GitLab projects can live in nested subgroups,
  // so the project path is not always limited to "owner/repo".
  const projectPath = new URL(repositoryURL).pathname
    .split('/')
    .filter(Boolean)
    .join('/');

  if (!projectPath) {
    throw new Error(`Cannot extract a GitLab project path from "${repositoryURL}"`);
  }

  const authorization = `Bearer ${process.env.GITLAB_RELEASES_TOKEN}`;

  let projectId;

  try {
    const projectResponse = await axios.get(
      `${gitlabAPIUrl}/projects/${encodeURIComponent(projectPath)}`,
      { headers: { Authorization: authorization } },
    );

    projectId = projectResponse.data.id;
  } catch (error) {
    // Fail fast: without a project ID every following request would target `/projects/null`.
    throw new Error(`Failed to obtain the GitLab project ID of "${projectPath}"`, { cause: error });
  }

  const tagName = path.basename(archivePath, path.extname(archivePath)); // use archive filename as Git tag

  try {
    // First, create the release
    await axios.post(
      `${gitlabAPIUrl}/projects/${projectId}/releases`,
      {
        ref: 'main',
        tag_name: tagName,
        name: readme.title({ releaseDate }),
        description: readme.body(stats),
      },
      {
        headers: {
          Authorization: authorization,
          'Content-Type': 'application/json',
        },
      },
    );

    // Then, upload the ZIP file as an asset to the release
    // NOTE(review): the release links endpoint is documented as taking a `name` and a `url`;
    // confirm that the multipart `file` part is actually stored by GitLab.
    const formData = new FormData();

    formData.append('name', archivePath);
    formData.append(
      'url',
      `${gitlabUrl}/${projectPath}/-/archive/${tagName}/${archivePath}`,
    );
    formData.append('file', fsApi.createReadStream(archivePath), {
      filename: path.basename(archivePath),
    });

    const uploadResponse = await axios.post(
      // The tag name comes from a file name, so escape it before using it as a path segment
      `${gitlabAPIUrl}/projects/${projectId}/releases/${encodeURIComponent(tagName)}/assets/links`,
      formData,
      {
        headers: {
          ...formData.getHeaders(),
          Authorization: authorization,
        },
      },
    );

    return uploadResponse.data.direct_asset_url;
  } catch (error) {
    console.error('Failed to create release or upload ZIP file:', error);
    throw error;
  }
}
Loading
Loading