Skip to content
This repository was archived by the owner on Jan 28, 2025. It is now read-only.

Commit 2ec09d8

Browse files
authored
Add export:category command
Add a new command to export pages and files in a category. Bug: GH#238
1 parent 9468c08 commit 2ec09d8

File tree

4 files changed

+169
-0
lines changed

4 files changed

+169
-0
lines changed

README.md

+15
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,21 @@ Remove a site from the config file.
7777
* `--wiki` `-w` — The mwcli name of the wiki to use. Use <info>sites:list</info> to list all.
7878
*Required.*
7979

80+
### export:category
81+
82+
Export pages and files in a category (and its subcategories).
83+
84+
export:category [-c|--config [CONFIG]] [-w|--wiki WIKI] [-a|--category CATEGORY] [-d|--dest DEST]
85+
86+
* `--config` `-c` — Path of the Yaml config file to use.
87+
Default: '[CWD]/config.yml'
88+
* `--wiki` `-w` — The mwcli name of the wiki to use. Use <info>sites:list</info> to list all.
89+
*Required.*
90+
* `--category` `-a` — Name of the category to export (with or without the leading 'Category:', and in any language).
91+
*Required.*
92+
* `--dest` `-d` — The destination directory for exported files.
93+
Default: '[CWD]/categories'
94+
8095
### export:contribs
8196

8297
Export a user's contributions.

bin/mwcli

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
require __DIR__.'/../vendor/autoload.php';
55

66
use Samwilson\MediaWikiCLI\Command\AuthCommand;
7+
use Samwilson\MediaWikiCLI\Command\ExportCategoryCommand;
78
use Samwilson\MediaWikiCLI\Command\ExportContribsCommand;
89
use Samwilson\MediaWikiCLI\Command\ExportWikitextCommand;
910
use Samwilson\MediaWikiCLI\Command\ExtensionInstallCommand;
@@ -23,6 +24,7 @@ $application->add(new SitesInfoCommand());
2324
$application->add(new SitesListCommand());
2425
$application->add(new SitesRemoveCommand());
2526
$application->add(new AuthCommand());
27+
$application->add(new ExportCategoryCommand());
2628
$application->add(new ExportContribsCommand());
2729
$application->add(new ExportWikitextCommand());
2830
$application->add(new ExtensionInstallCommand());

i18n/en.json

+4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
"option-wiki-desc": "The mwcli name of the wiki to use. Use <info>sites:list</info> to list all.",
1616
"option-wiki-missing": "Please specify a wiki with the `--wiki` option.",
1717

18+
"command-export-category-desc": "Export pages and files in a category (and its subcategories).",
19+
"option-category-desc": "Name of the category to export (with or without the leading 'Category:', and in any language).",
20+
"export-cat-not-found": "Category not found: $1",
21+
1822
"command-export-contribs-desc": "Export a user's contributions.",
1923
"option-dest-desc": "The destination directory for exported files.",
2024
"option-user-desc": "Export contributions of this username.",

src/Command/ExportCategoryCommand.php

+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
<?php
2+
3+
namespace Samwilson\MediaWikiCLI\Command;
4+
5+
use Addwiki\Mediawiki\Api\Client\Action\ActionApi;
6+
use Addwiki\Mediawiki\Api\Client\Action\Request\ActionRequest;
7+
use Addwiki\Mediawiki\Api\MediawikiFactory;
8+
use Addwiki\Mediawiki\Api\Service\CategoryTraverser;
9+
use Addwiki\Mediawiki\Api\Service\NamespaceGetter;
10+
use Addwiki\Mediawiki\DataModel\Page;
11+
use Addwiki\Mediawiki\DataModel\PageIdentifier;
12+
use Addwiki\Mediawiki\DataModel\Title;
13+
use GuzzleHttp\Client;
14+
use Symfony\Component\Console\Command\Command;
15+
use Symfony\Component\Console\Input\InputInterface;
16+
use Symfony\Component\Console\Input\InputOption;
17+
use Symfony\Component\Console\Output\OutputInterface;
18+
19+
class ExportCategoryCommand extends CommandBase {
20+
21+
/** @var string Destination directory. */
22+
private $optionDest;
23+
24+
/** @var ActionApi */
25+
private ActionApi $api;
26+
27+
public function configure() {
28+
parent::configure();
29+
$this->setName( 'export:category' );
30+
$this->setDescription( $this->msg( 'command-export-category-desc' ) );
31+
$this->addOption( 'wiki', 'w', InputOption::VALUE_REQUIRED, $this->msg( 'option-wiki-desc' ) );
32+
$this->addOption( 'category', 'a', InputOption::VALUE_REQUIRED, $this->msg( 'option-category-desc' ) );
33+
$this->addOption( 'dest', 'd', InputOption::VALUE_REQUIRED, $this->msg( 'option-dest-desc' ),
34+
$this->getConfigDirDefault() . 'categories' );
35+
}
36+
37+
public function execute( InputInterface $input, OutputInterface $output ) {
38+
$ret = parent::execute( $input, $output );
39+
if ( $ret ) {
40+
return $ret;
41+
}
42+
$siteInfo = $this->getSite( $input );
43+
if ( !$siteInfo ) {
44+
return Command::FAILURE;
45+
}
46+
$this->api = $this->getApi( $siteInfo );
47+
$catTraverser = ( new MediawikiFactory( $this->api ) )->newCategoryTraverser();
48+
$catTraverser->addCallback( CategoryTraverser::CALLBACK_PAGE, [ $this, 'descender' ] );
49+
$catTraverser->addCallback( CategoryTraverser::CALLBACK_CATEGORY, [ $this, 'descender' ] );
50+
51+
// The category option can be with or without the namespace prefix, and it can be given as any of its aliases.
52+
$categoryName = $input->getOption( 'category' );
53+
if ( !$categoryName ) {
54+
$this->io->warning( 'Please set the --category option.' );
55+
return Command::FAILURE;
56+
}
57+
$catAliases = ( new NamespaceGetter( $this->api ) )
58+
->getNamespaceByName( 'Category' )
59+
->getAliases();
60+
$catPrefixes = array_merge( [ 'Category' ], $catAliases );
61+
foreach ( $catPrefixes as $catPrefix ) {
62+
if ( str_starts_with( $categoryName, $catPrefix . ':' ) ) {
63+
$categoryName = substr( $categoryName, strlen( $catPrefix ) + 1 );
64+
}
65+
}
66+
$categoryNamespaceId = 14;
67+
$catTitle = new Title( 'Category:' . ucfirst( $categoryName ), $categoryNamespaceId );
68+
$this->io->writeln( 'Downloading ' . $catTitle->getText() );
69+
70+
$this->optionDest = $input->getOption( 'dest' ) . '/' . $siteInfo['id'];
71+
72+
$catProps = [
73+
'titles' => $catTitle->getText(),
74+
'action' => 'query',
75+
'prop' => 'info',
76+
'formatversion' => 2,
77+
'inprop' => 'url',
78+
];
79+
$cat = $this->api->request( ActionRequest::simpleGet( 'query', $catProps ) );
80+
$catInfo = reset( $cat['query']['pages'] );
81+
if ( isset( $catInfo['missing'] ) ) {
82+
$this->io->error( $this->msg( 'export-cat-not-found', [ $catInfo['canonicalurl'] ] ) );
83+
return Command::FAILURE;
84+
}
85+
$catTraverser->descend( new Page( new PageIdentifier( $catTitle ) ) );
86+
return Command::SUCCESS;
87+
}
88+
89+
public function descender( Page $member, Page $rootCat ) {
90+
$title = $member->getPageIdentifier()->getTitle()->getText();
91+
92+
// Sparate namespace and page names.
93+
$firstColon = strpos( $title, ':' );
94+
$namespace = $firstColon ? substr( $title, 0, $firstColon ) : '(main)';
95+
$pageTitlePart = $firstColon ? substr( $title, $firstColon + 1 ) : $title;
96+
$pageTitle = str_replace( ' ', '_', $pageTitlePart );
97+
98+
$this->io->writeln( "Downloading $title . . . " );
99+
$pageInfo = $this->api->request( ActionRequest::simpleGet( 'query', [
100+
'prop' => 'imageinfo|revisions',
101+
'iiprop' => 'url|sha1|timestamp',
102+
'titles' => $title,
103+
'rvprop' => 'content',
104+
'rvslots' => 'main|mediainfo',
105+
'formatversion' => 2,
106+
] ) );
107+
108+
if ( !isset( $pageInfo['query']['pages'] ) ) {
109+
echo "Unable to get $title\n";
110+
exit();
111+
}
112+
$page = array_shift( $pageInfo['query']['pages'] );
113+
114+
// File.
115+
if ( isset( $page['imageinfo'] ) ) {
116+
$fileUrl = $page['imageinfo'][0]['url'];
117+
$destFile = $this->optionDest . '/files/' . basename( $fileUrl );
118+
if ( !is_file( $destFile ) || sha1_file( $destFile ) !== $page['imageinfo'][0]['sha1'] ) {
119+
if ( !is_dir( dirname( $destFile ) ) ) {
120+
$this->io->writeln( 'Creating directory ' . dirname( $destFile ) );
121+
mkdir( dirname( $destFile ), 0755, true );
122+
}
123+
$this->io->writeln( " File: $destFile" );
124+
( new Client() )->get( $fileUrl, [ 'sink' => $destFile ] );
125+
}
126+
}
127+
128+
// Wikitext of the page.
129+
$destWikitext = $this->optionDest . '/' . $namespace . '/' . $pageTitle . '.wikitext';
130+
$rev = reset( $page['revisions'] );
131+
$content = $rev['slots']['main']['content'];
132+
if ( !empty( trim( $content ) ) ) {
133+
if ( !is_dir( dirname( $destWikitext ) ) ) {
134+
$this->io->writeln( 'Creating directory: ' . dirname( $destWikitext ) );
135+
mkdir( dirname( $destWikitext ), 0755, true );
136+
}
137+
$this->io->writeln( " Wikitext: $destWikitext" );
138+
file_put_contents( $destWikitext, $content );
139+
}
140+
141+
// MediaInfo JSON.
142+
if ( isset( $rev['slots']['mediainfo']['content'] ) ) {
143+
$destMediaInfo = $this->optionDest . '/' . $namespace . '/' . $pageTitle . '_mediainfo.json';
144+
$this->io->writeln( " Structured data: $destMediaInfo" );
145+
file_put_contents( $destMediaInfo, $rev['slots']['mediainfo']['content'] );
146+
}
147+
}
148+
}

0 commit comments

Comments
 (0)