|
| 1 | +<?php |
| 2 | + |
| 3 | +namespace Samwilson\MediaWikiCLI\Command; |
| 4 | + |
| 5 | +use Addwiki\Mediawiki\Api\Client\Action\ActionApi; |
| 6 | +use Addwiki\Mediawiki\Api\Client\Action\Request\ActionRequest; |
| 7 | +use Addwiki\Mediawiki\Api\MediawikiFactory; |
| 8 | +use Addwiki\Mediawiki\Api\Service\CategoryTraverser; |
| 9 | +use Addwiki\Mediawiki\Api\Service\NamespaceGetter; |
| 10 | +use Addwiki\Mediawiki\DataModel\Page; |
| 11 | +use Addwiki\Mediawiki\DataModel\PageIdentifier; |
| 12 | +use Addwiki\Mediawiki\DataModel\Title; |
| 13 | +use GuzzleHttp\Client; |
| 14 | +use Symfony\Component\Console\Command\Command; |
| 15 | +use Symfony\Component\Console\Input\InputInterface; |
| 16 | +use Symfony\Component\Console\Input\InputOption; |
| 17 | +use Symfony\Component\Console\Output\OutputInterface; |
| 18 | + |
/**
 * Console command `export:category`: walks a wiki category (via CategoryTraverser) and saves each
 * member page to disk under the destination directory.
 *
 * For every member it writes, where available:
 *  - the uploaded file, to `<dest>/<site id>/files/<file name>`;
 *  - the main-slot wikitext, to `<dest>/<site id>/<namespace>/<title>.wikitext`;
 *  - the mediainfo-slot content, to `<dest>/<site id>/<namespace>/<title>_mediainfo.json`.
 */
class ExportCategoryCommand extends CommandBase {

	/** @var string Destination directory: the --dest option with the site ID appended. Set in execute(). */
	private $optionDest;

	/** @var ActionApi API client for the selected wiki. Set in execute() before any callback runs. */
	private ActionApi $api;

	/**
	 * Register the command name, description and the --wiki, --category and --dest options.
	 */
	public function configure() {
		parent::configure();
		$this->setName( 'export:category' );
		$this->setDescription( $this->msg( 'command-export-category-desc' ) );
		$this->addOption( 'wiki', 'w', InputOption::VALUE_REQUIRED, $this->msg( 'option-wiki-desc' ) );
		$this->addOption( 'category', 'a', InputOption::VALUE_REQUIRED, $this->msg( 'option-category-desc' ) );
		$this->addOption( 'dest', 'd', InputOption::VALUE_REQUIRED, $this->msg( 'option-dest-desc' ),
			$this->getConfigDirDefault() . 'categories' );
	}

	/**
	 * Resolve the requested category, check that it exists on the wiki, and traverse it, exporting
	 * every page and subcategory encountered through descender().
	 *
	 * @param InputInterface $input
	 * @param OutputInterface $output
	 * @return int The parent's non-zero result if it returned one, Command::FAILURE when the site or
	 *   category cannot be resolved, otherwise Command::SUCCESS.
	 */
	public function execute( InputInterface $input, OutputInterface $output ) {
		$ret = parent::execute( $input, $output );
		if ( $ret ) {
			return $ret;
		}
		$siteInfo = $this->getSite( $input );
		if ( !$siteInfo ) {
			return Command::FAILURE;
		}
		$this->api = $this->getApi( $siteInfo );
		$catTraverser = ( new MediawikiFactory( $this->api ) )->newCategoryTraverser();
		// The same handler exports both ordinary member pages and subcategory pages.
		$catTraverser->addCallback( CategoryTraverser::CALLBACK_PAGE, [ $this, 'descender' ] );
		$catTraverser->addCallback( CategoryTraverser::CALLBACK_CATEGORY, [ $this, 'descender' ] );

		// The category option can be with or without the namespace prefix, and it can be given as any of its aliases.
		// Compare against '' explicitly so that a category literally named "0" is not rejected.
		$categoryName = (string)$input->getOption( 'category' );
		if ( $categoryName === '' ) {
			$this->io->warning( 'Please set the --category option.' );
			return Command::FAILURE;
		}
		$catNamespace = ( new NamespaceGetter( $this->api ) )->getNamespaceByName( 'Category' );
		$catAliases = $catNamespace ? $catNamespace->getAliases() : [];
		$catPrefixes = array_merge( [ 'Category' ], $catAliases );
		foreach ( $catPrefixes as $catPrefix ) {
			$prefix = $catPrefix . ':';
			// Case-insensitive match; strip one prefix only, so that "Category:Cat:Foo" keeps "Cat:Foo"
			// as the page name even when "Cat" happens to be an alias.
			if ( strncasecmp( $categoryName, $prefix, strlen( $prefix ) ) === 0 ) {
				$categoryName = substr( $categoryName, strlen( $prefix ) );
				break;
			}
		}
		if ( trim( $categoryName ) === '' ) {
			// Only a namespace prefix was given (e.g. "Category:").
			$this->io->warning( 'Please set the --category option.' );
			return Command::FAILURE;
		}
		$categoryNamespaceId = 14;
		$catTitle = new Title( 'Category:' . ucfirst( $categoryName ), $categoryNamespaceId );
		$this->io->writeln( 'Downloading ' . $catTitle->getText() );

		$this->optionDest = $input->getOption( 'dest' ) . '/' . $siteInfo['id'];

		// Look the category page up first so a non-existent category is reported instead of silently exporting nothing.
		$catProps = [
			'titles' => $catTitle->getText(),
			'action' => 'query',
			'prop' => 'info',
			'formatversion' => 2,
			'inprop' => 'url',
		];
		$cat = $this->api->request( ActionRequest::simpleGet( 'query', $catProps ) );
		$catPages = $cat['query']['pages'] ?? [];
		$catInfo = reset( $catPages );
		if ( !is_array( $catInfo ) || isset( $catInfo['missing'] ) ) {
			// Fall back to the title when the response carries no URL to show.
			$catUrl = is_array( $catInfo ) && isset( $catInfo['canonicalurl'] )
				? $catInfo['canonicalurl']
				: $catTitle->getText();
			$this->io->error( $this->msg( 'export-cat-not-found', [ $catUrl ] ) );
			return Command::FAILURE;
		}
		$catTraverser->descend( new Page( new PageIdentifier( $catTitle ) ) );
		return Command::SUCCESS;
	}

	/**
	 * Traverser callback: download one category member (its file, wikitext and structured data,
	 * whichever exist) into the destination directory.
	 *
	 * @param Page $member The page or subcategory being visited.
	 * @param Page $rootCat Second callback argument supplied by the traverser; not used here.
	 * @throws \RuntimeException If the API response contains no page data for the member.
	 */
	public function descender( Page $member, Page $rootCat ) {
		$title = $member->getPageIdentifier()->getTitle()->getText();

		// Separate namespace and page names. This splits on the first colon of the title text, so a
		// main-namespace title that itself contains a colon is filed under the text before the colon.
		$firstColon = strpos( $title, ':' );
		$namespace = $firstColon ? substr( $title, 0, $firstColon ) : '(main)';
		$pageTitlePart = $firstColon ? substr( $title, $firstColon + 1 ) : $title;
		$pageTitle = str_replace( ' ', '_', $pageTitlePart );

		$this->io->writeln( "Downloading $title . . . " );
		$pageInfo = $this->api->request( ActionRequest::simpleGet( 'query', [
			'prop' => 'imageinfo|revisions',
			'iiprop' => 'url|sha1|timestamp',
			'titles' => $title,
			'rvprop' => 'content',
			'rvslots' => 'main|mediainfo',
			'formatversion' => 2,
		] ) );

		if ( !isset( $pageInfo['query']['pages'] ) ) {
			// Abort with an exception rather than exit(): exit() would end the process with status 0
			// and bypass the console's own error reporting.
			throw new \RuntimeException( "Unable to get $title" );
		}
		$page = array_shift( $pageInfo['query']['pages'] );

		// File.
		if ( isset( $page['imageinfo'] ) ) {
			$fileUrl = $page['imageinfo'][0]['url'];
			$destFile = $this->optionDest . '/files/' . basename( $fileUrl );
			// Skip the download when an identical copy (same SHA-1) is already on disk.
			if ( !is_file( $destFile ) || sha1_file( $destFile ) !== $page['imageinfo'][0]['sha1'] ) {
				if ( !is_dir( dirname( $destFile ) ) ) {
					$this->io->writeln( 'Creating directory ' . dirname( $destFile ) );
					mkdir( dirname( $destFile ), 0755, true );
				}
				$this->io->writeln( " File: $destFile" );
				( new Client() )->get( $fileUrl, [ 'sink' => $destFile ] );
			}
		}

		// A page may come back without any revision (for instance when it is reported as missing);
		// there is then no wikitext or structured data to save.
		$revisions = $page['revisions'] ?? [];
		$rev = reset( $revisions );
		if ( !is_array( $rev ) ) {
			return;
		}
		$content = $rev['slots']['main']['content'] ?? '';
		$mediaInfo = $rev['slots']['mediainfo']['content'] ?? null;
		// Compare against '' rather than using empty(), which would also discard a page whose text is "0".
		$hasWikitext = trim( $content ) !== '';

		// Both output files live in the same directory; create it before either write so that
		// structured data can be saved even when the wikitext is empty.
		$destWikitext = $this->optionDest . '/' . $namespace . '/' . $pageTitle . '.wikitext';
		$destDir = dirname( $destWikitext );
		if ( ( $hasWikitext || $mediaInfo !== null ) && !is_dir( $destDir ) ) {
			$this->io->writeln( 'Creating directory: ' . $destDir );
			mkdir( $destDir, 0755, true );
		}

		// Wikitext of the page.
		if ( $hasWikitext ) {
			$this->io->writeln( " Wikitext: $destWikitext" );
			file_put_contents( $destWikitext, $content );
		}

		// MediaInfo JSON.
		if ( $mediaInfo !== null ) {
			$destMediaInfo = $this->optionDest . '/' . $namespace . '/' . $pageTitle . '_mediainfo.json';
			$this->io->writeln( " Structured data: $destMediaInfo" );
			file_put_contents( $destMediaInfo, $mediaInfo );
		}
	}
}
0 commit comments