Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a script to check if a wiki is OK for enabling ReplaceText #500

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
148 changes: 148 additions & 0 deletions maintenance/replaceTextEligible.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
<?php

namespace Miraheze\MirahezeMagic\Maintenance;

/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Maintenance
* @author Alex
* @version 1.0
*/

// Resolve the MediaWiki installation directory: use the MW_INSTALL_PATH
// environment variable when it is set, otherwise fall back to the directory
// three levels above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP !== false ? $IP : __DIR__ . '/../../..';

// Pull in the Maintenance base class that the script below extends.
require_once "$IP/maintenance/Maintenance.php";

use Exception;
use Maintenance;
use Wikimedia\Rdbms\SelectQueryBuilder;;

/**
 * Maintenance script that reports whether ReplaceText can safely be enabled
 * on the current wiki.
 *
 * A wiki is considered unsafe when the latest revision of any live page, or
 * the latest archived revision of any deleted page, is stored in the `text`
 * table with the `gzip` flag set.
 */
class ReplaceTextEligible extends Maintenance {
	public function __construct() {
		parent::__construct();

		// The newline must live in a double-quoted string; inside single
		// quotes "\n" would be printed literally.
		$this->addDescription(
			"Checks if the current wiki is eligible for enabling ReplaceText\n" .
			' See https://meta.miraheze.org/wiki/Tech:Noticeboard?oldid=414759#The_state_of_the_ReplaceText_extension'
		);
	}

	/**
	 * Look for compressed latest revisions among live and deleted pages and
	 * print a verdict.
	 */
	public function execute() {
		$dbr = $this->getDB( DB_REPLICA );

		// Regular pages
		$this->output( "Processing regular pages\n" );
		$problematicPages = $this->findCompressedPages( $dbr );

		// Deleted pages
		// These can be undeleted on-wiki, and if so, they may also cause issues with ReplaceText
		$this->output( "Processing deleted pages\n" );
		$problematicDeletedPages = $this->findCompressedDeletedPages( $dbr );

		if ( $problematicPages === [] && $problematicDeletedPages === [] ) {
			$this->output( "There's no problem with this wiki's pages; enabling ReplaceText in this wiki is safe.\n" );
			return;
		}

		$this->output( "ReplaceText should not be enabled on this wiki.\n" );
		if ( $problematicPages !== [] ) {
			$this->output( "The following pages' latest revisions are compressed:\n" );
			$this->output( implode( ', ', $problematicPages ) . "\n" );
		}
		if ( $problematicDeletedPages !== [] ) {
			$this->output( "The following deleted pages' latest revisions are compressed:\n" );
			$this->output( implode( ', ', $problematicDeletedPages ) . "\n" );
			$this->output( "If these pages are undeleted with ReplaceText enabled, usage of the extension will cause problems.\n" );
		}
	}

	/**
	 * Find live pages whose latest revision is stored gzip-compressed.
	 *
	 * @param \Wikimedia\Rdbms\IDatabase $dbr Replica connection
	 * @return string[] Prefixed titles of the affected pages
	 */
	private function findCompressedPages( $dbr ): array {
		$query = $dbr->newSelectQueryBuilder()
			->select( [ 'ns' => 'page_namespace', 'title' => 'page_title' ] )
			// A revision can have several slots; report each page once.
			->distinct()
			->from( 'page' );

		$rows = $this->restrictToGzippedText( $query, $dbr, 'page_latest' )
			->caller( __METHOD__ )
			->fetchResultSet();

		return $this->formatTitles( $rows );
	}

	/**
	 * Find deleted pages whose most recent archived revision is stored
	 * gzip-compressed.
	 *
	 * @param \Wikimedia\Rdbms\IDatabase $dbr Replica connection
	 * @return string[] Prefixed titles of the affected deleted pages
	 */
	private function findCompressedDeletedPages( $dbr ): array {
		// Highest archived revision ID per deleted page, used as a derived table.
		$latestArchived = $dbr->newSelectQueryBuilder()
			->select( [ 'latest_rev_id' => 'MAX(ar_rev_id)' ] )
			->from( 'archive' )
			->groupBy( 'ar_page_id' );

		$query = $dbr->newSelectQueryBuilder()
			->select( [ 'ns' => 'ar_namespace', 'title' => 'ar_title' ] )
			->distinct()
			->from( 'archive' )
			->join( $latestArchived, 'latest_archived', 'latest_archived.latest_rev_id = ar_rev_id' );

		$rows = $this->restrictToGzippedText( $query, $dbr, 'ar_rev_id' )
			->caller( __METHOD__ )
			->fetchResultSet();

		return $this->formatTitles( $rows );
	}

	/**
	 * Join a query from a revision ID column through slots and content to the
	 * text table, keeping only rows whose text blob carries the gzip flag.
	 *
	 * @param SelectQueryBuilder $query Query that already exposes $revIdField
	 * @param \Wikimedia\Rdbms\IDatabase $dbr Replica connection
	 * @param string $revIdField Column holding the revision ID to inspect
	 * @return SelectQueryBuilder The same builder, for chaining
	 */
	private function restrictToGzippedText( SelectQueryBuilder $query, $dbr, string $revIdField ): SelectQueryBuilder {
		// Text-table addresses look like "tt:<old_id>"; SQL SUBSTRING is
		// 1-based, so the ID starts at position 4.
		$textId = $dbr->buildIntegerCast( $dbr->buildSubString( 'content_address', 4 ) );

		return $query
			->join( 'slots', null, "slot_revision_id = $revIdField" )
			->join( 'content', null, 'content_id = slot_content_id' )
			->join( 'text', null, "old_id = $textId" )
			->where( [
				// Only addresses that point into the text table can be parsed this way.
				'content_address' . $dbr->buildLike( 'tt:', $dbr->anyString() ),
				'old_flags' . $dbr->buildLike( $dbr->anyString(), 'gzip', $dbr->anyString() ),
			] );
	}

	/**
	 * Turn rows carrying `ns` and `title` fields into prefixed page titles.
	 *
	 * @param iterable $rows Result rows with `ns` and `title` properties
	 * @return string[]
	 */
	private function formatTitles( iterable $rows ): array {
		$titles = [];
		foreach ( $rows as $row ) {
			$titles[] = \MediaWiki\Title\Title::makeTitle( (int)$row->ns, $row->title )->getPrefixedText();
		}

		return $titles;
	}
}

// Name the class defined in this file as the maintenance script to run, then
// include the file referenced by RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): presumably that file reads $maintClass and executes the script
// when this file is the entry point — confirm against MediaWiki's Maintenance docs.
$maintClass = ReplaceTextEligible::class;
require_once RUN_MAINTENANCE_IF_MAIN;
Loading