# Jump to content

# User:Polbot/source/Time sorting.pl

# From Wikipedia, the free encyclopedia
use strict;
use Perlwikipedia;

# --- Configuration -----------------------------------------------------------
# Password used for the 'Polbot' account at login below.
my $Polbot_password = '(bot password)';
# Passed as the second argument to every get_catmembers_api() call in this file.
my $cat_limit = 5000;
# Edit summary attached to every sortkey edit made by check_and_fix_key().
my $edit_summary = "Improving sortkey for time-based category, per [[Wikipedia:Bots/Requests for approval/Polbot 9]]";

# --- Login -------------------------------------------------------------------
# $pw is the shared client object used by the rest of the script.
print "\nLogging in to Wikipedia.\n";
my $pw = Perlwikipedia->new;
$pw->{mech}->agent('Bot/polbot');
my $login_status = $pw->login('Polbot', $Polbot_password);

# Only a status that string-equals 0 is treated as a successful login.
if ($login_status ne 0) {
	die "I can't log in.";
}

# Years
#
# Walks [[Category:1]] through [[Category:2008]]. For each member whose title
# is the category name itself, or the category name followed by a space and
# more text, the expected sortkey is computed and handed to check_and_fix_key().
# Members in the Category: namespace are additionally scanned by check_subcats().

my $year = 1;

for (; $year < 2009; $year++) {
	my $cat_name = "$year";
	print "\n ** [[Category:$cat_name]] - ";
	my @pages = $pw->get_catmembers_api("Category:$cat_name", $cat_limit);
	print scalar(@pages) . " members\n";

	PAGE:
	foreach my $page (@pages) {
		# Eponymous article, e.g. [[1881]] in [[Category:1881]]:
		# expected sortkey is a single space.
		if ($page =~ m/^$cat_name$/) {
			print "[[$page]] is in [[Category:$cat_name]].\n";
			check_and_fix_key($cat_name, $page, ' ');
			next PAGE;
		}

		# Anything that is not "<year> <rest>" (optionally prefixed with
		# "Category:") is of no interest.
		next PAGE unless $page =~ m/^(?:Category\:)?$cat_name (.*)$/;

		# e.g. [[1881 deaths]] or [[Category:1881 in music]] in [[Category:1881]]
		my $wanted_key = $1;
		print "[[$page]] is in [[Category:$cat_name]].\n";

		# Drop a leading "by ", "of " or "in ", then uppercase the first character.
		$wanted_key =~ s/^(?:by|of|in) (.*)/$1/;
		$wanted_key =~ s/^(.)/\U$1\E/;
		check_and_fix_key($cat_name, $page, $wanted_key);

		# For a category member (e.g. [[Category:1881 deaths]]), also look
		# through its own members (e.g. [[1881 deaths by country]]).
		if ($page =~ m/^Category:(.*)$/) {
			check_subcats($1);
		}
	}
}

# Decades
#
# Walks [[Category:10s]], [[Category:20s]], ... up to [[Category:2000s]] and
# applies the same sortkey rules as the other time-category loops in this file.

my $decade = 10;

for (; $decade < 2010; $decade += 10) {
	my $cat_name = "$decade" . "s";
	print "\n ** [[Category:$cat_name]]\n";
	my @pages = $pw->get_catmembers_api("Category:$cat_name", $cat_limit);

	PAGE:
	foreach my $page (@pages) {
		# Eponymous article, e.g. [[1880s]] in [[Category:1880s]]:
		# expected sortkey is a single space.
		if ($page =~ m/^$cat_name$/) {
			print "[[$page]] is in [[Category:$cat_name]].\n";
			check_and_fix_key($cat_name, $page, ' ');
			next PAGE;
		}

		# Skip members that are not "<decade> <rest>" (optionally prefixed
		# with "Category:").
		next PAGE unless $page =~ m/^(?:Category\:)?$cat_name (.*)$/;

		# e.g. [[1880s deaths]] in [[Category:1880s]]
		my $wanted_key = $1;
		print "[[$page]] is in [[Category:$cat_name]].\n";

		# Drop a leading "by ", "of " or "in ", then uppercase the first character.
		$wanted_key =~ s/^(?:by|of|in) (.*)/$1/;
		$wanted_key =~ s/^(.)/\U$1\E/;
		check_and_fix_key($cat_name, $page, $wanted_key);

		# For a category member, also look through its own members.
		if ($page =~ m/^Category:(.*)$/) {
			check_subcats($1);
		}
	}
}

# Centuries
#
# Walks [[Category:1st century]] through [[Category:21st century]] and applies
# the same sortkey rules as the year and decade loops.
#
# Each ordinal is listed exactly once, so no century category is fetched and
# processed a second time.

my @centuries = qw(
	1st 2nd 3rd 4th 5th 6th 7th 8th 9th 10th
	11th 12th 13th 14th 15th 16th 17th 18th 19th 20th
	21st
);

foreach my $century (@centuries) {
	my $cat_name = "$century century";
	print "\n ** [[Category:$cat_name]]\n";
	my @catmembers = $pw->get_catmembers_api("Category:$cat_name", $cat_limit);
	
	foreach my $catmember (@catmembers) {
		if ($catmember =~ m/^$cat_name$/) {
			# article [[17th century]] in [[Category:17th century]]:
			# expected sortkey is a single space.
			
			print "[[$catmember]] is in [[Category:$cat_name]].\n";
			my $should_sortkey = ' ';
			check_and_fix_key($cat_name, $catmember, $should_sortkey);
			
		} elsif ($catmember =~ m/^(?:Category\:)?$cat_name (.*)$/) {
			# article [[17th century deaths]] in [[Category:17th century]]
			
			my $should_sortkey = $1;
			print "[[$catmember]] is in [[Category:$cat_name]].\n";
			# Drop a leading "by ", "of " or "in ", then uppercase the first character.
			$should_sortkey =~ s/^(?:by|of|in) (.*)/$1/;
			$should_sortkey =~ s/^(.)/\U$1\E/;
			check_and_fix_key($cat_name, $catmember, $should_sortkey);
			
			# If it's a category, check its own members too
			# (e.g., for [[Category:17th century deaths]]).
			if ($catmember =~ m/^Category:(.*)$/) {
				check_subcats($1);
			}
		}
		# We don't care about other catmembers.
	}
}

# check_subcats($cat_name)
#
# Fetches the members of [[Category:$cat_name]] and, for each member whose
# title is either exactly $cat_name or "$cat_name <rest>" (optionally prefixed
# with "Category:"), computes the expected sortkey and passes it to
# check_and_fix_key(). Other members are ignored. Does not descend further.
#
#   $cat_name - category title without the "Category:" prefix.
#
# $cat_name is an arbitrary page title, so it is quoted with \Q...\E before
# being interpolated into a pattern; otherwise characters such as parentheses
# or dots in the title would be interpreted as regex syntax.
sub check_subcats {
	my $cat_name = shift;
	print " looking thru [[Category:$cat_name]] - ";
	my @catmembers = $pw->get_catmembers_api("Category:$cat_name", $cat_limit);
	print scalar(@catmembers) . " members\n";
	foreach my $catmember (@catmembers) {
		if ($catmember =~ m/^\Q$cat_name\E$/) {
			# article [[1970s in aviation]] in [[Category:1970s in aviation]]:
			# expected sortkey is a single space.
			
			print " [[$catmember]] is in [[Category:$cat_name]].\n";
			my $should_sortkey = ' ';
			check_and_fix_key($cat_name, $catmember, $should_sortkey);
	
		} elsif ($catmember =~ m/^(?:Category\:)?\Q$cat_name\E (.*)$/) {
			# article [[1881 establishments by country]] (or [[Category:1881 establishments of companies]]) in [[Category:1881 establishments]]
			
			my $should_sortkey = $1;
			print "[[$catmember]] is in [[Category:$cat_name]].\n";
			# Drop a leading "by ", "of " or "in ", then uppercase the first character.
			$should_sortkey =~ s/^(?:by|of|in) (.*)/$1/;
			$should_sortkey =~ s/^(.)/\U$1\E/;
			check_and_fix_key($cat_name, $catmember, $should_sortkey);
		}
		# We don't care about other catmembers.
	}	
}

# check_and_fix_key($cat_name, $catmember, $should_sortkey)
#
# Fetches the text of page $catmember, finds its first
# [[Category:$cat_name]] / [[Category:$cat_name|sortkey]] link (matched
# case-insensitively, tolerating whitespace around the parts) and compares the
# existing sortkey with $should_sortkey. If they differ, the link is rewritten
# as [[Category:$cat_name|$should_sortkey]], the page is saved with
# $edit_summary, and a line is appended to User:Polbot/time_sorting_log.
# If no such link is found in the page text, only a message is printed.
#
#   $cat_name       - category title without the "Category:" prefix.
#   $catmember      - title of the page to inspect and possibly edit.
#   $should_sortkey - the sortkey the page ought to have in that category.
sub check_and_fix_key {
	my ($cat_name, $catmember, $should_sortkey) = @_;
	
	# $cat_name is an arbitrary title: quote it so that characters such as
	# parentheses or dots are matched literally. The pattern is compiled once
	# so the match and the substitution are guaranteed to agree.
	my $cat_link = qr/\[\[\s*Category\s*:\s*\Q$cat_name\E\s*(\|\s*[^\]]*)?\s*\]\]/i;
	
	my $wiki = $pw->get_text($catmember);
	if ($wiki =~ $cat_link) {
		# The capture is undefined when the link carries no "|sortkey" part;
		# treat that as an empty sortkey.
		my $is_sortkey = defined $1 ? $1 : '';
		$is_sortkey =~ s/^\|//;
		
		if ($is_sortkey ne $should_sortkey) {
			print " sortkey '$is_sortkey' -> '$should_sortkey'\n";
			
			my $new_link = "[[Category:${cat_name}|${should_sortkey}]]";
			$wiki =~ s/$cat_link/$new_link/;
			$pw->edit($catmember, $wiki, $edit_summary);
			
			# Log: re-read the log page and append one line describing this change.
			$wiki = $pw->get_text("User:Polbot/time_sorting_log");
			$wiki .= "[[$catmember]]: sortkey for [[:Category:$cat_name]] changed from '$is_sortkey' to '$should_sortkey'\n";
			$pw->edit("User:Polbot/time_sorting_log", $wiki, "Logging");
	
		} else { print " sortkey is already correct.\n"; }
	} else { print " Category must be templated in\n"; }	
}