User:ImageRemovalBot/removebot-followup.pl
Appearance
Source code for ImageRemovalBot's second-pass removal, to deal with protected pages and delays in updating the "image usage" table in the database. Requires User:FairuseBot/Pearle.pm and User:FairuseBot/libBot.pm.
#!/usr/bin/perl
# RemoveBot Followup
#
# A bot to remove deleted images from pages.  Checks up on images RemoveBot
# was unable to remove at a 24-hour delay.
#
# Overall flow:
#   1. Log in and make sure no earlier run of this script is still active.
#   2. Read followup.log ("<epoch-seconds> <image title>" per line); entries
#      older than 24 hours are processed now, newer ones are written back.
#   3. For every due image: confirm it is still deleted, find the pages that
#      still use it, remove it from those pages, then re-query the usage
#      list and report anything that could not be removed.
#
# The unqualified helpers (config, botwarnlog, notelog, wikilog,
# DoIHaveMessages, GetPageList, MakeWikiRegex, RemoveImageFromPage) are not
# defined in this file; presumably they are exported by libBot.

use strict;
use warnings;

use lib '/home/mark/perllib';
use lib '/home/mark/Desktop/Projects/Wikibots/dev/common';

#use Date::Calc qw();
#use URI::Escape;
use Fcntl qw(:flock :seek);

use libBot;

my $homedir              = '/home/mark/removebot';
my $permit_interruptions = 0;    # Allow talkpage messages to stop the bot?

my $pidfile      = "$homedir/pid-followup";
my $followup_log = "$homedir/followup.log";

Pearle::init("ImageRemovalBot", "<INSERT PASSWORD HERE>", "$homedir/removebot-followup.log","$homedir/followup-cookies.txt");
config(username => "ImageRemovalBot");

if (!Pearle::login()) {
    exit;
}

# Check for a running copy
if (-e $pidfile) {
    # Possible other copy.  Compare PIDs
    my $old_pid;
    if (open my $pid_in, '<', $pidfile) {
        my $pid_line = <$pid_in>;
        close $pid_in;

        # Only a plain decimal PID is ever handed to the shell below.
        ($old_pid) = $pid_line =~ /\A\s*(\d+)\s*\z/ if defined $pid_line;
    }
    else {
        warn "Cannot read $pidfile: $!\n";
    }

    if (defined $old_pid) {
        my $psresult = `ps -p $old_pid`;
        if (defined $psresult and $psresult =~ /removebot-followup\.pl/) {
            botwarnlog("*Previous run is taking longer than normal\n");
            exit;
        }
    }
}

# Record our own PID.  Failure is reported but not fatal: the PID file only
# guards against overlapping runs.
if (open my $pid_out, '>', $pidfile) {
    print {$pid_out} $$;
    close $pid_out or warn "Cannot finish writing $pidfile: $!\n";
}
else {
    warn "Cannot write $pidfile: $!\n";
}

my @images;

# Process the followup log
if (-e $followup_log) {
    my @new_images;

    # One read/write handle under one exclusive lock covers both the read
    # and the rewrite.  Reopening the file with '>' would truncate it before
    # any lock is held and could discard entries appended in between.
    my $log_fh;
    if (!open $log_fh, '+<', $followup_log) {
        warn "Cannot open $followup_log: $!\n";
        unlink $pidfile;
        exit 1;
    }
    flock $log_fh, LOCK_EX or warn "Cannot lock $followup_log: $!\n";

    # Read the log in
    while (my $line = <$log_fh>) {
        # '.' never matches the newline, so $image needs no chomp.
        my ($date, $image) = $line =~ /(\d+) (.*)/;
        next unless defined $date;    # malformed lines are dropped

        if ($date < time() - 86400) {
            push @images, $image;
        }
        else {
            push @new_images, "$date $image\n";
        }
    }

    # Write out a log containing the entries we aren't going to process now
    seek $log_fh, 0, SEEK_SET      or warn "Cannot rewind $followup_log: $!\n";
    truncate $log_fh, 0            or warn "Cannot truncate $followup_log: $!\n";
    print {$log_fh} @new_images    or warn "Cannot write $followup_log: $!\n";

    # Closing the handle flushes the data and releases the lock.
    close $log_fh or warn "Cannot close $followup_log: $!\n";
}
else {
    Pearle::myLog(2, "No images in followup log\n");
    unlink $pidfile;
    exit;
}

Pearle::myLog(2, "Beginning set at " . time() . "\n");
print join "\n", @images;
print "\n", scalar(@images), " images found\n";

if (scalar(@images) == 0) {
    Pearle::myLog(1, "*No images in log need processing\n");
}

# Edit-summary pieces; identical for every image.
my $removal_prefix  = "Deleted image removed:";
my $removal_comment = "Removing deleted image";

foreach my $image (@images) {
    # Fetch image info
    my $image_data = Pearle::APIQuery(
        titles      => [$image],
        prop        => 'imageinfo',
        meta        => 'userinfo',
        uiprop      => ['hasmsg'],                # Basic data
        list        => 'imageusage',
        iutitle     => $image,
        iunamespace => [0, 10, 12, 14, 100],
        iulimit     => 500,                       # Image usage
    );

    if ($permit_interruptions and DoIHaveMessages($image_data)) {
        # NOTE(review): the remaining due images have already been removed
        # from followup.log at this point and are not re-queued.
        Pearle::myLog(0, "Talkpage message found; exiting on image $image.\n");
        unlink $pidfile;
        exit;
    }

    # Verify the image is still deleted
    if ($image_data !~ /missing=""/) {
        Pearle::myLog(2, "*Image [[:$image]] has been re-uploaded.\n");
        next;
    }

    # Images from Commons.  May have been masked by the deleted version.
    if ($image_data =~ /imagerepository="shared"/) {
        Pearle::myLog(2, "*Commons image [[:$image]] found\n");
        next;
    }

    my @pages = GetPageList($image_data);
    if (scalar(@pages) == 0) {
        notelog("Image $image is already orphaned\n");
        next;
    }

    # Reject titles without a usable "Image:" name before handing anything
    # to MakeWikiRegex or interpolating it into a pattern (an empty pattern
    # would silently reuse the last successful regex).
    my ($raw_image) = $image =~ /Image:(.*)/;
    if (!defined($raw_image) or !length($raw_image)) {
        botwarnlog("*Parse error on image [[:$image]] ()\n");
        next;
    }
    $raw_image = MakeWikiRegex($raw_image);

    # Tested once; decides both the regex shape and whether we edit at all.
    my $is_image = ($image =~ /(\.jpg|\.jpeg|\.png|\.gif|\.svg)$/i) ? 1 : 0;

    # Sanity check
    if (!defined($raw_image) or !length($raw_image) or $image !~ /$raw_image/) {
        my $shown = defined($raw_image) ? $raw_image : '';
        if (!$is_image) {
            Pearle::myLog(2, "*Non-image media file [[:$image]] found.\n");
        }
        botwarnlog("*Parse error on image [[:$image]] ($shown)\n");
        next;
    }

    my $image_regex;
    if (!$is_image) {
        $image_regex = "[ _]*(?:[Ii][Mm][Aa][Gg][Ee]|[Mm][Ee][Dd][Ii][Aa])[ _]*:[ _]*${raw_image}[ _]*";
        Pearle::myLog(2, "*Non-image media file [[:$image]] found.\n");
    }
    else {
        $image_regex = "[ _]*[Ii][Mm][Aa][Gg][Ee][ _]*:[ _]*${raw_image}[ _]*";
    }

    Pearle::myLog(3, "Image regex: $image_regex\n");

    if (!$is_image) {
        # Non-image files are too hard to work with: just list the pages
        # that use the file on the bot's media log page.
        my $page_list = "( " . join("", map { "[[:$_]] " } @pages) . ")";
        wikilog("User talk:ImageRemovalBot/media", "*[[:$image]] $page_list\n");
        next;
    }

    # Link the first "image" in the edit summary to the deleted file.
    my $parsed_removal_comment = $removal_comment;
    $parsed_removal_comment =~ s/image/[[:$image|image]]/;

    foreach my $page (@pages) {
        notelog("Page for removal: $page\n");

        my $hits = RemoveImageFromPage($image, $page, $image_regex, $removal_prefix, $parsed_removal_comment);

        # Only rate-limit when an edit was actually made.
        if ($hits) {
            Pearle::myLog(2, "Removed image $image from article $page ($hits times)\n");
            Pearle::limit();
        }
    }

    # Verify removal
    # Portal removal is too hard to get correct, and we don't really care about it.
    # Template removal isn't possible, and the template usage has already been logged.
    $image_data = Pearle::APIQuery(
        list        => 'imageusage',
        iutitle     => $image,
        iunamespace => [0, 12, 14],
        iulimit     => 500,
    );
    @pages = GetPageList($image_data);
    if (scalar(@pages) != 0) {
        botwarnlog("*Unable to remove all instances of [[:$image]]\n");
        Pearle::myLog(2, "*Unable to remove all instances of [[:$image]]\n");
    }
}

Pearle::myLog(2, "Finished with followup set.\n");

#print "Finished.  Total $total_images removed, $total_processed processed.\n";

unlink $pidfile;