Jump to content

User:DustyBot/dusty er.php

From Wikipedia, the free encyclopedia
#!/usr/bin/php
<?php
/* Locate the bot's install directory under the user's home directory.
 * $_ENV is only populated when php.ini's variables_order contains "E", so
 * fall back to getenv() when the superglobal has no HOME entry. */
$dusty_dir = (isset($_ENV["HOME"]) ? $_ENV["HOME"] : getenv("HOME"))."/DustyBot";

/* The wp_* and wptmpl_* helpers used throughout this script are not defined in
 * this file (presumably they come from dustylib.php), so fail immediately if
 * the library cannot be loaded rather than continuing and taking the lock. */
require $dusty_dir."/dustylib.php";

/* Link to the bot approval page; appended to edit summaries and written into the lock file */
$task_link = "Wikipedia:Bots/Requests for approval/DustyBot 5";
/* Defaults, in days. Each may be overridden by the on-wiki archive settings template. */
$review_days = 30;          /* minimum time a review stays open */
$inactive_wait_days = 7;    /* minimum time without edits before closing */
$notify_wait_days = 3;      /* lead time between notifying the user and closing */

/* Build the list of full review titles found on the WP:ER page.
 *   $title - title of the WP:ER page; used as the prefix of every result
 *   $page  - wikitext of that page
 * wp_list_subpages() is expected to yield subpage name => section name; only
 * entries whose section is "Current requests" are kept.
 * Returns a list of "<title>/<subpage>" strings. */
function list_reviews($title, $page) {
    $reviews = array();
    foreach (wp_list_subpages($title, $page) as $subpage => $section) {
        if ($section != "Current requests")
            continue;
        $reviews[] = $title."/".$subpage;
    }
    return $reviews;
}

/* Returns the user requesting the review, given the wikitext of the review subpage.
 * The page is scanned line by line for the first {{User}} or {{User2}} template,
 * and that template's first positional argument is returned.
 * Returns null when no such template is present (callers test with empty()). */
function get_review_username($page) {
    foreach (explode("\n", $page) as $l) {
        $utmpl_str = wp_find_template("User", $l);
        if (empty($utmpl_str))
            $utmpl_str = wp_find_template("User2", $l);
        if (empty($utmpl_str))
            continue;
        return wptmpl_get_arg(wp_parse_template($utmpl_str), 0);
    }
    /* Previously an undefined variable was returned here */
    return null;
}

/* Returns a map of archived reviews, keyed by the review subpage title (with
 * underscores converted to spaces); each value is the archive line as found.
 * Content at the beginning of the archive page, before the first "==X=="
 * heading, is stored in a special value called "header".
 * Blank lines inside sections are dropped. Any other line inside a section
 * that is not a "*[[W...:E.../name|label]]" bullet aborts the script via die(). */
function get_archive_sections($page) {
    $sections = array("header" => "");
    $archive_section = null;
    foreach (explode("\n", $page) as $l) {
        $l = rtrim($l);
        /* Section heading such as ==A== or ==0-9== */
        if (preg_match('/==([a-z0-9-]+)==/i', $l, $regs)) {
            $archive_section = $regs[1];
            continue;
        }
        /* Everything before the first heading belongs to the header */
        if ($archive_section === null) {
            $sections["header"] .= $l."\n";
            continue;
        }
        if (empty($l))
            continue;
        /* Bullet linking to a review subpage; capture the link target */
        if (preg_match('~\*[[:space:]]*\[\[(W[a-z]+:E[a-z _]+/.*)\|.*\]\].*~i', $l, $regs)) {
            $rlink = str_replace("_", " ", $regs[1]);
            $sections[$rlink] = $l;
            continue;
        }
        die("Unhandled archive line ".$l."\n");
    }
    return $sections;
}

/* Given a map of timestamps, return the distinct UTC year strings.
 * The keys of $tc_dates are ignored. Each year appears once, in order of first
 * appearance, and is keyed by the position of the timestamp that first
 * produced it (the same shape array_unique() would give). */
function list_archive_years($tc_dates) {
    $seen = array();
    $result = array();
    $position = 0;
    foreach ($tc_dates as $timestamp) {
        $year = gmdate("Y", $timestamp);
        if (!isset($seen[$year])) {
            $seen[$year] = true;
            $result[$position] = $year;
        }
        $position++;
    }
    return $result;
}

/* Returns the name of a subpage for a full title of the form
 * "Namespace:Page/Subpage", i.e. the text after the last slash.
 * Aborts the script via die() when the title does not have that form. */
function get_short_review_name($r) {
    /* The "s" flag lets "." match any character, as the old POSIX ereg() did */
    if (!preg_match('~.+:.+/(.+)~s', $r, $regs))
        die("Unable to decode review title ".$r."\n");
    return $regs[1];
}

/* Returns the archive section this review would go in: the upper-cased first
 * byte of the short review name when it is an ASCII letter, otherwise "0-9". */
function get_review_section_title($r) {
    $first = substr(get_short_review_name($r), 0, 1);
    if (preg_match('/[a-z]/i', $first))
        return strtoupper($first);
    return "0-9";
}

/* Returns a new archive page with the added reviews.
 *   $old_page - current wikitext of the archive page; empty for a new page
 *   $data     - array with "year" (used only in the intro text of a new page)
 *               and "reviews" (review title => archive line to add)
 * The existing header is kept, or a standard intro is generated for a new
 * page. Every standard section ("0-9", "A".."Z") is then emitted with the old
 * and new lines merged, ordered by review title using a natural,
 * case-insensitive sort. A new line replaces an old one for the same title. */
function archive_add_reviews($old_page, $data) {
    $standard_sections = array_merge(array("0-9"), range("A", "Z"));

    if (empty($old_page)) {
        $old = array();
        $new_page = "{{Editor Review Navigation}}\n"
            ."These are links to reviews of editors from [[Wikipedia:Editor review]] from ".$data["year"].". "
            ."They are in alphabetical order and include multiple submissions. "
            ."The date listed is the date of the original submission by the editor.\n"
            ."{{CompactTOC}}\n";
    } else {
        $old = get_archive_sections($old_page);
        $new_page = $old["header"];
    }

    /* Group every line by its section; existing lines first so that new ones override them */
    $by_section = array();
    foreach ($old as $review => $line) {
        if ($review == "header")
            continue;
        $by_section[get_review_section_title($review)][$review] = $line;
    }
    foreach ($data["reviews"] as $review => $line)
        $by_section[get_review_section_title($review)][$review] = $line;

    foreach ($standard_sections as $section) {
        $new_page .= "==".$section."==\n";
        $entries = isset($by_section[$section]) ? $by_section[$section] : array();
        /* Flip to line => title so that sorting by value orders the lines by title */
        $line_to_title = array_flip($entries);
        natcasesort($line_to_title);
        foreach ($line_to_title as $line => $review)
            $new_page .= $line."\n";
        $new_page .= "\n";
    }
    return $new_page;
}

/* Generic summary string creator.
 *   $initial  - leading word(s) of the summary
 *   $list     - items to enumerate after the initial text; may be empty
 *   $tasklink - optional page title, appended as " ([[title|task]])"
 *   $maxlen   - maximum length of the summary in bytes
 * Produces "<initial>: item1, item2, and N more<link>", listing as many
 * leading items as possible while staying within $maxlen. Falls back to
 * "<initial><link>" when the list is empty or not even one item fits. */
function make_edit_summary($initial, $list, $tasklink = null, $maxlen = 255) {
    $linkstr = ($tasklink !== null) ? " ([[".$tasklink."|task]])" : "";
    if (empty($list))
        return $initial.$linkstr;

    $total = count($list);
    /* Try the longest enumeration first; the first candidate that fits wins */
    for ($shown = $total; $shown >= 1; $shown--) {
        $parts = array_slice($list, 0, $shown);
        if ($shown < $total)
            $parts[] = "and ".($total - $shown)." more";
        $candidate = $initial.": ".implode(", ", $parts).$linkstr;
        if (strlen($candidate) <= $maxlen)
            return $candidate;
    }
    return $initial.$linkstr;
}

/* Adds discussion archive templates to the review subpage.
 *   $old_page - current wikitext of the review page
 *   $data     - unused; present because this is used as an edit callback
 * A page that already contains a {{Discussiontop}} template is returned
 * unchanged. Otherwise {{discussiontop}} is inserted after every line holding
 * a level-3 (or deeper) heading and one {{discussionbottom}} is appended.
 * NOTE(review): a page with several such headings gets several tops but only
 * one bottom - confirm review pages only ever have one. */
function seal_review($old_page, $data) {
    $old_dtop = wp_find_template("Discussiontop", $old_page);
    if (!empty($old_dtop))
        return $old_page;
    $new_page = "";
    foreach (explode("\n", trim($old_page)) as $l) {
        $new_page .= $l."\n";
        if (preg_match('/===.+===/', $l))
            $new_page .= "{{discussiontop}}\n";
    }
    $new_page .= "{{discussionbottom}}\n";
    return $new_page;
}

/* Tells whether a line is treated as a horizontal rule: true when the line
 * contains four consecutive dashes anywhere in it, false otherwise. */
function line_hr($l) {
    return strstr($l, "----") !== false;
}

/* Removes the listed reviews from the WP:ER page.
 *   $old_page - current wikitext of the page
 *   $rlist    - list of full review titles whose transclusions are to be removed
 * A line is dropped when, after trimming and converting underscores to spaces,
 * it contains "{{ <title> }}" for any listed title (case-insensitive). A
 * horizontal-rule line directly following another kept horizontal-rule line
 * is dropped as well, so removing a review does not leave two rules in a row.
 * Every kept line is written back followed by a newline.
 * NOTE: This will leave an extra "----" at the beginning if the first review in the
 * list is removed, but this should never happen. */
function remove_reviews($old_page, $rlist) {
    /* Build the transclusion patterns once rather than for every line */
    $patterns = array();
    foreach ($rlist as $r)
        $patterns[] = '/\{\{[[:space:]]*'.preg_quote($r, '/').'[[:space:]]*\}\}/i';

    $new_page = "";
    $last_line = "";
    foreach (explode("\n", $old_page) as $old_l) {
        if (line_hr($last_line) && line_hr($old_l))
            continue;
        $l = str_replace("_", " ", trim($old_l));
        $delete = false;
        foreach ($patterns as $p) {
            if (preg_match($p, $l)) {
                $delete = true;
                break;
            }
        }
        if (!$delete) {
            $new_page .= $old_l."\n";
            $last_line = $old_l;
        }
    }
    return $new_page;
}

/* Local bot configuration (file names, credentials, API settings) */
$settings = parse_ini_file($dusty_dir."/dustycfg.ini");

$er_title = "Wikipedia:Editor review";
/* Only one process may use the memory file at a time */
$lock_file = $dusty_dir."/".$settings["lock_file"];
if (file_exists($lock_file)) {
    echo("Memory locked: ".$lock_file."\n");
    exit();
}
file_put_contents($lock_file, $task_link);

/* Notification records are stored in a serialized file, updated every run */
$memfile = $dusty_dir."/".$settings["memory_file"];
$memory = false;
if (file_exists($memfile))
    $memory = unserialize(file_get_contents($memfile));
/* unserialize() yields false for a corrupt or empty file; start from a clean
 * structure instead of failing below while the lock is already held */
if (!is_array($memory))
    $memory = array();
if (!isset($memory["er"]) || !is_array($memory["er"]))
    $memory["er"] = array();
if (!isset($memory["er"]["notifications"]) || !is_array($memory["er"]["notifications"]))
    $memory["er"]["notifications"] = array();

/* Create the API context from the local configuration */
$ctx = wp_create_context($settings["maxlag"], $settings["bot_flag"], $settings["api_url"]);
wp_context_set_query_limit(50, $ctx);

/* Logging in is only needed when edits will really be posted */
if ($settings["post_to_wiki"]) {
    wp_login($settings["username"], $settings["password"], $ctx);
}

/* Get the current WP:ER page */
$er_page = wp_get($er_title, $ctx, $er_ts);

/* Read the archive settings from WP:ER; without the template the bot does nothing */
$dusty_text = wp_find_template("User:DustyBot/Archive settings", $er_page);
if (empty($dusty_text)) {
    if ($settings["post_to_wiki"]) {
        wp_logout($ctx);
    }
    echo("Archive settings template not found.\n");
    unlink($lock_file);
    exit();
}
$dusty_tmpl = wp_parse_template($dusty_text);

/* An "archive" argument other than on/yes switches the bot off */
if (wptmpl_has_arg($dusty_tmpl, "archive")) {
    $archsw = trim(strtolower(wptmpl_get_arg($dusty_tmpl, "archive")));
    if (!in_array($archsw, array("on", "yes"), true)) {
        if ($settings["post_to_wiki"]) {
            wp_logout($ctx);
        }
        echo("Archiving disabled.\n");
        unlink($lock_file);
        exit();
    }
}

/* Optional overrides of the day counts, each clamped to a lower bound */
if (wptmpl_has_arg($dusty_tmpl, "reviewdays")) {
    $review_days = (int)wptmpl_get_arg($dusty_tmpl, "reviewdays");
}
$review_days = max(15, $review_days);

if (wptmpl_has_arg($dusty_tmpl, "inactivewaitdays")) {
    $inactive_wait_days = (int)wptmpl_get_arg($dusty_tmpl, "inactivewaitdays");
}
$inactive_wait_days = max(3, $inactive_wait_days);

if (wptmpl_has_arg($dusty_tmpl, "notifywaitdays")) {
    $notify_wait_days = (int)wptmpl_get_arg($dusty_tmpl, "notifywaitdays");
}
$notify_wait_days = max(0, $notify_wait_days);

/* Work out when each currently listed review was transcluded onto WP:ER */
$current_reviews = list_reviews($er_title, $er_page);
$tc_dates = wp_transcluded_dates($er_title, "list_reviews", $current_reviews, $ctx);
$all_reviews = array();
foreach ($tc_dates as $r => $tc) {
    $all_reviews[] = $r;
}

/* Reviews in the backlog category are never closed automatically */
$unreviewed = wp_get_category_members("Category:Wikipedians on Editor review/Backlog", $ctx);

/* Most of the current reviews will be inactionable: keep only those that have
 * been listed long enough to enter the notification window and are not in the
 * backlog category */
$potential_closes = array();
foreach ($current_reviews as $r) {
    if (time() - $tc_dates[$r] > ($review_days - $notify_wait_days)*24*60*60
        && !in_array($r, $unreviewed)) {
        $potential_closes[] = $r;
    }
}
/* Determine which reviews to close and which users need to be notified of scheduled actions */
$pc_ts = array();
/* $pc_ts is handed to wp_get_multiple() and read back per title below;
 * presumably it is filled with each page's last-edit timestamp - confirm */
$pc_pages = wp_get_multiple($potential_closes, $ctx, $pc_ts);
$notify_users = array();
$close_reviews = array();
foreach ($pc_pages as $r => $page) {
    /* Opt-out marker: an HTML comment containing "noautoarchive". The "s" flag
     * lets "." span newlines, as the old POSIX eregi() did. */
    if (preg_match('/<!--.*noautoarchive.*-->/is', $page))
        continue;
    $username = get_review_username($page);
    if (empty($username)) {
        echo("Unable to find username in ".$r."\n");
        continue;
    }
    /* Ages in (fractional) days */
    $days_open = (time() - $tc_dates[$r])/(24*60*60);
    $days_inactive = (time() - $pc_ts[$r])/(24*60*60);
    /* A notification is due once the review is within $notify_wait_days of
     * satisfying the inactivity requirement */
    $notify = $days_inactive > $inactive_wait_days - $notify_wait_days;
    $notified = array_key_exists($r, $memory["er"]["notifications"]);
    if (!$notified && $notify && $notify_wait_days == 0) /* Don't bother */
        $notified = true;
    /* First time a notification is due: queue it, record when, and revisit on a later run */
    if (!$notified && $notify) {
        $notify_users[$r] = $username;
        $memory["er"]["notifications"][$r] = time();
        continue;
    }
    /* Already notified, but the grace period since the notification has not elapsed */
    if ($notified && time() - $memory["er"]["notifications"][$r] <= $notify_wait_days*24*60*60)
        continue;
    /* Close only reviews that are old enough, inactive long enough, and notified */
    $close = $days_open > $review_days && $days_inactive > $inactive_wait_days;
    if ($notified && $close)
        $close_reviews[] = $r;
}

/* An edit token is only needed (and only fetched) when edits are really posted */
if ($settings["post_to_wiki"])
    $edtoken = wp_get_edit_token($er_title, $ctx);

/* Seal closed reviews, collecting a wiki link to each for the edit summary.
 * The list is initialised up front so that it is defined even when no review
 * is being closed. */
$closed_review_links = array();
foreach ($close_reviews as $r) {
    echo("Closing ".$r."\n");
    if ($settings["post_to_wiki"])
        wp_edit_war($r, "Closing review", "seal_review", null, $ctx, $edtoken);
    else
        wp_edit_test($r, "seal_review", null, $ctx);
    $closed_review_links[] = "[[".$r."|".get_short_review_name($r)."]]";
}

/* Remove closed reviews from WP:ER */
$remove_summary = make_edit_summary("Closed", $closed_review_links, $task_link);
if ($settings["post_to_wiki"])
    wp_edit_war($er_title, $remove_summary, "remove_reviews", $close_reviews, $ctx, $edtoken,
        $er_page, $er_ts);
else
    wp_edit_test($er_title, "remove_reviews", $close_reviews, $ctx);

/* Notify users of impending closures by appending a section to their talk page */
foreach ($notify_users as $r => $username) {
    echo("Notifying ".$username."\n");
    $talktitle = "User talk:".$username;
    /* Pass the API context explicitly, as every other wp_get() call in this script does */
    $talk_page = wp_get($talktitle, $ctx);
    /* NOTE(review): die() here leaves the lock file in place, skips saving the
     * memory file and skips the archiving step below - confirm that is intended,
     * e.g. for users whose talk page does not exist yet */
    if (empty($talk_page))
        die("Error accessing ".$talktitle."\n");
    /* Respect the talk page's bot opt-out, as reported by wp_page_allows_bot() */
    if (!wp_page_allows_bot($talk_page, $ctx))
        continue;
    $close_date = gmdate("j F Y", time() + $notify_wait_days*24*60*60);
    $mtitle = "Automatic processing of your editor review";
    $mcontent = "This is an automated message. Your [[".$r."|editor review]] is scheduled to be ";
    $mcontent .= "closed on ".$close_date." because it will have been open for more than ";
    $mcontent .= $review_days."&nbsp;days and inactive for more than ".$inactive_wait_days.". ";
    $mcontent .= "You can keep it open longer by posting a comment to the review page requesting more input. ";
    $mcontent .= "Adding <span style=\"font-family: monospace\"><nowiki><!--n&#111;aut&#111;archive--></nowiki></span> to the review page will prevent further automated actions. ";
    $mcontent .= "End of line. ~~~~";
    if ($settings["post_to_wiki"])
        wp_append_section($talktitle, $mtitle, $mcontent, $edtoken, $ctx);
}

/* Add closed reviews to the archives. There is one archive page per year,
 * chosen by the UTC year in which the review was transcluded. */
$archive_years = list_archive_years($tc_dates);
foreach ($archive_years as $y) {
    $archive_title = $er_title."/Archive (".$y.")";
    unset($archive_ts);
    $archive_page = wp_get($archive_title, $ctx, $archive_ts);
    $sections = get_archive_sections($archive_page);
    $archive_review_links = array();
    $archive_reviews = array();
    foreach ($close_reviews as $r) {
        if (gmdate("Y", $tc_dates[$r]) != $y)
            continue;
        /* Already listed in this archive */
        if (array_key_exists($r, $sections))
            continue;
        $archive_reviews[$r] = "*[[".$r."|".get_short_review_name($r)."]]".", ".gmdate("j F Y", $tc_dates[$r]);
        $archive_review_links[] = "[[".$r."|".get_short_review_name($r)."]]";
        echo("Archiving ".$r."\n");
    }
    if (empty($archive_reviews))
        continue;
    $summary = make_edit_summary("Archived", $archive_review_links, $task_link);
    /* "year" appears in the intro text of a newly created archive page, so it
     * must be the actual year (it used to be the literal string "y") */
    $data = array("year" => $y, "reviews" => $archive_reviews);
    if ($settings["post_to_wiki"])
        wp_edit_war($archive_title, $summary, "archive_add_reviews", $data, $ctx, $edtoken,
            $archive_page, $archive_ts);
    else
        wp_edit_test($archive_title, "archive_add_reviews", $data, $ctx);
}

/* Persist the notification records for the next run, then release the lock */
file_put_contents($memfile, serialize($memory));
unlink($lock_file);

/* A session only exists when the bot logged in to post edits */
if ($settings["post_to_wiki"]) {
    wp_logout($ctx);
}
?>