User:Polbot/source/stats.pl
Appearance
< User:Polbot | source
# stats.pl -- Polbot helper script.
#
# Usage: perl stats.pl <letter>
#
#   <letter>  Start of the judges' last names; it is passed verbatim as the
#             "lname" query parameter of the FJC (fjc.gov) judge search.
#
# What the script does:
#   1. Logs in to the wiki as Polbot.
#   2. Fetches the FJC search result for <letter> and collects every judge id
#      (jid) linked from it.
#   3. Fetches each judge's detail page and extracts name, gender,
#      race/ethnicity and every judgeship listed under
#      "Federal Judicial Service".
#   4. Appends one wiki-table row per recognised judgeship to the
#      judgestats project page.
#
# Fields that cannot be found on a page are written as "-".

use strict;
use warnings;
use Perlwikipedia;
use LWP::UserAgent;

# Wiki page that accumulates the table rows.
my $STATS_PAGE = 'Wikipedia:WikiProject United States courts and judges/judgestats';

# Dates on the FJC pages look like "March 4, 1801".
my $DATE_RE = qr/\w+ \d+, \d+/;

my $firstletter = shift;
die "Usage: $0 <first letter of last name>\n"
    unless defined $firstletter && length $firstletter;

print "\nStarting polbot\n";
my $pw = Perlwikipedia->new();
#$pw->{debug} = 1;
$pw->{mech}->agent('Bot/WP/EN/Quadell/polbot');

print "Logging in\n";
# '(bot password)' is the placeholder found in the published source.
my $login_status = $pw->login('Polbot', '(bot password)');
die "I can't log in." unless (defined $login_status && $login_status eq 0);

# Pull from FJC
print "Getting list of all judges starting with $firstletter\n";
my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/6.0");

my $search_url = 'http://www.fjc.gov/servlet/tAsearch?lname=' . $firstletter;
print " $search_url\n";
my @judge_ids = extract_judge_ids(fetch_html($ua, $search_url));
print scalar(@judge_ids) . " judges found.\n\n";

# Start from the empty string so the append below never sees undef, even when
# no judge produces a row.
my $wiki_out = '';

foreach my $jid (@judge_ids) {
    print "\n$jid: ";

    # A failed fetch aborts the whole run (as before), so a partial list is
    # never written to the wiki.
    my $html = fetch_html($ua, "http://www.fjc.gov/servlet/tGetInfo?jid=$jid");
    $html =~ tr/`/'/;    # backticks become apostrophes

    my $name = extract_name_link($html);
    if (!defined $name) {
        warn "No name heading found for jid $jid; using '-'\n";
        $name = "-";
    }
    print "$name\n";

    my $gender = ($html =~ m{<BR><B>Gender:</B> *([^<]+)<}) ? $1 : "-";
    my $race
        = ($html =~ m{<BR><BR><B>Race or Ethnicity:</B> *([^<]+)<}) ? $1 : "-";

    # The judgeship list runs from the section heading to the next blank
    # line (two <BR>s) that is followed by another bold heading.
    next
        unless $html
        =~ m{<B>Federal Judicial Service:</B><BR>(.*?)<BR>\s*<BR>\s*<B>}si;
    my $service_html = $1;

    # Individual judgeships are separated by a double <BR>.
    foreach my $ju (split m{ *<[Bb][Rr]><[Bb][Rr]> *}, $service_html) {
        my @fields = parse_judgeship($ju);
        next unless @fields;    # court not recognised: no row
        $wiki_out
            .= "|-\n| " . join(' || ', $name, @fields, $race, $gender) . " \n";
    }
}

if ($wiki_out eq '') {
    # Nothing was collected, so there is nothing to append.
    print "No judgeships found; nothing to write.\n";
    exit 0;
}

print "Writing... ";
# NOTE(review): what get_text returns on failure is not visible from this
# script; if it can return an empty or undefined value, the edit below would
# replace the page with only the new rows -- confirm against Perlwikipedia.
my $listsofar = $pw->get_text($STATS_PAGE);
$listsofar .= $wiki_out;
$pw->edit($STATS_PAGE, $listsofar, "Adding judges that start with $firstletter");
print "done.\n";

# fetch_html($ua, $url)
# GETs $url with the given LWP::UserAgent and returns the response body.
# Dies when the request is not successful.
sub fetch_html {
    my ($agent, $url) = @_;
    my $res = $agent->get($url);
    die "could not connect: $url (" . $res->status_line . ")"
        unless $res->is_success;
    return $res->content;
}

# extract_judge_ids($html)
# Returns the list of numeric judge ids linked from a search-result page, in
# page order.
sub extract_judge_ids {
    my ($html) = @_;
    my @ids = $html =~ m{<A HREF="/servlet/tGetInfo\?jid=(\d+)">[^<]*<}g;
    return @ids;
}

# extract_name_link($html)
# Finds the judge's name heading, which is written "Last, First [Suffix]",
# and returns it as a wiki link "[[First Last Suffix]]".
# Returns nothing when the heading is not found. A name without the
# "Last, First" comma is linked unchanged.
sub extract_name_link {
    my ($html) = @_;

    # NOTE(review): the original pattern contains a literal newline before
    # the "*" (written here as \n*), i.e. it allows line breaks between the
    # name and </B>. Kept as found -- confirm against the live markup.
    return
        unless $html =~ m{<FONT SIZE=\+1 COLOR=BLACK><B>([^\n]*?) \n*</B></FONT>};
    my $rev_name = $1;

    $rev_name =~ s/ +/ /g;     # collapse runs of spaces
    $rev_name =~ tr/[]//d;     # square brackets would break the wiki link

    if ($rev_name =~ m/^(.*?), (.*?)( Jr\.| Sr\.| II| III| IV)?$/) {
        my ($last, $first, $suffix) = ($1, $2, $3);
        $suffix = '' unless defined $suffix;    # the suffix is optional
        return "[[$first $last$suffix]]";
    }

    # Never reuse captures from an unrelated earlier match.
    return "[[$rev_name]]";
}

# court_link($ju)
# Maps the text of one judgeship entry to the wiki markup for its court.
# The patterns are tried in order and the first match wins.
# Returns nothing when no known court pattern matches.
sub court_link {
    my ($ju) = @_;

    if ($ju =~ m{Judge, U\. S\. District Court, ([^<]*)<[Bb][Rr]>}) {
        return "[[United States District Court for the $1]]";
    }
    if ($ju =~ m{U\. S\. District Court for the District of Columbia}) {
        return "[[United States District Court for the District of Columbia]]";
    }
    if ($ju =~ m{Judge, U\. S\. Circuit Courts ([^<]*)<[Bb][Rr]>}) {
        return "[[United States circuit court]] $1";
    }
    if ($ju =~ m{Judge, Circuit Court for the District of Columbia([^<]*)<[Bb][Rr]>}) {
        return "[[United States circuit court]] for the D.C. Circuit";
    }
    if ($ju =~ m{U\. S\. Court of Appeals for District of Columbia Circuit<[Bb][Rr]>}) {
        return "[[United States Court of Appeals for the D.C. Circuit]]";
    }
    if ($ju =~ m{Judge, U\. S\. Court of Appeals ([^<]*)<[Bb][Rr]>}) {
        return "[[United States Court of Appeals $1]]";
    }
    if ($ju =~ m{Supreme Court of the United States}) {
        return "[[Supreme Court of the United States]]";
    }
    return;
}

# parse_judgeship($ju)
# Parses one judgeship entry and returns the row fields in table order:
#   court, president, recess date, nominated date, confirmed date,
#   commission date, senior-status date, end date, end reason.
# Fields that are not present are "-". Returns an empty list when the court
# is not recognised, in which case the caller writes no row.
sub parse_judgeship {
    my ($ju) = @_;

    my $court = court_link($ju);
    return unless defined $court;

    my $president       = "-";
    my $nominated_date  = "-";
    my $recess_date     = "-";
    my $confirmed_date  = "-";
    my $commission_date = "-";
    my $senior_date     = "-";
    my $end_date        = "-";
    my $end_reason      = "-";

    if ($ju =~ m/Nominated by (.*?) on ($DATE_RE), to/) {
        $president      = "[[$1]]";
        $nominated_date = $2;
    }
    # NOTE(review): the original pattern has a literal newline between the
    # appointer's name and "on" (written here as \n). Kept as found --
    # confirm against the live markup.
    elsif ($ju =~ m/Received a recess appointment from (.*?) \non ($DATE_RE), to/) {
        $president   = "[[$1]]";
        $recess_date = $2;

        # A recess appointee may have been formally nominated later.
        if ($ju =~ m/; nominated on ($DATE_RE);/) {
            $nominated_date = $1;
        }
    }

    if ($ju =~ m/Confirmed by the Senate on ($DATE_RE), and received commission on ($DATE_RE)\./) {
        $confirmed_date  = $1;
        $commission_date = $2;
    }

    if ($ju =~ m/Assumed senior status on ($DATE_RE)\./) {
        $senior_date = $1;
    }

    if ($ju =~ m/Service terminated on ($DATE_RE), due to (.*?)\./) {
        $end_date   = $1;
        $end_reason = $2;
        $end_reason =~ s/appointment to another judicial position/reappointment/;
    }

    return (
        $court,          $president,       $recess_date,
        $nominated_date, $confirmed_date,  $commission_date,
        $senior_date,    $end_date,        $end_reason,
    );
}