#!/usr/bin/perl use strict; use warnings; use LWP; use HTTP::Cookies; use URI; require Crypt::SSLeay; # seems to be needed for HTTPS with LWP # getDarwinSource: # This script can be used to download the Darwin source code # (i.e. the open-source part of OS X) # # The files that are downloaded are those that meet the requirements of # the 'shouldDownload' routine. It looks at the @inclusions and @exclusions # arrays of filename patterns. It also looks at the file size restriction # specified by $maxSizeMB. # The tarballs are expanded and put in a newly-created sub-directory of the # current directory named "Darwin$version" # The script checks for existing source code directories under the # "Darwin$version" directory and avoids downloading the same code again. This # means that you can run it a second time after changing some parameters to # pick up some files that were missed the first time. # # Cameron Hayne (macdev@hayne.net) January 2006 # Global variables (change these to fit your situation) # ----------------------------------------------------- my $accountName = "your_name\@your_Isp.com"; my $accountPassword = undef; # Set $version to the version of OS X that you want to get the source code for my $version = "10.4.7.ppc"; # must match what is in the URL my $maxSizeMB = 2.0; # maximum size (in megabytes) of individual files # inclusions: patterns of files to definitely be downloaded # Inclusions take precedence over exclusions and file size limits # (the following merely reflects the files I was interested in # - you should edit the list to suit your interests) my @inclusions = ( "^BootX.*", "^CF.*", "^DiskArbitration.*", "^Libc.*", "^Libsystem.*", "^PowerManagement.*", "^Secur.*", "^bash.*", "^curl.*", "^gnutar.*", "^sudo.*", "^xnu.*", ); # exclusions: patterns of files to avoid my @exclusions = ( "^Apple.*", "^[A-Z].*", "^apache.*", "^gcc.*", "^gdb.*", "^libsecurity.*", ); my $darwinsourceUrl = "http://www.opensource.apple.com/darwinsource"; my $browser; # LWP agent used for all web access #----- FUNCTIONS ----- sub startLogFile() { open(LOG, ">LOG") or die "Failed to open LOG file\n"; } sub endLogFile() { close(LOG); } # printMsg: print a message to STDOUT and to LOG sub printMsg($$) { my ($msg, $shouldLog) = @_; print "$msg\n"; if ($shouldLog) { print LOG "$msg\n"; } } # newBrowser: returns a new LWP browser sub newBrowser() { my $browser = LWP::UserAgent->new(); # write cookies to an in-memory hash my $cookieJar = HTTP::Cookies->new(); $browser->cookie_jar($cookieJar); # allow redirections from POST requests push(@{$browser->requests_redirectable}, 'POST'); return $browser; } # this is just a wrapper around LWP::UserAgent.head() sub httpHead($) { my ($url) = @_; my $response = $browser->head($url); return $response; } # this is just a wrapper around LWP::UserAgent.get() sub httpGet($) { my ($url) = @_; my $response = $browser->get($url); return $response; } # httpGetFile: this is just a wrapper around LWP::UserAgent.get() # that uses the pseudo-header ":content_file" # to put the content into the specified file sub httpGetFile($$) { my ($url, $filename) = @_; my $response = $browser->get($url, ':content_file' => $filename); return $response; } # httpPost: this is just a wrapper around LWP::UserAgent.post() sub httpPost($$) { my ($url, $values) = @_; my $response = $browser->post($url, $values); return $response; } # httpCookies: returns cookies as a string sub httpCookies() { my $cookies = $browser->cookie_jar->as_string(); return $cookies; } # printResponseInfo: for debugging sub printResponseInfo($) { my ($response) = @_; my $statusLine = $response->status_line(); my $requestedUrl = $response->request->url; print "Response->request->url: $requestedUrl\n"; print "Response->status_line: $statusLine\n"; } # printCookies: for debugging sub printCookies($) { my ($msg) = @_; my $cookies = httpCookies(); print "$msg\n", $cookies, "\n"; } # login: returns 1 on success, 0 on failure sub login($$) { my ($accountName, $accountPassword) = @_; print "Starting login\n"; my $apslTarballsUrl = "$darwinsourceUrl/tarballs/apsl/"; my $response = httpGet($apslTarballsUrl); unless ($response->is_success) { print "Failed to get APSL tarballs page ($apslTarballsUrl)\n"; printResponseInfo($response); return 0; } # The above page should have redirected to the APSL login page # which has an HTML form which we want to post to. # First we need to extract the form's action URL # Note that since the login page uses HTTPS, # it seems that LWP requires that the Crypt::SSLeay module be installed my $loginUrl = $response->request->url; #print "loginUrl: $loginUrl\n"; my $contents = $response->content; $contents =~ s/\n/ /g; # remove carriage returns my $str = qq(