#!/usr/bin/perl use strict; use warnings; use LWP; use HTTP::Cookies; use URI; require Crypt::SSLeay; # seems to be needed for HTTPS with LWP # getDarwinSource: # This script can be used to download the Darwin source code # (i.e. the open-source part of OS X) # # The files that are downloaded are those that meet the requirements of # the 'shouldDownload' routine. It looks at the @inclusions and @exclusions # arrays of filename patterns. It also looks at the file size restriction # specified by $maxSizeMB. # The tarballs are expanded and put in a newly-created sub-directory of the # current directory named "Darwin$version" # The script checks for existing source code directories under the # "Darwin$version" directory and avoids downloading the same code again. This # means that you can run it a second time after changing some parameters to # pick up some files that were missed the first time. # # Cameron Hayne (macdev@hayne.net) January 2006 # Global variables (change these to fit your situation) # ----------------------------------------------------- my $accountName = "your_name\@your_Isp.com"; my $accountPassword = undef; # Set $version to the version of OS X that you want to get the source code for my $version = "10.4.7.ppc"; # must match what is in the URL my $maxSizeMB = 2.0; # maximum size (in megabytes) of individual files # inclusions: patterns of files to definitely be downloaded # Inclusions take precedence over exclusions and file size limits # (the following merely reflects the files I was interested in # - you should edit the list to suit your interests) my @inclusions = ( "^BootX.*", "^CF.*", "^DiskArbitration.*", "^Libc.*", "^Libsystem.*", "^PowerManagement.*", "^Secur.*", "^bash.*", "^curl.*", "^gnutar.*", "^sudo.*", "^xnu.*", ); # exclusions: patterns of files to avoid my @exclusions = ( "^Apple.*", "^[A-Z].*", "^apache.*", "^gcc.*", "^gdb.*", "^libsecurity.*", ); my $darwinsourceUrl = "http://www.opensource.apple.com/darwinsource"; my $browser; # LWP agent used for all web access #----- FUNCTIONS ----- sub startLogFile() { open(LOG, ">LOG") or die "Failed to open LOG file\n"; } sub endLogFile() { close(LOG); } # printMsg: print a message to STDOUT and to LOG sub printMsg($$) { my ($msg, $shouldLog) = @_; print "$msg\n"; if ($shouldLog) { print LOG "$msg\n"; } } # newBrowser: returns a new LWP browser sub newBrowser() { my $browser = LWP::UserAgent->new(); # write cookies to an in-memory hash my $cookieJar = HTTP::Cookies->new(); $browser->cookie_jar($cookieJar); # allow redirections from POST requests push(@{$browser->requests_redirectable}, 'POST'); return $browser; } # this is just a wrapper around LWP::UserAgent.head() sub httpHead($) { my ($url) = @_; my $response = $browser->head($url); return $response; } # this is just a wrapper around LWP::UserAgent.get() sub httpGet($) { my ($url) = @_; my $response = $browser->get($url); return $response; } # httpGetFile: this is just a wrapper around LWP::UserAgent.get() # that uses the pseudo-header ":content_file" # to put the content into the specified file sub httpGetFile($$) { my ($url, $filename) = @_; my $response = $browser->get($url, ':content_file' => $filename); return $response; } # httpPost: this is just a wrapper around LWP::UserAgent.post() sub httpPost($$) { my ($url, $values) = @_; my $response = $browser->post($url, $values); return $response; } # httpCookies: returns cookies as a string sub httpCookies() { my $cookies = $browser->cookie_jar->as_string(); return $cookies; } # printResponseInfo: for debugging sub printResponseInfo($) { my ($response) = @_; my $statusLine = $response->status_line(); my $requestedUrl = $response->request->url; print "Response->request->url: $requestedUrl\n"; print "Response->status_line: $statusLine\n"; } # printCookies: for debugging sub printCookies($) { my ($msg) = @_; my $cookies = httpCookies(); print "$msg\n", $cookies, "\n"; } # login: returns 1 on success, 0 on failure sub login($$) { my ($accountName, $accountPassword) = @_; print "Starting login\n"; my $apslTarballsUrl = "$darwinsourceUrl/tarballs/apsl/"; my $response = httpGet($apslTarballsUrl); unless ($response->is_success) { print "Failed to get APSL tarballs page ($apslTarballsUrl)\n"; printResponseInfo($response); return 0; } # The above page should have redirected to the APSL login page # which has an HTML form which we want to post to. # First we need to extract the form's action URL # Note that since the login page uses HTTPS, # it seems that LWP requires that the Crypt::SSLeay module be installed my $loginUrl = $response->request->url; #print "loginUrl: $loginUrl\n"; my $contents = $response->content; $contents =~ s/\n/ /g; # remove carriage returns my $str = qq(
}io; unless ($actionUrl) { print "Failed to extract action URL from APSL login page ($loginUrl)\n"; return 0; } #print "actionUrl: $actionUrl\n"; my $loginUri = URI->new($loginUrl); my $actionFullUrl = $loginUri->scheme . "://" . $loginUri->host . $actionUrl; #print "actionFullUrl: $actionFullUrl\n"; # Now we are ready to login my $values = [ 'theAccountName' => $accountName, 'theAccountPW' => $accountPassword, ]; $response = httpPost($actionFullUrl, $values); unless ($response->is_success) { print "Posting of the login form failed\n"; printResponseInfo($response); return 0; } # If Apple didn't give us cookies, the login must have failed #printCookies("Cookies after login:"); unless (httpCookies()) { print "Didn't get any cookies from login " . "- probably due to incorrect password\n"; return 0; } return 1; } # getSizeOfDownload: returns the size in bytes of the file specified by URL sub getSizeOfDownload($) { my ($url) = @_; my $response = httpHead($url); unless ($response->is_success) { print "Failed to get headers for $url\n"; return -1; } my $size = $response->content_length(); $size = -1 unless defined($size); return $size; } # getTarUrls: returns an array of the tarball URLs sub getTarUrls($) { my ($baseUrl) = @_; my $response = httpGet($baseUrl); unless ($response->is_success) { die "Failed to get darwinsource index page ($baseUrl)\n"; } my $contents = $response->content; $contents =~ s/\n/ /g; # remove carriage returns # Links on the darwinsource page are like: # .tar.gz # Directory is either 'other' or 'apsl' my @tarUrls = $contents =~ m{\.tar\.gz<\/a>}ig; return @tarUrls; } # shouldDownload: handles the rules for including or excluding files # Returns 1 iff the file should be downloaded sub shouldDownload($$$) { my ($name, $type, $fullUrl) = @_; if (-d $name) { printMsg("Skipping $name since previously downloaded", 0); return 0; } foreach my $incl (@inclusions) { if ($name =~ /$incl/) { printMsg("Downloading $name since it matches inclusion $incl", 0); return 1; } } foreach my $excl (@exclusions) { if ($name =~ /$excl/) { printMsg("Skipping $name since it matches exclusion $excl", 1); return 0; } } my $size = getSizeOfDownload($fullUrl); #print "name $name size: $size\n"; if ($size > 0) { my $sizeMB = sprintf("%.2f", $size / (1024 * 1024)); if ($sizeMB > $maxSizeMB) { my $msg = "Skipping $name since size ($sizeMB MB) " . "is greater than max ($maxSizeMB MB)"; printMsg($msg, 1); return 0; } } print "Downloading $name\n"; return 1; } MAIN: { unless (defined($accountPassword)) { print STDERR "Password for APSL account: "; chomp($accountPassword = ); print STDERR "\n"; } my $baseUrl = "$darwinsourceUrl/$version/"; $browser = newBrowser(); login($accountName, $accountPassword) or die "Failed to login\n"; my @tarUrls = getTarUrls($baseUrl); if (scalar(@tarUrls) < 1) { die "Failed to extract tarUrls from index page\n"; } # create the directory where the source code will be put # and cd to that directory my $darwinSrcDir = "Darwin$version"; unless (-d $darwinSrcDir) { mkdir $darwinSrcDir or die "Can't create directory $darwinSrcDir: $!\n"; } chdir $darwinSrcDir or die "Can't chdir to $darwinSrcDir: $!\n"; startLogFile(); foreach my $url (@tarUrls) { #print "url: $url\n"; my $fullUrl = $baseUrl . $url; my $type; my $tarball; unless (($type, $tarball) = $url =~ m{(other|apsl)/(.*)$}) { die "Malformed URL: '$url'\n"; } my ($name) = $tarball =~ m{^(.*)\.tar\.gz$}; #print "type: $type tarball: $tarball name: $name\n"; if (shouldDownload($name, $type, $fullUrl)) { my $tempFilename = "temp_tarball"; httpGetFile($fullUrl, $tempFilename); system("/usr/bin/tar zxf $tempFilename"); unlink $tempFilename; } } endLogFile(); }