#!/usr/bin/perl

# combineCSV:
# This Perl script combines specified columns from several CSV files
# into one CSV file.
# The columns to be extracted are specified via the first command-line argument.
# This "columnSelector" argument is a comma-separated list of column numbers
# (1-based, like in Excel) for each file, with semicolons or colons
# separating the lists for each file.
# For example, a columnSelector of "1,2;2,5;4" specifies
# that columns 1 and 2 of file1 are to be extracted,
# followed by columns 2 and 5 of file2
# followed by column 4 of file3 (and all subsequent files).
# The remaining command-line arguments are the names of the CSV files.
# The contents of the combined CSV file are output to STDOUT.
#
# Notes:
#  - Rows are split on plain commas; quoted fields that contain commas
#    are not treated specially.
#  - Every emitted cell is followed by a comma, so each output row ends
#    with a trailing comma.
#  - A row or column that does not exist in a given file is emitted
#    as an empty cell.
#
# Cameron Hayne (macdev@hayne.net)  October 2009

use strict;
use warnings;

# readCSV($filename)
# Reads the named file line by line and splits each line on commas.
# Returns a reference to an array of rows, where each row is a reference
# to an array of the field strings on that line.
# Dies with a "Can't open file" message if the file cannot be opened.
sub readCSV
{
    my ($filename) = @_;

    my @values = ();

    # Three-argument open with a lexical handle: the filename is never
    # interpreted as a mode or a pipe, and the handle is not a global.
    open(my $fh, '<', $filename)
        or die "Can't open file '$filename': $!\n";
    while (my $line = <$fh>)
    {
        chomp($line);
        # Drop the carriage return left behind by CRLF line endings so
        # it does not end up inside the last field of the row.
        $line =~ s/\r\z//;
        my @rowValues = split(/,/, $line);
        push(@values, \@rowValues);
    }
    close($fh);

    return \@values;
}

MAIN:
{
    die "Usage: combineCSV columnSelector file1 [file2 ...]\n"
        unless @ARGV >= 2;

    my $columnSelectorStr = shift @ARGV;
    my @files = @ARGV;
    my $numFiles = scalar(@files);

    # One array of column numbers per ';' or ':' separated group.
    my @columnSelectors = map { [split(/,/, $_)] }
                          split(/[;:]/, $columnSelectorStr);

    # An empty selector string (or one made only of separators) yields
    # no selectors at all; there would be nothing to repeat for the files.
    die "Invalid columnSelector\n" unless @columnSelectors;
    my $numColumnSelectors = scalar(@columnSelectors);

    # check validity of colSelectors: each must be a positive integer
    foreach my $colSelector (@columnSelectors)
    {
        foreach my $colNum (@{$colSelector})
        {
            die "Invalid columnSelector\n"
                unless $colNum =~ /\A[1-9][0-9]*\z/;
        }
    }

    # repeat the last columnSelector for the remaining files
    for my $i ($numColumnSelectors .. $numFiles - 1)
    {
        $columnSelectors[$i] = $columnSelectors[$numColumnSelectors - 1];
    }
    #print join(',', map('(' . join(',', @{$_}) . ')', @columnSelectors)), "\n";

    # Read every file up front and find the longest one: the combined
    # output has as many rows as the file with the most rows.
    my @fileContents = ();
    my $numCombinedRows = 0;
    for my $i (0 .. $numFiles - 1)
    {
        $fileContents[$i] = readCSV($files[$i]);
        my $numRows = scalar(@{$fileContents[$i]});
        $numCombinedRows = $numRows if $numRows > $numCombinedRows;
    }

    for my $row (0 .. $numCombinedRows - 1)
    {
        for my $i (0 .. $numFiles - 1)
        {
            my $rows = $fileContents[$i];
            # A file shorter than the current row contributes empty cells.
            my $cells = $row < scalar(@{$rows}) ? $rows->[$row] : [];

            foreach my $colNum (@{$columnSelectors[$i]})
            {
                my $col = $colNum - 1;    # selectors are 1-based
                my $value = $col < scalar(@{$cells}) ? $cells->[$col] : '';
                print "$value,";
            }
        }
        print "\n";
    }
}