#!/usr/bin/perl

# combineCSV:
# This Perl script combines specified columns from several CSV files
# into one CSV file.
# The columns to be extracted are specified via the first command-line argument.
# This "columnSelector" argument is a comma-separated list of column numbers
# (1-based, like in Excel) for each file, with semicolons or colons
# separating the lists for each file.
# For example, a columnSelector of "1,2;2,5;4" specifies
# that columns 1 and 2 of file1 are to be extracted,
# followed by columns 2 and 5 of file2
# followed by column 4 of file3 (and all subsequent files).
# The remaining command-line arguments are the names of the CSV files.
# The contents of the combined CSV file are output to STDOUT.
#
# Cameron Hayne (macdev@hayne.net)  October 2009

use strict;
use warnings;

# readCSV($filename):
# Reads the file named $filename and returns a reference to an array with
# one element per input line; each element is a reference to an array
# holding that line's comma-separated field values.
# Fields are split on every comma - quoting is not interpreted, so a comma
# inside a quoted field still separates fields.
# Trailing empty fields are kept, and a CR left over from a CRLF line ending
# is removed. An empty line yields an empty row.
# Dies with a message if the file cannot be opened.
sub readCSV($)
{
    my ($filename) = @_;

    my @values = ();
    # 3-argument open with a lexical handle: the file name is taken literally
    # (never as a mode or pipe) and no global filehandle is left behind.
    open(my $fh, '<', $filename)
            or die "Can't open file '$filename': $!\n";
    # Read into a lexical so that the caller's $_ is left untouched.
    while (my $line = <$fh>)
    {
        chomp($line);
        # Files with CRLF line endings would otherwise keep a "\r"
        # attached to the last field of every row.
        $line =~ s/\r\z//;
        # A limit of -1 makes split keep trailing empty fields ("a,b," has 3).
        my @rowValues = split(/,/, $line, -1);
        push(@values, \@rowValues);
    }
    close($fh);
    return \@values;
}

# Entry point:
# Takes the columnSelector and the CSV file names from @ARGV, reads every
# file via readCSV, and prints the selected columns side by side on STDOUT,
# one output row per input row index.
# The output has as many rows as the longest input file; a cell whose row or
# column does not exist in its file is written as an empty field.
# Dies with a usage message if fewer than two arguments are given, and with
# "Invalid columnSelector" if the selector is empty or contains anything
# other than positive integers.
MAIN:
{
    die "Usage: combineCSV columnSelector file1 [file2 ...]\n"
            unless @ARGV >= 2;
    my $columnSelectorStr = shift @ARGV;
    my @files = @ARGV;
    my $numFiles = scalar(@files);

    # One array ref of 1-based column numbers per file: ';' or ':' separates
    # the per-file lists, ',' separates the numbers within a list.
    my @columnSelectors = map { [ split(/,/, $_) ] }
                              split(/[;:]/, $columnSelectorStr);
    my $numColumnSelectors = scalar(@columnSelectors);

    # check validity of colSelectors
    # A selector such as "" or ";" yields no lists at all; without this check
    # there would be no "last" list to repeat for the files below.
    die "Invalid columnSelector\n" unless $numColumnSelectors > 0;
    foreach my $colSelector (@columnSelectors)
    {
        foreach my $colNum (@{$colSelector})
        {
            # \A and \z (rather than ^ and $) also reject a trailing newline
            die "Invalid columnSelector\n"
                    unless $colNum =~ /\A[1-9][0-9]*\z/;
        }
    }

    # repeat the last columnSelector for the remaining files
    while (scalar(@columnSelectors) < $numFiles)
    {
        push(@columnSelectors, $columnSelectors[$numColumnSelectors - 1]);
    }
    #print join(',', map('(' . join(',', @{$_}) . ')', @columnSelectors)), "\n";

    # Read all the files up front; the longest one determines how many
    # rows are output.
    my @fileContents = ();
    my $numCombinedRows = 0;
    foreach my $i (0 .. $numFiles - 1)
    {
        $fileContents[$i] = readCSV($files[$i]);
        my $numRows = scalar(@{$fileContents[$i]});
        $numCombinedRows = $numRows if $numRows > $numCombinedRows;
    }

    foreach my $row (0 .. $numCombinedRows - 1)
    {
        my @outValues = ();
        foreach my $i (0 .. $numFiles - 1)
        {
            # Files shorter than the longest one contribute empty fields.
            my $fileRows = $fileContents[$i];
            my $rowValues = ($row < scalar(@{$fileRows}))
                                ? $fileRows->[$row] : [];
            foreach my $colNum (@{$columnSelectors[$i]})
            {
                my $col = $colNum - 1;    # selectors are 1-based
                push(@outValues, ($col < scalar(@{$rowValues}))
                                     ? $rowValues->[$col] : '');
            }
        }
        # Joining the fields (instead of printing "value," for each one)
        # avoids a trailing comma, which would read as an extra empty column.
        print join(',', @outValues), "\n";
    }
}
