#!/usr/bin/perl -T ## ----------------------------------------------------------------------- ## ## Copyright 2011 Intel Corporation; author: H. Peter Anvin ## ## This program is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation, Inc.; either version 2 ## of the License, or (at your option) any later version; ## incorporated herein by reference. ## ## ----------------------------------------------------------------------- # # kernel.org bulk file upload client # use strict; use warnings; use bytes; use Encode qw(encode decode); use File::Spec; use Config::Simple; my $blksiz = 1024*1024; # Global options my %opt = ( 'rsh' => 'ssh -a -x -k -T', 'host' => undef, 'subcmd' => undef, 'batch' => 0, 'verbose' => 0, ); # Read the config file settings and override the above my $cfg_file = $ENV{'HOME'}.'/.kuprc'; my $cfg = new Config::Simple($cfg_file); if (defined($cfg)) { # Update %opt with cfgfile settings (only rsh, subcmd, and host vars) my %cfg_opt = $cfg->vars(); if (defined($cfg_opt{'default.host'})) { $opt{'host'} = $cfg_opt{'default.host'}; } if (defined($cfg_opt{'default.subcmd'})) { $opt{'subcmd'} = $cfg_opt{'default.subcmd'}; } if (defined($cfg_opt{'default.rsh'})) { $opt{'rsh'} = $cfg_opt{'default.rsh'}; } } # If anyone's ssh is somewhere other than /bin:/usr/bin, they can specify # where it is by setting up their .kuprc. This also lets us run with -T # without playing untaint tricks. # $ENV{'PATH'} = '/bin:/usr/bin'; if (defined $ENV{'KUP_RSH'}) { $opt{'rsh'} = $ENV{'KUP_RSH'}; } if (defined $ENV{'KUP_HOST'}) { $opt{'host'} = $ENV{'KUP_HOST'}; } if (defined $ENV{'KUP_SUBCMD'}) { $opt{'subcmd'} = $ENV{'KUP_SUBCMD'}; } delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; # Make %ENV safer # We process the command set twice, once as a dry run and one for real, # to catch as many errors as early as possible my @args; my $real; # Usage description sub usage($) { my($err) = @_; print STDERR "Usage: $0 [global options] command [-- command...]\n"; print STDERR "\n"; print STDERR "Global options:\n"; print STDERR " -b --batch Output command stream to stdout\n"; print STDERR " -e --rsh=command Send output to command, override KUP_RSH\n"; print STDERR " -o --host=[user@]host Connect to [user@]host, override KUP_HOST\n"; print STDERR " -c --subcmd=cmd After connecting via ssh, issue this subcommand\n"; print STDERR " -v --verbose Print each command to stderr as it is sent\n"; print STDERR "\n"; print STDERR "Commands:\n"; print STDERR " put local_file signature remote_path\n"; print STDERR " put --tar [--prefix=] remote_tree ref signature remote_path\n"; print STDERR " put --diff remote_tree ref1 ref2 signature remote_path\n"; print STDERR " mkdir remote_path\n"; print STDERR " mv|move old_path new_path\n"; print STDERR " ln|link old_path new_path\n"; print STDERR " rm|del|delete old_path\n"; print STDERR " ls|dir path...\n"; print STDERR " info\n"; exit $err; } # Return true if the supplied string is valid UTF-8 without special # characters sub is_clean_string($) { no bytes; # use feature 'unicode_strings'; -- is this needed here? my($b) = @_; my $f = decode('UTF-8', $b, Encode::FB_DEFAULT); return 0 if ($f =~ m:[\x{0000}-\x{001f}\x{007f}-\x{00a0}\x{fffd}-\x{ffff}]:); return 1; } # This returns true if the given argument is a valid filename in its # canonical form. Double slashes, relative paths, dot files, control # characters, and malformed UTF-8 is not permitted. We cap the length # of each pathname component to 100 bytes so we can add an extension # without worrying about it, and the entire pathname to 1024 bytes. sub is_valid_filename($) { use bytes; my($f) = @_; return 0 if (!defined($f)); # If undefined, clearly not valid return 0 if (length($f) > 1024); # Reject ridiculously long paths return 0 if (!is_clean_string($f)); # Reject bad UTF-8 and control characters return 0 if ($f !~ m:^/:); # Reject relative paths return 0 if ($f =~ m:/$:); # Reject paths ending in / return 0 if ($f =~ m://:); # Reject double slashes # Reject filename components starting with dot or dash, covers . and .. return 0 if ($f =~ m:/[\.\-]:); # Reject undesirable filename characters anywhere in the name. # This isn't inherently security-critical, and could be tuned if # users need it... return 0 if ($f =~ m:[\!\"\$\&\'\*\;\<\>\?\\\`\|]:); # Make sure we can create a filename after adding .bz2 or similar. # We can't use the obvious regexp here, because regexps operate on # characters, not bytes. The limit of 100 is semi-arbitrary, but # we shouldn't need filenames that long. my $n = 0; my $nmax = 0; for (my $i = 0; $i < length($f); $i++) { my $c = substr($f, $i, 1); $n = ($c eq '/') ? 0 : $n+1; $nmax = ($n > $nmax) ? $n : $nmax; } return 0 if ($nmax > 100); return 1; } # Clean up a filename so that it is more likely to pass the # canonicalization test. An optional second argument is used with # two-filename commands (move, link); it should be the already # canonicalized first argument. # # This can return undef for some invalid pathnames. This needs to be # caught by is_valid_filename(). sub canonicalize_path($;$) { my($file, $root) = @_; $root = '/' unless defined($root); my $tail = ''; if ($root =~ m:^(.*/)([^/]+)$:) { $root = $1; $tail = $2; } if ($root !~ m:^/: || $root !~ m:/$:) { die "$0: internal error: non-canonical root\n"; } if ($file !~ m:^/:) { $file = $root . $file; } if ($file =~ m:/$:) { $file .= $tail; } my @path = (); my $wasspc = 1; # The -1 argument to split means "preserve trailing empty fields" foreach my $s (split(/\//, $file, -1)) { if ($s eq '' || $s eq '.') { $wasspc = 1; } elsif ($s eq '..') { # If this ran off the root, error return undef if (!defined(pop(@path))); $wasspc = 1; } else { push(@path, $s); $wasspc = 0; } } # If this ended in a special component, error return undef if ($wasspc); # The initial '' forces the result to begin with a slash return join('/', '', @path); } # Parse global options sub parse_global_options() { while (scalar @ARGV && $ARGV[0] =~ /^-/) { my $arg = shift(@ARGV); if ($arg eq '-b' || $arg eq '--batch') { $opt{'batch'} = 1; } elsif ($arg eq '-e' || $arg eq '--rsh' || $arg eq '--ssh') { $opt{'rsh'} = shift(@ARGV); } elsif ($arg =~ /^--rsh=(.+)$/) { $opt{'rsh'} = $1; } elsif ($arg eq '-o' || $arg eq '--host') { $opt{'host'} = shift(@ARGV); } elsif ($arg =~ /^--host=(.+)$/) { $opt{'host'} = $1; } elsif ($arg eq '-c' || $arg eq '--subcmd') { $opt{'subcmd'} = shift(@ARGV); } elsif ($arg =~ /^--subcmd=(.+)$/) { $opt{'subcmd'} = $1; } elsif ($arg eq '-v' || $arg eq '--verbose') { $opt{'verbose'}++; } elsif ($arg eq '-h' || $arg eq '--help') { usage(0); } else { die "$0: unknown option: $arg\n"; } } } # Encode a string sub url_encode($) { my($s) = @_; # Hack to encode an empty string return '%' if ($s eq ''); my $o = ''; foreach my $c (unpack("C*", $s)) { if ($c > 32 && $c < 126 && $c != 37 && $c != 43) { $o .= chr($c); } elsif ($c == 32) { $o .= '+'; } else { $o .= sprintf("%%%02X", $c); } } return $o; } # Configure the output stream sub setup_output() { # In batch mode, we dump the output to stdout so the user can # aggregate it best they wish unless ($opt{'batch'}) { if ($opt{'rsh'} !~ /^([-a-zA-Z0-9._=\@:\s\/]+)$/) { die "$0: suspicious KUP_RSH setting\n"; } my $rsh = $1; if ($opt{'host'} !~ /^([-a-zA-Z0-9._\@]+)$/) { die "$0: suspicious KUP_HOST\n"; } $rsh .= " \Q$1"; if ($opt{'subcmd'}) { if ($opt{'subcmd'} !~ /^([-a-zA-Z0-9_]+)$/) { die "$0: suspicious KUP_SUBCMD\n"; } # Add the subcommand for the receiving server $rsh .= " \Q$1"; } open(STDOUT, '|-', $rsh) or die "$0: cannot execute rsh command ", $rsh, "\n"; } binmode(STDOUT); } # Terminate the output process sub close_output() { $| = 1; # Flush STDOUT unless ($opt{'batch'}) { close(STDOUT); } } # Print a command to STDOUT, and if requested, to STDERR sub command(@) { if ($real) { my $cmd = join(' ', @_); print STDERR $cmd, "\n" if ($opt{'verbose'}); print $cmd, "\n"; } } sub cat_file($$$) { my($cmd, $file, $fmt) = @_; if (!defined($fmt)) { if ($file =~ /\.((gz|bz2|xz))$/) { $fmt = $1; } else { $fmt = '%'; } } my $data; open($data, '<', $file) or die "$0: cannot open: $file: $!\n"; if (! -f $data) { die "$0: not a plain file: $file\n"; } my $size = -s _; binmode($data); if ($real) { if ($size < 2) { # Must be a plain file $fmt = '%'; } if (defined($fmt)) { command($cmd, $size, $fmt); } my $blk; my $len; while ($size) { $len = ($size < $blksiz) ? $size : $blksiz; $len = read($data, $blk, $len); if (!$len) { die "$0: premature end of data (file changed?): $file\n"; } print $blk; $size -= $len; } } close($data); } # PUT command sub cmd_put() { my $file = shift @args; my $file_tail = undef; if ($file eq '-t' || $file eq '--tar') { # tar hack my $remote_tree = shift @args; my $prefix = ''; if ($remote_tree eq '-p' || $remote_tree eq '--prefix') { $prefix = shift @args; $remote_tree = shift @args; } elsif ($remote_tree =~ /^--prefix=(.+)$/) { $prefix = $1; $remote_tree = shift @args; } my $ref = shift(@args); if (!defined($ref)) { usage(1); } my $xrt = $remote_tree; $remote_tree = canonicalize_path($remote_tree); if (!is_valid_filename($remote_tree) || $remote_tree !~ /\.git$/) { die "$0: invalid path name for git tree: $xrt\n"; } if (!is_clean_string($ref)) { die "$0: invalid ref: $ref\n"; } command('TAR', url_encode($remote_tree), url_encode($ref), url_encode($prefix)); } elsif ($file eq '-d' || $file eq '--diff') { # diff hack my $remote_tree = shift @args; my $prefix = ''; my $ref1 = shift(@args); my $ref2 = shift(@args); if (!defined($ref2)) { usage(1); } my $xrt = $remote_tree; $remote_tree = canonicalize_path($remote_tree); if (!is_valid_filename($remote_tree) || $remote_tree !~ /\.git$/) { die "$0: invalid path name for git tree: $xrt\n"; } if (!is_clean_string($ref1)) { die "$0: invalid ref: $ref1\n"; } if (!is_clean_string($ref2)) { die "$0: invalid ref: $ref2\n"; } command('DIFF', url_encode($remote_tree), url_encode($ref1), url_encode($ref2)); } elsif ($file =~ /^-/) { die "$0: unknown option to put command: $file\n"; } else { # Plain data blob. We don't actively attempt to compress it # since ssh usually has a layer of compression, but if it is # already a compressed file we send it as-is and let the # server decompress it. cat_file('DATA', $file, undef); # Get the local filename without directory my($vol, $dir); ($vol, $dir, $file_tail) = File::Spec->splitpath($file); } my $sign = shift @args; my $remote = shift @args; if (!defined($remote)) { usage(1); } # This allows the user to not specify the filename if it is # the same as on the local filesystem by ending the pathname # with a slash if ($remote =~ m:/$: && defined($file_tail)) { $remote .= $file_tail; } my $xrt = $remote; $remote = canonicalize_path($remote); if (!is_valid_filename($remote)) { die "$0: invalid pathname: $xrt\n"; } if ($remote =~ /\.sign$/) { die "$0: target filename cannot end in .sign\n"; } # DWIM: .bz2, .xz -> .gz $remote =~ s/\.(bz2|xz)$/.gz/; cat_file('SIGN', $sign, undef); command('PUT', url_encode($remote)); } # MKDIR command sub cmd_mkdir() { my $remote = shift @args; if (!defined($remote)) { usage(1); } my $xrt = $remote; $remote = canonicalize_path($remote); if (!is_valid_filename($remote)) { die "$0: invalid pathname: $xrt\n"; } if ($remote =~ /\.(sign|gz|bz2|xz)$/) { die "$0: a directory name cannot end in .sign, .gz, .bz2, .xz\n"; } command('MKDIR', url_encode($remote)); } # DELETE command sub cmd_delete() { my $remote = shift @args; if (!defined($remote)) { usage(1); } my $xrt = $remote; $remote = canonicalize_path($remote); if (!is_valid_filename($remote)) { die "$0: invalid pathname: $xrt\n"; } if ($remote =~ /\.sign$/) { die "$0: cannot delete .sign files directly\n"; } # DWIM: .bz2, .xz -> .gz $remote =~ s/\.(bz2|xz)$/.gz/; command('DELETE', url_encode($remote)); } # MOVE or LINK command sub cmd_move_link($) { my($cmd) = @_; my $from = shift @args; my $to = shift @args; if (!defined($to)) { usage(1); } my $xrt = $from; $from = canonicalize_path($from); if (!is_valid_filename($from)) { die "$0: invalid pathname: $xrt\n"; } $xrt = $to; $to = canonicalize_path($to, $from); if (!is_valid_filename($to)) { die "$0: invalid pathname: $xrt\n"; } if ($from =~ /\.sign$/ || $to =~ /\.sign$/) { die "$0: cannot explicitly move .sign files\n"; } if ($from =~ /\.(gz|bz2|xz)$/ && $to =~ /\.(gz|bz2|xz)$/) { $from =~ s/\.(bz2|xz)$/.gz/; $to =~ s/\.(bz2|xz)$/.gz/; } elsif ($from =~ /\.(gz|bz2|xz)$/ || $to =~ /\.(gz|bz2|xz)$/) { die "$0: cannot move to or from compressed filenames\n"; } if ($from eq $to) { die "$0: moving filename to self: $from\n"; } command($cmd, url_encode($from), url_encode($to)); } # DIR command (supports arbitrary number of arguments) sub cmd_dir() { while (defined($args[0]) && $args[0] ne '--') { my $d = shift @args; $d =~ s:/$::g; if ($d ne '') { my $xrt = $d; $d = canonicalize_path($d); if (!is_valid_filename($d)) { die "$0: invalid pathname: $xrt\n"; } } $d .= '/'; command('DIR', $d); } } # INFO command (no arguments) sub cmd_info() { command('INFO'); } # Process commands sub process_commands() { while (1) { my $cmd = shift(@args); if (!defined($cmd)) { usage(1); } $cmd = "\L${cmd}"; if ($cmd eq 'put') { cmd_put(); } elsif ($cmd eq 'mkdir') { cmd_mkdir(); } elsif ($cmd eq 'move' || $cmd eq 'mv') { cmd_move_link('MOVE'); } elsif ($cmd eq 'link' || $cmd eq 'ln') { cmd_move_link('LINK'); } elsif ($cmd eq 'delete' || $cmd eq 'del' || $cmd eq 'rm') { cmd_delete(); } elsif ($cmd eq 'ls' || $cmd eq 'dir') { cmd_dir(); } elsif ($cmd eq 'info') { cmd_info(); } else { die "$0: unknown command: $cmd\n"; } my $sep = shift(@args); last if (!defined($sep)); # End of command line if ($sep ne '--') { die "$0: garbage at end of $cmd command\n"; } } } # Main program parse_global_options(); if (!defined($opt{'host'})) { die "$0: please specify --host, KUP_HOST, or set up ~/.kuprc\n"; } # "Dry run" pass $real = 0; @args = @ARGV; process_commands(); # Establish output stream setup_output(); # "Real" pass $real = 1; @args = @ARGV; process_commands(); # Close the output to allow the child process to complete close_output(); exit 0; # vim: noet