package Text::ParseWords;

require 5.000;
require Exporter;
require AutoLoader;
use Carp;

@ISA = qw(Exporter AutoLoader);
@EXPORT = qw(shellwords quotewords);
@EXPORT_OK = qw(old_shellwords);

# This code needs updating to use new Perl 5 features (regexp etc).

# ParseWords.pm
#
# Usage:
#	use ParseWords;
#	@words = &quotewords($delim, $keep, @lines);
#	@words = &shellwords(@lines);
#	@words = &old_shellwords(@lines);

# Hal Pomeranz (pomeranz@netcom.com), 23 March 1994
# Permission to use and distribute under the same terms as Perl.
# No warranty expressed or implied.

# Basically an update and generalization of the old shellwords.pl.
# Much code shamelessly stolen from the old version (author unknown).
#
# &quotewords() accepts a delimiter (which can be a regular expression)
# and a list of lines and then breaks those lines up into a list of
# words ignoring delimiters that appear inside quotes.
#
# The $keep argument is a boolean flag.  If true, the quotes are kept
# with each word, otherwise quotes are stripped in the splitting process.
# $keep also defines whether unprotected backslashes are retained.
#

1;
__END__


sub shellwords {

    # A &shellwords() replacement is included to demonstrate the new package.
    # This version differs from the original in that it will _NOT_ default
    # to using $_ if no arguments are given.  I personally find the old behavior
    # to be a mis-feature.

    local(@lines) = @_;
    $lines[$#lines] =~ s/\s+$//;
    &quotewords('\s+', 0, @lines);
}



sub quotewords {

# &quotewords() works by simply jamming all of @lines into a single
# string in $_ and then pulling off words a bit at a time until $_
# is exhausted.
#
# The inner "for" loop builds up each word (or $field) one $snippet
# at a time.  A $snippet is a quoted string, a backslashed character,
# or an unquoted string.  We fall out of the "for" loop when we reach
# the end of $_ or when we hit a delimiter.  Falling out of the "for"
# loop, we push the $field we've been building up onto the list of
# @words we'll be returning, and then loop back and pull another word
# off of $_.
#
# The first two cases inside the "for" loop deal with quoted strings.
# The first case matches a double quoted string, removes it from $_,
# and assigns the double quoted string to $snippet in the body of the
# conditional.  The second case handles single quoted strings.  In
# the third case we've found a quote at the current beginning of $_,
# but it didn't match the quoted string regexps in the first two cases,
# so it must be an unbalanced quote and we croak with an error (which can
# be caught by eval()).
#
# The next case handles backslashed characters, and the next case is the
# exit case on reaching the end of the string or finding a delimiter.
#
# Otherwise, we've found an unquoted thing and we pull of characters one
# at a time until we reach something that could start another $snippet--
# a quote of some sort, a backslash, or the delimiter.  This one character
# at a time behavior was necessary if the delimiter was going to be a
# regexp (love to hear it if you can figure out a better way).

    local($delim, $keep, @lines) = @_;
    local(@words,$snippet,$field,$_);

    $_ = join('', @lines);
    while ($_) {
	$field = '';
	for (;;) {
            $snippet = '';
	    if (s/^"(([^"\\]|\\[\\"])*)"//) {
		$snippet = $1;
                $snippet = "\"$snippet\"" if ($keep);
	    }
	    elsif (s/^'(([^'\\]|\\[\\'])*)'//) {
		$snippet = $1;
                $snippet = "'$snippet'" if ($keep);
	    }
	    elsif (/^["']/) {
		croak "Unmatched quote";
	    }
            elsif (s/^\\(.)//) {
                $snippet = $1;
                $snippet = "\\$snippet" if ($keep);
            }
	    elsif (!$_ || s/^$delim//) {
               last;
	    }
	    else {
                while ($_ && !(/^$delim/ || /^['"\\]/)) {
		   $snippet .=  substr($_, 0, 1);
                   substr($_, 0, 1) = '';
                }
	    }
	    $field .= $snippet;
	}
	push(@words, $field);
    }
    @words;
}


sub old_shellwords {

    # Usage:
    #	use ParseWords;
    #	@words = old_shellwords($line);
    #	or
    #	@words = old_shellwords(@lines);

    local($_) = join('', @_);
    my(@words,$snippet,$field);

    s/^\s+//;
    while ($_ ne '') {
	$field = '';
	for (;;) {
	    if (s/^"(([^"\\]|\\.)*)"//) {
		($snippet = $1) =~ s#\\(.)#$1#g;
	    }
	    elsif (/^"/) {
		croak "Unmatched double quote: $_";
	    }
	    elsif (s/^'(([^'\\]|\\.)*)'//) {
		($snippet = $1) =~ s#\\(.)#$1#g;
	    }
	    elsif (/^'/) {
		croak "Unmatched single quote: $_";
	    }
	    elsif (s/^\\(.)//) {
		$snippet = $1;
	    }
	    elsif (s/^([^\s\\'"]+)//) {
		$snippet = $1;
	    }
	    else {
		s/^\s+//;
		last;
	    }
	    $field .= $snippet;
	}
	push(@words, $field);
    }
    @words;
}
