This little script splits its standard input at each line matching the regular expression given as a parameter.
- regexp: a perl regular expression
- [prefix]: by default, the slices of the file are named slice.0, ..., slice.n; you can specify another prefix to be used.
- -kl: by default, the matching line is removed from the output files. Use this option if you want to keep it as the last line of each output file.
- -kf: by default, the matching line is removed from the output files. Use this option if you want to keep it as the first line of each output file.
Here is the code:
#!/usr/bin/perl -s
####################################################################
#
# This is a little perl script to split files in parts. Splitting is done at
# each line containing a certain regular expression.
#
# This is a simple implementation of one functionality of the Unix
# csplit command.
#
# Copyright (C) 2004 Pierre ANDREWS
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
####################################################################
# Abort with the usage text when no positional argument (the regular
# expression) was supplied.  The -kl / -kf switches are handled by perl's -s
# option on the shebang line, which removes them from @ARGV, so only the
# regexp and the optional prefix are counted here.
#
# The message ends with a newline so that die does not append the script
# name and line number to it.
if (@ARGV == 0) {
    die <<"USAGE";
usage: myCsplit [-kl] [-kf] regexp [prefix]

split the file on matching lines.

\t regexp: a perl regular expression

\t [prefix]: by default, slices of the file are named slice.0, ..., slice.n
\t you can specify another prefix to be used.

\t -kl: by default the matching line is removed from output files.
\t Use this option if you want to keep it as the last line of each output file.

\t -kf: by default the matching line is removed from output files.
\t Use this option if you want to keep it as the first line of each output file.
USAGE
}
use strict;
use warnings;

# Set by perl's -s switch processing (see the shebang line) when the script
# is invoked with -kl and/or -kf.  Declared with "our" so the package
# variables stay reachable under strict.
our ($kl, $kf);

# Positional arguments: the split pattern, then an optional output prefix.
# The pattern is compiled once up front, so a malformed expression is
# reported immediately rather than when the first input line is read.
my $regexp = qr/$ARGV[0]/;
my $prefix = @ARGV > 1 ? $ARGV[1] : 'slice';

# Open slice number $index ("<prefix>.<index>") for writing and return its
# filehandle.  Three-argument open is used so that characters in the prefix
# (a leading ">", a trailing "|", surrounding blanks, ...) are always taken
# as part of the file name and never as an open mode or a command.
my $open_slice = sub {
    my ($index) = @_;
    my $name = "$prefix.$index";
    open(my $fh, '>', $name)
        or die "myCsplit: cannot create '$name': $!\n";
    return $fh;
};

# Close a slice and report failures: buffered write errors (disk full,
# for instance) only surface when the handle is closed.
my $close_slice = sub {
    my ($fh, $index) = @_;
    close($fh)
        or die "myCsplit: error while writing '$prefix.$index': $!\n";
    return;
};

my $count  = 0;
my $output = $open_slice->($count);

# Copy standard input to the current slice, starting a new slice every time
# a line matches the pattern.
while (my $line = <STDIN>) {
    if ($line =~ $regexp) {
        # -kl: the matching line becomes the last line of the slice it ends.
        print {$output} $line if $kl;

        $close_slice->($output, $count);
        $count++;
        $output = $open_slice->($count);

        # -kf: the matching line becomes the first line of the new slice.
        print {$output} $line if $kf;
    }
    else {
        print {$output} $line;
    }
}

# Flush and close the final slice explicitly so write errors are not lost.
$close_slice->($output, $count);



