| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| |
|
| | #include <config.h> |
| |
|
| | #include <ctype.h> |
| | #include <getopt.h> |
| | #include <sys/types.h> |
| | #include <signal.h> |
| |
|
| | #include "system.h" |
| |
|
| | #include <regex.h> |
| |
|
| | #include "c-ctype.h" |
| | #include "fd-reopen.h" |
| | #include "quote.h" |
| | #include "safe-read.h" |
| | #include "stdio--.h" |
| | #include "xdectoint.h" |
| | #include "xstrtol.h" |
| |
|
| | |
| | #define PROGRAM_NAME "csplit" |
| |
|
| | #define AUTHORS \ |
| | proper_name ("Stuart Kemp"), \ |
| | proper_name ("David MacKenzie") |
| |
|
| | |
| | #define DEFAULT_PREFIX "xx" |
| |
|
| | |
| | struct control |
| | { |
| | intmax_t offset; |
| | intmax_t lines_required; |
| | intmax_t repeat; |
| | int argnum; |
| | bool repeat_forever; |
| | bool ignore; |
| | bool regexpr; |
| | struct re_pattern_buffer re_compiled; |
| | }; |
| |
|
| | |
| | #define START_SIZE 8191 |
| |
|
| | |
| | #define CTRL_SIZE 80 |
| |
|
| | #ifdef DEBUG |
| | |
| | # define START_SIZE 200 |
| | # define CTRL_SIZE 1 |
| | #endif |
| |
|
| | |
| | struct cstring |
| | { |
| | idx_t len; |
| | char *str; |
| | }; |
| |
|
| | |
| | |
| | struct line |
| | { |
| | idx_t used; |
| | idx_t insert_index; |
| | idx_t retrieve_index; |
| | struct cstring starts[CTRL_SIZE]; |
| | struct line *next; |
| | }; |
| |
|
| | |
| | |
| | |
| | struct buffer_record |
| | { |
| | idx_t bytes_alloc; |
| | idx_t bytes_used; |
| | intmax_t start_line; |
| | intmax_t first_available; |
| | idx_t num_lines; |
| | char *buffer; |
| | struct line *line_start; |
| | struct line *curr_line; |
| | struct buffer_record *next; |
| | }; |
| |
|
| | static void close_output_file (void); |
| | static void create_output_file (void); |
| | static void delete_all_files (bool); |
| | static void save_line_to_file (const struct cstring *line); |
| |
|
| | |
| | static struct buffer_record *head = nullptr; |
| |
|
| | |
| | static char *hold_area = nullptr; |
| |
|
| | |
| | static idx_t hold_count = 0; |
| |
|
| | |
| | static intmax_t last_line_number = 0; |
| |
|
| | |
| | static intmax_t current_line = 0; |
| |
|
| | |
| | static bool have_read_eof = false; |
| |
|
| | |
| | static char *volatile filename_space = nullptr; |
| |
|
| | |
| | static char const *volatile prefix = nullptr; |
| |
|
| | |
| | static char *volatile suffix = nullptr; |
| |
|
| | |
| | static int volatile digits = 2; |
| |
|
| | |
| | static int volatile files_created = 0; |
| |
|
| | |
| | static intmax_t bytes_written; |
| |
|
| | |
| | static FILE *output_stream = nullptr; |
| |
|
| | |
| | static char *output_filename = nullptr; |
| |
|
| | |
| | static char **global_argv; |
| |
|
| | |
| | static bool suppress_count; |
| |
|
| | |
| | static bool volatile remove_files; |
| |
|
| | |
| | static bool elide_empty_files; |
| |
|
| | |
| | static bool suppress_matched; |
| |
|
| | |
| | |
| | static struct control *controls; |
| |
|
| | |
| | static idx_t control_used; |
| |
|
| | |
| | static sigset_t caught_signals; |
| |
|
| | |
| | |
| | enum |
| | { |
| | SUPPRESS_MATCHED_OPTION = CHAR_MAX + 1 |
| | }; |
| |
|
| | static struct option const longopts[] = |
| | { |
| | {"digits", required_argument, nullptr, 'n'}, |
| | {"quiet", no_argument, nullptr, 'q'}, |
| | {"silent", no_argument, nullptr, 's'}, |
| | {"keep-files", no_argument, nullptr, 'k'}, |
| | {"elide-empty-files", no_argument, nullptr, 'z'}, |
| | {"prefix", required_argument, nullptr, 'f'}, |
| | {"suffix-format", required_argument, nullptr, 'b'}, |
| | {"suppress-matched", no_argument, nullptr, SUPPRESS_MATCHED_OPTION}, |
| | {GETOPT_HELP_OPTION_DECL}, |
| | {GETOPT_VERSION_OPTION_DECL}, |
| | {nullptr, 0, nullptr, 0} |
| | }; |
| |
|
| | |
| | |
| |
|
| | static void |
| | cleanup (void) |
| | { |
| | sigset_t oldset; |
| |
|
| | close_output_file (); |
| |
|
| | sigprocmask (SIG_BLOCK, &caught_signals, &oldset); |
| | delete_all_files (false); |
| | sigprocmask (SIG_SETMASK, &oldset, nullptr); |
| | } |
| |
|
| | static _Noreturn void |
| | cleanup_fatal (void) |
| | { |
| | cleanup (); |
| | exit (EXIT_FAILURE); |
| | } |
| |
|
| | extern void |
| | xalloc_die (void) |
| | { |
| | error (0, 0, "%s", _("memory exhausted")); |
| | cleanup_fatal (); |
| | } |
| |
|
| | static void |
| | interrupt_handler (int sig) |
| | { |
| | delete_all_files (true); |
| | signal (sig, SIG_DFL); |
| | |
| | |
| | |
| | raise (sig); |
| | } |
| |
|
| | |
| | |
| |
|
| | static void |
| | save_to_hold_area (char *start, idx_t num) |
| | { |
| | free (hold_area); |
| | hold_area = start; |
| | hold_count = num; |
| | } |
| |
|
| | |
| | |
| |
|
| | static idx_t |
| | read_input (char *dest, idx_t max_n_bytes) |
| | { |
| | if (max_n_bytes == 0) |
| | return 0; |
| |
|
| | ptrdiff_t bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes); |
| |
|
| | if (bytes_read == 0) |
| | have_read_eof = true; |
| |
|
| | if (bytes_read < 0) |
| | { |
| | error (0, errno, _("read error")); |
| | cleanup_fatal (); |
| | } |
| |
|
| | return bytes_read; |
| | } |
| |
|
| | |
| |
|
| | static void |
| | clear_line_control (struct line *p) |
| | { |
| | p->used = 0; |
| | p->insert_index = 0; |
| | p->retrieve_index = 0; |
| | } |
| |
|
| | |
| |
|
| | static struct line * |
| | new_line_control (void) |
| | { |
| | struct line *p = xmalloc (sizeof *p); |
| |
|
| | p->next = nullptr; |
| | clear_line_control (p); |
| |
|
| | return p; |
| | } |
| |
|
| | |
| | |
| |
|
| | static void |
| | keep_new_line (struct buffer_record *b, char *line_start, idx_t line_len) |
| | { |
| | struct line *l; |
| |
|
| | |
| | if (b->line_start == nullptr) |
| | b->line_start = b->curr_line = new_line_control (); |
| |
|
| | |
| | if (b->curr_line->used == CTRL_SIZE) |
| | { |
| | b->curr_line->next = new_line_control (); |
| | b->curr_line = b->curr_line->next; |
| | } |
| |
|
| | l = b->curr_line; |
| |
|
| | |
| | l->starts[l->insert_index].str = line_start; |
| | l->starts[l->insert_index].len = line_len; |
| | l->used++; |
| | l->insert_index++; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | static idx_t |
| | record_line_starts (struct buffer_record *b) |
| | { |
| | char *line_start; |
| | idx_t lines; |
| | idx_t line_length; |
| |
|
| | if (b->bytes_used == 0) |
| | return 0; |
| |
|
| | lines = 0; |
| | line_start = b->buffer; |
| | char *buffer_end = line_start + b->bytes_used; |
| | *buffer_end = '\n'; |
| |
|
| | while (true) |
| | { |
| | char *line_end = rawmemchr (line_start, '\n'); |
| | if (line_end == buffer_end) |
| | break; |
| | line_length = line_end - line_start + 1; |
| | keep_new_line (b, line_start, line_length); |
| | line_start = line_end + 1; |
| | lines++; |
| | } |
| |
|
| | |
| | idx_t bytes_left = buffer_end - line_start; |
| | if (bytes_left) |
| | { |
| | if (have_read_eof) |
| | { |
| | keep_new_line (b, line_start, bytes_left); |
| | lines++; |
| | } |
| | else |
| | save_to_hold_area (ximemdup (line_start, bytes_left), bytes_left); |
| | } |
| |
|
| | b->num_lines = lines; |
| | b->first_available = b->start_line = last_line_number + 1; |
| | last_line_number += lines; |
| |
|
| | return lines; |
| | } |
| |
|
| | |
| | #if 13 <= __GNUC__ |
| | # pragma GCC diagnostic ignored "-Wanalyzer-mismatching-deallocation" |
| | # pragma GCC diagnostic ignored "-Wanalyzer-use-after-free" |
| | # pragma GCC diagnostic ignored "-Wanalyzer-use-of-uninitialized-value" |
| | #endif |
| |
|
| | static void |
| | free_buffer (struct buffer_record *buf) |
| | { |
| | for (struct line *l = buf->line_start; l;) |
| | { |
| | struct line *n = l->next; |
| | free (l); |
| | l = n; |
| | } |
| | free (buf->buffer); |
| | free (buf); |
| | } |
| |
|
| | |
| |
|
| | static ATTRIBUTE_DEALLOC (free_buffer, 1) |
| | struct buffer_record * |
| | get_new_buffer (idx_t min_size) |
| | { |
| | struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer); |
| | new_buffer->bytes_alloc = 0; |
| | new_buffer->buffer = xpalloc (nullptr, &new_buffer->bytes_alloc, min_size, |
| | -1, 1); |
| | new_buffer->bytes_used = 0; |
| | new_buffer->start_line = new_buffer->first_available = last_line_number + 1; |
| | new_buffer->num_lines = 0; |
| | new_buffer->line_start = new_buffer->curr_line = nullptr; |
| | new_buffer->next = nullptr; |
| |
|
| | return new_buffer; |
| | } |
| |
|
| | |
| | |
| |
|
| | static void |
| | save_buffer (struct buffer_record *buf) |
| | { |
| | struct buffer_record *p; |
| |
|
| | buf->next = nullptr; |
| | buf->curr_line = buf->line_start; |
| |
|
| | if (head == nullptr) |
| | head = buf; |
| | else |
| | { |
| | for (p = head; p->next; p = p->next) |
| | ; |
| | p->next = buf; |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | static bool |
| | load_buffer (void) |
| | { |
| | if (have_read_eof) |
| | return false; |
| |
|
| | |
| | |
| | |
| | idx_t bytes_wanted = MAX (START_SIZE, hold_count + 1); |
| |
|
| | while (true) |
| | { |
| | struct buffer_record *b = get_new_buffer (bytes_wanted); |
| | idx_t bytes_alloc = b->bytes_alloc; |
| | idx_t bytes_avail = bytes_alloc; |
| | char *p = b->buffer; |
| |
|
| | |
| | if (hold_count) |
| | { |
| | p = mempcpy (p, hold_area, hold_count); |
| | b->bytes_used += hold_count; |
| | bytes_avail -= hold_count; |
| | hold_count = 0; |
| | } |
| |
|
| | b->bytes_used += read_input (p, bytes_avail - 1); |
| |
|
| | if (record_line_starts (b) != 0) |
| | { |
| | save_buffer (b); |
| | return true; |
| | } |
| |
|
| | free_buffer (b); |
| | if (have_read_eof) |
| | return false; |
| | if (ckd_add (&bytes_wanted, bytes_alloc, bytes_alloc >> 1)) |
| | xalloc_die (); |
| | } |
| | } |
| |
|
| | |
| | |
| |
|
| | static intmax_t |
| | get_first_line_in_buffer (void) |
| | { |
| | if (head == nullptr && !load_buffer ()) |
| | return 0; |
| |
|
| | return head->first_available; |
| | } |
| |
|
| | |
| | |
| | |
| |
|
| | static struct cstring * |
| | remove_line (void) |
| | { |
| | |
| | |
| | |
| | static struct buffer_record *prev_buf = nullptr; |
| |
|
| | struct cstring *line; |
| | struct line *l; |
| |
|
| | if (prev_buf) |
| | { |
| | free_buffer (prev_buf); |
| | prev_buf = nullptr; |
| | } |
| |
|
| | if (head == nullptr && !load_buffer ()) |
| | return nullptr; |
| |
|
| | if (current_line < head->first_available) |
| | current_line = head->first_available; |
| |
|
| | ++(head->first_available); |
| |
|
| | l = head->curr_line; |
| |
|
| | line = &l->starts[l->retrieve_index]; |
| |
|
| | |
| | if (++l->retrieve_index == l->used) |
| | { |
| | |
| | head->curr_line = l->next; |
| | if (head->curr_line == nullptr || head->curr_line->used == 0) |
| | { |
| | |
| | |
| | |
| | prev_buf = head; |
| | head = head->next; |
| | } |
| | } |
| |
|
| | return line; |
| | } |
| |
|
| | |
| | |
| |
|
| | static struct cstring * |
| | find_line (intmax_t linenum) |
| | { |
| | struct buffer_record *b; |
| |
|
| | if (head == nullptr && !load_buffer ()) |
| | return nullptr; |
| |
|
| | if (linenum < head->start_line) |
| | return nullptr; |
| |
|
| | for (b = head;;) |
| | { |
| | if (linenum < b->start_line + b->num_lines) |
| | { |
| | |
| | struct line *l; |
| | idx_t offset; |
| |
|
| | l = b->line_start; |
| | offset = linenum - b->start_line; |
| | |
| | while (offset >= CTRL_SIZE) |
| | { |
| | l = l->next; |
| | offset -= CTRL_SIZE; |
| | } |
| | return &l->starts[offset]; |
| | } |
| | if (b->next == nullptr && !load_buffer ()) |
| | return nullptr; |
| | b = b->next; |
| | } |
| | } |
| |
|
| | |
| |
|
| | static bool |
| | no_more_lines (void) |
| | { |
| | return find_line (current_line + 1) == nullptr; |
| | } |
| |
|
| | |
| |
|
| | static void |
| | set_input_file (char const *name) |
| | { |
| | if (! streq (name, "-") && fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0) |
| | error (EXIT_FAILURE, errno, _("cannot open %s for reading"), |
| | quoteaf (name)); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| |
|
| | static void |
| | write_to_file (intmax_t last_line, bool ignore, int argnum) |
| | { |
| | struct cstring *line; |
| | intmax_t first_line; |
| | intmax_t lines; |
| | intmax_t i; |
| |
|
| | first_line = get_first_line_in_buffer (); |
| |
|
| | if (! first_line || first_line > last_line) |
| | { |
| | error (0, 0, _("%s: line number out of range"), |
| | quote (global_argv[argnum])); |
| | cleanup_fatal (); |
| | } |
| |
|
| | lines = last_line - first_line; |
| |
|
| | for (i = 0; i < lines; i++) |
| | { |
| | line = remove_line (); |
| | if (line == nullptr) |
| | { |
| | error (0, 0, _("%s: line number out of range"), |
| | quote (global_argv[argnum])); |
| | cleanup_fatal (); |
| | } |
| | if (!ignore) |
| | save_line_to_file (line); |
| | } |
| | } |
| |
|
| | |
| |
|
| | static void |
| | dump_rest_of_file (void) |
| | { |
| | struct cstring *line; |
| |
|
| | while ((line = remove_line ()) != nullptr) |
| | save_line_to_file (line); |
| | } |
| |
|
| | |
| | |
| |
|
| | static void |
| | handle_line_error (const struct control *p, intmax_t repetition) |
| | { |
| | char buf[INT_BUFSIZE_BOUND (intmax_t)]; |
| |
|
| | fprintf (stderr, _("%s: %s: line number out of range"), |
| | program_name, quote (imaxtostr (p->lines_required, buf))); |
| | if (repetition) |
| | fprintf (stderr, _(" on repetition %jd\n"), repetition); |
| | else |
| | fprintf (stderr, "\n"); |
| |
|
| | cleanup_fatal (); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| |
|
| | static void |
| | process_line_count (const struct control *p, intmax_t repetition) |
| | { |
| | intmax_t linenum; |
| | intmax_t last_line_to_save = p->lines_required * (repetition + 1); |
| |
|
| | create_output_file (); |
| |
|
| | |
| | |
| | |
| | if (no_more_lines () && suppress_matched) |
| | handle_line_error (p, repetition); |
| |
|
| | if (!(linenum = get_first_line_in_buffer ())) |
| | handle_line_error (p, repetition); |
| |
|
| | while (linenum++ < last_line_to_save) |
| | { |
| | struct cstring *line = remove_line (); |
| | if (line == nullptr) |
| | handle_line_error (p, repetition); |
| | save_line_to_file (line); |
| | } |
| |
|
| | close_output_file (); |
| |
|
| | if (suppress_matched) |
| | remove_line (); |
| |
|
| | |
| | |
| | if (no_more_lines () && !suppress_matched) |
| | handle_line_error (p, repetition); |
| | } |
| |
|
| | static void |
| | regexp_error (struct control *p, intmax_t repetition, bool ignore) |
| | { |
| | fprintf (stderr, _("%s: %s: match not found"), |
| | program_name, quote (global_argv[p->argnum])); |
| |
|
| | if (repetition) |
| | fprintf (stderr, _(" on repetition %jd\n"), repetition); |
| | else |
| | fprintf (stderr, "\n"); |
| |
|
| | if (!ignore) |
| | { |
| | dump_rest_of_file (); |
| | close_output_file (); |
| | } |
| | cleanup_fatal (); |
| | } |
| |
|
| | |
| | |
| | |
| |
|
| | static void |
| | process_regexp (struct control *p, intmax_t repetition) |
| | { |
| | struct cstring *line; |
| | idx_t line_len; |
| | intmax_t break_line; |
| | bool ignore = p->ignore; |
| | regoff_t ret; |
| |
|
| | if (!ignore) |
| | create_output_file (); |
| |
|
| | |
| | |
| |
|
| | if (p->offset >= 0) |
| | { |
| | while (true) |
| | { |
| | line = find_line (++current_line); |
| | if (line == nullptr) |
| | { |
| | if (p->repeat_forever) |
| | { |
| | if (!ignore) |
| | { |
| | dump_rest_of_file (); |
| | close_output_file (); |
| | } |
| | exit (EXIT_SUCCESS); |
| | } |
| | else |
| | regexp_error (p, repetition, ignore); |
| | } |
| | line_len = line->len; |
| | if (line->str[line_len - 1] == '\n') |
| | line_len--; |
| | ret = re_search (&p->re_compiled, line->str, line_len, |
| | 0, line_len, nullptr); |
| | if (ret == -2) |
| | { |
| | error (0, 0, _("error in regular expression search")); |
| | cleanup_fatal (); |
| | } |
| | if (ret == -1) |
| | { |
| | line = remove_line (); |
| | if (!ignore) |
| | save_line_to_file (line); |
| | } |
| | else |
| | break; |
| | } |
| | } |
| | else |
| | { |
| | |
| | while (true) |
| | { |
| | line = find_line (++current_line); |
| | if (line == nullptr) |
| | { |
| | if (p->repeat_forever) |
| | { |
| | if (!ignore) |
| | { |
| | dump_rest_of_file (); |
| | close_output_file (); |
| | } |
| | exit (EXIT_SUCCESS); |
| | } |
| | else |
| | regexp_error (p, repetition, ignore); |
| | } |
| | line_len = line->len; |
| | if (line->str[line_len - 1] == '\n') |
| | line_len--; |
| | ret = re_search (&p->re_compiled, line->str, line_len, |
| | 0, line_len, nullptr); |
| | if (ret == -2) |
| | { |
| | error (0, 0, _("error in regular expression search")); |
| | cleanup_fatal (); |
| | } |
| | if (ret != -1) |
| | break; |
| | } |
| | } |
| |
|
| | |
| | break_line = current_line + p->offset; |
| |
|
| | write_to_file (break_line, ignore, p->argnum); |
| |
|
| | if (!ignore) |
| | close_output_file (); |
| |
|
| | if (p->offset > 0) |
| | current_line = break_line; |
| |
|
| | if (suppress_matched) |
| | remove_line (); |
| | } |
| |
|
| | |
| |
|
| | static void |
| | split_file (void) |
| | { |
| | for (idx_t i = 0; i < control_used; i++) |
| | { |
| | intmax_t j; |
| | if (controls[i].regexpr) |
| | { |
| | for (j = 0; (controls[i].repeat_forever |
| | || j <= controls[i].repeat); j++) |
| | process_regexp (&controls[i], j); |
| | } |
| | else |
| | { |
| | for (j = 0; (controls[i].repeat_forever |
| | || j <= controls[i].repeat); j++) |
| | process_line_count (&controls[i], j); |
| | } |
| | } |
| |
|
| | create_output_file (); |
| | dump_rest_of_file (); |
| | close_output_file (); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | static char * |
| | make_filename (int num) |
| | { |
| | strcpy (filename_space, prefix); |
| | if (suffix) |
| | sprintf (filename_space + strlen (prefix), suffix, num); |
| | else |
| | sprintf (filename_space + strlen (prefix), "%0*d", digits, num); |
| | return filename_space; |
| | } |
| |
|
| | |
| |
|
| | static void |
| | create_output_file (void) |
| | { |
| | int nfiles = files_created; |
| | bool fopen_ok; |
| | int fopen_errno; |
| |
|
| | output_filename = make_filename (nfiles); |
| |
|
| | if (nfiles == INT_MAX) |
| | { |
| | fopen_ok = false; |
| | fopen_errno = EOVERFLOW; |
| | } |
| | else |
| | { |
| | |
| | sigset_t oldset; |
| | sigprocmask (SIG_BLOCK, &caught_signals, &oldset); |
| | output_stream = fopen (output_filename, "w"); |
| | fopen_ok = (output_stream != nullptr); |
| | fopen_errno = errno; |
| | files_created = nfiles + fopen_ok; |
| | sigprocmask (SIG_SETMASK, &oldset, nullptr); |
| | } |
| |
|
| | if (! fopen_ok) |
| | { |
| | error (0, fopen_errno, "%s", quotef (output_filename)); |
| | cleanup_fatal (); |
| | } |
| | bytes_written = 0; |
| | } |
| |
|
| | |
| | |
| |
|
| | static void |
| | delete_all_files (bool in_signal_handler) |
| | { |
| | if (! remove_files) |
| | return; |
| |
|
| | for (int i = files_created; 0 <= --i; ) |
| | { |
| | char const *name = make_filename (i); |
| | if (unlink (name) != 0 && errno != ENOENT && !in_signal_handler) |
| | error (0, errno, "%s", quotef (name)); |
| | } |
| |
|
| | files_created = 0; |
| | } |
| |
|
| | |
| | |
| |
|
| | static void |
| | close_output_file (void) |
| | { |
| | if (output_stream) |
| | { |
| | if (ferror (output_stream)) |
| | { |
| | error (0, 0, _("write error for %s"), quoteaf (output_filename)); |
| | output_stream = nullptr; |
| | cleanup_fatal (); |
| | } |
| | if (fclose (output_stream) != 0) |
| | { |
| | error (0, errno, "%s", quotef (output_filename)); |
| | output_stream = nullptr; |
| | cleanup_fatal (); |
| | } |
| | if (bytes_written == 0 && elide_empty_files) |
| | { |
| | sigset_t oldset; |
| | bool unlink_ok; |
| | int unlink_errno; |
| |
|
| | |
| | sigprocmask (SIG_BLOCK, &caught_signals, &oldset); |
| | unlink_ok = (unlink (output_filename) == 0); |
| | unlink_errno = errno; |
| | files_created--; |
| | sigprocmask (SIG_SETMASK, &oldset, nullptr); |
| |
|
| | if (! unlink_ok && unlink_errno != ENOENT) |
| | error (0, unlink_errno, "%s", quotef (output_filename)); |
| | } |
| | else |
| | { |
| | if (!suppress_count) |
| | fprintf (stdout, "%jd\n", bytes_written); |
| | } |
| | output_stream = nullptr; |
| | } |
| | } |
| |
|
| | |
| | |
| |
|
| | static void |
| | save_line_to_file (const struct cstring *line) |
| | { |
| | idx_t l = fwrite (line->str, sizeof (char), line->len, output_stream); |
| | if (l != line->len) |
| | { |
| | error (0, errno, _("write error for %s"), quoteaf (output_filename)); |
| | output_stream = nullptr; |
| | cleanup_fatal (); |
| | } |
| | bytes_written += line->len; |
| | } |
| |
|
| | |
| |
|
| | static struct control * |
| | new_control_record (void) |
| | { |
| | static idx_t control_allocated = 0; |
| | struct control *p; |
| |
|
| | if (control_used == control_allocated) |
| | controls = xpalloc (controls, &control_allocated, 1, -1, sizeof *controls); |
| | p = &controls[control_used++]; |
| | p->regexpr = false; |
| | p->repeat = 0; |
| | p->repeat_forever = false; |
| | p->lines_required = 0; |
| | p->offset = 0; |
| | return p; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| |
|
| | static void |
| | check_for_offset (struct control *p, char const *str, char const *num) |
| | { |
| | if (xstrtoimax (num, nullptr, 10, &p->offset, "") != LONGINT_OK) |
| | error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), |
| | quote (str)); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| |
|
| | static void |
| | parse_repeat_count (int argnum, struct control *p, char *str) |
| | { |
| | char *end; |
| |
|
| | end = str + strlen (str) - 1; |
| | if (*end != '}') |
| | error (EXIT_FAILURE, 0, _("%s: '}' is required in repeat count"), |
| | quote (str)); |
| | *end = '\0'; |
| |
|
| | if (str + 1 == end - 1 && *(str + 1) == '*') |
| | p->repeat_forever = true; |
| | else |
| | { |
| | uintmax_t val; |
| | if (xstrtoumax (str + 1, nullptr, 10, &val, "") != LONGINT_OK |
| | || ckd_add (&p->repeat, val, 0)) |
| | { |
| | error (EXIT_FAILURE, 0, |
| | _("%s}: integer required between '{' and '}'"), |
| | quote (global_argv[argnum])); |
| | } |
| | } |
| |
|
| | *end = '}'; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | static struct control * |
| | extract_regexp (int argnum, bool ignore, char const *str) |
| | { |
| | idx_t len; |
| | char delim = *str; |
| | char const *closing_delim; |
| | struct control *p; |
| | char const *err; |
| |
|
| | closing_delim = strrchr (str + 1, delim); |
| | if (closing_delim == nullptr) |
| | error (EXIT_FAILURE, 0, |
| | _("%s: closing delimiter '%c' missing"), str, delim); |
| |
|
| | len = closing_delim - str - 1; |
| | p = new_control_record (); |
| | p->argnum = argnum; |
| | p->ignore = ignore; |
| |
|
| | p->regexpr = true; |
| | p->re_compiled.buffer = nullptr; |
| | p->re_compiled.allocated = 0; |
| | p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1); |
| | p->re_compiled.translate = nullptr; |
| | re_syntax_options = |
| | RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES; |
| | err = re_compile_pattern (str + 1, len, &p->re_compiled); |
| | if (err) |
| | { |
| | error (0, 0, _("%s: invalid regular expression: %s"), quote (str), err); |
| | cleanup_fatal (); |
| | } |
| |
|
| | if (closing_delim[1]) |
| | check_for_offset (p, str, closing_delim + 1); |
| |
|
| | return p; |
| | } |
| |
|
| | |
| | |
| |
|
| | static void |
| | parse_patterns (int argc, int start, char **argv) |
| | { |
| | struct control *p; |
| | static intmax_t last_val = 0; |
| |
|
| | for (int i = start; i < argc; i++) |
| | { |
| | if (*argv[i] == '/' || *argv[i] == '%') |
| | { |
| | p = extract_regexp (i, *argv[i] == '%', argv[i]); |
| | } |
| | else |
| | { |
| | p = new_control_record (); |
| | p->argnum = i; |
| |
|
| | uintmax_t val; |
| | if (xstrtoumax (argv[i], nullptr, 10, &val, "") != LONGINT_OK |
| | || INTMAX_MAX < val) |
| | error (EXIT_FAILURE, 0, _("%s: invalid pattern"), quote (argv[i])); |
| | if (val == 0) |
| | error (EXIT_FAILURE, 0, |
| | _("%s: line number must be greater than zero"), argv[i]); |
| | if (val < last_val) |
| | error (EXIT_FAILURE, 0, |
| | _("line number %s is smaller than preceding line number," |
| | " %jd"), quote (argv[i]), last_val); |
| |
|
| | if (val == last_val) |
| | error (0, 0, |
| | _("warning: line number %s is the same as preceding line number"), |
| | quote (argv[i])); |
| |
|
| | last_val = val; |
| |
|
| | p->lines_required = val; |
| | } |
| |
|
| | if (i + 1 < argc && *argv[i + 1] == '{') |
| | { |
| | |
| | i++; |
| | parse_repeat_count (i, p, argv[i]); |
| | } |
| | } |
| | } |
| |
|
| |
|
| |
|
| | |
| | enum { FLAG_THOUSANDS = 1, FLAG_ALTERNATIVE = 2 }; |
| |
|
| | |
| | |
| | static idx_t |
| | get_format_flags (char const *format, int *flags_ptr) |
| | { |
| | int flags = 0; |
| |
|
| | for (idx_t count = 0; ; count++) |
| | { |
| | switch (format[count]) |
| | { |
| | case '-': |
| | case '0': |
| | break; |
| |
|
| | case '\'': |
| | flags |= FLAG_THOUSANDS; |
| | break; |
| |
|
| | case '#': |
| | flags |= FLAG_ALTERNATIVE; |
| | break; |
| |
|
| | default: |
| | *flags_ptr = flags; |
| | return count; |
| | } |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | static void |
| | check_format_conv_type (char *format, int flags) |
| | { |
| | unsigned char ch = *format; |
| | int compatible_flags = FLAG_THOUSANDS; |
| |
|
| | switch (ch) |
| | { |
| | case 'd': |
| | case 'i': |
| | break; |
| |
|
| | case 'u': |
| | *format = 'd'; |
| | break; |
| |
|
| | case 'o': |
| | case 'x': |
| | case 'X': |
| | compatible_flags = FLAG_ALTERNATIVE; |
| | break; |
| |
|
| | case 0: |
| | error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix")); |
| |
|
| | default: |
| | if (isprint (ch)) |
| | error (EXIT_FAILURE, 0, |
| | _("invalid conversion specifier in suffix: %c"), ch); |
| | else |
| | error (EXIT_FAILURE, 0, |
| | _("invalid conversion specifier in suffix: \\%.3o"), ch); |
| | } |
| |
|
| | if (flags & ~ compatible_flags) |
| | error (EXIT_FAILURE, 0, |
| | _("invalid flags in conversion specification: %%%c%c"), |
| | (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch); |
| | } |
| |
|
| | |
| | |
| | |
| | static idx_t |
| | max_out (char *format) |
| | { |
| | bool percent = false; |
| |
|
| | for (char *f = format; *f; f++) |
| | if (*f == '%' && *++f != '%') |
| | { |
| | if (percent) |
| | error (EXIT_FAILURE, 0, |
| | _("too many %% conversion specifications in suffix")); |
| | percent = true; |
| | int flags; |
| | f += get_format_flags (f, &flags); |
| | while (c_isdigit (*f)) |
| | f++; |
| | if (*f == '.') |
| | while (c_isdigit (*++f)) |
| | continue; |
| | check_format_conv_type (f, flags); |
| | } |
| |
|
| | if (! percent) |
| | error (EXIT_FAILURE, 0, |
| | _("missing %% conversion specification in suffix")); |
| |
|
| | int maxlen = snprintf (nullptr, 0, format, INT_MAX); |
| | if (! (0 <= maxlen && maxlen <= IDX_MAX)) |
| | xalloc_die (); |
| | return maxlen; |
| | } |
| |
|
| | int |
| | main (int argc, char **argv) |
| | { |
| | int optc; |
| |
|
| | initialize_main (&argc, &argv); |
| | set_program_name (argv[0]); |
| | setlocale (LC_ALL, ""); |
| | bindtextdomain (PACKAGE, LOCALEDIR); |
| | textdomain (PACKAGE); |
| |
|
| | atexit (close_stdout); |
| |
|
| | global_argv = argv; |
| | controls = nullptr; |
| | control_used = 0; |
| | suppress_count = false; |
| | remove_files = true; |
| | suppress_matched = false; |
| | prefix = DEFAULT_PREFIX; |
| |
|
| | while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, nullptr)) |
| | != -1) |
| | switch (optc) |
| | { |
| | case 'f': |
| | prefix = optarg; |
| | break; |
| |
|
| | case 'b': |
| | suffix = optarg; |
| | break; |
| |
|
| | case 'k': |
| | remove_files = false; |
| | break; |
| |
|
| | case 'n': |
| | digits = xdectoimax (optarg, 0, MIN (INT_MAX, IDX_MAX), "", |
| | _("invalid number"), 0); |
| | break; |
| |
|
| | case 's': |
| | case 'q': |
| | suppress_count = true; |
| | break; |
| |
|
| | case 'z': |
| | elide_empty_files = true; |
| | break; |
| |
|
| | case SUPPRESS_MATCHED_OPTION: |
| | suppress_matched = true; |
| | break; |
| |
|
| | case_GETOPT_HELP_CHAR; |
| |
|
| | case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); |
| |
|
| | default: |
| | usage (EXIT_FAILURE); |
| | } |
| |
|
| | if (argc - optind < 2) |
| | { |
| | if (argc <= optind) |
| | error (0, 0, _("missing operand")); |
| | else |
| | error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); |
| | usage (EXIT_FAILURE); |
| | } |
| |
|
| | idx_t prefix_len = strlen (prefix); |
| | idx_t max_digit_string_len |
| | = (suffix |
| | ? max_out (suffix) |
| | : MAX (INT_STRLEN_BOUND (int), digits)); |
| | idx_t filename_size; |
| | if (ckd_add (&filename_size, prefix_len, max_digit_string_len + 1)) |
| | xalloc_die (); |
| | filename_space = ximalloc (filename_size); |
| |
|
| | set_input_file (argv[optind++]); |
| |
|
| | parse_patterns (argc, optind, argv); |
| |
|
| | { |
| | int i; |
| | static int const sig[] = |
| | { |
| | |
| | SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM, |
| | #ifdef SIGPOLL |
| | SIGPOLL, |
| | #endif |
| | #ifdef SIGPROF |
| | SIGPROF, |
| | #endif |
| | #ifdef SIGVTALRM |
| | SIGVTALRM, |
| | #endif |
| | #ifdef SIGXCPU |
| | SIGXCPU, |
| | #endif |
| | #ifdef SIGXFSZ |
| | SIGXFSZ, |
| | #endif |
| | }; |
| | enum { nsigs = countof (sig) }; |
| |
|
| | struct sigaction act; |
| |
|
| | sigemptyset (&caught_signals); |
| | for (i = 0; i < nsigs; i++) |
| | { |
| | sigaction (sig[i], nullptr, &act); |
| | if (act.sa_handler != SIG_IGN) |
| | sigaddset (&caught_signals, sig[i]); |
| | } |
| |
|
| | act.sa_handler = interrupt_handler; |
| | act.sa_mask = caught_signals; |
| | act.sa_flags = 0; |
| |
|
| | for (i = 0; i < nsigs; i++) |
| | if (sigismember (&caught_signals, sig[i])) |
| | sigaction (sig[i], &act, nullptr); |
| | } |
| |
|
| | split_file (); |
| |
|
| | if (close (STDIN_FILENO) != 0) |
| | { |
| | error (0, errno, _("read error")); |
| | cleanup_fatal (); |
| | } |
| |
|
| | return EXIT_SUCCESS; |
| | } |
| |
|
| | void |
| | usage (int status) |
| | { |
| | if (status != EXIT_SUCCESS) |
| | emit_try_help (); |
| | else |
| | { |
| | printf (_("\ |
| | Usage: %s [OPTION]... FILE PATTERN...\n\ |
| | "), |
| | program_name); |
| | fputs (_("\ |
| | Output pieces of FILE separated by PATTERN(s) to files 'xx00', 'xx01', ...,\n\ |
| | and output byte counts of each piece to standard output.\n\ |
| | "), stdout); |
| | fputs (_("\ |
| | \n\ |
| | Read standard input if FILE is -\n\ |
| | "), stdout); |
| |
|
| | emit_mandatory_arg_note (); |
| |
|
| | fputs (_("\ |
| | -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\ |
| | -f, --prefix=PREFIX use PREFIX instead of 'xx'\n\ |
| | -k, --keep-files do not remove output files on errors\n\ |
| | "), stdout); |
| | fputs (_("\ |
| | --suppress-matched suppress the lines matching PATTERN\n\ |
| | "), stdout); |
| | fputs (_("\ |
| | -n, --digits=DIGITS use specified number of digits instead of 2\n\ |
| | -s, --quiet, --silent do not print counts of output file sizes\n\ |
| | -z, --elide-empty-files suppress empty output files\n\ |
| | "), stdout); |
| | fputs (HELP_OPTION_DESCRIPTION, stdout); |
| | fputs (VERSION_OPTION_DESCRIPTION, stdout); |
| | fputs (_("\ |
| | \n\ |
| | Each PATTERN may be:\n\ |
| | INTEGER copy up to but not including specified line number\n\ |
| | /REGEXP/[OFFSET] copy up to but not including a matching line\n\ |
| | %REGEXP%[OFFSET] skip to, but not including a matching line\n\ |
| | {INTEGER} repeat the previous pattern specified number of times\n\ |
| | {*} repeat the previous pattern as many times as possible\n\ |
| | \n\ |
| | A line OFFSET is an integer optionally preceded by '+' or '-'\n\ |
| | "), stdout); |
| | emit_ancillary_info (PROGRAM_NAME); |
| | } |
| | exit (status); |
| | } |
| |
|