From: Chris Hanson Date: Tue, 14 Jul 1987 03:00:59 +0000 (+0000) Subject: Initial revision X-Git-Tag: 20090517-FFI~13261 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=9baad2a7cb1106ed351b534811c3db371f55d718;p=mit-scheme.git Initial revision --- diff --git a/v7/src/microcode/regex.c b/v7/src/microcode/regex.c new file mode 100644 index 000000000..adba8fc58 --- /dev/null +++ b/v7/src/microcode/regex.c @@ -0,0 +1,1137 @@ +/* -*-C-*- + +$Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/regex.c,v 1.1 1987/07/14 03:00:59 cph Exp $ + +Copyright (c) 1987 Massachusetts Institute of Technology + +This material was developed by the Scheme project at the Massachusetts +Institute of Technology, Department of Electrical Engineering and +Computer Science. Permission to copy this software, to redistribute +it, and to use it for any purpose is granted, subject to the following +restrictions and understandings. + +1. Any copy made of this software must include this copyright notice +in full. + +2. Users of this software agree to make their best efforts (a) to +return to the MIT Scheme project any improvements or extensions that +they make, so that these may be included in future releases; and (b) +to inform MIT of noteworthy uses of this software. + +3. All materials developed as a consequence of the use of this +software shall duly acknowledge such use, in accordance with the usual +standards of acknowledging credit in academic research. + +4. MIT has made no warrantee or representation that the operation of +this software will be error-free, and MIT is under no obligation to +provide any services, by way of maintenance, update, or otherwise. + +5. In conjunction with products arising from the use of this material, +there shall be no use of the name of the Massachusetts Institute of +Technology nor of any adaptation thereof in any advertising, +promotional, or sales literature without prior written consent from +MIT in each case. */ + +/* Regular expression matching and search. + Translated from GNU Emacs. */ + +/* This code is not yet tested. -- CPH */ + +#include "scheme.h" +#include "character.h" +#include "syntax.h" +#include "regex.h" + +#ifndef SIGN_EXTEND_CHAR +#define SIGN_EXTEND_CHAR(x) (x) +#endif /* not SIGN_EXTEND_CHAR */ + +#ifndef SWITCH_ENUM +#define SWITCH_ENUM(enum_type, expression) \ + switch ((enum enum_type) (expression)) +#endif /* not SWITCH_ENUM */ + +#define RE_NFAILURES 80 + +#define FOR_INDEX_RANGE(index, start, end) \ + for (index = (start); (index < (end)); index += 1) + +#define FOR_INDEX_BELOW(index, limit) \ + FOR_INDEX_RANGE (index, 0, (limit)) + +#define FOR_ALL_ASCII(index) \ + FOR_INDEX_BELOW (index, MAX_ASCII) + +#define FOR_ALL_ASCII_SUCH_THAT(index, expression) \ + FOR_ALL_ASCII (index) \ + if (expression) + +#define TRANSLATE_CHAR(ascii) \ + ((translation == NULL) ? (ascii) : (translation [(ascii)])) + +#define WORD_CONSTITUENT_P(ascii) \ + (SYNTAX_CONSTITUENT_P (syntaxcode_word, (ascii))) + +#define SYNTAX_CONSTITUENT_P(code, ascii) \ + ((SYNTAX_ENTRY_CODE (SYNTAX_TABLE_REF (syntax_table, (ascii)))) == (code)) + +#define CHAR_SET_MEMBER_P(length, char_set, ascii) \ + (((ascii) < ((length) * ASCII_LENGTH)) && \ + (CHAR_SET_MEMBER_P_INTERNAL (char_set, ascii))) + +#define CHAR_SET_MEMBER_P_INTERNAL(char_set, ascii) \ + ((((char_set) [((ascii) / ASCII_LENGTH)]) & \ + (1 << ((ascii) % ASCII_LENGTH))) \ + != 0) + +#define READ_PATTERN_CHAR(target) do \ +{ \ + if (pattern_pc >= pattern_end) \ + BAD_PATTERN (); \ + (target) = (*pattern_pc++); \ +} while (0) + +#define READ_PATTERN_OFFSET(target) do \ +{ \ + if ((pattern_pc + 1) >= pattern_end) \ + BAD_PATTERN (); \ + (target) = (*pattern_pc++); \ + (target) += \ + ((SIGN_EXTEND_CHAR (* ((char *) (pattern_pc++)))) << ASCII_LENGTH); \ + if (((pattern_pc + (target)) < pattern_start) || \ + ((pattern_pc + (target)) > pattern_end)) \ + BAD_PATTERN (); \ +} while (0) + +#define READ_PATTERN_LENGTH(target) do \ +{ \ + if ((pattern_pc >= pattern_end) || \ + ((pattern_pc + ((target) = (*pattern_pc++))) > pattern_end)) \ + BAD_PATTERN (); \ +} while (0) + +#define READ_PATTERN_REGISTER(target) do \ +{ \ + if ((pattern_pc >= pattern_end) || \ + (((target) = (*pattern_pc++)) >= RE_NREGS)) \ + BAD_PATTERN (); \ +} while (0) + +#define READ_PATTERN_SYNTAXCODE(target) do \ +{ \ + if ((pattern_pc >= pattern_end) || \ + (((int) ((target) = ((enum syntaxcode) (*pattern_pc++)))) \ + >= ((int) syntaxcode_max))) \ + BAD_PATTERN (); \ +} while (0) + +#define BAD_PATTERN() RE_RETURN (-2) + +#define PUSH_FAILURE_POINT(pattern_pc, match_pc) do \ +{ \ + if (stack_pointer == stack_end) \ + { \ + long stack_length; \ + unsigned char **stack_temporary; \ + \ + stack_length = ((stack_end - stack_start) * 2); \ + if (stack_length > (re_max_failures * 2)) \ + RE_RETURN (-4); \ + stack_temporary = \ + ((unsigned char **) \ + (realloc \ + (stack_start, (stack_length * (sizeof (unsigned char *)))))); \ + if (stack_temporary == NULL) \ + RE_RETURN (-3); \ + stack_end = (& (stack_temporary [stack_length])); \ + stack_pointer = \ + (& (stack_temporary [(stack_pointer - stack_start)])); \ + stack_start = stack_temporary; \ + } \ + (*stack_pointer++) = (pattern_pc); \ + (*stack_pointer++) = (match_pc); \ +} while (0) + +#define RE_RETURN(value) \ +{ \ + return_value = (value); \ + goto return_point; \ +} + +void +re_buffer_initialize (buffer, translation, syntax_table, text, + text_start_index, text_end_index, + gap_start_index, gap_end_index) + struct re_buffer *buffer; + unsigned char *translation; + SYNTAX_TABLE_TYPE syntax_table; + unsigned char *text; + unsigned long text_start_index, text_end_index, + gap_start_index, gap_end_index; +{ + unsigned char *text_start, *text_end, *gap_start, *gap_end; + + /* Assumes that + ((text_start_index <= gap_start_index) && + (gap_start_index <= gap_end_index) && + (gap_end_index <= text_end_index)) */ + + text_start = (text + text_start_index); + text_end = (text + text_end_index); + gap_start = (text + gap_start_index); + gap_end = (text + gap_end_index); + + (buffer -> translation) = translation; + (buffer -> syntax_table) = syntax_table; + (buffer -> text) = text; + (buffer -> text_start) = ((text_start == gap_start) ? gap_end : text_start); + (buffer -> text_end) = ((text_end == gap_end) ? gap_start : text_end); + (buffer -> gap_start) = gap_start; + (buffer -> gap_end) = gap_end; + return; +} + +/* Given a compiled pattern between `pattern_start' and `pattern_end', + generate a character set which is true of all characters which can + be the first character of a match. + + See the documentation of `struct re_buffer' for a description of + `translation' and `syntax_table'. + + `fastmap' is the resulting character set. It is a character array + whose elements are either `FASTMAP_FALSE' or `FASTMAP_TRUE'. + + Return values: + 0 => pattern cannot match the null string. + 1 => pattern can match the null string. + 2 => pattern can match the null string, but only at end of match + text or to left of a character in `fastmap'. + -2 => the pattern is improperly formed. + else => undefined. */ + +#define FASTMAP_FALSE '\0' +#define FASTMAP_TRUE '\1' + +int +re_compile_fastmap (pattern_start, pattern_end, translation, syntax_table, + fastmap) + unsigned char *pattern_start; + fast unsigned char *pattern_end; + unsigned char *translation; + SYNTAX_TABLE_TYPE syntax_table; + fast unsigned char *fastmap; +{ + fast unsigned char *pattern_pc; + unsigned char *stack_start[RE_NFAILURES]; + unsigned char **stack_pointer; + int return_value; + + pattern_pc = pattern_start; + return_value = 0; + stack_pointer = stack_start; + + { + fast int i; + + FOR_ALL_ASCII (i) + (fastmap [i]) = FASTMAP_FALSE; + } + + loop: + if (pattern_pc >= pattern_end) + RE_RETURN (1); + + SWITCH_ENUM (regexpcode, (*pattern_pc++)) + { + case regexpcode_unused: + case regexpcode_line_start: + case regexpcode_buffer_start: + case regexpcode_buffer_end: + case regexpcode_word_start: + case regexpcode_word_end: + case regexpcode_word_bound: + case regexpcode_not_word_bound: + goto loop; + + case regexpcode_line_end: + { + (fastmap [(TRANSLATE_CHAR ('\n'))]) = FASTMAP_TRUE; + if (return_value == 0) + return_value = 2; + goto next; + } + + case regexpcode_exact_1: + { + fast int ascii; + + READ_PATTERN_CHAR (ascii); + (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE; + goto next; + } + + case regexpcode_exact_n: + { + fast int length; + + READ_PATTERN_LENGTH (length); + if (length == 0) + goto loop; + (fastmap [(TRANSLATE_CHAR (pattern_pc [1]))]) = FASTMAP_TRUE; + goto next; + } + + case regexpcode_any_char: + { + fast int ascii; + + FOR_ALL_ASCII_SUCH_THAT (ascii, (ascii != '\n')) + (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE; + if (return_value != 0) + goto return_point; + } + + case regexpcode_char_set: + { + fast int length; + fast int ascii; + + READ_PATTERN_LENGTH (length); + length = (length * ASCII_LENGTH); + FOR_INDEX_BELOW (ascii, length) + if (CHAR_SET_MEMBER_P_INTERNAL (pattern_pc, ascii)) + (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE; + goto next; + } + + case regexpcode_not_char_set: + { + fast int length; + fast int ascii; + + READ_PATTERN_LENGTH (length); + length = (length * ASCII_LENGTH); + FOR_INDEX_BELOW (ascii, length) + if (! (CHAR_SET_MEMBER_P_INTERNAL (pattern_pc, ascii))) + (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE; + FOR_INDEX_RANGE (ascii, length, MAX_ASCII) + (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE; + goto next; + } + + case regexpcode_word_char: + { + fast int ascii; + + FOR_ALL_ASCII_SUCH_THAT (ascii, (WORD_CONSTITUENT_P (ascii))) + (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE; + goto next; + } + + case regexpcode_not_word_char: + { + fast int ascii; + + FOR_ALL_ASCII_SUCH_THAT (ascii, (! (WORD_CONSTITUENT_P (ascii)))) + (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE; + goto next; + } + + case regexpcode_syntax_spec: + { + fast enum syntaxcode code; + fast int ascii; + + READ_PATTERN_SYNTAXCODE (code); + FOR_ALL_ASCII_SUCH_THAT (ascii, (SYNTAX_CONSTITUENT_P (code, ascii))) + (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE; + goto next; + } + + case regexpcode_not_syntax_spec: + { + fast enum syntaxcode code; + fast int ascii; + + READ_PATTERN_SYNTAXCODE (code); + FOR_ALL_ASCII_SUCH_THAT (ascii, + (! (SYNTAX_CONSTITUENT_P (code, ascii)))) + (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE; + goto next; + } + + case regexpcode_start_memory: + case regexpcode_stop_memory: + { + fast int register_number; + + READ_PATTERN_REGISTER (register_number); + goto loop; + } + + case regexpcode_duplicate: + { + fast int register_number; + fast int ascii; + + READ_PATTERN_REGISTER (register_number); + FOR_ALL_ASCII (ascii) + (fastmap [ascii]) = FASTMAP_TRUE; + RE_RETURN (1); + } + + case regexpcode_jump: + case regexpcode_finalize_jump: + case regexpcode_maybe_finalize_jump: + case regexpcode_dummy_failure_jump: + { + fast int offset; + + return_value = 1; + READ_PATTERN_OFFSET (offset); + pattern_pc += offset; + if (offset > 0) + goto loop; + + /* Jump backward reached implies we just went through the + body of a loop and matched nothing. Opcode jumped to + should be an on_failure_jump. Just treat it like an + ordinary jump. For a * loop, it has pushed its failure + point already; if so, discard that as redundant. */ + if (pattern_pc >= pattern_end) + BAD_PATTERN (); + if (((enum regexpcode) (pattern_pc [0])) != + regexpcode_on_failure_jump) + goto loop; + READ_PATTERN_OFFSET (offset); + pattern_pc += offset; + if ((stack_pointer != stack_start) && + ((stack_pointer [-1]) == pattern_pc)) + stack_pointer -= 1; + goto loop; + } + + case regexpcode_on_failure_jump: + { + fast int offset; + + READ_PATTERN_OFFSET (offset); + (*stack_pointer++) = (pattern_pc + offset); + goto loop; + } + + default: + BAD_PATTERN (); + } + + next: + if (stack_pointer != stack_start) + { + pattern_pc = (*--stack_pointer); + goto loop; + } + + return_point: + return (return_value); +} + +/* Match the compiled pattern described by `pattern_start' and + `pattern_end' against the characters in `buffer' between + `match_start' and `match_end'. + + `registers', if not NULL, will be filled with the start and end + indices of the match registers if the match succeeds. + + It is assumed that the following is true: + + (! ((gap_start < gap_end) && + (match_start < match_end) && + ((match_start == gap_start) || (match_end == gap_end)))) + + Return values: + + non-negative => the end index (exclusive) of the match. + -1 => no match. + -2 => the pattern is badly formed. + -3 => memory allocation error. + -4 => match stack overflow. + other => undefined. */ + +#define RE_MATCH_FAILED (-1) + +#define ADDRESS_TO_INDEX(address) \ + ((((address) > gap_start) ? ((address) - gap_length) : (address)) \ + - (buffer -> text)) + +#define READ_MATCH_CHAR(target) do \ +{ \ + if (match_pc >= match_end) \ + goto re_match_fail; \ + (target) = (TRANSLATE_CHAR (*match_pc++)); \ + if (match_pc == gap_start) \ + match_pc = gap_end; \ +} while (0) + +static Boolean +beq_translate (scan1, scan2, length, translation) + fast unsigned char *scan1, *scan2; + fast long length; + fast unsigned char *translation; +{ + while ((length--) > 0) + if ((TRANSLATE_CHAR (*scan1++)) != (TRANSLATE_CHAR (*scan2++))) + return (false); + return (true); +} + +int re_max_failures = 1000; + +int +re_match (pattern_start, pattern_end, buffer, registers, + match_start, match_end) + unsigned char *pattern_start, *pattern_end; + struct re_buffer *buffer; + struct re_registers *registers; + unsigned char *match_start, *match_end; +{ + fast unsigned char *pattern_pc, *match_pc; + unsigned char *gap_start, *gap_end; + unsigned char *translation; + SYNTAX_TABLE_TYPE syntax_table; + long gap_length; + int return_value; + + /* Failure point stack. Each place that can handle a failure + further down the line pushes a failure point on this stack. It + consists of two char *'s. The first one pushed is where to + resume scanning the pattern; the second pushed is where to resume + scanning the match text. If the latter is NULL, the failure + point is a "dummy". If a failure happens and the innermost + failure point is dormant, it discards that failure point and + tries the next one. */ + + unsigned char **stack_start, **stack_end, **stack_pointer; + + /* Information on the "contents" of registers. These are pointers + into the match text; they record just what was matched (on this + attempt) by some part of the pattern. The start_memory command + stores the start of a register's contents and the stop_memory + command stores the end. + + At that point, (register_start [regnum]) points to the first + character in the register, and (register_end [regnum]) points to + the first character beyond the end of the register. */ + + unsigned char *register_start[RE_NREGS]; + unsigned char *register_end[RE_NREGS]; + + pattern_pc = pattern_start; + match_pc = match_start; + gap_start = (buffer -> gap_start); + gap_end = (buffer -> gap_end); + gap_length = (gap_end - gap_start); + translation = (buffer -> translation); + syntax_table = (buffer -> syntax_table); + + stack_start = + ((unsigned char **) (malloc ((2 * RE_NFAILURES) * (sizeof (char *))))); + if (stack_start == NULL) + RE_RETURN (-3); + + stack_end = (& (stack_start [(2 * RE_NFAILURES)])); + stack_pointer = stack_start; + + { + fast int i; + + FOR_INDEX_BELOW (i, RE_NREGS) + { + (register_start [i]) = NULL; + (register_end [i]) = NULL; + } + } + + re_match_loop: + if (pattern_pc >= pattern_end) + { + /* Reaching here indicates that match was successful. */ + if (registers != NULL) + { + fast int i; + + (register_start [0]) = match_start; + (register_end [0]) = match_pc; + FOR_INDEX_BELOW (i, RE_NREGS) + { + ((registers -> start) [i]) = + (((register_start [i]) == NULL) + ? -1 + : (ADDRESS_TO_INDEX (register_start [i]))); + ((registers -> end) [i]) = + (((register_end [i]) == NULL) + ? -1 + : (ADDRESS_TO_INDEX (register_end [i]))); + } + } + RE_RETURN (ADDRESS_TO_INDEX (match_pc)); + } + + SWITCH_ENUM (regexpcode, (*pattern_pc++)) + { + case regexpcode_unused: + goto re_match_loop; + + case regexpcode_exact_1: + { + fast int ascii; + fast int ascii_p; + + READ_MATCH_CHAR (ascii); + READ_PATTERN_CHAR (ascii_p); + if (ascii == ascii_p) + goto re_match_loop; + goto re_match_fail; + } + + case regexpcode_exact_n: + { + fast int length; + fast int ascii; + + READ_PATTERN_LENGTH (length); + while ((length--) > 0) + { + READ_MATCH_CHAR (ascii); + if (ascii != (*pattern_pc++)) + goto re_match_fail; + } + goto re_match_loop; + } + + case regexpcode_any_char: + { + fast int ascii; + + READ_MATCH_CHAR (ascii); + if (ascii == '\n') + goto re_match_fail; + goto re_match_loop; + } + +#define RE_MATCH_CHAR_SET(winning_label, losing_label) \ + { \ + fast int ascii; \ + fast int length; \ + \ + READ_MATCH_CHAR (ascii); \ + READ_PATTERN_LENGTH (length); \ + if (CHAR_SET_MEMBER_P (length, pattern_pc, ascii)) \ + { \ + pattern_pc += length; \ + goto winning_label; \ + } \ + else \ + { \ + pattern_pc += length; \ + goto losing_label; \ + } \ + } + + case regexpcode_char_set: + RE_MATCH_CHAR_SET (re_match_loop, re_match_fail); + + case regexpcode_not_char_set: + RE_MATCH_CHAR_SET (re_match_fail, re_match_loop); + +#undef RE_MATCH_CHAR_SET + + /* \( is represented by a start_memory, \) by a stop_memory. Both + of those commands contain a "register number" argument. The + text matched within the \( and \) is recorded under that + number. Then, \ turns into a `duplicate' command which + is followed by the numeric value of as the register + number. */ + + case regexpcode_start_memory: + { + fast int register_number; + + READ_PATTERN_REGISTER (register_number); + (register_start [register_number]) = match_pc; + goto re_match_loop; + } + + case regexpcode_stop_memory: + { + fast int register_number; + + READ_PATTERN_REGISTER (register_number); + (register_end [register_number]) = + ((match_pc == gap_end) ? gap_start : match_pc); + goto re_match_loop; + } + + case regexpcode_duplicate: + { + fast int register_number; + unsigned char *start, *end, *new_end; + long length; + + READ_PATTERN_REGISTER (register_number); + start = (register_start [register_number]); + end = (register_end [register_number]); + length = (end - start); + if (length <= 0) + goto re_match_loop; + new_end = (match_pc + length); + if (new_end > match_end) + goto re_match_fail; + if ((match_pc <= gap_start) && (new_end > gap_start)) + { + long length1, length2; + + new_end += gap_length; + if (new_end > match_end) + goto re_match_fail; + length1 = (gap_start - match_pc); + length2 = (length - length1); + if (! + ((beq_translate (match_pc, start, length1, translation)) && + (beq_translate (gap_end, (start + length1), length2, + translation)))) + goto re_match_fail; + } + else if ((start <= gap_start) && (end > gap_start)) + { + long length1, length2; + + length1 = (gap_start - start); + length2 = (end - gap_end); + if (! + ((beq_translate (match_pc, start, length1, translation)) && + (beq_translate ((match_pc + length1), gap_end, length2, + translation)))) + goto re_match_fail; + } + else + { + if (! (beq_translate (match_pc, start, length, translation))) + goto re_match_fail; + } + match_pc = ((new_end == gap_start) ? gap_end : new_end); + goto re_match_loop; + } + + case regexpcode_buffer_start: + { + if (match_pc == (buffer -> text_start)) + goto re_match_loop; + goto re_match_fail; + } + + case regexpcode_buffer_end: + { + if (match_pc == (buffer -> text_end)) + goto re_match_loop; + goto re_match_fail; + } + + case regexpcode_line_start: + { + if (match_pc == (buffer -> text_start)) + goto re_match_loop; + if ((TRANSLATE_CHAR + (((match_pc == gap_end) ? gap_start : match_pc) [-1])) + == '\n') + goto re_match_loop; + goto re_match_fail; + } + + case regexpcode_line_end: + { + if ((match_pc == (buffer -> text_end)) || + ((TRANSLATE_CHAR (match_pc [0])) == '\n')) + goto re_match_loop; + goto re_match_fail; + } + +#define RE_MATCH_WORD_BOUND(word_bound_p) \ + { \ + if ((match_pc == gap_end) \ + ? (word_bound_p \ + (((gap_start != (buffer -> text_start)) && \ + (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_start [-1])))), \ + ((gap_end != (buffer -> text_end)) && \ + (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_end [0])))))) \ + : (word_bound_p \ + (((match_pc != (buffer -> text_start)) && \ + (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc [-1])))), \ + ((match_pc != (buffer -> text_end)) && \ + (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc [0]))))))) \ + goto re_match_loop; \ + goto re_match_fail; \ + } + + case regexpcode_word_bound: +#define WORD_BOUND_P(left_p, right_p) ((left_p) != (right_p)) + RE_MATCH_WORD_BOUND (WORD_BOUND_P); +#undef WORD_BOUND_P + + case regexpcode_not_word_bound: +#define NOT_WORD_BOUND_P(left_p, right_p) ((left_p) == (right_p)) + RE_MATCH_WORD_BOUND (NOT_WORD_BOUND_P); +#undef NOT_WORD_BOUND_P + + case regexpcode_word_start: +#define WORD_START_P(left_p, right_p) ((! (left_p)) && (right_p)) + RE_MATCH_WORD_BOUND (WORD_START_P); +#undef WORD_START_P + + case regexpcode_word_end: +#define WORD_END_P(left_p, right_p) ((left_p) && (! (right_p))) + RE_MATCH_WORD_BOUND (WORD_END_P); +#undef WORD_END_P + +#undef RE_MATCH_WORD_BOUND + + case regexpcode_syntax_spec: + { + fast int ascii; + fast enum syntaxcode code; + + READ_MATCH_CHAR (ascii); + READ_PATTERN_SYNTAXCODE (code); + if (SYNTAX_CONSTITUENT_P (code, ascii)) + goto re_match_loop; + goto re_match_fail; + } + + case regexpcode_not_syntax_spec: + { + fast int ascii; + fast enum syntaxcode code; + + READ_MATCH_CHAR (ascii); + READ_PATTERN_SYNTAXCODE (code); + if (! (SYNTAX_CONSTITUENT_P (code, ascii))) + goto re_match_loop; + goto re_match_fail; + } + + case regexpcode_word_char: + { + fast int ascii; + + READ_MATCH_CHAR (ascii); + if (WORD_CONSTITUENT_P (ascii)) + goto re_match_loop; + goto re_match_fail; + } + + case regexpcode_not_word_char: + { + fast int ascii; + + READ_MATCH_CHAR (ascii); + if (! (WORD_CONSTITUENT_P (ascii))) + goto re_match_loop; + goto re_match_fail; + } + + /* "or" constructs ("|") are handled by starting each alternative + with an on_failure_jump that points to the start of the next + alternative. Each alternative except the last ends with a jump + to the joining point. (Actually, each jump except for the last + one really jumps to the following jump, because tensioning the + jumps is a hassle.) + + The start of a stupid repeat has an on_failure_jump that points + past the end of the repeat text. This makes a failure point so + that, on failure to match a repetition, matching restarts past + as many repetitions have been found with no way to fail and + look for another one. + + A smart repeat is similar but loops back to the on_failure_jump + so that each repetition makes another failure point. */ + + case regexpcode_on_failure_jump: + { + fast long offset; + + READ_PATTERN_OFFSET (offset); + PUSH_FAILURE_POINT ((pattern_pc + offset), match_pc); + goto re_match_loop; + } + + /* The end of a smart repeat has a maybe_finalize_jump back. + Change it either to a finalize_jump or an ordinary jump. */ + + case regexpcode_maybe_finalize_jump: + { + fast long offset; + fast long ascii; + + READ_PATTERN_OFFSET (offset); + if (pattern_pc == pattern_end) + goto maybe_finalize_jump_finalize; + + /* Compare what follows with the beginning of the repeat. + If we can establish that there is nothing that they + would both match, we can change to `finalize_jump'. */ + + SWITCH_ENUM (regexpcode, (pattern_pc [0])) + { + case regexpcode_exact_1: + ascii = (pattern_pc [1]); + break; + + case regexpcode_exact_n: + ascii = (pattern_pc [2]); + break; + + case regexpcode_line_end: + ascii = ('\n'); + break; + + default: + goto maybe_finalize_jump_not_finalize; + } + + /* (pattern_pc [(offset - 3)]) is an `on_failure_jump'. + Examine what follows that. */ + SWITCH_ENUM (regexpcode, (pattern_pc [offset])) + { + case regexpcode_exact_1: + { + if (ascii != (pattern_pc [(offset + 1)])) + goto maybe_finalize_jump_finalize; + goto maybe_finalize_jump_not_finalize; + } + + case regexpcode_exact_n: + { + if (ascii != (pattern_pc [(offset + 2)])) + goto maybe_finalize_jump_finalize; + goto maybe_finalize_jump_not_finalize; + } + + case regexpcode_char_set: + { + if (CHAR_SET_MEMBER_P ((pattern_pc [(offset + 1)]), + (& (pattern_pc [(offset + 2)])), + ascii)) + goto maybe_finalize_jump_not_finalize; + goto maybe_finalize_jump_finalize; + } + + case regexpcode_not_char_set: + { + if (CHAR_SET_MEMBER_P ((pattern_pc [(offset + 1)]), + (& (pattern_pc [(offset + 2)])), + ascii)) + goto maybe_finalize_jump_finalize; + goto maybe_finalize_jump_not_finalize; + } + + default: + goto maybe_finalize_jump_not_finalize; + } + + maybe_finalize_jump_finalize: + pattern_pc -= 2; + (pattern_pc [-1]) = ((unsigned char) regexpcode_finalize_jump); + goto re_match_finalize_jump; + + maybe_finalize_jump_not_finalize: + pattern_pc -= 2; + (pattern_pc [-1]) = ((unsigned char) regexpcode_jump); + goto re_match_jump; + } + + case regexpcode_finalize_jump: + re_match_finalize_jump: + { + stack_pointer -= 2; + goto re_match_jump; + } + + case regexpcode_jump: + re_match_jump: + { + fast long offset; + + READ_PATTERN_OFFSET (offset); + pattern_pc += offset; + goto re_match_loop; + } + + case regexpcode_dummy_failure_jump: + { + PUSH_FAILURE_POINT (NULL, NULL); + goto re_match_jump; + } + + default: + { + BAD_PATTERN (); + } + } + + re_match_fail: + if (stack_pointer == stack_start) + RE_RETURN (RE_MATCH_FAILED); + match_pc = (*--stack_pointer); + pattern_pc = (*--stack_pointer); + if (pattern_pc != NULL) + goto re_match_loop; + goto re_match_fail; + + return_point: + if (stack_start != NULL) + free (stack_start); + return (return_value); +} + +#define DEFINE_RE_SEARCH(name) \ +int name (pattern_start, pattern_end, buffer, registers, \ + match_start, match_end) \ + unsigned char *pattern_start, *pattern_end; \ + struct re_buffer *buffer; \ + struct re_registers *registers; \ + unsigned char *match_start; \ + unsigned char *match_end; + +#define INITIALIZE_RE_SEARCH(pc, limit, gap_limit) \ + int can_be_null; \ + unsigned char *translation; \ + int match_result; \ + \ + fast unsigned char *match_pc; \ + fast unsigned char *match_limit; \ + fast unsigned char *gap_limit; \ + fast unsigned char fastmap[MAX_ASCII]; \ + \ + translation = (buffer -> translation); \ + can_be_null = \ + (re_compile_fastmap \ + (pattern_start, pattern_end, translation, \ + (buffer -> syntax_table), fastmap)); \ + if (can_be_null < 0) \ + return (can_be_null); \ + \ + match_pc = (pc); \ + match_limit = (limit); \ + gap_limit = (buffer -> gap_limit) + +#define RE_SEARCH_TEST(start) \ + (re_match \ + (pattern_start, pattern_end, buffer, registers, (start), match_end)) + +#define RE_SEARCH_FORWARD_FAST(limit) do \ +{ \ + while (true) \ + { \ + if (match_pc >= (limit)) \ + break; \ + \ + if ((fastmap [(TRANSLATE_CHAR (*match_pc++))]) != FASTMAP_FALSE) \ + continue; \ + \ + match_result = (RE_SEARCH_TEST (match_pc - 1)); \ + if (match_result == RE_MATCH_FAILED) \ + continue; \ + \ + return (match_result); \ + } \ +} while (0) + +DEFINE_RE_SEARCH (re_search_forward) +{ + INITIALIZE_RE_SEARCH (match_start, match_end, gap_start); + + if (can_be_null != 1) + { + if ((match_pc < gap_start) && (gap_start < match_limit)) + RE_SEARCH_FORWARD_FAST (gap_start); + if (match_pc == gap_start) + match_pc = (buffer -> gap_end); + RE_SEARCH_FORWARD_FAST (match_limit); + return + ((can_be_null == 0) + ? RE_MATCH_FAILED + : (RE_SEARCH_TEST (match_limit))); + } + else + { + while (true) + { + match_result = (RE_SEARCH_TEST (match_pc)); + if (match_result != RE_MATCH_FAILED) + return (match_result); + match_pc += 1; + if (match_pc == gap_start) + match_pc = (buffer -> gap_end); + if (match_pc > match_limit) + return (RE_MATCH_FAILED); + } + } +} + +#define RE_SEARCH_BACKWARD_FAST(limit) do \ +{ \ + while (true) \ + { \ + if (match_pc <= (limit)) \ + break; \ + \ + if ((fastmap [(TRANSLATE_CHAR (*--match_pc))]) != FASTMAP_FALSE) \ + continue; \ + \ + match_result = (RE_SEARCH_TEST (match_pc)); \ + if (match_result == RE_MATCH_FAILED) \ + continue; \ + \ + RE_SEARCH_BACKWARD_RETURN (match_pc); \ + } \ +} while (0) + +#define RE_SEARCH_BACKWARD_RETURN(start) \ + return \ + ((match_result < 0) \ + ? match_result \ + : ((((start) > (buffer -> gap_start)) \ + ? ((start) - (gap_end - (buffer -> gap_start))) \ + : (start)) \ + - (buffer -> text))) + +DEFINE_RE_SEARCH (re_search_backward) +{ + INITIALIZE_RE_SEARCH (match_end, match_start, gap_end); + + if (can_be_null != 1) + { + if ((match_pc > gap_end) && (gap_end > match_limit)) + RE_SEARCH_BACKWARD_FAST (gap_end); + if (match_pc == gap_end) + match_pc = (buffer -> gap_start); + RE_SEARCH_BACKWARD_FAST (match_limit); + if (can_be_null == 0) + return (RE_MATCH_FAILED); + match_result = (RE_SEARCH_TEST (match_limit)); + RE_SEARCH_BACKWARD_RETURN (match_limit); + } + else + { + while (true) + { + match_result = (RE_SEARCH_TEST (match_pc)); + if (match_result != RE_MATCH_FAILED) + RE_SEARCH_BACKWARD_RETURN (match_pc); + if (match_pc == gap_end) + match_pc = (buffer -> gap_start); + match_pc -= 1; + if (match_pc < match_limit) + return (RE_MATCH_FAILED); + } + } +} diff --git a/v7/src/microcode/regex.h b/v7/src/microcode/regex.h new file mode 100644 index 000000000..c42cd0b4a --- /dev/null +++ b/v7/src/microcode/regex.h @@ -0,0 +1,199 @@ +/* -*-C-*- + +$Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/regex.h,v 1.1 1987/07/14 03:00:23 cph Rel $ + +Copyright (c) 1987 Massachusetts Institute of Technology + +This material was developed by the Scheme project at the Massachusetts +Institute of Technology, Department of Electrical Engineering and +Computer Science. Permission to copy this software, to redistribute +it, and to use it for any purpose is granted, subject to the following +restrictions and understandings. + +1. Any copy made of this software must include this copyright notice +in full. + +2. Users of this software agree to make their best efforts (a) to +return to the MIT Scheme project any improvements or extensions that +they make, so that these may be included in future releases; and (b) +to inform MIT of noteworthy uses of this software. + +3. All materials developed as a consequence of the use of this +software shall duly acknowledge such use, in accordance with the usual +standards of acknowledging credit in academic research. + +4. MIT has made no warrantee or representation that the operation of +this software will be error-free, and MIT is under no obligation to +provide any services, by way of maintenance, update, or otherwise. + +5. In conjunction with products arising from the use of this material, +there shall be no use of the name of the Massachusetts Institute of +Technology nor of any adaptation thereof in any advertising, +promotional, or sales literature without prior written consent from +MIT in each case. */ + +/* Translated from GNU Emacs. */ + +/* Structure to represent a buffer of text to match against. + This contains the information that an editor buffer would have + to supply for the matching process to be executed. + + `translation' is an array of MAX_ASCII characters which is used to + map each character before matching. Both the pattern and the match + text are mapped. This is normally used to implement case + insensitive searches. + + `syntax_table' describes the syntax of the match text. See the + syntax table primitives for more information. + + `text' points to the beginning of the match text. It is used only + for translating match text pointers into indices. + + `text_start' and `text_end' delimit the match text. They define + the buffer-start and buffer-end for those matching commands that + refer to them. Also, all matching must take place within these + limits. + + `gap_start' and `gap_end' delimit a gap in the match text. Editor + buffers normally have such a gap. For applications without a gap, + it is recommended that these be set to the same value as + `text_end'. + + Both `text_start' and `gap_start' are inclusive indices, while + `text_end' and `gap_end' are exclusive. + + The following conditions must be true: + + (text <= text_start) + (text_start <= text_end) + (gap_start <= gap_end) + (! ((text_start < text_end) && + (gap_start < gap_end) && + ((text_start == gap_start) || (text_end == gap_end)))) + + */ + +struct re_buffer + { + unsigned char *translation; + SYNTAX_TABLE_TYPE syntax_table; + unsigned char *text; + unsigned char *text_start; + unsigned char *text_end; + unsigned char *gap_start; + unsigned char *gap_end; + }; + +/* Structure to store "register" contents data in. + + Pass the address of such a structure as an argument to re_match, + etc., if you want this information back. + + start[i] and end[i] record the string matched by \( ... \) grouping + i, for i from 1 to RE_NREGS - 1. + + start[0] and end[0] record the entire string matched. */ + +#define RE_NREGS 10 + +struct re_registers + { + long start[RE_NREGS]; + long end[RE_NREGS]; + }; + +/* These are the command codes that appear in compiled regular + expressions, one per byte. Some command codes are followed by + argument bytes. A command code can specify any interpretation + whatever for its arguments. Zero-bytes may appear in the compiled + regular expression. */ + +enum regexpcode + { + regexpcode_unused, + regexpcode_exact_1, /* Followed by 1 literal byte */ + + /* Followed by one byte giving n, and then by n literal bytes. */ + regexpcode_exact_n, + + regexpcode_line_start, /* Fails unless at beginning of line */ + regexpcode_line_end, /* Fails unless at end of line */ + + /* Followed by two bytes giving relative address to jump to. */ + regexpcode_jump, + + /* Followed by two bytes giving relative address of place to + resume at in case of failure. */ + regexpcode_on_failure_jump, + + /* Throw away latest failure point and then jump to address. */ + regexpcode_finalize_jump, + + /* Like jump but finalize if safe to do so. This is used to jump + back to the beginning of a repeat. If the command that follows + this jump is clearly incompatible with the one at the beginning + of the repeat, such that we can be sure that there is no use + backtracking out of repetitions already completed, then we + finalize. */ + regexpcode_maybe_finalize_jump, + + /* jump, and push a dummy failure point. This failure point will + be thrown away if an attempt is made to use it for a failure. + A + construct makes this before the first repeat. */ + regexpcode_dummy_failure_jump, + + regexpcode_any_char, /* Matches any one character */ + + /* Matches any one char belonging to specified set. First + following byte is # bitmap bytes. Then come bytes for a + bit-map saying which chars are in. Bits in each byte are + ordered low-bit-first. A character is in the set if its bit is + 1. A character too large to have a bit in the map is + automatically not in the set. */ + regexpcode_char_set, + + /* Similar but match any character that is NOT one of those + specified. */ + regexpcode_not_char_set, + + /* Starts remembering the text that is matched and stores it in a + memory register. Followed by one byte containing the register + number. Register numbers must be in the range 0 through + (RE_NREGS - 1) inclusive. */ + regexpcode_start_memory, + + /* Stops remembering the text that is matched and stores it in a + memory register. Followed by one byte containing the register + number. Register numbers must be in the range 0 through + (RE_NREGS - 1) inclusive. */ + regexpcode_stop_memory, + + /* Match a duplicate of something remembered. Followed by one + byte containing the index of the memory register. */ + regexpcode_duplicate, + + regexpcode_buffer_start, /* Succeeds if at beginning of buffer */ + regexpcode_buffer_end, /* Succeeds if at end of buffer */ + regexpcode_word_char, /* Matches any word-constituent character */ + + /* Matches any char that is not a word-constituent. */ + regexpcode_not_word_char, + + regexpcode_word_start, /* Succeeds if at word beginning */ + regexpcode_word_end, /* Succeeds if at word end */ + regexpcode_word_bound, /* Succeeds if at a word boundary */ + regexpcode_not_word_bound, /* Succeeds if not at a word boundary */ + + /* Matches any character whose syntax is specified. Followed by a + byte which contains a syntax code, Sword or such like. */ + regexpcode_syntax_spec, + + /* Matches any character whose syntax differs from the specified. */ + regexpcode_not_syntax_spec + }; + +extern void re_buffer_initialize (); +extern int re_compile_fastmap (); +extern int re_match (); +extern int re_search_forward (); +extern int re_search_backward (); diff --git a/v7/src/microcode/rgxprim.c b/v7/src/microcode/rgxprim.c new file mode 100644 index 000000000..86c05da3e --- /dev/null +++ b/v7/src/microcode/rgxprim.c @@ -0,0 +1,247 @@ +/* -*-C-*- + +$Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/rgxprim.c,v 1.1 1987/07/14 03:00:03 cph Exp $ + +Copyright (c) 1987 Massachusetts Institute of Technology + +This material was developed by the Scheme project at the Massachusetts +Institute of Technology, Department of Electrical Engineering and +Computer Science. Permission to copy this software, to redistribute +it, and to use it for any purpose is granted, subject to the following +restrictions and understandings. + +1. Any copy made of this software must include this copyright notice +in full. + +2. Users of this software agree to make their best efforts (a) to +return to the MIT Scheme project any improvements or extensions that +they make, so that these may be included in future releases; and (b) +to inform MIT of noteworthy uses of this software. + +3. All materials developed as a consequence of the use of this +software shall duly acknowledge such use, in accordance with the usual +standards of acknowledging credit in academic research. + +4. MIT has made no warrantee or representation that the operation of +this software will be error-free, and MIT is under no obligation to +provide any services, by way of maintenance, update, or otherwise. + +5. In conjunction with products arising from the use of this material, +there shall be no use of the name of the Massachusetts Institute of +Technology nor of any adaptation thereof in any advertising, +promotional, or sales literature without prior written consent from +MIT in each case. */ + +/* Primitives for regular expression matching and search. */ + +/* This code is not yet tested. -- CPH */ + +#include "scheme.h" +#include "primitive.h" +#include "stringprim.h" +#include "character.h" +#include "edwin.h" +#include "syntax.h" +#include "regex.h" + +#define RE_CHAR_SET_P(object) \ + ((STRING_P (object)) && \ + ((string_length (object)) == (MAX_ASCII / ASCII_LENGTH))) + +#define CHAR_SET_P(argument) \ + ((STRING_P (argument)) && ((string_length (argument)) == MAX_ASCII)) + +#define CHAR_TRANSLATION_P(argument) \ + ((STRING_P (argument)) && ((string_length (argument)) == MAX_ASCII)) + +#define RE_REGISTERS_P(object) \ + (((object) == NIL) || \ + ((VECTOR_P (object)) && \ + ((Vector_Length (object)) == (RE_NREGS + RE_NREGS)))) + +#define RE_MATCH_RESULTS(result, vector) do \ +{ \ + if ((result) >= 0) \ + { \ + if ((vector) != NIL) \ + { \ + int i; \ + long index; \ + \ + for (i = 0; (i < RE_NREGS); i += 1) \ + { \ + index = ((registers . start) [i]); \ + User_Vector_Set \ + (vector, \ + i, \ + ((index == -1) \ + ? NIL \ + : (C_Integer_To_Scheme_Integer (index)))); \ + index = ((registers . end) [i]); \ + User_Vector_Set \ + (vector, \ + (i + RE_NREGS), \ + ((index == -1) \ + ? NIL \ + : (C_Integer_To_Scheme_Integer (index)))); \ + } \ + } \ + return (C_Integer_To_Scheme_Integer (result)); \ + } \ + else if ((result) == (-1)) \ + return (NIL); \ + else if ((result) == (-2)) \ + error_bad_range_arg (1); \ + else \ + error_external_return (); \ +} while (0) + +Built_In_Primitive (Prim_re_char_set_adjoin, 2, "RE-CHAR-SET-ADJOIN!", 0x190) +{ + int ascii; + Primitive_2_Args (); + + CHECK_ARG (1, RE_CHAR_SET_P); + ascii = (arg_ascii_char (2)); + (* (string_pointer (Arg1, (ascii / ASCII_LENGTH)))) |= + (1 << (ascii % ASCII_LENGTH)); + return (NIL); +} + +Built_In_Primitive (Prim_re_compile_fastmap, 4, "RE-COMPILE-FASTMAP", 0x191) +{ + int can_be_null; + Primitive_4_Args (); + + CHECK_ARG (1, STRING_P); + CHECK_ARG (2, CHAR_TRANSLATION_P); + CHECK_ARG (3, SYNTAX_TABLE_P); + CHECK_ARG (4, CHAR_SET_P); + + can_be_null = + (re_compile_fastmap ((string_pointer (Arg1, 0)), + (string_pointer (Arg1, (string_length (Arg1)))), + (string_pointer (Arg2, 0)), + Arg3, + (string_pointer (Arg4, 0)))); + + if (can_be_null >= 0) + return (C_Integer_To_Scheme_Integer (can_be_null)); + else if (can_be_null == (-2)) + error_bad_range_arg (1); + else + error_external_return (); +} + +/* (re-match-substring regexp translation syntax-table registers + string start end) + + Attempt to match REGEXP against the substring [STRING, START, END]. + Return the index of the end of the match (exclusive) if successful. + Otherwise return false. REGISTERS, if not false, is set to contain + the appropriate indices for the match registers. */ + +#define RE_SUBSTRING_PRIMITIVE(procedure) \ +{ \ + long match_start, match_end, text_end; \ + char *text; \ + struct re_buffer buffer; \ + struct re_registers registers; \ + int result; \ + Primitive_7_Args (); \ + \ + CHECK_ARG (1, STRING_P); \ + CHECK_ARG (2, CHAR_TRANSLATION_P); \ + CHECK_ARG (3, SYNTAX_TABLE_P); \ + CHECK_ARG (4, RE_REGISTERS_P); \ + CHECK_ARG (5, STRING_P); \ + match_start = (arg_nonnegative_integer (6)); \ + match_end = (arg_nonnegative_integer (7)); \ + text = (string_pointer (Arg5, 0)); \ + text_end = (string_length (Arg5)); \ + \ + if (match_end > text_end) error_bad_range_arg (7); \ + if (match_start > match_end) error_bad_range_arg (6); \ + \ + re_buffer_initialize \ + ((& buffer), (string_pointer (Arg2, 0)), Arg3, text, 0, text_end, \ + text_end, text_end); \ + \ + result = \ + (procedure ((string_pointer (Arg1, 0)), \ + (string_pointer (Arg1, (string_length (Arg1)))), \ + (& buffer), \ + ((Arg4 == NIL) ? NULL : (& registers)), \ + (& (text [match_start])), \ + (& (text [match_end])))); \ + RE_MATCH_RESULTS (result, Arg4); \ +} + +Built_In_Primitive (Prim_re_match_substring, 7, "RE-MATCH-SUBSTRING", 0x118) + RE_SUBSTRING_PRIMITIVE (re_match) + +Built_In_Primitive (Prim_re_search_substring_forward, 7, + "RE-SEARCH-SUBSTRING-FORWARD", 0x119) + RE_SUBSTRING_PRIMITIVE (re_search_forward) + +Built_In_Primitive (Prim_re_search_substring_backward, 7, + "RE-SEARCH-SUBSTRING-BACKWARD", 0x11A) + RE_SUBSTRING_PRIMITIVE (re_search_backward) + +#define RE_BUFFER_PRIMITIVE(procedure) \ +{ \ + long match_start, match_end, text_start, text_end, gap_start; \ + char *text; \ + struct re_buffer buffer; \ + struct re_registers registers; \ + int result; \ + Primitive_7_Args (); \ + \ + CHECK_ARG (1, STRING_P); \ + CHECK_ARG (2, CHAR_TRANSLATION_P); \ + CHECK_ARG (3, SYNTAX_TABLE_P); \ + CHECK_ARG (4, RE_REGISTERS_P); \ + CHECK_ARG (5, GROUP_P); \ + match_start = (arg_nonnegative_integer (6)); \ + match_end = (arg_nonnegative_integer (7)); \ + \ + text = (string_pointer ((GROUP_TEXT (Arg5)), 0)); \ + text_start = (MARK_POSITION (GROUP_START_MARK (Arg5))); \ + text_end = (MARK_POSITION (GROUP_END_MARK (Arg5))); \ + gap_start = (GROUP_GAP_START (Arg5)); \ + \ + if (match_end > gap_start) \ + { \ + match_end += (GROUP_GAP_LENGTH (Arg5)); \ + if (match_start >= gap_start) \ + match_start += (GROUP_GAP_LENGTH (Arg5)); \ + } \ + \ + if (match_start > match_end) error_bad_range_arg (6); \ + if (match_end > text_end) error_bad_range_arg (7); \ + if (match_start < text_start) error_bad_range_arg (6); \ + \ + re_buffer_initialize \ + ((& buffer), (string_pointer (Arg2, 0)), Arg3, text, text_start, \ + text_end, gap_start, (GROUP_GAP_END (Arg5))); \ + \ + result = \ + (procedure ((string_pointer (Arg1, 0)), \ + (string_pointer (Arg1, (string_length (Arg1)))), \ + (& buffer), \ + ((Arg4 == NIL) ? NULL : (& registers)), \ + (& (text [match_start])), \ + (& (text [match_end])))); \ + RE_MATCH_RESULTS (result, Arg4); \ +} + +Built_In_Primitive (Prim_re_match_buffer, 7, "RE-MATCH-BUFFER", 0x192) + RE_BUFFER_PRIMITIVE (re_match) + +Built_In_Primitive (Prim_re_search_buffer_forward, 7, + "RE-SEARCH-BUFFER-FORWARD", 0x193) + RE_BUFFER_PRIMITIVE (re_search_forward) + +Built_In_Primitive (Prim_re_search_buffer_backward, 7, + "RE-SEARCH-BUFFER-BACKWARD", 0x194) + RE_BUFFER_PRIMITIVE (re_search_backward)