--- /dev/null
+/* -*-C-*-
+
+$Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/regex.c,v 1.1 1987/07/14 03:00:59 cph Exp $
+
+Copyright (c) 1987 Massachusetts Institute of Technology
+
+This material was developed by the Scheme project at the Massachusetts
+Institute of Technology, Department of Electrical Engineering and
+Computer Science. Permission to copy this software, to redistribute
+it, and to use it for any purpose is granted, subject to the following
+restrictions and understandings.
+
+1. Any copy made of this software must include this copyright notice
+in full.
+
+2. Users of this software agree to make their best efforts (a) to
+return to the MIT Scheme project any improvements or extensions that
+they make, so that these may be included in future releases; and (b)
+to inform MIT of noteworthy uses of this software.
+
+3. All materials developed as a consequence of the use of this
+software shall duly acknowledge such use, in accordance with the usual
+standards of acknowledging credit in academic research.
+
+4. MIT has made no warrantee or representation that the operation of
+this software will be error-free, and MIT is under no obligation to
+provide any services, by way of maintenance, update, or otherwise.
+
+5. In conjunction with products arising from the use of this material,
+there shall be no use of the name of the Massachusetts Institute of
+Technology nor of any adaptation thereof in any advertising,
+promotional, or sales literature without prior written consent from
+MIT in each case. */
+
+/* Regular expression matching and search.
+ Translated from GNU Emacs. */
+
+/* This code is not yet tested. -- CPH */
+
+#include "scheme.h"
+#include "character.h"
+#include "syntax.h"
+#include "regex.h"
+\f
+#ifndef SIGN_EXTEND_CHAR
+#define SIGN_EXTEND_CHAR(x) (x)
+#endif /* not SIGN_EXTEND_CHAR */
+
+#ifndef SWITCH_ENUM
+#define SWITCH_ENUM(enum_type, expression) \
+ switch ((enum enum_type) (expression))
+#endif /* not SWITCH_ENUM */
+
+#define RE_NFAILURES 80
+
+#define FOR_INDEX_RANGE(index, start, end) \
+ for (index = (start); (index < (end)); index += 1)
+
+#define FOR_INDEX_BELOW(index, limit) \
+ FOR_INDEX_RANGE (index, 0, (limit))
+
+#define FOR_ALL_ASCII(index) \
+ FOR_INDEX_BELOW (index, MAX_ASCII)
+
+#define FOR_ALL_ASCII_SUCH_THAT(index, expression) \
+ FOR_ALL_ASCII (index) \
+ if (expression)
+
+#define TRANSLATE_CHAR(ascii) \
+ ((translation == NULL) ? (ascii) : (translation [(ascii)]))
+
+#define WORD_CONSTITUENT_P(ascii) \
+ (SYNTAX_CONSTITUENT_P (syntaxcode_word, (ascii)))
+
+#define SYNTAX_CONSTITUENT_P(code, ascii) \
+ ((SYNTAX_ENTRY_CODE (SYNTAX_TABLE_REF (syntax_table, (ascii)))) == (code))
+
+#define CHAR_SET_MEMBER_P(length, char_set, ascii) \
+ (((ascii) < ((length) * ASCII_LENGTH)) && \
+ (CHAR_SET_MEMBER_P_INTERNAL (char_set, ascii)))
+
+#define CHAR_SET_MEMBER_P_INTERNAL(char_set, ascii) \
+ ((((char_set) [((ascii) / ASCII_LENGTH)]) & \
+ (1 << ((ascii) % ASCII_LENGTH))) \
+ != 0)
+\f
+#define READ_PATTERN_CHAR(target) do \
+{ \
+ if (pattern_pc >= pattern_end) \
+ BAD_PATTERN (); \
+ (target) = (*pattern_pc++); \
+} while (0)
+
+#define READ_PATTERN_OFFSET(target) do \
+{ \
+ if ((pattern_pc + 1) >= pattern_end) \
+ BAD_PATTERN (); \
+ (target) = (*pattern_pc++); \
+ (target) += \
+ ((SIGN_EXTEND_CHAR (* ((char *) (pattern_pc++)))) << ASCII_LENGTH); \
+ if (((pattern_pc + (target)) < pattern_start) || \
+ ((pattern_pc + (target)) > pattern_end)) \
+ BAD_PATTERN (); \
+} while (0)
+
+#define READ_PATTERN_LENGTH(target) do \
+{ \
+ if ((pattern_pc >= pattern_end) || \
+ ((pattern_pc + ((target) = (*pattern_pc++))) > pattern_end)) \
+ BAD_PATTERN (); \
+} while (0)
+
+#define READ_PATTERN_REGISTER(target) do \
+{ \
+ if ((pattern_pc >= pattern_end) || \
+ (((target) = (*pattern_pc++)) >= RE_NREGS)) \
+ BAD_PATTERN (); \
+} while (0)
+
+#define READ_PATTERN_SYNTAXCODE(target) do \
+{ \
+ if ((pattern_pc >= pattern_end) || \
+ (((int) ((target) = ((enum syntaxcode) (*pattern_pc++)))) \
+ >= ((int) syntaxcode_max))) \
+ BAD_PATTERN (); \
+} while (0)
+
+#define BAD_PATTERN() RE_RETURN (-2)
+\f
+#define PUSH_FAILURE_POINT(pattern_pc, match_pc) do \
+{ \
+ if (stack_pointer == stack_end) \
+ { \
+ long stack_length; \
+ unsigned char **stack_temporary; \
+ \
+ stack_length = ((stack_end - stack_start) * 2); \
+ if (stack_length > (re_max_failures * 2)) \
+ RE_RETURN (-4); \
+ stack_temporary = \
+ ((unsigned char **) \
+ (realloc \
+ (stack_start, (stack_length * (sizeof (unsigned char *)))))); \
+ if (stack_temporary == NULL) \
+ RE_RETURN (-3); \
+ stack_end = (& (stack_temporary [stack_length])); \
+ stack_pointer = \
+ (& (stack_temporary [(stack_pointer - stack_start)])); \
+ stack_start = stack_temporary; \
+ } \
+ (*stack_pointer++) = (pattern_pc); \
+ (*stack_pointer++) = (match_pc); \
+} while (0)
+
+#define RE_RETURN(value) \
+{ \
+ return_value = (value); \
+ goto return_point; \
+}
+\f
+void
+re_buffer_initialize (buffer, translation, syntax_table, text,
+ text_start_index, text_end_index,
+ gap_start_index, gap_end_index)
+ struct re_buffer *buffer;
+ unsigned char *translation;
+ SYNTAX_TABLE_TYPE syntax_table;
+ unsigned char *text;
+ unsigned long text_start_index, text_end_index,
+ gap_start_index, gap_end_index;
+{
+ unsigned char *text_start, *text_end, *gap_start, *gap_end;
+
+ /* Assumes that
+ ((text_start_index <= gap_start_index) &&
+ (gap_start_index <= gap_end_index) &&
+ (gap_end_index <= text_end_index)) */
+
+ text_start = (text + text_start_index);
+ text_end = (text + text_end_index);
+ gap_start = (text + gap_start_index);
+ gap_end = (text + gap_end_index);
+
+ (buffer -> translation) = translation;
+ (buffer -> syntax_table) = syntax_table;
+ (buffer -> text) = text;
+ (buffer -> text_start) = ((text_start == gap_start) ? gap_end : text_start);
+ (buffer -> text_end) = ((text_end == gap_end) ? gap_start : text_end);
+ (buffer -> gap_start) = gap_start;
+ (buffer -> gap_end) = gap_end;
+ return;
+}
+\f
+/* Given a compiled pattern between `pattern_start' and `pattern_end',
+ generate a character set which is true of all characters which can
+ be the first character of a match.
+
+ See the documentation of `struct re_buffer' for a description of
+ `translation' and `syntax_table'.
+
+ `fastmap' is the resulting character set. It is a character array
+ whose elements are either `FASTMAP_FALSE' or `FASTMAP_TRUE'.
+
+ Return values:
+ 0 => pattern cannot match the null string.
+ 1 => pattern can match the null string.
+ 2 => pattern can match the null string, but only at end of match
+ text or to left of a character in `fastmap'.
+ -2 => the pattern is improperly formed.
+ else => undefined. */
+
+#define FASTMAP_FALSE '\0'
+#define FASTMAP_TRUE '\1'
+
+int
+re_compile_fastmap (pattern_start, pattern_end, translation, syntax_table,
+ fastmap)
+ unsigned char *pattern_start;
+ fast unsigned char *pattern_end;
+ unsigned char *translation;
+ SYNTAX_TABLE_TYPE syntax_table;
+ fast unsigned char *fastmap;
+{
+ fast unsigned char *pattern_pc;
+ unsigned char *stack_start[RE_NFAILURES];
+ unsigned char **stack_pointer;
+ int return_value;
+
+ pattern_pc = pattern_start;
+ return_value = 0;
+ stack_pointer = stack_start;
+
+ {
+ fast int i;
+
+ FOR_ALL_ASCII (i)
+ (fastmap [i]) = FASTMAP_FALSE;
+ }
+\f
+ loop:
+ if (pattern_pc >= pattern_end)
+ RE_RETURN (1);
+
+ SWITCH_ENUM (regexpcode, (*pattern_pc++))
+ {
+ case regexpcode_unused:
+ case regexpcode_line_start:
+ case regexpcode_buffer_start:
+ case regexpcode_buffer_end:
+ case regexpcode_word_start:
+ case regexpcode_word_end:
+ case regexpcode_word_bound:
+ case regexpcode_not_word_bound:
+ goto loop;
+
+ case regexpcode_line_end:
+ {
+ (fastmap [(TRANSLATE_CHAR ('\n'))]) = FASTMAP_TRUE;
+ if (return_value == 0)
+ return_value = 2;
+ goto next;
+ }
+
+ case regexpcode_exact_1:
+ {
+ fast int ascii;
+
+ READ_PATTERN_CHAR (ascii);
+ (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+ goto next;
+ }
+
+ case regexpcode_exact_n:
+ {
+ fast int length;
+
+ READ_PATTERN_LENGTH (length);
+ if (length == 0)
+ goto loop;
+ (fastmap [(TRANSLATE_CHAR (pattern_pc [1]))]) = FASTMAP_TRUE;
+ goto next;
+ }
+
+ case regexpcode_any_char:
+ {
+ fast int ascii;
+
+ FOR_ALL_ASCII_SUCH_THAT (ascii, (ascii != '\n'))
+ (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+ if (return_value != 0)
+ goto return_point;
+ }
+\f
+ case regexpcode_char_set:
+ {
+ fast int length;
+ fast int ascii;
+
+ READ_PATTERN_LENGTH (length);
+ length = (length * ASCII_LENGTH);
+ FOR_INDEX_BELOW (ascii, length)
+ if (CHAR_SET_MEMBER_P_INTERNAL (pattern_pc, ascii))
+ (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+ goto next;
+ }
+
+ case regexpcode_not_char_set:
+ {
+ fast int length;
+ fast int ascii;
+
+ READ_PATTERN_LENGTH (length);
+ length = (length * ASCII_LENGTH);
+ FOR_INDEX_BELOW (ascii, length)
+ if (! (CHAR_SET_MEMBER_P_INTERNAL (pattern_pc, ascii)))
+ (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+ FOR_INDEX_RANGE (ascii, length, MAX_ASCII)
+ (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+ goto next;
+ }
+\f
+ case regexpcode_word_char:
+ {
+ fast int ascii;
+
+ FOR_ALL_ASCII_SUCH_THAT (ascii, (WORD_CONSTITUENT_P (ascii)))
+ (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+ goto next;
+ }
+
+ case regexpcode_not_word_char:
+ {
+ fast int ascii;
+
+ FOR_ALL_ASCII_SUCH_THAT (ascii, (! (WORD_CONSTITUENT_P (ascii))))
+ (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+ goto next;
+ }
+
+ case regexpcode_syntax_spec:
+ {
+ fast enum syntaxcode code;
+ fast int ascii;
+
+ READ_PATTERN_SYNTAXCODE (code);
+ FOR_ALL_ASCII_SUCH_THAT (ascii, (SYNTAX_CONSTITUENT_P (code, ascii)))
+ (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+ goto next;
+ }
+
+ case regexpcode_not_syntax_spec:
+ {
+ fast enum syntaxcode code;
+ fast int ascii;
+
+ READ_PATTERN_SYNTAXCODE (code);
+ FOR_ALL_ASCII_SUCH_THAT (ascii,
+ (! (SYNTAX_CONSTITUENT_P (code, ascii))))
+ (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+ goto next;
+ }
+\f
+ case regexpcode_start_memory:
+ case regexpcode_stop_memory:
+ {
+ fast int register_number;
+
+ READ_PATTERN_REGISTER (register_number);
+ goto loop;
+ }
+
+ case regexpcode_duplicate:
+ {
+ fast int register_number;
+ fast int ascii;
+
+ READ_PATTERN_REGISTER (register_number);
+ FOR_ALL_ASCII (ascii)
+ (fastmap [ascii]) = FASTMAP_TRUE;
+ RE_RETURN (1);
+ }
+\f
+ case regexpcode_jump:
+ case regexpcode_finalize_jump:
+ case regexpcode_maybe_finalize_jump:
+ case regexpcode_dummy_failure_jump:
+ {
+ fast int offset;
+
+ return_value = 1;
+ READ_PATTERN_OFFSET (offset);
+ pattern_pc += offset;
+ if (offset > 0)
+ goto loop;
+
+ /* Jump backward reached implies we just went through the
+ body of a loop and matched nothing. Opcode jumped to
+ should be an on_failure_jump. Just treat it like an
+ ordinary jump. For a * loop, it has pushed its failure
+ point already; if so, discard that as redundant. */
+ if (pattern_pc >= pattern_end)
+ BAD_PATTERN ();
+ if (((enum regexpcode) (pattern_pc [0])) !=
+ regexpcode_on_failure_jump)
+ goto loop;
+ READ_PATTERN_OFFSET (offset);
+ pattern_pc += offset;
+ if ((stack_pointer != stack_start) &&
+ ((stack_pointer [-1]) == pattern_pc))
+ stack_pointer -= 1;
+ goto loop;
+ }
+
+ case regexpcode_on_failure_jump:
+ {
+ fast int offset;
+
+ READ_PATTERN_OFFSET (offset);
+ (*stack_pointer++) = (pattern_pc + offset);
+ goto loop;
+ }
+
+ default:
+ BAD_PATTERN ();
+ }
+
+ next:
+ if (stack_pointer != stack_start)
+ {
+ pattern_pc = (*--stack_pointer);
+ goto loop;
+ }
+
+ return_point:
+ return (return_value);
+}
+\f
+/* Match the compiled pattern described by `pattern_start' and
+ `pattern_end' against the characters in `buffer' between
+ `match_start' and `match_end'.
+
+ `registers', if not NULL, will be filled with the start and end
+ indices of the match registers if the match succeeds.
+
+ It is assumed that the following is true:
+
+ (! ((gap_start < gap_end) &&
+ (match_start < match_end) &&
+ ((match_start == gap_start) || (match_end == gap_end))))
+
+ Return values:
+
+ non-negative => the end index (exclusive) of the match.
+ -1 => no match.
+ -2 => the pattern is badly formed.
+ -3 => memory allocation error.
+ -4 => match stack overflow.
+ other => undefined. */
+
+#define RE_MATCH_FAILED (-1)
+
+#define ADDRESS_TO_INDEX(address) \
+ ((((address) > gap_start) ? ((address) - gap_length) : (address)) \
+ - (buffer -> text))
+
+#define READ_MATCH_CHAR(target) do \
+{ \
+ if (match_pc >= match_end) \
+ goto re_match_fail; \
+ (target) = (TRANSLATE_CHAR (*match_pc++)); \
+ if (match_pc == gap_start) \
+ match_pc = gap_end; \
+} while (0)
+
+static Boolean
+beq_translate (scan1, scan2, length, translation)
+ fast unsigned char *scan1, *scan2;
+ fast long length;
+ fast unsigned char *translation;
+{
+ while ((length--) > 0)
+ if ((TRANSLATE_CHAR (*scan1++)) != (TRANSLATE_CHAR (*scan2++)))
+ return (false);
+ return (true);
+}
+
+int re_max_failures = 1000;
+\f
+int
+re_match (pattern_start, pattern_end, buffer, registers,
+ match_start, match_end)
+ unsigned char *pattern_start, *pattern_end;
+ struct re_buffer *buffer;
+ struct re_registers *registers;
+ unsigned char *match_start, *match_end;
+{
+ fast unsigned char *pattern_pc, *match_pc;
+ unsigned char *gap_start, *gap_end;
+ unsigned char *translation;
+ SYNTAX_TABLE_TYPE syntax_table;
+ long gap_length;
+ int return_value;
+
+ /* Failure point stack. Each place that can handle a failure
+ further down the line pushes a failure point on this stack. It
+ consists of two char *'s. The first one pushed is where to
+ resume scanning the pattern; the second pushed is where to resume
+ scanning the match text. If the latter is NULL, the failure
+ point is a "dummy". If a failure happens and the innermost
+ failure point is dormant, it discards that failure point and
+ tries the next one. */
+
+ unsigned char **stack_start, **stack_end, **stack_pointer;
+
+ /* Information on the "contents" of registers. These are pointers
+ into the match text; they record just what was matched (on this
+ attempt) by some part of the pattern. The start_memory command
+ stores the start of a register's contents and the stop_memory
+ command stores the end.
+
+ At that point, (register_start [regnum]) points to the first
+ character in the register, and (register_end [regnum]) points to
+ the first character beyond the end of the register. */
+
+ unsigned char *register_start[RE_NREGS];
+ unsigned char *register_end[RE_NREGS];
+\f
+ pattern_pc = pattern_start;
+ match_pc = match_start;
+ gap_start = (buffer -> gap_start);
+ gap_end = (buffer -> gap_end);
+ gap_length = (gap_end - gap_start);
+ translation = (buffer -> translation);
+ syntax_table = (buffer -> syntax_table);
+
+ stack_start =
+ ((unsigned char **) (malloc ((2 * RE_NFAILURES) * (sizeof (char *)))));
+ if (stack_start == NULL)
+ RE_RETURN (-3);
+
+ stack_end = (& (stack_start [(2 * RE_NFAILURES)]));
+ stack_pointer = stack_start;
+
+ {
+ fast int i;
+
+ FOR_INDEX_BELOW (i, RE_NREGS)
+ {
+ (register_start [i]) = NULL;
+ (register_end [i]) = NULL;
+ }
+ }
+
+ re_match_loop:
+ if (pattern_pc >= pattern_end)
+ {
+ /* Reaching here indicates that match was successful. */
+ if (registers != NULL)
+ {
+ fast int i;
+
+ (register_start [0]) = match_start;
+ (register_end [0]) = match_pc;
+ FOR_INDEX_BELOW (i, RE_NREGS)
+ {
+ ((registers -> start) [i]) =
+ (((register_start [i]) == NULL)
+ ? -1
+ : (ADDRESS_TO_INDEX (register_start [i])));
+ ((registers -> end) [i]) =
+ (((register_end [i]) == NULL)
+ ? -1
+ : (ADDRESS_TO_INDEX (register_end [i])));
+ }
+ }
+ RE_RETURN (ADDRESS_TO_INDEX (match_pc));
+ }
+\f
+ SWITCH_ENUM (regexpcode, (*pattern_pc++))
+ {
+ case regexpcode_unused:
+ goto re_match_loop;
+
+ case regexpcode_exact_1:
+ {
+ fast int ascii;
+ fast int ascii_p;
+
+ READ_MATCH_CHAR (ascii);
+ READ_PATTERN_CHAR (ascii_p);
+ if (ascii == ascii_p)
+ goto re_match_loop;
+ goto re_match_fail;
+ }
+
+ case regexpcode_exact_n:
+ {
+ fast int length;
+ fast int ascii;
+
+ READ_PATTERN_LENGTH (length);
+ while ((length--) > 0)
+ {
+ READ_MATCH_CHAR (ascii);
+ if (ascii != (*pattern_pc++))
+ goto re_match_fail;
+ }
+ goto re_match_loop;
+ }
+
+ case regexpcode_any_char:
+ {
+ fast int ascii;
+
+ READ_MATCH_CHAR (ascii);
+ if (ascii == '\n')
+ goto re_match_fail;
+ goto re_match_loop;
+ }
+\f
+#define RE_MATCH_CHAR_SET(winning_label, losing_label) \
+ { \
+ fast int ascii; \
+ fast int length; \
+ \
+ READ_MATCH_CHAR (ascii); \
+ READ_PATTERN_LENGTH (length); \
+ if (CHAR_SET_MEMBER_P (length, pattern_pc, ascii)) \
+ { \
+ pattern_pc += length; \
+ goto winning_label; \
+ } \
+ else \
+ { \
+ pattern_pc += length; \
+ goto losing_label; \
+ } \
+ }
+
+ case regexpcode_char_set:
+ RE_MATCH_CHAR_SET (re_match_loop, re_match_fail);
+
+ case regexpcode_not_char_set:
+ RE_MATCH_CHAR_SET (re_match_fail, re_match_loop);
+
+#undef RE_MATCH_CHAR_SET
+
+ /* \( is represented by a start_memory, \) by a stop_memory. Both
+ of those commands contain a "register number" argument. The
+ text matched within the \( and \) is recorded under that
+ number. Then, \<digit> turns into a `duplicate' command which
+ is followed by the numeric value of <digit> as the register
+ number. */
+
+ case regexpcode_start_memory:
+ {
+ fast int register_number;
+
+ READ_PATTERN_REGISTER (register_number);
+ (register_start [register_number]) = match_pc;
+ goto re_match_loop;
+ }
+
+ case regexpcode_stop_memory:
+ {
+ fast int register_number;
+
+ READ_PATTERN_REGISTER (register_number);
+ (register_end [register_number]) =
+ ((match_pc == gap_end) ? gap_start : match_pc);
+ goto re_match_loop;
+ }
+\f
+ case regexpcode_duplicate:
+ {
+ fast int register_number;
+ unsigned char *start, *end, *new_end;
+ long length;
+
+ READ_PATTERN_REGISTER (register_number);
+ start = (register_start [register_number]);
+ end = (register_end [register_number]);
+ length = (end - start);
+ if (length <= 0)
+ goto re_match_loop;
+ new_end = (match_pc + length);
+ if (new_end > match_end)
+ goto re_match_fail;
+ if ((match_pc <= gap_start) && (new_end > gap_start))
+ {
+ long length1, length2;
+
+ new_end += gap_length;
+ if (new_end > match_end)
+ goto re_match_fail;
+ length1 = (gap_start - match_pc);
+ length2 = (length - length1);
+ if (!
+ ((beq_translate (match_pc, start, length1, translation)) &&
+ (beq_translate (gap_end, (start + length1), length2,
+ translation))))
+ goto re_match_fail;
+ }
+ else if ((start <= gap_start) && (end > gap_start))
+ {
+ long length1, length2;
+
+ length1 = (gap_start - start);
+ length2 = (end - gap_end);
+ if (!
+ ((beq_translate (match_pc, start, length1, translation)) &&
+ (beq_translate ((match_pc + length1), gap_end, length2,
+ translation))))
+ goto re_match_fail;
+ }
+ else
+ {
+ if (! (beq_translate (match_pc, start, length, translation)))
+ goto re_match_fail;
+ }
+ match_pc = ((new_end == gap_start) ? gap_end : new_end);
+ goto re_match_loop;
+ }
+\f
+ case regexpcode_buffer_start:
+ {
+ if (match_pc == (buffer -> text_start))
+ goto re_match_loop;
+ goto re_match_fail;
+ }
+
+ case regexpcode_buffer_end:
+ {
+ if (match_pc == (buffer -> text_end))
+ goto re_match_loop;
+ goto re_match_fail;
+ }
+
+ case regexpcode_line_start:
+ {
+ if (match_pc == (buffer -> text_start))
+ goto re_match_loop;
+ if ((TRANSLATE_CHAR
+ (((match_pc == gap_end) ? gap_start : match_pc) [-1]))
+ == '\n')
+ goto re_match_loop;
+ goto re_match_fail;
+ }
+
+ case regexpcode_line_end:
+ {
+ if ((match_pc == (buffer -> text_end)) ||
+ ((TRANSLATE_CHAR (match_pc [0])) == '\n'))
+ goto re_match_loop;
+ goto re_match_fail;
+ }
+\f
+#define RE_MATCH_WORD_BOUND(word_bound_p) \
+ { \
+ if ((match_pc == gap_end) \
+ ? (word_bound_p \
+ (((gap_start != (buffer -> text_start)) && \
+ (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_start [-1])))), \
+ ((gap_end != (buffer -> text_end)) && \
+ (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_end [0])))))) \
+ : (word_bound_p \
+ (((match_pc != (buffer -> text_start)) && \
+ (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc [-1])))), \
+ ((match_pc != (buffer -> text_end)) && \
+ (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc [0]))))))) \
+ goto re_match_loop; \
+ goto re_match_fail; \
+ }
+
+ case regexpcode_word_bound:
+#define WORD_BOUND_P(left_p, right_p) ((left_p) != (right_p))
+ RE_MATCH_WORD_BOUND (WORD_BOUND_P);
+#undef WORD_BOUND_P
+
+ case regexpcode_not_word_bound:
+#define NOT_WORD_BOUND_P(left_p, right_p) ((left_p) == (right_p))
+ RE_MATCH_WORD_BOUND (NOT_WORD_BOUND_P);
+#undef NOT_WORD_BOUND_P
+
+ case regexpcode_word_start:
+#define WORD_START_P(left_p, right_p) ((! (left_p)) && (right_p))
+ RE_MATCH_WORD_BOUND (WORD_START_P);
+#undef WORD_START_P
+
+ case regexpcode_word_end:
+#define WORD_END_P(left_p, right_p) ((left_p) && (! (right_p)))
+ RE_MATCH_WORD_BOUND (WORD_END_P);
+#undef WORD_END_P
+
+#undef RE_MATCH_WORD_BOUND
+\f
+ case regexpcode_syntax_spec:
+ {
+ fast int ascii;
+ fast enum syntaxcode code;
+
+ READ_MATCH_CHAR (ascii);
+ READ_PATTERN_SYNTAXCODE (code);
+ if (SYNTAX_CONSTITUENT_P (code, ascii))
+ goto re_match_loop;
+ goto re_match_fail;
+ }
+
+ case regexpcode_not_syntax_spec:
+ {
+ fast int ascii;
+ fast enum syntaxcode code;
+
+ READ_MATCH_CHAR (ascii);
+ READ_PATTERN_SYNTAXCODE (code);
+ if (! (SYNTAX_CONSTITUENT_P (code, ascii)))
+ goto re_match_loop;
+ goto re_match_fail;
+ }
+
+ case regexpcode_word_char:
+ {
+ fast int ascii;
+
+ READ_MATCH_CHAR (ascii);
+ if (WORD_CONSTITUENT_P (ascii))
+ goto re_match_loop;
+ goto re_match_fail;
+ }
+
+ case regexpcode_not_word_char:
+ {
+ fast int ascii;
+
+ READ_MATCH_CHAR (ascii);
+ if (! (WORD_CONSTITUENT_P (ascii)))
+ goto re_match_loop;
+ goto re_match_fail;
+ }
+\f
+ /* "or" constructs ("|") are handled by starting each alternative
+ with an on_failure_jump that points to the start of the next
+ alternative. Each alternative except the last ends with a jump
+ to the joining point. (Actually, each jump except for the last
+ one really jumps to the following jump, because tensioning the
+ jumps is a hassle.)
+
+ The start of a stupid repeat has an on_failure_jump that points
+ past the end of the repeat text. This makes a failure point so
+ that, on failure to match a repetition, matching restarts past
+ as many repetitions have been found with no way to fail and
+ look for another one.
+
+ A smart repeat is similar but loops back to the on_failure_jump
+ so that each repetition makes another failure point. */
+
+ case regexpcode_on_failure_jump:
+ {
+ fast long offset;
+
+ READ_PATTERN_OFFSET (offset);
+ PUSH_FAILURE_POINT ((pattern_pc + offset), match_pc);
+ goto re_match_loop;
+ }
+\f
+ /* The end of a smart repeat has a maybe_finalize_jump back.
+ Change it either to a finalize_jump or an ordinary jump. */
+
+ case regexpcode_maybe_finalize_jump:
+ {
+ fast long offset;
+ fast long ascii;
+
+ READ_PATTERN_OFFSET (offset);
+ if (pattern_pc == pattern_end)
+ goto maybe_finalize_jump_finalize;
+
+ /* Compare what follows with the beginning of the repeat.
+ If we can establish that there is nothing that they
+ would both match, we can change to `finalize_jump'. */
+
+ SWITCH_ENUM (regexpcode, (pattern_pc [0]))
+ {
+ case regexpcode_exact_1:
+ ascii = (pattern_pc [1]);
+ break;
+
+ case regexpcode_exact_n:
+ ascii = (pattern_pc [2]);
+ break;
+
+ case regexpcode_line_end:
+ ascii = ('\n');
+ break;
+
+ default:
+ goto maybe_finalize_jump_not_finalize;
+ }
+\f
+ /* (pattern_pc [(offset - 3)]) is an `on_failure_jump'.
+ Examine what follows that. */
+ SWITCH_ENUM (regexpcode, (pattern_pc [offset]))
+ {
+ case regexpcode_exact_1:
+ {
+ if (ascii != (pattern_pc [(offset + 1)]))
+ goto maybe_finalize_jump_finalize;
+ goto maybe_finalize_jump_not_finalize;
+ }
+
+ case regexpcode_exact_n:
+ {
+ if (ascii != (pattern_pc [(offset + 2)]))
+ goto maybe_finalize_jump_finalize;
+ goto maybe_finalize_jump_not_finalize;
+ }
+
+ case regexpcode_char_set:
+ {
+ if (CHAR_SET_MEMBER_P ((pattern_pc [(offset + 1)]),
+ (& (pattern_pc [(offset + 2)])),
+ ascii))
+ goto maybe_finalize_jump_not_finalize;
+ goto maybe_finalize_jump_finalize;
+ }
+
+ case regexpcode_not_char_set:
+ {
+ if (CHAR_SET_MEMBER_P ((pattern_pc [(offset + 1)]),
+ (& (pattern_pc [(offset + 2)])),
+ ascii))
+ goto maybe_finalize_jump_finalize;
+ goto maybe_finalize_jump_not_finalize;
+ }
+
+ default:
+ goto maybe_finalize_jump_not_finalize;
+ }
+
+ maybe_finalize_jump_finalize:
+ pattern_pc -= 2;
+ (pattern_pc [-1]) = ((unsigned char) regexpcode_finalize_jump);
+ goto re_match_finalize_jump;
+
+ maybe_finalize_jump_not_finalize:
+ pattern_pc -= 2;
+ (pattern_pc [-1]) = ((unsigned char) regexpcode_jump);
+ goto re_match_jump;
+ }
+\f
+ case regexpcode_finalize_jump:
+ re_match_finalize_jump:
+ {
+ stack_pointer -= 2;
+ goto re_match_jump;
+ }
+
+ case regexpcode_jump:
+ re_match_jump:
+ {
+ fast long offset;
+
+ READ_PATTERN_OFFSET (offset);
+ pattern_pc += offset;
+ goto re_match_loop;
+ }
+
+ case regexpcode_dummy_failure_jump:
+ {
+ PUSH_FAILURE_POINT (NULL, NULL);
+ goto re_match_jump;
+ }
+
+ default:
+ {
+ BAD_PATTERN ();
+ }
+ }
+
+ re_match_fail:
+ if (stack_pointer == stack_start)
+ RE_RETURN (RE_MATCH_FAILED);
+ match_pc = (*--stack_pointer);
+ pattern_pc = (*--stack_pointer);
+ if (pattern_pc != NULL)
+ goto re_match_loop;
+ goto re_match_fail;
+
+ return_point:
+ if (stack_start != NULL)
+ free (stack_start);
+ return (return_value);
+}
+\f
+#define DEFINE_RE_SEARCH(name) \
+int name (pattern_start, pattern_end, buffer, registers, \
+ match_start, match_end) \
+ unsigned char *pattern_start, *pattern_end; \
+ struct re_buffer *buffer; \
+ struct re_registers *registers; \
+ unsigned char *match_start; \
+ unsigned char *match_end;
+
+#define INITIALIZE_RE_SEARCH(pc, limit, gap_limit) \
+ int can_be_null; \
+ unsigned char *translation; \
+ int match_result; \
+ \
+ fast unsigned char *match_pc; \
+ fast unsigned char *match_limit; \
+ fast unsigned char *gap_limit; \
+ fast unsigned char fastmap[MAX_ASCII]; \
+ \
+ translation = (buffer -> translation); \
+ can_be_null = \
+ (re_compile_fastmap \
+ (pattern_start, pattern_end, translation, \
+ (buffer -> syntax_table), fastmap)); \
+ if (can_be_null < 0) \
+ return (can_be_null); \
+ \
+ match_pc = (pc); \
+ match_limit = (limit); \
+ gap_limit = (buffer -> gap_limit)
+
+#define RE_SEARCH_TEST(start) \
+ (re_match \
+ (pattern_start, pattern_end, buffer, registers, (start), match_end))
+\f
+#define RE_SEARCH_FORWARD_FAST(limit) do \
+{ \
+ while (true) \
+ { \
+ if (match_pc >= (limit)) \
+ break; \
+ \
+ if ((fastmap [(TRANSLATE_CHAR (*match_pc++))]) != FASTMAP_FALSE) \
+ continue; \
+ \
+ match_result = (RE_SEARCH_TEST (match_pc - 1)); \
+ if (match_result == RE_MATCH_FAILED) \
+ continue; \
+ \
+ return (match_result); \
+ } \
+} while (0)
+
+DEFINE_RE_SEARCH (re_search_forward)
+{
+ INITIALIZE_RE_SEARCH (match_start, match_end, gap_start);
+
+ if (can_be_null != 1)
+ {
+ if ((match_pc < gap_start) && (gap_start < match_limit))
+ RE_SEARCH_FORWARD_FAST (gap_start);
+ if (match_pc == gap_start)
+ match_pc = (buffer -> gap_end);
+ RE_SEARCH_FORWARD_FAST (match_limit);
+ return
+ ((can_be_null == 0)
+ ? RE_MATCH_FAILED
+ : (RE_SEARCH_TEST (match_limit)));
+ }
+ else
+ {
+ while (true)
+ {
+ match_result = (RE_SEARCH_TEST (match_pc));
+ if (match_result != RE_MATCH_FAILED)
+ return (match_result);
+ match_pc += 1;
+ if (match_pc == gap_start)
+ match_pc = (buffer -> gap_end);
+ if (match_pc > match_limit)
+ return (RE_MATCH_FAILED);
+ }
+ }
+}
+\f
+#define RE_SEARCH_BACKWARD_FAST(limit) do \
+{ \
+ while (true) \
+ { \
+ if (match_pc <= (limit)) \
+ break; \
+ \
+ if ((fastmap [(TRANSLATE_CHAR (*--match_pc))]) != FASTMAP_FALSE) \
+ continue; \
+ \
+ match_result = (RE_SEARCH_TEST (match_pc)); \
+ if (match_result == RE_MATCH_FAILED) \
+ continue; \
+ \
+ RE_SEARCH_BACKWARD_RETURN (match_pc); \
+ } \
+} while (0)
+
+#define RE_SEARCH_BACKWARD_RETURN(start) \
+ return \
+ ((match_result < 0) \
+ ? match_result \
+ : ((((start) > (buffer -> gap_start)) \
+ ? ((start) - (gap_end - (buffer -> gap_start))) \
+ : (start)) \
+ - (buffer -> text)))
+
+DEFINE_RE_SEARCH (re_search_backward)
+{
+ INITIALIZE_RE_SEARCH (match_end, match_start, gap_end);
+
+ if (can_be_null != 1)
+ {
+ if ((match_pc > gap_end) && (gap_end > match_limit))
+ RE_SEARCH_BACKWARD_FAST (gap_end);
+ if (match_pc == gap_end)
+ match_pc = (buffer -> gap_start);
+ RE_SEARCH_BACKWARD_FAST (match_limit);
+ if (can_be_null == 0)
+ return (RE_MATCH_FAILED);
+ match_result = (RE_SEARCH_TEST (match_limit));
+ RE_SEARCH_BACKWARD_RETURN (match_limit);
+ }
+ else
+ {
+ while (true)
+ {
+ match_result = (RE_SEARCH_TEST (match_pc));
+ if (match_result != RE_MATCH_FAILED)
+ RE_SEARCH_BACKWARD_RETURN (match_pc);
+ if (match_pc == gap_end)
+ match_pc = (buffer -> gap_start);
+ match_pc -= 1;
+ if (match_pc < match_limit)
+ return (RE_MATCH_FAILED);
+ }
+ }
+}
--- /dev/null
+/* -*-C-*-
+
+$Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/regex.h,v 1.1 1987/07/14 03:00:23 cph Rel $
+
+Copyright (c) 1987 Massachusetts Institute of Technology
+
+This material was developed by the Scheme project at the Massachusetts
+Institute of Technology, Department of Electrical Engineering and
+Computer Science. Permission to copy this software, to redistribute
+it, and to use it for any purpose is granted, subject to the following
+restrictions and understandings.
+
+1. Any copy made of this software must include this copyright notice
+in full.
+
+2. Users of this software agree to make their best efforts (a) to
+return to the MIT Scheme project any improvements or extensions that
+they make, so that these may be included in future releases; and (b)
+to inform MIT of noteworthy uses of this software.
+
+3. All materials developed as a consequence of the use of this
+software shall duly acknowledge such use, in accordance with the usual
+standards of acknowledging credit in academic research.
+
+4. MIT has made no warrantee or representation that the operation of
+this software will be error-free, and MIT is under no obligation to
+provide any services, by way of maintenance, update, or otherwise.
+
+5. In conjunction with products arising from the use of this material,
+there shall be no use of the name of the Massachusetts Institute of
+Technology nor of any adaptation thereof in any advertising,
+promotional, or sales literature without prior written consent from
+MIT in each case. */
+
+/* Translated from GNU Emacs. */
+\f
+/* Structure to represent a buffer of text to match against.
+ This contains the information that an editor buffer would have
+ to supply for the matching process to be executed.
+
+ `translation' is an array of MAX_ASCII characters which is used to
+ map each character before matching. Both the pattern and the match
+ text are mapped. This is normally used to implement case
+ insensitive searches.
+
+ `syntax_table' describes the syntax of the match text. See the
+ syntax table primitives for more information.
+
+ `text' points to the beginning of the match text. It is used only
+ for translating match text pointers into indices.
+
+ `text_start' and `text_end' delimit the match text. They define
+ the buffer-start and buffer-end for those matching commands that
+ refer to them. Also, all matching must take place within these
+ limits.
+
+ `gap_start' and `gap_end' delimit a gap in the match text. Editor
+ buffers normally have such a gap. For applications without a gap,
+ it is recommended that these be set to the same value as
+ `text_end'.
+
+ Both `text_start' and `gap_start' are inclusive indices, while
+ `text_end' and `gap_end' are exclusive.
+
+ The following conditions must be true:
+
+ (text <= text_start)
+ (text_start <= text_end)
+ (gap_start <= gap_end)
+ (! ((text_start < text_end) &&
+ (gap_start < gap_end) &&
+ ((text_start == gap_start) || (text_end == gap_end))))
+
+ */
+
+struct re_buffer
+ {
+ unsigned char *translation;
+ SYNTAX_TABLE_TYPE syntax_table;
+ unsigned char *text;
+ unsigned char *text_start;
+ unsigned char *text_end;
+ unsigned char *gap_start;
+ unsigned char *gap_end;
+ };
+\f
+/* Structure to store "register" contents data in.
+
+ Pass the address of such a structure as an argument to re_match,
+ etc., if you want this information back.
+
+ start[i] and end[i] record the string matched by \( ... \) grouping
+ i, for i from 1 to RE_NREGS - 1.
+
+ start[0] and end[0] record the entire string matched. */
+
+#define RE_NREGS 10
+
+struct re_registers
+ {
+ long start[RE_NREGS];
+ long end[RE_NREGS];
+ };
+\f
+/* These are the command codes that appear in compiled regular
+ expressions, one per byte. Some command codes are followed by
+ argument bytes. A command code can specify any interpretation
+ whatever for its arguments. Zero-bytes may appear in the compiled
+ regular expression. */
+
+enum regexpcode
+ {
+ regexpcode_unused,
+ regexpcode_exact_1, /* Followed by 1 literal byte */
+
+ /* Followed by one byte giving n, and then by n literal bytes. */
+ regexpcode_exact_n,
+
+ regexpcode_line_start, /* Fails unless at beginning of line */
+ regexpcode_line_end, /* Fails unless at end of line */
+
+ /* Followed by two bytes giving relative address to jump to. */
+ regexpcode_jump,
+
+ /* Followed by two bytes giving relative address of place to
+ resume at in case of failure. */
+ regexpcode_on_failure_jump,
+
+ /* Throw away latest failure point and then jump to address. */
+ regexpcode_finalize_jump,
+
+ /* Like jump but finalize if safe to do so. This is used to jump
+ back to the beginning of a repeat. If the command that follows
+ this jump is clearly incompatible with the one at the beginning
+ of the repeat, such that we can be sure that there is no use
+ backtracking out of repetitions already completed, then we
+ finalize. */
+ regexpcode_maybe_finalize_jump,
+
+ /* jump, and push a dummy failure point. This failure point will
+ be thrown away if an attempt is made to use it for a failure.
+ A + construct makes this before the first repeat. */
+ regexpcode_dummy_failure_jump,
+
+ regexpcode_any_char, /* Matches any one character */
+
+ /* Matches any one char belonging to specified set. First
+ following byte is # bitmap bytes. Then come bytes for a
+ bit-map saying which chars are in. Bits in each byte are
+ ordered low-bit-first. A character is in the set if its bit is
+ 1. A character too large to have a bit in the map is
+ automatically not in the set. */
+ regexpcode_char_set,
+
+ /* Similar but match any character that is NOT one of those
+ specified. */
+ regexpcode_not_char_set,
+\f
+ /* Starts remembering the text that is matched and stores it in a
+ memory register. Followed by one byte containing the register
+ number. Register numbers must be in the range 0 through
+ (RE_NREGS - 1) inclusive. */
+ regexpcode_start_memory,
+
+ /* Stops remembering the text that is matched and stores it in a
+ memory register. Followed by one byte containing the register
+ number. Register numbers must be in the range 0 through
+ (RE_NREGS - 1) inclusive. */
+ regexpcode_stop_memory,
+
+ /* Match a duplicate of something remembered. Followed by one
+ byte containing the index of the memory register. */
+ regexpcode_duplicate,
+
+ regexpcode_buffer_start, /* Succeeds if at beginning of buffer */
+ regexpcode_buffer_end, /* Succeeds if at end of buffer */
+ regexpcode_word_char, /* Matches any word-constituent character */
+
+ /* Matches any char that is not a word-constituent. */
+ regexpcode_not_word_char,
+
+ regexpcode_word_start, /* Succeeds if at word beginning */
+ regexpcode_word_end, /* Succeeds if at word end */
+ regexpcode_word_bound, /* Succeeds if at a word boundary */
+ regexpcode_not_word_bound, /* Succeeds if not at a word boundary */
+
+ /* Matches any character whose syntax is specified. Followed by a
+ byte which contains a syntax code, Sword or such like. */
+ regexpcode_syntax_spec,
+
+ /* Matches any character whose syntax differs from the specified. */
+ regexpcode_not_syntax_spec
+ };
+
+extern void re_buffer_initialize ();
+extern int re_compile_fastmap ();
+extern int re_match ();
+extern int re_search_forward ();
+extern int re_search_backward ();
--- /dev/null
+/* -*-C-*-
+
+$Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/rgxprim.c,v 1.1 1987/07/14 03:00:03 cph Exp $
+
+Copyright (c) 1987 Massachusetts Institute of Technology
+
+This material was developed by the Scheme project at the Massachusetts
+Institute of Technology, Department of Electrical Engineering and
+Computer Science. Permission to copy this software, to redistribute
+it, and to use it for any purpose is granted, subject to the following
+restrictions and understandings.
+
+1. Any copy made of this software must include this copyright notice
+in full.
+
+2. Users of this software agree to make their best efforts (a) to
+return to the MIT Scheme project any improvements or extensions that
+they make, so that these may be included in future releases; and (b)
+to inform MIT of noteworthy uses of this software.
+
+3. All materials developed as a consequence of the use of this
+software shall duly acknowledge such use, in accordance with the usual
+standards of acknowledging credit in academic research.
+
+4. MIT has made no warrantee or representation that the operation of
+this software will be error-free, and MIT is under no obligation to
+provide any services, by way of maintenance, update, or otherwise.
+
+5. In conjunction with products arising from the use of this material,
+there shall be no use of the name of the Massachusetts Institute of
+Technology nor of any adaptation thereof in any advertising,
+promotional, or sales literature without prior written consent from
+MIT in each case. */
+
+/* Primitives for regular expression matching and search. */
+
+/* This code is not yet tested. -- CPH */
+
+#include "scheme.h"
+#include "primitive.h"
+#include "stringprim.h"
+#include "character.h"
+#include "edwin.h"
+#include "syntax.h"
+#include "regex.h"
+\f
+#define RE_CHAR_SET_P(object) \
+ ((STRING_P (object)) && \
+ ((string_length (object)) == (MAX_ASCII / ASCII_LENGTH)))
+
+#define CHAR_SET_P(argument) \
+ ((STRING_P (argument)) && ((string_length (argument)) == MAX_ASCII))
+
+#define CHAR_TRANSLATION_P(argument) \
+ ((STRING_P (argument)) && ((string_length (argument)) == MAX_ASCII))
+
+#define RE_REGISTERS_P(object) \
+ (((object) == NIL) || \
+ ((VECTOR_P (object)) && \
+ ((Vector_Length (object)) == (RE_NREGS + RE_NREGS))))
+
+#define RE_MATCH_RESULTS(result, vector) do \
+{ \
+ if ((result) >= 0) \
+ { \
+ if ((vector) != NIL) \
+ { \
+ int i; \
+ long index; \
+ \
+ for (i = 0; (i < RE_NREGS); i += 1) \
+ { \
+ index = ((registers . start) [i]); \
+ User_Vector_Set \
+ (vector, \
+ i, \
+ ((index == -1) \
+ ? NIL \
+ : (C_Integer_To_Scheme_Integer (index)))); \
+ index = ((registers . end) [i]); \
+ User_Vector_Set \
+ (vector, \
+ (i + RE_NREGS), \
+ ((index == -1) \
+ ? NIL \
+ : (C_Integer_To_Scheme_Integer (index)))); \
+ } \
+ } \
+ return (C_Integer_To_Scheme_Integer (result)); \
+ } \
+ else if ((result) == (-1)) \
+ return (NIL); \
+ else if ((result) == (-2)) \
+ error_bad_range_arg (1); \
+ else \
+ error_external_return (); \
+} while (0)
+\f
+Built_In_Primitive (Prim_re_char_set_adjoin, 2, "RE-CHAR-SET-ADJOIN!", 0x190)
+{
+ int ascii;
+ Primitive_2_Args ();
+
+ CHECK_ARG (1, RE_CHAR_SET_P);
+ ascii = (arg_ascii_char (2));
+ (* (string_pointer (Arg1, (ascii / ASCII_LENGTH)))) |=
+ (1 << (ascii % ASCII_LENGTH));
+ return (NIL);
+}
+
+Built_In_Primitive (Prim_re_compile_fastmap, 4, "RE-COMPILE-FASTMAP", 0x191)
+{
+ int can_be_null;
+ Primitive_4_Args ();
+
+ CHECK_ARG (1, STRING_P);
+ CHECK_ARG (2, CHAR_TRANSLATION_P);
+ CHECK_ARG (3, SYNTAX_TABLE_P);
+ CHECK_ARG (4, CHAR_SET_P);
+
+ can_be_null =
+ (re_compile_fastmap ((string_pointer (Arg1, 0)),
+ (string_pointer (Arg1, (string_length (Arg1)))),
+ (string_pointer (Arg2, 0)),
+ Arg3,
+ (string_pointer (Arg4, 0))));
+
+ if (can_be_null >= 0)
+ return (C_Integer_To_Scheme_Integer (can_be_null));
+ else if (can_be_null == (-2))
+ error_bad_range_arg (1);
+ else
+ error_external_return ();
+}
+\f
+/* (re-match-substring regexp translation syntax-table registers
+ string start end)
+
+ Attempt to match REGEXP against the substring [STRING, START, END].
+ Return the index of the end of the match (exclusive) if successful.
+ Otherwise return false. REGISTERS, if not false, is set to contain
+ the appropriate indices for the match registers. */
+
+#define RE_SUBSTRING_PRIMITIVE(procedure) \
+{ \
+ long match_start, match_end, text_end; \
+ char *text; \
+ struct re_buffer buffer; \
+ struct re_registers registers; \
+ int result; \
+ Primitive_7_Args (); \
+ \
+ CHECK_ARG (1, STRING_P); \
+ CHECK_ARG (2, CHAR_TRANSLATION_P); \
+ CHECK_ARG (3, SYNTAX_TABLE_P); \
+ CHECK_ARG (4, RE_REGISTERS_P); \
+ CHECK_ARG (5, STRING_P); \
+ match_start = (arg_nonnegative_integer (6)); \
+ match_end = (arg_nonnegative_integer (7)); \
+ text = (string_pointer (Arg5, 0)); \
+ text_end = (string_length (Arg5)); \
+ \
+ if (match_end > text_end) error_bad_range_arg (7); \
+ if (match_start > match_end) error_bad_range_arg (6); \
+ \
+ re_buffer_initialize \
+ ((& buffer), (string_pointer (Arg2, 0)), Arg3, text, 0, text_end, \
+ text_end, text_end); \
+ \
+ result = \
+ (procedure ((string_pointer (Arg1, 0)), \
+ (string_pointer (Arg1, (string_length (Arg1)))), \
+ (& buffer), \
+ ((Arg4 == NIL) ? NULL : (& registers)), \
+ (& (text [match_start])), \
+ (& (text [match_end])))); \
+ RE_MATCH_RESULTS (result, Arg4); \
+}
+
+Built_In_Primitive (Prim_re_match_substring, 7, "RE-MATCH-SUBSTRING", 0x118)
+ RE_SUBSTRING_PRIMITIVE (re_match)
+
+Built_In_Primitive (Prim_re_search_substring_forward, 7,
+ "RE-SEARCH-SUBSTRING-FORWARD", 0x119)
+ RE_SUBSTRING_PRIMITIVE (re_search_forward)
+
+Built_In_Primitive (Prim_re_search_substring_backward, 7,
+ "RE-SEARCH-SUBSTRING-BACKWARD", 0x11A)
+ RE_SUBSTRING_PRIMITIVE (re_search_backward)
+\f
+#define RE_BUFFER_PRIMITIVE(procedure) \
+{ \
+ long match_start, match_end, text_start, text_end, gap_start; \
+ char *text; \
+ struct re_buffer buffer; \
+ struct re_registers registers; \
+ int result; \
+ Primitive_7_Args (); \
+ \
+ CHECK_ARG (1, STRING_P); \
+ CHECK_ARG (2, CHAR_TRANSLATION_P); \
+ CHECK_ARG (3, SYNTAX_TABLE_P); \
+ CHECK_ARG (4, RE_REGISTERS_P); \
+ CHECK_ARG (5, GROUP_P); \
+ match_start = (arg_nonnegative_integer (6)); \
+ match_end = (arg_nonnegative_integer (7)); \
+ \
+ text = (string_pointer ((GROUP_TEXT (Arg5)), 0)); \
+ text_start = (MARK_POSITION (GROUP_START_MARK (Arg5))); \
+ text_end = (MARK_POSITION (GROUP_END_MARK (Arg5))); \
+ gap_start = (GROUP_GAP_START (Arg5)); \
+ \
+ if (match_end > gap_start) \
+ { \
+ match_end += (GROUP_GAP_LENGTH (Arg5)); \
+ if (match_start >= gap_start) \
+ match_start += (GROUP_GAP_LENGTH (Arg5)); \
+ } \
+ \
+ if (match_start > match_end) error_bad_range_arg (6); \
+ if (match_end > text_end) error_bad_range_arg (7); \
+ if (match_start < text_start) error_bad_range_arg (6); \
+ \
+ re_buffer_initialize \
+ ((& buffer), (string_pointer (Arg2, 0)), Arg3, text, text_start, \
+ text_end, gap_start, (GROUP_GAP_END (Arg5))); \
+ \
+ result = \
+ (procedure ((string_pointer (Arg1, 0)), \
+ (string_pointer (Arg1, (string_length (Arg1)))), \
+ (& buffer), \
+ ((Arg4 == NIL) ? NULL : (& registers)), \
+ (& (text [match_start])), \
+ (& (text [match_end])))); \
+ RE_MATCH_RESULTS (result, Arg4); \
+}
+
+Built_In_Primitive (Prim_re_match_buffer, 7, "RE-MATCH-BUFFER", 0x192)
+ RE_BUFFER_PRIMITIVE (re_match)
+
+Built_In_Primitive (Prim_re_search_buffer_forward, 7,
+ "RE-SEARCH-BUFFER-FORWARD", 0x193)
+ RE_BUFFER_PRIMITIVE (re_search_forward)
+
+Built_In_Primitive (Prim_re_search_buffer_backward, 7,
+ "RE-SEARCH-BUFFER-BACKWARD", 0x194)
+ RE_BUFFER_PRIMITIVE (re_search_backward)