From: Chris Hanson <org/chris-hanson/cph>
Date: Tue, 14 Jul 1987 03:00:59 +0000 (+0000)
Subject: Initial revision
X-Git-Tag: 20090517-FFI~13261
X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=9baad2a7cb1106ed351b534811c3db371f55d718;p=mit-scheme.git

Initial revision
---

diff --git a/v7/src/microcode/regex.c b/v7/src/microcode/regex.c
new file mode 100644
index 000000000..adba8fc58
--- /dev/null
+++ b/v7/src/microcode/regex.c
@@ -0,0 +1,1137 @@
+/* -*-C-*-
+
+$Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/regex.c,v 1.1 1987/07/14 03:00:59 cph Exp $
+
+Copyright (c) 1987 Massachusetts Institute of Technology
+
+This material was developed by the Scheme project at the Massachusetts
+Institute of Technology, Department of Electrical Engineering and
+Computer Science.  Permission to copy this software, to redistribute
+it, and to use it for any purpose is granted, subject to the following
+restrictions and understandings.
+
+1. Any copy made of this software must include this copyright notice
+in full.
+
+2. Users of this software agree to make their best efforts (a) to
+return to the MIT Scheme project any improvements or extensions that
+they make, so that these may be included in future releases; and (b)
+to inform MIT of noteworthy uses of this software.
+
+3. All materials developed as a consequence of the use of this
+software shall duly acknowledge such use, in accordance with the usual
+standards of acknowledging credit in academic research.
+
+4. MIT has made no warrantee or representation that the operation of
+this software will be error-free, and MIT is under no obligation to
+provide any services, by way of maintenance, update, or otherwise.
+
+5. In conjunction with products arising from the use of this material,
+there shall be no use of the name of the Massachusetts Institute of
+Technology nor of any adaptation thereof in any advertising,
+promotional, or sales literature without prior written consent from
+MIT in each case. */
+
+/* Regular expression matching and search.
+   Translated from GNU Emacs. */
+
+/* This code is not yet tested. -- CPH */
+
+#include "scheme.h"
+#include "character.h"
+#include "syntax.h"
+#include "regex.h"
+
+#ifndef SIGN_EXTEND_CHAR
+#define SIGN_EXTEND_CHAR(x) (x) /* default: use the value unchanged */
+#endif /* not SIGN_EXTEND_CHAR */
+
+#ifndef SWITCH_ENUM
+#define SWITCH_ENUM(enum_type, expression) /* cast, then switch */	\
+  switch ((enum enum_type) (expression))
+#endif /* not SWITCH_ENUM */
+
+#define RE_NFAILURES 80 /* fastmap stack slots; re_match starts with 2x */
+/* `for' header: `index' takes start, start + 1, ..., end - 1.  */
+#define FOR_INDEX_RANGE(index, start, end)				\
+  for (index = (start); (index < (end)); index += 1)
+/* `for' header: `index' takes 0, 1, ..., limit - 1.  */
+#define FOR_INDEX_BELOW(index, limit)					\
+  FOR_INDEX_RANGE (index, 0, (limit))
+/* `for' header: `index' takes every value below MAX_ASCII.  */
+#define FOR_ALL_ASCII(index)						\
+  FOR_INDEX_BELOW (index, MAX_ASCII)
+/* FOR_ALL_ASCII whose body runs only when `expression' holds.  */
+#define FOR_ALL_ASCII_SUCH_THAT(index, expression)			\
+  FOR_ALL_ASCII (index)							\
+    if (expression)
+
+#define TRANSLATE_CHAR(ascii) /* identity if `translation' is NULL */	\
+  ((translation == NULL) ? (ascii) : (translation [(ascii)]))
+/* True iff the syntax table gives `ascii' the code syntaxcode_word.  */
+#define WORD_CONSTITUENT_P(ascii)					\
+  (SYNTAX_CONSTITUENT_P (syntaxcode_word, (ascii)))
+/* True iff the entry for `ascii' in local `syntax_table' has `code'.  */
+#define SYNTAX_CONSTITUENT_P(code, ascii)				\
+  ((SYNTAX_ENTRY_CODE (SYNTAX_TABLE_REF (syntax_table, (ascii)))) == (code))
+/* Bit test in a `length'-element set; larger characters are absent.  */
+#define CHAR_SET_MEMBER_P(length, char_set, ascii)			\
+  (((ascii) < ((length) * ASCII_LENGTH)) &&				\
+   (CHAR_SET_MEMBER_P_INTERNAL (char_set, ascii)))
+/* Unchecked test of bit (ascii % ASCII_LENGTH) in element (ascii / ASCII_LENGTH).  */
+#define CHAR_SET_MEMBER_P_INTERNAL(char_set, ascii)			\
+  ((((char_set) [((ascii) / ASCII_LENGTH)]) &				\
+    (1 << ((ascii) % ASCII_LENGTH)))					\
+   != 0)
+
+#define READ_PATTERN_CHAR(target) do					\
+{ /* Fetch the next pattern byte into `target'; bad pattern at end. */	\
+  if (pattern_pc >= pattern_end)					\
+    BAD_PATTERN ();							\
+  (target) = (*pattern_pc++);						\
+} while (0)
+
+#define READ_PATTERN_OFFSET(target) do					\
+{ /* Two-byte offset: low byte, then high byte via SIGN_EXTEND_CHAR. */ \
+  if ((pattern_pc + 1) >= pattern_end)					\
+    BAD_PATTERN ();							\
+  (target) = (*pattern_pc++);						\
+  (target) +=								\
+    ((SIGN_EXTEND_CHAR (* ((char *) (pattern_pc++)))) << ASCII_LENGTH);	\
+  if (((pattern_pc + (target)) < pattern_start) ||			\
+      ((pattern_pc + (target)) > pattern_end))				\
+    BAD_PATTERN (); /* the jump target must lie inside the pattern */	\
+} while (0)
+
+#define READ_PATTERN_LENGTH(target) do					\
+{ /* Read a count byte; reject if that many bytes do not follow. */	\
+  if (pattern_pc >= pattern_end) BAD_PATTERN ();			\
+  (target) = (*pattern_pc++);  /* sequenced before the bounds test */	\
+  if ((target) > (pattern_end - pattern_pc)) BAD_PATTERN ();		\
+} while (0)
+
+#define READ_PATTERN_REGISTER(target) do				\
+{ /* Fetch a register number; it must be below RE_NREGS. */		\
+  if ((pattern_pc >= pattern_end) ||					\
+      (((target) = (*pattern_pc++)) >= RE_NREGS))			\
+    BAD_PATTERN ();							\
+} while (0)
+
+#define READ_PATTERN_SYNTAXCODE(target) do				\
+{ /* Fetch a syntax code; it must be below syntaxcode_max. */		\
+  if ((pattern_pc >= pattern_end) ||					\
+      (((int) ((target) = ((enum syntaxcode) (*pattern_pc++))))		\
+       >= ((int) syntaxcode_max)))					\
+    BAD_PATTERN ();							\
+} while (0)
+
+#define BAD_PATTERN() RE_RETURN (-2) /* -2: the pattern is badly formed */
+
+#define PUSH_FAILURE_POINT(pattern_pc, match_pc) do			\
+{ /* Push a resume pair; when full, double the stack (-4/-3 on error). */ \
+  if (stack_pointer == stack_end)					\
+    {									\
+      long stack_length;						\
+      long stack_depth;							\
+      unsigned char **stack_temporary;					\
+      stack_depth = (stack_pointer - stack_start); /* before realloc */	\
+      stack_length = ((stack_end - stack_start) * 2);			\
+      if (stack_length > (re_max_failures * 2))				\
+	RE_RETURN (-4);							\
+      stack_temporary =							\
+	((unsigned char **)						\
+	 (realloc							\
+	  (stack_start, (stack_length * (sizeof (unsigned char *))))));	\
+      if (stack_temporary == NULL)					\
+	RE_RETURN (-3);							\
+      stack_end = (& (stack_temporary [stack_length]));			\
+      stack_pointer = (& (stack_temporary [stack_depth]));		\
+      stack_start = stack_temporary;					\
+    }									\
+  (*stack_pointer++) = (pattern_pc);					\
+  (*stack_pointer++) = (match_pc);					\
+} while (0)
+
+#define RE_RETURN(value)						\
+{									\
+  return_value = (value);						\
+  goto return_point;							\
+}
+
+void
+re_buffer_initialize (buffer, translation, syntax_table, text,
+		      text_start_index, text_end_index,
+		      gap_start_index, gap_end_index)
+     struct re_buffer *buffer;
+     unsigned char *translation;
+     SYNTAX_TABLE_TYPE syntax_table;
+     unsigned char *text;
+     unsigned long text_start_index, text_end_index,
+       gap_start_index, gap_end_index;
+{
+  /* Fill in `buffer' from the base address `text' and four indices
+     into it.  The caller is assumed to guarantee
+     text_start_index <= gap_start_index <= gap_end_index
+     <= text_end_index.  */
+  unsigned char *first = (text + text_start_index);
+  unsigned char *last = (text + text_end_index);
+  unsigned char *gap_lo = (text + gap_start_index);
+  unsigned char *gap_hi = (text + gap_end_index);
+
+  (buffer -> text) = text;
+  (buffer -> translation) = translation;
+  (buffer -> syntax_table) = syntax_table;
+  (buffer -> gap_start) = gap_lo;
+  (buffer -> gap_end) = gap_hi;
+
+  /* A text boundary that coincides with the gap edge on its own side
+     is stored as the gap's opposite edge instead.  */
+  (buffer -> text_start) = ((first != gap_lo) ? first : gap_hi);
+  (buffer -> text_end) = ((last != gap_hi) ? last : gap_lo);
+  return;
+}
+
+/* Given a compiled pattern between `pattern_start' and `pattern_end',
+   generate a character set which is true of all characters which can
+   be the first character of a match.
+
+   See the documentation of `struct re_buffer' for a description of
+   `translation' and `syntax_table'.
+
+   `fastmap' is the resulting character set.  It is a character array
+   whose elements are either `FASTMAP_FALSE' or `FASTMAP_TRUE'.
+
+   Return values:
+   0 => pattern cannot match the null string.
+   1 => pattern can match the null string.
+   2 => pattern can match the null string, but only at end of match
+     text or to left of a character in `fastmap'.
+   -2 => the pattern is improperly formed.
+   -4 => more than RE_NFAILURES alternatives were pending at once.
+   else => undefined. */
+
+#define FASTMAP_FALSE '\0'
+#define FASTMAP_TRUE '\1'
+
+int
+re_compile_fastmap (pattern_start, pattern_end, translation, syntax_table,
+		    fastmap)
+     unsigned char *pattern_start;
+     fast unsigned char *pattern_end;
+     unsigned char *translation;
+     SYNTAX_TABLE_TYPE syntax_table;
+     fast unsigned char *fastmap;
+{
+  fast unsigned char *pattern_pc;
+  unsigned char *stack_start[RE_NFAILURES];
+  unsigned char **stack_pointer;
+  int return_value;
+  fast int i;
+
+  pattern_pc = pattern_start;
+  return_value = 0;
+  stack_pointer = stack_start;
+  FOR_ALL_ASCII (i)
+    (fastmap [i]) = FASTMAP_FALSE;
+
+ loop:
+  if (pattern_pc >= pattern_end)
+    RE_RETURN (1);
+
+  SWITCH_ENUM (regexpcode, (*pattern_pc++))
+    {
+    case regexpcode_unused:
+    case regexpcode_line_start:
+    case regexpcode_buffer_start:
+    case regexpcode_buffer_end:
+    case regexpcode_word_start:
+    case regexpcode_word_end:
+    case regexpcode_word_bound:
+    case regexpcode_not_word_bound:
+      goto loop;
+
+    case regexpcode_line_end:
+      (fastmap [(TRANSLATE_CHAR ('\n'))]) = FASTMAP_TRUE;
+      if (return_value == 0)
+	return_value = 2;
+      goto next;
+
+    case regexpcode_exact_1:
+      {
+	fast int ascii;
+
+	READ_PATTERN_CHAR (ascii);
+	(fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+	goto next;
+      }
+
+    case regexpcode_exact_n:
+      {
+	fast int length;
+
+	READ_PATTERN_LENGTH (length);
+	if (length == 0)
+	  goto loop;
+	/* The length byte is already consumed: [0] is the first char. */
+	(fastmap [(TRANSLATE_CHAR (pattern_pc [0]))]) = FASTMAP_TRUE;
+	goto next;
+      }
+
+    case regexpcode_any_char:
+      {
+	fast int ascii;
+
+	FOR_ALL_ASCII_SUCH_THAT (ascii, (ascii != '\n'))
+	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+	if (return_value != 0)
+	  goto return_point;
+	goto next;	/* must not fall into the char_set case below */
+      }
+
+    case regexpcode_char_set:
+      {
+	fast int length;
+	fast int ascii;
+
+	READ_PATTERN_LENGTH (length);
+	length = (length * ASCII_LENGTH);
+	FOR_INDEX_BELOW (ascii, length)
+	  if (CHAR_SET_MEMBER_P_INTERNAL (pattern_pc, ascii))
+	    (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+	goto next;
+      }
+
+    case regexpcode_not_char_set:
+      {
+	fast int length;
+	fast int ascii;
+
+	READ_PATTERN_LENGTH (length);
+	length = (length * ASCII_LENGTH);
+	FOR_INDEX_BELOW (ascii, length)
+	  if (! (CHAR_SET_MEMBER_P_INTERNAL (pattern_pc, ascii)))
+	    (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+	FOR_INDEX_RANGE (ascii, length, MAX_ASCII)
+	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+	goto next;
+      }
+
+    case regexpcode_word_char:
+      {
+	fast int ascii;
+
+	FOR_ALL_ASCII_SUCH_THAT (ascii, (WORD_CONSTITUENT_P (ascii)))
+	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+	goto next;
+      }
+
+    case regexpcode_not_word_char:
+      {
+	fast int ascii;
+
+	FOR_ALL_ASCII_SUCH_THAT (ascii, (! (WORD_CONSTITUENT_P (ascii))))
+	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+	goto next;
+      }
+
+    case regexpcode_syntax_spec:
+      {
+	fast enum syntaxcode code;
+	fast int ascii;
+
+	READ_PATTERN_SYNTAXCODE (code);
+	FOR_ALL_ASCII_SUCH_THAT (ascii, (SYNTAX_CONSTITUENT_P (code, ascii)))
+	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+	goto next;
+      }
+
+    case regexpcode_not_syntax_spec:
+      {
+	fast enum syntaxcode code;
+	fast int ascii;
+
+	READ_PATTERN_SYNTAXCODE (code);
+	FOR_ALL_ASCII_SUCH_THAT (ascii,
+				 (! (SYNTAX_CONSTITUENT_P (code, ascii))))
+	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
+	goto next;
+      }
+
+    case regexpcode_start_memory:
+    case regexpcode_stop_memory:
+      {
+	fast int register_number;
+
+	READ_PATTERN_REGISTER (register_number);
+	goto loop;
+      }
+
+    case regexpcode_duplicate:
+      {
+	fast int register_number;
+	fast int ascii;
+
+	READ_PATTERN_REGISTER (register_number);
+	FOR_ALL_ASCII (ascii)
+	  (fastmap [ascii]) = FASTMAP_TRUE;
+	RE_RETURN (1);
+      }
+
+    case regexpcode_jump:
+    case regexpcode_finalize_jump:
+    case regexpcode_maybe_finalize_jump:
+    case regexpcode_dummy_failure_jump:
+      {
+	fast int offset;
+
+	return_value = 1;
+	READ_PATTERN_OFFSET (offset);
+	pattern_pc += offset;
+	if (offset > 0)
+	  goto loop;
+
+	/* Jump backward reached implies we just went through the
+	   body of a loop and matched nothing.  Opcode jumped to
+	   should be an on_failure_jump.  Just treat it like an
+	   ordinary jump.  For a * loop, it has pushed its failure
+	   point already; if so, discard that as redundant. */
+	if (pattern_pc >= pattern_end)
+	  BAD_PATTERN ();
+	if (((enum regexpcode) (pattern_pc [0])) !=
+	    regexpcode_on_failure_jump)
+	  goto loop;
+	pattern_pc += 1;	/* step over the on_failure_jump opcode */
+	READ_PATTERN_OFFSET (offset);
+	pattern_pc += offset;
+	if ((stack_pointer != stack_start) &&
+	    ((stack_pointer [-1]) == pattern_pc))
+	  stack_pointer -= 1;
+	goto loop;
+      }
+
+    case regexpcode_on_failure_jump:
+      {
+	fast int offset;
+
+	READ_PATTERN_OFFSET (offset);
+	if (stack_pointer >= (& (stack_start [RE_NFAILURES])))
+	  RE_RETURN (-4);	/* the fixed-size stack is full */
+	(*stack_pointer++) = (pattern_pc + offset);
+	goto loop;
+      }
+
+    default:
+      BAD_PATTERN ();
+    }
+
+ next:
+  if (stack_pointer != stack_start)
+    {
+      pattern_pc = (*--stack_pointer);
+      goto loop;
+    }
+
+ return_point:
+  return (return_value);
+}
+
+/* Match the compiled pattern described by `pattern_start' and
+   `pattern_end' against the characters in `buffer' between
+   `match_start' and `match_end'.
+
+   `registers', if not NULL, will be filled with the start and end
+   indices of the match registers if the match succeeds.
+
+   It is assumed that the following is true:
+
+   (! ((gap_start < gap_end) &&
+       (match_start < match_end) &&
+       ((match_start == gap_start) || (match_end == gap_end))))
+
+   Return values:
+
+   non-negative => the end index (exclusive) of the match.
+   -1 => no match.
+   -2 => the pattern is badly formed.
+   -3 => memory allocation error.
+   -4 => match stack overflow.
+   other => undefined. */
+
+#define RE_MATCH_FAILED (-1) /* re_match result: no match */
+/* Index of `address' from (buffer -> text), not counting the gap. */
+#define ADDRESS_TO_INDEX(address)					\
+  ((((address) > gap_start) ? ((address) - gap_length) : (address))	\
+   - (buffer -> text))
+
+#define READ_MATCH_CHAR(target) do					\
+{ /* Next text char, translated; fail at match_end; hop the gap. */	\
+  if (match_pc >= match_end)						\
+    goto re_match_fail;							\
+  (target) = (TRANSLATE_CHAR (*match_pc++));				\
+  if (match_pc == gap_start)						\
+    match_pc = gap_end;							\
+} while (0)
+
+static Boolean
+beq_translate (scan1, scan2, length, translation)
+     fast unsigned char *scan1, *scan2;
+     fast long length;
+     fast unsigned char *translation;
+{ /* True iff the two `length'-byte runs agree after translation. */
+  for (; (length > 0); length -= 1, scan1 += 1, scan2 += 1)
+    if ((TRANSLATE_CHAR (*scan1)) != (TRANSLATE_CHAR (*scan2)))
+      return (false);
+  return (true);
+}
+
+int re_max_failures = 1000; /* failure stack never grows past 2x this */
+
+int
+re_match (pattern_start, pattern_end, buffer, registers,
+	  match_start, match_end)
+     unsigned char *pattern_start, *pattern_end;
+     struct re_buffer *buffer;
+     struct re_registers *registers;
+     unsigned char *match_start, *match_end;
+{
+  fast unsigned char *pattern_pc, *match_pc;
+  unsigned char *gap_start, *gap_end;
+  unsigned char *translation;
+  SYNTAX_TABLE_TYPE syntax_table;
+  long gap_length;		/* gap_end - gap_start */
+  int return_value;
+
+  /* Failure point stack.  Each place that can handle a failure
+     further down the line pushes a failure point on this stack.  It
+     consists of two char *'s.  The first one pushed is where to
+     resume scanning the pattern; the second pushed is where to resume
+     scanning the match text.  A "dummy" failure point has NULL for
+     both.  If a failure happens and the innermost failure point is
+     a dummy (its pattern pointer is NULL), it is discarded and the
+     next one is tried. */
+
+  unsigned char **stack_start, **stack_end, **stack_pointer;
+
+  /* Information on the "contents" of registers.  These are pointers
+     into the match text; they record just what was matched (on this
+     attempt) by some part of the pattern.  The start_memory command
+     stores the start of a register's contents and the stop_memory
+     command stores the end.
+
+     After both have run, (register_start [regnum]) points to the
+     first character in the register, and (register_end [regnum])
+     points to the first character beyond the end of the register. */
+
+  unsigned char *register_start[RE_NREGS];
+  unsigned char *register_end[RE_NREGS];
+
+  pattern_pc = pattern_start;
+  match_pc = match_start;
+  gap_start = (buffer -> gap_start);
+  gap_end = (buffer -> gap_end);
+  gap_length = (gap_end - gap_start);
+  translation = (buffer -> translation);
+  syntax_table = (buffer -> syntax_table);
+
+  stack_start =
+    ((unsigned char **) (malloc ((2 * RE_NFAILURES) * (sizeof (char *)))));
+  if (stack_start == NULL)
+    RE_RETURN (-3);
+
+  stack_end = (& (stack_start [(2 * RE_NFAILURES)]));
+  stack_pointer = stack_start;
+
+  {
+    fast int i;
+
+    FOR_INDEX_BELOW (i, RE_NREGS)
+      {
+	(register_start [i]) = NULL;
+	(register_end [i]) = NULL;
+      }
+  }
+
+ re_match_loop:
+  if (pattern_pc >= pattern_end)
+    {
+      /* Reaching here indicates that match was successful. */
+      if (registers != NULL)
+	{
+	  fast int i;
+
+	  (register_start [0]) = match_start;	/* register 0: whole match */
+	  (register_end [0]) = match_pc;
+	  FOR_INDEX_BELOW (i, RE_NREGS)
+	    {
+	      ((registers -> start) [i]) =
+		(((register_start [i]) == NULL)
+		 ? -1
+		 : (ADDRESS_TO_INDEX (register_start [i])));
+	      ((registers -> end) [i]) =
+		(((register_end [i]) == NULL)
+		 ? -1
+		 : (ADDRESS_TO_INDEX (register_end [i])));
+	    }
+	}
+      RE_RETURN (ADDRESS_TO_INDEX (match_pc));
+    }
+
+  SWITCH_ENUM (regexpcode, (*pattern_pc++))
+    {
+    case regexpcode_unused:
+      goto re_match_loop;
+
+    case regexpcode_exact_1:
+      {
+	fast int ascii;
+	fast int ascii_p;
+
+	READ_MATCH_CHAR (ascii);
+	READ_PATTERN_CHAR (ascii_p);
+	if (ascii == ascii_p)
+	  goto re_match_loop;
+	goto re_match_fail;
+      }
+
+    case regexpcode_exact_n:
+      {
+	fast int length;
+	fast int ascii;
+
+	READ_PATTERN_LENGTH (length);
+	while ((length--) > 0)
+	  {
+	    READ_MATCH_CHAR (ascii);
+	    if (ascii != (*pattern_pc++))
+	      goto re_match_fail;
+	  }
+	goto re_match_loop;
+      }
+
+    case regexpcode_any_char:
+      {
+	fast int ascii;
+
+	READ_MATCH_CHAR (ascii);
+	if (ascii == '\n')
+	  goto re_match_fail;
+	goto re_match_loop;
+      }
+
+#define RE_MATCH_CHAR_SET(winning_label, losing_label)			\
+      {									\
+	fast int ascii;							\
+	fast int length;						\
+									\
+	READ_MATCH_CHAR (ascii);					\
+	READ_PATTERN_LENGTH (length);					\
+	if (CHAR_SET_MEMBER_P (length, pattern_pc, ascii))		\
+	  {								\
+	    pattern_pc += length;					\
+	    goto winning_label;						\
+	  }								\
+	else								\
+	  {								\
+	    pattern_pc += length;					\
+	    goto losing_label;						\
+	  }								\
+      }
+
+    case regexpcode_char_set:
+      RE_MATCH_CHAR_SET (re_match_loop, re_match_fail);
+
+    case regexpcode_not_char_set:
+      RE_MATCH_CHAR_SET (re_match_fail, re_match_loop);
+
+#undef RE_MATCH_CHAR_SET
+
+    /* \( is represented by a start_memory, \) by a stop_memory.  Both
+       of those commands contain a "register number" argument.  The
+       text matched within the \( and \) is recorded under that
+       number.  Then, \<digit> turns into a `duplicate' command which
+       is followed by the numeric value of <digit> as the register
+       number. */
+
+    case regexpcode_start_memory:
+      {
+	fast int register_number;
+
+	READ_PATTERN_REGISTER (register_number);
+	(register_start [register_number]) = match_pc;
+	goto re_match_loop;
+      }
+
+    case regexpcode_stop_memory:
+      {
+	fast int register_number;
+
+	READ_PATTERN_REGISTER (register_number);
+	(register_end [register_number]) =
+	  ((match_pc == gap_end) ? gap_start : match_pc);
+	goto re_match_loop;
+      }
+
+    case regexpcode_duplicate:
+      {
+	fast int register_number;
+	unsigned char *start, *end, *new_end;
+	long length;
+
+	READ_PATTERN_REGISTER (register_number);
+	start = (register_start [register_number]);
+	end = (register_end [register_number]);
+	length = (end - start);	/* NOTE(review): counts the gap if spanned? */
+	if (length <= 0)
+	  goto re_match_loop;
+	new_end = (match_pc + length);
+	if (new_end > match_end)
+	  goto re_match_fail;
+	if ((match_pc <= gap_start) && (new_end > gap_start))
+	  {
+	    long length1, length2;
+
+	    new_end += gap_length;
+	    if (new_end > match_end)
+	      goto re_match_fail;
+	    length1 = (gap_start - match_pc);
+	    length2 = (length - length1);
+	    if (!
+		((beq_translate (match_pc, start, length1, translation)) &&
+		 (beq_translate (gap_end, (start + length1), length2,
+				 translation))))
+	      goto re_match_fail;
+	  }
+	else if ((start <= gap_start) && (end > gap_start))
+	  {
+	    long length1, length2;
+
+	    length1 = (gap_start - start);
+	    length2 = (end - gap_end);
+	    if (!
+		((beq_translate (match_pc, start, length1, translation)) &&
+		 (beq_translate ((match_pc + length1), gap_end, length2,
+				 translation))))
+	      goto re_match_fail;
+	  }
+	else
+	  {
+	    if (! (beq_translate (match_pc, start, length, translation)))
+	      goto re_match_fail;
+	  }
+	match_pc = ((new_end == gap_start) ? gap_end : new_end);
+	goto re_match_loop;
+      }
+
+    case regexpcode_buffer_start:
+      {
+	if (match_pc == (buffer -> text_start))
+	  goto re_match_loop;
+	goto re_match_fail;
+      }
+
+    case regexpcode_buffer_end:
+      {
+	if (match_pc == (buffer -> text_end))
+	  goto re_match_loop;
+	goto re_match_fail;
+      }
+
+    case regexpcode_line_start:
+      {
+	if (match_pc == (buffer -> text_start))
+	  goto re_match_loop;
+	if ((TRANSLATE_CHAR
+	     (((match_pc == gap_end) ? gap_start : match_pc) [-1]))
+	    == '\n')
+	  goto re_match_loop;
+	goto re_match_fail;
+      }
+
+    case regexpcode_line_end:
+      {
+	if ((match_pc == (buffer -> text_end)) ||
+	    ((TRANSLATE_CHAR (match_pc [0])) == '\n'))
+	  goto re_match_loop;
+	goto re_match_fail;
+      }
+
+#define RE_MATCH_WORD_BOUND(word_bound_p)				\
+      {									\
+	if ((match_pc == gap_end)					\
+	    ? (word_bound_p						\
+	       (((gap_start != (buffer -> text_start)) &&		\
+		 (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_start [-1])))), \
+		((gap_end != (buffer -> text_end)) &&			\
+		 (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_end [0]))))))	\
+	    : (word_bound_p						\
+	       (((match_pc != (buffer -> text_start)) &&		\
+		 (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc [-1])))), \
+		((match_pc != (buffer -> text_end)) &&			\
+		 (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc [0]))))))) \
+	  goto re_match_loop;						\
+	goto re_match_fail;						\
+      }
+
+    case regexpcode_word_bound:
+#define WORD_BOUND_P(left_p, right_p) ((left_p) != (right_p))
+      RE_MATCH_WORD_BOUND (WORD_BOUND_P);
+#undef WORD_BOUND_P
+
+    case regexpcode_not_word_bound:
+#define NOT_WORD_BOUND_P(left_p, right_p) ((left_p) == (right_p))
+      RE_MATCH_WORD_BOUND (NOT_WORD_BOUND_P);
+#undef NOT_WORD_BOUND_P
+
+    case regexpcode_word_start:
+#define WORD_START_P(left_p, right_p) ((! (left_p)) && (right_p))
+      RE_MATCH_WORD_BOUND (WORD_START_P);
+#undef WORD_START_P
+
+    case regexpcode_word_end:
+#define WORD_END_P(left_p, right_p) ((left_p) && (! (right_p)))
+      RE_MATCH_WORD_BOUND (WORD_END_P);
+#undef WORD_END_P
+
+#undef RE_MATCH_WORD_BOUND
+
+    case regexpcode_syntax_spec:
+      {
+	fast int ascii;
+	fast enum syntaxcode code;
+
+	READ_MATCH_CHAR (ascii);
+	READ_PATTERN_SYNTAXCODE (code);
+	if (SYNTAX_CONSTITUENT_P (code, ascii))
+	  goto re_match_loop;
+	goto re_match_fail;
+      }
+
+    case regexpcode_not_syntax_spec:
+      {
+	fast int ascii;
+	fast enum syntaxcode code;
+
+	READ_MATCH_CHAR (ascii);
+	READ_PATTERN_SYNTAXCODE (code);
+	if (! (SYNTAX_CONSTITUENT_P (code, ascii)))
+	  goto re_match_loop;
+	goto re_match_fail;
+      }
+
+    case regexpcode_word_char:
+      {
+	fast int ascii;
+
+	READ_MATCH_CHAR (ascii);
+	if (WORD_CONSTITUENT_P (ascii))
+	  goto re_match_loop;
+	goto re_match_fail;
+      }
+
+    case regexpcode_not_word_char:
+      {
+	fast int ascii;
+
+	READ_MATCH_CHAR (ascii);
+	if (! (WORD_CONSTITUENT_P (ascii)))
+	  goto re_match_loop;
+	goto re_match_fail;
+      }
+
+    /* "or" constructs ("|") are handled by starting each alternative
+       with an on_failure_jump that points to the start of the next
+       alternative.  Each alternative except the last ends with a jump
+       to the joining point.  (Actually, each jump except for the last
+       one really jumps to the following jump, because tensioning the
+       jumps is a hassle.)
+
+       The start of a stupid repeat has an on_failure_jump that points
+       past the end of the repeat text.  This makes a failure point so
+       that, on failure to match a repetition, matching restarts past
+       as many repetitions as have been found, with no way to fail
+       and look for another one.
+
+       A smart repeat is similar but loops back to the on_failure_jump
+       so that each repetition makes another failure point. */
+
+    case regexpcode_on_failure_jump:
+      {
+	fast long offset;
+
+	READ_PATTERN_OFFSET (offset);
+	PUSH_FAILURE_POINT ((pattern_pc + offset), match_pc);
+	goto re_match_loop;
+      }
+
+    /* The end of a smart repeat has a maybe_finalize_jump back.
+       Change it either to a finalize_jump or an ordinary jump. */
+
+    case regexpcode_maybe_finalize_jump:
+      {
+	fast long offset;
+	fast long ascii;
+
+	READ_PATTERN_OFFSET (offset);
+	if (pattern_pc == pattern_end)
+	  goto maybe_finalize_jump_finalize;
+
+	/* Compare what follows with the beginning of the repeat.
+	   If we can establish that there is nothing that they
+	   would both match, we can change to `finalize_jump'. */
+
+	SWITCH_ENUM (regexpcode, (pattern_pc [0]))
+	  {
+	  case regexpcode_exact_1:
+	    ascii = (pattern_pc [1]);
+	    break;
+
+	  case regexpcode_exact_n:
+	    ascii = (pattern_pc [2]);
+	    break;
+
+	  case regexpcode_line_end:
+	    ascii = ('\n');
+	    break;
+
+	  default:
+	    goto maybe_finalize_jump_not_finalize;
+	  }
+
+	/* (pattern_pc [(offset - 3)]) is an `on_failure_jump'.
+	   Examine what follows that. */
+	SWITCH_ENUM (regexpcode, (pattern_pc [offset]))
+	  {
+	  case regexpcode_exact_1:
+	    {
+	      if (ascii != (pattern_pc [(offset + 1)]))
+		goto maybe_finalize_jump_finalize;
+	      goto maybe_finalize_jump_not_finalize;
+	    }
+
+	  case regexpcode_exact_n:
+	    {
+	      if (ascii != (pattern_pc [(offset + 2)]))
+		goto maybe_finalize_jump_finalize;
+	      goto maybe_finalize_jump_not_finalize;
+	    }
+
+	  case regexpcode_char_set:
+	    {
+	      if (CHAR_SET_MEMBER_P ((pattern_pc [(offset + 1)]),
+				     (& (pattern_pc [(offset + 2)])),
+				     ascii))
+		goto maybe_finalize_jump_not_finalize;
+	      goto maybe_finalize_jump_finalize;
+	    }
+
+	  case regexpcode_not_char_set:
+	    {
+	      if (CHAR_SET_MEMBER_P ((pattern_pc [(offset + 1)]),
+				     (& (pattern_pc [(offset + 2)])),
+				     ascii))
+		goto maybe_finalize_jump_finalize;
+	      goto maybe_finalize_jump_not_finalize;
+	    }
+
+	  default:
+	    goto maybe_finalize_jump_not_finalize;
+	  }
+
+      maybe_finalize_jump_finalize:
+	pattern_pc -= 2;	/* back up over the offset; [-1] is the opcode */
+	(pattern_pc [-1]) = ((unsigned char) regexpcode_finalize_jump);
+	goto re_match_finalize_jump;
+
+      maybe_finalize_jump_not_finalize:
+	pattern_pc -= 2;	/* back up over the offset; [-1] is the opcode */
+	(pattern_pc [-1]) = ((unsigned char) regexpcode_jump);
+	goto re_match_jump;
+      }
+
+    case regexpcode_finalize_jump:
+    re_match_finalize_jump:
+      {
+	stack_pointer -= 2;	/* drop the innermost failure point */
+	goto re_match_jump;
+      }
+
+    case regexpcode_jump:
+    re_match_jump:
+      {
+	fast long offset;
+
+	READ_PATTERN_OFFSET (offset);
+	pattern_pc += offset;
+	goto re_match_loop;
+      }
+
+    case regexpcode_dummy_failure_jump:
+      {
+	PUSH_FAILURE_POINT (NULL, NULL);	/* a dummy failure point */
+	goto re_match_jump;
+      }
+
+    default:
+      {
+	BAD_PATTERN ();
+      }
+    }
+
+ re_match_fail:
+  if (stack_pointer == stack_start)
+    RE_RETURN (RE_MATCH_FAILED);
+  match_pc = (*--stack_pointer);	/* pushed second, so popped first */
+  pattern_pc = (*--stack_pointer);
+  if (pattern_pc != NULL)
+    goto re_match_loop;
+  goto re_match_fail;			/* dummy entry: keep popping */
+
+ return_point:
+  if (stack_start != NULL)
+    free (stack_start);
+  return (return_value);
+}
+
+#define DEFINE_RE_SEARCH(name) /* header of a search function `name' */	\
+int name (pattern_start, pattern_end, buffer, registers,		\
+	  match_start, match_end)					\
+     unsigned char *pattern_start, *pattern_end;			\
+     struct re_buffer *buffer;						\
+     struct re_registers *registers;					\
+     unsigned char *match_start;					\
+     unsigned char *match_end;
+
+#define INITIALIZE_RE_SEARCH(pc, limit, gap_limit)			\
+  int can_be_null;							\
+  unsigned char *translation;						\
+  int match_result;							\
+  /* `gap_limit' names a local AND the `buffer' field copied to it. */	\
+  fast unsigned char *match_pc;						\
+  fast unsigned char *match_limit;					\
+  fast unsigned char *gap_limit;					\
+  fast unsigned char fastmap[MAX_ASCII];				\
+  /* NOTE(review): is `fast' valid on an array passed by name? confirm */ \
+  translation = (buffer -> translation);				\
+  can_be_null =								\
+    (re_compile_fastmap							\
+     (pattern_start, pattern_end, translation,				\
+      (buffer -> syntax_table), fastmap));				\
+  if (can_be_null < 0)							\
+    return (can_be_null);						\
+  /* Negative fastmap results were returned above as error codes. */	\
+  match_pc = (pc);							\
+  match_limit = (limit);						\
+  gap_limit = (buffer -> gap_limit)
+
+#define RE_SEARCH_TEST(start) /* re_match from `start' to match_end */	\
+  (re_match								\
+   (pattern_start, pattern_end, buffer, registers, (start), match_end))
+
+#define RE_SEARCH_FORWARD_FAST(limit) do				\
+{ /* Scan up to `limit'; returns from the caller on match or error. */	\
+  while (true)								\
+    {									\
+      if (match_pc >= (limit))						\
+	break;								\
+      /* Step past characters that cannot begin a match. */		\
+      if ((fastmap [(TRANSLATE_CHAR (*match_pc++))]) == FASTMAP_FALSE)	\
+	continue;							\
+      /* `match_pc' was advanced above; the candidate is behind it. */	\
+      match_result = (RE_SEARCH_TEST (match_pc - 1));			\
+      if (match_result == RE_MATCH_FAILED)				\
+	continue;							\
+      /* Either the end index of the match or an error code. */	\
+      return (match_result);						\
+    }									\
+} while (0)
+
+DEFINE_RE_SEARCH (re_search_forward)
+{
+  INITIALIZE_RE_SEARCH (match_start, match_end, gap_start);
+
+  if (can_be_null == 1)
+    {
+      /* The pattern can match the null string: test every position. */
+      for (;;)
+	{
+	  match_result = (RE_SEARCH_TEST (match_pc));
+	  if (match_result != RE_MATCH_FAILED)
+	    return (match_result);
+	  match_pc += 1;
+	  if (match_pc == gap_start)
+	    match_pc = (buffer -> gap_end);
+	  if (match_pc > match_limit)
+	    return (RE_MATCH_FAILED);
+	}
+    }
+
+  /* Scan the text before the gap, hop over the gap, then scan the
+     rest; the fast scans return directly when they find a match.  */
+  if ((match_pc < gap_start) && (gap_start < match_limit))
+    RE_SEARCH_FORWARD_FAST (gap_start);
+  if (match_pc == gap_start)
+    match_pc = (buffer -> gap_end);
+  RE_SEARCH_FORWARD_FAST (match_limit);
+  if (can_be_null == 0)
+    return (RE_MATCH_FAILED);
+  return (RE_SEARCH_TEST (match_limit));
+}
+
+#define RE_SEARCH_BACKWARD_FAST(limit) do				\
+{ /* Scan down to `limit'; returns from the caller on match or error. */ \
+  while (true)								\
+    {									\
+      if (match_pc <= (limit))						\
+	break;								\
+      /* Step back over characters that cannot begin a match. */	\
+      if ((fastmap [(TRANSLATE_CHAR (*--match_pc))]) == FASTMAP_FALSE)	\
+	continue;							\
+      /* `match_pc' now addresses the candidate starting character. */	\
+      match_result = (RE_SEARCH_TEST (match_pc));			\
+      if (match_result == RE_MATCH_FAILED)				\
+	continue;							\
+      /* Yields the start index of the match, or the error code. */	\
+      RE_SEARCH_BACKWARD_RETURN (match_pc);				\
+    }									\
+} while (0)
+
+#define RE_SEARCH_BACKWARD_RETURN(start) /* error, or index of `start' */ \
+  return								\
+    ((match_result < 0)							\
+     ? match_result							\
+     : ((((start) > (buffer -> gap_start))				\
+	 ? ((start) - (gap_end - (buffer -> gap_start)))		\
+	 : (start))							\
+	- (buffer -> text)))
+
+DEFINE_RE_SEARCH (re_search_backward)
+{
+  INITIALIZE_RE_SEARCH (match_end, match_start, gap_end);
+
+  if (can_be_null == 1)
+    {
+      /* The pattern can match the null string: test every position. */
+      for (;;)
+	{
+	  match_result = (RE_SEARCH_TEST (match_pc));
+	  if (match_result != RE_MATCH_FAILED)
+	    RE_SEARCH_BACKWARD_RETURN (match_pc);
+	  if (match_pc == gap_end)
+	    match_pc = (buffer -> gap_start);
+	  match_pc -= 1;
+	  if (match_pc < match_limit)
+	    return (RE_MATCH_FAILED);
+	}
+    }
+
+  /* Scan after the gap, hop over it, then scan the rest of the text. */
+  if ((match_pc > gap_end) && (gap_end > match_limit))
+    RE_SEARCH_BACKWARD_FAST (gap_end);
+  if (match_pc == gap_end)
+    match_pc = (buffer -> gap_start);
+  RE_SEARCH_BACKWARD_FAST (match_limit);
+  if (can_be_null == 0)
+    return (RE_MATCH_FAILED);
+  match_result = (RE_SEARCH_TEST (match_limit));
+  RE_SEARCH_BACKWARD_RETURN (match_limit);
+}
diff --git a/v7/src/microcode/regex.h b/v7/src/microcode/regex.h
new file mode 100644
index 000000000..c42cd0b4a
--- /dev/null
+++ b/v7/src/microcode/regex.h
@@ -0,0 +1,199 @@
+/* -*-C-*-
+
+$Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/regex.h,v 1.1 1987/07/14 03:00:23 cph Rel $
+
+Copyright (c) 1987 Massachusetts Institute of Technology
+
+This material was developed by the Scheme project at the Massachusetts
+Institute of Technology, Department of Electrical Engineering and
+Computer Science.  Permission to copy this software, to redistribute
+it, and to use it for any purpose is granted, subject to the following
+restrictions and understandings.
+
+1. Any copy made of this software must include this copyright notice
+in full.
+
+2. Users of this software agree to make their best efforts (a) to
+return to the MIT Scheme project any improvements or extensions that
+they make, so that these may be included in future releases; and (b)
+to inform MIT of noteworthy uses of this software.
+
+3. All materials developed as a consequence of the use of this
+software shall duly acknowledge such use, in accordance with the usual
+standards of acknowledging credit in academic research.
+
+4. MIT has made no warrantee or representation that the operation of
+this software will be error-free, and MIT is under no obligation to
+provide any services, by way of maintenance, update, or otherwise.
+
+5. In conjunction with products arising from the use of this material,
+there shall be no use of the name of the Massachusetts Institute of
+Technology nor of any adaptation thereof in any advertising,
+promotional, or sales literature without prior written consent from
+MIT in each case. */
+
+/* Translated from GNU Emacs. */
+
+/* Structure to represent a buffer of text to match against.
+   This contains the information that an editor buffer would have
+   to supply for the matching process to be executed.
+
+   `translation' is an array of MAX_ASCII characters which is used to
+   map each character before matching.  Both the pattern and the match
+   text are mapped.  This is normally used to implement case
+   insensitive searches.
+
+   `syntax_table' describes the syntax of the match text.  See the
+   syntax table primitives for more information.
+
+   `text' points to the beginning of the match text.  It is used only
+   for translating match text pointers into indices.
+
+   `text_start' and `text_end' delimit the match text.  They define
+   the buffer-start and buffer-end for those matching commands that
+   refer to them.  Also, all matching must take place within these
+   limits.
+
+   `gap_start' and `gap_end' delimit a gap in the match text.  Editor
+   buffers normally have such a gap.  For applications without a gap,
+   it is recommended that these be set to the same value as
+   `text_end'.
+
+   Both `text_start' and `gap_start' are inclusive indices, while
+   `text_end' and `gap_end' are exclusive.
+
+   The following conditions must be true:
+
+   (text <= text_start)
+   (text_start <= text_end)
+   (gap_start <= gap_end)
+   (! ((text_start < text_end) &&
+       (gap_start < gap_end) &&
+       ((text_start == gap_start) || (text_end == gap_end))))
+
+   */
+
+struct re_buffer
+  {
+    unsigned char *translation;	/* map applied to each char before matching */
+    SYNTAX_TABLE_TYPE syntax_table;	/* syntax of the match text */
+    unsigned char *text;	/* base used to turn pointers into indices */
+    unsigned char *text_start;	/* start of the match text (inclusive) */
+    unsigned char *text_end;	/* end of the match text (exclusive) */
+    unsigned char *gap_start;	/* start of the gap (inclusive) */
+    unsigned char *gap_end;	/* end of the gap (exclusive) */
+  };
+
+/* Structure to store "register" contents data in.
+
+   Pass the address of such a structure as an argument to re_match,
+   etc., if you want this information back.
+
+   start[i] and end[i] record the string matched by \( ... \) grouping
+   i, for i from 1 to RE_NREGS - 1.
+
+   start[0] and end[0] record the entire string matched. */
+
+#define RE_NREGS 10	/* Register 0 (whole match) plus groups 1 to 9. */
+
+struct re_registers
+  { /* rgxprim.c reports an entry equal to -1 as NIL. */
+    long start[RE_NREGS];	/* start[i]: start of the text for register i */
+    long end[RE_NREGS];		/* end[i]: end of the text for register i */
+  };
+
+/* These are the command codes that appear in compiled regular
+   expressions, one per byte.  Some command codes are followed by
+   argument bytes.  A command code can specify any interpretation
+   whatever for its arguments.  Zero-bytes may appear in the compiled
+   regular expression. */
+
+enum regexpcode
+  { /* Values are implicit, assigned in declaration order from 0. */
+    regexpcode_unused,		/* First enumerator, so its value is 0 */
+    regexpcode_exact_1,		/* Followed by 1 literal byte */
+
+    /* Followed by one byte giving n, and then by n literal bytes. */
+    regexpcode_exact_n,
+
+    regexpcode_line_start,	/* Fails unless at beginning of line */
+    regexpcode_line_end,	/* Fails unless at end of line */
+
+    /* Followed by two bytes giving relative address to jump to. */
+    regexpcode_jump,
+
+    /* Followed by two bytes giving relative address of place to
+       resume at in case of failure. */
+    regexpcode_on_failure_jump,	
+
+    /* Throw away latest failure point and then jump to address. */
+    regexpcode_finalize_jump,
+
+    /* Like jump but finalize if safe to do so.  This is used to jump
+       back to the beginning of a repeat.  If the command that follows
+       this jump is clearly incompatible with the one at the beginning
+       of the repeat, such that we can be sure that there is no use
+       backtracking out of repetitions already completed, then we
+       finalize. */
+    regexpcode_maybe_finalize_jump,
+
+    /* Jump, and push a dummy failure point.  This failure point will
+       be thrown away if an attempt is made to use it for a failure.
+       A + construct makes this before the first repeat. */
+    regexpcode_dummy_failure_jump,
+
+    regexpcode_any_char,	/* Matches any one character */
+
+    /* Matches any one char belonging to specified set.  First
+       following byte is the number of bitmap bytes.  Then come bytes
+       for a bit-map saying which chars are in.  Bits in each byte are
+       ordered low-bit-first.  A character is in the set if its bit is
+       1.  A character too large to have a bit in the map is
+       automatically not in the set. */
+    regexpcode_char_set,
+
+    /* Similar but match any character that is NOT one of those
+       specified. */
+    regexpcode_not_char_set,
+
+    /* Starts remembering the text that is matched and stores it in a
+       memory register.  Followed by one byte containing the register
+       number.  Register numbers must be in the range 0 through
+       (RE_NREGS - 1) inclusive.  */
+    regexpcode_start_memory,
+
+    /* Stops remembering the text that is matched and stores it in a
+       memory register.  Followed by one byte containing the register
+       number.  Register numbers must be in the range 0 through
+       (RE_NREGS - 1) inclusive.  */
+    regexpcode_stop_memory,
+
+    /* Match a duplicate of something remembered.  Followed by one
+       byte containing the index of the memory register. */
+    regexpcode_duplicate,
+
+    regexpcode_buffer_start,	/* Succeeds if at beginning of buffer */
+    regexpcode_buffer_end,	/* Succeeds if at end of buffer */
+    regexpcode_word_char,	/* Matches any word-constituent character */
+
+    /* Matches any char that is not a word-constituent. */
+    regexpcode_not_word_char,
+
+    regexpcode_word_start,	/* Succeeds if at word beginning */
+    regexpcode_word_end,	/* Succeeds if at word end */
+    regexpcode_word_bound,	/* Succeeds if at a word boundary */
+    regexpcode_not_word_bound,	/* Succeeds if not at a word boundary */
+
+    /* Matches any character whose syntax is specified.  Followed by a
+       byte which contains a syntax code, Sword or such like. */
+    regexpcode_syntax_spec,
+
+    /* Matches any character whose syntax differs from the specified. */
+    regexpcode_not_syntax_spec
+  };
+
+extern void re_buffer_initialize ();	/* rgxprim.c passes a struct re_buffer * first */
+extern int re_compile_fastmap ();	/* rgxprim.c: >= 0 is returned, -2 is a bad pattern arg */
+extern int re_match ();			/* rgxprim.c: >= 0 success, -1 no match, -2 bad arg 1 */
+extern int re_search_forward ();	/* result handled like re_match in rgxprim.c */
+extern int re_search_backward ();	/* result handled like re_match in rgxprim.c */
diff --git a/v7/src/microcode/rgxprim.c b/v7/src/microcode/rgxprim.c
new file mode 100644
index 000000000..86c05da3e
--- /dev/null
+++ b/v7/src/microcode/rgxprim.c
@@ -0,0 +1,247 @@
+/* -*-C-*-
+
+$Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/rgxprim.c,v 1.1 1987/07/14 03:00:03 cph Exp $
+
+Copyright (c) 1987 Massachusetts Institute of Technology
+
+This material was developed by the Scheme project at the Massachusetts
+Institute of Technology, Department of Electrical Engineering and
+Computer Science.  Permission to copy this software, to redistribute
+it, and to use it for any purpose is granted, subject to the following
+restrictions and understandings.
+
+1. Any copy made of this software must include this copyright notice
+in full.
+
+2. Users of this software agree to make their best efforts (a) to
+return to the MIT Scheme project any improvements or extensions that
+they make, so that these may be included in future releases; and (b)
+to inform MIT of noteworthy uses of this software.
+
+3. All materials developed as a consequence of the use of this
+software shall duly acknowledge such use, in accordance with the usual
+standards of acknowledging credit in academic research.
+
+4. MIT has made no warrantee or representation that the operation of
+this software will be error-free, and MIT is under no obligation to
+provide any services, by way of maintenance, update, or otherwise.
+
+5. In conjunction with products arising from the use of this material,
+there shall be no use of the name of the Massachusetts Institute of
+Technology nor of any adaptation thereof in any advertising,
+promotional, or sales literature without prior written consent from
+MIT in each case. */
+
+/* Primitives for regular expression matching and search. */
+
+/* This code is not yet tested. -- CPH */
+
+#include "scheme.h"
+#include "primitive.h"
+#include "stringprim.h"
+#include "character.h"
+#include "edwin.h"
+#include "syntax.h"
+#include "regex.h"
+
+#define RE_CHAR_SET_P(object)	/* string of MAX_ASCII / ASCII_LENGTH bytes */ \
+  ((STRING_P (object)) &&						\
+   ((string_length (object)) == (MAX_ASCII / ASCII_LENGTH)))
+
+#define CHAR_SET_P(argument)	/* string of exactly MAX_ASCII bytes */	\
+  ((STRING_P (argument)) && ((string_length (argument)) == MAX_ASCII))
+
+#define CHAR_TRANSLATION_P(argument) /* string of exactly MAX_ASCII bytes */ \
+  ((STRING_P (argument)) && ((string_length (argument)) == MAX_ASCII))
+
+#define RE_REGISTERS_P(object)	/* NIL, or a vector of 2 * RE_NREGS slots */ \
+  (((object) == NIL) ||							\
+   ((VECTOR_P (object)) &&						\
+    ((Vector_Length (object)) == (RE_NREGS + RE_NREGS))))
+
+#define RE_MATCH_RESULTS(result, vector) do				\
+{ /* Turn a matcher result code into the primitive's value. */		\
+  if ((result) == (-1))		/* no match */				\
+    return (NIL);							\
+  else if ((result) == (-2))	/* reported against argument 1 */	\
+    error_bad_range_arg (1);						\
+  else if ((result) < 0)	/* any other negative code */		\
+    error_external_return ();						\
+  else									\
+    {									\
+      if ((vector) != NIL)						\
+	{								\
+	  /* Copy the registers: starts first, then ends. */		\
+	  int reg;							\
+	  long position;						\
+									\
+	  reg = 0;							\
+	  while (reg < RE_NREGS)					\
+	    {								\
+	      /* An entry of -1 is stored as NIL. */			\
+	      position = ((registers . start) [reg]);			\
+	      User_Vector_Set						\
+		(vector, reg,						\
+		 ((position == -1)					\
+		  ? NIL : (C_Integer_To_Scheme_Integer (position))));	\
+	      position = ((registers . end) [reg]);			\
+	      User_Vector_Set						\
+		(vector, (reg + RE_NREGS),				\
+		 ((position == -1)					\
+		  ? NIL : (C_Integer_To_Scheme_Integer (position))));	\
+	      reg += 1;							\
+	    }								\
+	}								\
+      return (C_Integer_To_Scheme_Integer (result));			\
+    }									\
+} while (0)
+
+Built_In_Primitive (Prim_re_char_set_adjoin, 2, "RE-CHAR-SET-ADJOIN!", 0x190)
+{ /* Set the bit for the character (arg 2) in the char set (arg 1). */
+  int code, mask;
+  Primitive_2_Args ();
+
+  CHECK_ARG (1, RE_CHAR_SET_P);
+  code = (arg_ascii_char (2));
+  mask = (1 << (code % ASCII_LENGTH));	/* bit within the selected byte */
+  (* (string_pointer (Arg1, (code / ASCII_LENGTH)))) |= mask;
+  return (NIL);
+}
+
+Built_In_Primitive (Prim_re_compile_fastmap, 4, "RE-COMPILE-FASTMAP", 0x191)
+{ /* Args: regexp, translation, syntax table, char set (presumably the fastmap). */
+  int status;
+  Primitive_4_Args ();
+
+  CHECK_ARG (1, STRING_P);
+  CHECK_ARG (2, CHAR_TRANSLATION_P);
+  CHECK_ARG (3, SYNTAX_TABLE_P);
+  CHECK_ARG (4, CHAR_SET_P);
+
+  /* The regexp is passed as a start pointer and an end pointer. */
+  status =
+    (re_compile_fastmap
+     ((string_pointer (Arg1, 0)),
+      (string_pointer (Arg1, (string_length (Arg1)))),
+      (string_pointer (Arg2, 0)), Arg3, (string_pointer (Arg4, 0))));
+
+  if (status == (-2))		/* Reported against argument 1. */
+    error_bad_range_arg (1);
+  else if (status < 0)
+    error_external_return ();
+  else
+    return (C_Integer_To_Scheme_Integer (status));
+}
+
+/* (re-match-substring regexp translation syntax-table registers
+		       string start end)
+
+   Attempt to match REGEXP against the substring [STRING, START, END].
+   Return the index of the end of the match (exclusive) if successful.
+   Otherwise return false.  REGISTERS, if not false, is set to contain
+   the appropriate indices for the match registers. */
+
+#define RE_SUBSTRING_PRIMITIVE(procedure)				\
+{ /* Primitive body: run PROCEDURE over a substring of arg 5. */	\
+  long match_start, match_end, text_end;				\
+  char *text;								\
+  struct re_buffer buffer;						\
+  struct re_registers registers; /* read by RE_MATCH_RESULTS */	\
+  int result;								\
+  Primitive_7_Args ();							\
+  /* Args 1-5: regexp, translation, syntax table, registers, string. */ \
+  CHECK_ARG (1, STRING_P);						\
+  CHECK_ARG (2, CHAR_TRANSLATION_P);					\
+  CHECK_ARG (3, SYNTAX_TABLE_P);					\
+  CHECK_ARG (4, RE_REGISTERS_P);					\
+  CHECK_ARG (5, STRING_P);						\
+  match_start = (arg_nonnegative_integer (6));				\
+  match_end = (arg_nonnegative_integer (7));				\
+  text = (string_pointer (Arg5, 0));					\
+  text_end = (string_length (Arg5));					\
+  /* Require match_start <= match_end <= length of the string. */	\
+  if (match_end > text_end) error_bad_range_arg (7);			\
+  if (match_start > match_end) error_bad_range_arg (6);			\
+  /* A string has no gap: both gap arguments are text_end. */		\
+  re_buffer_initialize							\
+    ((& buffer), (string_pointer (Arg2, 0)), Arg3, text, 0, text_end,	\
+     text_end, text_end);						\
+  /* Registers are passed only when arg 4 is not NIL. */		\
+  result =								\
+    (procedure ((string_pointer (Arg1, 0)),				\
+		(string_pointer (Arg1, (string_length (Arg1)))),	\
+		(& buffer),						\
+		((Arg4 == NIL) ? NULL : (& registers)),			\
+		(& (text [match_start])),				\
+		(& (text [match_end]))));				\
+  RE_MATCH_RESULTS (result, Arg4);					\
+}
+
+Built_In_Primitive (Prim_re_match_substring, 7, "RE-MATCH-SUBSTRING", 0x118)
+  RE_SUBSTRING_PRIMITIVE (re_match)	/* body is the macro, calling re_match */
+
+Built_In_Primitive (Prim_re_search_substring_forward, 7,
+		    "RE-SEARCH-SUBSTRING-FORWARD", 0x119)
+  RE_SUBSTRING_PRIMITIVE (re_search_forward)	/* same body, calling re_search_forward */
+
+Built_In_Primitive (Prim_re_search_substring_backward, 7,
+		    "RE-SEARCH-SUBSTRING-BACKWARD", 0x11A)
+  RE_SUBSTRING_PRIMITIVE (re_search_backward)	/* same body, calling re_search_backward */
+
+#define RE_BUFFER_PRIMITIVE(procedure)					\
+{ /* Primitive body: run PROCEDURE over the text of a group (arg 5). */ \
+  long match_start, match_end, text_start, text_end, gap_start;		\
+  char *text;								\
+  struct re_buffer buffer;						\
+  struct re_registers registers; /* read by RE_MATCH_RESULTS */	\
+  int result;								\
+  Primitive_7_Args ();							\
+  /* Args 1-5: regexp, translation, syntax table, registers, group. */	\
+  CHECK_ARG (1, STRING_P);						\
+  CHECK_ARG (2, CHAR_TRANSLATION_P);					\
+  CHECK_ARG (3, SYNTAX_TABLE_P);					\
+  CHECK_ARG (4, RE_REGISTERS_P);					\
+  CHECK_ARG (5, GROUP_P);						\
+  match_start = (arg_nonnegative_integer (6));				\
+  match_end = (arg_nonnegative_integer (7));				\
+  /* Fetch the group's text and its start, end, and gap positions. */	\
+  text = (string_pointer ((GROUP_TEXT (Arg5)), 0));			\
+  text_start = (MARK_POSITION (GROUP_START_MARK (Arg5)));		\
+  text_end = (MARK_POSITION (GROUP_END_MARK (Arg5)));			\
+  gap_start = (GROUP_GAP_START (Arg5));					\
+  /* Shift indices that fall beyond gap_start up by the gap length. */	\
+  if (match_end > gap_start)						\
+    {									\
+      match_end += (GROUP_GAP_LENGTH (Arg5));				\
+      if (match_start >= gap_start)					\
+	match_start += (GROUP_GAP_LENGTH (Arg5));			\
+    }									\
+  /* The range checks below use the gap-adjusted values. */		\
+  if (match_start > match_end) error_bad_range_arg (6);			\
+  if (match_end > text_end) error_bad_range_arg (7);			\
+  if (match_start < text_start) error_bad_range_arg (6);		\
+									\
+  re_buffer_initialize							\
+    ((& buffer), (string_pointer (Arg2, 0)), Arg3, text, text_start,	\
+     text_end, gap_start, (GROUP_GAP_END (Arg5)));			\
+  /* Registers are passed only when arg 4 is not NIL. */		\
+  result =								\
+    (procedure ((string_pointer (Arg1, 0)),				\
+		(string_pointer (Arg1, (string_length (Arg1)))),	\
+		(& buffer),						\
+		((Arg4 == NIL) ? NULL : (& registers)),			\
+		(& (text [match_start])),				\
+		(& (text [match_end]))));				\
+  RE_MATCH_RESULTS (result, Arg4);					\
+}
+
+Built_In_Primitive (Prim_re_match_buffer, 7, "RE-MATCH-BUFFER", 0x192)
+  RE_BUFFER_PRIMITIVE (re_match)	/* body is the macro, calling re_match */
+
+Built_In_Primitive (Prim_re_search_buffer_forward, 7,
+		    "RE-SEARCH-BUFFER-FORWARD", 0x193)
+  RE_BUFFER_PRIMITIVE (re_search_forward)	/* same body, calling re_search_forward */
+
+Built_In_Primitive (Prim_re_search_buffer_backward, 7,
+		    "RE-SEARCH-BUFFER-BACKWARD", 0x194)
+  RE_BUFFER_PRIMITIVE (re_search_backward)	/* same body, calling re_search_backward */