/* -*-C-*-
-$Id: regex.c,v 1.15 1994/12/19 22:27:11 cph Exp $
+$Id: regex.c,v 1.16 1996/03/04 20:37:25 cph Exp $
-Copyright (c) 1987-94 Massachusetts Institute of Technology
+Copyright (c) 1987-96 Massachusetts Institute of Technology
This material was developed by the Scheme project at the Massachusetts
Institute of Technology, Department of Electrical Engineering and
((((char_set) [((ascii) / ASCII_LENGTH)]) & \
(1 << ((ascii) % ASCII_LENGTH))) \
!= 0)
-\f
+
#define READ_PATTERN_CHAR(target) do \
{ \
if (pattern_pc >= pattern_end) \
} while (0)
/* BAD_PATTERN (): leave the enclosing routine through RE_RETURN with
   the value -2.
   NOTE(review): presumably -2 is the result code reported for a
   malformed compiled pattern -- confirm against RE_RETURN's
   definition and the callers. */
#define BAD_PATTERN() RE_RETURN (-2)
-\f
+
#define PUSH_FAILURE_POINT(pattern_pc, match_pc) do \
{ \
if (stack_pointer == stack_end) \
FOR_ALL_ASCII (i)
(fastmap [i]) = FASTMAP_FALSE;
}
-\f
+
loop:
if (pattern_pc >= pattern_end)
RE_RETURN (1);
goto return_point;
goto next;
}
-\f
+
case regexpcode_char_set:
{
fast int length;
(fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
goto next;
}
-\f
+
case regexpcode_word_char:
{
fast int ascii;
(fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
goto next;
}
-\f
+
case regexpcode_start_memory:
case regexpcode_stop_memory:
{
(fastmap [ascii]) = FASTMAP_TRUE;
RE_RETURN (1);
}
-\f
+
case regexpcode_jump:
case regexpcode_finalize_jump:
case regexpcode_maybe_finalize_jump:
}
/* Global integer with external linkage, initialized to 1000.
   NOTE(review): presumably an upper bound related to the failure
   stack used by PUSH_FAILURE_POINT; its uses are not visible here --
   confirm before relying on this. */
int re_max_failures = 1000;
-\f
+
int
DEFUN (re_match,
(pattern_start, pattern_end, buffer, registers, match_start, match_end),
unsigned char *register_start[RE_NREGS];
unsigned char *register_end[RE_NREGS];
-\f
+
pattern_pc = pattern_start;
match_pc = match_start;
gap_start = (buffer -> gap_start);
}
RE_RETURN (ADDRESS_TO_INDEX (match_pc));
}
-\f
+
SWITCH_ENUM (regexpcode, (*pattern_pc++))
{
case regexpcode_unused:
goto re_match_fail;
goto re_match_loop;
}
-\f
+
#define RE_MATCH_CHAR_SET(winning_label, losing_label) \
{ \
fast int ascii; \
((match_pc == gap_end) ? gap_start : match_pc);
goto re_match_loop;
}
-\f
+
case regexpcode_duplicate:
{
fast int register_number;
match_pc = ((new_end == gap_start) ? gap_end : new_end);
goto re_match_loop;
}
-\f
+
case regexpcode_buffer_start:
{
- if (match_pc == (buffer -> text_start))
+ if ((ADDRESS_TO_INDEX (match_pc))
+ == (ADDRESS_TO_INDEX (buffer -> text_start)))
goto re_match_loop;
goto re_match_fail;
}
case regexpcode_buffer_end:
{
- if (match_pc == (buffer -> text_end))
+ if ((ADDRESS_TO_INDEX (match_pc))
+ == (ADDRESS_TO_INDEX (buffer -> text_end)))
goto re_match_loop;
goto re_match_fail;
}
case regexpcode_line_start:
{
- if (match_pc == (buffer -> text_start))
+ if ((ADDRESS_TO_INDEX (match_pc))
+ == (ADDRESS_TO_INDEX (buffer -> text_start)))
goto re_match_loop;
if ((TRANSLATE_CHAR
(((match_pc == gap_end) ? gap_start : match_pc) [-1]))
case regexpcode_line_end:
{
- if ((match_pc == (buffer -> text_end)) ||
- ((TRANSLATE_CHAR (match_pc [0])) == '\n'))
+ if (((ADDRESS_TO_INDEX (match_pc))
+ == (ADDRESS_TO_INDEX (buffer -> text_end)))
+ || ((TRANSLATE_CHAR (match_pc [0])) == '\n'))
goto re_match_loop;
goto re_match_fail;
}
-\f
+
/* RE_MATCH_WORD_BOUND (word_bound_p): apply `word_bound_p' to two
   flags and branch on the result: `goto re_match_loop' when it is
   true, `goto re_match_fail' otherwise.

   The first flag says whether the (translated) character just before
   the match position is a word constituent; it is false when that
   position equals (buffer -> text_start).  The second flag says the
   same about the character at the match position; it is false when
   that position equals (buffer -> text_end).

   When match_pc equals gap_end, the preceding character is read from
   gap_start[-1] and the following one from gap_end[0]; otherwise
   match_pc[-1] and match_pc[0] are used.  */
#define RE_MATCH_WORD_BOUND(word_bound_p) \
if ((match_pc == gap_end) \
- ? (word_bound_p (((gap_start != (buffer -> text_start)) && \
- (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_start [-1])) \
- )), \
- ((gap_end != (buffer -> text_end)) && \
- (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_end [0]))) \
- ))) \
- : (word_bound_p (((match_pc != (buffer -> text_start)) && \
- (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc [-1]))) \
- ), \
- ((match_pc != (buffer -> text_end)) && \
- (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc [0]))) \
- )))) \
+ ? (word_bound_p \
+ (((gap_start != (buffer -> text_start)) \
+ && (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_start[-1])))), \
+ ((gap_end != (buffer -> text_end)) \
+ && (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_end[0])))))) \
+ : (word_bound_p \
+ (((match_pc != (buffer -> text_start)) \
+ && (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc[-1])))), \
+ ((match_pc != (buffer -> text_end)) \
+ && (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc[0]))))))) \
goto re_match_loop; \
goto re_match_fail
#undef WORD_END_P
#undef RE_MATCH_WORD_BOUND
-\f
+
case regexpcode_syntax_spec:
{
fast int ascii;
goto re_match_loop;
goto re_match_fail;
}
-\f
+
/* "or" constructs ("|") are handled by starting each alternative
with an on_failure_jump that points to the start of the next
alternative. Each alternative except the last ends with a jump
PUSH_FAILURE_POINT ((pattern_pc + offset), match_pc);
goto re_match_loop;
}
-\f
+
/* The end of a smart repeat has a maybe_finalize_jump back.
Change it either to a finalize_jump or an ordinary jump. */
default:
goto dont_finalize_jump;
}
-\f
+
/* (pattern_pc [(offset - 3)]) is an `on_failure_jump'.
Examine what follows that. */
SWITCH_ENUM (regexpcode, (pattern_pc [offset]))
(pattern_pc [-1]) = ((unsigned char) regexpcode_jump);
goto re_match_jump;
}
-\f
+
case regexpcode_finalize_jump:
re_match_finalize_jump:
{
/* RE_SEARCH_TEST (start): call re_match with `start' as its
   match-start argument.  The remaining arguments -- pattern_start,
   pattern_end, buffer, registers and match_end -- are taken from the
   scope in which the macro is expanded.  The expression's value is
   whatever re_match returns.  */
#define RE_SEARCH_TEST(start) \
(re_match \
(pattern_start, pattern_end, buffer, registers, (start), match_end))
-\f
+
#define RE_SEARCH_FORWARD_FAST(limit) do \
{ \
while (true) \
}
}
}
-\f
+
#define RE_SEARCH_BACKWARD_FAST(limit) do \
{ \
while (true) \