From 4fdd5b1d17090379187e2248adc5c684299bd5f3 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Mon, 11 May 1987 17:47:53 +0000 Subject: [PATCH] Initial revision --- v7/src/microcode/edwin.h | 48 ++ v7/src/microcode/syntax.c | 896 ++++++++++++++++++++++++++++++++++++++ v7/src/microcode/syntax.h | 89 ++++ 3 files changed, 1033 insertions(+) create mode 100644 v7/src/microcode/edwin.h create mode 100644 v7/src/microcode/syntax.c create mode 100644 v7/src/microcode/syntax.h diff --git a/v7/src/microcode/edwin.h b/v7/src/microcode/edwin.h new file mode 100644 index 000000000..5cb9ccf41 --- /dev/null +++ b/v7/src/microcode/edwin.h @@ -0,0 +1,48 @@ +/* -*-C-*- + +$Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/edwin.h,v 1.1 1987/05/11 17:47:53 cph Rel $ + +Copyright (c) 1987 Massachusetts Institute of Technology + +This material was developed by the Scheme project at the Massachusetts +Institute of Technology, Department of Electrical Engineering and +Computer Science. Permission to copy this software, to redistribute +it, and to use it for any purpose is granted, subject to the following +restrictions and understandings. + +1. Any copy made of this software must include this copyright notice +in full. + +2. Users of this software agree to make their best efforts (a) to +return to the MIT Scheme project any improvements or extensions that +they make, so that these may be included in future releases; and (b) +to inform MIT of noteworthy uses of this software. + +3. All materials developed as a consequence of the use of this +software shall duly acknowledge such use, in accordance with the usual +standards of acknowledging credit in academic research. + +4. MIT has made no warrantee or representation that the operation of +this software will be error-free, and MIT is under no obligation to +provide any services, by way of maintenance, update, or otherwise. + +5. In conjunction with products arising from the use of this material, +there shall be no use of the name of the Massachusetts Institute of +Technology nor of any adaptation thereof in any advertising, +promotional, or sales literature without prior written consent from +MIT in each case. */ + +/* Definitions for Edwin data structures. + This MUST match the definitions in the Edwin source code. */ + +#define GROUP_P(object) ((pointer_type (object)) == TC_VECTOR) +#define GROUP_TEXT(group) (User_Vector_Ref ((group), 1)) +#define GROUP_GAP_START(group) (Get_Integer (User_Vector_Ref ((group), 2))) +#define GROUP_GAP_LENGTH(group) (Get_Integer (User_Vector_Ref ((group), 3))) +#define GROUP_GAP_END(group) (Get_Integer (User_Vector_Ref ((group), 4))) +#define GROUP_START_MARK(group) (User_Vector_Ref ((group), 6)) +#define GROUP_END_MARK(group) (User_Vector_Ref ((group), 7)) + +#define MARK_GROUP(mark) (User_Vector_Ref ((mark), 1)) +#define MARK_POSITION(mark) (Get_Integer (User_Vector_Ref ((mark), 2))) +#define MARK_LEFT_INSERTING(mark) ((User_Vector_Ref ((mark), 3)) != NIL) diff --git a/v7/src/microcode/syntax.c b/v7/src/microcode/syntax.c new file mode 100644 index 000000000..532fe2b58 --- /dev/null +++ b/v7/src/microcode/syntax.c @@ -0,0 +1,896 @@ +/* -*-C-*- + +$Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/syntax.c,v 1.1 1987/05/11 17:47:12 cph Exp $ + +Copyright (c) 1987 Massachusetts Institute of Technology + +This material was developed by the Scheme project at the Massachusetts +Institute of Technology, Department of Electrical Engineering and +Computer Science. Permission to copy this software, to redistribute +it, and to use it for any purpose is granted, subject to the following +restrictions and understandings. + +1. Any copy made of this software must include this copyright notice +in full. + +2. Users of this software agree to make their best efforts (a) to +return to the MIT Scheme project any improvements or extensions that +they make, so that these may be included in future releases; and (b) +to inform MIT of noteworthy uses of this software. + +3. All materials developed as a consequence of the use of this +software shall duly acknowledge such use, in accordance with the usual +standards of acknowledging credit in academic research. + +4. MIT has made no warrantee or representation that the operation of +this software will be error-free, and MIT is under no obligation to +provide any services, by way of maintenance, update, or otherwise. + +5. In conjunction with products arising from the use of this material, +there shall be no use of the name of the Massachusetts Institute of +Technology nor of any adaptation thereof in any advertising, +promotional, or sales literature without prior written consent from +MIT in each case. */ + +/* Primitives to support Edwin syntax tables, word and list parsing. + Translated from GNU Emacs. */ + +/* This code is not yet tested. -- CPH */ + +#include "scheme.h" +#include "primitive.h" +#include "stringprim.h" +#include "character.h" +#include "edwin.h" +#include "syntax.h" + +/* Syntax Codes */ + +/* Convert a letter which signifies a syntax code + into the code it signifies. */ + +#define ILLEGAL ((char) syntaxcode_max) + +char syntax_spec_code[0200] = + { + ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, + ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, + ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, + ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, + + ((char) syntaxcode_whitespace), ILLEGAL, ((char) syntaxcode_string), + ILLEGAL, ((char) syntaxcode_math), ILLEGAL, ILLEGAL, + ((char) syntaxcode_quote), + ((char) syntaxcode_open), ((char) syntaxcode_close), ILLEGAL, ILLEGAL, + ILLEGAL, ((char) syntaxcode_whitespace), ((char) syntaxcode_punct), + ((char) syntaxcode_charquote), + ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, + ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ((char) syntaxcode_comment), + ILLEGAL, ((char) syntaxcode_endcomment), ILLEGAL, + + ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, + ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, + ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, + ((char) syntaxcode_word), + ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ((char) syntaxcode_escape), ILLEGAL, + ILLEGAL, ((char) syntaxcode_symbol), + + ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, + ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, + ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, + ((char) syntaxcode_word), + ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL + }; + +/* Indexed by syntax code, give the letter that describes it. */ + +char syntax_code_spec[13] = + { + ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>' + }; + +Built_In_Primitive (Prim_String_To_Syntax_Entry, 1, "STRING->SYNTAX-ENTRY", + 0x176) +{ + long length, c, result; + char *scan; + Primitive_1_Arg (); + + guarantee_string_arg_1 (); + length = (string_length (Arg1)); + if (length > 6) error_bad_range_arg_1 (); + scan = (string_pointer (Arg1, 0)); + + if (length-- > 0) + { + c = (char_to_long (*scan++)); + if (c >= 0200) error_bad_range_arg_1 (); + result = (char_to_long (syntax_spec_code[c])); + if (result == ILLEGAL) error_bad_range_arg_1 (); + } + else + result = ((long) syntaxcode_whitespace); + + if (length-- > 0) + { + c = (char_to_long (*scan++)); + if (c != ' ') result |= (c << 8); + } + + while (length-- > 0) + switch (*scan++) + { + case '1': result |= (1 << 16); break; + case '2': result |= (1 << 17); break; + case '3': result |= (1 << 18); break; + case '4': result |= (1 << 19); break; + default: error_bad_range_arg_1 (); + } + + return (Make_Unsigned_Fixnum (result)); +} + +Built_In_Primitive (Prim_Char_To_Syntax_Code, 2, "CHAR->SYNTAX-CODE", 0x17E) +{ + Primitive_2_Args (); + + if (! (SYNTAX_TABLE_P (Arg1))) + error_wrong_type_arg_1 (); + return + (c_char_to_scheme_char + ((char) + (SYNTAX_ENTRY_CODE + (SYNTAX_TABLE_REF (Arg1, (guarantee_ascii_char_arg_2 (Arg2))))))); +} + +/* Parser Initialization */ + +#define STANDARD_INITIALIZATION_COMMON(primitive_initialization) \ + fast char *start; \ + char *first_char, *end; \ + long entry; \ + long gap_length; \ + primitive_initialization (); \ + \ + if (! (SYNTAX_TABLE_P (Arg1))) \ + error_wrong_type_arg_1 (); \ + if (! (GROUP_P (Arg2))) \ + error_wrong_type_arg_2 (); \ + \ + first_char = (string_pointer ((GROUP_TEXT (Arg2)), 0)); \ + start = (first_char + (guarantee_nonnegative_int_arg_3 (Arg3))); \ + end = (first_char + (guarantee_nonnegative_int_arg_4 (Arg4))); \ + gap_start = (first_char + (GROUP_GAP_START (Arg2))); \ + gap_length = (GROUP_GAP_LENGTH (Arg2)); \ + gap_end = (first_char + (GROUP_GAP_END (Arg2))) + +#define STANDARD_INITIALIZATION_FORWARD(primitive_initialization) \ + char *gap_start; \ + fast char *gap_end; \ + STANDARD_INITIALIZATION_COMMON (primitive_initialization); \ + if (start >= gap_start) \ + start += gap_length; \ + if (end >= gap_start) \ + end += gap_length + +#define STANDARD_INITIALIZATION_BACKWARD(primitive_initialization) \ + fast char *gap_start; \ + char *gap_end; \ + Boolean quoted; \ + STANDARD_INITIALIZATION_COMMON (primitive_initialization); \ + if (start > gap_start) \ + start += gap_length; \ + if (end > gap_start) \ + end += gap_length + +#define SCAN_LIST_INITIALIZATION(initialization) \ + long depth, min_depth; \ + Boolean sexp_flag, ignore_comments, math_exit; \ + char c; \ + initialization (Primitive_7_Args); \ + guarantee_fixnum_arg_5 (); \ + Sign_Extend (Arg5, depth); \ + min_depth = ((depth >= 0) ? 0 : depth); \ + sexp_flag = (Arg6 != NIL); \ + ignore_comments = (Arg7 != NIL); \ + math_exit = false + +/* Parse Scanning */ + +#define PEEK_RIGHT(scan) (SYNTAX_TABLE_REF (Arg1, (*scan))) +#define PEEK_LEFT(scan) (SYNTAX_TABLE_REF (Arg1, (scan[-1]))) + +#define INCREMENT_SCAN(scan) \ +do \ +{ \ + if (++scan == gap_start) \ + scan = gap_end; \ +} while (0) + +#define DECREMENT_SCAN(scan) \ +do \ +{ \ + if (--scan == gap_end) \ + scan = gap_start; \ +} while (0) + +#define READ_RIGHT(scan, target) \ +do \ +{ \ + target = (SYNTAX_TABLE_REF (Arg1, (*scan++))); \ + if (scan == gap_start) \ + scan = gap_end; \ +} while (0) + +#define READ_LEFT(scan, target) \ +do \ +{ \ + target = (SYNTAX_TABLE_REF (Arg1, (*--scan))); \ + if (scan == gap_end) \ + scan = gap_start; \ +} while (0) + +#define RIGHT_END_P(scan) (scan >= end) +#define LEFT_END_P(scan) (scan <= end) + +#define LOSE_IF(expression) \ +do \ +{ \ + if (expression) \ + return (NIL); \ +} while (0) + +#define LOSE_IF_RIGHT_END(scan) LOSE_IF (RIGHT_END_P (scan)) +#define LOSE_IF_LEFT_END(scan) LOSE_IF (LEFT_END_P (scan)) + +#define SCAN_TO_INDEX(scan) \ + ((((scan) > gap_start) ? ((scan) - gap_length) : (scan)) - first_char) + +#define WIN_IF(expression) \ +do \ +{ \ + if (expression) \ + return (Make_Unsigned_Fixnum (SCAN_TO_INDEX (start))); \ +} while (0) + +#define WIN_IF_RIGHT_END(scan) WIN_IF (RIGHT_END_P (scan)) +#define WIN_IF_LEFT_END(scan) WIN_IF (LEFT_END_P (scan)) + +#define RIGHT_QUOTED_P_INTERNAL(scan, quoted) \ +do \ +{ \ + long entry; \ + \ + quoted = false; \ + while (true) \ + { \ + if (LEFT_END_P (scan)) \ + break; \ + READ_LEFT (scan, entry); \ + if (! (SYNTAX_ENTRY_QUOTE (entry))) \ + break; \ + quoted = (! quoted); \ + } \ +} while (0) + +#define RIGHT_QUOTED_P(scan_init, quoted) \ +do \ +{ \ + char *scan; \ + \ + scan = (scan_init); \ + RIGHT_QUOTED_P_INTERNAL (scan, quoted); \ +} while (0) + +#define LEFT_QUOTED_P(scan_init, quoted) \ +do \ +{ \ + char *scan; \ + \ + scan = (scan_init); \ + DECREMENT_SCAN (scan); \ + RIGHT_QUOTED_P_INTERNAL (scan, quoted); \ +} while (0) + +/* Quote Parsers */ + +Built_In_Primitive (Prim_Quoted_Char_P, 4, "QUOTED-CHAR?", 0x17F) +{ + STANDARD_INITIALIZATION_BACKWARD (Primitive_4_Args); + + RIGHT_QUOTED_P (start, quoted); + return (quoted ? TRUTH : NIL); +} + +/* This is used in conjunction with `scan-list-backward' to find the + beginning of an s-expression. */ + +Built_In_Primitive (Prim_Scan_Backward_Prefix_Chars, 4, + "SCAN-BACKWARD-PREFIX-CHARS", 0x17D) +{ + STANDARD_INITIALIZATION_BACKWARD (Primitive_4_Args); + + while (true) + { + WIN_IF_LEFT_END (start); + LEFT_QUOTED_P (start, quoted); + WIN_IF (quoted || + ((SYNTAX_ENTRY_CODE (PEEK_LEFT (start))) != syntaxcode_quote)); + DECREMENT_SCAN (start); + } +} + +/* Word Parsers */ + +Built_In_Primitive + (Prim_Scan_Forward_To_Word, 4, "SCAN-FORWARD-TO-WORD", 0x17C) +{ + STANDARD_INITIALIZATION_FORWARD (Primitive_4_Args); + + while (true) + { + LOSE_IF_RIGHT_END (start); + WIN_IF ((SYNTAX_ENTRY_CODE (PEEK_RIGHT (start))) == syntaxcode_word); + INCREMENT_SCAN (start); + } +} + +Built_In_Primitive (Prim_Scan_Word_Forward, 4, "SCAN-WORD-FORWARD", 0x177) +{ + STANDARD_INITIALIZATION_FORWARD (Primitive_4_Args); + + while (true) + { + LOSE_IF_RIGHT_END (start); + READ_RIGHT (start, entry); + if ((SYNTAX_ENTRY_CODE (entry)) == syntaxcode_word) + break; + } + while (true) + { + WIN_IF_RIGHT_END (start); + WIN_IF ((SYNTAX_ENTRY_CODE (PEEK_RIGHT (start))) != syntaxcode_word); + INCREMENT_SCAN (start); + } +} + +Built_In_Primitive (Prim_Scan_Word_Backward, 4, "SCAN-WORD-BACKWARD", 0x178) +{ + STANDARD_INITIALIZATION_BACKWARD (Primitive_4_Args); + + while (true) + { + LOSE_IF_LEFT_END (start); + READ_LEFT (start, entry); + if ((SYNTAX_ENTRY_CODE (entry)) == syntaxcode_word) + break; + } + while (true) + { + WIN_IF_LEFT_END (start); + WIN_IF ((SYNTAX_ENTRY_CODE (PEEK_LEFT (start))) != syntaxcode_word); + DECREMENT_SCAN (start); + } +} + +/* S-Expression Parsers */ + +Built_In_Primitive (Prim_Scan_List_Forward, 7, "SCAN-LIST-FORWARD", 0x179) +{ + SCAN_LIST_INITIALIZATION (STANDARD_INITIALIZATION_FORWARD); + + while (true) + { + LOSE_IF_RIGHT_END (start); + c = (*start); + READ_RIGHT(start, entry); + + if ((! (RIGHT_END_P (start))) && + (SYNTAX_ENTRY_COMSTART_FIRST (entry)) && + (SYNTAX_ENTRY_COMSTART_SECOND (PEEK_RIGHT (start)))) + { + INCREMENT_SCAN (start); + LOSE_IF_RIGHT_END (start); + while (true) + { + READ_RIGHT (start, entry); + LOSE_IF_RIGHT_END (start); + if ((SYNTAX_ENTRY_COMEND_FIRST (entry)) && + (SYNTAX_ENTRY_COMEND_SECOND (PEEK_RIGHT (start)))) + { + INCREMENT_SCAN (start); + break; + } + } + break; + } + + switch (SYNTAX_ENTRY_CODE (entry)) + { + case syntaxcode_escape: + case syntaxcode_charquote: + LOSE_IF_RIGHT_END (start); + INCREMENT_SCAN (start); + + case syntaxcode_word: + case syntaxcode_symbol: + if ((depth != 0) || (! sexp_flag)) + break; + while (true) + { + WIN_IF_RIGHT_END (start); + switch (SYNTAX_ENTRY_CODE (PEEK_RIGHT (start))) + { + case syntaxcode_escape: + case syntaxcode_charquote: + INCREMENT_SCAN (start); + LOSE_IF_RIGHT_END (start); + + case syntaxcode_word: + case syntaxcode_symbol: + INCREMENT_SCAN (start); + break; + + default: + WIN_IF (true); + } + } + + case syntaxcode_comment: + if (! ignore_comments) + break; + while (true) + { + LOSE_IF_RIGHT_END (start); + if ((SYNTAX_ENTRY_CODE (PEEK_RIGHT (start))) == + syntaxcode_endcomment) + break; + INCREMENT_SCAN (start); + } + break; + + case syntaxcode_math: + if (! sexp_flag) + break; + if ((! (RIGHT_END_P (start))) && (c == *start)) + INCREMENT_SCAN (start); + if (math_exit) + { + WIN_IF (--depth == 0); + LOSE_IF (depth < min_depth); + math_exit = false; + } + else + { + WIN_IF (++depth == 0); + math_exit = true; + } + break; + + case syntaxcode_open: + WIN_IF (++depth == 0); + break; + + case syntaxcode_close: + WIN_IF (--depth == 0); + LOSE_IF (depth < min_depth); + break; + + case syntaxcode_string: + while (true) + { + LOSE_IF_RIGHT_END (start); + if (c == *start) + break; + READ_RIGHT (start, entry); + if (SYNTAX_ENTRY_QUOTE (entry)) + { + LOSE_IF_RIGHT_END (start); + INCREMENT_SCAN (start); + } + } + INCREMENT_SCAN (start); + WIN_IF ((depth == 0) || sexp_flag); + break; + + default: + break; + } + } +} + +Built_In_Primitive (Prim_Scan_List_Backward, 7, "SCAN-LIST-BACKWARD", 0x17A) +{ + SCAN_LIST_INITIALIZATION (STANDARD_INITIALIZATION_BACKWARD); + + while (true) + { + LOSE_IF_LEFT_END (start); + LEFT_QUOTED_P (start, quoted); + if (quoted) + { + DECREMENT_SCAN (start); + /* existence of this character is guaranteed by LEFT_QUOTED_P. */ + READ_LEFT (start, entry); + goto word_entry; + } + c = (start[-1]); + READ_LEFT (start, entry); + if ((! (LEFT_END_P (start))) && + (SYNTAX_ENTRY_COMEND_SECOND (entry)) && + (SYNTAX_ENTRY_COMEND_FIRST (PEEK_LEFT (start)))) + { + LEFT_QUOTED_P (start, quoted); + if (! quoted) + { + DECREMENT_SCAN (start); + LOSE_IF_LEFT_END (start); + while (true) + { + READ_LEFT (start, entry); + LOSE_IF_LEFT_END (start); + if ((SYNTAX_ENTRY_COMSTART_SECOND (entry)) && + (SYNTAX_ENTRY_COMSTART_SECOND (PEEK_LEFT (start)))) + { + DECREMENT_SCAN (start); + break; + } + } + break; + } + } + + switch (SYNTAX_ENTRY_CODE (entry)) + { + case syntaxcode_word: + case syntaxcode_symbol: +word_entry: + if ((depth != 0) || (! sexp_flag)) + break; + while (true) + { + WIN_IF_LEFT_END (start); + LEFT_QUOTED_P (start, quoted); + if (quoted) + DECREMENT_SCAN (start); + else + { + entry = (PEEK_LEFT (start)); + WIN_IF (((SYNTAX_ENTRY_CODE (entry)) != syntaxcode_word) && + ((SYNTAX_ENTRY_CODE (entry)) != syntaxcode_symbol)); + } + DECREMENT_SCAN (start); + } + + case syntaxcode_math: + if (! sexp_flag) + break; + if ((! (LEFT_END_P (start))) && (c == start[-1])) + DECREMENT_SCAN (start); + if (math_exit) + { + WIN_IF (--depth == 0); + LOSE_IF (depth < min_depth); + math_exit = false; + } + else + { + WIN_IF (++depth == 0); + math_exit = true; + } + break; + + case syntaxcode_close: + WIN_IF (++depth == 0); + break; + + case syntaxcode_open: + WIN_IF (--depth == 0); + LOSE_IF (depth < min_depth); + break; + + case syntaxcode_string: + while (true) + { + LOSE_IF_LEFT_END (start); + LEFT_QUOTED_P (start, quoted); + if ((! quoted) && (c == start[-1])) + break; + DECREMENT_SCAN (start); + } + DECREMENT_SCAN (start); + WIN_IF ((depth == 0) && sexp_flag); + break; + + case syntaxcode_endcomment: + if (! ignore_comments) + break; + while (true) + { + LOSE_IF_LEFT_END (start); + if ((SYNTAX_ENTRY_CODE (PEEK_LEFT (start))) == + syntaxcode_comment) + break; + DECREMENT_SCAN (start); + } + break; + + default: + break; + } + } +} + +/* Partial S-Expression Parser */ + +#define LEVEL_ARRAY_LENGTH 100 +struct levelstruct { char *last, *previous; }; + +#define DONE_IF(expression) \ +do \ +{ \ + if (expression) \ + goto done; \ +} while (0) + +#define DONE_IF_RIGHT_END(scan) DONE_IF (RIGHT_END_P (scan)) + +#define SEXP_START() \ +if (stop_before) goto stop; \ +level->last = start + +Built_In_Primitive (Prim_Scan_Sexps_Forward, 7, "SCAN-SEXPS-FORWARD", 0x17B) +{ + long target_depth; + Boolean stop_before; + long depth; + long in_string; /* -1 or delimiter character */ + long in_comment; /* 0, 1, or 2 */ + Boolean quoted; + struct levelstruct level_start[LEVEL_ARRAY_LENGTH]; + struct levelstruct *level; + struct levelstruct *level_end; + char c; + Pointer result; + STANDARD_INITIALIZATION_FORWARD (Primitive_7_Args); + + guarantee_fixnum_arg_5 (); + Sign_Extend (Arg5, target_depth); + stop_before = (Arg6 != NIL); + + level = level_start; + level_end = (level_start + LEVEL_ARRAY_LENGTH); + level->previous = NULL; + + /* Initialize the state variables from the state argument. */ + + if (Arg7 == NIL) + { + depth = 0; + in_string = -1; + in_comment = 0; + quoted = false; + } + else if (((pointer_type (Arg7)) == TC_VECTOR) && + (Vector_Length (Arg7)) == 7) + { + Pointer temp; + + temp = (User_Vector_Ref (Arg7, 0)); + if (fixnum_p (temp)) + { + Sign_Extend (temp, depth); + } + else + error_bad_range_arg_7 (); + + temp = (User_Vector_Ref (Arg7, 1)); + if (temp == NIL) + in_string = -1; + else if ((fixnum_p (temp)) && ((pointer_datum (temp)) < MAX_ASCII)) + in_string = (pointer_datum (temp)); + else + error_bad_range_arg_7 (); + + temp = (User_Vector_Ref (Arg7, 2)); + if (temp == NIL) + in_comment = 0; + else if (temp == (Make_Unsigned_Fixnum (1))) + in_comment = 1; + else if (temp == (Make_Unsigned_Fixnum (2))) + in_comment = 2; + else + error_bad_range_arg_7 (); + + quoted = ((User_Vector_Ref (Arg7, 3)) != NIL); + + if ((in_comment != 0) && ((in_string != -1) || (quoted != false))) + error_bad_range_arg_7 (); + + } + else + error_wrong_type_arg_7 (); + + /* Make sure there is enough room for the result before we start. */ + + Primitive_GC_If_Needed (8); + + /* Enter main loop at place appropiate for initial state. */ + + if (in_comment == 1) + goto start_in_comment; + if (in_comment == 2) + goto start_in_comment2; + if (quoted) + { + quoted = false; + if (in_string != -1) + goto start_quoted_in_string; + else + goto start_quoted; + } + if (in_string != -1) + goto start_in_string; + + while (true) + { + LOSE_IF_RIGHT_END (start); + c = (*start); + READ_RIGHT (start, entry); + if ((! (RIGHT_END_P (start))) && + (SYNTAX_ENTRY_COMSTART_FIRST (entry)) && + (SYNTAX_ENTRY_COMSTART_FIRST (PEEK_RIGHT (start)))) + { + INCREMENT_SCAN (start); + in_comment = 2; +start_in_comment2: + while (true) + { + DONE_IF_RIGHT_END (start); + READ_RIGHT (start, entry); + if (SYNTAX_ENTRY_COMEND_FIRST (entry)) + { + /* Actually, terminating here is a special case. There + should be a third value of in_comment to handle it. */ + DONE_IF_RIGHT_END (start); + if (SYNTAX_ENTRY_COMEND_SECOND (PEEK_RIGHT (start))) + { + INCREMENT_SCAN (start); + break; + } + } + } + in_comment = 0; + } + else + + switch (SYNTAX_ENTRY_CODE (entry)) + { + case syntaxcode_escape: + case syntaxcode_charquote: + SEXP_START (); +start_quoted: + if (RIGHT_END_P (start)) + { + quoted = true; + DONE_IF (true); + } + INCREMENT_SCAN (start); + goto start_atom; + + case syntaxcode_word: + case syntaxcode_symbol: + SEXP_START (); +start_atom: + while (! (RIGHT_END_P (start))) + { + switch (SYNTAX_ENTRY_CODE (PEEK_RIGHT (start))) + { + case syntaxcode_escape: + case syntaxcode_charquote: + INCREMENT_SCAN (start); + if (RIGHT_END_P (start)) + { + quoted = true; + DONE_IF (true); + } + + case syntaxcode_word: + case syntaxcode_symbol: + INCREMENT_SCAN (start); + break; + + default: + goto end_atom; + } + } +end_atom: + level->previous = level->last; + break; + + case syntaxcode_comment: + in_comment = 1; +start_in_comment: + while (true) + { + DONE_IF_RIGHT_END (start); + READ_RIGHT (start, entry); + if ((SYNTAX_ENTRY_CODE (entry)) == syntaxcode_endcomment) + break; + } + in_comment = 0; + break; + + case syntaxcode_open: + SEXP_START (); + depth += 1; + level += 1; + if (level == level_end) + error_bad_range_arg_5 (); /* random error */ + level->last = NULL; + level->previous = NULL; + DONE_IF ((--target_depth) == 0); + break; + + case syntaxcode_close: + depth -= 1; + if (level != level_start) + level -= 1; + level->previous = level->last; + DONE_IF ((++target_depth) == 0); + break; + + case syntaxcode_string: + SEXP_START (); + in_string = (char_to_long (c)); +start_in_string: + while (true) + { + DONE_IF_RIGHT_END (start); + if (in_string == (*start)) + break; + READ_RIGHT (start, entry); + if (SYNTAX_ENTRY_QUOTE (entry)) + { +start_quoted_in_string: + if (RIGHT_END_P (start)) + { + quoted = true; + DONE_IF (true); + } + } + } + in_string = -1; + level->previous = level->last; + INCREMENT_SCAN (start); + break; + } + } + /* NOTREACHED */ + +stop: + /* Back up to point at character that starts sexp. */ + if (start == gap_end) + start = gap_start; + start -= 1; + +done: + result = ((Pointer) Free); + *Free++ = (Make_Non_Pointer (TC_MANIFEST_VECTOR, 7)); + *Free++ = (Make_Signed_Fixnum (depth)); + *Free++ = ((in_string == -1) ? NIL : (Make_Unsigned_Fixnum (in_string))); + *Free++ = ((in_comment == 0) ? NIL : (Make_Unsigned_Fixnum (in_comment))); + *Free++ = ((quoted == false) ? NIL : TRUTH); + /* Decrement the following indices by one since we recorded them after + `start' was advanced past the opening character. */ + *Free++ = + ((level->previous == NULL) + ? NIL + : (Make_Unsigned_Fixnum ((SCAN_TO_INDEX (level->previous)) - 1))); + *Free++ = + (((level == level_start) || (level->previous == NULL)) + ? NIL + : (Make_Unsigned_Fixnum ((SCAN_TO_INDEX ((level - 1)->last)) - 1))); + *Free++ = (Make_Unsigned_Fixnum (SCAN_TO_INDEX (start))); + return (result); +} diff --git a/v7/src/microcode/syntax.h b/v7/src/microcode/syntax.h new file mode 100644 index 000000000..45a726d2b --- /dev/null +++ b/v7/src/microcode/syntax.h @@ -0,0 +1,89 @@ +/* -*-C-*- + +$Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/syntax.h,v 1.1 1987/05/11 17:47:34 cph Exp $ + +Copyright (c) 1987 Massachusetts Institute of Technology + +This material was developed by the Scheme project at the Massachusetts +Institute of Technology, Department of Electrical Engineering and +Computer Science. Permission to copy this software, to redistribute +it, and to use it for any purpose is granted, subject to the following +restrictions and understandings. + +1. Any copy made of this software must include this copyright notice +in full. + +2. Users of this software agree to make their best efforts (a) to +return to the MIT Scheme project any improvements or extensions that +they make, so that these may be included in future releases; and (b) +to inform MIT of noteworthy uses of this software. + +3. All materials developed as a consequence of the use of this +software shall duly acknowledge such use, in accordance with the usual +standards of acknowledging credit in academic research. + +4. MIT has made no warrantee or representation that the operation of +this software will be error-free, and MIT is under no obligation to +provide any services, by way of maintenance, update, or otherwise. + +5. In conjunction with products arising from the use of this material, +there shall be no use of the name of the Massachusetts Institute of +Technology nor of any adaptation thereof in any advertising, +promotional, or sales literature without prior written consent from +MIT in each case. */ + +/* Definitions for Edwin syntax tables. */ + +/* CODE is the syntax code for the character. */ + +#define SYNTAX_ENTRY_CODE(entry) ((enum syntaxcode) ((entry) & 0xFF)) + +/* MATCH is a matching delimiter, if the character is a delimiter type. + For example, if the character is '(', then MATCH is usually ')'. */ + +#define SYNTAX_ENTRY_MATCH(entry) (((entry) >> 8) & 0xFF) + +/* Bits indicating whether this character is part of a two-character + comment delimiter sequence. */ + +#define SYNTAX_ENTRY_COMSTART_FIRST(entry) (((entry) >> 16) & 1) +#define SYNTAX_ENTRY_COMSTART_SECOND(entry) (((entry) >> 17) & 1) +#define SYNTAX_ENTRY_COMEND_FIRST(entry) (((entry) >> 18) & 1) +#define SYNTAX_ENTRY_COMEND_SECOND(entry) (((entry) >> 19) & 1) + +/* The possible syntax codes. */ + +enum syntaxcode + { + syntaxcode_whitespace, /* whitespace char */ + syntaxcode_punct, /* random punctuation char */ + syntaxcode_word, /* word constituent */ + syntaxcode_symbol, /* symbol constituent other than word */ + syntaxcode_open, /* beginning delimiter */ + syntaxcode_close, /* ending delimiter */ + syntaxcode_quote, /* prefix char like Lisp ' */ + syntaxcode_string, /* string-grouping char like Lisp " */ + syntaxcode_math, /* delimiters like $ in Tex. */ + syntaxcode_escape, /* char that begins a C-style escape */ + syntaxcode_charquote, /* char that quotes the following char */ + syntaxcode_comment, /* a comment-starting char */ + syntaxcode_endcomment, /* a comment-ending char */ + syntaxcode_max /* Upper bound on codes that are meaningful */ + }; + +#define SYNTAX_ENTRY_QUOTE(entry) \ + (((SYNTAX_ENTRY_CODE (entry)) == syntaxcode_escape) || \ + ((SYNTAX_ENTRY_CODE (entry)) == syntaxcode_charquote)) + +/* This array, indexed by a character, contains the syntax code which that + character signifies (as a char). For example, + (enum syntaxcode) syntax_spec_code['w'] is syntaxcode_word. */ + +extern char syntax_spec_code[0200]; + +#define SYNTAX_TABLE_P(argument) \ + (((pointer_type (argument)) == TC_VECTOR) && \ + ((Vector_Length (argument)) == 0x100)) + +#define SYNTAX_TABLE_REF(table, index) \ + (User_Vector_Ref ((table), ((index) & 0xFF))) -- 2.25.1