File psl-1983/kernel/token-scanner.red artifact 5384bf4bc9 part of check-in 30d10c278c


%
% TOKEN-SCANNER.RED - Table-driven token scanner
% 
% Author:      Eric Benson
%	       Symbolic Computation Group
%              Computer Science Dept.
%              University of Utah
% Date:        27 August 1981
% Copyright (c) 1981 University of Utah
%

% Edit by Cris Perdue, 29 Jan 1983 1338-PST
% Occurrences of "dipthong" changed to "diphthong"
%  <PSL.KERNEL>TOKEN-SCANNER.RED.2, 16-Dec-82 14:55:55, Edit by BENSON
%  MakeBufIntoFloat uses floating point arithmetic on each digit
%  <PSL.INTERP>TOKEN-SCANNER.RED.6, 15-Sep-82 10:49:54, Edit by BENSON
%  Can now scan 1+ and 1-
%  <PSL.INTERP>TOKEN-SCANNER.RED.12, 10-Jan-82 21:53:28, Edit by BENSON
%  Fixed bug in floating point parsing
%  <PSL.INTERP>TOKEN-SCANNER.RED.9,  8-Jan-82 07:06:23, Edit by GRISS
%  MakeBufIntoLispInteger becomes procedure for BigNums
%  <PSL.INTERP>TOKEN-SCANNER.RED.7, 28-Dec-81 22:09:14, Edit by BENSON
%  Made dipthong indicator last element of scan table

fluid '(CurrentScanTable!* !*Raise !*Compressing !*EOLInStringOK);
LoadTime <<
!*Raise := T;
!*Compressing := NIL;
!*EOLInStringOK := NIL;
>>;

CompileTime flag('(ReadInBuf MakeBufIntoID MakeBufIntoString
		   MakeBufIntoLispInteger MakeBufIntoSysNumber
		   MakeBufIntoFloat MakeStringIntoSysInteger
		   MakeStringIntoBitString ScannerError SysPowerOf2P
		   ScanPossibleDiphthong),
		 'InternalFunction);

on SysLisp;

% DIGITS are 0..9
internal WConst LETTER = 10,
		DELIMITER = 11,
		COMMENTCHAR = 12,
		DIPHTHONGSTART = 13,
		IDESCAPECHAR = 14,
		STRINGQUOTE = 15,
		PACKAGEINDICATOR = 16,
		IGNORE = 17,
		MINUSSIGN = 18,
		PLUSSIGN = 19,
		DECIMALPOINT = 20,
		IDSURROUND = 21;

internal WVar TokCh,
	      TokChannel,
	      ChTokenType,
	      CurrentChar,
	      ChangedPackages,
	      TokRadix,
	      TokSign,
	      TokFloatFractionLength,
	      TokFloatExponentSign,
	      TokFloatExponent;

CompileTime <<
syslsp smacro procedure TokenTypeOfChar Ch;
    IntInf VecItm(VecInf LispVar CurrentScanTable!*, Ch);

syslsp smacro procedure CurrentDiphthongIndicator();
    VecItm(VecInf LispVar CurrentScanTable!*, 128);

syslsp smacro procedure ResetBuf();
    CurrentChar := 0;

syslsp smacro procedure BackupBuf();
    CurrentChar := CurrentChar - 1;
>>;

syslsp procedure ReadInBuf();
<<  TokCh := ChannelReadChar TokChannel;
    StrByt(TokenBuffer, CurrentChar) := TokCh;
    ChTokenType := TokenTypeOfChar TokCh;
    if CurrentChar < MaxTokenSize then
	CurrentChar := CurrentChar + 1
    else if CurrentChar = MaxTokenSize then
    <<  ErrorPrintF("***** READ Buffer overflow, Truncating");
        CurrentChar := MaxTokenSize + 1 >>
    else CurrentChar := MaxTokenSize + 1 >>;

CompileTime <<
syslsp smacro procedure UnReadLastChar();
    ChannelUnReadChar(Channel, TokCh);

syslsp smacro procedure LowerCaseChar Ch;
    Ch >= char !a and Ch <= char !z;

syslsp smacro procedure RaiseChar Ch;
    (Ch - char !a) + char A;

syslsp smacro procedure RaiseLastChar();
    if LowerCaseChar TokCh then
	StrByt(TokenBuffer, CurrentChar - 1) := RaiseChar TokCh;
>>;

syslsp procedure MakeBufIntoID();
<<  LispVar TokType!* := '0;
    if CurrentChar eq 1 then MkID StrByt(TokenBuffer, 0)
    else
    <<  StrByt(TokenBuffer, CurrentChar) := char NULL;
	TokenBuffer[0] := CurrentChar - 1;
	if LispVar !*Compressing then NewID CopyString TokenBuffer
	else Intern MkSTR TokenBuffer >> >>;

syslsp procedure MakeBufIntoString();
<<  LispVar TokType!* := '1;
    StrByt(TokenBuffer, CurrentChar) := 0;
    TokenBuffer[0] := CurrentChar - 1;
    CopyString TokenBuffer >>;

syslsp procedure MakeBufIntoSysNumber(Radix, Sign);
<<  StrByt(TokenBuffer, CurrentChar) := 0;
    TokenBuffer[0] := CurrentChar - 1;
    MakeStringIntoSysInteger(TokenBuffer, Radix, Sign) >>;

syslsp procedure MakeBufIntoLispInteger(Radix, Sign);
<<  LispVar TokType!* := '2;
    StrByt(TokenBuffer, CurrentChar) := 0;
    TokenBuffer[0] := CurrentChar - 1;
    MakeStringIntoLispInteger(MkSTR TokenBuffer, Radix, Sign) >>;

internal WArray MakeFloatTemp1[1],
		MakeFloatTemp2[1],
		FloatTen[1]; 

% Changed to use floating point arithmetic on the characters, rather
% than converting to an integer.  This avoids overflow problems.

syslsp procedure MakeBufIntoFloat Exponent;
begin scalar F, N;
    !*WFloat(FloatTen, 10);
    !*WFloat(MakeFloatTemp1, 0);
    N := CurrentChar - 1;
    for I := 0 step 1 until N do
    <<  !*WFloat(MakeFloatTemp2, DigitToNumber StrByt(TokenBuffer, I));
	!*FTimes2(MakeFloatTemp1, MakeFloatTemp1, FloatTen);
	!*FPlus2(MakeFloatTemp1, MakeFloatTemp1, MakeFloatTemp2) >>;
    if Exponent > 0 then
	for I := 1 step 1 until Exponent do
	    !*FTimes2(MakeFloatTemp1, MakeFloatTemp1, FloatTen)
    else if Exponent < 0 then
    <<  Exponent := -Exponent;
	for I := 1 step 1 until Exponent do
	    !*FQuotient(MakeFloatTemp1, MakeFloatTemp1, FloatTen) >>;
    LispVar TokType!* := '2;
    F := GtFLTN();
    !*FAssign(FloatBase F, MakeFloatTemp1);
    return MkFLTN F;
end;


syslsp procedure ChannelReadToken Channel;	%. Token scanner
%
% This is the basic Lisp token scanner.  The value returned is a Lisp
% item corresponding to the next token from the input stream.  IDs will
% be interned.  The global Lisp variable TokType!* will be set to
%	0 if the token is an ordinary ID,
%	1 if the token is a string (delimited by double quotes),
%	2 if the token is a number, or
%	3 if the token is an unescaped delimiter.
% In the last case, the value returned by this function will be the single
% character ID corresponding to the delimiter.
%
begin
    TokChannel := Channel;
    ChangedPackages := 0;
    ResetBuf();
StartScanning:
    TokCh := ChannelReadChar Channel;
    ChTokenType := TokenTypeOfChar TokCh;
    if ChTokenType eq IGNORE then goto StartScanning;
    StrByt(TokenBuffer, CurrentChar) := TokCh;
    CurrentChar := CurrentChar + 1;
    case ChTokenType of
    0 to 9:	 % digit
    <<  TokSign := 1;
	goto InsideNumber >>;
    10:	 % Start of ID
    <<  if null LispVar !*Raise then
	    goto InsideID
	else
	<<  RaiseLastChar();
	    goto InsideRaisedID >> >>;
    11:	 % Delimiter, but not beginning of Diphthong
    <<  LispVar TokType!* := '3;
	return MkID TokCh >>;
    12:	 % Start of comment
	goto InsideComment;
    13:	 % Diphthong start - Lisp function uses P-list of starting char
	return ScanPossibleDiphthong(TokChannel, MkID TokCh);
    14:	 % ID escape character
    <<  if null LispVar !*Raise then
	    goto GotEscape
	else goto GotEscapeInRaisedID >>;
    15:	 % string quote
    <<  BackupBuf();
	goto InsideString >>;
    16:	 % Package indicator - at start of token means use global package
    <<  ResetBuf();
	ChangedPackages := 1;
	Package 'Global;
	if null LispVar !*Raise then
	    goto GotPackageMustGetID
	else goto GotPackageMustGetIDRaised >>;
    17:	 % Ignore - can't ever happen
	ScannerError("Internal error - consult a wizard");
    18:	 % Minus sign
    <<  TokSign := -1;
	goto GotSign >>;
    19:	 % Plus sign
    <<  TokSign := 1;
	goto GotSign >>;
    20:  % decimal point
    <<  ResetBuf();
	ReadInBuf();
	if ChTokenType >= 10 then
	<<  UnReadLastChar();
	    return ScanPossibleDiphthong(TokChannel, '!.) >>
	else
	<<  TokSign := 1;
	    TokFloatFractionLength := 1;
	    goto InsideFloatFraction >> >>;
    21:					% IDSURROUND, i.e. vertical bars
    <<  BackupBuf();
	goto InsideIDSurround >>;
    default:
	return ScannerError("Unknown token type")
    end;
GotEscape:
    BackupBuf();
    ReadInBuf();
    goto InsideID;
InsideID:
    ReadInBuf();
    if ChTokenType <= 10
	    or ChTokenType eq PLUSSIGN
	    or ChTokenType eq MINUSSIGN then goto InsideID
    else if ChTokenType eq IDESCAPECHAR then goto GotEscape
    else if ChTokenType eq PACKAGEINDICATOR then
    <<  BackupBuf();
	ChangedPackages := 1;
	Package MakeBufIntoID();
	ResetBuf();
	goto GotPackageMustGetID >>
    else
    <<  UnReadLastChar();
	BackupBuf();
	if ChangedPackages neq 0 then Package LispVar CurrentPackage!*;
	return MakeBufIntoID() >>;
GotPackageMustGetID:
    ReadInBuf();
    if ChTokenType eq LETTER then goto InsideID
    else if ChTokenType eq IDESCAPECHAR then goto GotEscape
    else ScannerError("Illegal to follow package indicator with non ID");
GotEscapeInRaisedID:
    BackupBuf();
    ReadInBuf();
    goto InsideRaisedID;
InsideRaisedID:
    ReadInBuf();
    if ChTokenType < 10 
	    or ChTokenType eq PLUSSIGN
	    or ChTokenType eq MINUSSIGN then goto InsideRaisedID
    else if ChTokenType eq 10 then
	<<  RaiseLastChar();
	    goto InsideRaisedID >>
    else if ChTokenType eq IDESCAPECHAR then goto GotEscapeInRaisedID
    else if ChTokenType eq PACKAGEINDICATOR then
    <<  BackupBuf();
	ChangedPackages := 1;
	Package MakeBufIntoID();
	ResetBuf();
	goto GotPackageMustGetIDRaised >>
    else
    <<  UnReadLastChar();
	BackupBuf();
	if ChangedPackages neq 0 then Package LispVar CurrentPackage!*;
	return MakeBufIntoID() >>;
GotPackageMustGetIDRaised:
    ReadInBuf();
    if ChTokenType eq LETTER then
    <<  RaiseLastChar();
	goto InsideRaisedID >>
    else if ChTokenType eq IDESCAPECHAR then goto GotEscapeInRaisedID
    else ScannerError("Illegal to follow package indicator with non ID");
InsideString:
    ReadInBuf();
    if ChTokenType eq STRINGQUOTE then
    <<  BackupBuf();
	ReadInBuf();
	if ChTokenType eq STRINGQUOTE then goto InsideString
	else
	<<  UnReadLastChar();
	    BackupBuf();
	    return MakeBufIntoString() >> >>
    else if TokCh eq char EOL and not LispVar !*EOLInStringOK then
	ErrorPrintF("*** String continued over end-of-line")
    else if TokCh eq char EOF then
	ScannerError("EOF encountered inside a string");
    goto InsideString;
InsideIDSurround:
    ReadInBuf();
    if ChTokenType eq IDSURROUND then
    <<  BackupBuf();
	return MakeBufIntoID() >>
    else if ChTokenType eq IDESCAPECHAR then
    <<  BackupBuf();
	ReadInBuf() >>
    else if TokCh eq char EOF then
	ScannerError("EOF encountered inside an ID");
    goto InsideIDSurround;
GotSign:
    ResetBuf();
    ReadInBuf();
    if TokCh eq char !. then
    <<  PutStrByt(TokenBuffer, 0, char !0);
	CurrentChar := 2;
	goto InsideFloat >>
    else if ChTokenType eq LETTER	% patch to be able to read 1+ and 1-
	    or ChTokenType eq MINUSSIGN
	    or ChTokenType eq PLUSSIGN then
    <<  ResetBuf();
	StrByt(TokenBuffer, 0) := if TokSign < 0 then char !- else char !+;
	StrByt(TokenBuffer, 1) := TokCh;
	CurrentChar := 2;
	if LispVar !*Raise then
	<<  RaiseLastChar();
	    goto InsideRaisedID >>
	else goto InsideID >>
    else if ChTokenType eq IDESCAPECHAR then
    <<  ResetBuf();
	StrByt(TokenBuffer, 0) := if TokSign < 0 then char !- else char !+;
	CurrentChar := 1;
	if LispVar !*Raise then
	    goto GotEscapeInRaisedID
	else goto GotEscape >>
    else if ChTokenType > 9 then
    <<  UnReadLastChar();	 % Allow + or - to start a Diphthong
	return ScanPossibleDiphthong(Channel,
				    MkID(if TokSign < 0 then char !-
					     else char !+)) >>
    else goto InsideNumber;
InsideNumber:
    ReadInBuf();
    if ChTokenType < 10 then goto InsideNumber;
    if TokCh eq char !# then
    <<  BackupBuf();
	TokRadix := MakeBufIntoSysNumber(10, 1);
	ResetBuf();
	if TokRadix < 2 or TokRadix > 36 then
	    return ScannerError("Radix out of range");
	if TokRadix <= 10 then goto InsideIntegerRadixUnder10
	else goto InsideIntegerRadixOver10 >>
    else if TokCh eq char !. then goto InsideFloat
    else if TokCh eq char B or TokCh eq char !b then
    <<  BackupBuf();
	return MakeBufIntoLispInteger(8, TokSign) >>
    else if TokCh eq char E or TokCh eq char !e then
    <<  TokFloatFractionLength := 0;
	goto InsideFloatExponent >>
    else if ChTokenType eq LETTER	% patch to be able to read 1+ and 1-
	    or ChTokenType eq MINUSSIGN
	    or ChTokenType eq PLUSSIGN then
	if LispVar !*Raise then
	<<  RaiseLastChar();
	    goto InsideRaisedID >>
	else goto InsideID
    else if ChTokenType eq IDESCAPECHAR then
	if LispVar !*Raise then
	    goto GotEscapeInRaisedID
	else goto GotEscape
    else
    <<  UnReadLastChar();
	BackupBuf();
	return MakeBufIntoLispInteger(10, TokSign) >>;
InsideIntegerRadixUnder10:
    ReadInBuf();
    if ChTokenType < TokRadix then goto InsideIntegerRadixUnder10;
    if ChTokenType < 10 then return ScannerError("Digit out of range");
NumReturn:
    UnReadLastChar();
    BackupBuf();
    return MakeBufIntoLispInteger(TokRadix, TokSign);
InsideIntegerRadixOver10:
    ReadInBuf();
    if ChTokenType < 10 then goto InsideIntegerRadixOver10;
    if ChTokenType > 10 then goto NumReturn;
    if LowerCaseChar TokCh then
    <<  TokCh := RaiseChar TokCh;
	StrByt(TokenBuffer, CurrentChar - 1) :=  TokCh >>;
    if TokCh >= char A - 10 + TokRadix then goto NumReturn;
    goto InsideIntegerRadixOver10;
InsideFloat:	 % got decimal point inside number
    BackupBuf();
    ReadInBuf();
    if TokCh eq char E or TokCh eq char !e then
    <<  TokFloatFractionLength := 0;
	goto InsideFloatExponent >>;
    if ChTokenType >= 10 then	 % nnn. is floating point number
    <<  UnReadLastChar();
	BackupBuf();
	return MakeBufIntoFloat 0 >>;
    TokFloatFractionLength := 1;
InsideFloatFraction:
    ReadInBuf();
    if ChTokenType < 10 then
    <<  if TokFloatFractionLength < 9 then
	    TokFloatFractionLength := TokFloatFractionLength + 1
	else BackupBuf();		% don't overflow mantissa
	goto InsideFloatFraction >>;
    if TokCh eq char E or TokCh eq char lower e then goto InsideFloatExponent;
    UnReadLastChar();
    BackupBuf();
    return MakeBufIntoFloat(-TokFloatFractionLength);
InsideFloatExponent:
    BackupBuf();
    TokFloatExponentSign := 1;
    TokFloatExponent := 0;
    TokCh := ChannelReadChar TokChannel;
    ChTokenType := TokenTypeOfChar TokCh;
    if ChTokenType < 10 then
    <<  TokFloatExponent := ChTokenType;
	goto DigitsInsideExponent >>;
    if TokCh eq char '!- then TokFloatExponentSign := -1
    else if TokCh neq char '!+ then
	return ScannerError("Missing exponent in float");
    TokCh := ChannelReadChar TokChannel;
    ChTokenType := TokenTypeOfChar TokCh;
    if ChTokenType >= 10 then
	return ScannerError("Missing exponent in float");
    TokFloatExponent := ChTokenType;
DigitsInsideExponent:
    TokCh := ChannelReadChar TokChannel;
    ChTokenType := TokenTypeOfChar TokCh;
    if ChTokenType < 10 then
    <<  TokFloatExponent := TokFloatExponent * 10 + ChTokenType;
	goto DigitsInsideExponent >>;
    ChannelUnReadChar(Channel, TokCh);
    return MakeBufIntoFloat(TokFloatExponentSign * TokFloatExponent
			    - TokFloatFractionLength);
InsideComment:
    if (TokCh := ChannelReadChar Channel) eq char EOL then
    <<  ResetBuf();
	goto StartScanning >>
    else if TokCh eq char EOF then return LispVar !$EOF!$
    else goto InsideComment;
end;

syslsp procedure RAtom();	%. Read token from current input
    ChannelReadToken LispVar IN!*;

syslsp procedure DigitToNumber D;
%
% if D is not a digit then it is assumed to be an uppercase letter
%
    if D >= char !0 and D <= char !9 then D - char !0 else D - (char A - 10);

syslsp procedure MakeStringIntoLispInteger(S, Radix, Sign);
    Sys2Int MakeStringIntoSysInteger(S, Radix, Sign);

syslsp procedure MakeStringIntoSysInteger(Strng, Radix, Sign);
%
% Unsafe string to integer conversion.  Strng is assumed to contain
% only digits and possibly uppercase letters for radices > 10.  Since it
% uses multiplication, arithmetic overflow may occur. Sign is +1 or -1
%
begin scalar Count, Tot, RadixExponent;
    if RadixExponent := SysPowerOf2P Radix then return
	MakeStringIntoBitString(Strng, Radix, RadixExponent, Sign);
    Strng := StrInf Strng;
    Count := StrLen Strng;	
    Tot := 0;
    for I := 0 step 1 until Count do
	Tot := Tot * Radix + DigitToNumber StrByt(Strng, I);
    return if Sign < 0 then -Tot else Tot;
end;

syslsp procedure MakeStringIntoBitString(Strng, Radix, RadixExponent, Sign);
begin scalar Count, Tot;
    Strng := StrInf Strng;
    Count := StrLen Strng;
    Tot := 0;
    for I := 0 step 1 until Count do
    <<  Tot := LSH(Tot, RadixExponent);
	Tot := LOR(Tot, DigitToNumber StrByt(Strng, I)) >>;
    if Sign < 0 then return -Tot;
    return Tot;
end;

syslsp procedure SysPowerOf2P Num;
    case Num of
      1: 0;
      2: 1;
      4: 2;
      8: 3;
      16: 4;
      32: 5;
      default: NIL
    end;

syslsp procedure ScannerError Message;
    StdError BldMsg("***** Error in token scanner: %s", Message);

syslsp procedure ScanPossibleDiphthong(Channel, StartChar);
begin scalar Alst, Target, Ch;
    LispVar TokType!* := '3;
    if null (Alst := get(StartChar, CurrentDiphthongIndicator())) then
	return StartChar;
    if null (Target := Atsoc(Ch := MkID ChannelReadChar Channel, Alst)) then
    <<  ChannelUnReadChar(Channel, IDInf Ch);
	return StartChar >>;
    return cdr Target;
end;

syslsp procedure ReadLine();
<<  MakeInputAvailable();
    ChannelReadLine LispVar IN!* >>;

syslsp procedure ChannelReadLine Chn;
begin scalar C;
    TokenBuffer[0] := -1;
    while (C := ChannelReadChar Chn) neq char EOL and C neq char EOF do
    <<  TokenBuffer[0] := TokenBuffer[0] + 1;
	StrByt(TokenBuffer, TokenBuffer[0]) := C >>;
    return if TokenBuffer[0] >= 0 then
    <<  StrByt(TokenBuffer, TokenBuffer[0] + 1) := char NULL;
	CopyString MkSTR TokenBuffer >>
    else '"";
end;

% Dummy definition of package conversion function

syslsp procedure Package U;
    NIL;

% Dummy definition of MakeInputAvailable, redefined by Emode

syslsp procedure MakeInputAvailable();
    NIL;

off SysLisp;

END;


REDUCE Historical
REDUCE Sourceforge Project | Historical SVN Repository | GitHub Mirror | SourceHut Mirror | NotABug Mirror | Chisel Mirror | Chisel RSS ]