aboutsummaryrefslogtreecommitdiffstats
path: root/src/lread.c
diff options
context:
space:
mode:
authorKenichi Handa <[email protected]>2002-07-24 10:51:15 +0000
committerKenichi Handa <[email protected]>2002-07-24 10:51:15 +0000
commit8792be6679372e6ba9908123fb1cf81798b6fd61 (patch)
tree70380692661ef6f5bfb1e2515f2e3f2f5c7248ff /src/lread.c
parent571407d6e005f973a75f9db8afcb3f92a203b9b1 (diff)
Include "coding.h".
(Qget_emacs_mule_file_char, Qload_force_doc_strings, load_each_byte, unread_char): New variables. (readchar_backlog): This variable deleted. (readchar): Return a character unless load_each_byte is nonzero. Handle the case that readcharfun is Qget_emacs_mule_file_char or a cons. If unread_char is not -1, simply return it. (unreadchar): Handle the case that readcharfun is Qget_emacs_mule_file_char or a cons. Set unread_char if necessary. (read_multibyte): This function deleted. (readbyte_for_lambda, readbyte_from_file, readbyte_from_string) (read_emacs_mule_char): New functions. (Fload): Even if the file doesn't have the extention ".elc", if safe_to_load_p returns a positive version number, assume that the file contains bytecompiled code. If the version is less than 22, load the file while decoding multibyte sequences by emacs-mule. (readevalloop): Don't use readchar_backlog. (Fread): Likewise. Pay attention to the case that STREAM is a cons. (Fread_from_string): Pay attention to the case that STREAM is a cons. (read_escape): The arg BYTEREP deleted. (read1): Set load_each_byte to 1 temporarily while handling #@NUMBER. Don't call read_multibyte. (read_vector): Call Fread with a cons. If readcharfun is Qget_emacs_mule_file_char, decode the read string by emacs-mule. (read_list): If doc_reference is 2, make the cdr part string as unibyte. (syms_of_lread): Intern and staticpro Qget_emacs_mule_file_char and Qload_force_doc_strings.
Diffstat (limited to 'src/lread.c')
-rw-r--r--src/lread.c603
1 files changed, 350 insertions, 253 deletions
diff --git a/src/lread.c b/src/lread.c
index b4dc71514d..3d4a639418 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -31,6 +31,7 @@ Boston, MA 02111-1307, USA. */
#include "buffer.h"
#include "character.h"
#include "charset.h"
+#include "coding.h"
#include <epaths.h>
#include "commands.h"
#include "keyboard.h"
@@ -83,6 +84,12 @@ Lisp_Object Qascii_character, Qload, Qload_file_name;
Lisp_Object Qbackquote, Qcomma, Qcomma_at, Qcomma_dot, Qfunction;
Lisp_Object Qinhibit_file_name_operation;
+/* Used instead of Qget_file_char while loading *.elc files compiled
+ by Emacs 21 or older. */
+static Lisp_Object Qget_emacs_mule_file_char;
+
+static Lisp_Object Qload_force_doc_strings;
+
extern Lisp_Object Qevent_symbol_element_mask;
extern Lisp_Object Qfile_exists_p;
@@ -126,6 +133,11 @@ static int load_force_doc_strings;
/* Nonzero means read should convert strings to unibyte. */
static int load_convert_to_unibyte;
+/* Nonzero means READCHAR should read bytes one by one (not character)
+ when READCHARFUN is Qget_file_char or Qget_emacs_mule_file_char.
+ This is set to 1 by read1 temporarily while handling #@NUMBER. */
+static int load_each_byte;
+
/* Function to use for loading an Emacs lisp source file (not
compiled) instead of readevalloop. */
Lisp_Object Vload_source_file_function;
@@ -147,10 +159,6 @@ static int read_from_string_index;
static int read_from_string_index_byte;
static int read_from_string_limit;
-/* Number of bytes left to read in the buffer character
- that `readchar' has already advanced over. */
-static int readchar_backlog;
-
/* This contains the last string skipped with #@. */
static char *saved_doc_string;
/* Length of buffer allocated in saved_doc_string. */
@@ -190,6 +198,9 @@ int load_dangerous_libraries;
static Lisp_Object Vbytecomp_version_regexp;
+static int read_emacs_mule_char P_ ((int, int (*) (int, Lisp_Object),
+ Lisp_Object));
+
static void readevalloop P_ ((Lisp_Object, FILE*, Lisp_Object,
Lisp_Object (*) (), int,
Lisp_Object, Lisp_Object));
@@ -197,23 +208,42 @@ static Lisp_Object load_unwind P_ ((Lisp_Object));
static Lisp_Object load_descriptor_unwind P_ ((Lisp_Object));
+/* Functions that read one byte from the current source READCHARFUN
+ or unreads one byte. If the integer argument C is -1, it returns
+ one read byte, or -1 when there's no more byte in the source. If C
+ is 0 or positive, it unreads C, and the return value is not
+ interesting. */
+
+static int readbyte_for_lambda P_ ((int, Lisp_Object));
+static int readbyte_from_file P_ ((int, Lisp_Object));
+static int readbyte_from_string P_ ((int, Lisp_Object));
+
/* Handle unreading and rereading of characters.
Write READCHAR to read a character,
UNREAD(c) to unread c to be read again.
- These macros actually read/unread a byte code, multibyte characters
- are not handled here. The caller should manage them if necessary.
- */
+ These macros correctly read/unread multibyte characters. */
#define READCHAR readchar (readcharfun)
#define UNREAD(c) unreadchar (readcharfun, c)
+/* When READCHARFUN is Qget_file_char, Qget_emacs_mule_file_char,
+ Qlambda, or a cons, we use this to keep unread character because a
+ file stream can't handle multibyte-char unreading. The value -1
+ means that there's no unread character. */
+static int unread_char;
+
+
static int
readchar (readcharfun)
Lisp_Object readcharfun;
{
Lisp_Object tem;
register int c;
+ int (*readbyte) P_ ((int, Lisp_Object));
+ unsigned char buf[MAX_MULTIBYTE_LENGTH];
+ int i, len;
+ int emacs_mule_encoding = 0;
if (BUFFERP (readcharfun))
{
@@ -221,19 +251,9 @@ readchar (readcharfun)
int pt_byte = BUF_PT_BYTE (inbuffer);
- if (readchar_backlog > 0)
- /* We get the address of the byte just passed,
- which is the last byte of the character.
- The other bytes in this character are consecutive with it,
- because the gap can't be in the middle of a character. */
- return *(BUF_BYTE_ADDRESS (inbuffer, BUF_PT_BYTE (inbuffer) - 1)
- - --readchar_backlog);
-
if (pt_byte >= BUF_ZV_BYTE (inbuffer))
return -1;
- readchar_backlog = -1;
-
if (! NILP (inbuffer->enable_multibyte_characters))
{
/* Fetch the character code from the buffer. */
@@ -244,6 +264,8 @@ readchar (readcharfun)
else
{
c = BUF_FETCH_BYTE (inbuffer, pt_byte);
+ if (! ASCII_BYTE_P (c))
+ c = BYTE8_TO_CHAR (c);
pt_byte++;
}
SET_BUF_PT_BOTH (inbuffer, BUF_PT (inbuffer) + 1, pt_byte);
@@ -256,19 +278,9 @@ readchar (readcharfun)
int bytepos = marker_byte_position (readcharfun);
- if (readchar_backlog > 0)
- /* We get the address of the byte just passed,
- which is the last byte of the character.
- The other bytes in this character are consecutive with it,
- because the gap can't be in the middle of a character. */
- return *(BUF_BYTE_ADDRESS (inbuffer, XMARKER (readcharfun)->bytepos - 1)
- - --readchar_backlog);
-
if (bytepos >= BUF_ZV_BYTE (inbuffer))
return -1;
- readchar_backlog = -1;
-
if (! NILP (inbuffer->enable_multibyte_characters))
{
/* Fetch the character code from the buffer. */
@@ -279,6 +291,8 @@ readchar (readcharfun)
else
{
c = BUF_FETCH_BYTE (inbuffer, bytepos);
+ if (! ASCII_BYTE_P (c))
+ c = BYTE8_TO_CHAR (c);
bytepos++;
}
@@ -289,22 +303,15 @@ readchar (readcharfun)
}
if (EQ (readcharfun, Qlambda))
- return read_bytecode_char (0);
-
+ {
+ readbyte = readbyte_for_lambda;
+ goto read_multibyte;
+ }
if (EQ (readcharfun, Qget_file_char))
{
- c = getc (instream);
-#ifdef EINTR
- /* Interrupted reads have been observed while reading over the network */
- while (c == EOF && ferror (instream) && errno == EINTR)
- {
- clearerr (instream);
- c = getc (instream);
- }
-#endif
- return c;
+ readbyte = readbyte_from_file;
+ goto read_multibyte;
}
-
if (STRINGP (readcharfun))
{
if (read_from_string_index >= read_from_string_limit)
@@ -316,14 +323,61 @@ readchar (readcharfun)
return c;
}
+ if (CONSP (readcharfun))
+ {
+ /* This is the case that read_vector is reading from a unibyte
+ string that contains a byte sequence previously skipped
+ because of #@NUMBER. The car part of readcharfun is that
+ string, and the cdr part is a value of readcharfun given to
+ read_vector. */
+ readbyte = readbyte_from_string;
+ if (EQ (XCDR (readcharfun), Qget_emacs_mule_file_char))
+ emacs_mule_encoding = 1;
+ goto read_multibyte;
+ }
+ if (EQ (readcharfun, Qget_emacs_mule_file_char))
+ {
+ readbyte = readbyte_from_file;
+ emacs_mule_encoding = 1;
+ goto read_multibyte;
+ }
tem = call0 (readcharfun);
if (NILP (tem))
return -1;
return XINT (tem);
+
+ read_multibyte:
+ if (unread_char >= 0)
+ {
+ c = unread_char;
+ unread_char = -1;
+ return c;
+ }
+ c = (*readbyte) (-1, readcharfun);
+ if (c < 0 || ASCII_BYTE_P (c) || load_each_byte)
+ return c;
+ if (emacs_mule_encoding)
+ return read_emacs_mule_char (c, readbyte, readcharfun);
+ i = 0;
+ buf[i++] = c;
+ len = BYTES_BY_CHAR_HEAD (c);
+ while (i < len)
+ {
+ c = (*readbyte) (-1, readcharfun);
+ if (c < 0 || ! TRAILING_CODE_P (c))
+ {
+ while (--i > 1)
+ (*readbyte) (buf[i], readcharfun);
+ return BYTE8_TO_CHAR (buf[0]);
+ }
+ buf[i++] = c;
+ }
+ return STRING_CHAR (buf, i);
}
+
/* Unread the character C in the way appropriate for the stream READCHARFUN.
If the stream is a user function, call it with the char as argument. */
@@ -341,36 +395,26 @@ unreadchar (readcharfun, c)
struct buffer *b = XBUFFER (readcharfun);
int bytepos = BUF_PT_BYTE (b);
- if (readchar_backlog >= 0)
- readchar_backlog++;
+ BUF_PT (b)--;
+ if (! NILP (b->enable_multibyte_characters))
+ BUF_DEC_POS (b, bytepos);
else
- {
- BUF_PT (b)--;
- if (! NILP (b->enable_multibyte_characters))
- BUF_DEC_POS (b, bytepos);
- else
- bytepos--;
+ bytepos--;
- BUF_PT_BYTE (b) = bytepos;
- }
+ BUF_PT_BYTE (b) = bytepos;
}
else if (MARKERP (readcharfun))
{
struct buffer *b = XMARKER (readcharfun)->buffer;
int bytepos = XMARKER (readcharfun)->bytepos;
- if (readchar_backlog >= 0)
- readchar_backlog++;
+ XMARKER (readcharfun)->charpos--;
+ if (! NILP (b->enable_multibyte_characters))
+ BUF_DEC_POS (b, bytepos);
else
- {
- XMARKER (readcharfun)->charpos--;
- if (! NILP (b->enable_multibyte_characters))
- BUF_DEC_POS (b, bytepos);
- else
- bytepos--;
+ bytepos--;
- XMARKER (readcharfun)->bytepos = bytepos;
- }
+ XMARKER (readcharfun)->bytepos = bytepos;
}
else if (STRINGP (readcharfun))
{
@@ -378,16 +422,152 @@ unreadchar (readcharfun, c)
read_from_string_index_byte
= string_char_to_byte (readcharfun, read_from_string_index);
}
+ else if (CONSP (readcharfun))
+ {
+ unread_char = c;
+ }
else if (EQ (readcharfun, Qlambda))
- read_bytecode_char (1);
- else if (EQ (readcharfun, Qget_file_char))
- ungetc (c, instream);
+ {
+ unread_char = c;
+ }
+ else if (EQ (readcharfun, Qget_file_char)
+ || EQ (readcharfun, Qget_emacs_mule_file_char))
+ {
+ if (load_each_byte)
+ ungetc (c, instream);
+ else
+ unread_char = c;
+ }
else
call1 (readcharfun, make_number (c));
}
+static int
+readbyte_for_lambda (c, readcharfun)
+ int c;
+ Lisp_Object readcharfun;
+{
+ return read_bytecode_char (c >= 0);
+}
+
+
+static int
+readbyte_from_file (c, readcharfun)
+ int c;
+ Lisp_Object readcharfun;
+{
+ if (c >= 0)
+ {
+ ungetc (c, instream);
+ return 0;
+ }
+
+ c = getc (instream);
+#ifdef EINTR
+ /* Interrupted reads have been observed while reading over the network */
+ while (c == EOF && ferror (instream) && errno == EINTR)
+ {
+ clearerr (instream);
+ c = getc (instream);
+ }
+#endif
+ return (c == EOF ? -1 : c);
+}
+
+static int
+readbyte_from_string (c, readcharfun)
+ int c;
+ Lisp_Object readcharfun;
+{
+ Lisp_Object string = XCAR (readcharfun);
+
+ if (c >= 0)
+ {
+ read_from_string_index--;
+ read_from_string_index_byte
+ = string_char_to_byte (string, read_from_string_index);
+ }
+
+ if (read_from_string_index >= read_from_string_limit)
+ c = -1;
+ else
+ FETCH_STRING_CHAR_ADVANCE (c, string,
+ read_from_string_index,
+ read_from_string_index_byte);
+ return c;
+}
+
+
+/* Read one non-ASCII character from INSTREAM. The character is
+ encoded in `emacs-mule' and the first byte is already read in
+ C. */
+
+extern char emacs_mule_bytes[256];
+
+static int
+read_emacs_mule_char (c, readbyte, readcharfun)
+ int c;
+ int (*readbyte) P_ ((int, Lisp_Object));
+ Lisp_Object readcharfun;
+{
+ /* Emacs-mule coding uses at most 4-byte for one character. */
+ unsigned char buf[4];
+ int len = emacs_mule_bytes[c];
+ struct charset *charset;
+ int i;
+ unsigned code;
+
+ if (len == 1)
+ /* C is not a valid leading-code of `emacs-mule'. */
+ return BYTE8_TO_CHAR (c);
+
+ i = 0;
+ buf[i++] = c;
+ while (i < len)
+ {
+ c = (*readbyte) (-1, readcharfun);
+ if (c < 0xA0)
+ {
+ while (--i > 1)
+ (*readbyte) (buf[i], readcharfun);
+ return BYTE8_TO_CHAR (buf[0]);
+ }
+ buf[i++] = c;
+ }
+
+ if (len == 2)
+ {
+ charset = emacs_mule_charset[c];
+ code = buf[1] & 0x7F;
+ }
+ else if (len == 3)
+ {
+ if (buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_11
+ || buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_12)
+ {
+ charset = emacs_mule_charset[buf[1]];
+ code = buf[2] & 0x7F;
+ }
+ else
+ {
+ charset = emacs_mule_charset[buf[0]];
+ code = ((buf[1] << 8) | buf[2]) & 0x7F7F;
+ }
+ }
+ else
+ {
+ charset = emacs_mule_charset[buf[1]];
+ code = ((buf[2] << 8) | buf[3]) & 0x7F7F;
+ }
+ c = DECODE_CHAR (charset, code);
+ if (c < 0)
+ Fsignal (Qinvalid_read_syntax,
+ Fcons (build_string ("invalid multibyte form"), Qnil));
+ return c;
+}
+
+
static Lisp_Object read0 (), read1 (), read_list (), read_vector ();
-static int read_multibyte ();
static Lisp_Object substitute_object_recurse ();
static void substitute_object_in_subtree (), substitute_in_interval ();
@@ -553,11 +733,11 @@ DEFUN ("get-file-char", Fget_file_char, Sget_file_char, 0, 0, 0,
-/* Value is non-zero if the file asswociated with file descriptor FD
- is a compiled Lisp file that's safe to load. Only files compiled
- with Emacs are safe to load. Files compiled with XEmacs can lead
- to a crash in Fbyte_code because of an incompatible change in the
- byte compiler. */
+/* Value is a version number of byte compiled code if the file
+ associated with file descriptor FD is a compiled Lisp file that's
+ safe to load. Only files compiled with Emacs are safe to load.
+ Files compiled with XEmacs can lead to a crash in Fbyte_code
+ because of an incompatible change in the byte compiler. */
static int
safe_to_load_p (fd)
@@ -578,7 +758,7 @@ safe_to_load_p (fd)
with NUL bytes following it, but note the version. */
for (i = 0; i < nbytes && buf[i] != '\n'; ++i)
if (i == 4)
- version = buf[i];
+ version = buf[i];
if (i < nbytes
&& fast_c_string_match_ignore_case (Vbytecomp_version_regexp,
@@ -637,6 +817,8 @@ Return t if file exists. */)
Lisp_Object handler;
int safe_p = 1;
char *fmode = "r";
+ int version;
+
#ifdef DOS_NT
fmode = "rt";
#endif /* DOS_NT */
@@ -744,17 +926,20 @@ Return t if file exists. */)
Vloads_in_progress = Fcons (found, Vloads_in_progress);
}
+ version = -1;
if (!bcmp (&(XSTRING (found)->data[STRING_BYTES (XSTRING (found)) - 4]),
- ".elc", 4))
+ ".elc", 4)
+ || (version = safe_to_load_p (fd)) > 0)
/* Load .elc files directly, but not when they are
remote and have no handler! */
{
if (fd != -2)
{
struct stat s1, s2;
- int result, version;
+ int result;
- if (!(version = safe_to_load_p (fd)))
+ if (version < 0
+ && ! (version = safe_to_load_p (fd)))
{
safe_p = 0;
if (!load_dangerous_libraries)
@@ -769,30 +954,6 @@ Return t if file exists. */)
compiled = 1;
- if (version == 20) /* 21 isn't used */
- /* We're loading something compiled with Mule 3, 4 or 5,
- and thus potentially emacs-mule-encoded; load it with
- code conversion. (Perhaps the test should actually be
- <22?) We could check further on whether the comment
- mentions multibyte and only code-convert if it does. I
- doubt it's worth the effort. -- fx */
- {
- Lisp_Object val;
-
- if (fd >= 0)
- emacs_close (fd);
- /* load-with-code-conversion currently fails with
- emacs-mule non-ASCII doc strings. */
- error ("Can't currently load Emacs 20/1-compiled files: %s",
- XSTRING (found)->data);
-#if 0
- val = call4 (intern ("load-with-code-conversion"), found, file,
- NILP (noerror) ? Qnil : Qt,
- NILP (nomessage) ? Qnil : Qt);
-#endif
- return unbind_to (count, val);
- }
-
#ifdef DOS_NT
fmode = "rb";
#endif /* DOS_NT */
@@ -868,7 +1029,16 @@ Return t if file exists. */)
load_descriptor_list
= Fcons (make_number (fileno (stream)), load_descriptor_list);
load_in_progress++;
- readevalloop (Qget_file_char, stream, file, Feval, 0, Qnil, Qnil);
+ if (version >= 22)
+ readevalloop (Qget_file_char, stream, file, Feval, 0, Qnil, Qnil);
+ else
+ {
+ /* We can't handle a file which was compiled with
+ byte-compile-dynamic by older version of Emacs. */
+ specbind (Qload_force_doc_strings, Qt);
+ readevalloop (Qget_emacs_mule_file_char, stream, file, Feval, 0,
+ Qnil, Qnil);
+ }
unbind_to (count, Qnil);
/* Run any load-hooks for this file. */
@@ -1242,8 +1412,6 @@ readevalloop (readcharfun, stream, sourcename, evalfun, printflag, unibyte, read
record_unwind_protect (readevalloop_1, load_convert_to_unibyte ? Qt : Qnil);
load_convert_to_unibyte = !NILP (unibyte);
- readchar_backlog = -1;
-
GCPRO1 (sourcename);
LOADHIST_ATTACH (sourcename);
@@ -1423,14 +1591,13 @@ STREAM or the value of `standard-input' may be:
if (EQ (stream, Qt))
stream = Qread_char;
- readchar_backlog = -1;
new_backquote_flag = 0;
read_objects = Qnil;
if (EQ (stream, Qread_char))
return Fread_minibuffer (build_string ("Lisp expression: "), Qnil);
- if (STRINGP (stream))
+ if (STRINGP (stream) || ((CONSP (stream) && STRINGP (XCAR (stream)))))
return Fcar (Fread_from_string (stream, Qnil, Qnil));
return read0 (stream);
@@ -1445,18 +1612,23 @@ START and END optionally delimit a substring of STRING from which to read;
Lisp_Object string, start, end;
{
int startval, endval;
+ Lisp_Object str;
Lisp_Object tem;
- CHECK_STRING (string);
+ if (CONSP (string))
+ str = XCAR (string);
+ else
+ str = string;
+ CHECK_STRING (str);
if (NILP (end))
- endval = XSTRING (string)->size;
+ endval = XSTRING (str)->size;
else
{
CHECK_NUMBER (end);
endval = XINT (end);
- if (endval < 0 || endval > XSTRING (string)->size)
- args_out_of_range (string, end);
+ if (endval < 0 || endval > XSTRING (str)->size)
+ args_out_of_range (str, end);
}
if (NILP (start))
@@ -1466,11 +1638,11 @@ START and END optionally delimit a substring of STRING from which to read;
CHECK_NUMBER (start);
startval = XINT (start);
if (startval < 0 || startval > endval)
- args_out_of_range (string, start);
+ args_out_of_range (str, start);
}
read_from_string_index = startval;
- read_from_string_index_byte = string_char_to_byte (string, startval);
+ read_from_string_index_byte = string_char_to_byte (str, startval);
read_from_string_limit = endval;
new_backquote_flag = 0;
@@ -1502,65 +1674,16 @@ read0 (readcharfun)
static int read_buffer_size;
static char *read_buffer;
-/* Read multibyte form and return it as a character. C is a first
- byte of multibyte form, and rest of them are read from
- READCHARFUN. Store the byte length of the form into *NBYTES. */
-
-static int
-read_multibyte (c, readcharfun, nbytes)
- register int c;
- Lisp_Object readcharfun;
- int *nbytes;
-{
- /* We need the actual character code of this multibyte
- characters. */
- unsigned char str[MAX_MULTIBYTE_LENGTH];
- int len = 0;
- int bytes = BYTES_BY_CHAR_HEAD (c);
-
- str[len++] = c;
- while (len < bytes)
- {
- c = READCHAR;
- if (CHAR_HEAD_P (c))
- {
- UNREAD (c);
- break;
- }
- str[len++] = c;
- }
-
- if (len == bytes && MULTIBYTE_LENGTH_NO_CHECK (str) > 0)
- {
- *nbytes = len;
- return STRING_CHAR (str, len);
- }
- /* The byte sequence is not valid as multibyte. Unread all bytes
- but the first one, and return the first byte. */
- while (--len > 0)
- UNREAD (str[len]);
- *nbytes = 1;
- return str[0];
-}
-
/* Read a \-escape sequence, assuming we already read the `\'.
- If the escape sequence forces unibyte, store 1 into *BYTEREP.
- If the escape sequence forces multibyte and the returned character
- is raw 8-bit char, store 2 into *BYTEREP.
- If the escape sequence forces multibyte and the returned character
- is not raw 8-bit char, store 3 into *BYTEREP.
- Otherwise store 0 into *BYTEREP. */
+ If the escape sequence forces unibyte, return eight-bit-char. */
static int
-read_escape (readcharfun, stringp, byterep)
+read_escape (readcharfun, stringp)
Lisp_Object readcharfun;
int stringp;
- int *byterep;
{
register int c = READCHAR;
- *byterep = 0;
-
switch (c)
{
case -1:
@@ -1597,7 +1720,7 @@ read_escape (readcharfun, stringp, byterep)
error ("Invalid escape character syntax");
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
return c | meta_modifier;
case 'S':
@@ -1606,7 +1729,7 @@ read_escape (readcharfun, stringp, byterep)
error ("Invalid escape character syntax");
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
return c | shift_modifier;
case 'H':
@@ -1615,7 +1738,7 @@ read_escape (readcharfun, stringp, byterep)
error ("Invalid escape character syntax");
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
return c | hyper_modifier;
case 'A':
@@ -1624,7 +1747,7 @@ read_escape (readcharfun, stringp, byterep)
error ("Invalid escape character syntax");
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
return c | alt_modifier;
case 's':
@@ -1633,7 +1756,7 @@ read_escape (readcharfun, stringp, byterep)
error ("Invalid escape character syntax");
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
return c | super_modifier;
case 'C':
@@ -1643,7 +1766,7 @@ read_escape (readcharfun, stringp, byterep)
case '^':
c = READCHAR;
if (c == '\\')
- c = read_escape (readcharfun, 0, byterep);
+ c = read_escape (readcharfun, 0);
if ((c & ~CHAR_MODIFIER_MASK) == '?')
return 0177 | (c & CHAR_MODIFIER_MASK);
else if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
@@ -1683,10 +1806,8 @@ read_escape (readcharfun, stringp, byterep)
}
}
- if (c < 0x100)
- *byterep = 1;
- else
- *byterep = 3;
+ if (! ASCII_BYTE_P (i))
+ i = BYTE8_TO_CHAR (i);
return i;
}
@@ -1721,22 +1842,11 @@ read_escape (readcharfun, stringp, byterep)
}
if (count < 3 && i >= 0x80)
- *byterep = 2;
- else
- *byterep = 3;
+ return BYTE8_TO_CHAR (i);
return i;
}
default:
- if (EQ (readcharfun, Qget_file_char)
- && BASE_LEADING_CODE_P (c))
- {
- int nbytes;
-
- c = read_multibyte (c, readcharfun, &nbytes);
- if (nbytes > 1)
- *byterep = 3;
- }
return c;
}
}
@@ -1824,6 +1934,7 @@ read1 (readcharfun, pch, first_in_list)
int uninterned_symbol = 0;
*pch = 0;
+ load_each_byte = 0;
retry:
@@ -1898,12 +2009,14 @@ read1 (readcharfun, pch, first_in_list)
UNREAD (c);
tmp = read1 (readcharfun, pch, first_in_list);
- if (size_in_chars != XSTRING (tmp)->size
- /* We used to print 1 char too many
- when the number of bits was a multiple of 8.
- Accept such input in case it came from an old version. */
- && ! (XFASTINT (length)
- == (XSTRING (tmp)->size - 1) * BITS_PER_CHAR))
+ if (STRING_MULTIBYTE (tmp)
+ || (size_in_chars != XSTRING (tmp)->size
+ /* We used to print 1 char too many
+ when the number of bits was a multiple of 8.
+ Accept such input in case it came from an old
+ version. */
+ && ! (XFASTINT (length)
+ == (XSTRING (tmp)->size - 1) * BITS_PER_CHAR)))
Fsignal (Qinvalid_read_syntax,
Fcons (make_string ("#&...", 5), Qnil));
@@ -1969,6 +2082,7 @@ read1 (readcharfun, pch, first_in_list)
{
int i, nskip = 0;
+ load_each_byte = 1;
/* Read a decimal integer. */
while ((c = READCHAR) >= 0
&& c >= '0' && c <= '9')
@@ -1979,7 +2093,9 @@ read1 (readcharfun, pch, first_in_list)
if (c >= 0)
UNREAD (c);
- if (load_force_doc_strings && EQ (readcharfun, Qget_file_char))
+ if (load_force_doc_strings
+ && (EQ (readcharfun, Qget_file_char)
+ || EQ (readcharfun, Qget_emacs_mule_file_char)))
{
/* If we are supposed to force doc strings into core right now,
record the last string that we skipped,
@@ -2031,6 +2147,7 @@ read1 (readcharfun, pch, first_in_list)
c = READCHAR;
}
+ load_each_byte = 0;
goto retry;
}
if (c == '$')
@@ -2152,17 +2269,18 @@ read1 (readcharfun, pch, first_in_list)
case '?':
{
- int discard;
+ int modifiers;
c = READCHAR;
if (c < 0)
end_of_file_error ();
-
if (c == '\\')
- c = read_escape (readcharfun, 0, &discard);
- else if (EQ (readcharfun, Qget_file_char)
- && BASE_LEADING_CODE_P (c))
- c = read_multibyte (c, readcharfun, &discard);
+ c = read_escape (readcharfun, 0);
+ modifiers = c & CHAR_MODIFIER_MASK;
+ c &= ~CHAR_MODIFIER_MASK;
+ if (CHAR_BYTE8_P (c))
+ c = CHAR_TO_BYTE8 (c);
+ c |= modifiers;
return make_number (c);
}
@@ -2196,9 +2314,8 @@ read1 (readcharfun, pch, first_in_list)
if (c == '\\')
{
int modifiers;
- int byterep;
- c = read_escape (readcharfun, 1, &byterep);
+ c = read_escape (readcharfun, 1);
/* C is -1 if \ newline has just been seen */
if (c == -1)
@@ -2211,26 +2328,11 @@ read1 (readcharfun, pch, first_in_list)
modifiers = c & CHAR_MODIFIER_MASK;
c = c & ~CHAR_MODIFIER_MASK;
- if (byterep == 1)
- {
- force_singlebyte = 1;
- if (c >= 0x80)
- /* Raw 8-bit code */
- c = BYTE8_TO_CHAR (c);
- }
- else if (byterep > 1)
- {
- force_multibyte = 1;
- if (byterep == 2)
- c = BYTE8_TO_CHAR (c);
- }
- else if (c >= 0x80)
- {
- force_singlebyte = 1;
- c = BYTE8_TO_CHAR (c);
- }
-
- if (ASCII_CHAR_P (c))
+ if (CHAR_BYTE8_P (c))
+ force_singlebyte = 1;
+ else if (! ASCII_CHAR_P (c))
+ force_multibyte = 1;
+ else /* i.e. ASCII_CHAR_P (c) */
{
/* Allow `\C- ' and `\C-?'. */
if (modifiers == CHAR_CTL)
@@ -2264,34 +2366,10 @@ read1 (readcharfun, pch, first_in_list)
error ("Invalid modifier in string");
p += CHAR_STRING (c, (unsigned char *) p);
}
- else if (c >= 0x80)
+ else
{
- if (EQ (readcharfun, Qget_file_char))
- {
- if (BASE_LEADING_CODE_P (c))
- {
- int nbytes;
- c = read_multibyte (c, readcharfun, &nbytes);
- if (nbytes > 1)
- force_multibyte = 1;
- else
- {
- force_singlebyte = 1;
- c = BYTE8_TO_CHAR (c);
- }
- }
- else
- {
- force_singlebyte = 1;
- c = BYTE8_TO_CHAR (c);
- }
- }
- else
- force_multibyte = 1;
p += CHAR_STRING (c, (unsigned char *) p);
}
- else
- *p++ = c;
nchars++;
}
if (c < 0)
@@ -2371,11 +2449,7 @@ read1 (readcharfun, pch, first_in_list)
quoted = 1;
}
- if (! SINGLE_BYTE_CHAR_P (c))
- p += CHAR_STRING (c, p);
- else
- *p++ = c;
-
+ p += CHAR_STRING (c, p);
c = READCHAR;
}
@@ -2702,7 +2776,7 @@ read_vector (readcharfun, bytecodeflag)
XSTRING (bytestr)->size = STRING_BYTES (XSTRING (bytestr));
SET_STRING_BYTES (XSTRING (bytestr), -1);
- item = Fread (bytestr);
+ item = Fread (Fcons (bytestr, readcharfun));
if (!CONSP (item))
error ("invalid byte code");
@@ -2715,6 +2789,15 @@ read_vector (readcharfun, bytecodeflag)
/* Now handle the bytecode slot. */
ptr[COMPILED_BYTECODE] = read_pure ? Fpurecopy (bytestr) : bytestr;
}
+ else if (i == COMPILED_DOC_STRING
+ && STRINGP (item)
+ && ! STRING_MULTIBYTE (item))
+ {
+ if (EQ (readcharfun, Qget_emacs_mule_file_char))
+ item = Fdecode_coding_string (item, Qemacs_mule, Qnil, Qnil);
+ else
+ item = Fstring_as_multibyte (item);
+ }
}
ptr[i] = read_pure ? Fpurecopy (item) : item;
otem = XCONS (tem);
@@ -2812,7 +2895,14 @@ read_list (flag, readcharfun)
if (doc_reference == 2)
{
/* Get a doc string from the file we are loading.
- If it's in saved_doc_string, get it from there. */
+ If it's in saved_doc_string, get it from there.
+
+ Here, we don't know if the string is a
+ bytecode string or a doc string. As a
+ bytecode string must be unibyte, we always
+ return a unibyte string. If it is actually a
+ doc string, caller must make it
+ multibyte. */
int pos = XINT (XCDR (val));
/* Position is negative for user variables. */
if (pos < 0) pos = -pos;
@@ -2844,8 +2934,8 @@ read_list (flag, readcharfun)
saved_doc_string[to++] = c;
}
- return make_string (saved_doc_string + start,
- to - start);
+ return make_unibyte_string (saved_doc_string + start,
+ to - start);
}
/* Look in prev_saved_doc_string the same way. */
else if (pos >= prev_saved_doc_string_position
@@ -2876,11 +2966,12 @@ read_list (flag, readcharfun)
prev_saved_doc_string[to++] = c;
}
- return make_string (prev_saved_doc_string + start,
- to - start);
+ return make_unibyte_string (prev_saved_doc_string
+ + start,
+ to - start);
}
else
- return get_doc_string (val, 0, 0);
+ return get_doc_string (val, 1, 0);
}
return val;
@@ -3764,6 +3855,12 @@ to load. See also `load-dangerous-libraries'. */);
Qget_file_char = intern ("get-file-char");
staticpro (&Qget_file_char);
+ Qget_emacs_mule_file_char = intern ("get-emacs-mule-file-char");
+ staticpro (&Qget_emacs_mule_file_char);
+
+ Qload_force_doc_strings = intern ("load-force-doc-strings");
+ staticpro (&Qload_force_doc_strings);
+
Qbackquote = intern ("`");
staticpro (&Qbackquote);
Qcomma = intern (",");