[PATCH] Move charsets-in-region to C

Aidan Kehoe kehoea at parhasard.net
Tue Nov 14 09:51:47 EST 2006


Nothing too complex, and even the fragile test suite passes. Here are some
profiling numbers, where charsets-in-region is the implementation in C and
charsets-in-region0 is the implementation in Lisp: 

10,000 calls in *scratch*, vanilla startup

Function Name             Ticks/Total %Usage Calls GC-Usage/  Total
===============================/===== ====== ===== ========/=======
charsets-in-region           74/   88 77.083 10000    99576/ 120000
(profile overhead)           18/   18 18.750          20424/      0
charsets-in-region-*-10000    4/   96  4.167     1       40/ 120040
#<compiled-function (expr &optional arg) "...(36)" [was-profiling arg expr profiling-active-p ((byte-code ¬~Á ~Á~ [was-profiling stop-profiling] 1)) start-profiling eval profile-results clear-profiling-info ((stop-profiling))] 2 (/usr/local/lib/xemacs/xemacs-packages/lisp/xemacs-devel/profile.elc . 9000) (list (read (read-string Expression to profile: )) current-prefix-arg)>
                              0/   96  0.000              0/ 120028
Function Name              Ticks/Total %Usage Calls GC-Usage/  Total
================================/===== ====== ===== ========/=======
charsets-in-region0         1999/ 2304 89.281 10000   117000/ 120064
(profile overhead)           128/  128  5.717           3092/      0
char-charset                 110/   92  4.913 3950000                 
charsets-in-region0-*-10000    2/ 2306  0.089     1        0/ 120104
#<compiled-function (expr &optional arg) "...(36)" [was-profiling arg expr profiling-active-p ((byte-code ¬~Á ~Á~ [was-profiling stop-profiling] 1)) start-profiling eval profile-results clear-profiling-info ((stop-profiling))] 2 (/usr/local/lib/xemacs/xemacs-packages/lisp/xemacs-devel/profile.elc . 9000) (list (read (read-string Expression to profile: )) current-prefix-arg)>

10,000 calls on HELLO, vanilla startup

Function Name                  Ticks/Total %Usage Calls GC-Usage/  Total
====================================/===== ====== ===== ========/=======
charsets-in-region              2620/ 3351 80.690 10000  2040000/2040000
(in char-byte conversion)        576/  514 17.739 11570000                 
(in garbage collection)           47/   47  1.447     1                 
charsets-in-region-*-10000         4/ 3403  0.123     1       24/2040024
#<compiled-function (expr &optional arg) "...(36)" [was-profiling arg expr profiling-active-p ((byte-code ¬ƒÁ ˆÁ‡ [was-profiling stop-profiling] 1)) start-profiling eval profile-results clear-profiling-info ((stop-profiling))] 2 (/usr/local/lib/xemacs/xemacs-packages/lisp/xemacs-devel/profile.elc . 9000) (list (read (read-string Expression to profile: )) current-prefix-arg)>

(No figures for charsets-in-region0 in this run: the call crashed in the profiler.)

5,000 calls in HELLO, vanilla startup

Function Name                  Ticks/Total %Usage Calls GC-Usage/  Total
====================================/===== ====== ===== ========/=======
charsets-in-region              2567/ 3332 79.969 10000  2040000/2040000
(in char-byte conversion)        568/  479 17.695 11570000                 
(in garbage collection)           43/   43  1.340     1                 
(profile overhead)                28/   28  0.872                       
charsets-in-region-*-5000          4/ 3379  0.125     1       64/2040064

Function Name                  Ticks/Total %Usage Calls GC-Usage/  Total
====================================/===== ====== ===== ========/=======
charsets-in-region0            11693/13187 91.295 10000  2041944/2041944
(in char-byte conversion)        735/  597  5.739 23450000                 
char-charset                     326/  246  2.545 11740000                 
(in garbage collection)           46/   46  0.359     1                 
charsets-in-region0-*-5000         8/13195  0.062     1       40/2041984


lisp/ChangeLog addition:

2006-11-14  Aidan Kehoe  <kehoea at parhasard.net>

	* mule/mule-charset.el:
	* mule/mule-charset.el (charsets-in-string):
	Implement it in terms of charsets-in-region.
	* mule/mule-charset.el (charsets-in-region): Removed. It's now in
	C. 


src/ChangeLog addition:

2006-11-14  Aidan Kehoe  <kehoea at parhasard.net>

	* mule-charset.c:
	* mule-charset.c (Fcharsets_in_region):
	Added a charsets-in-region implementation in C. 


XEmacs Trunk source patch:
Diff command:   cvs -q diff -u
Files affected: src/text.c src/mule-charset.c lisp/mule/mule-charset.el

Index: lisp/mule/mule-charset.el
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/lisp/mule/mule-charset.el,v
retrieving revision 1.19
diff -u -r1.19 mule-charset.el
--- lisp/mule/mule-charset.el	2006/11/05 22:31:38	1.19
+++ lisp/mule/mule-charset.el	2006/11/14 13:49:48
@@ -38,42 +38,16 @@
 
 ;;;; Classifying text according to charsets
 
-;; the old version was broken in a couple of ways
-;; this is one of several versions, I tried a hash as well as the
-;; `prev-charset' cache used in the old version, but this was definitely
-;; faster than the hash version and marginally faster than the prev-charset
-;; version
-;; #### this really needs to be moved into C
-(defun charsets-in-region (start end &optional buffer)
-  "Return a list of the charsets in the region between START and END.
-BUFFER defaults to the current buffer if omitted."
-  (let (list)
-    (save-excursion
-      (if buffer
-	  (set-buffer buffer))
-      (save-restriction
-	(narrow-to-region start end)
-	(goto-char (point-min))
-	(while (not (eobp))
-	  ;; the first test will usually succeed on testing the
-	  ;; car of the list; don't waste time let-binding.
-	  (or (memq (char-charset (char-after (point))) list)
-	      (setq list (cons (char-charset (char-after (point))) list)))
-	  (forward-char))))
-    list))
-
 (defun charsets-in-string (string)
   "Return a list of the charsets in STRING."
-  (let (list)
-    (mapc (lambda (ch)
-	    ;; the first test will usually succeed on testing the
-	    ;; car of the list; don't waste time let-binding.
-	    (or (memq (char-charset ch) list)
-		(setq list (cons (char-charset ch) list))))
-	  string)
-    list))
+  (let (res)
+    (with-string-as-buffer-contents string
+      ;; charsets-in-region now in C. 
+      (setq res (charsets-in-region (point-min) (point-max))))
+    res))
 
 (defalias 'find-charset-string 'charsets-in-string)
+
 (defalias 'find-charset-region 'charsets-in-region)
 
 
Index: src/mule-charset.c
===================================================================
RCS file: /pack/xemacscvs/XEmacs/xemacs/src/mule-charset.c,v
retrieving revision 1.51
diff -u -r1.51 mule-charset.c
--- src/mule-charset.c	2006/11/12 13:40:08	1.51
+++ src/mule-charset.c	2006/11/14 13:49:48
@@ -937,6 +937,39 @@
   return Qnil;
 }
 
+DEFUN ("charsets-in-region", Fcharsets_in_region, 2, 3, 0, /*
+Return a list of the charsets in the region between START and END.
+BUFFER defaults to the current buffer if omitted.
+*/
+       (start, end, buffer))
+{
+  /* This function can GC */
+  struct buffer *buf = decode_buffer (buffer, 1);
+  Charbpos pos, stop;	/* Limits of the region. */
+  Lisp_Object res = Qnil;
+  int charsets[NUM_LEADING_BYTES];
+  Ibyte lb;
+  struct gcpro gcpro1;
+
+  memset(charsets, 0, sizeof(charsets));
+  get_buffer_range_char (buf, start, end, &pos, &stop, 0);
+
+  GCPRO1 (res);
+  while (pos < stop)
+    {
+      lb = ichar_leading_byte(BUF_FETCH_CHAR (buf, pos));
+      if (0 == charsets[lb - MIN_LEADING_BYTE])
+	{
+	  charsets[lb - MIN_LEADING_BYTE] = 1;
+	  res = Fcons (XCHARSET_NAME(charset_by_leading_byte(lb)), res);
+	}
+      ++pos;
+    }
+  UNGCPRO;
+
+  return res;
+} 
+
 
 /************************************************************************/
 /*                            memory usage                              */
@@ -1029,6 +1062,7 @@
   DEFSUBR (Fcharset_id);
   DEFSUBR (Fset_charset_ccl_program);
   DEFSUBR (Fset_charset_registries);
+  DEFSUBR (Fcharsets_in_region);
 
 #ifdef MEMORY_USAGE_STATS
   DEFSUBR (Fcharset_memory_usage);

-- 
Santa Maradona, priez pour moi!



More information about the XEmacs-Patches mailing list