[#7055] More on VC++ 2005 — Austin Ziegler <halostatue@...>

Okay. I've got Ruby compiling. I'm attempting to get everything in

17 messages 2006/01/05
[#7058] Re: More on VC++ 2005 — nobuyoshi nakada <nobuyoshi.nakada@...> 2006/01/06

Hi,

[#7084] mathn: ugly warnings — hadmut@... (Hadmut Danisch)

Hi,

22 messages 2006/01/10
[#7097] Re: mathn: ugly warnings — Daniel Berger <Daniel.Berger@...> 2006/01/10

Hadmut Danisch wrote:

[#7098] Design contracts and refactoring (was Re: mathn: ugly warnings) — mathew <meta@...> 2006/01/10

Daniel Berger wrote:

[#7118] Re: Design contracts and refactoring (was Re: mathn: ugly warnings) — mathew <meta@...> 2006/01/12

*Dean Wampler *<deanwampler gmail.com> writes:

[#7226] Fwd: Re: Question about massive API changes — "Sean E. Russell" <ser@...>

Hello,

23 messages 2006/01/28
[#7228] Re: Question about massive API changes — Caleb Tennis <caleb@...> 2006/01/28

>

PATCH: append option to sysread

From: Yohanes Santoso <ysantoso-rubycore@...>
Date: 2006-01-30 22:42:51 UTC
List: ruby-core #7249

Hi,

It is hard to do efficient incremental IO in Ruby -- that is, to keep
buffering until you have a complete message in the buffer -- because
you end up creating many new objects.

Here is a patch to IO#sysread that allows it to append to the given
string instead of replacing its contents (the current 1.8.x behaviour).

Also attached is the test script, test_io.rb, which simulates a
pathological case of incremental IO.

Basically, the test loops until it has read 10MB of data; on each
iteration it tries to detect an end-of-message marker (which of course
does not exist, since this is a pathological case).

Three methods are tested:
1. array method
each return from io#sysread is appended into the array and then the
array is joined and scanned for EOM.

2. concat method
append the return value of io#sysread into a string and scan the
string for EOM.

3. fastio method
io#sysread appends the data it reads to the given string, and the
string is then scanned for EOM.


~/tmp/ruby-1.8.4 $ ./ruby test_io.rb 
Reference content initialized. Size: 10485760 octets
                        user     system      total        real
array 10M           1.550000   1.020000   2.570000 (  2.784659)
concat 10M          1.500000   1.080000   2.580000 (  2.755901)
fastio 10M          0.510000   0.040000   0.550000 (  0.663247)


YS.

Attachments (2)

test_io.rb (2.71 KB, text/x-ruby)
# Append the local extension and library directories to the load path.
# NOTE(review): presumably this lets a freshly built ./ruby, run from its
# source tree, find its own socket extension -- confirm for your platform.
$: << './.ext/i686-linux'
$: << './lib'
require 'socket'
require 'benchmark'

# Total number of bytes each run reads from the socket (10 * 1024 * 1024).
READ_SIZE = 10*1024*1024

# Scans +str+ for the end-of-message marker and returns the offset of the
# first match, or nil when the marker is absent. The benchmark feeds it data
# that never contains the marker, so the whole buffer is scanned every time.
def match_for_non_existent(str)
  marker = /search_for_non_existent_end_of_message_marker/
  str =~ marker
end


# Cache for the reference data; filled on the first call to reference_content.
$reference_content = nil

# Returns READ_SIZE bytes read from the local 'chargen' service.
#
# The first call reads from the socket until READ_SIZE bytes have been
# collected, stores the joined result in $reference_content, dumps it to the
# file "++reference" and prints its size. Later calls return the cached string
# without touching the network.
def reference_content
  return $reference_content if $reference_content

  chunks = []
  received = 0
  TCPSocket.open('localhost', 'chargen') do |sock|
    until received >= READ_SIZE
      # Never ask for more than what is still missing.
      chunk = sock.sysread(READ_SIZE - received)
      chunks << chunk
      received += chunk.length
    end
  end

  $reference_content = chunks.join
  write_to_file("++reference", $reference_content)
  puts "Reference content initialized. Size: #{$reference_content.size} octets"
  $reference_content
end

# Verifies that +str+ equals the reference data returned by
# reference_content. Returns nil when they match and raises a RuntimeError
# with the message "Integrity compromised" when they do not.
def assert_integrity(str)
  return if str == reference_content

  raise "Integrity compromised"
end

# Writes +str+ to the file named +fname+, creating it or truncating any
# existing content, and returns the value of the underlying write call.
#
# The file is opened in binary mode ("wb"): the callers dump raw bytes read
# from a socket, and text mode would rewrite line endings on platforms that
# perform newline translation, so the dump would no longer match the data.
def write_to_file(fname, str)
  File.open(fname, "wb") { |f| f.write(str) }
end

# Runs one benchmarked read strategy against the local 'chargen' service.
#
# bm        - the report object yielded by Benchmark.bm; its #report is used
#             to time the strategy.
# test_name - label for the run; also used to name the dump file
#             "++<test_name>".
#
# The given block receives the open socket and must return the string it
# accumulated. Only the block is timed. Afterwards the string is written to
# disk and compared against the reference data via assert_integrity.
def simulate(bm, test_name)
  TCPSocket.open('localhost', 'chargen') do |sock|
    result = nil
    GC.start
    sleep(5) # let memory settle down first
    bm.report("#{test_name} #{READ_SIZE/1024/1024}M") do
      result = yield(sock)
    end
    write_to_file("++#{test_name}", result)
    assert_integrity(result)
  end
end


# Benchmarks three ways of accumulating READ_SIZE bytes from a socket while
# scanning the accumulated buffer for an end-of-message marker after every
# read. Prints the number of reads each strategy needed at the end.
#
# The allocation patterns below (re-joining the array, `+=`, appending
# sysread) are the subject of the measurement and are intentionally kept
# exactly as they are.
def simulate_incremental_io
  Benchmark.bm("                  ".length) do |bm|
    iterations = {}

    # array: collect every chunk in an array and re-join it after each read.
    simulate(bm, 'array') do |sock|
      chunks = []
      buffer = chunks.join
      reads = 0
      until buffer.length >= READ_SIZE
        chunks << sock.sysread(READ_SIZE - buffer.length)
        buffer = chunks.join
        match_for_non_existent(buffer)
        reads += 1
      end
      iterations[:array] = reads
      buffer
    end

    # concat: build a new string from the old one plus each chunk.
    simulate(bm, 'concat') do |sock|
      buffer = ""
      reads = 0
      until buffer.length >= READ_SIZE
        buffer += sock.sysread(READ_SIZE - buffer.length)
        match_for_non_existent(buffer)
        reads += 1
      end
      iterations[:concat] = reads
      buffer
    end

    # fastio: let sysread append straight into the buffer. The third argument
    # is the append flag, which needs a sysread that accepts it.
    simulate(bm, 'fastio') do |sock|
      buffer = ""
      reads = 0
      until buffer.length >= READ_SIZE
        sock.sysread(READ_SIZE - buffer.length, buffer, true)
        match_for_non_existent(buffer)
        reads += 1
      end
      iterations[:fastio] = reads
      buffer
    end

    p iterations
  end
end

# Fetch and cache the reference data up front, before any benchmark runs;
# each benchmark result is later compared against it.
reference_content
# Run the array, concat and fastio benchmarks.
simulate_incremental_io




~/tmp/ruby-1.8.4 $ ./ruby test_io.rb 
Reference content initialized. Size: 10485760 octets
                        user     system      total        real
array 10M           1.550000   1.020000   2.570000 (  2.784659)
concat 10M          1.500000   1.080000   2.580000 (  2.755901)
fastio 10M          0.510000   0.040000   0.550000 (  0.663247)
{:concat=>204, :array=>200, :fastio=>167}
fastio.patch (2.76 KB, text/x-patch)
--- ruby-1.8.4.orig/io.c	2005-12-19 12:11:20.000000000 -0500
+++ ruby-1.8.4/io.c	2006-01-30 16:35:49.000000000 -0500
@@ -2362,38 +2362,53 @@
 
 /*
  *  call-seq:
- *     ios.sysread(integer )    => string
+ *     ios.sysread(integer, string=nil, is_append=nil )    => string
  *  
  *  Reads <i>integer</i> bytes from <em>ios</em> using a low-level
- *  read and returns them as a string. Do not mix with other methods
- *  that read from <em>ios</em> or you may get unpredictable results.
- *  Raises <code>SystemCallError</code> on error and
- *  <code>EOFError</code> at end of file.
+ *  read and returns them as a string. If <i>string</i> is provided,
+ *  its content will be replaced or appended (if <i>append</i> is
+ *  <code>true</code>). Do not mix with other methods that read from
+ *  <em>ios</em> or you may get unpredictable results.  Raises
+ *  <code>SystemCallError</code> on error and <code>EOFError</code> at
+ *  end of file.
  *     
  *     f = File.new("testfile")
  *     f.sysread(16)   #=> "This is line one"
  */
 
+
 static VALUE
 rb_io_sysread(argc, argv, io)
     int argc;
     VALUE *argv;
     VALUE io;
 {
-    VALUE len, str;
+    VALUE len, str, op_append;
     OpenFile *fptr;
-    long n, ilen;
+    long n, ilen, initial_len;
+    char *write_pos;
 
-    rb_scan_args(argc, argv, "11", &len, &str);
+    rb_scan_args(argc, argv, "12", &len, &str, &op_append);
     ilen = NUM2LONG(len);
 
     if (NIL_P(str)) {
 	str = rb_str_new(0, ilen);
-    }
-    else {
+	initial_len = 0;
+	write_pos = RSTRING(str)->ptr;
+    } else {
 	StringValue(str);
 	rb_str_modify(str);
-	rb_str_resize(str, ilen);
+	initial_len = RSTRING(str)->len;
+	if ((NIL_P(op_append)) || 
+	    (op_append == Qfalse)) {
+	  /* overwrites existing content */
+	  rb_str_resize(str, ilen);
+	  write_pos = RSTRING(str)->ptr;
+	} else {
+	  /* appends */
+	  rb_str_resize(str, RSTRING(str)->len + ilen);
+	  write_pos = RSTRING(str)->ptr + initial_len;
+	}
     }
     if (ilen == 0) return str;
 
@@ -2408,23 +2423,23 @@
     n = fileno(fptr->f);
     rb_thread_wait_fd(fileno(fptr->f));
     rb_io_check_closed(fptr);
-    if (RSTRING(str)->len != ilen) {
+    if (RSTRING(str)->len != initial_len + ilen) {
 	rb_raise(rb_eRuntimeError, "buffer string modified");
     }
     TRAP_BEG;
-    n = read(fileno(fptr->f), RSTRING(str)->ptr, ilen);
+    n = read(fileno(fptr->f), write_pos, ilen);
     TRAP_END;
-
+    
     rb_str_unlocktmp(str);
     if (n == -1) {
 	rb_sys_fail(fptr->path);
     }
-    rb_str_resize(str, n);
+    rb_str_resize(str, initial_len + n);
     if (n == 0 && ilen > 0) {
 	rb_eof_error();
     }
-    RSTRING(str)->len = n;
-    RSTRING(str)->ptr[n] = '\0';
+    RSTRING(str)->len = initial_len + n;
+    RSTRING(str)->ptr[initial_len+n] = '\0';
     OBJ_TAINT(str);
 
     return str;

In This Thread

Prev Next