Re: class of $1, $2 in 1.8.0
From:
matz@... (Yukihiro Matsumoto)
Date:
2003-02-17 15:55:37 UTC
List:
ruby-core #870
Hi,
In message "class of $1, $2 in 1.8.0"
on 03/02/07, dblack@candle.superlink.net <dblack@candle.superlink.net> writes:
|Just wondering about this:
|
|Fri Dec 20 00:16:06 2002 Nobuyoshi Nakada <nobu.nokada@softhome.net>
|
| * re.c (rb_reg_match_pre, rb_reg_match_post, match_to_a,
| match_select): return instances of same class as the original
| string. [ruby-dev:19119]
|
|which applies to the $1, $2... sub-matches. It caused scanf to blow
|up, and while I can fix it with a bunch of String.new() calls, I'm
|still left wondering what was wrong with having $1, $2... just be
|String objects.
First of all, I examined scanf-1.0.0 and came to the conclusion that the
design was plain wrong in making a subclass. My modified version is
attached.
Besides that, the class of a subrange (of both arrays and strings) is an
interesting issue, and I will continue working on it.
matz.
----
# scanf for Ruby, v.1.0.0
#
# August 21, 2002
#
# A product of the Austin Ruby Codefest (Austin, Texas, August 2002)
#
# Copyright (c) 2002, David Alan Black
# Released under the same licensing terms as Ruby itself.
#
# See README for information on usage and details of license.
#
#
module Scanf
# Compiles a single scanf format token -- one conversion specification such
# as "%d", " %5s" or "%*c", or a run of literal text -- into an anchored
# regular expression plus the name of the handler that converts the
# captured text into a Ruby value.
class FormatSpecifier
  # Source string of the anchored regular expression built for this token.
  attr_reader :re_string

  # spec:: the format token.  It is copied, so the caller's string is never
  #        modified and may be frozen.
  def initialize(spec)
    @spec = spec.dup
    # Leading whitespace is kept only when the token is a %c conversion.
    @spec.sub!(/\A\s+/, '') unless count_spec_space?
    pattern, @handler = compile
    @re_string = '\A' + pattern
  end

  # Converts the captured text +str+ with this token's handler.  Returns nil
  # for suppressed ("%*...") conversions, "%%" and literal text.
  def cook(str)
    send(@handler, str)
  end

  # Returns the compiled Regexp.  MULTILINE lets "." match a newline.
  def to_re
    Regexp.new(@re_string, Regexp::MULTILINE)
  end

  # Matches this token against the start of +str+ and returns the MatchData
  # (or nil).  Leading whitespace of +str+ is ignored unless the token is
  # whitespace sensitive (see count_input_space?).  Only whitespace at the
  # very start of the input is dropped, so the MatchData's post_match still
  # contains every character that follows the match.
  def match(str)
    input = count_input_space? ? str : str.sub(/\A\s+/, '')
    to_re.match(input)
  end

  # Returns the lower-case conversion letter of the token, or nil.
  def letter
    /%\*?\d*([a-z])/.match(@spec).to_a[1]
  end

  # Returns the maximum field width as an Integer, or nil when the token
  # does not specify one.
  def width
    digits = /%\*?(\d+)/.match(@spec).to_a[1]
    digits && digits.to_i
  end

  private

  # True when the token carries the assignment-suppression flag ("%*d").
  # Leading whitespace is tolerated because %c tokens keep theirs.
  def skip; /\A\s*%\*/.match(@spec); end

  # Conversion handlers; each returns nil for a suppressed conversion.
  def to_i(s); s.to_i if s && !skip; end
  def to_f(s); s.to_f if s && !skip; end
  def to_h(s); s.hex if s && !skip; end
  def to_o(s); s.oct if s && !skip; end
  def to_n(s); Integer(s) if s && !skip; end
  def keep(s); s unless skip; end
  def nil_proc(s); nil; end

  # True when the token is a %c conversion, whose leading whitespace in the
  # format must be preserved.
  def count_spec_space?
    /%\*?\d*c/.match(@spec)
  end

  # True when leading whitespace in the input must not be skipped: a %c
  # with no whitespace in front of it, or a bracket conversion.
  def count_input_space?
    /(?:\A|\S)%\*?\d*c|\[/.match(@spec)
  end

  # Returns [regexp_source, handler_name] for @spec.  The order of the
  # branches matters: the patterns are unanchored, so the first one that
  # matches wins.
  def compile
    h = '[A-Fa-f0-9]'
    case @spec
    # %[[:...:]]
    when /%\*?(\[\[:[a-z]+:\]\])/
      ["(#{$1}+)", :keep]
    # %5[[:...:]]
    when /%\*?(\d+)(\[\[:[a-z]+:\]\])/
      ["(#{$2}{1,#{$1}})", :keep]
    # %[...]
    when /%\*?(\[[^\]]*\])/
      ["(#{$1}+)", :keep]
    # %5[...]
    when /%\*?(\d+)(\[[^\]]*\])/
      ["(#{$2}{1,#{$1}})", :keep]
    # %i -- the hex alternative comes first so that the "0" of "0x1F" is not
    # taken as a complete octal number; a lone digit (including "0") is
    # accepted.
    when /%\*?i/
      ["([-+]?(?:(?:0[Xx]#{h}+)|(?:0[0-7]*)|(?:[1-9]\\d*)))", :to_n]
    # %5i
    when /%\*?(\d+)i/
      n = $1.to_i
      alts = []
      alts << "[1-9]\\d{1,#{n-1}}" if n > 1
      alts << "0[0-7]{1,#{n-1}}" if n > 1
      alts << "[-+]0[0-7]{1,#{n-2}}" if n > 2
      alts << "[-+][1-9]\\d{1,#{n-2}}" if n > 2
      alts << "0[Xx]#{h}{1,#{n-2}}" if n > 2
      alts << "[-+]0[Xx]#{h}{1,#{n-3}}" if n > 3
      alts << "\\d"
      ["(#{alts.join('|')})", :to_n]
    # %d, %u
    when /%\*?[du]/
      ['([-+]?\d+)', :to_i]
    # %5d, %5u
    when /%\*?(\d+)[du]/
      n = $1.to_i
      s = "("
      s += "[-+]\\d{1,#{n-1}}|" if n > 1
      s += "\\d{1,#{n}})"
      [s, :to_i]
    # %x
    when /%\*?[Xx]/
      ["([-+]?(?:0[Xx])?#{h}+)", :to_h]
    # %5x
    when /%\*?(\d+)[Xx]/
      n = $1.to_i
      alts = []
      alts << "[-+]0[Xx]#{h}{1,#{n-3}}" if n > 3
      alts << "0[Xx]#{h}{1,#{n-2}}" if n > 2
      alts << "[-+]#{h}{1,#{n-1}}" if n > 1
      alts << "#{h}{1,#{n}}"
      ["(#{alts.join('|')})", :to_h]
    # %o
    when /%\*?o/
      ['([-+]?[0-7]+)', :to_o]
    # %5o
    when /%\*?(\d+)o/
      ["([-+][0-7]{1,#{$1.to_i-1}}|[0-7]{1,#{$1}})", :to_o]
    # %f, %e, %E, %g -- all floating point conversions share one pattern
    when /%\*?[eEfg]/
      ['([-+]?((\d+(?>(?=[^\d.]|$)))|(\d*(\.(\d*([eE][-+]?\d+)?)))))', :to_f]
    # %5f, %5e, %5E, %5g
    when /%\*?(\d+)[eEfg]/
      ["(\\S{1,#{$1}})", :to_f]
    # %5s
    when /%\*?(\d+)s/
      ["(\\S{1,#{$1}})", :keep]
    # %s
    when /%\*?s/
      ['(\S+)', :keep]
    # %c preceded by whitespace in the format
    when /\s%\*?c/
      ["\\s*(.)", :keep]
    # %c
    when /%\*?c/
      ["(.)", :keep]
    # %5c (whitespace issues are handled by the count_*_space? methods)
    when /%\*?(\d+)c/
      ["(.{1,#{$1}})", :keep]
    # %%
    when /%%/
      ['(\s*%)', :nil_proc]
    # literal characters
    else
      ["(#{Regexp.escape(@spec)})", :nil_proc]
    end
  end
end
# A complete scanf format string, split into FormatSpecifier tokens and
# matched against input one token at a time.
class FormatString
  # pos::   number of input characters consumed by the last call to match
  # last::  index of the last token attempted by match (nil if none)
  # space:: true when the format string ends in whitespace (otherwise nil)
  attr_reader :pos, :last, :space

  SPECIFIERS = 'diuXxofeEgsc'
  REGEX = /
    # possible space, followed by...
    (?:\s*
    # percent sign, followed by...
    %
    # another percent sign, or...
    (?:%|
    # optional assignment suppression flag
    \*?
    # optional maximum field width
    \d*
    # named character class, ...
    (?:\[\[:\w+:\]\]|
    # traditional character class, or...
    \[[^\]]*\]|
    # specifier letter.
    [#{SPECIFIERS}])))|
    # or miscellaneous characters
    [^%\s]+/ix

  # str:: the format string.  A blank format produces no tokens.
  def initialize(str)
    if /\S/.match(str)
      @space = true if /\s\z/.match(str)
      @specs = str.scan(REGEX).map { |spec| FormatSpecifier.new(spec) }
    else
      @specs = []
    end
  end

  # True when the last match consumed every token.
  def done?; @done; end

  # True when the last match stopped at a token that did not match.
  def failed?; @failed; end

  # Truthy when the last match failed while non-blank input remained.
  def fatal?; @fatal; end

  # Removes and returns the first token.
  def shift
    @specs.shift
  end

  # Matches the tokens, in order, against +str+ and returns the array of
  # converted values (nil entries for tokens that produce no value).
  # Matching stops at the first token that fails.  Afterwards pos, last,
  # done?, failed? and fatal? describe the outcome.
  def match(str)
    acc = []
    @pos = 0
    final = @specs.size - 1
    @specs.each_with_index do |spec, i|
      @last = i
      @fatal = false
      m = spec.match(str)
      @failed = !m
      if @failed
        @fatal = fatal_error(spec, str)
        break
      end
      acc << spec.cook(m[1])
      rest = m.post_match
      # A format ending in whitespace also consumes the whitespace that
      # directly follows the last match.  Only that leading run is removed,
      # and it is counted once, so pos never exceeds what was really read.
      rest = rest.sub(/\A\s+/, '') if @space && i == final
      # spec.match may have skipped leading whitespace; the size difference
      # accounts for it together with the matched text.
      @pos += str.size - rest.size
      str = rest
    end
    @done = !@failed
    acc
  end

  private

  # A failure is fatal when the unmatched input still holds non-blank text.
  def fatal_error(spec, str)
    /\S/.match(str)
  end
end
end
class IO
  # Reads lines from the stream and scans them according to the format
  # string +fstr+, returning the array of converted values.
  #
  # Lines are read until the format is fully matched, a match fails while
  # non-blank input remains, or end of file is reached.  On a seekable
  # stream the position is then moved to just after the last byte the
  # format consumed.  A stream that cannot report or change its position
  # (a pipe, for instance) is scanned all the same and left wherever
  # reading stopped.
  def scanf(fstr)
    start =
      begin
        pos
      rescue SystemCallError
        nil # not seekable; skip the final repositioning
      end
    fs = Scanf::FormatString.new(fstr)
    buffer = "".dup
    matched = 0 # bytes consumed so far, because seek offsets are in bytes
    m = []
    until eof
      buffer << gets
      m.concat(fs.match(buffer).compact)
      bytes_before = buffer.bytesize
      # fs.pos may point past the end of the buffer; treat that as "all of it".
      buffer = buffer[fs.pos..-1] || ""
      matched += bytes_before - buffer.bytesize
      break if fs.done? || fs.fatal?
      # Drop the specifiers already satisfied so that the next line resumes
      # with the one that ran out of input.
      fs.last.times { fs.shift }
    end
    if start
      begin
        seek(start + matched, IO::SEEK_SET)
      rescue SystemCallError
        # Repositioning is best effort only.
      end
    end
    m
  end
end
class String
  # Scans the receiver according to the format string +fs+ and returns the
  # converted values as an array; nil results are dropped.
  def scanf(fs)
    parsed = Scanf::FormatString.new(fs)
    fields = parsed.match(self)
    fields.compact
  end
end
module Kernel
  # Scans standard input (the STDIN constant) according to the format
  # string +fs+ and returns whatever STDIN.scanf returns.
  def scanf(fs)
    source = STDIN
    source.scanf(fs)
  end
end