# Reads characters one at a time from a file while tracking line and column
# numbers, and allows characters to be pushed back for lookahead.
class BufferedReader
  def initialize(file)
    @file = file
    @line = 1
    @chars = []
    @col_nums = []
    @col = 0
  end
  def filename
    return File.basename(@file.path)
  end
  def pos()
    "#{filename()}:#{lineno()},#{@col}"
  end
  def clear_comment()
    # Discard the rest of the current line (used for // and # comments).
    @file.gets
    @line += 1
  end
  def lineno()
    return @line
  end
  def pop_char()
    # Return a pushed-back character if one is buffered, otherwise read the
    # next character from the file (nil at end of file).
    val = @chars.pop() || @file.read(1)
    @col_nums[@line] = @col += 1
    if(val == "\n")
      @line += 1
      @col = 0
    end
    return val
  end
  def unpop_char(char)
    # Push a character back so the next pop_char() returns it again.
    if(char == "\n")
      @line -= 1
      @col = @col_nums[@line]
    end
    @col -= 1
    @chars.push(char)
  end
end
class Tokenizer
  # Fixed lexemes mapped to their token type symbols. Braces and brackets
  # share :tOpenB/:tCloseB, so the inverted Humanize map only keeps the
  # bracket spellings. ":=" and "<" are passed to tokenize() below but have
  # no entry here, so those tokens come back with a nil type.
  TOKEN_TYPES = {
    "{"  => :tOpenB,     "}" => :tCloseB,     ";" => :tSemi,   "," => :tComma,
    "("  => :tOpenParan, ")" => :tCloseParan, "=" => :tAssign, "." => :tDot,
    "<<" => :tLShift,    "*" => :tMult,       "+" => :tAdd,    "[" => :tOpenB,
    "]"  => :tCloseB
  }
  Humanize = TOKEN_TYPES.invert
  # A single lexed token: a type symbol, optional literal data, and the
  # source position it was read from.
  class Token
    attr_accessor :type,:data,:pos
    def to_s
      val = (@type == :tString) ? @data.inspect : @data.to_s
      return "#{val.ljust(50)}:#{@type}"
    end
    def humanize()
      if(@type == :tString)
        return "#{@data.inspect} string"
      elsif(@type == :tWord)
        return "#{@data.inspect} identifier"
      end
      return Humanize[@type].inspect
    end
    def inspect()
      data = ""
      data = " #{@data.inspect}" if(@data)
      "<Token :#{@type}#{data} at #{@pos}>"
    end
    def ==(other)
      # Tokens compare equal to their type symbol, or to another token with
      # the same type and data.
      if(other.class == Symbol)
        return @type == other
      elsif(other.class == self.class)
        return @type == other.type && @data == other.data
      else
        return false
      end
    end
  end
  def initialize(file)
    file = File.open(file,"r") if(file.class == String)
    @read = BufferedReader.new(file)
  end
  def qError(error)
    syntax_error(error)
  end
  def syntax_error(msg)
    err = QSyntaxError.new(msg)
    err.qstacktrace << "#{@read.lineno} of #{@read.filename}"
    raise err
  end
  # Return the next token without consuming it.
  def peak_token()
    @peak_token = next_token()
  end
  def peak()
    peak_token()
  end
  def next()
    next_token()
  end
  def pos()
    @read.pos
  end
  # Build a token whose type is looked up from TOKEN_TYPES.
  def tokenize(string_token)
    token = Token.new()
    token.type = TOKEN_TYPES[string_token]
    return token
  end
  def next_token()
    if(token = @peak_token)
      @peak_token = nil
      return token
    end
    token = next_token_cache()
    token.pos = self.pos()
    return token
  end
  def next_token_cache()
    token = Token.new()
    token.data = ""
    while (char = @read.pop_char())
      #puts "#{char.inspect}:#{token.inspect}"
      if(char == "/")
        # "//" starts a comment that runs to the end of the line.
        if(@read.pop_char == "/")
          @read.clear_comment()
        else
          syntax_error("unexpected #{char.inspect}")
        end
      elsif(char == "#")
        @read.clear_comment()
      elsif(char =~ /[\s\r\n]/)
        if(token.type)
          return token
        end
      elsif(char =~ /\"/)
        # String literal: everything up to the closing quote.
        token.type = :tString
        token.data = ""
        while((char = @read.pop_char) != "\"")
          syntax_error("unterminated string") if(char == nil)
          token.data += char
        end
        return token
      elsif(char =~ /[1-9]/)
        # Number literal: integer part, then an optional fractional part.
        token.type = :tNumber
        token.data = char.to_i
        while(char != ".")
          char = @read.pop_char
          if(char =~ /[0-9]/)
            token.data = char.to_i + token.data * 10
          elsif(char == ".")
          else
            @read.unpop_char(char)
            return token
          end
        end
        second_char = 0 # digits after the decimal point
        man = 1         # power-of-ten divisor for the fractional digits
        while(true)
          char = @read.pop_char
          if(char =~ /[0-9]/)
            second_char = char.to_i + second_char * 10
            man = man * 10
          else
            @read.unpop_char(char)
            token.data += second_char / man.to_f
            return token
          end
        end
      elsif(char == ":")
        if(@read.pop_char == "=")
          return tokenize(":=")
        end
        syntax_error("unexpected \":\"")
      elsif(char =~ /[;\{\}=()\",\*\+\.\[\]]/)
        return(tokenize(char))
      elsif(char =~ /[a-zA-Z_]/)
        # Identifier/keyword: letters, digits and underscores.
        token.type = :tWord
        token.data = char
        while(true)
          char = @read.pop_char()
          if(char && char =~ /\w/)
            token.data += char
          else
            @read.unpop_char(char)
            return token
          end
        end
      elsif(char == "<")
        if((char = @read.pop_char()) == "<")
          return tokenize("<<")
        else
          @read.unpop_char(char)
          return tokenize("<")
        end
      else
        syntax_error("unexpected #{char.inspect}")
      end
    end
    token.type = :tEnd
    return token
  end
  # Consume the next token and raise a syntax error unless it matches type.
  # An optional block can supply a custom error message for the bad token.
  def expect(type,&blk)
    token = self.next
    if(token != type)
      syntax_error(blk.call(token)) if(blk)
      syntax_error("unexpected: #{token.type}")
    end
    return token
  end
end
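
# A minimal usage sketch (an illustrative assumption, not part of the
# tokenizer itself): the sample source text and the ".q" extension are made
# up. It writes the sample to a Tempfile, since BufferedReader expects an IO
# object that responds to #path, then prints each token until :tEnd.
if __FILE__ == $0
  require 'tempfile'
  sample = Tempfile.new(["sample", ".q"])
  sample.write("width = 3.14;\n")
  sample.rewind
  tok = Tokenizer.new(sample)
  loop do
    token = tok.next
    puts token.inspect
    break if token == :tEnd
  end
  sample.close!
end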