
Implement #NI6

Alois Mahdal committed 3 days ago
Commit 1901c187c6

3 changed files with 309 additions and 5 deletions:
  1. app/runnable/runnable.go (+5 -1)
  2. app/tokenize/tokenize.go (+200 -4)
  3. app/tokenize/tokenize_test.go (+104 -0)

app/runnable/runnable.go (+5 -1)

@@ -60,7 +60,11 @@ func (e ParseError) Error() string {
 }
 
 func Parse(ctx *core.Context, command_line string) (Runnable, error) {
-	tokens := tokenize.Tokenize(command_line)
+	tokens, err := tokenize.Tokenize(command_line)
+	if err != nil {
+		return nil, err
+	}
+	//fmt.Printf("Parse():tokens=%#v\n", tokens)
 	if len(tokens) == 0 { // ie. empty or whitespace-only input
 		return nil, ParseError{
 			code: ParseErrorCodeNothing,
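
With this change, tokenizer failures propagate out of Parse unchanged. A minimal sketch of the caller-side effect, not part of the commit, assuming the package is imported as runnable and some ctx *core.Context is already in hand:

	if _, err := runnable.Parse(ctx, "say 'hi"); err != nil {
		fmt.Println(err) // unterminated single-quote: at 4
	}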

app/tokenize/tokenize.go (+200 -4)

@@ -1,8 +1,204 @@
 package tokenize
 
-import "strings"
+import (
+	"fmt"
+	"strings"
+)
 
-func Tokenize(str string) []string {
-	tokens := strings.Fields(str)
-	return tokens
+type reader struct {
+	data string
+	pos  uint
+}
+
+func (self *reader) len() uint {
+	return uint(len(self.data))
+}
+
+func (self *reader) checkPos() {
+	if self.pos > uint(len(self.data)) {
+		panic(fmt.Sprintf("invalid cursor state: pos=%d but len(data)=%d", self.pos, len(self.data)))
+	}
+}
+
+func (self *reader) peekByte() (byte, bool) {
+	self.checkPos()
+	if self.done() {
+		return 0, false
+	}
+	return self.data[self.pos], true
+}
+
+func (self *reader) takeUntil(needle string) (string, bool) {
+	self.checkPos()
+	idx := strings.Index(self.data[self.pos:], needle)
+	if idx == -1 {
+		return "", false
+	}
+	end := self.pos + uint(idx)
+	out := self.data[self.pos:end]
+	self.pos = end
+	return out, true
+}
+
+func (self *reader) tossUntilNeitherOf(needles string) bool {
+	self.checkPos()
+	match := func(b byte, ns string) bool {
+		for i := range len(ns) {
+			if b == ns[i] {
+				return true
+			}
+		}
+		return false
+	}
+	tossed := uint(0)
+	for {
+		idx := self.pos + tossed
+		if idx == self.len() {
+			break
+		}
+		if match(self.data[idx], needles) {
+			tossed += 1
+			continue
+		}
+		break
+	}
+	self.pos += tossed
+	return tossed > 0
+}
+
+func (self *reader) tossChar() {
+	self.checkPos()
+	self.pos += 1
+}
+
+func (self *reader) done() bool {
+	self.checkPos()
+	return self.pos == self.len()
+}
+
+type builder struct {
+	tokens    []string
+	buf_set   bool
+	buf_chars string
+}
+
+func make_builder() builder {
+	return builder{tokens: make([]string, 0), buf_set: false, buf_chars: ""}
+}
+
+func (self *builder) bufAppend(chars string) {
+	if self.buf_set {
+		self.buf_chars += chars
+	} else {
+		self.buf_chars = chars
+		self.buf_set = true
+	}
+}
+
+func (self *builder) bufAppendChar(char byte) {
+	if !self.buf_set {
+		self.buf_chars = ""
+		self.buf_set = true
+	}
+	self.buf_chars += string(char)
+}
+
+func (self *builder) bufCommit() {
+	if !self.buf_set {
+		return
+	}
+	self.tokens = append(self.tokens, self.buf_chars)
+	self.buf_chars = ""
+	self.buf_set = false
+}
+
+type tokenizeResult struct {
+	tokens  []string
+	code    tokenizeResultCode
+	err_loc uint
+}
+
+type tokenizeResultCode uint8
+
+func (self tokenizeResultCode) String() string {
+	switch self {
+	case tokenizeResultCodeOk:
+		return "tokenizeResultCodeOk"
+	case tokenizeResultCodeMissingEndSingleQuote:
+		return "tokenizeResultCodeMissingEndSingleQuote"
+	default:
+		return fmt.Sprintf("unknown!tokenizeResultCode(%d)", self)
+	}
+}
+
+const (
+	tokenizeResultCodeOk tokenizeResultCode = iota
+	tokenizeResultCodeMissingEndSingleQuote
+)
+
+func tokenize(str string) tokenizeResult {
+	rdr := reader{data: str}
+	b := make_builder()
+	for {
+		this_byte, ok := rdr.peekByte()
+		if !ok {
+			break
+		}
+		switch this_byte {
+		case ' ':
+			b.bufCommit()
+			rdr.tossUntilNeitherOf(" \t\n")
+		case '\t':
+			b.bufCommit()
+			rdr.tossUntilNeitherOf(" \t\n")
+		case '\n':
+			b.bufCommit()
+			rdr.tossUntilNeitherOf(" \t\n")
+		case '\'':
+			rdr.tossChar() // first `'`
+			new_chars, found := rdr.takeUntil("'")
+			if !found {
+				return tokenizeResult{code: tokenizeResultCodeMissingEndSingleQuote, err_loc: rdr.pos - 1}
+			}
+			b.bufAppend(new_chars)
+			rdr.tossChar() // the second `'`
+		default:
+			b.bufAppendChar(this_byte)
+			rdr.tossChar()
+		}
+	}
+	b.bufCommit()
+	return tokenizeResult{tokens: b.tokens}
+}
+
+func Tokenize(str string) ([]string, error) {
+	res := tokenize(str)
+	switch res.code {
+	case tokenizeResultCodeOk:
+		return res.tokens, nil
+	case tokenizeResultCodeMissingEndSingleQuote:
+		return nil, TokenizeError{code: TokenizeErrorCodeMissingEndSingleQuote, loc: res.err_loc}
+	default:
+		return nil, TokenizeError{code: TokenizeErrorCodeGeneral, loc: res.err_loc}
+	}
+}
+
+type TokenizeError struct {
+	code TokenizeErrorCode
+	loc  uint
+}
+type TokenizeErrorCode uint8
+
+const (
+	TokenizeErrorCodeGeneral TokenizeErrorCode = iota
+	TokenizeErrorCodeMissingEndSingleQuote
+)
+
+func (e TokenizeError) Error() string {
+	switch e.code {
+	case TokenizeErrorCodeMissingEndSingleQuote:
+		return fmt.Sprintf("unterminated single-quote: at %d", e.loc)
+	default:
+		return fmt.Sprintf("unknown TokenizeError code: .code=%d .loc=%d", e.code, e.loc)
+	}
 }
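
For context, a minimal sketch (not part of the commit) of how the reworked two-value Tokenize API behaves; the import path is hypothetical, since the module path is not visible on this page:

	package main

	import (
		"fmt"

		"example.invalid/app/tokenize" // hypothetical module path
	)

	func main() {
		// Single-quoted whitespace is preserved inside a token.
		tokens, err := tokenize.Tokenize("foo 'a b' bar")
		fmt.Println(tokens, err) // [foo a b bar] <nil>

		// An unterminated single quote yields a TokenizeError.
		_, err = tokenize.Tokenize("echo 'oops")
		fmt.Println(err) // unterminated single-quote: at 5
	}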

app/tokenize/tokenize_test.go (+104 -0)

@@ -0,0 +1,104 @@
+package tokenize
+
+import "fmt"
+import "testing"
+
+func Test_tokenize(t *testing.T) {
+	var test_cases = []struct {
+		test_str    string
+		want_result tokenizeResult
+	}{
+
+		// emptiness
+		{"", tokenizeResult{tokens: []string{}}},
+
+		// unquoted whitespace
+		{" ", tokenizeResult{tokens: []string{}}},
+		{"\t", tokenizeResult{tokens: []string{}}},
+		{"\n", tokenizeResult{tokens: []string{}}},
+		{" \t\n", tokenizeResult{tokens: []string{}}},
+		{" \tfoo", tokenizeResult{tokens: []string{"foo"}}},
+		{"foo ", tokenizeResult{tokens: []string{"foo"}}},
+		{"foo bar", tokenizeResult{tokens: []string{"foo", "bar"}}},
+		{"foo\nbar", tokenizeResult{tokens: []string{"foo", "bar"}}},
+		{"foo\tbar", tokenizeResult{tokens: []string{"foo", "bar"}}},
+
+		// single quotes
+		{"'", tokenizeResult{tokens: []string{}, code: tokenizeResultCodeMissingEndSingleQuote}},
+		{"''", tokenizeResult{tokens: []string{""}}},
+		{"fo''o", tokenizeResult{tokens: []string{"foo"}}},
+		{"foo '' bar", tokenizeResult{tokens: []string{"foo", "", "bar"}}},
+		{"foo 'and' bar", tokenizeResult{tokens: []string{"foo", "and", "bar"}}},
+		{"foo '\\\t\n' bar", tokenizeResult{tokens: []string{"foo", "\\\t\n", "bar"}}},
+		{"foo ' space bar '", tokenizeResult{tokens: []string{"foo", " space bar "}}},
+		{"foo 'John \"Spaceman\" Doe'", tokenizeResult{tokens: []string{"foo", "John \"Spaceman\" Doe"}}},
+
+		//
+	}
+	for _, tc := range test_cases {
+		t.Run(fmt.Sprintf("%q", tc.test_str), func(t *testing.T) {
+			have_result := tokenize(tc.test_str)
+			if have_result.code != tc.want_result.code {
+				t.Errorf("unexpected result .code: got %s, want %s in %v", have_result.code, tc.want_result.code, have_result)
+				return
+			}
+			if have_result.err_loc != tc.want_result.err_loc {
+				t.Errorf("unexpected result .err_loc: got %d, want %d in %v", have_result.err_loc, tc.want_result.err_loc, have_result)
+				return
+			}
+			if len(have_result.tokens) != len(tc.want_result.tokens) {
+				t.Errorf("unexpected number of result .tokens: got %d, want %d in %v", len(have_result.tokens), len(tc.want_result.tokens), have_result)
+				return
+			}
+			for i := range have_result.tokens {
+				if have_result.tokens[i] == tc.want_result.tokens[i] {
+					continue
+				}
+				t.Errorf("unexpected token in result .tokens[%d]: got %q, want %q in %v", i, have_result.tokens[i], tc.want_result.tokens[i], have_result)
+				return
+			}
+		})
+	}
+}
+
+func Test_reader_tossUntilNeitherOf(t *testing.T) {
+	var test_cases = []struct {
+		test_data     string
+		test_needles  string
+		test_startpos uint
+		want_endpos   uint
+		want_ok       bool
+	}{
+		{"", "", 0, 0, false},
+		{"", "x", 0, 0, false},
+		{"", "xy", 0, 0, false},
+		{"x", "", 0, 0, false},
+		{"x", "x", 0, 1, true},
+		{"x", "xy", 0, 1, true},
+		{"x", "yx", 0, 1, true},
+		{"xa", "x", 0, 1, true},
+		{"xa", "xy", 0, 1, true},
+		{"xa", "yx", 0, 1, true},
+		{"xya", "x", 0, 1, true},
+		{"xya", "xy", 0, 2, true},
+		{"xya", "yx", 0, 2, true},
+		{"xxya", "x", 1, 2, true},
+		{"xxya", "xy", 1, 3, true},
+		{"xxya", "yx", 1, 3, true},
+	}
+	for _, tc := range test_cases {
+		t.Run(fmt.Sprintf("%q[%d:]-%q", tc.test_data, tc.test_startpos, tc.test_needles), func(t *testing.T) {
+			test_reader := reader{data: tc.test_data}
+			test_reader.pos = tc.test_startpos
+			have_ok := test_reader.tossUntilNeitherOf(tc.test_needles)
+			if test_reader.pos != tc.want_endpos {
+				t.Errorf("unexpected position after toss: got %d, want %d", test_reader.pos, tc.want_endpos)
+				return
+			}
+			if have_ok != tc.want_ok {
+				t.Errorf("unexpected ok: got %v, want %v", have_ok, tc.want_ok)
+				return
+			}
+		})
+	}
+}
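
The new table-driven cases can be run with the standard tooling from the repository root, e.g.:

	go test ./app/tokenize/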