tokenize_test.go 7.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. package tokenize
  2. import "fmt"
  3. import "testing"
  4. func Test_tokenize(t *testing.T) {
  5. var test_cases = []struct {
  6. test_str string
  7. want_result tokenizeResult
  8. }{
  9. // emptiness
  10. {"", tokenizeResult{tokens: []string{}}},
  11. // unquoted whitespace
  12. {" ", tokenizeResult{tokens: []string{}}},
  13. {"\t", tokenizeResult{tokens: []string{}}},
  14. {"\n", tokenizeResult{tokens: []string{}}},
  15. {" \t\n", tokenizeResult{tokens: []string{}}},
  16. {" \tfoo", tokenizeResult{tokens: []string{"foo"}}},
  17. {"foo ", tokenizeResult{tokens: []string{"foo"}}},
  18. {"foo bar", tokenizeResult{tokens: []string{"foo", "bar"}}},
  19. {"foo\nbar", tokenizeResult{tokens: []string{"foo", "bar"}}},
  20. {"foo\tbar", tokenizeResult{tokens: []string{"foo", "bar"}}},
  21. // single quotes
  22. {"'", tokenizeResult{tokens: []string{}, code: tokenizeResultCodeMissingEndSingleQuote}},
  23. {"''", tokenizeResult{tokens: []string{""}}},
  24. {"fo''o", tokenizeResult{tokens: []string{"foo"}}},
  25. {"foo '' bar", tokenizeResult{tokens: []string{"foo", "", "bar"}}},
  26. {"foo 'and' bar", tokenizeResult{tokens: []string{"foo", "and", "bar"}}},
  27. {"foo '\\\t\n' bar", tokenizeResult{tokens: []string{"foo", "\\\t\n", "bar"}}},
  28. {"foo ' space bar '", tokenizeResult{tokens: []string{"foo", " space bar "}}},
  29. {"foo 'John \"Spaceman\" Doe'", tokenizeResult{tokens: []string{"foo", "John \"Spaceman\" Doe"}}},
  30. // double quotes
  31. {"\"", tokenizeResult{tokens: []string{}, code: tokenizeResultCodeMissingEndDoubleQuote}},
  32. {"\"\"", tokenizeResult{tokens: []string{""}}},
  33. {"fo\"\"o", tokenizeResult{tokens: []string{"foo"}}},
  34. {"foo \"\" bar", tokenizeResult{tokens: []string{"foo", "", "bar"}}},
  35. {"foo \"and\" bar", tokenizeResult{tokens: []string{"foo", "and", "bar"}}},
  36. {"foo \"\\\\\t\n\" bar", tokenizeResult{tokens: []string{"foo", "\\\t\n", "bar"}}},
  37. {"foo \" space bar \"", tokenizeResult{tokens: []string{"foo", " space bar "}}},
  38. {"foo \"Joe's lunch\"", tokenizeResult{tokens: []string{"foo", "Joe's lunch"}}},
  39. // lone backslash
  40. {"\\", tokenizeResult{tokens: []string{}, code: tokenizeResultCodeMissingEscapedCharacter}},
  41. {"\\\\", tokenizeResult{tokens: []string{"\\"}}},
  42. {"\\a", tokenizeResult{tokens: []string{"a"}}},
  43. {"\\?", tokenizeResult{tokens: []string{"?"}}},
  44. {"\\$", tokenizeResult{tokens: []string{"$"}}},
  45. {"\\ ", tokenizeResult{tokens: []string{" "}}},
  46. {"\\\t", tokenizeResult{tokens: []string{"\t"}}},
  47. {"\\\n", tokenizeResult{tokens: []string{"\n"}}},
  48. {"fo\\o", tokenizeResult{tokens: []string{"foo"}}},
  49. {"fo\\\\o", tokenizeResult{tokens: []string{"fo\\o"}}},
  50. {"foo \\ bar", tokenizeResult{tokens: []string{"foo", " bar"}}},
  51. {"foo \\\\ bar", tokenizeResult{tokens: []string{"foo", "\\", "bar"}}},
  52. {"foo \\'bar\\' baz", tokenizeResult{tokens: []string{"foo", "'bar'", "baz"}}},
  53. // backslash within double quotes
  54. {"\"\\", tokenizeResult{tokens: []string{}, code: tokenizeResultCodeMissingEscapedCharacter}},
  55. {"\"\\\"", tokenizeResult{tokens: []string{}, code: tokenizeResultCodeMissingEndDoubleQuote, err_loc: 2}},
  56. {"\" \\\" \"", tokenizeResult{tokens: []string{" \" "}}},
  57. {"\" \\\\ \"", tokenizeResult{tokens: []string{" \\ "}}},
  58. {"\" \\x \"", tokenizeResult{tokens: []string{" \\x "}}},
  59. {"fo\"o\\\\b\"ar", tokenizeResult{tokens: []string{"foo\\bar"}}},
  60. {"fo\"o \\\\ \"bar\" b\"az", tokenizeResult{tokens: []string{"foo \\ bar baz"}}},
  61. // CC cases
  62. {"cat \"/tmp/fox/'f 2'\" \"/tmp/fox/'f \\73'\" \"/tmp/fox/'f \\21\\'\"",
  63. tokenizeResult{tokens: []string{"cat", "/tmp/fox/'f 2'", "/tmp/fox/'f \\73'", "/tmp/fox/'f \\21\\'"}},
  64. },
  65. //
  66. }
  67. for _, tc := range test_cases {
  68. t.Run(fmt.Sprintf("%q", tc.test_str), func(t *testing.T) {
  69. have_result := tokenize(tc.test_str)
  70. if have_result.code != tc.want_result.code {
  71. t.Errorf("unexpected result .code: got %s, want %s in %v", have_result.code, tc.want_result.code, have_result)
  72. return
  73. }
  74. if have_result.err_loc != tc.want_result.err_loc {
  75. t.Errorf("unexpected result .err_loc: got %d, want %d in %v", have_result.err_loc, tc.want_result.err_loc, have_result)
  76. return
  77. }
  78. if len(have_result.tokens) != len(tc.want_result.tokens) {
  79. t.Errorf("unexpected number of result .tokens: got %d, want %d in %v", len(have_result.tokens), len(tc.want_result.tokens), have_result)
  80. return
  81. }
  82. for i := range have_result.tokens {
  83. if have_result.tokens[i] == tc.want_result.tokens[i] {
  84. continue
  85. }
  86. t.Errorf("unexpected token in result .tokens[%d]: got %q, want %q in %v", i, have_result.tokens[i], tc.want_result.tokens[i], have_result)
  87. t.Errorf(" .. test_str: ⟅%s⟆", tc.test_str)
  88. t.Errorf(" .. got: ⟅%s⟆", have_result.tokens[i])
  89. t.Errorf(" .. want: ⟅%s⟆", tc.want_result.tokens[i])
  90. return
  91. }
  92. })
  93. }
  94. }
  95. func Test_reader_tossUntilNeitherOf(t *testing.T) {
  96. var test_cases = []struct {
  97. test_startpos uint
  98. test_data string
  99. test_needles string
  100. want_endpos uint
  101. want_ok bool
  102. }{
  103. {0, "", "", 0, false},
  104. {0, "", "x", 0, false},
  105. {0, "", "xy", 0, false},
  106. {0, "x", "", 0, false},
  107. {0, "x", "x", 1, true},
  108. {0, "x", "xy", 1, true},
  109. {0, "x", "yx", 1, true},
  110. {0, "xa", "x", 1, true},
  111. {0, "xa", "xy", 1, true},
  112. {0, "xa", "yx", 1, true},
  113. {0, "xya", "x", 1, true},
  114. {0, "xya", "xy", 2, true},
  115. {0, "xya", "yx", 2, true},
  116. {1, "xxya", "x", 2, true},
  117. {1, "xxya", "xy", 3, true},
  118. {1, "xxya", "yx", 3, true},
  119. }
  120. for _, tc := range test_cases {
  121. t.Run(fmt.Sprintf("%q[%d:]-%q", tc.test_data, tc.test_startpos, tc.test_needles), func(t *testing.T) {
  122. test_reader := reader{data: tc.test_data}
  123. test_reader.pos = tc.test_startpos
  124. have_ok := test_reader.tossUntilNeitherOf(tc.test_needles)
  125. if test_reader.pos != tc.want_endpos {
  126. t.Errorf("unexpected position after toss: got %d, want %d", test_reader.pos, tc.want_endpos)
  127. return
  128. }
  129. if have_ok != tc.want_ok {
  130. t.Errorf("unexpected ok: got %v, want %v", have_ok, tc.want_ok)
  131. return
  132. }
  133. })
  134. }
  135. }
  136. func Test_reader_takeUntilAnyOf(t *testing.T) {
  137. var test_cases = []struct {
  138. test_startpos uint
  139. test_data string
  140. test_needles string
  141. want_endpos uint
  142. want_ok bool
  143. }{
  144. {0, "", "", 0, false},
  145. {0, "", "x", 0, false},
  146. {0, "", "xy", 0, false},
  147. {0, "x", "", 1, true},
  148. {0, "x", "x", 0, false},
  149. {0, "x", "xy", 0, false},
  150. {0, "x", "yx", 0, false},
  151. {0, "xa", "x", 0, false},
  152. {0, "xa", "xy", 0, false},
  153. {0, "xa", "yx", 0, false},
  154. {0, "xya", "x", 0, false},
  155. {0, "xya", "xy", 0, false},
  156. {0, "xya", "yx", 0, false},
  157. {0, "ax", "x", 1, true},
  158. {0, "ax", "xy", 1, true},
  159. {0, "ax", "yx", 1, true},
  160. {0, "axy", "x", 1, true},
  161. {0, "axy", "xy", 1, true},
  162. {0, "axy", "yx", 1, true},
  163. {0, "abxa", "x", 2, true},
  164. {0, "aboa", "x", 4, true},
  165. }
  166. for _, tc := range test_cases {
  167. t.Run(fmt.Sprintf("%q[%d:]-%q", tc.test_data, tc.test_startpos, tc.test_needles), func(t *testing.T) {
  168. test_reader := reader{data: tc.test_data}
  169. test_reader.pos = tc.test_startpos
  170. _, have_ok := test_reader.takeUntilAnyOf(tc.test_needles)
  171. if test_reader.pos != tc.want_endpos {
  172. t.Errorf("unexpected position after take: got %d, want %d", test_reader.pos, tc.want_endpos)
  173. return
  174. }
  175. if have_ok != tc.want_ok {
  176. t.Errorf("unexpected ok: got %v, want %v", have_ok, tc.want_ok)
  177. return
  178. }
  179. })
  180. }
  181. }