scan returns the primary tokens for the given JSON buffer in sequence. The responsibility of this pass is just to mark the slices of the buffer as being of various types. It is lax in how it interprets the multi-byte token types keyword, string and number, preferring to capture erroneous extra bytes that we presume the user intended to be part of the token, so that we can generate more helpful diagnostics in the parser.
(buf []byte, start pos)
| 42 | // extra bytes that we presume the user intended to be part of the token |
| 43 | // so that we can generate more helpful diagnostics in the parser. |
| 44 | func scan(buf []byte, start pos) []token { |
| 45 | var tokens []token |
| 46 | p := start |
| 47 | for { |
| 48 | if len(buf) == 0 { |
| 49 | tokens = append(tokens, token{ |
| 50 | Type: tokenEOF, |
| 51 | Bytes: nil, |
| 52 | Range: posRange(p, p), |
| 53 | }) |
| 54 | return tokens |
| 55 | } |
| 56 | |
| 57 | buf, p = skipWhitespace(buf, p) |
| 58 | |
| 59 | if len(buf) == 0 { |
| 60 | tokens = append(tokens, token{ |
| 61 | Type: tokenEOF, |
| 62 | Bytes: nil, |
| 63 | Range: posRange(p, p), |
| 64 | }) |
| 65 | return tokens |
| 66 | } |
| 67 | |
| 68 | start = p |
| 69 | |
| 70 | first := buf[0] |
| 71 | switch { |
| 72 | case first == '{' || first == '}' || first == '[' || first == ']' || first == ',' || first == ':' || first == '=': |
| 73 | p.Pos.Column++ |
| 74 | p.Pos.Byte++ |
| 75 | tokens = append(tokens, token{ |
| 76 | Type: tokenType(first), |
| 77 | Bytes: buf[0:1], |
| 78 | Range: posRange(start, p), |
| 79 | }) |
| 80 | buf = buf[1:] |
| 81 | case first == '"': |
| 82 | var tokBuf []byte |
| 83 | tokBuf, buf, p = scanString(buf, p) |
| 84 | tokens = append(tokens, token{ |
| 85 | Type: tokenString, |
| 86 | Bytes: tokBuf, |
| 87 | Range: posRange(start, p), |
| 88 | }) |
| 89 | case byteCanStartNumber(first): |
| 90 | var tokBuf []byte |
| 91 | tokBuf, buf, p = scanNumber(buf, p) |
| 92 | tokens = append(tokens, token{ |
| 93 | Type: tokenNumber, |
| 94 | Bytes: tokBuf, |
| 95 | Range: posRange(start, p), |
| 96 | }) |
| 97 | case byteCanStartKeyword(first): |
| 98 | var tokBuf []byte |
| 99 | tokBuf, buf, p = scanKeyword(buf, p) |
| 100 | tokens = append(tokens, token{ |
| 101 | Type: tokenKeyword, |