360 lines
6.7 KiB
Go
360 lines
6.7 KiB
Go
package httphead
|
|
|
|
import (
|
|
"bytes"
|
|
)
|
|
|
|
// ItemType identifies the kind of lexical item produced by the scanner.
type ItemType int

// Item kinds reported by Scanner.Type and ScanToken. Values are assigned in
// declaration order, so the zero value of ItemType is ItemUndef.
const (
	// ItemUndef reports that the item is undefined (nothing has been
	// scanned, or scanning failed).
	ItemUndef ItemType = iota
	// ItemToken reports that the item is an RFC2616 token.
	ItemToken
	// ItemSeparator reports that the item is an RFC2616 separator.
	ItemSeparator
	// ItemString reports that the item is an RFC2616 quoted string.
	ItemString
	// ItemComment reports that the item is an RFC2616 comment.
	ItemComment
	// ItemOctet reports that the item is a raw slice of octets.
	ItemOctet
)
|
|
|
|
// Scanner represents header tokens scanner.
|
|
// See https://tools.ietf.org/html/rfc2616#section-2
|
|
type Scanner struct {
|
|
data []byte
|
|
pos int
|
|
|
|
itemType ItemType
|
|
itemBytes []byte
|
|
|
|
err bool
|
|
}
|
|
|
|
// NewScanner creates new RFC2616 data scanner.
|
|
func NewScanner(data []byte) *Scanner {
|
|
return &Scanner{data: data}
|
|
}
|
|
|
|
// Next scans for next token. It returns true on successful scanning, and false
|
|
// on error or EOF.
|
|
func (l *Scanner) Next() bool {
|
|
c, ok := l.nextChar()
|
|
if !ok {
|
|
return false
|
|
}
|
|
switch c {
|
|
case '"': // quoted-string;
|
|
return l.fetchQuotedString()
|
|
|
|
case '(': // comment;
|
|
return l.fetchComment()
|
|
|
|
case '\\', ')': // unexpected chars;
|
|
l.err = true
|
|
return false
|
|
|
|
default:
|
|
return l.fetchToken()
|
|
}
|
|
}
|
|
|
|
// FetchUntil fetches ItemOctet from current scanner position to first
|
|
// occurence of the c or to the end of the underlying data.
|
|
func (l *Scanner) FetchUntil(c byte) bool {
|
|
l.resetItem()
|
|
if l.pos == len(l.data) {
|
|
return false
|
|
}
|
|
return l.fetchOctet(c)
|
|
}
|
|
|
|
// Peek reads byte at current position without advancing it. On end of data it
|
|
// returns 0.
|
|
func (l *Scanner) Peek() byte {
|
|
if l.pos == len(l.data) {
|
|
return 0
|
|
}
|
|
return l.data[l.pos]
|
|
}
|
|
|
|
// Peek2 reads two first bytes at current position without advancing it.
|
|
// If there not enough data it returs 0.
|
|
func (l *Scanner) Peek2() (a, b byte) {
|
|
if l.pos == len(l.data) {
|
|
return 0, 0
|
|
}
|
|
if l.pos+1 == len(l.data) {
|
|
return l.data[l.pos], 0
|
|
}
|
|
return l.data[l.pos], l.data[l.pos+1]
|
|
}
|
|
|
|
// Buffered reporst how many bytes there are left to scan.
|
|
func (l *Scanner) Buffered() int {
|
|
return len(l.data) - l.pos
|
|
}
|
|
|
|
// Advance moves current position index at n bytes. It returns true on
|
|
// successful move.
|
|
func (l *Scanner) Advance(n int) bool {
|
|
l.pos += n
|
|
if l.pos > len(l.data) {
|
|
l.pos = len(l.data)
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
// Skip skips all bytes until first occurence of c.
|
|
func (l *Scanner) Skip(c byte) {
|
|
if l.err {
|
|
return
|
|
}
|
|
// Reset scanner state.
|
|
l.resetItem()
|
|
|
|
if i := bytes.IndexByte(l.data[l.pos:], c); i == -1 {
|
|
// Reached the end of data.
|
|
l.pos = len(l.data)
|
|
} else {
|
|
l.pos += i + 1
|
|
}
|
|
}
|
|
|
|
// SkipEscaped skips all bytes until first occurence of non-escaped c.
|
|
func (l *Scanner) SkipEscaped(c byte) {
|
|
if l.err {
|
|
return
|
|
}
|
|
// Reset scanner state.
|
|
l.resetItem()
|
|
|
|
if i := ScanUntil(l.data[l.pos:], c); i == -1 {
|
|
// Reached the end of data.
|
|
l.pos = len(l.data)
|
|
} else {
|
|
l.pos += i + 1
|
|
}
|
|
}
|
|
|
|
// Type reports current token type.
|
|
func (l *Scanner) Type() ItemType {
|
|
return l.itemType
|
|
}
|
|
|
|
// Bytes returns current token bytes.
|
|
func (l *Scanner) Bytes() []byte {
|
|
return l.itemBytes
|
|
}
|
|
|
|
func (l *Scanner) nextChar() (byte, bool) {
|
|
// Reset scanner state.
|
|
l.resetItem()
|
|
|
|
if l.err {
|
|
return 0, false
|
|
}
|
|
l.pos += SkipSpace(l.data[l.pos:])
|
|
if l.pos == len(l.data) {
|
|
return 0, false
|
|
}
|
|
return l.data[l.pos], true
|
|
}
|
|
|
|
func (l *Scanner) resetItem() {
|
|
l.itemType = ItemUndef
|
|
l.itemBytes = nil
|
|
}
|
|
|
|
func (l *Scanner) fetchOctet(c byte) bool {
|
|
i := l.pos
|
|
if j := bytes.IndexByte(l.data[l.pos:], c); j == -1 {
|
|
// Reached the end of data.
|
|
l.pos = len(l.data)
|
|
} else {
|
|
l.pos += j
|
|
}
|
|
|
|
l.itemType = ItemOctet
|
|
l.itemBytes = l.data[i:l.pos]
|
|
|
|
return true
|
|
}
|
|
|
|
func (l *Scanner) fetchToken() bool {
|
|
n, t := ScanToken(l.data[l.pos:])
|
|
if n == -1 {
|
|
l.err = true
|
|
return false
|
|
}
|
|
|
|
l.itemType = t
|
|
l.itemBytes = l.data[l.pos : l.pos+n]
|
|
l.pos += n
|
|
|
|
return true
|
|
}
|
|
|
|
func (l *Scanner) fetchQuotedString() (ok bool) {
|
|
l.pos++
|
|
|
|
n := ScanUntil(l.data[l.pos:], '"')
|
|
if n == -1 {
|
|
l.err = true
|
|
return false
|
|
}
|
|
|
|
l.itemType = ItemString
|
|
l.itemBytes = RemoveByte(l.data[l.pos:l.pos+n], '\\')
|
|
l.pos += n + 1
|
|
|
|
return true
|
|
}
|
|
|
|
func (l *Scanner) fetchComment() (ok bool) {
|
|
l.pos++
|
|
|
|
n := ScanPairGreedy(l.data[l.pos:], '(', ')')
|
|
if n == -1 {
|
|
l.err = true
|
|
return false
|
|
}
|
|
|
|
l.itemType = ItemComment
|
|
l.itemBytes = RemoveByte(l.data[l.pos:l.pos+n], '\\')
|
|
l.pos += n + 1
|
|
|
|
return true
|
|
}
|
|
|
|
// ScanUntil scans for the first non-escaped character c in given data.
// It returns the index of the matched c, or -1 if there is no such c.
//
// An occurrence of c is escaped when it is preceded by an odd number of
// consecutive backslashes. An even number means the backslashes escape each
// other, so in `a\\"` the quote is not escaped, while in `a\"` it is.
func ScanUntil(data []byte, c byte) (n int) {
	for n < len(data) {
		i := bytes.IndexByte(data[n:], c)
		if i == -1 {
			return -1
		}
		n += i

		// Count the run of backslashes that immediately precedes the match.
		slashes := 0
		for k := n - 1; k >= 0 && data[k] == '\\'; k-- {
			slashes++
		}
		if slashes%2 == 0 {
			return n
		}

		// The match is escaped; keep looking after it.
		n++
	}
	return -1
}
|
|
|
|
// ScanPairGreedy scans for the closing char that completes a pair of opening
// and closing chars, taking nested pairs into account.
// Note that the first opening byte must not be present in data: scanning
// starts as if one pair were already open.
//
// It returns the index of the closing byte that balances that initial
// opening byte, or -1 when data ends before the pair is closed. A byte that
// follows a backslash is escaped and is counted neither as opening nor as
// closing.
func ScanPairGreedy(data []byte, open, close byte) (n int) {
	opened := 1
	for n = 0; n < len(data); n++ {
		switch data[n] {
		case '\\':
			// Skip the escaped byte, whatever it is.
			n++
		case close:
			opened--
			if opened == 0 {
				return n
			}
		case open:
			opened++
		}
	}
	return -1
}
|
|
|
|
// RemoveByte returns data without c. If c is not present in data it returns
// the same slice. Otherwise it returns a newly allocated copy of data with
// every occurrence of c left out; data itself is never modified.
func RemoveByte(data []byte, c byte) []byte {
	first := bytes.IndexByte(data, c)
	if first == -1 {
		return data
	}

	// At least one byte is dropped, so the result holds at most len(data)-1
	// bytes.
	result := make([]byte, 0, len(data)-1)
	result = append(result, data[:first]...)

	// Copy the chunks between occurrences of c, including the final chunk
	// that runs to the very end of data.
	rest := data[first+1:]
	for {
		i := bytes.IndexByte(rest, c)
		if i == -1 {
			return append(result, rest...)
		}
		result = append(result, rest[:i]...)
		rest = rest[i+1:]
	}
}
|
|
|
|
// SkipSpace skips spaces and lws-sequences from p.
|
|
// It returns number ob bytes skipped.
|
|
func SkipSpace(p []byte) (n int) {
|
|
for len(p) > 0 {
|
|
switch {
|
|
case len(p) >= 3 &&
|
|
p[0] == '\r' &&
|
|
p[1] == '\n' &&
|
|
OctetTypes[p[2]].IsSpace():
|
|
p = p[3:]
|
|
n += 3
|
|
case OctetTypes[p[0]].IsSpace():
|
|
p = p[1:]
|
|
n++
|
|
default:
|
|
return
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// ScanToken scan for next token in p. It returns length of the token and its
|
|
// type. It do not trim p.
|
|
func ScanToken(p []byte) (n int, t ItemType) {
|
|
if len(p) == 0 {
|
|
return 0, ItemUndef
|
|
}
|
|
|
|
c := p[0]
|
|
switch {
|
|
case OctetTypes[c].IsSeparator():
|
|
return 1, ItemSeparator
|
|
|
|
case OctetTypes[c].IsToken():
|
|
for n = 1; n < len(p); n++ {
|
|
c := p[n]
|
|
if !OctetTypes[c].IsToken() {
|
|
break
|
|
}
|
|
}
|
|
return n, ItemToken
|
|
|
|
default:
|
|
return -1, ItemUndef
|
|
}
|
|
}
|