// Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Adapted for mmark, by Miek Gieben, 2015. package mmark import ( "bufio" "bytes" "errors" "io/ioutil" "regexp" "strconv" "unicode/utf8" ) // SourceCodeTypes are the different languages that are supported as // a type attribute in sourcecode, see Section 2.48.4 of XML2RFC v3 (-21). var SourceCodeTypes = map[string]bool{ "abnf": true, "asn.1": true, "bash": true, "c++": true, "c": true, "cbor": true, "dtd": true, "java": true, "javascript": true, "json": true, "mib": true, "perl": true, "pseudocode": true, "python": true, "rnc": true, "xml": true, "go": true, } // parseAddress parses a code address directive and returns the bytes. func parseAddress(addr []byte, file []byte) []byte { bytes.TrimSpace(addr) textBytes, err := ioutil.ReadFile(string(file)) if err != nil { printf(nil, "failed: `%s': %s", string(file), err) return nil } lo, hi, err := addrToByteRange(string(addr), 0, textBytes) if err != nil { printf(nil, "code include address: %s", err.Error()) return textBytes } // Acme pattern matches can stop mid-line, // so run to end of line in both directions if not at line start/end. for lo > 0 && textBytes[lo-1] != '\n' { lo-- } if hi > 0 { for hi < len(textBytes) && textBytes[hi-1] != '\n' { hi++ } } lines := codeLines(textBytes, lo, hi) return lines } // codeLines takes a source file and returns the lines that // span the byte range specified by start and end. // It discards lines that end in "OMIT" and in "OMIT -->" func codeLines(src []byte, start, end int) (lines []byte) { startLine := 1 for i, b := range src { if i == start { break } if b == '\n' { startLine++ } } s := bufio.NewScanner(bytes.NewReader(src[start:end])) for n := startLine; s.Scan(); n++ { l := s.Bytes() if bytes.HasSuffix(l, []byte("OMIT")) { continue } if bytes.HasSuffix(l, []byte("OMIT -->")) { continue } lines = append(lines, l...) lines = append(lines, '\n') } // TODO(miek): trim leading and trailing blanklines return } // This file is stolen from go/src/cmd/godoc/codewalk.go. // It's an evaluator for the file address syntax implemented by acme and sam, // but using Go-native regular expressions. // To keep things reasonably close, this version uses (?m:re) for all user-provided // regular expressions. That is the only change to the code from codewalk.go. // See http://plan9.bell-labs.com/sys/doc/sam/sam.html Table II // for details on the syntax. // addrToByte evaluates the given address starting at offset start in data. // It returns the lo and hi byte offset of the matched region within data. func addrToByteRange(addr string, start int, data []byte) (lo, hi int, err error) { if addr == "" { lo, hi = start, len(data) return } var ( dir byte prevc byte charOffset bool ) lo = start hi = start for addr != "" && err == nil { c := addr[0] switch c { default: err = errors.New("invalid address syntax near " + string(c)) case ',': if len(addr) == 1 { hi = len(data) } else { _, hi, err = addrToByteRange(addr[1:], hi, data) } return case '+', '-': if prevc == '+' || prevc == '-' { lo, hi, err = addrNumber(data, lo, hi, prevc, 1, charOffset) } dir = c case '$': lo = len(data) hi = len(data) if len(addr) > 1 { dir = '+' } case '#': charOffset = true case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': var i int for i = 1; i < len(addr); i++ { if addr[i] < '0' || addr[i] > '9' { break } } var n int n, err = strconv.Atoi(addr[0:i]) if err != nil { break } lo, hi, err = addrNumber(data, lo, hi, dir, n, charOffset) dir = 0 charOffset = false prevc = c addr = addr[i:] continue case '/': var i, j int Regexp: for i = 1; i < len(addr); i++ { switch addr[i] { case '\\': i++ case '/': j = i + 1 break Regexp } } if j == 0 { j = i } pattern := addr[1:i] lo, hi, err = addrRegexp(data, lo, hi, dir, pattern) prevc = c addr = addr[j:] continue } prevc = c addr = addr[1:] } if err == nil && dir != 0 { lo, hi, err = addrNumber(data, lo, hi, dir, 1, charOffset) } if err != nil { return 0, 0, err } return lo, hi, nil } // addrNumber applies the given dir, n, and charOffset to the address lo, hi. // dir is '+' or '-', n is the count, and charOffset is true if the syntax // used was #n. Applying +n (or +#n) means to advance n lines // (or characters) after hi. Applying -n (or -#n) means to back up n lines // (or characters) before lo. // The return value is the new lo, hi. func addrNumber(data []byte, lo, hi int, dir byte, n int, charOffset bool) (int, int, error) { switch dir { case 0: lo = 0 hi = 0 fallthrough case '+': if charOffset { pos := hi for ; n > 0 && pos < len(data); n-- { _, size := utf8.DecodeRune(data[pos:]) pos += size } if n == 0 { return pos, pos, nil } break } // find next beginning of line if hi > 0 { for hi < len(data) && data[hi-1] != '\n' { hi++ } } lo = hi if n == 0 { return lo, hi, nil } for ; hi < len(data); hi++ { if data[hi] != '\n' { continue } switch n--; n { case 1: lo = hi + 1 case 0: return lo, hi + 1, nil } } case '-': if charOffset { // Scan backward for bytes that are not UTF-8 continuation bytes. pos := lo for ; pos > 0 && n > 0; pos-- { if data[pos]&0xc0 != 0x80 { n-- } } if n == 0 { return pos, pos, nil } break } // find earlier beginning of line for lo > 0 && data[lo-1] != '\n' { lo-- } hi = lo if n == 0 { return lo, hi, nil } for ; lo >= 0; lo-- { if lo > 0 && data[lo-1] != '\n' { continue } switch n--; n { case 1: hi = lo case 0: return lo, hi, nil } } } return 0, 0, errors.New("address out of range") } // addrRegexp searches for pattern in the given direction starting at lo, hi. // The direction dir is '+' (search forward from hi) or '-' (search backward from lo). // Backward searches are unimplemented. func addrRegexp(data []byte, lo, hi int, dir byte, pattern string) (int, int, error) { // We want ^ and $ to work as in sam/acme, so use ?m. re, err := regexp.Compile("(?m:" + pattern + ")") if err != nil { return 0, 0, err } if dir == '-' { // Could implement reverse search using binary search // through file, but that seems like overkill. return 0, 0, errors.New("reverse search not implemented") } m := re.FindIndex(data[hi:]) if len(m) > 0 { m[0] += hi m[1] += hi } else if hi > 0 { // No match. Wrap to beginning of data. m = re.FindIndex(data) } if len(m) == 0 { return 0, 0, errors.New("no match for " + pattern) } return m[0], m[1], nil }