108 lines
3.3 KiB
Go
108 lines
3.3 KiB
Go
package transform
|
||
|
||
import (
|
||
"regexp"
|
||
"strings"
|
||
"unicode"
|
||
"unicode/utf8"
|
||
|
||
"github.com/jdkato/prose/internal/util"
|
||
)
|
||
|
||
// An IgnoreFunc is a TitleConverter callback that decides whether or not the
|
||
// the string word should be capitalized. firstOrLast indicates whether or not
|
||
// word is the first or last word in the given string.
|
||
type IgnoreFunc func(word string, firstOrLast bool) bool
|
||
|
||
// A TitleConverter converts a string to title case according to its style.
|
||
type TitleConverter struct {
|
||
ignore IgnoreFunc
|
||
}
|
||
|
||
var (
|
||
// APStyle states to:
|
||
// 1. Capitalize the principal words, including prepositions and
|
||
// conjunctions of four or more letters.
|
||
// 2. Capitalize an article – the, a, an – or words of fewer than four
|
||
// letters if it is the first or last word in a title.
|
||
APStyle IgnoreFunc = optionsAP
|
||
|
||
// ChicagoStyle states to lowercase articles (a, an, the), coordinating
|
||
// conjunctions (and, but, or, for, nor), and prepositions, regardless of
|
||
// length, unless they are the first or last word of the title.
|
||
ChicagoStyle IgnoreFunc = optionsChicago
|
||
)
|
||
|
||
// NewTitleConverter returns a new TitleConverter set to enforce the specified
|
||
// style.
|
||
func NewTitleConverter(style IgnoreFunc) *TitleConverter {
|
||
return &TitleConverter{ignore: style}
|
||
}
|
||
|
||
// Title returns a copy of the string s in title case format.
|
||
func (tc *TitleConverter) Title(s string) string {
|
||
idx, pos := 0, 0
|
||
t := sanitizer.Replace(s)
|
||
end := len(t)
|
||
return splitRE.ReplaceAllStringFunc(s, func(m string) string {
|
||
sm := strings.ToLower(m)
|
||
pos = strings.Index(t[idx:], m) + idx
|
||
prev := charAt(t, pos-1)
|
||
ext := utf8.RuneCountInString(m)
|
||
idx = pos + ext
|
||
if tc.ignore(sm, pos == 0 || idx == end) &&
|
||
(prev == ' ' || prev == '-' || prev == '/') &&
|
||
charAt(t, pos-2) != ':' && charAt(t, pos-2) != '-' &&
|
||
(charAt(t, pos+ext) != '-' || charAt(t, pos-1) == '-') {
|
||
return sm
|
||
}
|
||
return toTitle(m, prev)
|
||
})
|
||
}
|
||
|
||
func optionsAP(word string, bounding bool) bool {
|
||
return !bounding && util.StringInSlice(word, smallWords)
|
||
}
|
||
|
||
func optionsChicago(word string, bounding bool) bool {
|
||
return !bounding && (util.StringInSlice(word, smallWords) || util.StringInSlice(word, prepositions))
|
||
}
|
||
|
||
var smallWords = []string{
|
||
"a", "an", "and", "as", "at", "but", "by", "en", "for", "if", "in", "nor",
|
||
"of", "on", "or", "per", "the", "to", "vs", "vs.", "via", "v", "v."}
|
||
|
||
var prepositions = []string{
|
||
"with", "from", "into", "during", "including", "until", "against", "among",
|
||
"throughout", "despite", "towards", "upon", "concerning", "about", "over",
|
||
"through", "before", "between", "after", "since", "without", "under",
|
||
"within", "along", "following", "across", "beyond", "around", "down",
|
||
"near", "above"}
|
||
|
||
var splitRE = regexp.MustCompile(`[\p{N}\p{L}]+[^\s-/]*`)
|
||
|
||
// sanitizer replaces a set of Unicode characters with ASCII equivalents.
|
||
var sanitizer = strings.NewReplacer(
|
||
"\u201c", `"`,
|
||
"\u201d", `"`,
|
||
"\u2018", "'",
|
||
"\u2019", "'",
|
||
"\u2013", "-",
|
||
"\u2014", "-",
|
||
"\u2026", "...")
|
||
|
||
// charAt returns the ith character of s, if it exists. Otherwise, it returns
|
||
// the first character.
|
||
func charAt(s string, i int) byte {
|
||
if i >= 0 && i < len(s) {
|
||
return s[i]
|
||
}
|
||
return s[0]
|
||
}
|
||
|
||
// toTitle returns a copy of the string m with its first Unicode letter mapped
|
||
// to its title case.
|
||
func toTitle(m string, prev byte) string {
|
||
r, size := utf8.DecodeRuneInString(m)
|
||
return string(unicode.ToTitle(r)) + m[size:]
|
||
}
|