SandpointsGitHook/vendor/github.com/jdkato/prose/transform/title.go

108 lines
3.3 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package transform
import (
"regexp"
"strings"
"unicode"
"unicode/utf8"
"github.com/jdkato/prose/internal/util"
)
// An IgnoreFunc is a TitleConverter callback that decides whether or not the
// string word should be kept in lower case instead of being capitalized: it
// returns true to request lower case. Title passes word already lowercased.
// firstOrLast indicates whether or not word is the first or last word in the
// given string.
type IgnoreFunc func(word string, firstOrLast bool) bool
// A TitleConverter converts a string to title case according to its style.
type TitleConverter struct {
// ignore is the style callback that Title consults for every word to decide
// whether the word may stay in lower case.
ignore IgnoreFunc
}
// Predefined styles that can be passed to NewTitleConverter.
var (
// APStyle states to:
// 1. Capitalize the principal words, including prepositions and
// conjunctions of four or more letters.
// 2. Capitalize an article (the, a, an) or a word of fewer than four
// letters only if it is the first or last word in a title.
//
// It is implemented by optionsAP.
APStyle IgnoreFunc = optionsAP
// ChicagoStyle states to lowercase articles (a, an, the), coordinating
// conjunctions (and, but, or, for, nor), and prepositions, regardless of
// length, unless they are the first or last word of the title.
//
// It is implemented by optionsChicago.
ChicagoStyle IgnoreFunc = optionsChicago
)
// NewTitleConverter builds a TitleConverter whose per-word capitalization
// decisions are delegated to style (for example APStyle or ChicagoStyle).
func NewTitleConverter(style IgnoreFunc) *TitleConverter {
	converter := new(TitleConverter)
	converter.ignore = style
	return converter
}
// Title returns a copy of the string s in title case format.
//
// Every word matched by splitRE is rewritten in place. A word is returned in
// lower case only when all of the following hold:
//   - the converter's ignore callback returns true for the lowercased word,
//   - the byte before the word is a space, '-' or '/',
//   - the byte two positions before the word is neither ':' nor '-',
//   - the word is not followed by '-', unless it is also preceded by '-'.
//
// Otherwise its first letter is mapped to title case and the rest of the word
// is left untouched.
func (tc *TitleConverter) Title(s string) string {
	// t is a copy of s with typographic punctuation replaced by ASCII. It is
	// only used to inspect the bytes around each word; the result is built
	// from s itself.
	t := sanitizer.Replace(s)
	end := len(t)
	idx := 0 // byte offset in t just past the previously handled word

	return splitRE.ReplaceAllStringFunc(s, func(m string) string {
		sm := strings.ToLower(m)

		// m is a slice of s, so it may contain characters that sanitizer
		// rewrote in t. Search for the sanitized form, and measure it in
		// bytes, so that pos and idx stay valid byte offsets into t even
		// for non-ASCII words.
		w := sanitizer.Replace(m)
		off := strings.Index(t[idx:], w)
		if off < 0 {
			// Not expected: every match in s has a sanitized counterpart
			// in t. Capitalize rather than index t with a bogus offset.
			return toTitle(m, 0)
		}
		pos := idx + off
		ext := len(w)
		idx = pos + ext

		prev := charAt(t, pos-1)
		if tc.ignore(sm, pos == 0 || idx == end) &&
			(prev == ' ' || prev == '-' || prev == '/') &&
			charAt(t, pos-2) != ':' && charAt(t, pos-2) != '-' &&
			(charAt(t, pos+ext) != '-' || charAt(t, pos-1) == '-') {
			return sm
		}
		return toTitle(m, prev)
	})
}
// optionsAP is the IgnoreFunc behind APStyle: a word may stay in lower case
// when it is not the first or last word and it appears in smallWords.
func optionsAP(word string, bounding bool) bool {
	if bounding {
		return false
	}
	return util.StringInSlice(word, smallWords)
}
// optionsChicago is the IgnoreFunc behind ChicagoStyle: a word may stay in
// lower case when it is not the first or last word and it appears in either
// smallWords or prepositions.
func optionsChicago(word string, bounding bool) bool {
	if bounding {
		return false
	}
	if util.StringInSlice(word, smallWords) {
		return true
	}
	return util.StringInSlice(word, prepositions)
}
// smallWords lists the short words (all in lower case) that both optionsAP
// and optionsChicago allow to stay in lower case when they are not the first
// or last word.
var smallWords = []string{
"a", "an", "and", "as", "at", "but", "by", "en", "for", "if", "in", "nor",
"of", "on", "or", "per", "the", "to", "vs", "vs.", "via", "v", "v."}
// prepositions lists the longer prepositions (all in lower case) that
// optionsChicago, in addition to smallWords, allows to stay in lower case.
var prepositions = []string{
"with", "from", "into", "during", "including", "until", "against", "among",
"throughout", "despite", "towards", "upon", "concerning", "about", "over",
"through", "before", "between", "after", "since", "without", "under",
"within", "along", "following", "across", "beyond", "around", "down",
"near", "above"}
// splitRE matches one word for Title: a run of one or more Unicode letters or
// numbers, followed by any characters up to the next whitespace, '-' or '/'
// (the '-' inside the bracket expression is a literal hyphen, not a range).
var splitRE = regexp.MustCompile(`[\p{N}\p{L}]+[^\s-/]*`)
// sanitizer replaces a set of Unicode characters with ASCII equivalents.
// Note that the ellipsis expands to three bytes, so the sanitized string is
// not guaranteed to have the same length as the input.
var sanitizer = strings.NewReplacer(
"\u201c", `"`, // left double quotation mark
"\u201d", `"`, // right double quotation mark
"\u2018", "'", // left single quotation mark
"\u2019", "'", // right single quotation mark
"\u2013", "-", // en dash
"\u2014", "-", // em dash
"\u2026", "...") // horizontal ellipsis
// charAt returns the byte at index i of s. If i is out of range it returns
// the first byte of s instead; if s is empty there is no byte to fall back
// to, so it returns 0 rather than panicking.
func charAt(s string, i int) byte {
	switch {
	case len(s) == 0:
		return 0
	case i < 0 || i >= len(s):
		return s[0]
	}
	return s[i]
}
// toTitle returns a copy of the string m with its first rune mapped to its
// Unicode title case; the remainder of m is returned unchanged. An empty m is
// returned as is (decoding it would otherwise yield the replacement
// character U+FFFD).
//
// prev is accepted for call-site compatibility and is not used.
func toTitle(m string, prev byte) string {
	if m == "" {
		return m
	}
	r, size := utf8.DecodeRuneInString(m)
	return string(unicode.ToTitle(r)) + m[size:]
}