SandpointsGitHook/vendor/github.com/jdkato/prose/transform/title.go

package transform

import (
	"regexp"
	"strings"
	"unicode"
	"unicode/utf8"

	"github.com/jdkato/prose/internal/util"
)

// An IgnoreFunc is a TitleConverter callback that decides whether or not the
// the string word should be capitalized. firstOrLast indicates whether or not
// word is the first or last word in the given string.
type IgnoreFunc func(word string, firstOrLast bool) bool

// A TitleConverter converts a string to title case according to its style.
type TitleConverter struct {
	ignore IgnoreFunc
}

var (
	// APStyle states to:
	// 1. Capitalize the principal words, including prepositions and
	//    conjunctions of four or more letters.
	// 2. Capitalize an article – the, a, an – or words of fewer than four
	//    letters if it is the first or last word in a title.
	APStyle IgnoreFunc = optionsAP

	// ChicagoStyle states to lowercase articles (a, an, the), coordinating
	// conjunctions (and, but, or, for, nor), and prepositions, regardless of
	// length, unless they are the first or last word of the title.
	ChicagoStyle IgnoreFunc = optionsChicago
)

// NewTitleConverter returns a new TitleConverter set to enforce the specified
// style.
func NewTitleConverter(style IgnoreFunc) *TitleConverter {
	return &TitleConverter{ignore: style}
}

// Title returns a copy of the string s in title case format.
func (tc *TitleConverter) Title(s string) string {
	idx, pos := 0, 0
	t := sanitizer.Replace(s)
	end := len(t)
	return splitRE.ReplaceAllStringFunc(s, func(m string) string {
		sm := strings.ToLower(m)
		pos = strings.Index(t[idx:], m) + idx
		prev := charAt(t, pos-1)
		ext := utf8.RuneCountInString(m)
		idx = pos + ext
		if tc.ignore(sm, pos == 0 || idx == end) &&
			(prev == ' ' || prev == '-' || prev == '/') &&
			charAt(t, pos-2) != ':' && charAt(t, pos-2) != '-' &&
			(charAt(t, pos+ext) != '-' || charAt(t, pos-1) == '-') {
			return sm
		}
		return toTitle(m, prev)
	})
}

func optionsAP(word string, bounding bool) bool {
	return !bounding && util.StringInSlice(word, smallWords)
}

func optionsChicago(word string, bounding bool) bool {
	return !bounding && (util.StringInSlice(word, smallWords) || util.StringInSlice(word, prepositions))
}

var smallWords = []string{
	"a", "an", "and", "as", "at", "but", "by", "en", "for", "if", "in", "nor",
	"of", "on", "or", "per", "the", "to", "vs", "vs.", "via", "v", "v."}

var prepositions = []string{
	"with", "from", "into", "during", "including", "until", "against", "among",
	"throughout", "despite", "towards", "upon", "concerning", "about", "over",
	"through", "before", "between", "after", "since", "without", "under",
	"within", "along", "following", "across", "beyond", "around", "down",
	"near", "above"}

var splitRE = regexp.MustCompile(`[\p{N}\p{L}]+[^\s-/]*`)

// sanitizer replaces a set of Unicode characters with ASCII equivalents.
var sanitizer = strings.NewReplacer(
	"\u201c", `"`,
	"\u201d", `"`,
	"\u2018", "'",
	"\u2019", "'",
	"\u2013", "-",
	"\u2014", "-",
	"\u2026", "...")

// charAt returns the ith character of s, if it exists. Otherwise, it returns
// the first character.
func charAt(s string, i int) byte {
	if i >= 0 && i < len(s) {
		return s[i]
	}
	return s[0]
}

// toTitle returns a copy of the string m with its first Unicode letter mapped
// to its title case.
func toTitle(m string, prev byte) string {
	r, size := utf8.DecodeRuneInString(m)
	return string(unicode.ToTitle(r)) + m[size:]
}