icumsg

package module
v0.3.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 17, 2026 License: MIT Imports: 9 Imported by: 1

README

GoDoc GoReportCard Coverage Status

icumsg

This Go module provides an efficient ICU Message Format tokenizer.

https://go.dev/play/p/y7OA1YK2Wn4

package main

import (
	"fmt"
	"os"

	"github.com/romshark/icumsg"
	"golang.org/x/text/language"
)

// main tokenizes a short ICU message and prints every token together with
// its type and the text it represents.
func main() {
	const input = `Hello {arg} ({rank, ordinal})!`

	var tk icumsg.Tokenizer
	toks, err := tk.Tokenize(language.English, nil, input)
	if err != nil {
		// Report where the tokenizer stopped before giving up.
		fmt.Printf("ERR: at index %d: %v\n", tk.Pos(), err)
		os.Exit(1)
	}

	fmt.Printf("token (%d):\n", len(toks))
	for idx := range toks {
		kind := toks[idx].Type.String()
		text := toks[idx].String(input, toks)
		fmt.Printf(" %d (%s): %q\n", idx, kind, text)
	}

	// output:
	// token (8):
	//  0 (literal): "Hello "
	//  1 (simple argument): "{arg}"
	//  2 (argument name): "arg"
	//  3 (literal): " ("
	//  4 (simple argument): "{rank, ordinal}"
	//  5 (argument name): "rank"
	//  6 (argument type ordinal): "ordinal"
	//  7 (literal): ")!"
}

Error handling

https://go.dev/play/p/NI6gXkcJJcH

package main

import (
	"fmt"

	"github.com/romshark/icumsg"
	"golang.org/x/text/language"
)

// main shows how a tokenization error and its position are reported.
func main() {
	// English only supports the CLDR plural rules 'one' and 'other',
	// so the trailing 'few' option is what triggers the error.
	const input = `{numMsgs,plural, one{# message} other{# messages} few{this is wrong}}`

	var tk icumsg.Tokenizer
	if _, err := tk.Tokenize(language.English, nil, input); err != nil {
		fmt.Printf("Error at index %d: %v\n", tk.Pos(), err)
	}

	// output:
	// Error at index 50: plural rule unsupported for locale
}

Semantic Analysis

ICU messages can be syntactically valid yet incomplete when they lack select, plural or selectordinal options required by the locale, or semantically invalid when they contain unsupported select options. icumsg.Analyze lets you inspect a message in detail and discover such semantic issues.

https://go.dev/play/p/U9t0a0XH9U_h

package main

import (
	"fmt"
	"os"

	"github.com/romshark/icumsg"
	"golang.org/x/text/language"
)

// optionsForVarGender is a select-options callback. For the argument
// "varGender" it returns the expected options together with the "required"
// presence policy and the "reject" unknown-option policy; for every other
// argument it returns no options and zero-valued policies.
var optionsForVarGender = func(argName string) (
	options []string,
	policyPresence icumsg.OptionsPresencePolicy,
	policyUnknown icumsg.OptionUnknownPolicy,
) {
	if argName != "varGender" {
		// Nothing is expected of any other argument.
		return nil, 0, 0
	}

	// Option "other" doesn't need to be included because it's always required.
	options = []string{"male", "female"}
	return options,
		icumsg.OptionsPresencePolicyRequired,
		icumsg.OptionUnknownPolicyReject
}

// main tokenizes an ICU message, runs icumsg.Analyze over the tokens and
// prints the total number of choices, the incomplete choices, the rejected
// select options and the resulting completeness percentage.
func main() {
	locale := language.English

	// varGender lists unsupported option "unknown"
	msg := `This message is valid but has incomplete plural and unknown select options:
	missing one: {varNum, plural,
		other{
			missing male: {varGender, select,
				unknown{
					varNum[other],varGender[unknown]
				}
				female{
					varNum[other],varGender[female]
				}
				other{
					varNum[other],varGender[other]
				}
			}
		}
	}
	complete: {varNum, plural,
		one{-}
		other{-}
	}`

	var tokenizer icumsg.Tokenizer
	tokens, err := tokenizer.Tokenize(locale, nil, msg)
	if err != nil {
		fmt.Printf("ERR: at index %d: %v\n", tokenizer.Pos(), err)
		os.Exit(1)
	}

	var incomplete, rejected []string
	totalChoices, err := icumsg.Analyze(locale, msg, tokens,
		// Reuse the package-level callback instead of shadowing it with a
		// local slice and duplicating its logic in an inline closure.
		optionsForVarGender,
		func(index int) error {
			// This is called when an incomplete choice is encountered.
			// tokens[index] is the argument, tokens[index+1] its name.
			tArg, tName := tokens[index], tokens[index+1]
			incomplete = append(incomplete,
				tArg.Type.String()+": "+tName.String(msg, tokens))
			return nil
		}, func(indexArgument, indexOption int) error {
			// This is called when a rejected option is encountered.
			tArg, tName := tokens[indexArgument+1], tokens[indexOption+1]
			rejected = append(rejected, fmt.Sprintf("%q: option %q",
				tArg.String(msg, tokens), tName.String(msg, tokens)))
			return nil
		})
	if err != nil {
		panic(err)
	}

	fmt.Printf("totalChoices: %d\n", totalChoices)
	fmt.Printf("incomplete (%d):\n", len(incomplete))
	for _, s := range incomplete {
		fmt.Printf(" %s\n", s)
	}
	fmt.Printf("rejected (%d):\n", len(rejected))
	for _, s := range rejected {
		fmt.Printf(" %s\n", s)
	}

	{
		// Completeness is the share of choices that are not incomplete.
		total := float64(totalChoices)
		numIncomplete := float64(len(incomplete))
		complete := total - numIncomplete
		percent := complete / total
		fmt.Printf("completeness: %.2f%%\n", percent*100)
	}

	// output:
	// totalChoices: 3
	// incomplete (2):
	//  select argument: varGender
	//  plural argument: varNum
	// rejected (1):
	//  "varGender": option "unknown"
	// completeness: 33.33%
}

Documentation

Overview

Package icumsg provides an ICU Message Format tokenizer and semantic analyzer (see https://unicode-org.github.io/icu/userguide/format_parse/messages/).

Index

Examples

Constants

View Source
// Presence policies for select options.
//
// NOTE(review): unlike the OptionUnknownPolicy constants, these are declared
// with a plain `= iota` and no explicit type, so they are untyped integer
// constants rather than OptionsPresencePolicy values. Confirm this is intended.
const (
	// OptionsPresencePolicyOptional does not require all select options to be present
	// for the ICU message to be considered complete.
	OptionsPresencePolicyOptional = iota

	// OptionsPresencePolicyRequired requires all select options to be present
	// for the ICU message to be considered complete.
	OptionsPresencePolicyRequired
)

Variables

View Source
var (
	ErrUnclosedQuote         = errors.New("unclosed quote")
	ErrUnexpectedToken       = errors.New("unexpected token")
	ErrUnexpectedEOF         = errors.New("unexpected EOF")
	ErrExpectedComma         = errors.New("expected comma")
	ErrExpectedColon         = errors.New("expected colon")
	ErrExpectBracketOpen     = errors.New("expect opening bracket")
	ErrExpectBracketClose    = errors.New("expect closing bracket")
	ErrMissingOptionOther    = errors.New("missing the mandatory 'other' option")
	ErrEmptyOption           = errors.New("empty option")
	ErrDuplicateOption       = errors.New("duplicate option")
	ErrInvalidOffset         = errors.New("invalid offset")
	ErrUnsupportedPluralRule = errors.New("plural rule unsupported for locale")
)
View Source
var ErrInvalidOption = errors.New("invalid plural option")

Functions

func Analyze added in v0.3.0

func Analyze(
	locale language.Tag,
	src string,
	buffer []Token,
	selectOptions func(argName string) (
		[]string, OptionsPresencePolicy, OptionUnknownPolicy,
	),
	onIncomplete func(index int) error,
	onRejected func(indexArgument, indexOption int) error,
) (totalChoices int, err error)

Analyze returns the total number of choices in src. onIncomplete is invoked when an incomplete select, plural or selectordinal is encountered. onRejected is invoked when an unknown select option is encountered. selectOptions is invoked when a select is encountered; if it returns a slice, its elements are the expected options whose presence determines whether the select is complete (depending on the policies returned). selectOptions is not invoked for plural and selectordinal; instead, locale is used to determine which options are required. If onIncomplete or onRejected returns an error, Analyze returns it immediately.

Example
package main

import (
	"fmt"
	"os"

	"github.com/romshark/icumsg"
	"golang.org/x/text/language"
)

// main tokenizes an ICU message, analyzes it with icumsg.Analyze and prints
// the number of choices, the incomplete choices, the rejected select options
// and the completeness percentage.
func main() {
	locale := language.English

	// varGender lists unsupported option "unknown"
	msg := `This message is valid but has incomplete plural and unknown select options:
	missing one: {varNum, plural,
		other{
			missing male: {varGender, select,
				unknown{
					varNum[other],varGender[unknown]
				}
				female{
					varNum[other],varGender[female]
				}
				other{
					varNum[other],varGender[other]
				}
			}
		}
	}
	complete: {varNum, plural,
		one{-}
		other{-}
	}`

	var tk icumsg.Tokenizer
	tokens, err := tk.Tokenize(locale, nil, msg)
	if err != nil {
		fmt.Printf("ERR: at index %d: %v\n", tk.Pos(), err)
		os.Exit(1)
	}

	// Option "other" doesn't need to be included because it's always required.
	genderOptions := []string{"male", "female"}

	// selectOptions supplies expected options and policies for "varGender"
	// only; every other argument gets no options and zero-valued policies.
	selectOptions := func(argName string) (
		[]string, icumsg.OptionsPresencePolicy, icumsg.OptionUnknownPolicy,
	) {
		if argName != "varGender" {
			return nil, 0, 0
		}
		return genderOptions,
			icumsg.OptionsPresencePolicyRequired,
			icumsg.OptionUnknownPolicyReject
	}

	var incomplete, rejected []string

	// onIncomplete is called when an incomplete choice is encountered.
	onIncomplete := func(index int) error {
		argTok, nameTok := tokens[index], tokens[index+1]
		kind := argTok.Type.String()
		name := nameTok.String(msg, tokens)
		incomplete = append(incomplete, kind+": "+name)
		return nil
	}

	// onRejected is called when a rejected option is encountered.
	onRejected := func(indexArgument, indexOption int) error {
		argNameTok, optNameTok := tokens[indexArgument+1], tokens[indexOption+1]
		entry := fmt.Sprintf("%q: option %q",
			argNameTok.String(msg, tokens), optNameTok.String(msg, tokens))
		rejected = append(rejected, entry)
		return nil
	}

	totalChoices, err := icumsg.Analyze(
		locale, msg, tokens, selectOptions, onIncomplete, onRejected,
	)
	if err != nil {
		panic(err)
	}

	fmt.Printf("totalChoices: %d\n", totalChoices)
	fmt.Printf("incomplete (%d):\n", len(incomplete))
	for i := range incomplete {
		fmt.Printf(" %s\n", incomplete[i])
	}
	fmt.Printf("rejected (%d):\n", len(rejected))
	for i := range rejected {
		fmt.Printf(" %s\n", rejected[i])
	}

	// Completeness is the share of choices that are not incomplete.
	total := float64(totalChoices)
	complete := total - float64(len(incomplete))
	percent := complete / total
	fmt.Printf("completeness: %.2f%%\n", percent*100)

}
Output:

totalChoices: 3
incomplete (2):
 select argument: varGender
 plural argument: varNum
rejected (1):
 "varGender": option "unknown"
completeness: 33.33%

func Errors added in v0.3.0

func Errors(
	locale language.Tag, raw string, tokens []Token, selectOptions SelectOptions,
) iter.Seq[error]

Errors returns an iterator over all semantic (non-syntax) errors for the given ICU message.

Example
package main

import (
	"fmt"
	"os"

	"github.com/romshark/icumsg"
	"golang.org/x/text/language"
)

// main tokenizes an ICU message for the Ukrainian locale and prints every
// semantic error yielded by icumsg.Errors.
func main() {
	// varGender lists unsupported option "unknown"
	locale := language.Ukrainian
	msg := `This message is valid but has incomplete plural and unknown select options:
	missing one: {varNum, plural,
		other{
			missing male: {varGender, select,
				unknown{
					varNum[other],varGender[unknown]
				}
				female{
					varNum[other],varGender[female]
				}
				other{
					varNum[other],varGender[other]
				}
			}
		}
	}`

	var tk icumsg.Tokenizer
	tokens, err := tk.Tokenize(locale, nil, msg)
	if err != nil {
		fmt.Printf("ERR: at index %d: %v\n", tk.Pos(), err)
		os.Exit(1)
	}

	// Option "other" doesn't need to be included because it's always required.
	genderOptions := []string{"male", "female"}

	// selectOptions supplies expected options and policies for "varGender"
	// only; every other argument gets no options and zero-valued policies.
	selectOptions := func(argName string) (
		[]string, icumsg.OptionsPresencePolicy, icumsg.OptionUnknownPolicy,
	) {
		if argName != "varGender" {
			return nil, 0, 0
		}
		return genderOptions,
			icumsg.OptionsPresencePolicyRequired,
			icumsg.OptionUnknownPolicyReject
	}

	// textAfter returns the source text of the token that follows tokens[i].
	textAfter := func(i int) string {
		return tokens[i+1].String(msg, tokens)
	}

	for err := range icumsg.Errors(locale, msg, tokens, selectOptions) {
		switch e := err.(type) {
		case icumsg.ErrorSelectMissingOption:
			fmt.Printf("ERR at %s: %v\n", textAfter(e.TokenIndex), err.Error())
		case icumsg.ErrorPluralMissingOption:
			fmt.Printf("ERR at %s: %v\n", textAfter(e.TokenIndex), err.Error())
		case icumsg.ErrorSelectInvalidOption:
			argName := textAfter(e.TokenIndexArgument)
			optName := textAfter(e.TokenIndexOption)
			fmt.Printf("ERR at %s (option %q): %v\n", argName, optName, err.Error())
		}
	}

}
Output:

ERR at varGender (option "unknown"): invalid select option
ERR at varGender: missing select options [male]
ERR at varNum: missing cardinal plural options [one,few,many]

func Options

func Options(buffer []Token, tokenIndex int) iter.Seq[int]

Options returns an iterator over all options of a select, plural or selectordinal token at buffer[tokenIndex]. The iterator yields the indexes of option tokens. Returns a no-op iterator if buffer[tokenIndex] is none of:

  • TokenTypeSelect
  • TokenTypePlural
  • TokenTypeSelectOrdinal

Types

type ErrorPluralMissingOption added in v0.3.0

// ErrorPluralMissingOption is an error value yielded by Errors. Its message
// in the Errors example reads "missing cardinal plural options [one,few,many]".
type ErrorPluralMissingOption struct {
	// NOTE(review): Need and Has presumably hold the plural rules required
	// by the locale and those present in the message — confirm.
	Need, Has  cldr.PluralRules
	// TokenIndex indexes the token buffer; the Errors example reads the
	// argument name from tokens[TokenIndex+1].
	TokenIndex int

	// When Ordinal == false the plural options are cardinal.
	Ordinal bool
}

func (ErrorPluralMissingOption) Error added in v0.3.0

func (e ErrorPluralMissingOption) Error() string

func (ErrorPluralMissingOption) MissingOptions added in v0.3.0

func (e ErrorPluralMissingOption) MissingOptions() iter.Seq[string]

type ErrorSelectInvalidOption added in v0.3.0

// ErrorSelectInvalidOption is an error value yielded by Errors. Its message
// in the Errors example reads "invalid select option".
type ErrorSelectInvalidOption struct {
	// TokenIndexArgument indexes the token buffer; the Errors example reads
	// the argument name from tokens[TokenIndexArgument+1].
	TokenIndexArgument int
	// TokenIndexOption indexes the token buffer; the Errors example reads
	// the option name from tokens[TokenIndexOption+1].
	TokenIndexOption   int
}

func (ErrorSelectInvalidOption) Error added in v0.3.0

func (e ErrorSelectInvalidOption) Error() string

type ErrorSelectMissingOption added in v0.3.0

// ErrorSelectMissingOption is an error value yielded by Errors. Its message
// in the Errors example reads "missing select options [male]".
type ErrorSelectMissingOption struct {
	// NOTE(review): Need and Has presumably hold the expected select options
	// and those present in the message — confirm.
	Need, Has  []string
	// TokenIndex indexes the token buffer; the Errors example reads the
	// argument name from tokens[TokenIndex+1].
	TokenIndex int
}

func (ErrorSelectMissingOption) Error added in v0.3.0

func (e ErrorSelectMissingOption) Error() string

func (ErrorSelectMissingOption) MissingOptions added in v0.3.0

func (e ErrorSelectMissingOption) MissingOptions() iter.Seq[string]

type OptionUnknownPolicy

type OptionUnknownPolicy int8

OptionUnknownPolicy defines treatment of unknown select options.

const (
	// OptionUnknownPolicyIgnore ignores unknown select options.
	OptionUnknownPolicyIgnore OptionUnknownPolicy = iota
	// OptionUnknownPolicyReject rejects unknown select options.
	OptionUnknownPolicyReject
)

type OptionsPresencePolicy

type OptionsPresencePolicy int8

OptionsPresencePolicy defines treatment of known options.

type SelectOptions added in v0.3.0

// SelectOptions is the callback type accepted by Errors. Given an argument
// name it returns a list of options plus an OptionsPresencePolicy and an
// OptionUnknownPolicy; the examples return (nil, 0, 0) for arguments they
// place no expectations on.
type SelectOptions func(argName string) (
	[]string, OptionsPresencePolicy, OptionUnknownPolicy,
)

type Token

type Token struct {
	// IndexStart and IndexEnd have different meaning depending on Type.
	// See the token type groups.
	IndexStart, IndexEnd int
	Type                 TokenType
}

func (Token) String

func (t Token) String(s string, buffer []Token) string

String returns a slice of the input string token t represents.

type TokenType

type TokenType uint8
const (

	// Literal. IndexStart and IndexEnd are byte offsets in the input string.
	TokenTypeLiteral      TokenType // Any literal
	TokenTypeSimpleArg              // { arg }
	TokenTypePluralOffset           // offset:1
	TokenTypeArgName                // The name of any argument

	// The following token types always follow TokenTypeArgName.
	TokenTypeArgTypeNumber   // "You have {count, number} new messages."
	TokenTypeArgTypeDate     // "Your appointment is on {appointmentDate, date}."
	TokenTypeArgTypeTime     // "The train departs at {departureTime, time}."
	TokenTypeArgTypeSpellout // "You have {count, spellout} new notifications."
	TokenTypeArgTypeOrdinal  // "You came in {place, ordinal} place!"
	TokenTypeArgTypeDuration // "Estimated time: {seconds, duration}."

	// The following token types always follow any argument type.
	TokenTypeArgStyleShort
	TokenTypeArgStyleMedium
	TokenTypeArgStyleLong
	TokenTypeArgStyleFull
	TokenTypeArgStyleInteger
	TokenTypeArgStyleCurrency
	TokenTypeArgStylePercent
	TokenTypeArgStyleCustom
	TokenTypeArgStyleSkeleton

	// TokenTypeOptionName is the select option. Always follows TokenTypeOption.
	TokenTypeOptionName

	// Complex. IndexEnd is an index of the token buffer.
	TokenTypePlural        // {arg, plural, ...}
	TokenTypeSelect        // {arg, select, ...}
	TokenTypeSelectOrdinal // {arg, selectordinal, ...}
	TokenTypeOption        // The { ... } that follows an option name.
	TokenTypeOptionZero    // zero { ... }
	TokenTypeOptionOne     // one { ... }
	TokenTypeOptionTwo     // two { ... }
	TokenTypeOptionFew     // few { ... }
	TokenTypeOptionMany    // many { ... }
	TokenTypeOptionOther   // other { ... }
	TokenTypeOptionNumber  // =2 { ... }

	// Terminator. IndexStart is an index of the token buffer.
	TokenTypeOptionTerm     // } Terminator of an option
	TokenTypeComplexArgTerm // } Terminator of a complex argument
)

func (TokenType) String

func (t TokenType) String() string

type Tokenizer

// Tokenizer tokenizes ICU messages via its Tokenize method and reports its
// last position via Pos. The examples use a zero-value Tokenizer directly
// (`var tokenizer icumsg.Tokenizer`).
type Tokenizer struct {
	// contains filtered or unexported fields
}
Example
package main

import (
	"fmt"
	"os"

	"github.com/romshark/icumsg"
	"golang.org/x/text/language"
)

// main tokenizes a short ICU message and prints every token together with
// its type and the text it represents.
func main() {
	const input = `Hello {arg} ({rank, ordinal})!`

	var tk icumsg.Tokenizer
	toks, err := tk.Tokenize(language.English, nil, input)
	if err != nil {
		// Report where the tokenizer stopped before giving up.
		fmt.Printf("ERR: at index %d: %v\n", tk.Pos(), err)
		os.Exit(1)
	}

	fmt.Printf("token (%d):\n", len(toks))
	for idx := range toks {
		kind := toks[idx].Type.String()
		text := toks[idx].String(input, toks)
		fmt.Printf(" %d (%s): %q\n", idx, kind, text)
	}

}
Output:

token (8):
 0 (literal): "Hello "
 1 (simple argument): "{arg}"
 2 (argument name): "arg"
 3 (literal): " ("
 4 (simple argument): "{rank, ordinal}"
 5 (argument name): "rank"
 6 (argument type ordinal): "ordinal"
 7 (literal): ")!"
Example (Error)
package main

import (
	"fmt"

	"github.com/romshark/icumsg"
	"golang.org/x/text/language"
)

// main shows how a tokenization error and its position are reported: the
// message contains the plural option "few", which is rejected for English.
func main() {
	const input = `{numMsgs,plural, one{# message} other{# messages} few{this is wrong}}`

	var tk icumsg.Tokenizer
	if _, err := tk.Tokenize(language.English, nil, input); err != nil {
		fmt.Printf("Error at index %d: %v\n", tk.Pos(), err)
	}

}
Output:

Error at index 50: plural rule unsupported for locale

func (*Tokenizer) Pos

func (t *Tokenizer) Pos() int

Pos returns the last position (byte offset in the input string) the tokenizer was at.

func (*Tokenizer) Tokenize

func (t *Tokenizer) Tokenize(
	locale language.Tag, buffer []Token, s string,
) ([]Token, error)

Tokenize resets the tokenizer and appends any tokens encountered to buffer.

Directories

Path Synopsis
internal
cmd/gencldr command

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL