dictionary

package
v0.0.0-...-f57c564 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 1, 2019 License: Apache-2.0 Imports: 20 Imported by: 0

Documentation

Index

Constants

View Source
const (
	DEFAULT      uint32 = 1       // The fallback category
	SPACE        uint32 = 1 << 1  // Whitespace characters
	KANJI        uint32 = 1 << 2  // CJKV ideographic characters
	SYMBOL       uint32 = 1 << 3  // Symbols
	NUMERIC      uint32 = 1 << 4  // Numerical characters
	ALPHA        uint32 = 1 << 5  // Latin alphabets
	HIRAGANA     uint32 = 1 << 6  // Hiragana characters
	KATAKANA     uint32 = 1 << 7  // Katakana characters
	KANJINUMERIC uint32 = 1 << 8  // Kanji numeric characters
	GREEK        uint32 = 1 << 9  // Greek alphabets
	CYRILLIC     uint32 = 1 << 10 // Cyrillic alphabets
	USER1        uint32 = 1 << 11 // User defined category
	USER2        uint32 = 1 << 12 // User defined category
	USER3        uint32 = 1 << 13 // User defined category
	USER4        uint32 = 1 << 14 // User defined category
	NOOOVBOW     uint32 = 1 << 15 // Characters that cannot be the beginning of a word
)

Categories of characters

View Source
const (
	StringUtf8MaxLength  = 32767
	StringUtf16MaxLength = 32767
	ArrayMaxLength       = 127
	NumberOfColumns      = 18
	BufferSize           = 1024 * 1024
)
View Source
const (
	DescriptionSize   = 256
	HeaderStorageSize = 8 + 8 + DescriptionSize
)
View Source
const (
	SystemDictVersion = 0x7366d3f18bd111e7
	UserDictVersion   = 0xa50f31188bd211e7
	UserDictVersion2  = 0x9fdeb5a90168d868
)
View Source
const (
	InhibitedConnection = math.MaxInt16
)
View Source
const (
	LexiconSetMaxDictionaries = 16
)

Variables

View Source
var (
	BosParameter = []int16{0, 0, 0}
	EosParameter = []int16{0, 0, 0}
)

Functions

func GetCategoryType

func GetCategoryType(s string) (uint32, error)

func IsUserDictionary

func IsUserDictionary(version uint64) bool

func PrintDictionary

func PrintDictionary(filename string, utf16string bool, systemDict *BinaryDictionary, output io.Writer) error

func PrintHeader

func PrintHeader(dictfile string, output io.Writer) error

Types

type BinaryDictionary

type BinaryDictionary struct {
	Header  *DictionaryHeader
	Grammar *Grammar
	Lexicon *DoubleArrayLexicon
	// contains filtered or unexported fields
}

func NewBinaryDictionary

func NewBinaryDictionary(filename string, utf16string bool) (*BinaryDictionary, error)

func ReadSystemDictionary

func ReadSystemDictionary(filename string, utf16string bool) (*BinaryDictionary, error)

func ReadUserDictionary

func ReadUserDictionary(filename string, utf16string bool) (*BinaryDictionary, error)

func (*BinaryDictionary) Close

func (bd *BinaryDictionary) Close() error

type CalculateCostFunc

type CalculateCostFunc func(text string) (int16, error)

type CharacterCategory

type CharacterCategory struct {
	// contains filtered or unexported fields
}

func NewCharacterCategory

func NewCharacterCategory() *CharacterCategory

func (*CharacterCategory) GetCategoryTypes

func (cc *CharacterCategory) GetCategoryTypes(codePoint rune) uint32

func (*CharacterCategory) ReadCharacterDefinition

func (cc *CharacterCategory) ReadCharacterDefinition(charDefReader io.Reader) error

type DictionaryBuilder

type DictionaryBuilder struct {
	// contains filtered or unexported fields
}

func NewDictionaryBuilder

func NewDictionaryBuilder(position int64, systemLexicon *DoubleArrayLexicon, utf16string bool) *DictionaryBuilder

func (*DictionaryBuilder) BuildLexicon

func (dicbuilder *DictionaryBuilder) BuildLexicon(store PosIdStore, input io.Reader) error

func (*DictionaryBuilder) EntrySize

func (dicbuilder *DictionaryBuilder) EntrySize() int

func (*DictionaryBuilder) WriteGrammar

func (dicbuilder *DictionaryBuilder) WriteGrammar(postable *PosTable, input io.Reader, writer io.Writer) error

func (*DictionaryBuilder) WriteGrammarUser

func (dicbuilder *DictionaryBuilder) WriteGrammarUser(postable *PosTable, writer io.Writer) error

func (*DictionaryBuilder) WriteLexicon

func (dicbuilder *DictionaryBuilder) WriteLexicon(writer io.WriteSeeker, store PosIdStore) error

type DictionaryHeader

type DictionaryHeader struct {
	Version     uint64
	CreateTime  int64
	Description string
}

func NewDictionaryHeader

func NewDictionaryHeader(version uint64, createTime int64, description string) *DictionaryHeader

func ParseDictionaryHeader

func ParseDictionaryHeader(input []byte, offset int) *DictionaryHeader

func (*DictionaryHeader) ToBytes

func (dh *DictionaryHeader) ToBytes() ([]byte, error)

type DoubleArrayLexicon

type DoubleArrayLexicon struct {
	// contains filtered or unexported fields
}

func NewDoubleArrayLexicon

func NewDoubleArrayLexicon(bytebuffer []byte, offset int, utf16string bool) *DoubleArrayLexicon

func (*DoubleArrayLexicon) CalculateCost

func (lexicon *DoubleArrayLexicon) CalculateCost(cf CalculateCostFunc) error

func (*DoubleArrayLexicon) GetCost

func (lexicon *DoubleArrayLexicon) GetCost(wordId int32) int16

func (*DoubleArrayLexicon) GetDictionaryId

func (lexicon *DoubleArrayLexicon) GetDictionaryId(wordId int32) int

func (*DoubleArrayLexicon) GetLeftId

func (lexicon *DoubleArrayLexicon) GetLeftId(wordId int32) int16

func (*DoubleArrayLexicon) GetRightId

func (lexicon *DoubleArrayLexicon) GetRightId(wordId int32) int16

func (*DoubleArrayLexicon) GetWordId

func (lexicon *DoubleArrayLexicon) GetWordId(headword string, posId int16, readingForm string) int32

func (*DoubleArrayLexicon) GetWordInfo

func (lexicon *DoubleArrayLexicon) GetWordInfo(wordId int32) *WordInfo

func (*DoubleArrayLexicon) Lookup

func (lexicon *DoubleArrayLexicon) Lookup(text []byte, offset int) *DoubleArrayLexiconIterator

func (*DoubleArrayLexicon) Size

func (lexicon *DoubleArrayLexicon) Size() int32

func (*DoubleArrayLexicon) WriteTrieTo

func (lexicon *DoubleArrayLexicon) WriteTrieTo(writer io.Writer) (int, error)

func (*DoubleArrayLexicon) WriteWordIdTableTo

func (lexicon *DoubleArrayLexicon) WriteWordIdTableTo(writer io.Writer) (int, error)

func (*DoubleArrayLexicon) WriteWordInfos

func (lexicon *DoubleArrayLexicon) WriteWordInfos(writer io.Writer, offset int64, offsetlen int64, utf16string bool) (int, *bytes.Buffer, error)

func (*DoubleArrayLexicon) WriteWordParamsTo

func (lexicon *DoubleArrayLexicon) WriteWordParamsTo(writer io.Writer) (int, error)

type DoubleArrayLexiconIterator

type DoubleArrayLexiconIterator struct {
	// contains filtered or unexported fields
}

func (*DoubleArrayLexiconIterator) Err

func (*DoubleArrayLexiconIterator) Get

func (it *DoubleArrayLexiconIterator) Get() (int32, int)

func (*DoubleArrayLexiconIterator) Next

func (it *DoubleArrayLexiconIterator) Next() bool

type Grammar

type Grammar struct {
	CharCategory *CharacterCategory
	StorageSize  int
	// contains filtered or unexported fields
}

func NewGrammar

func NewGrammar(bytebuffer []byte, offset int, utf16string bool) *Grammar

func (*Grammar) AddPosList

func (g *Grammar) AddPosList(fromg *Grammar)

func (*Grammar) GetConnectCost

func (g *Grammar) GetConnectCost(leftId int16, rightId int16) int16

func (*Grammar) GetPartOfSpeechId

func (g *Grammar) GetPartOfSpeechId(pos []string) int16

func (*Grammar) GetPartOfSpeechSize

func (g *Grammar) GetPartOfSpeechSize() int

func (*Grammar) GetPartOfSpeechString

func (g *Grammar) GetPartOfSpeechString(posId int16) []string

func (*Grammar) GetPosId

func (g *Grammar) GetPosId(posstrings ...string) int16

func (*Grammar) SetConnectCost

func (g *Grammar) SetConnectCost(leftId int16, rightId int16, cost int16)

func (*Grammar) WriteConnMatrixTo

func (g *Grammar) WriteConnMatrixTo(writer io.Writer) (int, error)

func (*Grammar) WritePOSTableTo

func (g *Grammar) WritePOSTableTo(buffer *bytes.Buffer, utf16string bool) error

type LexiconSet

type LexiconSet struct {
	// contains filtered or unexported fields
}

func NewLexiconSet

func NewLexiconSet(systemLexicon *DoubleArrayLexicon) *LexiconSet

func (*LexiconSet) Add

func (s *LexiconSet) Add(lexicon *DoubleArrayLexicon, posOffset int32)

func (*LexiconSet) GetCost

func (s *LexiconSet) GetCost(wordId int32) int16

func (*LexiconSet) GetDictionaryId

func (s *LexiconSet) GetDictionaryId(wordId int32) int

func (*LexiconSet) GetLeftId

func (s *LexiconSet) GetLeftId(wordId int32) int16

func (*LexiconSet) GetRightId

func (s *LexiconSet) GetRightId(wordId int32) int16

func (*LexiconSet) GetWordId

func (s *LexiconSet) GetWordId(headword string, posId int16, readingForm string) int32

func (*LexiconSet) GetWordInfo

func (s *LexiconSet) GetWordInfo(wordId int32) *WordInfo

func (*LexiconSet) IsFull

func (s *LexiconSet) IsFull() bool

func (*LexiconSet) Lookup

func (s *LexiconSet) Lookup(text []byte, offset int) *LexiconSetIterator

func (*LexiconSet) Size

func (s *LexiconSet) Size() int32

type LexiconSetIterator

type LexiconSetIterator struct {
	// contains filtered or unexported fields
}

func (*LexiconSetIterator) Err

func (it *LexiconSetIterator) Err() error

func (*LexiconSetIterator) Get

func (it *LexiconSetIterator) Get() (int32, int)

func (*LexiconSetIterator) Next

func (it *LexiconSetIterator) Next() bool

type PosIdStore

type PosIdStore interface {
	GetPosId(posstrings ...string) int16
	GetPartOfSpeechSize() int
}

type PosTable

type PosTable struct {
	// contains filtered or unexported fields
}

func NewPosTable

func NewPosTable() *PosTable

func (*PosTable) GetPartOfSpeechSize

func (pt *PosTable) GetPartOfSpeechSize() int

func (*PosTable) GetPosId

func (pt *PosTable) GetPosId(posstrings ...string) int16

type PosTableUser

type PosTableUser struct {
	PosTable
	// contains filtered or unexported fields
}

func NewPosTableUser

func NewPosTableUser(base PosIdStore) *PosTableUser

func (*PosTableUser) GetPosId

func (pt *PosTableUser) GetPosId(posstrings ...string) int16

type WordInfo

type WordInfo struct {
	Surface              string
	HeadwordLength       int16
	PosId                int16
	NormalizedForm       string
	DictionaryFormWordId int32
	DictionaryForm       string
	ReadingForm          string
	AUnitSplit           []int32
	BUnitSplit           []int32
	WordStructure        []int32
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL