Documentation
¶
Index ¶
- Constants
- Variables
- func GetCategoryType(s string) (uint32, error)
- func IsUserDictionary(version uint64) bool
- func PrintDictionary(filename string, utf16string bool, systemDict *BinaryDictionary, ...) error
- func PrintHeader(dictfile string, output io.Writer) error
- type BinaryDictionary
- type CalculateCostFunc
- type CharacterCategory
- type DictionaryBuilder
- func (dicbuilder *DictionaryBuilder) BuildLexicon(store PosIdStore, input io.Reader) error
- func (dicbuilder *DictionaryBuilder) EntrySize() int
- func (dicbuilder *DictionaryBuilder) WriteGrammar(postable *PosTable, input io.Reader, writer io.Writer) error
- func (dicbuilder *DictionaryBuilder) WriteGrammarUser(postable *PosTable, writer io.Writer) error
- func (dicbuilder *DictionaryBuilder) WriteLexicon(writer io.WriteSeeker, store PosIdStore) error
- type DictionaryHeader
- type DoubleArrayLexicon
- func (lexicon *DoubleArrayLexicon) CalculateCost(cf CalculateCostFunc) error
- func (lexicon *DoubleArrayLexicon) GetCost(wordId int32) int16
- func (lexicon *DoubleArrayLexicon) GetDictionaryId(wordId int32) int
- func (lexicon *DoubleArrayLexicon) GetLeftId(wordId int32) int16
- func (lexicon *DoubleArrayLexicon) GetRightId(wordId int32) int16
- func (lexicon *DoubleArrayLexicon) GetWordId(headword string, posId int16, readingForm string) int32
- func (lexicon *DoubleArrayLexicon) GetWordInfo(wordId int32) *WordInfo
- func (lexicon *DoubleArrayLexicon) Lookup(text []byte, offset int) *DoubleArrayLexiconIterator
- func (lexicon *DoubleArrayLexicon) Size() int32
- func (lexicon *DoubleArrayLexicon) WriteTrieTo(writer io.Writer) (int, error)
- func (lexicon *DoubleArrayLexicon) WriteWordIdTableTo(writer io.Writer) (int, error)
- func (lexicon *DoubleArrayLexicon) WriteWordInfos(writer io.Writer, offset int64, offsetlen int64, utf16string bool) (int, *bytes.Buffer, error)
- func (lexicon *DoubleArrayLexicon) WriteWordParamsTo(writer io.Writer) (int, error)
- type DoubleArrayLexiconIterator
- type Grammar
- func (g *Grammar) AddPosList(fromg *Grammar)
- func (g *Grammar) GetConnectCost(leftId int16, rightId int16) int16
- func (g *Grammar) GetPartOfSpeechId(pos []string) int16
- func (g *Grammar) GetPartOfSpeechSize() int
- func (g *Grammar) GetPartOfSpeechString(posId int16) []string
- func (g *Grammar) GetPosId(posstrings ...string) int16
- func (g *Grammar) SetConnectCost(leftId int16, rightId int16, cost int16)
- func (g *Grammar) WriteConnMatrixTo(writer io.Writer) (int, error)
- func (g *Grammar) WritePOSTableTo(buffer *bytes.Buffer, utf16string bool) error
- type LexiconSet
- func (s *LexiconSet) Add(lexicon *DoubleArrayLexicon, posOffset int32)
- func (s *LexiconSet) GetCost(wordId int32) int16
- func (s *LexiconSet) GetDictionaryId(wordId int32) int
- func (s *LexiconSet) GetLeftId(wordId int32) int16
- func (s *LexiconSet) GetRightId(wordId int32) int16
- func (s *LexiconSet) GetWordId(headword string, posId int16, readingForm string) int32
- func (s *LexiconSet) GetWordInfo(wordId int32) *WordInfo
- func (s *LexiconSet) IsFull() bool
- func (s *LexiconSet) Lookup(text []byte, offset int) *LexiconSetIterator
- func (s *LexiconSet) Size() int32
- type LexiconSetIterator
- type PosIdStore
- type PosTable
- type PosTableUser
- type WordInfo
Constants ¶
View Source
const ( DEFAULT uint32 = 1 // The fallback category SPACE uint32 = 1 << 1 // WhiteSpaces KANJI uint32 = 1 << 2 // CJKV ideographic characters SYMBOL uint32 = 1 << 3 // Symbols NUMERIC uint32 = 1 << 4 // Numerical characters ALPHA uint32 = 1 << 5 // Latin alphabets HIRAGANA uint32 = 1 << 6 // Hiragana characters KATAKANA uint32 = 1 << 7 // Katakana characters KANJINUMERIC uint32 = 1 << 8 // Kanji numeric characters GREEK uint32 = 1 << 9 // Greek alphabets CYRILLIC uint32 = 1 << 10 // Cyrillic alphabets USER1 uint32 = 1 << 11 // User defined category USER2 uint32 = 1 << 12 // User defined category USER3 uint32 = 1 << 13 // User defined category USER4 uint32 = 1 << 14 // User defined category NOOOVBOW uint32 = 1 << 15 // Characters that cannot be the beginning of word )
Categories of characters
View Source
const ( StringUtf8MaxLength = 32767 StringUtf16MaxLength = 32767 ArrayMaxLength = 127 NumberOfColumns = 18 BufferSize = 1024 * 1024 )
View Source
const ( DescriptionSize = 256 HeaderStorageSize = 8 + 8 + DescriptionSize )
View Source
const ( SystemDictVersion = 0x7366d3f18bd111e7 UserDictVersion = 0xa50f31188bd211e7 UserDictVersion2 = 0x9fdeb5a90168d868 )
View Source
const (
InhibitedConnection = math.MaxInt16
)
View Source
const (
LexiconSetMaxDictionaries = 16
)
Variables ¶
View Source
var ( BosParameter = []int16{0, 0, 0} EosParameter = []int16{0, 0, 0} )
Functions ¶
func GetCategoryType ¶
func IsUserDictionary ¶
func PrintDictionary ¶
Types ¶
type BinaryDictionary ¶
type BinaryDictionary struct {
Header *DictionaryHeader
Grammar *Grammar
Lexicon *DoubleArrayLexicon
// contains filtered or unexported fields
}
func NewBinaryDictionary ¶
func NewBinaryDictionary(filename string, utf16string bool) (*BinaryDictionary, error)
func ReadSystemDictionary ¶
func ReadSystemDictionary(filename string, utf16string bool) (*BinaryDictionary, error)
func ReadUserDictionary ¶
func ReadUserDictionary(filename string, utf16string bool) (*BinaryDictionary, error)
func (*BinaryDictionary) Close ¶
func (bd *BinaryDictionary) Close() error
type CalculateCostFunc ¶
type CharacterCategory ¶
type CharacterCategory struct {
// contains filtered or unexported fields
}
func NewCharacterCategory ¶
func NewCharacterCategory() *CharacterCategory
func (*CharacterCategory) GetCategoryTypes ¶
func (cc *CharacterCategory) GetCategoryTypes(codePoint rune) uint32
func (*CharacterCategory) ReadCharacterDefinition ¶
func (cc *CharacterCategory) ReadCharacterDefinition(charDefReader io.Reader) error
type DictionaryBuilder ¶
type DictionaryBuilder struct {
// contains filtered or unexported fields
}
func NewDictionaryBuilder ¶
func NewDictionaryBuilder(position int64, systemLexicon *DoubleArrayLexicon, utf16string bool) *DictionaryBuilder
func (*DictionaryBuilder) BuildLexicon ¶
func (dicbuilder *DictionaryBuilder) BuildLexicon(store PosIdStore, input io.Reader) error
func (*DictionaryBuilder) EntrySize ¶
func (dicbuilder *DictionaryBuilder) EntrySize() int
func (*DictionaryBuilder) WriteGrammar ¶
func (*DictionaryBuilder) WriteGrammarUser ¶
func (dicbuilder *DictionaryBuilder) WriteGrammarUser(postable *PosTable, writer io.Writer) error
func (*DictionaryBuilder) WriteLexicon ¶
func (dicbuilder *DictionaryBuilder) WriteLexicon(writer io.WriteSeeker, store PosIdStore) error
type DictionaryHeader ¶
func NewDictionaryHeader ¶
func NewDictionaryHeader(version uint64, createTime int64, description string) *DictionaryHeader
func ParseDictionaryHeader ¶
func ParseDictionaryHeader(input []byte, offset int) *DictionaryHeader
func (*DictionaryHeader) ToBytes ¶
func (dh *DictionaryHeader) ToBytes() ([]byte, error)
type DoubleArrayLexicon ¶
type DoubleArrayLexicon struct {
// contains filtered or unexported fields
}
func NewDoubleArrayLexicon ¶
func NewDoubleArrayLexicon(bytebuffer []byte, offset int, utf16string bool) *DoubleArrayLexicon
func (*DoubleArrayLexicon) CalculateCost ¶
func (lexicon *DoubleArrayLexicon) CalculateCost(cf CalculateCostFunc) error
func (*DoubleArrayLexicon) GetCost ¶
func (lexicon *DoubleArrayLexicon) GetCost(wordId int32) int16
func (*DoubleArrayLexicon) GetDictionaryId ¶
func (lexicon *DoubleArrayLexicon) GetDictionaryId(wordId int32) int
func (*DoubleArrayLexicon) GetLeftId ¶
func (lexicon *DoubleArrayLexicon) GetLeftId(wordId int32) int16
func (*DoubleArrayLexicon) GetRightId ¶
func (lexicon *DoubleArrayLexicon) GetRightId(wordId int32) int16
func (*DoubleArrayLexicon) GetWordId ¶
func (lexicon *DoubleArrayLexicon) GetWordId(headword string, posId int16, readingForm string) int32
func (*DoubleArrayLexicon) GetWordInfo ¶
func (lexicon *DoubleArrayLexicon) GetWordInfo(wordId int32) *WordInfo
func (*DoubleArrayLexicon) Lookup ¶
func (lexicon *DoubleArrayLexicon) Lookup(text []byte, offset int) *DoubleArrayLexiconIterator
func (*DoubleArrayLexicon) Size ¶
func (lexicon *DoubleArrayLexicon) Size() int32
func (*DoubleArrayLexicon) WriteTrieTo ¶
func (lexicon *DoubleArrayLexicon) WriteTrieTo(writer io.Writer) (int, error)
func (*DoubleArrayLexicon) WriteWordIdTableTo ¶
func (lexicon *DoubleArrayLexicon) WriteWordIdTableTo(writer io.Writer) (int, error)
func (*DoubleArrayLexicon) WriteWordInfos ¶
func (*DoubleArrayLexicon) WriteWordParamsTo ¶
func (lexicon *DoubleArrayLexicon) WriteWordParamsTo(writer io.Writer) (int, error)
type DoubleArrayLexiconIterator ¶
type DoubleArrayLexiconIterator struct {
// contains filtered or unexported fields
}
func (*DoubleArrayLexiconIterator) Err ¶
func (it *DoubleArrayLexiconIterator) Err() error
func (*DoubleArrayLexiconIterator) Get ¶
func (it *DoubleArrayLexiconIterator) Get() (int32, int)
func (*DoubleArrayLexiconIterator) Next ¶
func (it *DoubleArrayLexiconIterator) Next() bool
type Grammar ¶
type Grammar struct {
CharCategory *CharacterCategory
StorageSize int
// contains filtered or unexported fields
}
func (*Grammar) AddPosList ¶
func (*Grammar) GetConnectCost ¶
func (*Grammar) GetPartOfSpeechId ¶
func (*Grammar) GetPartOfSpeechSize ¶
func (*Grammar) GetPartOfSpeechString ¶
func (*Grammar) SetConnectCost ¶
func (*Grammar) WriteConnMatrixTo ¶
type LexiconSet ¶
type LexiconSet struct {
// contains filtered or unexported fields
}
func NewLexiconSet ¶
func NewLexiconSet(systemLexicon *DoubleArrayLexicon) *LexiconSet
func (*LexiconSet) Add ¶
func (s *LexiconSet) Add(lexicon *DoubleArrayLexicon, posOffset int32)
func (*LexiconSet) GetCost ¶
func (s *LexiconSet) GetCost(wordId int32) int16
func (*LexiconSet) GetDictionaryId ¶
func (s *LexiconSet) GetDictionaryId(wordId int32) int
func (*LexiconSet) GetLeftId ¶
func (s *LexiconSet) GetLeftId(wordId int32) int16
func (*LexiconSet) GetRightId ¶
func (s *LexiconSet) GetRightId(wordId int32) int16
func (*LexiconSet) GetWordId ¶
func (s *LexiconSet) GetWordId(headword string, posId int16, readingForm string) int32
func (*LexiconSet) GetWordInfo ¶
func (s *LexiconSet) GetWordInfo(wordId int32) *WordInfo
func (*LexiconSet) IsFull ¶
func (s *LexiconSet) IsFull() bool
func (*LexiconSet) Lookup ¶
func (s *LexiconSet) Lookup(text []byte, offset int) *LexiconSetIterator
func (*LexiconSet) Size ¶
func (s *LexiconSet) Size() int32
type LexiconSetIterator ¶
type LexiconSetIterator struct {
// contains filtered or unexported fields
}
func (*LexiconSetIterator) Err ¶
func (it *LexiconSetIterator) Err() error
func (*LexiconSetIterator) Get ¶
func (it *LexiconSetIterator) Get() (int32, int)
func (*LexiconSetIterator) Next ¶
func (it *LexiconSetIterator) Next() bool
type PosIdStore ¶
type PosTable ¶
type PosTable struct {
// contains filtered or unexported fields
}
func NewPosTable ¶
func NewPosTable() *PosTable
func (*PosTable) GetPartOfSpeechSize ¶
type PosTableUser ¶
type PosTableUser struct {
PosTable
// contains filtered or unexported fields
}
func NewPosTableUser ¶
func NewPosTableUser(base PosIdStore) *PosTableUser
func (*PosTableUser) GetPosId ¶
func (pt *PosTableUser) GetPosId(posstrings ...string) int16
Click to show internal directories.
Click to hide internal directories.