eval

package
v0.4.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 16, 2026 License: Apache-2.0 Imports: 22 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var ErrNoSuchModel = errors.New("no such model")
View Source
var ObligatoryExamples = []QueryStatsRecord{
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[word=\".*\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[word=\".+\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[lemma=\".*\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[lemma=\".+\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[lc=\".*\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[lc=\".+\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[tag=\"N.*\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[tag=\"N.+\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[pos=\"N\"]"},
}

Functions

This section is empty.

Types

type LearningDataStats

type LearningDataStats struct {
	NumProcessed       int     `msgpack:"numProcessed"`
	NumFailed          int     `msgpack:"numFailed"`
	DeduplicationRatio float64 `msgpack:"deduplicationRatio"`
}

func (LearningDataStats) AsComment

func (stats LearningDataStats) AsComment() string

type MLModel

type MLModel interface {

	// Train trains the model based on input data. In case the model
	// supports only inference (e.g. our XGBoost), this should just prepare
	// the data in a format required by the actual program performing
	// the learning.
	Train(ctx context.Context, data []feats.QueryEvaluation, slowQueriesTime float64, comment string) error

	Predict(feats.QueryEvaluation) predict.Prediction
	SetClassThreshold(v float64)
	GetClassThreshold() float64
	GetSlowQueriesThresholdTime() float64
	SaveToFile(string) error
	GetInfo() string

	// IsInferenceOnly specifies whether the model supports only
	// inference, i.e. whether the actual learning is performed by
	// an external program and Train merely prepares data for it
	// (see the Train description above).
	// NOTE(review): the original comment was cut off mid-sentence;
	// presumably true means "inference only" - confirm against
	// the implementations.
	IsInferenceOnly() bool

	// CreateModelFileName should generate proper model filename based
	// on the feature (i.e. input) file name. This should keep data and
	// model names organized and easy to search through.
	CreateModelFileName(featFile string) string
}

MLModel is a generalization of a Machine Learning model used to extract knowledge about CQL queries.

func GetMLModel added in v0.2.1

func GetMLModel(modelType, modelPath string) (MLModel, error)

type PrecAndRecall

type PrecAndRecall struct {
	Precision float64
	Recall    float64
	FBeta     float64
}

func (PrecAndRecall) CSV

func (pr PrecAndRecall) CSV(x float64) string

type Predictor

type Predictor struct {
	Evaluations []feats.QueryEvaluation

	LearningDataStats LearningDataStats
	// contains filtered or unexported fields
}

func NewPredictor

func NewPredictor(
	mlModel MLModel,
	conf *cnf.Conf,
) *Predictor

func (*Predictor) BalanceSample

func (model *Predictor) BalanceSample() []feats.QueryEvaluation

func (*Predictor) CreateAndTestModel

func (model *Predictor) CreateAndTestModel(
	ctx context.Context,
	testData []feats.QueryEvaluation,
	featsFile string,
	reporter *Reporter,
) error

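CreateAndTestModel trains an ML model and saves it to a file specified by the `outputPath` (NOTE: the current signature has no such parameter; the path is presumably derived from `featsFile` — confirm). It also takes a Python script (presumably via `reporter`; the original description is truncated at this point).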

func (*Predictor) Deduplicate

func (model *Predictor) Deduplicate()

func (*Predictor) FindAndSetDataMidpoint

func (model *Predictor) FindAndSetDataMidpoint()

func (*Predictor) PrecisionAndRecall

func (model *Predictor) PrecisionAndRecall(misclassQueries misclassifiedQueryReporter) PrecAndRecall

func (*Predictor) ProcessEntry

func (model *Predictor) ProcessEntry(entry QueryStatsRecord) error

func (*Predictor) SetStats

func (model *Predictor) SetStats(numProcessed, numFailed int)

type QueryStatsRecord

type QueryStatsRecord struct {
	Corpus        string  `json:"corpus"`
	CorpusSize    int64   `json:"corpusSize"`
	SubcorpusSize int64   `json:"subcorpusSize"`
	TimeProc      float64 `json:"timeProc"`
	Query         string  `json:"query"`

	// IsSynthetic specifies whether the record comes from
	// production KonText stats log or if it is generated
	// using a benchmarking module (= MQuery).
	IsSynthetic bool `json:"isSynthetic,omitempty"`

	FlaggedAsSlow bool `json:"flaggedAsSlow,omitempty"`
}

func (QueryStatsRecord) GetCQL

func (rec QueryStatsRecord) GetCQL() string

func (QueryStatsRecord) UniqKey

func (rec QueryStatsRecord) UniqKey() string

type Reporter

type Reporter struct {
	RFAccuracyScript string

	MisclassQueriesOutPath string
	// contains filtered or unexported fields
}

func (*Reporter) AddMisclassifiedQuery

func (reporter *Reporter) AddMisclassifiedQuery(q feats.QueryEvaluation, mlOut, threshold, slowProcTime float64)

func (*Reporter) PlotRFAccuracy

func (reporter *Reporter) PlotRFAccuracy(data, chartLabel, modelPath string) error

PlotRFAccuracy creates a chart from CSV data using a Python plotting script. The output file name is derived from the provided modelPath.

func (*Reporter) SaveMisclassifiedQueries

func (reporter *Reporter) SaveMisclassifiedQueries() error

func (*Reporter) ShowMisclassifiedQueries

func (reporter *Reporter) ShowMisclassifiedQueries()

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL