eval

package

Version: v0.4.0 (latest) | Published: Jan 16, 2026 | License: Apache-2.0 | Imports: 22 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/czcorpus/cqlizer

Links

Open Source Insights

Documentation ¶

Index ¶

Variables
type LearningDataStats
- func (stats LearningDataStats) AsComment() string
type MLModel
- func GetMLModel(modelType, modelPath string) (MLModel, error)
type PrecAndRecall
- func (pr PrecAndRecall) CSV(x float64) string
type Predictor
- func NewPredictor(mlModel MLModel, conf *cnf.Conf) *Predictor
type QueryStatsRecord
- func (rec QueryStatsRecord) GetCQL() string
- func (rec QueryStatsRecord) UniqKey() string
type Reporter

Constants ¶

This section is empty.

Variables ¶

View Source

// ErrNoSuchModel is a sentinel error carrying the message "no such model".
// NOTE(review): presumably returned by GetMLModel when modelType is not
// recognized - confirm against the implementation.
var ErrNoSuchModel = errors.New("no such model")

View Source

// ObligatoryExamples is a fixed list of ten QueryStatsRecord values. All of
// them share the same Corpus ("syn_v13"), CorpusSize and TimeProc (500) and
// differ only in Query, which is always prefixed with "aword,".
// NOTE(review): presumably these records are always included in the learning
// data as known expensive queries - confirm; the unit of TimeProc is not
// visible here.
var ObligatoryExamples = []QueryStatsRecord{
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[word=\".*\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[word=\".+\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[lemma=\".*\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[lemma=\".+\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[lc=\".*\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[lc=\".+\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[tag=\"N.*\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[tag=\"N.+\"]"},
	{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[pos=\"N\"]"},
}

Functions ¶

This section is empty.

Types ¶

type LearningDataStats ¶

// LearningDataStats holds summary figures about the learning data. The fields
// are serialized with msgpack under the tagged names.
type LearningDataStats struct {
	// NumProcessed is a count of processed items.
	// NOTE(review): presumably set via Predictor.SetStats - confirm.
	NumProcessed       int     `msgpack:"numProcessed"`
	// NumFailed is a count of failed items.
	// NOTE(review): presumably set via Predictor.SetStats - confirm.
	NumFailed          int     `msgpack:"numFailed"`
	// DeduplicationRatio is a ratio related to deduplication.
	// NOTE(review): presumably computed by Predictor.Deduplicate; the exact
	// definition of the ratio is not visible here - confirm.
	DeduplicationRatio float64 `msgpack:"deduplicationRatio"`
}

func (LearningDataStats) AsComment ¶

func (stats LearningDataStats) AsComment() string

type MLModel ¶

type MLModel interface {

	// Train trains the model based on input data. In case the model
	// supports only inference (e.g. our XGBoost), this should just prepare
	// data to a format required by actual program performing the learning.
	Train(ctx context.Context, data []feats.QueryEvaluation, slowQueriesTime float64, comment string) error

	// Predict takes a single query evaluation and returns the model's
	// prediction for it.
	Predict(feats.QueryEvaluation) predict.Prediction

	// SetClassThreshold sets the class threshold to v.
	// NOTE(review): presumably the decision boundary applied to the model
	// output when classifying a query - confirm.
	SetClassThreshold(v float64)

	// GetClassThreshold returns the current class threshold.
	GetClassThreshold() float64

	// GetSlowQueriesThresholdTime returns the slow queries threshold time.
	// NOTE(review): presumably corresponds to the slowQueriesTime value
	// passed to Train - confirm.
	GetSlowQueriesThresholdTime() float64

	// SaveToFile saves the model.
	// NOTE(review): the string argument is presumably the target file
	// path - confirm.
	SaveToFile(string) error

	// GetInfo returns a string with information about the model.
	GetInfo() string

	// IsInferenceOnly specifies whether the model supports only
	// inference, i.e. whether it cannot be trained directly (see Train).
	IsInferenceOnly() bool

	// CreateModelFileName should generate proper model filename based
	// on the feature (i.e. input) file name. This should keep data and
	// model names organized and easy to search through.
	CreateModelFileName(featFile string) string
}

MLModel is a generalization of a Machine Learning model used to extract knowledge about CQL queries.

func GetMLModel ¶ added in v0.2.1

func GetMLModel(modelType, modelPath string) (MLModel, error)

type PrecAndRecall ¶

// PrecAndRecall groups precision, recall and F-beta values. It is the
// result type of Predictor.PrecisionAndRecall.
type PrecAndRecall struct {
	// Precision is the precision value.
	Precision float64
	// Recall is the recall value.
	Recall    float64
	// FBeta is the F-beta value.
	// NOTE(review): the beta used is not visible here - confirm.
	FBeta     float64
}

func (PrecAndRecall) CSV ¶

func (pr PrecAndRecall) CSV(x float64) string

type Predictor ¶

// Predictor holds a set of query evaluations together with statistics about
// the learning data. Instances are created with NewPredictor from an MLModel
// and a configuration.
type Predictor struct {
	// Evaluations is the list of query evaluations held by the predictor.
	// NOTE(review): presumably filled by ProcessEntry and used as the
	// training input - confirm.
	Evaluations []feats.QueryEvaluation

	// LearningDataStats holds summary figures about the learning data.
	LearningDataStats LearningDataStats
	// contains filtered or unexported fields
}

func NewPredictor ¶

func NewPredictor(
	mlModel MLModel,
	conf *cnf.Conf,
) *Predictor

func (*Predictor) BalanceSample ¶

func (model *Predictor) BalanceSample() []feats.QueryEvaluation

func (*Predictor) CreateAndTestModel ¶

func (model *Predictor) CreateAndTestModel(
	ctx context.Context,
	testData []feats.QueryEvaluation,
	featsFile string,
	reporter *Reporter,
) error

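CreateAndTestModel trains an ML model and saves it to a file. Note that the signature has no `outputPath` parameter; the output file name is presumably derived from `featsFile` (cf. MLModel.CreateModelFileName). It also takes a `reporter`, which presumably carries the Python script used for plotting (cf. Reporter.RFAccuracyScript).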

func (*Predictor) Deduplicate ¶

func (model *Predictor) Deduplicate()

func (*Predictor) FindAndSetDataMidpoint ¶

func (model *Predictor) FindAndSetDataMidpoint()

func (*Predictor) PrecisionAndRecall ¶

func (model *Predictor) PrecisionAndRecall(misclassQueries misclassifiedQueryReporter) PrecAndRecall

func (*Predictor) ProcessEntry ¶

func (model *Predictor) ProcessEntry(entry QueryStatsRecord) error

func (*Predictor) SetStats ¶

func (model *Predictor) SetStats(numProcessed, numFailed int)

type QueryStatsRecord ¶

// QueryStatsRecord is a single record of query statistics. It is
// (de)serialized as JSON under the tagged keys.
type QueryStatsRecord struct {
	// Corpus is the corpus identifier (e.g. "syn_v13").
	Corpus        string  `json:"corpus"`
	// CorpusSize is the size of the corpus.
	// NOTE(review): presumably in tokens - confirm.
	CorpusSize    int64   `json:"corpusSize"`
	// SubcorpusSize is the size of the subcorpus.
	// NOTE(review): presumably zero when no subcorpus is used - confirm.
	SubcorpusSize int64   `json:"subcorpusSize"`
	// TimeProc is the processing time of the query.
	// NOTE(review): the unit is not visible here - confirm.
	TimeProc      float64 `json:"timeProc"`
	// Query is the raw query string. In ObligatoryExamples it has the
	// form "aword," followed by a CQL expression.
	Query         string  `json:"query"`

	// IsSynthetic specifies whether the record comes from
	// production KonText stats log or if it is generated
	// using a benchmarking module (= MQuery).
	IsSynthetic bool `json:"isSynthetic,omitempty"`

	// FlaggedAsSlow is omitted from JSON when false.
	// NOTE(review): presumably marks a record explicitly labelled as slow
	// independently of TimeProc - confirm.
	FlaggedAsSlow bool `json:"flaggedAsSlow,omitempty"`
}

func (QueryStatsRecord) GetCQL ¶

func (rec QueryStatsRecord) GetCQL() string

func (QueryStatsRecord) UniqKey ¶

func (rec QueryStatsRecord) UniqKey() string

type Reporter ¶

// Reporter provides reporting functions for model evaluation: it accepts
// misclassified queries (AddMisclassifiedQuery), can show or save them
// (ShowMisclassifiedQueries, SaveMisclassifiedQueries) and can plot accuracy
// data (PlotRFAccuracy).
type Reporter struct {
	// RFAccuracyScript is a script reference.
	// NOTE(review): presumably the path to the Python plotting script used
	// by PlotRFAccuracy - confirm.
	RFAccuracyScript string

	// MisclassQueriesOutPath is an output path.
	// NOTE(review): presumably the file written by
	// SaveMisclassifiedQueries - confirm.
	MisclassQueriesOutPath string
	// contains filtered or unexported fields
}

func (*Reporter) AddMisclassifiedQuery ¶

func (reporter *Reporter) AddMisclassifiedQuery(q feats.QueryEvaluation, mlOut, threshold, slowProcTime float64)

func (*Reporter) PlotRFAccuracy ¶

func (reporter *Reporter) PlotRFAccuracy(data, chartLabel, modelPath string) error

PlotRFAccuracy creates a chart from CSV data using a Python plotting script. The output file name is derived from the provided modelPath.

func (*Reporter) SaveMisclassifiedQueries ¶

func (reporter *Reporter) SaveMisclassifiedQueries() error

func (*Reporter) ShowMisclassifiedQueries ¶

func (reporter *Reporter) ShowMisclassifiedQueries()

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
feats
modutils
nn
predict
rf
xg
ym
zero

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL