Documentation
¶
Index ¶
- Variables
- type LearningDataStats
- type MLModel
- type PrecAndRecall
- type Predictor
- func (model *Predictor) BalanceSample() []feats.QueryEvaluation
- func (model *Predictor) CreateAndTestModel(ctx context.Context, testData []feats.QueryEvaluation, featsFile string, ...) error
- func (model *Predictor) Deduplicate()
- func (model *Predictor) FindAndSetDataMidpoint()
- func (model *Predictor) PrecisionAndRecall(misclassQueries misclassifiedQueryReporter) PrecAndRecall
- func (model *Predictor) ProcessEntry(entry QueryStatsRecord) error
- func (model *Predictor) SetStats(numProcessed, numFailed int)
- type QueryStatsRecord
- type Reporter
- func (reporter *Reporter) AddMisclassifiedQuery(q feats.QueryEvaluation, mlOut, threshold, slowProcTime float64)
- func (reporter *Reporter) PlotRFAccuracy(data, chartLabel, modelPath string) error
- func (reporter *Reporter) SaveMisclassifiedQueries() error
- func (reporter *Reporter) ShowMisclassifiedQueries()
Constants ¶
This section is empty.
Variables ¶
View Source
var ErrNoSuchModel = errors.New("no such model")
View Source
var ObligatoryExamples = []QueryStatsRecord{
{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[]"},
{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[word=\".*\"]"},
{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[word=\".+\"]"},
{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[lemma=\".*\"]"},
{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[lemma=\".+\"]"},
{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[lc=\".*\"]"},
{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[lc=\".+\"]"},
{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[tag=\"N.*\"]"},
{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[tag=\"N.+\"]"},
{Corpus: "syn_v13", CorpusSize: 6400899055, TimeProc: 500, Query: "aword,[pos=\"N\"]"},
}
Functions ¶
This section is empty.
Types ¶
type LearningDataStats ¶
type LearningDataStats struct {
NumProcessed int `msgpack:"numProcessed"`
NumFailed int `msgpack:"numFailed"`
DeduplicationRatio float64 `msgpack:"deduplicationRatio"`
}
func (LearningDataStats) AsComment ¶
func (stats LearningDataStats) AsComment() string
type MLModel ¶
type MLModel interface {
// Train trains the model based on input data. In case the model
// supports only inference (e.g. our XGBoost), this should just prepare
// data to a format required by actual program performing the learning.
Train(ctx context.Context, data []feats.QueryEvaluation, slowQueriesTime float64, comment string) error
Predict(feats.QueryEvaluation) predict.Prediction
SetClassThreshold(v float64)
GetClassThreshold() float64
GetSlowQueriesThresholdTime() float64
SaveToFile(string) error
GetInfo() string
// IsInferenceOnly specifies whether the model supports only inference,
// i.e. whether it cannot be trained directly by this program (see Train).
IsInferenceOnly() bool
// CreateModelFileName should generate proper model filename based
// on the feature (i.e. input) file name. This should keep data and
// model names organized and easy to search through.
CreateModelFileName(featFile string) string
}
MLModel is a generalization of a Machine Learning model used to extract knowledge about CQL queries.
func GetMLModel ¶ added in v0.2.1
type PrecAndRecall ¶
func (PrecAndRecall) CSV ¶
func (pr PrecAndRecall) CSV(x float64) string
type Predictor ¶
type Predictor struct {
Evaluations []feats.QueryEvaluation
LearningDataStats LearningDataStats
// contains filtered or unexported fields
}
func (*Predictor) BalanceSample ¶
func (model *Predictor) BalanceSample() []feats.QueryEvaluation
func (*Predictor) CreateAndTestModel ¶
func (model *Predictor) CreateAndTestModel( ctx context.Context, testData []feats.QueryEvaluation, featsFile string, reporter *Reporter, ) error
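CreateAndTestModel trains an ML model and saves it to a file. Note that the signature has no `outputPath` parameter; the output file name is presumably derived from `featsFile` (cf. MLModel.CreateModelFileName). It also takes a `reporter`, whose RFAccuracyScript field presumably refers to the Python script used to plot the model's accuracy (TODO: confirm against the implementation).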
func (*Predictor) Deduplicate ¶
func (model *Predictor) Deduplicate()
func (*Predictor) FindAndSetDataMidpoint ¶
func (model *Predictor) FindAndSetDataMidpoint()
func (*Predictor) PrecisionAndRecall ¶
func (model *Predictor) PrecisionAndRecall(misclassQueries misclassifiedQueryReporter) PrecAndRecall
func (*Predictor) ProcessEntry ¶
func (model *Predictor) ProcessEntry(entry QueryStatsRecord) error
type QueryStatsRecord ¶
type QueryStatsRecord struct {
Corpus string `json:"corpus"`
CorpusSize int64 `json:"corpusSize"`
SubcorpusSize int64 `json:"subcorpusSize"`
TimeProc float64 `json:"timeProc"`
Query string `json:"query"`
// IsSynthetic specifies whether the record comes from
// the production KonText stats log or was generated
// by a benchmarking module (= MQuery).
IsSynthetic bool `json:"isSynthetic,omitempty"`
FlaggedAsSlow bool `json:"flaggedAsSlow,omitempty"`
}
func (QueryStatsRecord) GetCQL ¶
func (rec QueryStatsRecord) GetCQL() string
func (QueryStatsRecord) UniqKey ¶
func (rec QueryStatsRecord) UniqKey() string
type Reporter ¶
type Reporter struct {
RFAccuracyScript string
MisclassQueriesOutPath string
// contains filtered or unexported fields
}
func (*Reporter) AddMisclassifiedQuery ¶
func (reporter *Reporter) AddMisclassifiedQuery(q feats.QueryEvaluation, mlOut, threshold, slowProcTime float64)
func (*Reporter) PlotRFAccuracy ¶
PlotRFAccuracy creates a chart from CSV data using a Python plotting script. The output file name is derived from the provided modelPath.
func (*Reporter) SaveMisclassifiedQueries ¶
func (*Reporter) ShowMisclassifiedQueries ¶
func (reporter *Reporter) ShowMisclassifiedQueries()
Click to show internal directories.
Click to hide internal directories.