// Copyright 2023 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT package meilisearch import ( "context" "errors" "fmt" "strconv" "strings" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch" "code.gitea.io/gitea/modules/indexer/issues/internal" "github.com/meilisearch/meilisearch-go" ) const ( issueIndexerLatestVersion = 3 // TODO: make this configurable if necessary maxTotalHits = 10000 ) // ErrMalformedResponse is never expected as we initialize the indexer ourself and so define the types. var ErrMalformedResponse = errors.New("meilisearch returned unexpected malformed content") var _ internal.Indexer = &Indexer{} // Indexer implements Indexer interface type Indexer struct { inner *inner_meilisearch.Indexer indexer_internal.Indexer // do not composite inner_meilisearch.Indexer directly to avoid exposing too much } // NewIndexer creates a new meilisearch indexer func NewIndexer(url, apiKey, indexerName string) *Indexer { settings := &meilisearch.Settings{ // The default ranking rules of meilisearch are: ["words", "typo", "proximity", "attribute", "sort", "exactness"] // So even if we specify the sort order, it could not be respected because the priority of "sort" is so low. // So we need to specify the ranking rules to make sure the sort order is respected. // See https://www.meilisearch.com/docs/learn/core_concepts/relevancy RankingRules: []string{"sort", // make sure "sort" has the highest priority "words", "typo", "proximity", "attribute", "exactness"}, SearchableAttributes: []string{ "title", "content", "comments", }, DisplayedAttributes: []string{ "id", "title", "content", "comments", }, FilterableAttributes: []string{ "repo_id", "is_public", "is_pull", "is_closed", "label_ids", "no_label", "milestone_id", "project_id", "project_board_id", "poster_id", "assignee_id", "mention_ids", "reviewed_ids", "review_requested_ids", "subscriber_ids", "updated_unix", }, SortableAttributes: []string{ "updated_unix", "created_unix", "deadline_unix", "comment_count", "id", }, Pagination: &meilisearch.Pagination{ MaxTotalHits: maxTotalHits, }, } inner := inner_meilisearch.NewIndexer(url, apiKey, indexerName, issueIndexerLatestVersion, settings) indexer := &Indexer{ inner: inner, Indexer: inner, } return indexer } // Index will save the index data func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error { if len(issues) == 0 { return nil } for _, issue := range issues { _, err := b.inner.Client.Index(b.inner.VersionedIndexName()).AddDocuments(issue) if err != nil { return err } } // TODO: bulk send index data return nil } // Delete deletes indexes by ids func (b *Indexer) Delete(_ context.Context, ids ...int64) error { if len(ids) == 0 { return nil } for _, id := range ids { _, err := b.inner.Client.Index(b.inner.VersionedIndexName()).DeleteDocument(strconv.FormatInt(id, 10)) if err != nil { return err } } // TODO: bulk send deletes return nil } // Search searches for issues by given conditions. // Returns the matching issue IDs func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) { query := inner_meilisearch.FilterAnd{} if len(options.RepoIDs) > 0 { q := &inner_meilisearch.FilterOr{} q.Or(inner_meilisearch.NewFilterIn("repo_id", options.RepoIDs...)) if options.AllPublic { q.Or(inner_meilisearch.NewFilterEq("is_public", true)) } query.And(q) } if options.IsPull.Has() { query.And(inner_meilisearch.NewFilterEq("is_pull", options.IsPull.Value())) } if options.IsClosed.Has() { query.And(inner_meilisearch.NewFilterEq("is_closed", options.IsClosed.Value())) } if options.NoLabelOnly { query.And(inner_meilisearch.NewFilterEq("no_label", true)) } else { if len(options.IncludedLabelIDs) > 0 { q := &inner_meilisearch.FilterAnd{} for _, labelID := range options.IncludedLabelIDs { q.And(inner_meilisearch.NewFilterEq("label_ids", labelID)) } query.And(q) } else if len(options.IncludedAnyLabelIDs) > 0 { query.And(inner_meilisearch.NewFilterIn("label_ids", options.IncludedAnyLabelIDs...)) } if len(options.ExcludedLabelIDs) > 0 { q := &inner_meilisearch.FilterAnd{} for _, labelID := range options.ExcludedLabelIDs { q.And(inner_meilisearch.NewFilterNot(inner_meilisearch.NewFilterEq("label_ids", labelID))) } query.And(q) } } if len(options.MilestoneIDs) > 0 { query.And(inner_meilisearch.NewFilterIn("milestone_id", options.MilestoneIDs...)) } if options.ProjectID.Has() { query.And(inner_meilisearch.NewFilterEq("project_id", options.ProjectID.Value())) } if options.ProjectColumnID.Has() { query.And(inner_meilisearch.NewFilterEq("project_board_id", options.ProjectColumnID.Value())) } if options.PosterID.Has() { query.And(inner_meilisearch.NewFilterEq("poster_id", options.PosterID.Value())) } if options.AssigneeID.Has() { query.And(inner_meilisearch.NewFilterEq("assignee_id", options.AssigneeID.Value())) } if options.MentionID.Has() { query.And(inner_meilisearch.NewFilterEq("mention_ids", options.MentionID.Value())) } if options.ReviewedID.Has() { query.And(inner_meilisearch.NewFilterEq("reviewed_ids", options.ReviewedID.Value())) } if options.ReviewRequestedID.Has() { query.And(inner_meilisearch.NewFilterEq("review_requested_ids", options.ReviewRequestedID.Value())) } if options.SubscriberID.Has() { query.And(inner_meilisearch.NewFilterEq("subscriber_ids", options.SubscriberID.Value())) } if options.UpdatedAfterUnix.Has() { query.And(inner_meilisearch.NewFilterGte("updated_unix", options.UpdatedAfterUnix.Value())) } if options.UpdatedBeforeUnix.Has() { query.And(inner_meilisearch.NewFilterLte("updated_unix", options.UpdatedBeforeUnix.Value())) } var sortBy []string switch options.SortBy { // sort by relevancy (no explicit sorting) case internal.SortByScore: fallthrough case "": sortBy = []string{} default: sortBy = []string{ parseSortBy(options.SortBy), "id:desc", } } skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxTotalHits) counting := limit == 0 if counting { // If set limit to 0, it will be 20 by default, and -1 is not allowed. // See https://www.meilisearch.com/docs/reference/api/search#limit // So set limit to 1 to make the cost as low as possible, then clear the result before returning. limit = 1 } keyword := options.Keyword if !options.IsFuzzyKeyword { // to make it non fuzzy ("typo tolerance" in meilisearch terms), we have to quote the keyword(s) // https://www.meilisearch.com/docs/reference/api/search#phrase-search keyword = doubleQuoteKeyword(keyword) } searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(keyword, &meilisearch.SearchRequest{ Filter: query.Statement(), Limit: int64(limit), Offset: int64(skip), Sort: sortBy, MatchingStrategy: meilisearch.All, }) if err != nil { return nil, err } if counting { searchRes.Hits = nil } hits, err := convertHits(searchRes) if err != nil { return nil, err } return &internal.SearchResult{ Total: searchRes.EstimatedTotalHits, Hits: hits, }, nil } func parseSortBy(sortBy internal.SortBy) string { field := strings.TrimPrefix(string(sortBy), "-") if strings.HasPrefix(string(sortBy), "-") { return field + ":desc" } return field + ":asc" } func doubleQuoteKeyword(k string) string { kp := strings.Split(k, " ") parts := 0 for i := range kp { part := strings.Trim(kp[i], "\"") if part != "" { kp[parts] = fmt.Sprintf(`"%s"`, part) parts++ } } return strings.Join(kp[:parts], " ") } func convertHits(searchRes *meilisearch.SearchResponse) ([]internal.Match, error) { hits := make([]internal.Match, 0, len(searchRes.Hits)) for _, hit := range searchRes.Hits { hit, ok := hit.(map[string]any) if !ok { return nil, ErrMalformedResponse } issueID, ok := hit["id"].(float64) if !ok { return nil, ErrMalformedResponse } hits = append(hits, internal.Match{ ID: int64(issueID), }) } return hits, nil }