From eeb490c7ab11cb6895ddf6e3069a2397c81296c9 Mon Sep 17 00:00:00 2001 From: silverwind Date: Sat, 30 Jul 2022 18:37:02 +0200 Subject: [PATCH] Rework raw file http header logic (#20484) (#20542) - Always respect the user's configured mime type map - Allow more types like image/pdf/video/audio to serve with correct content-type - Shorten cache duration of raw files to 5 minutes, matching GitHub - Don't set `content-disposition: attachment`, let the browser decide whether it wants to download or display a file directly - Implement rfc5987 for filenames, remove previous hack. Confirmed it working in Safari. - Make PDF attachment work in Safari by removing `sandbox` attribute. This change will make a lot more file types open directly in browser now. Logic should generally be more readable than before with less `if` nesting and such. Replaces: https://github.com/go-gitea/gitea/pull/20460 Replaces: https://github.com/go-gitea/gitea/pull/20455 Fixes: https://github.com/go-gitea/gitea/issues/20404 --- modules/typesniffer/typesniffer.go | 10 ++++ routers/common/repo.go | 96 ++++++++++++++++++------------ 2 files changed, 67 insertions(+), 39 deletions(-) diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go index b6a6646d50..e50928e8c2 100644 --- a/modules/typesniffer/typesniffer.go +++ b/modules/typesniffer/typesniffer.go @@ -70,6 +70,16 @@ func (ct SniffedType) IsRepresentableAsText() bool { return ct.IsText() || ct.IsSvgImage() } +// IsBrowsableType returns whether a non-text type can be displayed in a browser +func (ct SniffedType) IsBrowsableBinaryType() bool { + return ct.IsImage() || ct.IsSvgImage() || ct.IsPDF() || ct.IsVideo() || ct.IsAudio() +} + +// GetMimeType returns the mime type +func (ct SniffedType) GetMimeType() string { + return strings.SplitN(ct.contentType, ";", 2)[0] +} + // DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty. func DetectContentType(data []byte) SniffedType { if len(data) == 0 { diff --git a/routers/common/repo.go b/routers/common/repo.go index b3cd749115..a9e80fad48 100644 --- a/routers/common/repo.go +++ b/routers/common/repo.go @@ -7,12 +7,13 @@ package common import ( "fmt" "io" + "net/url" "path" "path/filepath" "strings" "time" - "code.gitea.io/gitea/modules/charset" + charsetModule "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/httpcache" @@ -42,7 +43,7 @@ func ServeBlob(ctx *context.Context, blob *git.Blob, lastModified time.Time) err } // ServeData download file from io.Reader -func ServeData(ctx *context.Context, name string, size int64, reader io.Reader) error { +func ServeData(ctx *context.Context, filePath string, size int64, reader io.Reader) error { buf := make([]byte, 1024) n, err := util.ReadAtMost(reader, buf) if err != nil { @@ -52,56 +53,73 @@ func ServeData(ctx *context.Context, name string, size int64, reader io.Reader) buf = buf[:n] } - ctx.Resp.Header().Set("Cache-Control", "public,max-age=86400") + httpcache.AddCacheControlToHeader(ctx.Resp.Header(), 5*time.Minute) if size >= 0 { ctx.Resp.Header().Set("Content-Length", fmt.Sprintf("%d", size)) } else { - log.Error("ServeData called to serve data: %s with size < 0: %d", name, size) + log.Error("ServeData called to serve data: %s with size < 0: %d", filePath, size) } - name = path.Base(name) - // Google Chrome dislike commas in filenames, so let's change it to a space - name = strings.ReplaceAll(name, ",", " ") + fileName := path.Base(filePath) + sniffedType := typesniffer.DetectContentType(buf) + isPlain := sniffedType.IsText() || ctx.FormBool("render") + mimeType := "" + charset := "" - st := typesniffer.DetectContentType(buf) - - mappedMimeType := "" if setting.MimeTypeMap.Enabled { - fileExtension := strings.ToLower(filepath.Ext(name)) - mappedMimeType = setting.MimeTypeMap.Map[fileExtension] + fileExtension := strings.ToLower(filepath.Ext(fileName)) + mimeType = setting.MimeTypeMap.Map[fileExtension] } - if st.IsText() || ctx.FormBool("render") { - cs, err := charset.DetectEncoding(buf) - if err != nil { - log.Error("Detect raw file %s charset failed: %v, using by default utf-8", name, err) - cs = "utf-8" - } - if mappedMimeType == "" { - mappedMimeType = "text/plain" - } - ctx.Resp.Header().Set("Content-Type", mappedMimeType+"; charset="+strings.ToLower(cs)) - } else { - ctx.Resp.Header().Set("Access-Control-Expose-Headers", "Content-Disposition") - if mappedMimeType != "" { - ctx.Resp.Header().Set("Content-Type", mappedMimeType) - } - if (st.IsImage() || st.IsPDF()) && (setting.UI.SVG.Enabled || !st.IsSvgImage()) { - ctx.Resp.Header().Set("Content-Disposition", fmt.Sprintf(`inline; filename="%s"`, name)) - if st.IsSvgImage() || st.IsPDF() { - ctx.Resp.Header().Set("Content-Security-Policy", "default-src 'none'; style-src 'unsafe-inline'; sandbox") - ctx.Resp.Header().Set("X-Content-Type-Options", "nosniff") - if st.IsSvgImage() { - ctx.Resp.Header().Set("Content-Type", typesniffer.SvgMimeType) - } else { - ctx.Resp.Header().Set("Content-Type", typesniffer.ApplicationOctetStream) - } - } + + if mimeType == "" { + if sniffedType.IsBrowsableBinaryType() { + mimeType = sniffedType.GetMimeType() + } else if isPlain { + mimeType = "text/plain" } else { - ctx.Resp.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, name)) + mimeType = typesniffer.ApplicationOctetStream } } + if isPlain { + charset, err = charsetModule.DetectEncoding(buf) + if err != nil { + log.Error("Detect raw file %s charset failed: %v, using by default utf-8", filePath, err) + charset = "utf-8" + } + } + + if charset != "" { + ctx.Resp.Header().Set("Content-Type", mimeType+"; charset="+strings.ToLower(charset)) + } else { + ctx.Resp.Header().Set("Content-Type", mimeType) + } + ctx.Resp.Header().Set("X-Content-Type-Options", "nosniff") + + isSVG := sniffedType.IsSvgImage() + + // serve types that can present a security risk with CSP + if isSVG { + ctx.Resp.Header().Set("Content-Security-Policy", "default-src 'none'; style-src 'unsafe-inline'; sandbox") + } else if sniffedType.IsPDF() { + // no sandbox attribute for pdf as it breaks rendering in at least safari. this + // should generally be safe as scripts inside PDF can not escape the PDF document + // see https://bugs.chromium.org/p/chromium/issues/detail?id=413851 for more discussion + ctx.Resp.Header().Set("Content-Security-Policy", "default-src 'none'; style-src 'unsafe-inline'") + } + + disposition := "inline" + if isSVG && !setting.UI.SVG.Enabled { + disposition = "attachment" + } + + // encode filename per https://datatracker.ietf.org/doc/html/rfc5987 + encodedFileName := `filename*=UTF-8''` + url.PathEscape(fileName) + + ctx.Resp.Header().Set("Content-Disposition", disposition+"; "+encodedFileName) + ctx.Resp.Header().Set("Access-Control-Expose-Headers", "Content-Disposition") + _, err = ctx.Resp.Write(buf) if err != nil { return err