From fa32faf1a4cd93e5d08468135014667090946b56 Mon Sep 17 00:00:00 2001
From: tso
Date: Wed, 9 Dec 2015 09:53:05 -0500
Subject: [PATCH] Add a Language Statistics Bar to repo/view

Using a port of GitHub's linguist functionality to Go, which I have made
as a separate library and which is hosted here:

https://github.com/generaltso/linguist

and a quick design I made, I have hacked a language statistics bar into
Gogs.

I wasn't sure where to put everything, so it's sitting directly on the
view router and the CSS is inlined into a new template file.

Based on the structure of this project, I would fully expect this feature
to belong in its own sub-package.

Also, even though determining language stats on the fly is pretty fast,
caching the results in the database for large codebases would probably be
a much better strategy, especially if the top language were to be
displayed in the "Explore" view like GitHub has.

I also had difficulty trying to figure out how to do:

    if len(something) == 1 ? '' : 's'

with Go templates for plurals (1 Commit vs 2 Commits), and I kinda gave
up there...
---
 routers/repo/view.go                    | 199 ++++++++++++++++++++++++
 templates/repo/home.tmpl                |   3 +
 templates/repo/language_statistics.tmpl | 168 +++++++++++++++++++++++
 3 files changed, 370 insertions(+)
 create mode 100644 templates/repo/language_statistics.tmpl

diff --git a/routers/repo/view.go b/routers/repo/view.go
index 361d77a85..1ddfcb359 100644
--- a/routers/repo/view.go
+++ b/routers/repo/view.go
@@ -6,11 +6,15 @@ package repo
 
 import (
 	"bytes"
+	"fmt"
 	"io/ioutil"
 	"path"
 	"path/filepath"
+	"sort"
+	"strconv"
 	"strings"
 
+	"github.com/Unknwon/com"
 	"github.com/Unknwon/paginater"
 
 	"github.com/gogits/gogs/models"
@@ -19,6 +23,8 @@ import (
 	"github.com/gogits/gogs/modules/log"
 	"github.com/gogits/gogs/modules/middleware"
 	"github.com/gogits/gogs/modules/template"
+
+	"github.com/generaltso/linguist"
 )
 
 const (
@@ -218,6 +224,14 @@ func Home(ctx *middleware.Context) {
 		}
 		ctx.Data["LastCommit"] = lastCommit
 		ctx.Data["LastCommitUser"] = models.ValidateCommitWithEmail(lastCommit)
+
+		branchId, err := ctx.Repo.GitRepo.GetCommitIdOfBranch(branchName)
+		if err != nil || branchId != lastCommit.ID.String() {
+			branchId = lastCommit.ID.String()
+		}
+		Langs := getLanguageStats(ctx, branchId)
+		ctx.Data["LanguageStats"] = Langs
+
 	}
 
 	ctx.Data["Username"] = userName
@@ -295,3 +309,188 @@ func Forks(ctx *middleware.Context) {
 
 	ctx.HTML(200, FORKS)
 }
+
+// getLanguageStats returns the languages that make up the most bytes in the
+// tree at branchId, largest share first. The value is a []*LanguageStat; Home
+// stores it in ctx.Data["LanguageStats"].
+func getLanguageStats(ctx *middleware.Context, branchId string) interface{} {
+	// add up the size of every analysed file per detected language
+	sizes := map[string]float64{}
+	var total float64
+	for _, f := range linguistlstree(ctx, branchId) {
+		sizes[f.Language] += f.Size
+		total += f.Size
+	}
+
+	// keep each name next to its share: looking names up by percentage
+	// would merge languages whose totals happen to be identical
+	shares := make(languageShares, 0, len(sizes))
+	for name, size := range sizes {
+		shares = append(shares, languageShare{name: name, percent: size / total * 100.0})
+	}
+	sort.Sort(shares)
+
+	// limit result set
+	// NOTE(review): eleven entries, not ten, are let through - confirm the
+	// intended cap
+	const maxShown = 11
+	if len(shares) > maxShown {
+		shares = shares[:maxShown]
+	}
+
+	ret := make([]*LanguageStat, 0, len(shares))
+	for _, s := range shares {
+		color := linguist.GetColor(s.name)
+		if color == "" {
+			color = "#ccc" //grey
+		}
+		ret = append(ret, &LanguageStat{
+			Name:    s.name,
+			Percent: fmt.Sprintf("%.2f%%", s.percent),
+			Color:   color,
+		})
+	}
+	return ret
+}
+
+// LanguageStat is one segment of the language statistics bar.
+type LanguageStat struct {
+	Name    string // language name as recorded in file.Language
+	Percent string // share of the analysed bytes, formatted like "12.34%"
+	Color   string // color from linguist.GetColor, "#ccc" when it has none
+}
+
+// languageShare pairs a language with its percentage of the analysed bytes.
+type languageShare struct {
+	name    string
+	percent float64
+}
+
+// languageShares sorts by descending percent; ties are broken by name so the
+// order does not depend on map iteration.
+type languageShares []languageShare
+
+func (s languageShares) Len() int      { return len(s) }
+func (s languageShares) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+func (s languageShares) Less(i, j int) bool {
+	if s[i].percent != s[j].percent {
+		return s[i].percent > s[j].percent
+	}
+	return s[i].name < s[j].name
+}
+
+// file is a single blob found by linguistlstree.
+type file struct {
+	Name     string  // path of the blob inside the tree
+	Size     float64 // blob size as reported by git ls-tree -l
+	Language string  // detected language, mimetype or "(undetermined)"
+}
+
+// gitcmd runs git with args inside the repository and returns its stdout.
+// A failure is passed to tsoErr; whatever stdout was captured is returned
+// either way.
+func gitcmd(ctx *middleware.Context, args ...string) string {
+	stdout, _, err := com.ExecCmdDir(ctx.Repo.GitRepo.Path, "git", args...)
+	tsoErr(ctx, err)
+	return stdout
+}
+
+// gitcmdbytes is gitcmd for output that has to stay raw bytes.
+func gitcmdbytes(ctx *middleware.Context, args ...string) []byte {
+	stdout, _, err := com.ExecCmdDirBytes(ctx.Repo.GitRepo.Path, "git", args...)
+	tsoErr(ctx, err)
+	return stdout
+}
+
+// tsoErr passes a non-nil err to ctx.Handle with status 500.
+// NOTE(review): callers carry on after this returns; if ctx.Handle already
+// writes the response, Home ends up rendering twice - confirm.
+func tsoErr(ctx *middleware.Context, err error) {
+	if err != nil {
+		ctx.Handle(500, "*blames tso*", err)
+	}
+}
+
+// linguistlstree returns every non-empty, non-vendored blob reachable from
+// treeish, each tagged with a language found by file name, then mimetype,
+// then contents.
+func linguistlstree(ctx *middleware.Context, treeish string) (files []*file) {
+	files = []*file{}
+	// -r descends into sub-trees, -l adds every blob's size (so no cat-file
+	// per file) and -z NUL-terminates the records so paths come out verbatim:
+	//   <mode> SP <type> SP <object> SP <size> TAB <path> NUL
+	listing := gitcmd(ctx, "ls-tree", "-r", "-l", "-z", treeish)
+	for _, record := range strings.Split(listing, "\x00") {
+		// only the first tab separates the metadata from the path
+		parts := strings.SplitN(record, "\t", 2)
+		if len(parts) != 2 {
+			continue
+		}
+		// Fields also swallows the padding git puts in front of the size
+		meta := strings.Fields(parts[0])
+		if len(meta) != 4 || meta[1] != "blob" {
+			continue
+		}
+		fhash, fname := meta[2], parts[1]
+
+		// if it's vendored, don't even look at it
+		// (vendored means files like README.md, .gitignore, etc...)
+		if linguist.IsVendored(fname) {
+			continue
+		}
+
+		// if it's an empty file don't even waste time; a size that does not
+		// parse is skipped the same way
+		fsize, err := strconv.ParseFloat(meta[3], 64)
+		if err != nil || fsize == 0 {
+			continue
+		}
+
+		f := &file{Name: fname, Size: fsize}
+		// the filename detectors get the bare name, without directories
+		base := path.Base(fname)
+
+		//
+		// language detection
+		//
+
+		// by file extension
+		if byExt := linguist.DetectFromFilename(base); byExt != "" {
+			f.Language = byExt
+			files = append(files, f)
+			continue
+		}
+
+		// by mimetype
+		// if we can't guess type by extension, then before jumping into
+		// lexing and parsing things like image files or cat videos
+		// ...or other binary formats which will give erroneous results...
+		// with the linguist.DetectFromContents method, I posit looking
+		// at mimetype with linguist.DetectMimeFromFilename
+		//
+		// ...however, this is not what github does at all, instead ignoring
+		// binary files altogether. However, there is no law that states
+		// git must be used for code only.
+		//
+		// the error is not inspected; anything not matched here falls
+		// through to content detection
+		byMime, shouldIgnore, _ := linguist.DetectMimeFromFilename(base)
+		if byMime != "" && shouldIgnore {
+			f.Language = byMime
+			files = append(files, f)
+			continue
+		}
+
+		// by contents
+		// see also: github.com/github/linguist
+		// see also: github.com/generaltso/linguist
+		contents := gitcmdbytes(ctx, "cat-file", "blob", fhash)
+		if byContents := linguist.DetectFromContents(contents); byContents != "" {
+			f.Language = byContents
+		} else {
+			f.Language = "(undetermined)"
+		}
+		files = append(files, f)
+	}
+	return files
+}
diff --git a/templates/repo/home.tmpl b/templates/repo/home.tmpl
index 22a2f839a..3a93a674c 100644
--- a/templates/repo/home.tmpl
+++ b/templates/repo/home.tmpl
@@ -6,6 +6,9 @@
 {{if .Repository.DescriptionHtml}}{{.Repository.DescriptionHtml}}{{else}}{{.i18n.Tr "repo.no_desc"}}{{end}}
 {{.Repository.Website}}

+ {{if .LanguageStats}} + {{template "repo/language_statistics" .}} + {{end}}