From fa32faf1a4cd93e5d08468135014667090946b56 Mon Sep 17 00:00:00 2001
From: tso
Date: Wed, 9 Dec 2015 09:53:05 -0500
Subject: [PATCH] Add a Language Statistics Bar to repo/view
Using a port of GitHub's linguist functionality to Go, which I have
published as a separate library hosted at
https://github.com/generaltso/linguist
together with a quick design I made, I have hacked a language
statistics bar into Gogs.
I wasn't sure where to put everything, so the code sits directly in
the view router and the CSS is inlined into a new template file.
Based on the structure of this project, I would fully expect this
feature to belong in its own sub-package.
Also, even though determining language stats on the fly is pretty
fast, caching the results in the database for large codebases
would probably be a much better strategy, especially if the top
language were to be displayed in the "Explore" view as GitHub does.
I also had difficulty trying to figure out how to do:
if len(something) == 1 ? '' : 's'
with go templates for plurals (1 Commit vs 2 Commits), and I kinda
gave up there...
---
routers/repo/view.go | 173 ++++++++++++++++++++++++
templates/repo/home.tmpl | 3 +
templates/repo/language_statistics.tmpl | 168 +++++++++++++++++++++++
3 files changed, 344 insertions(+)
create mode 100644 templates/repo/language_statistics.tmpl
diff --git a/routers/repo/view.go b/routers/repo/view.go
index 361d77a85..1ddfcb359 100644
--- a/routers/repo/view.go
+++ b/routers/repo/view.go
@@ -6,11 +6,15 @@ package repo
import (
"bytes"
+ "fmt"
"io/ioutil"
"path"
"path/filepath"
+ "sort"
+ "strconv"
"strings"
+ "github.com/Unknwon/com"
"github.com/Unknwon/paginater"
"github.com/gogits/gogs/models"
@@ -19,6 +23,8 @@ import (
"github.com/gogits/gogs/modules/log"
"github.com/gogits/gogs/modules/middleware"
"github.com/gogits/gogs/modules/template"
+
+ "github.com/generaltso/linguist"
)
const (
@@ -218,6 +224,14 @@ func Home(ctx *middleware.Context) {
}
ctx.Data["LastCommit"] = lastCommit
ctx.Data["LastCommitUser"] = models.ValidateCommitWithEmail(lastCommit)
+
+ branchId, err := ctx.Repo.GitRepo.GetCommitIdOfBranch(branchName)
+ if err != nil || branchId != lastCommit.ID.String() {
+ branchId = lastCommit.ID.String()
+ }
+ Langs := getLanguageStats(ctx, branchId)
+ ctx.Data["LanguageStats"] = Langs
+
}
ctx.Data["Username"] = userName
@@ -295,3 +309,162 @@ func Forks(ctx *middleware.Context) {
ctx.HTML(200, FORKS)
}
+
+// languageShare pairs a language name with the summed size of the files
+// attributed to it.
+type languageShare struct {
+	name string
+	size float64
+}
+
+// bySizeDesc orders language shares from largest to smallest, breaking ties
+// by name so the result never depends on map iteration order.
+type bySizeDesc []languageShare
+
+func (s bySizeDesc) Len() int      { return len(s) }
+func (s bySizeDesc) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+func (s bySizeDesc) Less(i, j int) bool {
+	if s[i].size != s[j].size {
+		return s[i].size > s[j].size
+	}
+	return s[i].name < s[j].name
+}
+
+// getLanguageStats summarises the files returned by linguistlstree for
+// branchId into per-language shares of the total size.
+//
+// The result is a []*LanguageStat (returned as interface{}), ordered from the
+// largest share to the smallest and capped at eleven entries. Languages for
+// which linguist.GetColor returns "" are shown in grey ("#ccc").
+func getLanguageStats(ctx *middleware.Context, branchId string) interface{} {
+	// The original cutoff (`i > 10`) admits eleven entries; that limit is kept.
+	const maxStats = 11
+
+	sizes := map[string]float64{}
+	var totalSize float64
+	for _, f := range linguistlstree(ctx, branchId) {
+		sizes[f.Language] += f.Size
+		totalSize += f.Size
+	}
+
+	// Sort (name, size) pairs instead of looking languages up by their
+	// percentage: two languages with the same share would otherwise collide
+	// on the float key, dropping one and listing the other twice.
+	shares := make(bySizeDesc, 0, len(sizes))
+	for lang, size := range sizes {
+		shares = append(shares, languageShare{name: lang, size: size})
+	}
+	sort.Sort(shares)
+	if len(shares) > maxStats {
+		shares = shares[:maxStats]
+	}
+
+	ret := make([]*LanguageStat, 0, len(shares))
+	for _, s := range shares {
+		color := linguist.GetColor(s.name)
+		if color == "" {
+			color = "#ccc" // grey
+		}
+		// totalSize is non-zero here: linguistlstree skips empty files, so
+		// any language present contributes a positive size.
+		ret = append(ret, &LanguageStat{
+			Name:    s.name,
+			Percent: fmt.Sprintf("%.2f%%", s.size/totalSize*100.0),
+			Color:   color,
+		})
+	}
+	return ret
+}
+
+// LanguageStat is one entry of the list that getLanguageStats builds and that
+// Home stores in ctx.Data["LanguageStats"].
+type LanguageStat struct {
+ // Name is the detected language name.
+ Name string
+ // Percent is the language's share of the total size, pre-formatted with
+ // "%.2f%%".
+ Percent string
+ // Color is the value of linguist.GetColor for Name, or "#ccc" when that
+ // returns an empty string.
+ Color string
+}
+
+// file describes one blob collected by linguistlstree.
+type file struct {
+ // Name is the entry name as printed by `git ls-tree`.
+ Name string
+ // Size is the value printed by `git cat-file -s` for the blob, parsed as a
+ // float64 so getLanguageStats can compute percentages directly.
+ Size float64
+ // Language is the detection result: a language name, the value returned by
+ // linguist.DetectMimeFromFilename (presumably a MIME type), or
+ // "(undetermined)".
+ Language string
+}
+
+// gitcmd runs `git` with args inside the repository directory
+// (ctx.Repo.GitRepo.Path) and returns its standard output as a string.
+// Standard error is discarded. A non-nil error is passed to tsoErr, and the
+// captured output is returned either way.
+func gitcmd(ctx *middleware.Context, args ...string) string {
+	out, _, runErr := com.ExecCmdDir(ctx.Repo.GitRepo.Path, "git", args...)
+	tsoErr(ctx, runErr)
+	return out
+}
+// gitcmdbytes is the []byte counterpart of gitcmd: it runs `git` with args
+// inside ctx.Repo.GitRepo.Path and returns the raw standard output. Standard
+// error is discarded. A non-nil error is passed to tsoErr, and the captured
+// output is returned either way.
+func gitcmdbytes(ctx *middleware.Context, args ...string) []byte {
+	raw, _, runErr := com.ExecCmdDirBytes(ctx.Repo.GitRepo.Path, "git", args...)
+	tsoErr(ctx, runErr)
+	return raw
+}
+// tsoErr reports a non-nil err through ctx.Handle with status 500. It does
+// nothing for a nil err and never aborts the caller, which keeps running
+// after the call.
+func tsoErr(ctx *middleware.Context, err error) {
+	if err == nil {
+		return
+	}
+	ctx.Handle(500, "*blames tso*", err)
+}
+
+// linguistlstree lists the blobs that sit directly in treeish (as printed by
+// `git ls-tree treeish`) and tags each one with a detected language.
+//
+// Sub-trees are not descended into, vendored files and empty blobs are
+// skipped, and the returned slice is never nil. Detection tries, in order:
+// the filename, linguist.DetectMimeFromFilename, and finally the blob
+// contents; a blob that matches nothing is labelled "(undetermined)".
+//
+// Errors from git or from size parsing are reported through tsoErr and the
+// walk continues with the next entry.
+func linguistlstree(ctx *middleware.Context, treeish string) (files []*file) {
+	files = []*file{}
+	lstext := gitcmd(ctx, "ls-tree", treeish)
+	for _, ln := range strings.Split(lstext, "\n") {
+		// Each entry reads "<mode> <type> <hash>\t<name>". Split at the tab
+		// first so that spaces inside the file name do not disturb the field
+		// count (such files used to be silently dropped).
+		tab := strings.Index(ln, "\t")
+		if tab < 0 {
+			continue
+		}
+		meta := strings.Split(ln[:tab], " ")
+		if len(meta) != 3 {
+			continue
+		}
+		ftype, fhash, fname := meta[1], meta[2], ln[tab+1:]
+
+		// Only blobs are inspected. Descending into "tree" entries was tried
+		// by the original author and left disabled as broken:
+		//   subdir := linguistlstree(ctx, fhash)
+		//   files = append(files, subdir...)
+		if ftype != "blob" {
+			continue
+		}
+
+		// If it's vendored, don't even look at it.
+		// (Per the original author, "vendored" covers files like README.md
+		// and .gitignore; the exact rules live in the linguist package.)
+		if linguist.IsVendored(fname) {
+			continue
+		}
+
+		ssize := gitcmd(ctx, "cat-file", "-s", fhash)
+		fsize, err := strconv.ParseFloat(strings.TrimSpace(ssize), 64)
+		if err != nil {
+			// Report the problem and skip the entry rather than carrying on
+			// with whatever value ParseFloat returned alongside the error.
+			tsoErr(ctx, err)
+			continue
+		}
+
+		// If it's an empty file, don't waste time on it.
+		if fsize == 0 {
+			continue
+		}
+
+		f := &file{Name: fname, Size: fsize}
+
+		//
+		// language detection
+		//
+
+		// By file extension.
+		if byExt := linguist.DetectFromFilename(fname); byExt != "" {
+			f.Language = byExt
+			files = append(files, f)
+			continue
+		}
+
+		// By mimetype.
+		// If we can't guess the type by extension, then before jumping into
+		// lexing and parsing things like image files or cat videos
+		// ...or other binary formats which will give erroneous results...
+		// with the linguist.DetectFromContents method, I posit looking
+		// at the mimetype with linguist.DetectMimeFromFilename.
+		//
+		// ...however, this is not what GitHub does at all; it ignores
+		// binary files altogether. However, there is no law that states
+		// git must be used for code only.
+		//
+		// The third result is discarded, as in the original
+		// (presumably an error; confirm against the linguist API).
+		byMime, shouldIgnore, _ := linguist.DetectMimeFromFilename(fname)
+		if byMime != "" && shouldIgnore {
+			f.Language = byMime
+			files = append(files, f)
+			continue
+		}
+
+		// By contents.
+		// see also: github.com/github/linguist
+		// see also: github.com/generaltso/linguist
+		contents := gitcmdbytes(ctx, "cat-file", "blob", fhash)
+		if byContents := linguist.DetectFromContents(contents); byContents != "" {
+			f.Language = byContents
+		} else {
+			f.Language = "(undetermined)"
+		}
+		files = append(files, f)
+	}
+	return files
+}
diff --git a/templates/repo/home.tmpl b/templates/repo/home.tmpl
index 22a2f839a..3a93a674c 100644
--- a/templates/repo/home.tmpl
+++ b/templates/repo/home.tmpl
@@ -6,6 +6,9 @@
{{if .Repository.DescriptionHtml}}{{.Repository.DescriptionHtml}}{{else}}{{.i18n.Tr "repo.no_desc"}}{{end}}
{{.Repository.Website}}
+ {{if .LanguageStats}}
+ {{template "repo/language_statistics" .}}
+ {{end}}