aboutsummaryrefslogtreecommitdiffstats
path: root/update-license.go
blob: ab76e0f3129a9aff03c9c447060ecd37a1e7d010 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
// +build none

/*
This command generates GPL and LGPL license headers on top of all source files.
You can run it once per month, before cutting a release or just
whenever you feel like it.

    go run update-license.go

The copyright in each file is assigned to any authors for which git
can find commits in the file's history. It will try to follow renames
throughout history. The author names are mapped and deduplicated using
the .mailmap file. You can use .mailmap to set the canonical name and
address for each author. See git-shortlog(1) for an explanation
of the .mailmap format.

Please review the resulting diff to check whether the correct
copyright assignments are performed.
*/
package main

import (
    "bufio"
    "bytes"
    "fmt"
    "io/ioutil"
    "os"
    "os/exec"
    "path"
    "regexp"
    "runtime"
    "sort"
    "strings"
    "sync"
    "text/template"
)

var (
    // extensions lists the file extensions that are considered.
    // getFiles drops every path whose extension is not in this list.
    extensions = []string{".go", ".js", ".qml"}

    // skipPrefixes lists path prefixes that are skipped: getFiles
    // ignores any path starting with one of them.
    skipPrefixes = []string{"tests/files/", "cmd/mist/assets/ext/", "cmd/mist/assets/muted/"}

    // gplPrefixes lists path prefixes whose files are licensed as GPL.
    // all other files are LGPL.
    gplPrefixes = []string{"cmd/"}

    // licenseCommentRE must match the entire license comment at the
    // beginning of each file: a leading /* ... */ comment whose text
    // starts with "Copyright" or "This file is part of", together with
    // the newlines that directly follow it.
    licenseCommentRE = regexp.MustCompile(`(?s)^/\*\s*(Copyright|This file is part of) .*?\*/\n*`)

    // defaultCopyright is the line used when git doesn't find any
    // authors for a file.
    defaultCopyright = "Copyright (C) 2014 Jeffrey Wilcke <jeffrey@ethereum.org>"
)

// licenseT is the template that generates the license comment.
// its input is an info structure: the template calls the Copyrights
// method for the copyright lines and the License method for the
// license name. the rendered text ends with a blank line so that the
// file content can be appended directly after it.
var licenseT = template.Must(template.New("").Parse(`/*
    {{.Copyrights}}

    This file is part of go-ethereum

    go-ethereum is free software: you can redistribute it and/or modify
    it under the terms of the GNU {{.License}} as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    go-ethereum is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU {{.License}} for more details.

    You should have received a copy of the GNU {{.License}}
    along with go-ethereum.  If not, see <http://www.gnu.org/licenses/>.
*/

`))

// info holds everything that is needed to render the license header
// of a single file.
type info struct {
    // file is the path of the file, as listed by git.
    file    string
    // mode is the file mode reported by os.Lstat; it is passed to
    // ioutil.WriteFile when the file is rewritten.
    mode    os.FileMode
    // authors is filled from the git log of the file. keys have the
    // form "Name <email>".
    authors map[string][]string // map keys are authors, values are years
    // gpl is true when the path starts with one of gplPrefixes.
    gpl     bool
}

// Copyrights renders the copyright notices of the file. There is one
// "Copyright (C) <years> <author>" line per author; the lines are
// sorted and joined with "\n\t". When no authors are known the result
// is defaultCopyright.
func (i info) Copyrights() string {
    if len(i.authors) == 0 {
        return defaultCopyright
    }
    notices := make([]string, 0, len(i.authors))
    for author, years := range i.authors {
        notice := "Copyright (C) " + strings.Join(years, ", ") + " " + author
        notices = append(notices, notice)
    }
    // map iteration order is random, sorting keeps the output stable.
    sort.Strings(notices)
    return strings.Join(notices, "\n\t")
}

// License returns the full name of the license that applies to the
// file: the General Public License for GPL files and the Lesser
// General Public License for everything else.
func (i info) License() string {
    name := "Lesser General Public License"
    if i.gpl {
        name = "General Public License"
    }
    return name
}

// ShortLicense returns the abbreviated license name, "GPL" or "LGPL".
// It is used in the progress output of writeLicenses.
func (i info) ShortLicense() string {
    switch {
    case i.gpl:
        return "GPL"
    default:
        return "LGPL"
    }
}

// addAuthorYear records that name contributed to the file in year.
// A year that is already recorded for the author is not added again;
// the years of each author are kept sorted.
func (i *info) addAuthorYear(name, year string) {
    years := i.authors[name]
    for idx := range years {
        if years[idx] == year {
            return
        }
    }
    years = append(years, year)
    sort.Strings(years)
    i.authors[name] = years
}

// main wires up the pipeline: getFiles lists the candidate paths, a
// pool of getInfo workers collects the author information for them
// concurrently, and writeLicenses rewrites the files on the main
// goroutine. The process exits with status 1 when writing fails.
func main() {
    files := make(chan string)
    infos := make(chan *info)
    wg := new(sync.WaitGroup)

    go getFiles(files)
    // getting file info is slow and needs to be parallel.
    // one more worker than there are CPUs is started.
    workers := runtime.NumCPU() + 1
    for i := 0; i < workers; i++ {
        wg.Add(1)
        go getInfo(files, infos, wg)
    }
    // close infos once every worker has returned so that the range
    // loop in writeLicenses terminates.
    go func() { wg.Wait(); close(infos) }()

    // a failed write used to be dropped silently; report it and make
    // the failure visible in the exit status.
    if err := writeLicenses(infos); err != nil {
        fmt.Println("error writing licenses:", err)
        os.Exit(1)
    }
}

func getFiles(out chan<- string) {
    cmd := exec.Command("git", "ls-tree", "-r", "--name-only", "HEAD")
    err := doLines(cmd, func(line string) {
        for _, p := range skipPrefixes {
            if strings.HasPrefix(line, p) {
                return
            }
        }
        ext := path.Ext(line)
        for _, wantExt := range extensions {
            if ext == wantExt {
                goto send
            }
        }
        return

    send:
        out <- line
    })
    if err != nil {
        fmt.Println("error getting files:", err)
    }
    close(out)
}

// getInfo is the body of one worker. It receives paths from files
// until that channel is closed, collects the license information of
// each path with fileInfo and sends the result on out. Paths that
// cannot be stat'ed or whose fileInfo call fails are reported and
// skipped; paths that are not regular files are skipped silently.
// wg.Done is called when the worker returns.
func getInfo(files <-chan string, out chan<- *info, wg *sync.WaitGroup) {
    for name := range files {
        st, err := os.Lstat(name)
        switch {
        case err != nil:
            fmt.Printf("ERROR %s: %v\n", name, err)
            continue
        case !st.Mode().IsRegular():
            continue
        }
        result, err := fileInfo(name)
        if err != nil {
            fmt.Printf("ERROR %s: %v\n", name, err)
            continue
        }
        // remember the mode so the file can be rewritten with it.
        result.mode = st.Mode()
        out <- result
    }
    wg.Done()
}

// fileInfo collects the license information of file: whether the path
// is below one of gplPrefixes, and which authors committed to it in
// which years according to git log (following renames and copies).
//
// The returned error is the one reported by doLines for the git
// command. Output lines that do not have the expected
// "<date> | <name> <email>" shape are ignored.
func fileInfo(file string) (*info, error) {
    info := &info{file: file, authors: make(map[string][]string)}
    for _, p := range gplPrefixes {
        if strings.HasPrefix(file, p) {
            info.gpl = true
            break
        }
    }
    cmd := exec.Command("git", "log", "--follow", "--find-copies", "--pretty=format:%aI | %aN <%aE>", "--", file)
    err := doLines(cmd, func(line string) {
        // the separator written by the format string above.
        const sep = " | "
        idx := strings.Index(line, sep)
        if idx < 4 {
            // no separator, or not even a four digit year in front of
            // it. slicing such a line would panic or yield garbage.
            return
        }
        year, name := line[:4], line[idx+len(sep):]
        info.addAuthorYear(name, year)
    })
    return info, err
}

// writeLicenses consumes infos until the channel is closed and makes
// sure that every file starts with the license comment rendered by
// licenseT. An existing license comment at the very beginning of a
// file (as matched by licenseCommentRE) is replaced, otherwise the
// comment is prepended. Files whose content is already up to date are
// not written.
//
// A file that cannot be read is reported and skipped. The first error
// from rendering the template or writing a file is returned.
func writeLicenses(infos <-chan *info) error {
    buf := new(bytes.Buffer)
    for info := range infos {
        content, err := ioutil.ReadFile(info.file)
        if err != nil {
            fmt.Printf("ERROR: couldn't read %s: %v\n", info.file, err)
            // without the content there is nothing sensible to write:
            // carrying on would replace the file with a bare header.
            continue
        }

        // construct new file content
        buf.Reset()
        if err := licenseT.Execute(buf, info); err != nil {
            return fmt.Errorf("generating license for %s: %v", info.file, err)
        }
        if m := licenseCommentRE.FindIndex(content); m != nil && m[0] == 0 {
            // drop the old license comment, keep everything after it.
            buf.Write(content[m[1]:])
        } else {
            buf.Write(content)
        }

        if !bytes.Equal(content, buf.Bytes()) {
            fmt.Println("writing", info.ShortLicense(), info.file)
            if err := ioutil.WriteFile(info.file, buf.Bytes(), info.mode); err != nil {
                return err
            }
        }
    }
    return nil
}

// doLines starts cmd and calls f with every line that the command
// writes to its standard output, without the line terminator. It
// returns once the command has exited.
//
// The returned error is non-nil when the command cannot be started,
// when reading its output fails, or when the command exits
// unsuccessfully; in the last case the error message includes the
// command line.
func doLines(cmd *exec.Cmd, f func(string)) error {
    stdout, err := cmd.StdoutPipe()
    if err != nil {
        return err
    }
    if err := cmd.Start(); err != nil {
        return err
    }
    s := bufio.NewScanner(stdout)
    for s.Scan() {
        f(s.Text())
    }
    if err := s.Err(); err != nil {
        // the scanner gave up before the end of the output, so the
        // command may still be running and writing. stop it and reap
        // it so that neither the process nor the pipe is leaked. the
        // errors of Kill and Wait are dropped on purpose: the scan
        // error is the one worth reporting.
        _ = cmd.Process.Kill()
        _ = cmd.Wait()
        return err
    }
    if err := cmd.Wait(); err != nil {
        return fmt.Errorf("%v (for %s)", err, strings.Join(cmd.Args, " "))
    }
    return nil
}