Rewrite reference processing code in preparation for opening/closing from comment references (#8261)

* Add a markdown stripper for mentions and xrefs

* Improve comments

* Small code simplification

* Move reference code to modules/references

* Fix typo

* Make MarkdownStripper return [][]byte

* Implement preliminary keywords parsing

* Add FIXME comment

* Fix comment

* make fmt

* Fix permissions check

* Fix text assumptions

* Fix imports

* Fix lint, fmt

* Fix unused import

* Add missing export comment

* Bypass revive on implemented interface

* Move mdstripper into its own package

* Support alphanumeric patterns

* Refactor FindAllMentions

* Move mentions test to references

* Parse mentions from reference package

* Refactor code to implement renderizable references

* Fix typo

* Move patterns and tests to the references package

* Fix nil reference

* Preliminary rendering attempt of closing keywords

* Normalize names, comments, general tidy-up

* Add CSS style for action keywords

* Fix permission for admin and owner

* Fix golangci-lint

* Fix golangci-lint
This commit is contained in:
guillep2k 2019-10-13 19:29:10 -03:00 committed by zeripath
parent 6e3f51098b
commit 15809d81f7
16 changed files with 1123 additions and 438 deletions

View file

@ -15,6 +15,7 @@ import (
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/references"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
@ -36,17 +37,6 @@ var (
// While fast, this is also incorrect and lead to false positives.
// TODO: fix invalid linking issue
// mentionPattern matches all mentions in the form of "@user"
mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_\.]+)(?:\s|$|\)|\])`)
// issueNumericPattern matches string that references to a numeric issue, e.g. #1287
issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(#[0-9]+)(?:\s|$|\)|\]|:|\.(\s|$))`)
// issueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234
issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$))`)
// crossReferenceIssueNumericPattern matches string that references a numeric issue in a different repository
// e.g. gogits/gogs#12345
crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+)(?:\s|$|\)|\]|\.(\s|$))`)
// sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
// Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length
// so that abbreviated hash links can be used as well. This matches git and github useability.
@ -70,6 +60,9 @@ var (
linkRegex, _ = xurls.StrictMatchingScheme("https?://")
)
// CSS class for action keywords (e.g. "closes: #1")
const keywordClass = "issue-keyword"
// regexp for full links to issues/pulls
var issueFullPattern *regexp.Regexp
@ -99,17 +92,6 @@ func getIssueFullPattern() *regexp.Regexp {
return issueFullPattern
}
// FindAllMentions matches mention patterns in given content
// and returns a list of found user names without @ prefix.
func FindAllMentions(content string) []string {
mentions := mentionPattern.FindAllStringSubmatch(content, -1)
ret := make([]string, len(mentions))
for i, val := range mentions {
ret[i] = val[1][1:]
}
return ret
}
// IsSameDomain checks if given url string has the same hostname as current Gitea instance
func IsSameDomain(s string) bool {
if strings.HasPrefix(s, "/") {
@ -142,7 +124,6 @@ var defaultProcessors = []processor{
linkProcessor,
mentionProcessor,
issueIndexPatternProcessor,
crossReferenceIssueIndexPatternProcessor,
sha1CurrentPatternProcessor,
emailAddressProcessor,
}
@ -183,7 +164,6 @@ var commitMessageProcessors = []processor{
linkProcessor,
mentionProcessor,
issueIndexPatternProcessor,
crossReferenceIssueIndexPatternProcessor,
sha1CurrentPatternProcessor,
emailAddressProcessor,
}
@ -217,7 +197,6 @@ var commitMessageSubjectProcessors = []processor{
linkProcessor,
mentionProcessor,
issueIndexPatternProcessor,
crossReferenceIssueIndexPatternProcessor,
sha1CurrentPatternProcessor,
}
@ -330,6 +309,24 @@ func (ctx *postProcessCtx) textNode(node *html.Node) {
}
}
// createKeyword() renders a highlighted version of an action keyword
func createKeyword(content string) *html.Node {
span := &html.Node{
Type: html.ElementNode,
Data: atom.Span.String(),
Attr: []html.Attribute{},
}
span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: keywordClass})
text := &html.Node{
Type: html.TextNode,
Data: content,
}
span.AppendChild(text)
return span
}
func createLink(href, content, class string) *html.Node {
a := &html.Node{
Type: html.ElementNode,
@ -377,10 +374,16 @@ func createCodeLink(href, content, class string) *html.Node {
return a
}
// replaceContent takes a text node, and in its content it replaces a section of
// it with the specified newNode. An example to visualize how this can work can
// be found here: https://play.golang.org/p/5zP8NnHZ03s
// replaceContent takes text node, and in its content it replaces a section of
// it with the specified newNode.
func replaceContent(node *html.Node, i, j int, newNode *html.Node) {
replaceContentList(node, i, j, []*html.Node{newNode})
}
// replaceContentList takes text node, and in its content it replaces a section of
// it with the specified newNodes. An example to visualize how this can work can
// be found here: https://play.golang.org/p/5zP8NnHZ03s
func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) {
// get the data before and after the match
before := node.Data[:i]
after := node.Data[j:]
@ -392,7 +395,9 @@ func replaceContent(node *html.Node, i, j int, newNode *html.Node) {
// Get the current next sibling, before which we place the replaced data,
// and after that we place the new text node.
nextSibling := node.NextSibling
node.Parent.InsertBefore(newNode, nextSibling)
for _, n := range newNodes {
node.Parent.InsertBefore(n, nextSibling)
}
if after != "" {
node.Parent.InsertBefore(&html.Node{
Type: html.TextNode,
@ -402,13 +407,13 @@ func replaceContent(node *html.Node, i, j int, newNode *html.Node) {
}
func mentionProcessor(_ *postProcessCtx, node *html.Node) {
m := mentionPattern.FindStringSubmatchIndex(node.Data)
if m == nil {
// We replace only the first mention; other mentions will be addressed later
found, loc := references.FindFirstMentionBytes([]byte(node.Data))
if !found {
return
}
// Replace the mention with a link to the specified user.
mention := node.Data[m[2]:m[3]]
replaceContent(node, m[2], m[3], createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention"))
mention := node.Data[loc.Start:loc.End]
replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention"))
}
func shortLinkProcessor(ctx *postProcessCtx, node *html.Node) {
@ -597,45 +602,44 @@ func issueIndexPatternProcessor(ctx *postProcessCtx, node *html.Node) {
if ctx.metas == nil {
return
}
// default to numeric pattern, unless alphanumeric is requested.
pattern := issueNumericPattern
var (
found bool
ref *references.RenderizableReference
)
if ctx.metas["style"] == IssueNameStyleAlphanumeric {
pattern = issueAlphanumericPattern
}
match := pattern.FindStringSubmatchIndex(node.Data)
if match == nil {
return
}
id := node.Data[match[2]:match[3]]
var link *html.Node
if _, ok := ctx.metas["format"]; ok {
// Support for external issue tracker
if ctx.metas["style"] == IssueNameStyleAlphanumeric {
ctx.metas["index"] = id
} else {
ctx.metas["index"] = id[1:]
}
link = createLink(com.Expand(ctx.metas["format"], ctx.metas), id, "issue")
found, ref = references.FindRenderizableReferenceAlphanumeric(node.Data)
} else {
link = createLink(util.URLJoin(setting.AppURL, ctx.metas["user"], ctx.metas["repo"], "issues", id[1:]), id, "issue")
found, ref = references.FindRenderizableReferenceNumeric(node.Data)
}
replaceContent(node, match[2], match[3], link)
}
func crossReferenceIssueIndexPatternProcessor(ctx *postProcessCtx, node *html.Node) {
m := crossReferenceIssueNumericPattern.FindStringSubmatchIndex(node.Data)
if m == nil {
if !found {
return
}
ref := node.Data[m[2]:m[3]]
parts := strings.SplitN(ref, "#", 2)
repo, issue := parts[0], parts[1]
var link *html.Node
reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End]
if _, ok := ctx.metas["format"]; ok {
ctx.metas["index"] = ref.Issue
link = createLink(com.Expand(ctx.metas["format"], ctx.metas), reftext, "issue")
} else if ref.Owner == "" {
link = createLink(util.URLJoin(setting.AppURL, ctx.metas["user"], ctx.metas["repo"], "issues", ref.Issue), reftext, "issue")
} else {
link = createLink(util.URLJoin(setting.AppURL, ref.Owner, ref.Name, "issues", ref.Issue), reftext, "issue")
}
replaceContent(node, m[2], m[3],
createLink(util.URLJoin(setting.AppURL, repo, "issues", issue), ref, issue))
if ref.Action == references.XRefActionNone {
replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
return
}
// Decorate action keywords
keyword := createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End])
spaces := &html.Node{
Type: html.TextNode,
Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start],
}
replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link})
}
// fullSha1PatternProcessor renders SHA containing URLs

View file

@ -239,34 +239,6 @@ func TestRender_FullIssueURLs(t *testing.T) {
`<a href="http://localhost:3000/gogits/gogs/issues/4" class="issue">#4</a>`)
}
func TestRegExp_issueNumericPattern(t *testing.T) {
trueTestCases := []string{
"#1234",
"#0",
"#1234567890987654321",
" #12",
"#12:",
"ref: #12: msg",
}
falseTestCases := []string{
"# 1234",
"# 0",
"# ",
"#",
"#ABC",
"#1A2B",
"",
"ABC",
}
for _, testCase := range trueTestCases {
assert.True(t, issueNumericPattern.MatchString(testCase))
}
for _, testCase := range falseTestCases {
assert.False(t, issueNumericPattern.MatchString(testCase))
}
}
func TestRegExp_sha1CurrentPattern(t *testing.T) {
trueTestCases := []string{
"d8a994ef243349f321568f9e36d5c3f444b99cae",
@ -325,70 +297,6 @@ func TestRegExp_anySHA1Pattern(t *testing.T) {
}
}
func TestRegExp_mentionPattern(t *testing.T) {
trueTestCases := []string{
"@Unknwon",
"@ANT_123",
"@xxx-DiN0-z-A..uru..s-xxx",
" @lol ",
" @Te-st",
"(@gitea)",
"[@gitea]",
}
falseTestCases := []string{
"@ 0",
"@ ",
"@",
"",
"ABC",
"/home/gitea/@gitea",
"\"@gitea\"",
}
for _, testCase := range trueTestCases {
res := mentionPattern.MatchString(testCase)
assert.True(t, res)
}
for _, testCase := range falseTestCases {
res := mentionPattern.MatchString(testCase)
assert.False(t, res)
}
}
func TestRegExp_issueAlphanumericPattern(t *testing.T) {
trueTestCases := []string{
"ABC-1234",
"A-1",
"RC-80",
"ABCDEFGHIJ-1234567890987654321234567890",
"ABC-123.",
"(ABC-123)",
"[ABC-123]",
"ABC-123:",
}
falseTestCases := []string{
"RC-08",
"PR-0",
"ABCDEFGHIJK-1",
"PR_1",
"",
"#ABC",
"",
"ABC",
"GG-",
"rm-1",
"/home/gitea/ABC-1234",
"MY-STRING-ABC-123",
}
for _, testCase := range trueTestCases {
assert.True(t, issueAlphanumericPattern.MatchString(testCase))
}
for _, testCase := range falseTestCases {
assert.False(t, issueAlphanumericPattern.MatchString(testCase))
}
}
func TestRegExp_shortLinkPattern(t *testing.T) {
trueTestCases := []string{
"[[stuff]]",

View file

@ -0,0 +1,260 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package mdstripper
import (
"bytes"
"github.com/russross/blackfriday"
)
// MarkdownStripper extends blackfriday.Renderer
type MarkdownStripper struct {
blackfriday.Renderer
links []string
coallesce bool
}
const (
blackfridayExtensions = 0 |
blackfriday.EXTENSION_NO_INTRA_EMPHASIS |
blackfriday.EXTENSION_TABLES |
blackfriday.EXTENSION_FENCED_CODE |
blackfriday.EXTENSION_STRIKETHROUGH |
blackfriday.EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK |
blackfriday.EXTENSION_DEFINITION_LISTS |
blackfriday.EXTENSION_FOOTNOTES |
blackfriday.EXTENSION_HEADER_IDS |
blackfriday.EXTENSION_AUTO_HEADER_IDS |
// Not included in modules/markup/markdown/markdown.go;
// required here to process inline links
blackfriday.EXTENSION_AUTOLINK
)
//revive:disable:var-naming Implementing the Rendering interface requires breaking some linting rules
// StripMarkdown parses markdown content by removing all markup and code blocks
// in order to extract links and other references
func StripMarkdown(rawBytes []byte) (string, []string) {
stripper := &MarkdownStripper{
links: make([]string, 0, 10),
}
body := blackfriday.Markdown(rawBytes, stripper, blackfridayExtensions)
return string(body), stripper.GetLinks()
}
// StripMarkdownBytes parses markdown content by removing all markup and code blocks
// in order to extract links and other references
func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) {
stripper := &MarkdownStripper{
links: make([]string, 0, 10),
}
body := blackfriday.Markdown(rawBytes, stripper, blackfridayExtensions)
return body, stripper.GetLinks()
}
// block-level callbacks
// BlockCode dummy function to proceed with rendering
func (r *MarkdownStripper) BlockCode(out *bytes.Buffer, text []byte, infoString string) {
// Not rendered
r.coallesce = false
}
// BlockQuote dummy function to proceed with rendering
func (r *MarkdownStripper) BlockQuote(out *bytes.Buffer, text []byte) {
// FIXME: perhaps it's better to leave out block quote for this?
r.processString(out, text, false)
}
// BlockHtml dummy function to proceed with rendering
func (r *MarkdownStripper) BlockHtml(out *bytes.Buffer, text []byte) { //nolint
// Not rendered
r.coallesce = false
}
// Header dummy function to proceed with rendering
func (r *MarkdownStripper) Header(out *bytes.Buffer, text func() bool, level int, id string) {
text()
r.coallesce = false
}
// HRule dummy function to proceed with rendering
func (r *MarkdownStripper) HRule(out *bytes.Buffer) {
// Not rendered
r.coallesce = false
}
// List dummy function to proceed with rendering
func (r *MarkdownStripper) List(out *bytes.Buffer, text func() bool, flags int) {
text()
r.coallesce = false
}
// ListItem dummy function to proceed with rendering
func (r *MarkdownStripper) ListItem(out *bytes.Buffer, text []byte, flags int) {
r.processString(out, text, false)
}
// Paragraph dummy function to proceed with rendering
func (r *MarkdownStripper) Paragraph(out *bytes.Buffer, text func() bool) {
text()
r.coallesce = false
}
// Table dummy function to proceed with rendering
func (r *MarkdownStripper) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
r.processString(out, header, false)
r.processString(out, body, false)
}
// TableRow dummy function to proceed with rendering
func (r *MarkdownStripper) TableRow(out *bytes.Buffer, text []byte) {
r.processString(out, text, false)
}
// TableHeaderCell dummy function to proceed with rendering
func (r *MarkdownStripper) TableHeaderCell(out *bytes.Buffer, text []byte, flags int) {
r.processString(out, text, false)
}
// TableCell dummy function to proceed with rendering
func (r *MarkdownStripper) TableCell(out *bytes.Buffer, text []byte, flags int) {
r.processString(out, text, false)
}
// Footnotes dummy function to proceed with rendering
func (r *MarkdownStripper) Footnotes(out *bytes.Buffer, text func() bool) {
text()
}
// FootnoteItem dummy function to proceed with rendering
func (r *MarkdownStripper) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) {
r.processString(out, text, false)
}
// TitleBlock dummy function to proceed with rendering
func (r *MarkdownStripper) TitleBlock(out *bytes.Buffer, text []byte) {
r.processString(out, text, false)
}
// Span-level callbacks
// AutoLink dummy function to proceed with rendering
func (r *MarkdownStripper) AutoLink(out *bytes.Buffer, link []byte, kind int) {
r.processLink(out, link, []byte{})
}
// CodeSpan dummy function to proceed with rendering
func (r *MarkdownStripper) CodeSpan(out *bytes.Buffer, text []byte) {
// Not rendered
r.coallesce = false
}
// DoubleEmphasis dummy function to proceed with rendering
func (r *MarkdownStripper) DoubleEmphasis(out *bytes.Buffer, text []byte) {
r.processString(out, text, false)
}
// Emphasis dummy function to proceed with rendering
func (r *MarkdownStripper) Emphasis(out *bytes.Buffer, text []byte) {
r.processString(out, text, false)
}
// Image dummy function to proceed with rendering
func (r *MarkdownStripper) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
// Not rendered
r.coallesce = false
}
// LineBreak dummy function to proceed with rendering
func (r *MarkdownStripper) LineBreak(out *bytes.Buffer) {
// Not rendered
r.coallesce = false
}
// Link dummy function to proceed with rendering
func (r *MarkdownStripper) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
r.processLink(out, link, content)
}
// RawHtmlTag dummy function to proceed with rendering
func (r *MarkdownStripper) RawHtmlTag(out *bytes.Buffer, tag []byte) { //nolint
// Not rendered
r.coallesce = false
}
// TripleEmphasis dummy function to proceed with rendering
func (r *MarkdownStripper) TripleEmphasis(out *bytes.Buffer, text []byte) {
r.processString(out, text, false)
}
// StrikeThrough dummy function to proceed with rendering
func (r *MarkdownStripper) StrikeThrough(out *bytes.Buffer, text []byte) {
r.processString(out, text, false)
}
// FootnoteRef dummy function to proceed with rendering
func (r *MarkdownStripper) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
// Not rendered
r.coallesce = false
}
// Low-level callbacks
// Entity dummy function to proceed with rendering
func (r *MarkdownStripper) Entity(out *bytes.Buffer, entity []byte) {
// FIXME: literal entities are not parsed; perhaps they should
r.coallesce = false
}
// NormalText dummy function to proceed with rendering
func (r *MarkdownStripper) NormalText(out *bytes.Buffer, text []byte) {
r.processString(out, text, true)
}
// Header and footer
// DocumentHeader dummy function to proceed with rendering
func (r *MarkdownStripper) DocumentHeader(out *bytes.Buffer) {
r.coallesce = false
}
// DocumentFooter dummy function to proceed with rendering
func (r *MarkdownStripper) DocumentFooter(out *bytes.Buffer) {
r.coallesce = false
}
// GetFlags returns rendering flags
func (r *MarkdownStripper) GetFlags() int {
return 0
}
//revive:enable:var-naming
func doubleSpace(out *bytes.Buffer) {
if out.Len() > 0 {
out.WriteByte('\n')
}
}
func (r *MarkdownStripper) processString(out *bytes.Buffer, text []byte, coallesce bool) {
// Always break-up words
if !coallesce || !r.coallesce {
doubleSpace(out)
}
out.Write(text)
r.coallesce = coallesce
}
func (r *MarkdownStripper) processLink(out *bytes.Buffer, link []byte, content []byte) {
// Links are processed out of band
r.links = append(r.links, string(link))
r.coallesce = false
}
// GetLinks returns the list of link data collected while parsing
func (r *MarkdownStripper) GetLinks() []string {
return r.links
}

View file

@ -0,0 +1,71 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package mdstripper
import (
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
func TestMarkdownStripper(t *testing.T) {
type testItem struct {
markdown string
expectedText []string
expectedLinks []string
}
list := []testItem{
{
`
## This is a title
This is [one](link) to paradise.
This **is emphasized**.
This: should coallesce.
` + "```" + `
This is a code block.
This should not appear in the output at all.
` + "```" + `
* Bullet 1
* Bullet 2
A HIDDEN ` + "`" + `GHOST` + "`" + ` IN THIS LINE.
`,
[]string{
"This is a title",
"This is",
"to paradise.",
"This",
"is emphasized",
".",
"This: should coallesce.",
"Bullet 1",
"Bullet 2",
"A HIDDEN",
"IN THIS LINE.",
},
[]string{
"link",
}},
}
for _, test := range list {
text, links := StripMarkdown([]byte(test.markdown))
rawlines := strings.Split(text, "\n")
lines := make([]string, 0, len(rawlines))
for _, line := range rawlines {
line := strings.TrimSpace(line)
if line != "" {
lines = append(lines, line)
}
}
assert.EqualValues(t, test.expectedText, lines)
assert.EqualValues(t, test.expectedLinks, links)
}
}

View file

@ -38,6 +38,9 @@ func NewSanitizer() {
// Custom URL-Schemes
sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...)
// Allow keyword markup
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^` + keywordClass + `$`)).OnElements("span")
})
}