Search bar for issues/pulls (#530)

2025-07-08 19:00:02 +02:00 · 2017-01-24 21:43:02 -05:00 · 2017-01-24 21:43:02 -05:00 · 833f8b94c2
commit 833f8b94c2
parent 8bc431952f
195 changed files with 221830 additions and 60 deletions
--- a/vendor/github.com/blevesearch/bleve/analysis/tokenizer/character/character.go
+++ b/vendor/github.com/blevesearch/bleve/analysis/tokenizer/character/character.go
@ -0,0 +1,76 @@
+//  Copyright (c) 2016 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package character
+
+import (
+	"unicode/utf8"
+
+	"github.com/blevesearch/bleve/analysis"
+)
+
+// IsTokenRune reports whether the rune r should be treated as part of a token.
+type IsTokenRune func(r rune) bool
+
+// CharacterTokenizer is a tokenizer driven by an IsTokenRune predicate.
+type CharacterTokenizer struct {
+	// isTokenRun is the predicate consulted for each decoded rune.
+	isTokenRun IsTokenRune
+}
+
+// NewCharacterTokenizer returns a CharacterTokenizer that uses f as its
+// IsTokenRune predicate.
+func NewCharacterTokenizer(f IsTokenRune) *CharacterTokenizer {
+	tokenizer := new(CharacterTokenizer)
+	tokenizer.isTokenRun = f
+	return tokenizer
+}
+
+// Tokenize splits input into tokens made of maximal runs of consecutive
+// runes accepted by the tokenizer's IsTokenRune predicate.
+//
+// Each token's Term is a sub-slice of input (no copy is made), Start and End
+// are byte offsets into input, Position is the 1-based ordinal of the token,
+// and Type is always analysis.AlphaNumeric.
+//
+// Bytes that are not valid UTF-8 are treated as token separators and the scan
+// continues after them, so malformed input does not cut tokenization short.
+// A well-formed U+FFFD in the input is passed to the predicate like any other
+// rune.
+func (c *CharacterTokenizer) Tokenize(input []byte) analysis.TokenStream {
+	rv := make(analysis.TokenStream, 0, 1024)
+
+	start := 0 // byte offset where the current candidate token begins
+	end := 0   // byte offset just past the last accepted rune
+	count := 0 // number of tokens emitted so far
+
+	// flush appends the pending token, if any, to rv.
+	flush := func() {
+		if end <= start {
+			return
+		}
+		count++
+		rv = append(rv, &analysis.Token{
+			Term:     input[start:end],
+			Start:    start,
+			End:      end,
+			Position: count,
+			Type:     analysis.AlphaNumeric,
+		})
+	}
+
+	for offset := 0; offset < len(input); {
+		currRune, size := utf8.DecodeRune(input[offset:])
+		// DecodeRune reports an invalid encoding as (RuneError, 1); a
+		// RuneError with a larger size is a genuine, well-formed U+FFFD.
+		invalid := currRune == utf8.RuneError && size == 1
+		next := offset + size
+		if !invalid && c.isTokenRun(currRune) {
+			end = next
+		} else {
+			flush()
+			start = next
+			end = next
+		}
+		offset = next
+	}
+	// if we ended in the middle of a token, finish it
+	flush()
+	return rv
+}
--- a/vendor/github.com/blevesearch/bleve/analysis/tokenizer/letter/letter.go
+++ b/vendor/github.com/blevesearch/bleve/analysis/tokenizer/letter/letter.go
@ -0,0 +1,33 @@
+//  Copyright (c) 2016 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package letter
+
+import (
+	"unicode"
+
+	"github.com/blevesearch/bleve/analysis"
+	"github.com/blevesearch/bleve/analysis/tokenizer/character"
+	"github.com/blevesearch/bleve/registry"
+)
+
+// Name is the name under which this tokenizer is registered.
+const Name = "letter"
+
+// TokenizerConstructor builds the letter tokenizer: a character tokenizer
+// whose predicate is unicode.IsLetter. The config and cache arguments are not
+// used, and the returned error is always nil.
+func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
+	letterTokenizer := character.NewCharacterTokenizer(unicode.IsLetter)
+	return letterTokenizer, nil
+}
+
+// init registers TokenizerConstructor with the registry under Name.
+func init() {
+	registry.RegisterTokenizer(Name, TokenizerConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/analysis/tokenizer/unicode/unicode.go
+++ b/vendor/github.com/blevesearch/bleve/analysis/tokenizer/unicode/unicode.go
@ -0,0 +1,131 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package unicode
+
+import (
+	"github.com/blevesearch/segment"
+
+	"github.com/blevesearch/bleve/analysis"
+	"github.com/blevesearch/bleve/registry"
+)
+
+// Name is the name under which this tokenizer is registered.
+const Name = "unicode"
+
+// UnicodeTokenizer tokenizes input with the segment package's word
+// segmenter. It has no fields.
+type UnicodeTokenizer struct {
+}
+
+// NewUnicodeTokenizer returns a pointer to a new, zero-valued UnicodeTokenizer.
+func NewUnicodeTokenizer() *UnicodeTokenizer {
+	return new(UnicodeTokenizer)
+}
+
+func (rt *UnicodeTokenizer) Tokenize(input []byte) analysis.TokenStream {
+	rvx := make([]analysis.TokenStream, 0, 10) // When rv gets full, append to rvx.
+	rv := make(analysis.TokenStream, 0, 1)
+
+	ta := []analysis.Token(nil)
+	taNext := 0
+
+	segmenter := segment.NewWordSegmenterDirect(input)
+	start := 0
+	pos := 1
+
+	guessRemaining := func(end int) int {
+		avgSegmentLen := end / (len(rv) + 1)
+		if avgSegmentLen < 1 {
+			avgSegmentLen = 1
+		}
+
+		remainingLen := len(input) - end
+
+		return remainingLen / avgSegmentLen
+	}
+
+	for segmenter.Segment() {
+		segmentBytes := segmenter.Bytes()
+		end := start + len(segmentBytes)
+		if segmenter.Type() != segment.None {
+			if taNext >= len(ta) {
+				remainingSegments := guessRemaining(end)
+				if remainingSegments > 1000 {
+					remainingSegments = 1000
+				}
+				if remainingSegments < 1 {
+					remainingSegments = 1
+				}
+
+				ta = make([]analysis.Token, remainingSegments)
+				taNext = 0
+			}
+
+			token := &ta[taNext]
+			taNext++
+
+			token.Term = segmentBytes
+			token.Start = start
+			token.End = end
+			token.Position = pos
+			token.Type = convertType(segmenter.Type())
+
+			if len(rv) >= cap(rv) { // When rv is full, save it into rvx.
+				rvx = append(rvx, rv)
+
+				rvCap := cap(rv) * 2
+				if rvCap > 256 {
+					rvCap = 256
+				}
+
+				rv = make(analysis.TokenStream, 0, rvCap) // Next rv cap is bigger.
+			}
+
+			rv = append(rv, token)
+			pos++
+		}
+		start = end
+	}
+
+	if len(rvx) > 0 {
+		n := len(rv)
+		for _, r := range rvx {
+			n += len(r)
+		}
+		rall := make(analysis.TokenStream, 0, n)
+		for _, r := range rvx {
+			rall = append(rall, r...)
+		}
+		return append(rall, rv...)
+	}
+
+	return rv
+}
+
+func UnicodeTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
+	return NewUnicodeTokenizer(), nil
+}
+
+// init registers UnicodeTokenizerConstructor with the registry under Name.
+func init() {
+	registry.RegisterTokenizer(Name, UnicodeTokenizerConstructor)
+}
+
+// convertType maps a word type reported by the segmenter to an analysis token
+// type: segment.Ideo and segment.Kana become analysis.Ideographic,
+// segment.Number becomes analysis.Numeric, and every other value becomes
+// analysis.AlphaNumeric.
+func convertType(segmentWordType int) analysis.TokenType {
+	switch segmentWordType {
+	case segment.Ideo, segment.Kana:
+		return analysis.Ideographic
+	case segment.Number:
+		return analysis.Numeric
+	default:
+		return analysis.AlphaNumeric
+	}
+}