diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..e0780e4 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,6 @@ +dist: xenial +language: go +go: + - "1.12" +script: + - "make" \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index ae67a14..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "go.formatTool": "goimports" -} \ No newline at end of file diff --git a/README.md b/README.md index ae68932..7e1e641 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ This bot will show latest quotes from bash.im. Also it can (but not yet) work in - [x] Ability to fetch latest quotes from bash.im - [x] Ability to send quote to dialog via inline mode - [x] Integrated search and autocomplete for inline mode -- [ ] Use [goquery](https://github.com/PuerkitoBio/goquery) instead of regular expressions +- [x] Use [goquery](https://github.com/PuerkitoBio/goquery) instead of regular expressions +- [x] Setup CI/CD - [ ] Automated version increment -- [ ] Setup CI/CD \ No newline at end of file +- [ ] Test cases \ No newline at end of file diff --git a/go.mod b/go.mod index 0526a16..480288d 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,18 @@ module github.com/Neur0toxine/bash.im-telegram-bot go 1.12 require ( + github.com/PuerkitoBio/goquery v1.5.0 // indirect + github.com/antchfx/htmlquery v1.0.0 // indirect + github.com/antchfx/xmlquery v1.0.0 // indirect + github.com/antchfx/xpath v1.0.0 // indirect github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible + github.com/gobwas/glob v0.2.3 // indirect + github.com/gocolly/colly v1.2.0 github.com/joho/godotenv v1.3.0 + github.com/kennygrant/sanitize v1.2.4 // indirect + github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect github.com/technoweenie/multipartstreamer v1.0.1 // indirect + github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea // indirect + golang.org/x/net v0.0.0-20190607181551-461777fb6f67 // indirect + google.golang.org/appengine v1.6.1 // indirect ) diff --git a/go.sum b/go.sum index 25c0b99..b30bd87 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,48 @@ +github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= +github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= +github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= +github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +github.com/antchfx/htmlquery v1.0.0 h1:O5IXz8fZF3B3MW+B33MZWbTHBlYmcfw0BAxgErHuaMA= +github.com/antchfx/htmlquery v1.0.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8= +github.com/antchfx/xmlquery v1.0.0 h1:YuEPqexGG2opZKNc9JU3Zw6zFXwC47wNcy6/F8oKsrM= +github.com/antchfx/xmlquery v1.0.0/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk= +github.com/antchfx/xpath v1.0.0 h1:Q5gFgh2O40VTSwMOVbFE7nFNRBu3tS21Tn0KAWeEjtk= +github.com/antchfx/xpath v1.0.0/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible h1:2cauKuaELYAEARXRkq2LrJ0yDDv1rW7+wrTEdVL3uaU= github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible/go.mod h1:qf9acutJ8cwBUhm1bqgz6Bei9/C/c93FPDljKWwsOgM= +github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= +github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= +github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI= +github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA= +github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/joho/godotenv v1.3.0 h1:Zjp+RcGpHhGlrMbJzXTrZZPrWj+1vfm90La1wgB6Bhc= github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= +github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= +github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= +github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= +github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= github.com/technoweenie/multipartstreamer v1.0.1 h1:XRztA5MXiR1TIRHxH2uNxXxaIkKQDeX7m2XsSOlQEnM= github.com/technoweenie/multipartstreamer v1.0.1/go.mod h1:jNVxdtShOxzAsukZwTSw6MDx5eUJoiEBsSvzDU9uzog= +github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea h1:hH8P1IiDpzRU6ZDbDh/RDnVuezi2oOXJpApa06M0zyI= +github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea/go.mod h1:aOux3gHPCftJ3KHq6Pz/AlDjYJ7Y+yKfm1gU/3B0u04= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190607181551-461777fb6f67 h1:rJJxsykSlULwd2P2+pg/rtnwN2FrWp4IuCxOSyS0V00= +golang.org/x/net v0.0.0-20190607181551-461777fb6f67/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +google.golang.org/appengine v1.6.1 h1:QzqyMA1tlu6CgqCDUtU9V+ZKhLFT2dkJuANu5QaxI3I= +google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= diff --git a/src/bashim.go b/src/bashim.go index 12f6fb3..58cfd3a 100644 --- a/src/bashim.go +++ b/src/bashim.go @@ -3,9 +3,8 @@ package main import ( "errors" "fmt" + "github.com/gocolly/colly" "html" - "io/ioutil" - "net/http" "net/url" "regexp" "strconv" @@ -22,71 +21,16 @@ type BashQuote struct { Text string } -var ( - replaceBrRe = regexp.MustCompile(`(?im)\<[\s+]?br[\s+\/]{0,2}?\>`) - getQuotesListRe = regexp.MustCompile(`(?im)[.\s\w\W]+?\`) - getQuoteDataRe = regexp.MustCompile(`(?im)data\-quote\=\"(?P\d+)\"[.\s\w\W]+?quote__header_permalink.+href\=\"(?P\/.+\d)\"[.\s\w\W]+?quote__header_date\"\>[.\s\w\W]+?(?P.+)[.\s\w\W]+?quote__body\"\>\s+?(?P.+)[.\s\w\W]+?quote__total.+\>(?P\d+)`) -) - -func getQuotesList(response string, maxItems int) []BashQuote { - var items []BashQuote - matches := getQuotesListRe.FindAllString(response, -1) - - if maxItems != 0 && len(matches) > maxItems { - matches = matches[:maxItems] - } - - for _, match := range matches { - id, created, rating, permalink, text, err := getQuoteData(match) - - if err != nil { - continue - } - - if id == 0 { - continue - } - - items = append(items, BashQuote{ - ID: id, - Created: created, - Rating: rating, - Permalink: permalink, - Text: text, - }) - } - - return items -} - -func getQuoteData(response string) (id int, created string, rating string, permalink string, text string, err error) { - matches := getQuoteDataRe.FindStringSubmatch(response) - - if len(matches) == 0 { - return 0, "", "", "", "", errors.New("No data found") - } else { - matches = matches[1:] - } - - id, err = strconv.Atoi(matches[0]) - - if err != nil { - return 0, "", "", "", "", err - } - - created = strings.ReplaceAll(strings.TrimSpace(matches[2]), " ", " ") - rating = strings.TrimSpace(matches[4]) - permalink = BASH_URL + matches[1] - text = html.UnescapeString(replaceBrRe.ReplaceAllString(strings.TrimSpace(matches[3]), "\n")) - err = nil - - return -} +var replaceBrRe = regexp.MustCompile(`(?im)\<[\s+]?br[\s+\/]{0,2}?\>`) func GetLatestQuotes() ([]BashQuote, error) { return extractQuotes("/", 25) } +func GetLatestAbyssQuotes() ([]BashQuote, error) { + return extractQuotes("/abyss/", 25) +} + func GetQuote(id int) (BashQuote, error) { quotes, err := extractQuotes(fmt.Sprintf("/quote/%d", id), 1) @@ -104,27 +48,53 @@ func SearchQuotes(search string, maxResults int) ([]BashQuote, error) { } func extractQuotes(url string, maxItems int) ([]BashQuote, error) { - var ( - quotes []BashQuote - link = fmt.Sprintf("%s%s", BASH_URL, url) - ) + var quotes []BashQuote - if resp, err := http.Get(link); err == nil { - defer resp.Body.Close() + c := colly.NewCollector() - if resp.StatusCode == 200 { - if bodyData, err := ioutil.ReadAll(resp.Body); err == nil { - body := string(bodyData) - items := getQuotesList(body, maxItems) + c.OnHTML("article.quote", func(e *colly.HTMLElement) { + if len(quotes) == maxItems { + return + } - return items, nil - } else { - return quotes, err + id, err := strconv.Atoi(strings.TrimSpace(e.Attr("data-quote"))) + + if err == nil { + created, err := e.DOM.Find(".quote__header_date").Html() + rating, err := e.DOM.Find(".quote__total").Html() + permalink, err := e.DOM.Find(".quote__header_permalink").Html() + text, err := e.DOM.Find(".quote__body").Html() + + if err == nil { + created = strings.TrimSpace(strings.ReplaceAll(created, " ", " ")) + rating = strings.TrimSpace(rating) + text = replaceBrRe.ReplaceAllString(html.UnescapeString(strings.TrimSpace(text)), "\n") + text = strings.ReplaceAll(text, "`", "\\`") + text = strings.ReplaceAll(text, "*", "\\*") + text = strings.ReplaceAll(text, "_", "\\_") + + if len(permalink) > 0 && permalink[0] != '#' { + permalink = fmt.Sprintf("%s%s", BASH_URL, strings.TrimSpace(permalink)) + } else { + permalink = "" + } + + quotes = append(quotes, BashQuote{ + ID: id, + Created: created, + Rating: rating, + Permalink: permalink, + Text: text, + }) } } else { - return quotes, errors.New("Incorrect status code: " + strconv.Itoa(resp.StatusCode)) + return } - } else { + }) + + if err := c.Visit(fmt.Sprintf("%s%s", BASH_URL, url)); err != nil { return quotes, err + } else { + return quotes, nil } } diff --git a/src/bot.go b/src/bot.go index d098f29..cb997c6 100644 --- a/src/bot.go +++ b/src/bot.go @@ -1,12 +1,12 @@ package main import ( - "strings" "fmt" "log" "math/rand" "net/http" "strconv" + "strings" "unicode/utf16" tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api" @@ -32,13 +32,18 @@ func processUpdate(update tgbotapi.Update, bot *tgbotapi.BotAPI) { bashQuotes = append(bashQuotes, quote) } else { log.Print(errConv) - bashQuotes, err = SearchQuotes(update.InlineQuery.Query, 3) + bashQuotes, err = SearchQuotes(update.InlineQuery.Query, maxSearchResults) } if err == nil { for _, quote := range bashQuotes { + cutSize := 50 utfEncodedString := utf16.Encode([]rune(quote.Text)) - runeString := utf16.Decode(utfEncodedString[:50]) + runeString := utf16.Decode(utfEncodedString) + + if len(utfEncodedString) > cutSize { + runeString = utf16.Decode(utfEncodedString[:cutSize]) + } title := fmt.Sprintf( "[#%d]: %s\n", @@ -107,34 +112,9 @@ func processUpdate(update tgbotapi.Update, bot *tgbotapi.BotAPI) { if update.Message.IsCommand() { switch update.Message.Command() { case "latest": - SendMessages(bot, []tgbotapi.MessageConfig{ - NewMessage(update.Message.Chat.ID, update.Message.MessageID, "_Получаю свежие цитаты..._", "markdown"), - }) - - items, err := GetLatestQuotes() - - if err != nil { - msgs[len(msgs)-1].Text = "Не удалось получить последние цитаты :(" - } else { - for _, item := range items { - text := fmt.Sprintf( - "*Цитата:* [#%d](%s), %s \n"+ - "*Рейтинг:* %s \n"+ - "%s \n\n", - item.ID, - item.Permalink, - item.Created, - item.Rating, - item.Text, - ) - - if len(msgs[len(msgs)-1].Text+text) > 4096 { - msgs = append(msgs, NewMessage(update.Message.Chat.ID, 0, text, "markdown")) - } else { - msgs[len(msgs)-1].Text += text - } - } - } + NewMessageWithQuotes(bot, update, &msgs, "Получаю свежие цитаты", update.Message.Command()) + case "abyss": + NewMessageWithQuotes(bot, update, &msgs, "Получаю цитаты из Бездны", update.Message.Command()) default: msgs[len(msgs)-1].Text = "Как насчёт последних цитат? Используйте /latest" } @@ -142,6 +122,7 @@ func processUpdate(update tgbotapi.Update, bot *tgbotapi.BotAPI) { text := fmt.Sprintf("Зайдите в любой чат, вызовите бота вот так:\n `@%s `, где ID - "+ "это идентификатор цитаты на bash.im. И бот перешлёт её!\n"+ "Ещё вместо идентификатора можно указать текст, по которому бот попытается найти цитаты.", bot.Self.UserName) + msgs[len(msgs)-1] = NewMessage(update.Message.Chat.ID, update.Message.MessageID, text, "markdown") } @@ -149,6 +130,57 @@ func processUpdate(update tgbotapi.Update, bot *tgbotapi.BotAPI) { } } +func NewMessageWithQuotes( + bot *tgbotapi.BotAPI, + update tgbotapi.Update, + msgs *[]tgbotapi.MessageConfig, + waitMsg string, + command string, +) { + var ( + items []BashQuote + err error + ) + + SendMessages(bot, []tgbotapi.MessageConfig{ + NewMessage(update.Message.Chat.ID, update.Message.MessageID, fmt.Sprintf("_%s..._", waitMsg), "markdown"), + }) + + switch command { + case "latest": + items, err = GetLatestQuotes() + case "abyss": + items, err = GetLatestAbyssQuotes() + default: + items, err = GetLatestQuotes() + } + + messages := *msgs + + if err != nil { + messages[len(messages)-1].Text = "Не удалось получить цитаты :(" + } else { + for _, item := range items { + text := fmt.Sprintf( + "*Цитата:* [#%d](%s), %s \n"+ + "*Рейтинг:* %s \n"+ + "%s \n\n", + item.ID, + item.Permalink, + item.Created, + item.Rating, + item.Text, + ) + + if len(messages[len(messages)-1].Text+text) > 4096 { + messages = append(messages, NewMessage(update.Message.Chat.ID, 0, text, "markdown")) + } else { + messages[len(messages)-1].Text += text + } + } + } +} + func NewMessage(chatID int64, replyTo int, text string, parse string) tgbotapi.MessageConfig { msg := tgbotapi.NewMessage(chatID, text) msg.ParseMode = parse diff --git a/src/config.go b/src/config.go index a11a50c..f5e2a6b 100644 --- a/src/config.go +++ b/src/config.go @@ -1,9 +1,9 @@ package main import ( + "fmt" "log" "os" - "fmt" "path/filepath" "strconv" "strings" @@ -22,6 +22,7 @@ const envDebug = "DEBUG" const defaultPollTimeout = 30 const defaultWebhookPort = 8000 +const maxSearchResults = 25 const ModePolling = "polling" const ModeWebhook = "webhook"