CI/CD, goquery

This commit is contained in:
Pavel 2019-06-09 20:16:14 +03:00
parent 0c9a399451
commit f85a5ddd3a
8 changed files with 174 additions and 114 deletions

6
.travis.yml Normal file
View File

@ -0,0 +1,6 @@
dist: xenial
language: go
go:
- "1.12"
script:
- "make"

View File

@ -1,3 +0,0 @@
{
"go.formatTool": "goimports"
}

View File

@ -2,6 +2,7 @@ This bot will show latest quotes from bash.im. Also it can (but not yet) work in
- [x] Ability to fetch latest quotes from bash.im
- [x] Ability to send quote to dialog via inline mode
- [x] Integrated search and autocomplete for inline mode
- [ ] Use [goquery](https://github.com/PuerkitoBio/goquery) instead of regular expressions
- [x] Use [goquery](https://github.com/PuerkitoBio/goquery) instead of regular expressions
- [x] Set up CI/CD
- [ ] Automated version increment
- [ ] Set up CI/CD
- [ ] Test cases

11
go.mod
View File

@ -3,7 +3,18 @@ module github.com/Neur0toxine/bash.im-telegram-bot
go 1.12
require (
github.com/PuerkitoBio/goquery v1.5.0 // indirect
github.com/antchfx/htmlquery v1.0.0 // indirect
github.com/antchfx/xmlquery v1.0.0 // indirect
github.com/antchfx/xpath v1.0.0 // indirect
github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible
github.com/gobwas/glob v0.2.3 // indirect
github.com/gocolly/colly v1.2.0
github.com/joho/godotenv v1.3.0
github.com/kennygrant/sanitize v1.2.4 // indirect
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
github.com/technoweenie/multipartstreamer v1.0.1 // indirect
github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea // indirect
golang.org/x/net v0.0.0-20190607181551-461777fb6f67 // indirect
google.golang.org/appengine v1.6.1 // indirect
)

42
go.sum
View File

@ -1,6 +1,48 @@
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/antchfx/htmlquery v1.0.0 h1:O5IXz8fZF3B3MW+B33MZWbTHBlYmcfw0BAxgErHuaMA=
github.com/antchfx/htmlquery v1.0.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8=
github.com/antchfx/xmlquery v1.0.0 h1:YuEPqexGG2opZKNc9JU3Zw6zFXwC47wNcy6/F8oKsrM=
github.com/antchfx/xmlquery v1.0.0/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk=
github.com/antchfx/xpath v1.0.0 h1:Q5gFgh2O40VTSwMOVbFE7nFNRBu3tS21Tn0KAWeEjtk=
github.com/antchfx/xpath v1.0.0/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible h1:2cauKuaELYAEARXRkq2LrJ0yDDv1rW7+wrTEdVL3uaU=
github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible/go.mod h1:qf9acutJ8cwBUhm1bqgz6Bei9/C/c93FPDljKWwsOgM=
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/joho/godotenv v1.3.0 h1:Zjp+RcGpHhGlrMbJzXTrZZPrWj+1vfm90La1wgB6Bhc=
github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg=
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
github.com/technoweenie/multipartstreamer v1.0.1 h1:XRztA5MXiR1TIRHxH2uNxXxaIkKQDeX7m2XsSOlQEnM=
github.com/technoweenie/multipartstreamer v1.0.1/go.mod h1:jNVxdtShOxzAsukZwTSw6MDx5eUJoiEBsSvzDU9uzog=
github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea h1:hH8P1IiDpzRU6ZDbDh/RDnVuezi2oOXJpApa06M0zyI=
github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea/go.mod h1:aOux3gHPCftJ3KHq6Pz/AlDjYJ7Y+yKfm1gU/3B0u04=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20190607181551-461777fb6f67 h1:rJJxsykSlULwd2P2+pg/rtnwN2FrWp4IuCxOSyS0V00=
golang.org/x/net v0.0.0-20190607181551-461777fb6f67/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
google.golang.org/appengine v1.6.1 h1:QzqyMA1tlu6CgqCDUtU9V+ZKhLFT2dkJuANu5QaxI3I=
google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=

View File

@ -3,9 +3,8 @@ package main
import (
"errors"
"fmt"
"github.com/gocolly/colly"
"html"
"io/ioutil"
"net/http"
"net/url"
"regexp"
"strconv"
@ -22,71 +21,16 @@ type BashQuote struct {
Text string
}
// Regular expressions used to scrape quotes out of a raw HTML response.
// All three are compiled with the (?im) flags (case-insensitive, multi-line).
var (
// replaceBrRe matches a <br> tag, allowing one optional whitespace or '+'
// character before "br" and up to two whitespace, '+' or '/' characters
// after it (so "<br>", "<br/>" and "<br />" all match).
replaceBrRe = regexp.MustCompile(`(?im)\<[\s+]?br[\s+\/]{0,2}?\>`)
// getQuotesListRe lazily matches any run of text that ends with one complete
// <article class="quote" ...> ... </article> element, so each match contains
// a single quote article plus whatever text preceded it.
getQuotesListRe = regexp.MustCompile(`(?im)[.\s\w\W]+?\<article\sclass\="quote\"[.\s\w\W]+?<\/article\>`)
// getQuoteDataRe pulls the fields of one quote out of its markup. The named
// capture groups appear in this order: id, permalink, date, text, rating.
getQuoteDataRe = regexp.MustCompile(`(?im)data\-quote\=\"(?P<id>\d+)\"[.\s\w\W]+?quote__header_permalink.+href\=\"(?P<permalink>\/.+\d)\"[.\s\w\W]+?quote__header_date\"\>[.\s\w\W]+?(?P<date>.+)[.\s\w\W]+?quote__body\"\>\s+?(?P<text>.+)[.\s\w\W]+?quote__total.+\>(?P<rating>\d+)`)
)
// getQuotesList splits response into per-quote chunks with getQuotesListRe and
// parses every chunk with getQuoteData.
//
// When maxItems is non-zero, only the first maxItems chunks are considered.
// Chunks that fail to parse, or that yield an ID of 0, are dropped, so the
// result may contain fewer than maxItems quotes. The result is nil when no
// quote could be parsed.
func getQuotesList(response string, maxItems int) []BashQuote {
	chunks := getQuotesListRe.FindAllString(response, -1)
	if limited := maxItems != 0; limited && maxItems < len(chunks) {
		chunks = chunks[:maxItems]
	}

	var quotes []BashQuote
	for i := range chunks {
		id, created, rating, permalink, text, err := getQuoteData(chunks[i])
		// Skip chunks that could not be parsed or carry no usable ID.
		if err != nil || id == 0 {
			continue
		}
		quotes = append(quotes, BashQuote{
			ID:        id,
			Created:   created,
			Rating:    rating,
			Permalink: permalink,
			Text:      text,
		})
	}
	return quotes
}
// getQuoteData parses the markup of a single quote with getQuoteDataRe and
// returns its fields.
//
// It returns a non-nil error when the pattern does not match response or when
// the captured id is not a valid integer; in both cases every other result is
// the zero value. On success err is nil.
func getQuoteData(response string) (id int, created string, rating string, permalink string, text string, err error) {
matches := getQuoteDataRe.FindStringSubmatch(response)
if len(matches) == 0 {
return 0, "", "", "", "", errors.New("No data found")
} else {
// Drop the whole-match entry so that indexes 0..4 are the capture groups
// in pattern order: id, permalink, date, text, rating.
matches = matches[1:]
}
id, err = strconv.Atoi(matches[0])
if err != nil {
return 0, "", "", "", "", err
}
// NOTE(review): the two string arguments of ReplaceAll render identically in
// this view; if the first one really is a plain space the call is a no-op.
// Presumably it is a non-breaking space (or an HTML entity) in the
// repository — confirm before touching this line.
created = strings.ReplaceAll(strings.TrimSpace(matches[2]), " ", " ")
rating = strings.TrimSpace(matches[4])
// The captured permalink is a site-relative path; prefix it with BASH_URL.
permalink = BASH_URL + matches[1]
// Turn <br> variants into newlines first, then decode HTML entities.
text = html.UnescapeString(replaceBrRe.ReplaceAllString(strings.TrimSpace(matches[3]), "\n"))
err = nil
return
}
// replaceBrRe matches a <br> tag case-insensitively, allowing one optional
// whitespace or '+' character before "br" and up to two whitespace, '+' or '/'
// characters after it (so "<br>", "<br/>" and "<br />" all match).
var replaceBrRe = regexp.MustCompile(`(?im)\<[\s+]?br[\s+\/]{0,2}?\>`)
// GetLatestQuotes returns whatever extractQuotes finds at the site root ("/"),
// passing 25 as the item limit.
func GetLatestQuotes() ([]BashQuote, error) {
	const (
		path  = "/"
		limit = 25
	)
	return extractQuotes(path, limit)
}
// GetLatestAbyssQuotes returns whatever extractQuotes finds under the
// "/abyss/" path, passing 25 as the item limit.
func GetLatestAbyssQuotes() ([]BashQuote, error) {
	const (
		path  = "/abyss/"
		limit = 25
	)
	return extractQuotes(path, limit)
}
func GetQuote(id int) (BashQuote, error) {
quotes, err := extractQuotes(fmt.Sprintf("/quote/%d", id), 1)
@ -104,27 +48,53 @@ func SearchQuotes(search string, maxResults int) ([]BashQuote, error) {
}
// extractQuotes loads the page at BASH_URL+url and returns the quotes found on
// it together with any error encountered.
//
// NOTE(review): this span comes from a rendered diff and interleaves the
// removed implementation (http.Get + ioutil.ReadAll + getQuotesList) with the
// added one (colly collector with an OnHTML callback). As shown it is not one
// compilable function; read the http/ioutil lines and the colly lines as two
// alternative bodies.
func extractQuotes(url string, maxItems int) ([]BashQuote, error) {
var (
quotes []BashQuote
link = fmt.Sprintf("%s%s", BASH_URL, url)
)
var quotes []BashQuote
if resp, err := http.Get(link); err == nil {
defer resp.Body.Close()
c := colly.NewCollector()
if resp.StatusCode == 200 {
if bodyData, err := ioutil.ReadAll(resp.Body); err == nil {
body := string(bodyData)
items := getQuotesList(body, maxItems)
// Callback registered for every element matching "article.quote". It
// returns without appending once len(quotes) equals maxItems.
c.OnHTML("article.quote", func(e *colly.HTMLElement) {
if len(quotes) == maxItems {
return
}
return items, nil
// The quote ID is read from the element's data-quote attribute; elements
// whose attribute is not an integer are ignored.
id, err := strconv.Atoi(strings.TrimSpace(e.Attr("data-quote")))
if err == nil {
// NOTE(review): err is reassigned by each Html() call below, so only the
// result of the last call (.quote__body) is checked afterwards.
created, err := e.DOM.Find(".quote__header_date").Html()
rating, err := e.DOM.Find(".quote__total").Html()
// permalink holds the inner HTML of .quote__header_permalink, not its
// href attribute.
permalink, err := e.DOM.Find(".quote__header_permalink").Html()
text, err := e.DOM.Find(".quote__body").Html()
if err == nil {
// NOTE(review): the two ReplaceAll string arguments render identically in
// this view; presumably the first is a non-breaking space — confirm.
created = strings.TrimSpace(strings.ReplaceAll(created, " ", " "))
rating = strings.TrimSpace(rating)
// Decode HTML entities, then turn <br> variants into newlines.
text = replaceBrRe.ReplaceAllString(html.UnescapeString(strings.TrimSpace(text)), "\n")
// Backslash-escape `, * and _ in the quote text — presumably so they are
// not interpreted as formatting when sent with the "markdown" parse mode;
// confirm against the sender.
text = strings.ReplaceAll(text, "`", "\\`")
text = strings.ReplaceAll(text, "*", "\\*")
text = strings.ReplaceAll(text, "_", "\\_")
// Prefix the permalink with BASH_URL unless it is empty or starts with
// '#', in which case it is cleared.
if len(permalink) > 0 && permalink[0] != '#' {
permalink = fmt.Sprintf("%s%s", BASH_URL, strings.TrimSpace(permalink))
} else {
permalink = ""
}
quotes = append(quotes, BashQuote{
ID: id,
Created: created,
Rating: rating,
Permalink: permalink,
Text: text,
})
}
} else {
return
}
})
// Visit fetches the page and drives the OnHTML callback above; a failure
// is returned together with whatever was collected so far.
if err := c.Visit(fmt.Sprintf("%s%s", BASH_URL, url)); err != nil {
return quotes, err
}
} else {
return quotes, errors.New("Incorrect status code: " + strconv.Itoa(resp.StatusCode))
}
} else {
return quotes, err
return quotes, nil
}
}

View File

@ -1,12 +1,12 @@
package main
import (
"strings"
"fmt"
"log"
"math/rand"
"net/http"
"strconv"
"strings"
"unicode/utf16"
tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api"
@ -32,13 +32,18 @@ func processUpdate(update tgbotapi.Update, bot *tgbotapi.BotAPI) {
bashQuotes = append(bashQuotes, quote)
} else {
log.Print(errConv)
bashQuotes, err = SearchQuotes(update.InlineQuery.Query, 3)
bashQuotes, err = SearchQuotes(update.InlineQuery.Query, maxSearchResults)
}
if err == nil {
for _, quote := range bashQuotes {
cutSize := 50
utfEncodedString := utf16.Encode([]rune(quote.Text))
runeString := utf16.Decode(utfEncodedString[:50])
runeString := utf16.Decode(utfEncodedString)
if len(utfEncodedString) > cutSize {
runeString = utf16.Decode(utfEncodedString[:cutSize])
}
title := fmt.Sprintf(
"[#%d]: %s\n",
@ -107,14 +112,53 @@ func processUpdate(update tgbotapi.Update, bot *tgbotapi.BotAPI) {
if update.Message.IsCommand() {
switch update.Message.Command() {
case "latest":
NewMessageWithQuotes(bot, update, &msgs, "Получаю свежие цитаты", update.Message.Command())
case "abyss":
NewMessageWithQuotes(bot, update, &msgs, "Получаю цитаты из Бездны", update.Message.Command())
default:
msgs[len(msgs)-1].Text = "Как насчёт последних цитат? Используйте /latest"
}
} else {
text := fmt.Sprintf("Зайдите в любой чат, вызовите бота вот так:\n `@%s <id>`, где ID - "+
"это идентификатор цитаты на bash.im. И бот перешлёт её!\n"+
"Ещё вместо идентификатора можно указать текст, по которому бот попытается найти цитаты.", bot.Self.UserName)
msgs[len(msgs)-1] = NewMessage(update.Message.Chat.ID, update.Message.MessageID, text, "markdown")
}
SendMessages(bot, msgs)
}
}
func NewMessageWithQuotes(
bot *tgbotapi.BotAPI,
update tgbotapi.Update,
msgs *[]tgbotapi.MessageConfig,
waitMsg string,
command string,
) {
var (
items []BashQuote
err error
)
SendMessages(bot, []tgbotapi.MessageConfig{
NewMessage(update.Message.Chat.ID, update.Message.MessageID, "_Получаю свежие цитаты..._", "markdown"),
NewMessage(update.Message.Chat.ID, update.Message.MessageID, fmt.Sprintf("_%s..._", waitMsg), "markdown"),
})
items, err := GetLatestQuotes()
switch command {
case "latest":
items, err = GetLatestQuotes()
case "abyss":
items, err = GetLatestAbyssQuotes()
default:
items, err = GetLatestQuotes()
}
messages := *msgs
if err != nil {
msgs[len(msgs)-1].Text = "Не удалось получить последние цитаты :("
messages[len(messages)-1].Text = "Не удалось получить цитаты :("
} else {
for _, item := range items {
text := fmt.Sprintf(
@ -128,25 +172,13 @@ func processUpdate(update tgbotapi.Update, bot *tgbotapi.BotAPI) {
item.Text,
)
if len(msgs[len(msgs)-1].Text+text) > 4096 {
msgs = append(msgs, NewMessage(update.Message.Chat.ID, 0, text, "markdown"))
if len(messages[len(messages)-1].Text+text) > 4096 {
messages = append(messages, NewMessage(update.Message.Chat.ID, 0, text, "markdown"))
} else {
msgs[len(msgs)-1].Text += text
messages[len(messages)-1].Text += text
}
}
}
default:
msgs[len(msgs)-1].Text = "Как насчёт последних цитат? Используйте /latest"
}
} else {
text := fmt.Sprintf("Зайдите в любой чат, вызовите бота вот так:\n `@%s <id>`, где ID - "+
"это идентификатор цитаты на bash.im. И бот перешлёт её!\n"+
"Ещё вместо идентификатора можно указать текст, по которому бот попытается найти цитаты.", bot.Self.UserName)
msgs[len(msgs)-1] = NewMessage(update.Message.Chat.ID, update.Message.MessageID, text, "markdown")
}
SendMessages(bot, msgs)
}
}
func NewMessage(chatID int64, replyTo int, text string, parse string) tgbotapi.MessageConfig {

View File

@ -1,9 +1,9 @@
package main
import (
"fmt"
"log"
"os"
"fmt"
"path/filepath"
"strconv"
"strings"
@ -22,6 +22,7 @@ const envDebug = "DEBUG"
// defaultPollTimeout is the fallback poll timeout.
// NOTE(review): the unit is not visible here (presumably seconds) — confirm
// against the code that reads it.
const defaultPollTimeout = 30
// defaultWebhookPort is the fallback port number, presumably used in webhook
// mode when none is configured — confirm against the code that reads it.
const defaultWebhookPort = 8000
// maxSearchResults is the result limit passed to SearchQuotes when handling
// an inline query.
const maxSearchResults = 25
// ModePolling is the mode name "polling".
const ModePolling = "polling"
// ModeWebhook is the mode name "webhook".
const ModeWebhook = "webhook"