From 46d173d06c283d50d22211cabd62d0cbe42a68b3 Mon Sep 17 00:00:00 2001 From: balki <3070606-balki@users.noreply.gitlab.com> Date: Tue, 3 May 2022 17:14:37 -0400 Subject: [PATCH] implement atom --- app/app.go | 7 +++-- app/config.go | 18 ++++++----- app/db.go | 9 +++--- app/parser.go | 28 ------------------ exp/xml/xml.go | 15 ++++++++-- parser/parser.go | 77 ++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 109 insertions(+), 45 deletions(-) delete mode 100644 app/parser.go create mode 100644 parser/parser.go diff --git a/app/app.go b/app/app.go index 21c10bf..79db2db 100644 --- a/app/app.go +++ b/app/app.go @@ -6,6 +6,7 @@ import ( "time" "go.balki.me/tss/log" + "go.balki.me/tss/parser" "go.balki.me/tss/proxy" "go.balki.me/tss/telegram" ) @@ -65,7 +66,7 @@ func ProcessFeed(feed FeedCfg, scheduler Scheduler, dbDir string, tgram telegram return } - entries, err := ParseFeed(data) + entries, err := parser.ParseFeed(feed.Type, data) if err != nil { log.Error("feed parsing failed", "feed", feed.Name, "data", data, "error", err) return @@ -74,13 +75,13 @@ func ProcessFeed(feed FeedCfg, scheduler Scheduler, dbDir string, tgram telegram scheduler.Good(feed.Name) var records []Record - var newEntries []FeedEntry + var newEntries []parser.FeedEntry if db.IsNewFeed() { ftl := int(feed.FirstTimeLimit) if feed.FirstTimeLimit == NoLimit || len(entries) <= ftl { newEntries = entries } else { - var filteredEntries []FeedEntry + var filteredEntries []parser.FeedEntry newEntries, filteredEntries = entries[:ftl], entries[ftl:] for _, entry := range filteredEntries { records = append(records, Record{ diff --git a/app/config.go b/app/config.go index 579bb68..92843ef 100644 --- a/app/config.go +++ b/app/config.go @@ -4,6 +4,7 @@ import ( "os" "path" + "go.balki.me/tss/parser" "gopkg.in/yaml.v3" ) @@ -12,14 +13,15 @@ type FeedLimit int const NoLimit FeedLimit = -1 type FeedCfg struct { - Name string `yaml:"name"` - Channel string `yaml:"channel"` - Rhash string `yaml:"rhash"` - Url string `yaml:"url"` - Cron string `yaml:"cron"` - Proxy string `yaml:"proxy"` - FTL *int `yaml:"first_time_limit"` - FirstTimeLimit FeedLimit `yaml:"-"` + Name string `yaml:"name"` + Channel string `yaml:"channel"` + Rhash string `yaml:"rhash"` + Url string `yaml:"url"` + Cron string `yaml:"cron"` + Proxy string `yaml:"proxy"` + Type parser.FeedType `yaml:"type"` + FTL *int `yaml:"first_time_limit"` + FirstTimeLimit FeedLimit `yaml:"-"` } type Config struct { diff --git a/app/db.go b/app/db.go index 57e1a3a..e4b37c1 100644 --- a/app/db.go +++ b/app/db.go @@ -10,6 +10,7 @@ import ( "time" "go.balki.me/tss/log" + "go.balki.me/tss/parser" ) type Status string @@ -27,12 +28,12 @@ type Record struct { Time time.Time Status Status Filter string - FeedEntry FeedEntry + FeedEntry parser.FeedEntry } type DB interface { IsNewFeed() bool - Filter(entries []FeedEntry) ([]FeedEntry, error) + Filter(entries []parser.FeedEntry) ([]parser.FeedEntry, error) Save([]Record) error } @@ -72,8 +73,8 @@ func NewDB(storageDir, feedName string) (DB, error) { return &db, nil } -func (d *db) Filter(entries []FeedEntry) ([]FeedEntry, error) { - var filteredEntries []FeedEntry +func (d *db) Filter(entries []parser.FeedEntry) ([]parser.FeedEntry, error) { + var filteredEntries []parser.FeedEntry for _, entry := range entries { if _, ok := d.seenLinks[entry.Link]; !ok { filteredEntries = append(filteredEntries, entry) diff --git a/app/parser.go b/app/parser.go deleted file mode 100644 index 42fab7f..0000000 --- a/app/parser.go +++ /dev/null @@ -1,28 +0,0 @@ -package app - -import ( - "encoding/xml" -) - -type FeedEntry struct { - Title string `xml:"title"` - Link string `xml:"link"` - Author string `xml:"author"` - Guid string `xml:"guid"` - Description string `xml:"description"` - Content string `xml:",innerxml"` -} - -func ParseFeed(data []byte) ([]FeedEntry, error) { - - v := struct { - Items []FeedEntry `xml:"channel>item"` - }{} - - err := xml.Unmarshal(data, &v) - if err != nil { - return nil, err - } - - return v.Items, nil -} diff --git a/exp/xml/xml.go b/exp/xml/xml.go index b114150..a1875ac 100644 --- a/exp/xml/xml.go +++ b/exp/xml/xml.go @@ -29,6 +29,10 @@ type Item struct { func main() { fmt.Println("hello go") + foo() + if 1 == 1 { + return + } data, err := os.ReadFile("/home/balki/projects/tss/w.tmp/ounapuu.xml") if err != nil { log.Fatal(err) @@ -93,11 +97,18 @@ func main() { func foo() { x := ` - slkdfjdslk + + Hello World + slkdfjdslk + ` s := struct { Bar struct { + Link struct { + Href string `xml:"href,attr"` + } `xml:"link"` + Attr string `xml:"src,attr"` Content string `xml:",innerxml"` } `xml:"bar"` }{} @@ -106,5 +117,5 @@ func foo() { if err != nil { log.Fatal(err) } - fmt.Println(s.Bar.Content) + fmt.Println(s.Bar.Link.Href) } diff --git a/parser/parser.go b/parser/parser.go new file mode 100644 index 0000000..ba22b4f --- /dev/null +++ b/parser/parser.go @@ -0,0 +1,77 @@ +package parser + +import ( + "encoding/xml" + "fmt" +) + +type FeedType string + +const ( + Rss FeedType = "rss" + Atom FeedType = "atom" +) + +type FeedEntry struct { + Link string + Content string +} + +func ParseFeed(feedType FeedType, data []byte) ([]FeedEntry, error) { + switch feedType { + case Rss: + return parseRss(data) + case Atom: + return parseAtom(data) + } + return nil, fmt.Errorf("Unknown feed type: %s", feedType) +} + +func parseAtom(data []byte) ([]FeedEntry, error) { + v := struct { + Items []struct { + Link struct { + Href string `xml:"href,attr"` + } `xml:"link"` + Content string `xml:",innerxml"` + } `xml:"entry"` + }{} + + err := xml.Unmarshal(data, &v) + if err != nil { + return nil, err + } + + var feedEntries []FeedEntry + for _, atomEntry := range v.Items { + feedEntries = append(feedEntries, FeedEntry{ + Link: atomEntry.Link.Href, + Content: atomEntry.Content, + }) + } + return feedEntries, nil +} +func parseRss(data []byte) ([]FeedEntry, error) { + + v := struct { + Items []struct { + Link string `xml:"link"` + Content string `xml:",innerxml"` + } `xml:"channel>item"` + }{} + + err := xml.Unmarshal(data, &v) + if err != nil { + return nil, err + } + + var feedEntries []FeedEntry + for _, rssEntry := range v.Items { + feedEntries = append(feedEntries, FeedEntry{ + Link: rssEntry.Link, + Content: rssEntry.Content, + }) + } + + return feedEntries, nil +}