implement atom

This commit is contained in:
Balakrishnan Balasubramanian 2022-05-03 17:14:37 -04:00
parent ccbf558fa1
commit 46d173d06c
6 changed files with 109 additions and 45 deletions

View File

@ -6,6 +6,7 @@ import (
"time" "time"
"go.balki.me/tss/log" "go.balki.me/tss/log"
"go.balki.me/tss/parser"
"go.balki.me/tss/proxy" "go.balki.me/tss/proxy"
"go.balki.me/tss/telegram" "go.balki.me/tss/telegram"
) )
@ -65,7 +66,7 @@ func ProcessFeed(feed FeedCfg, scheduler Scheduler, dbDir string, tgram telegram
return return
} }
entries, err := ParseFeed(data) entries, err := parser.ParseFeed(feed.Type, data)
if err != nil { if err != nil {
log.Error("feed parsing failed", "feed", feed.Name, "data", data, "error", err) log.Error("feed parsing failed", "feed", feed.Name, "data", data, "error", err)
return return
@ -74,13 +75,13 @@ func ProcessFeed(feed FeedCfg, scheduler Scheduler, dbDir string, tgram telegram
scheduler.Good(feed.Name) scheduler.Good(feed.Name)
var records []Record var records []Record
var newEntries []FeedEntry var newEntries []parser.FeedEntry
if db.IsNewFeed() { if db.IsNewFeed() {
ftl := int(feed.FirstTimeLimit) ftl := int(feed.FirstTimeLimit)
if feed.FirstTimeLimit == NoLimit || len(entries) <= ftl { if feed.FirstTimeLimit == NoLimit || len(entries) <= ftl {
newEntries = entries newEntries = entries
} else { } else {
var filteredEntries []FeedEntry var filteredEntries []parser.FeedEntry
newEntries, filteredEntries = entries[:ftl], entries[ftl:] newEntries, filteredEntries = entries[:ftl], entries[ftl:]
for _, entry := range filteredEntries { for _, entry := range filteredEntries {
records = append(records, Record{ records = append(records, Record{

View File

@ -4,6 +4,7 @@ import (
"os" "os"
"path" "path"
"go.balki.me/tss/parser"
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
) )
@ -18,6 +19,7 @@ type FeedCfg struct {
Url string `yaml:"url"` Url string `yaml:"url"`
Cron string `yaml:"cron"` Cron string `yaml:"cron"`
Proxy string `yaml:"proxy"` Proxy string `yaml:"proxy"`
Type parser.FeedType `yaml:"type"`
FTL *int `yaml:"first_time_limit"` FTL *int `yaml:"first_time_limit"`
FirstTimeLimit FeedLimit `yaml:"-"` FirstTimeLimit FeedLimit `yaml:"-"`
} }

View File

@ -10,6 +10,7 @@ import (
"time" "time"
"go.balki.me/tss/log" "go.balki.me/tss/log"
"go.balki.me/tss/parser"
) )
type Status string type Status string
@ -27,12 +28,12 @@ type Record struct {
Time time.Time Time time.Time
Status Status Status Status
Filter string Filter string
FeedEntry FeedEntry FeedEntry parser.FeedEntry
} }
type DB interface { type DB interface {
IsNewFeed() bool IsNewFeed() bool
Filter(entries []FeedEntry) ([]FeedEntry, error) Filter(entries []parser.FeedEntry) ([]parser.FeedEntry, error)
Save([]Record) error Save([]Record) error
} }
@ -72,8 +73,8 @@ func NewDB(storageDir, feedName string) (DB, error) {
return &db, nil return &db, nil
} }
func (d *db) Filter(entries []FeedEntry) ([]FeedEntry, error) { func (d *db) Filter(entries []parser.FeedEntry) ([]parser.FeedEntry, error) {
var filteredEntries []FeedEntry var filteredEntries []parser.FeedEntry
for _, entry := range entries { for _, entry := range entries {
if _, ok := d.seenLinks[entry.Link]; !ok { if _, ok := d.seenLinks[entry.Link]; !ok {
filteredEntries = append(filteredEntries, entry) filteredEntries = append(filteredEntries, entry)

View File

@ -1,28 +0,0 @@
package app
import (
"encoding/xml"
)
// FeedEntry is one decoded feed item. The struct tags below drive
// encoding/xml; ParseFeed in this file fills a slice of these from the
// <item> elements found under <channel>.
type FeedEntry struct {
// Title is the character data of the item's <title> child.
Title string `xml:"title"`
// Link is the character data of the item's <link> child.
Link string `xml:"link"`
// Author is the character data of the item's <author> child.
Author string `xml:"author"`
// Guid is the character data of the item's <guid> child.
Guid string `xml:"guid"`
// Description is the character data of the item's <description> child.
Description string `xml:"description"`
// Content receives the raw, unparsed XML nested inside the item
// element (the ",innerxml" tag), i.e. all children as they appear in
// the source document.
Content string `xml:",innerxml"`
}
// ParseFeed decodes an XML feed document and returns every <item> found
// under its <channel> element, in document order.
//
// Any error reported by encoding/xml is returned unchanged together with
// a nil slice. A document that contains no items yields a nil slice and
// a nil error.
func ParseFeed(data []byte) ([]FeedEntry, error) {
	// Only the channel>item path is of interest; everything else in the
	// document is ignored by the decoder.
	var doc struct {
		Items []FeedEntry `xml:"channel>item"`
	}
	if decodeErr := xml.Unmarshal(data, &doc); decodeErr != nil {
		return nil, decodeErr
	}
	return doc.Items, nil
}

View File

@ -29,6 +29,10 @@ type Item struct {
func main() { func main() {
fmt.Println("hello go") fmt.Println("hello go")
foo()
if 1 == 1 {
return
}
data, err := os.ReadFile("/home/balki/projects/tss/w.tmp/ounapuu.xml") data, err := os.ReadFile("/home/balki/projects/tss/w.tmp/ounapuu.xml")
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
@ -93,11 +97,18 @@ func main() {
func foo() { func foo() {
x := ` x := `
<foo> <foo>
<bar><blue>slkdfjdslk</blue></bar> <bar src="hello">
<link href="http://foobar.com">Hello World</link>
<blue>slkdfjdslk</blue>
</bar>
</foo> </foo>
` `
s := struct { s := struct {
Bar struct { Bar struct {
Link struct {
Href string `xml:"href,attr"`
} `xml:"link"`
Attr string `xml:"src,attr"`
Content string `xml:",innerxml"` Content string `xml:",innerxml"`
} `xml:"bar"` } `xml:"bar"`
}{} }{}
@ -106,5 +117,5 @@ func foo() {
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
fmt.Println(s.Bar.Content) fmt.Println(s.Bar.Link.Href)
} }

77
parser/parser.go Normal file
View File

@ -0,0 +1,77 @@
package parser
import (
"encoding/xml"
"fmt"
)
// FeedType names the syndication format of a feed document. ParseFeed
// switches on it to pick the matching parser.
type FeedType string

// Feed types understood by ParseFeed. Any other value makes ParseFeed
// return an error.
const (
// Rss makes ParseFeed hand the document to parseRss.
Rss FeedType = "rss"
// Atom makes ParseFeed hand the document to parseAtom.
Atom FeedType = "atom"
)
// FeedEntry is the format-independent result of parsing one feed item;
// both parseRss and parseAtom produce values of this type.
type FeedEntry struct {
// Link is the entry's URL: the text of the <link> element for RSS, or
// the href attribute of a <link> element for Atom.
Link string
// Content is the raw XML nested inside the item/entry element, taken
// verbatim from the source document.
Content string
}
// ParseFeed parses a feed document of the given type and returns its
// entries.
//
// feedType selects the parser: Rss dispatches to parseRss and Atom to
// parseAtom; their results (entries and error) are returned unchanged.
// Any other feedType, including the empty string, yields a nil slice
// and an error naming the rejected value.
func ParseFeed(feedType FeedType, data []byte) ([]FeedEntry, error) {
	switch feedType {
	case Rss:
		return parseRss(data)
	case Atom:
		return parseAtom(data)
	default:
		// %q keeps empty or whitespace-only values visible in the message.
		return nil, fmt.Errorf("unknown feed type: %q", feedType)
	}
}
// parseAtom decodes an Atom document and returns one FeedEntry per
// <entry> element found directly under the root, in document order.
//
// An Atom entry may carry several <link> elements (alternate, self,
// replies, enclosure, ...). The entry's Link is taken from the first
// <link> with a non-empty href whose rel attribute is "alternate" or
// absent (RFC 4287 treats a missing rel as "alternate"). When an entry
// has no such link, the last non-empty href is used, which is what
// decoding into a single link field produced before. Content is the raw
// XML nested inside the <entry> element.
//
// Errors from encoding/xml are returned unchanged with a nil slice. A
// document without entries yields a nil slice and a nil error.
func parseAtom(data []byte) ([]FeedEntry, error) {
	v := struct {
		Items []struct {
			// A slice, so that every <link> of the entry is kept instead
			// of later ones overwriting earlier ones.
			Links []struct {
				Rel  string `xml:"rel,attr"`
				Href string `xml:"href,attr"`
			} `xml:"link"`
			Content string `xml:",innerxml"`
		} `xml:"entry"`
	}{}
	if err := xml.Unmarshal(data, &v); err != nil {
		return nil, err
	}

	var feedEntries []FeedEntry
	for _, atomEntry := range v.Items {
		var alternate, fallback string
		for _, link := range atomEntry.Links {
			if link.Href == "" {
				continue
			}
			fallback = link.Href
			if alternate == "" && (link.Rel == "" || link.Rel == "alternate") {
				alternate = link.Href
			}
		}
		href := alternate
		if href == "" {
			href = fallback
		}
		feedEntries = append(feedEntries, FeedEntry{
			Link:    href,
			Content: atomEntry.Content,
		})
	}
	return feedEntries, nil
}
// parseRss decodes an RSS document and returns one FeedEntry per <item>
// element found under <channel>, in document order.
//
// For each item, Link is the character data of its <link> child and
// Content is the raw XML nested inside the <item> element. Errors from
// encoding/xml are returned unchanged with a nil slice. A document
// without items yields a nil slice and a nil error.
func parseRss(data []byte) ([]FeedEntry, error) {
	// rssItem mirrors the parts of an <item> that are kept.
	type rssItem struct {
		Link    string `xml:"link"`
		Content string `xml:",innerxml"`
	}
	var doc struct {
		Items []rssItem `xml:"channel>item"`
	}
	if decodeErr := xml.Unmarshal(data, &doc); decodeErr != nil {
		return nil, decodeErr
	}

	// Deliberately left nil so that a feed without items returns nil.
	var entries []FeedEntry
	for i := range doc.Items {
		item := &doc.Items[i]
		entries = append(entries, FeedEntry{Link: item.Link, Content: item.Content})
	}
	return entries, nil
}