implement atom

This commit is contained in:
Balakrishnan Balasubramanian 2022-05-03 17:14:37 -04:00
parent ccbf558fa1
commit 46d173d06c
6 changed files with 109 additions and 45 deletions

View File

@ -6,6 +6,7 @@ import (
"time"
"go.balki.me/tss/log"
"go.balki.me/tss/parser"
"go.balki.me/tss/proxy"
"go.balki.me/tss/telegram"
)
@ -65,7 +66,7 @@ func ProcessFeed(feed FeedCfg, scheduler Scheduler, dbDir string, tgram telegram
return
}
entries, err := ParseFeed(data)
entries, err := parser.ParseFeed(feed.Type, data)
if err != nil {
log.Error("feed parsing failed", "feed", feed.Name, "data", data, "error", err)
return
@ -74,13 +75,13 @@ func ProcessFeed(feed FeedCfg, scheduler Scheduler, dbDir string, tgram telegram
scheduler.Good(feed.Name)
var records []Record
var newEntries []FeedEntry
var newEntries []parser.FeedEntry
if db.IsNewFeed() {
ftl := int(feed.FirstTimeLimit)
if feed.FirstTimeLimit == NoLimit || len(entries) <= ftl {
newEntries = entries
} else {
var filteredEntries []FeedEntry
var filteredEntries []parser.FeedEntry
newEntries, filteredEntries = entries[:ftl], entries[ftl:]
for _, entry := range filteredEntries {
records = append(records, Record{

View File

@ -4,6 +4,7 @@ import (
"os"
"path"
"go.balki.me/tss/parser"
"gopkg.in/yaml.v3"
)
@ -12,14 +13,15 @@ type FeedLimit int
const NoLimit FeedLimit = -1
type FeedCfg struct {
Name string `yaml:"name"`
Channel string `yaml:"channel"`
Rhash string `yaml:"rhash"`
Url string `yaml:"url"`
Cron string `yaml:"cron"`
Proxy string `yaml:"proxy"`
FTL *int `yaml:"first_time_limit"`
FirstTimeLimit FeedLimit `yaml:"-"`
Name string `yaml:"name"`
Channel string `yaml:"channel"`
Rhash string `yaml:"rhash"`
Url string `yaml:"url"`
Cron string `yaml:"cron"`
Proxy string `yaml:"proxy"`
Type parser.FeedType `yaml:"type"`
FTL *int `yaml:"first_time_limit"`
FirstTimeLimit FeedLimit `yaml:"-"`
}
type Config struct {

View File

@ -10,6 +10,7 @@ import (
"time"
"go.balki.me/tss/log"
"go.balki.me/tss/parser"
)
type Status string
@ -27,12 +28,12 @@ type Record struct {
Time time.Time
Status Status
Filter string
FeedEntry FeedEntry
FeedEntry parser.FeedEntry
}
type DB interface {
IsNewFeed() bool
Filter(entries []FeedEntry) ([]FeedEntry, error)
Filter(entries []parser.FeedEntry) ([]parser.FeedEntry, error)
Save([]Record) error
}
@ -72,8 +73,8 @@ func NewDB(storageDir, feedName string) (DB, error) {
return &db, nil
}
func (d *db) Filter(entries []FeedEntry) ([]FeedEntry, error) {
var filteredEntries []FeedEntry
func (d *db) Filter(entries []parser.FeedEntry) ([]parser.FeedEntry, error) {
var filteredEntries []parser.FeedEntry
for _, entry := range entries {
if _, ok := d.seenLinks[entry.Link]; !ok {
filteredEntries = append(filteredEntries, entry)

View File

@ -1,28 +0,0 @@
package app
import (
"encoding/xml"
)
// FeedEntry holds the fields decoded from one feed item by encoding/xml.
// ParseFeed fills a slice of these from the document's channel>item elements.
type FeedEntry struct {
// Title is the character data of the item's <title> child.
Title string `xml:"title"`
// Link is the character data of the item's <link> child.
Link string `xml:"link"`
// Author is the character data of the item's <author> child.
Author string `xml:"author"`
// Guid is the character data of the item's <guid> child.
Guid string `xml:"guid"`
// Description is the character data of the item's <description> child.
Description string `xml:"description"`
// Content is the raw, undecoded XML nested inside the item element.
Content string `xml:",innerxml"`
}
// ParseFeed decodes data as an XML document and returns one FeedEntry for
// every item element nested under the document's channel element.
//
// The error reported by xml.Unmarshal is returned unchanged, together with a
// nil slice. A document without any channel>item elements yields a nil slice
// and a nil error.
func ParseFeed(data []byte) ([]FeedEntry, error) {
	var doc struct {
		Items []FeedEntry `xml:"channel>item"`
	}
	if err := xml.Unmarshal(data, &doc); err != nil {
		return nil, err
	}
	return doc.Items, nil
}

View File

@ -29,6 +29,10 @@ type Item struct {
func main() {
fmt.Println("hello go")
foo()
if 1 == 1 {
return
}
data, err := os.ReadFile("/home/balki/projects/tss/w.tmp/ounapuu.xml")
if err != nil {
log.Fatal(err)
@ -93,11 +97,18 @@ func main() {
func foo() {
x := `
<foo>
<bar><blue>slkdfjdslk</blue></bar>
<bar src="hello">
<link href="http://foobar.com">Hello World</link>
<blue>slkdfjdslk</blue>
</bar>
</foo>
`
s := struct {
Bar struct {
Link struct {
Href string `xml:"href,attr"`
} `xml:"link"`
Attr string `xml:"src,attr"`
Content string `xml:",innerxml"`
} `xml:"bar"`
}{}
@ -106,5 +117,5 @@ func foo() {
if err != nil {
log.Fatal(err)
}
fmt.Println(s.Bar.Content)
fmt.Println(s.Bar.Link.Href)
}

77
parser/parser.go Normal file
View File

@ -0,0 +1,77 @@
package parser
import (
"encoding/xml"
"fmt"
)
// FeedType names the syndication format of a feed document. ParseFeed
// switches on it to choose which parser to run.
type FeedType string
// Feed types that ParseFeed has a parser for.
const (
// Rss selects parseRss.
Rss FeedType = "rss"
// Atom selects parseAtom.
Atom FeedType = "atom"
)
// FeedEntry is the format-independent result of parsing a single feed
// item (RSS) or entry (Atom).
type FeedEntry struct {
// Link is the entry's URL: the text of the <link> element for RSS items,
// or the href attribute of a <link> element for Atom entries.
Link string
// Content is the raw, undecoded XML nested inside the item/entry element.
Content string
}
// ParseFeed decodes the feed document in data with the parser selected by
// feedType and returns the entries it contains.
//
// An empty feedType is treated as Rss. Feeds configured without an explicit
// type were always parsed as RSS before FeedType was introduced, so the zero
// value must keep working for them. Any other unrecognised value yields an
// error and a nil slice. Errors from the selected parser are returned
// unchanged.
func ParseFeed(feedType FeedType, data []byte) ([]FeedEntry, error) {
	switch feedType {
	case Rss, "":
		// "" is what a config without a type setting produces; default to RSS.
		return parseRss(data)
	case Atom:
		return parseAtom(data)
	default:
		return nil, fmt.Errorf("Unknown feed type: %s", feedType)
	}
}
// parseAtom decodes data as an Atom feed and returns one FeedEntry per
// <entry> element found directly under the document root.
//
// An Atom entry may carry several <link> elements (alternate, self,
// enclosure, replies, ...). The link chosen for FeedEntry.Link is the first
// one with a non-empty href whose rel is "alternate" or absent (an absent rel
// means "alternate" per RFC 4287). If the entry has no such link, the first
// link with a non-empty href is used; if it has none, Link is empty.
//
// FeedEntry.Content is the raw inner XML of the <entry> element. The error
// reported by xml.Unmarshal is returned unchanged with a nil slice, and a
// feed without entries yields a nil slice and a nil error.
func parseAtom(data []byte) ([]FeedEntry, error) {
	v := struct {
		Items []struct {
			// Links collects every <link> child; decoding into a single
			// struct would let a later element overwrite an earlier one.
			Links []struct {
				Rel  string `xml:"rel,attr"`
				Href string `xml:"href,attr"`
			} `xml:"link"`
			Content string `xml:",innerxml"`
		} `xml:"entry"`
	}{}
	if err := xml.Unmarshal(data, &v); err != nil {
		return nil, err
	}

	var feedEntries []FeedEntry
	for _, atomEntry := range v.Items {
		var link string
		for _, l := range atomEntry.Links {
			if l.Href == "" {
				continue
			}
			if l.Rel == "" || l.Rel == "alternate" {
				link = l.Href
				break
			}
			if link == "" {
				// Remember the first usable link in case no alternate exists.
				link = l.Href
			}
		}
		feedEntries = append(feedEntries, FeedEntry{
			Link:    link,
			Content: atomEntry.Content,
		})
	}
	return feedEntries, nil
}
// parseRss decodes data as an RSS document and returns one FeedEntry per
// item element nested under the document's channel element.
//
// FeedEntry.Link is the text of the item's <link> child and
// FeedEntry.Content is the raw inner XML of the item. The error reported by
// xml.Unmarshal is returned unchanged with a nil slice; a channel without
// items yields a nil slice and a nil error.
func parseRss(data []byte) ([]FeedEntry, error) {
	type rssItem struct {
		Link    string `xml:"link"`
		Content string `xml:",innerxml"`
	}
	var doc struct {
		Items []rssItem `xml:"channel>item"`
	}
	if err := xml.Unmarshal(data, &doc); err != nil {
		return nil, err
	}

	// Left as a nil slice on purpose so an item-less feed returns nil.
	var out []FeedEntry
	for i := range doc.Items {
		item := &doc.Items[i]
		out = append(out, FeedEntry{Link: item.Link, Content: item.Content})
	}
	return out, nil
}