2022-05-01 15:32:41 -04:00
|
|
|
package app
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/csv"
|
2022-05-02 00:36:27 -04:00
|
|
|
"errors"
|
2022-05-01 15:32:41 -04:00
|
|
|
"fmt"
|
|
|
|
"os"
|
|
|
|
"path"
|
2022-05-02 00:36:27 -04:00
|
|
|
"strings"
|
2022-05-01 15:32:41 -04:00
|
|
|
"time"
|
2022-05-02 00:36:27 -04:00
|
|
|
|
|
|
|
"go.balki.me/tss/log"
|
2022-05-03 17:14:37 -04:00
|
|
|
"go.balki.me/tss/parser"
|
2022-05-01 15:32:41 -04:00
|
|
|
)
|
|
|
|
|
|
|
|
// Status is the outcome recorded for a feed entry in the CSV database.
type Status string

// Values stored in the Status column of the CSV database.
//
// Each constant carries an explicit Status type. In a const group without
// iota, a constant written as `Name = "value"` does not inherit the type of
// the preceding line and would silently be an untyped string constant.
const (
	// Sent marks an entry as sent; NewDB treats its link as already seen.
	Sent Status = "SENT"

	// Filtered marks an entry as filtered; NewDB treats its link as already seen.
	Filtered Status = "FILTERED"

	// Error marks an entry as failed; NewDB does not add its link to the
	// set of seen links.
	Error Status = "ERROR"
)
|
|
|
|
|
|
|
|
// TimeFormat is the layout used to render Record.Time when records are
// written to the CSV database. It is the default format used by yaml.Marshal.
const TimeFormat string = "2006-01-02T15:04:05.999999999-07:00"
|
|
|
|
|
|
|
|
type Record struct {
|
|
|
|
Time time.Time
|
|
|
|
Status Status
|
2022-05-02 23:23:56 -04:00
|
|
|
Filter string
|
2022-05-03 17:14:37 -04:00
|
|
|
FeedEntry parser.FeedEntry
|
2022-05-01 15:32:41 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
type DB interface {
|
2022-05-02 23:23:56 -04:00
|
|
|
IsNewFeed() bool
|
2022-05-03 17:14:37 -04:00
|
|
|
Filter(entries []parser.FeedEntry) ([]parser.FeedEntry, error)
|
2022-05-01 15:32:41 -04:00
|
|
|
Save([]Record) error
|
|
|
|
}
|
|
|
|
|
|
|
|
// db is the CSV-file-backed implementation of DB.
type db struct {
	// dbPath is the path of the feed's CSV file.
	dbPath string

	// seenLinks is the set of entry links already known to the store.
	// The empty struct value makes the map act as a set.
	seenLinks map[string]struct{}
}
|
|
|
|
|
2022-05-02 23:23:56 -04:00
|
|
|
func (d *db) IsNewFeed() bool {
|
|
|
|
return len(d.seenLinks) == 0
|
|
|
|
}
|
|
|
|
|
2022-05-01 15:32:41 -04:00
|
|
|
func NewDB(storageDir, feedName string) (DB, error) {
|
|
|
|
dbPath := path.Join(storageDir, fmt.Sprintf("%s.csv", feedName))
|
2022-05-02 00:36:27 -04:00
|
|
|
db := db{dbPath: dbPath}
|
|
|
|
db.seenLinks = map[string]struct{}{}
|
2022-05-01 15:32:41 -04:00
|
|
|
f, err := os.Open(dbPath)
|
|
|
|
if err != nil {
|
2022-05-02 00:36:27 -04:00
|
|
|
if errors.Is(err, os.ErrNotExist) {
|
|
|
|
log.Info("db file does not exist, will be created", "feed", feedName, "path", dbPath)
|
|
|
|
return &db, nil
|
|
|
|
}
|
2022-05-01 15:32:41 -04:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
reader := csv.NewReader(f)
|
|
|
|
records, err := reader.ReadAll()
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to parse csv, path:%v, error:%w", dbPath, err)
|
|
|
|
}
|
|
|
|
for _, rec := range records {
|
|
|
|
var recStatus Status = Status(rec[2])
|
|
|
|
if recStatus == Sent || recStatus == Filtered {
|
|
|
|
db.seenLinks[rec[1]] = struct{}{}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return &db, nil
|
|
|
|
}
|
|
|
|
|
2022-05-03 17:14:37 -04:00
|
|
|
func (d *db) Filter(entries []parser.FeedEntry) ([]parser.FeedEntry, error) {
|
|
|
|
var filteredEntries []parser.FeedEntry
|
2022-05-01 15:32:41 -04:00
|
|
|
for _, entry := range entries {
|
|
|
|
if _, ok := d.seenLinks[entry.Link]; !ok {
|
|
|
|
filteredEntries = append(filteredEntries, entry)
|
2022-05-27 23:05:59 -04:00
|
|
|
d.seenLinks[entry.Link] = struct{}{}
|
2022-05-01 15:32:41 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return filteredEntries, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (d *db) Save(records []Record) error {
|
2022-05-25 16:02:04 -04:00
|
|
|
if len(records) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-05-01 15:32:41 -04:00
|
|
|
f, err := os.OpenFile(d.dbPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-05-25 16:02:04 -04:00
|
|
|
defer f.Close()
|
|
|
|
|
2022-05-01 15:32:41 -04:00
|
|
|
csvw := csv.NewWriter(f)
|
2022-05-25 16:02:04 -04:00
|
|
|
defer csvw.Flush()
|
|
|
|
|
|
|
|
if d.IsNewFeed() {
|
2022-05-01 15:32:41 -04:00
|
|
|
csvw.Write([]string{
|
2022-05-25 16:02:04 -04:00
|
|
|
/* 1 */ "Date",
|
|
|
|
/* 2 */ "Link",
|
|
|
|
/* 3 */ "Status",
|
|
|
|
/* 4 */ "Filter",
|
|
|
|
/* 5 */ "Content",
|
2022-05-01 15:32:41 -04:00
|
|
|
})
|
|
|
|
}
|
|
|
|
for _, r := range records {
|
|
|
|
csvw.Write([]string{
|
2022-05-25 16:02:04 -04:00
|
|
|
/* 1 */ r.Time.Format(TimeFormat),
|
|
|
|
/* 2 */ r.FeedEntry.Link,
|
|
|
|
/* 3 */ string(r.Status),
|
|
|
|
/* 4 */ r.Filter,
|
|
|
|
/* 5 */ fmt.Sprintf("<item>%s</item>", strings.ReplaceAll(r.FeedEntry.Content, "\n", " ")),
|
2022-05-01 15:32:41 -04:00
|
|
|
})
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|