tss/db/db.go

148 lines
2.8 KiB
Go
Raw Normal View History

2022-06-17 17:51:16 -04:00
package db
2022-05-01 15:32:41 -04:00
import (
"encoding/csv"
"errors"
2022-05-01 15:32:41 -04:00
"fmt"
"os"
"path"
"strings"
2022-05-01 15:32:41 -04:00
"time"
2022-06-17 18:23:06 -04:00
"github.com/go-logr/logr"
2022-05-03 17:14:37 -04:00
"go.balki.me/tss/parser"
2022-05-01 15:32:41 -04:00
)
2022-06-17 18:23:06 -04:00
// log is the package-wide logger. It starts as logr.Discard() and is replaced
// by SetLogger.
var log = logr.Discard()
2022-06-20 15:49:55 -04:00
func SetLogger(l logr.Logger) {
log = l
2022-06-17 18:23:06 -04:00
}
2022-05-01 15:32:41 -04:00
// Status is the outcome stored for a feed entry in the Status column of the
// CSV file.
type Status string

// Known Status values. The string forms are what is persisted on disk.
const (
	// Sent marks an entry whose link is treated as already seen when the
	// file is reloaded.
	Sent Status = "SENT"
	// Filtered marks an entry whose link is likewise treated as already
	// seen when the file is reloaded.
	Filtered Status = "FILTERED"
	// Error marks an entry whose link is NOT treated as seen on reload, so
	// the entry is offered again by a later Filter call.
	Error Status = "ERROR"
)
2022-06-21 21:32:16 -04:00
// TimeFormat is the time layout used for the Date column of the CSV file.
// According to the original author it is the layout used by yaml.Marshal.
//
// It is like time.RFC3339Nano except that the zone is always written as a
// numeric offset ("-07:00"), so UTC is rendered as "+00:00" rather than "Z".
const TimeFormat string = "2006-01-02T15:04:05.999999999-07:00"
type Record struct {
Time time.Time
Status Status
2022-05-02 23:23:56 -04:00
Filter string
2022-05-03 17:14:37 -04:00
FeedEntry parser.FeedEntry
2022-05-01 15:32:41 -04:00
}
type DB interface {
2022-05-02 23:23:56 -04:00
IsNewFeed() bool
2022-05-03 17:14:37 -04:00
Filter(entries []parser.FeedEntry) ([]parser.FeedEntry, error)
2022-05-01 15:32:41 -04:00
Save([]Record) error
}
// db is the CSV-file-backed implementation of DB.
type db struct {
// dbPath is the path of the CSV file that records are read from and
// appended to.
dbPath string
// seenLinks is the set of entry links already handled; only the keys
// matter.
seenLinks map[string]struct{}
}
2022-05-02 23:23:56 -04:00
// IsNewFeed reports whether the in-memory set of seen links is empty.
func (d *db) IsNewFeed() bool {
return len(d.seenLinks) == 0
}
2022-05-01 15:32:41 -04:00
func NewDB(storageDir, feedName string) (DB, error) {
dbPath := path.Join(storageDir, fmt.Sprintf("%s.csv", feedName))
2022-06-02 02:09:59 -04:00
db := db{
dbPath: dbPath,
seenLinks: map[string]struct{}{},
}
2022-05-01 15:32:41 -04:00
f, err := os.Open(dbPath)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
log.Info("db file does not exist, will be created", "feed", feedName, "path", dbPath)
return &db, nil
}
2022-05-01 15:32:41 -04:00
return nil, err
}
2022-06-21 21:32:16 -04:00
defer func() {
err := f.Close()
if err != nil {
log.Error(err, "f.Close() failed")
return
}
}()
2022-05-01 15:32:41 -04:00
reader := csv.NewReader(f)
records, err := reader.ReadAll()
if err != nil {
2022-06-17 18:23:06 -04:00
log.Error(err, "failed to parse csv", "path", dbPath)
return nil, err
2022-05-01 15:32:41 -04:00
}
for _, rec := range records {
2022-06-21 21:32:16 -04:00
recStatus := Status(rec[2])
2022-05-01 15:32:41 -04:00
if recStatus == Sent || recStatus == Filtered {
db.seenLinks[rec[1]] = struct{}{}
}
}
return &db, nil
}
2022-05-03 17:14:37 -04:00
func (d *db) Filter(entries []parser.FeedEntry) ([]parser.FeedEntry, error) {
var filteredEntries []parser.FeedEntry
2022-05-01 15:32:41 -04:00
for _, entry := range entries {
if _, ok := d.seenLinks[entry.Link]; !ok {
filteredEntries = append(filteredEntries, entry)
2022-05-27 23:05:59 -04:00
d.seenLinks[entry.Link] = struct{}{}
2022-05-01 15:32:41 -04:00
}
}
return filteredEntries, nil
}
func (d *db) Save(records []Record) error {
2022-05-25 16:02:04 -04:00
if len(records) == 0 {
return nil
}
2022-05-01 15:32:41 -04:00
f, err := os.OpenFile(d.dbPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return err
}
2022-06-21 21:32:16 -04:00
defer func() {
err := f.Close()
if err != nil {
log.Error(err, "f.Close() failed")
return
}
}()
2022-05-25 16:02:04 -04:00
2022-05-01 15:32:41 -04:00
csvw := csv.NewWriter(f)
2022-05-25 16:02:04 -04:00
defer csvw.Flush()
if d.IsNewFeed() {
2022-06-21 21:32:16 -04:00
err := csvw.Write([]string{
2022-05-25 16:02:04 -04:00
/* 1 */ "Date",
/* 2 */ "Link",
/* 3 */ "Status",
/* 4 */ "Filter",
/* 5 */ "Content",
2022-05-01 15:32:41 -04:00
})
2022-06-21 21:32:16 -04:00
if err != nil {
return err
}
2022-05-01 15:32:41 -04:00
}
for _, r := range records {
2022-06-21 21:32:16 -04:00
err := csvw.Write([]string{
2022-05-25 16:02:04 -04:00
/* 1 */ r.Time.Format(TimeFormat),
/* 2 */ r.FeedEntry.Link,
/* 3 */ string(r.Status),
/* 4 */ r.Filter,
/* 5 */ fmt.Sprintf("<item>%s</item>", strings.ReplaceAll(r.FeedEntry.Content, "\n", " ")),
2022-05-01 15:32:41 -04:00
})
2022-06-21 21:32:16 -04:00
if err != nil {
return err
}
2022-05-01 15:32:41 -04:00
}
return nil
}