// Package db keeps a per-feed CSV log of processed feed entries and uses it
// to tell which entry links have already been handled.
package db

import (
	"encoding/csv"
	"errors"
	"fmt"
	"os"
	"path"
	"strings"
	"time"

	"github.com/go-logr/logr"
	"go.balki.me/tss/parser"
)

// log is the package logger; it discards everything until SetLogger is called.
var log = logr.Discard()

// SetLogger replaces the package-level logger.
func SetLogger(l logr.Logger) {
	log = l
}

// Status is the processing outcome stored in the Status column of a record.
type Status string

// Status values written to the CSV file. When a file is loaded, links with
// status Sent or Filtered are remembered as seen; links with status Error are
// not, so Filter will return them again.
//
// Only Sent carries the explicit Status type; Filtered and Error are untyped
// string constants that convert implicitly wherever a Status is expected.
const (
	Sent     Status = "SENT"
	Filtered        = "FILTERED"
	Error           = "ERROR"
)

// TimeFormat is the layout used for the Date column.
// It is the default format used by yaml.Marshal.
const TimeFormat string = "2006-01-02T15:04:05.999999999-07:00"

// Column positions inside a CSV row, matching the order written by Save.
const (
	linkColumn   = 1
	statusColumn = 2
)

// Record is one row to be appended to the feed's CSV file.
type Record struct {
	Time      time.Time
	Status    Status
	Filter    string
	FeedEntry parser.FeedEntry
}

// DB tracks which feed entries have been seen and persists processing results.
type DB interface {
	// IsNewFeed reports whether no links are currently known for the feed.
	IsNewFeed() bool
	// Filter returns the entries whose links have not been seen yet.
	Filter(entries []parser.FeedEntry) ([]parser.FeedEntry, error)
	// Save appends the given records to persistent storage.
	Save([]Record) error
}

// db is the CSV-file backed implementation of DB.
type db struct {
	dbPath    string
	seenLinks map[string]struct{}
}

var _ DB = (*db)(nil)

// IsNewFeed reports whether the set of seen links is empty. Filter adds links
// to that set, so the result can change from true to false after Filter runs.
func (d *db) IsNewFeed() bool {
	return len(d.seenLinks) == 0
}

// NewDB opens the CSV file "<feedName>.csv" inside storageDir and loads the
// links of all rows whose status is Sent or Filtered.
//
// A missing file is not an error: an empty DB is returned and the file is
// created by the first Save. Any other open or parse failure is returned.
func NewDB(storageDir, feedName string) (DB, error) {
	dbPath := path.Join(storageDir, fmt.Sprintf("%s.csv", feedName))
	store := &db{
		dbPath:    dbPath,
		seenLinks: map[string]struct{}{},
	}

	f, err := os.Open(dbPath)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			log.Info("db file does not exist, will be created", "feed", feedName, "path", dbPath)
			return store, nil
		}
		return nil, fmt.Errorf("opening db file %q: %w", dbPath, err)
	}
	defer f.Close()

	records, err := csv.NewReader(f).ReadAll()
	if err != nil {
		log.Error(err, "failed to parse csv", "path", dbPath)
		return nil, fmt.Errorf("parsing db file %q: %w", dbPath, err)
	}

	for _, rec := range records {
		// Guard against rows that are too short to hold link and status;
		// indexing them would panic.
		if len(rec) <= statusColumn {
			continue
		}
		// The header row (if present) has status "Status" and is skipped
		// by this comparison as well.
		switch Status(rec[statusColumn]) {
		case Sent, Filtered:
			store.seenLinks[rec[linkColumn]] = struct{}{}
		}
	}
	return store, nil
}

// Filter returns the entries whose Link has not been seen before, in input
// order, and marks those links as seen in memory. A link that occurs several
// times in entries is returned only once. The returned error is always nil.
func (d *db) Filter(entries []parser.FeedEntry) ([]parser.FeedEntry, error) {
	var filteredEntries []parser.FeedEntry
	for _, entry := range entries {
		if _, seen := d.seenLinks[entry.Link]; seen {
			continue
		}
		filteredEntries = append(filteredEntries, entry)
		d.seenLinks[entry.Link] = struct{}{}
	}
	return filteredEntries, nil
}

// Save appends records to the feed's CSV file, creating the file if needed.
// A header row is written first when the file is empty. Newlines in the entry
// content are replaced by spaces so every record stays on a single line.
//
// Calling Save with no records is a no-op. Any failure to open, write, flush
// or close the file is returned.
func (d *db) Save(records []Record) (err error) {
	if len(records) == 0 {
		return nil
	}

	f, err := os.OpenFile(d.dbPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return fmt.Errorf("opening db file %q for append: %w", d.dbPath, err)
	}
	defer func() {
		// A close error on a file opened for writing can mean lost data;
		// report it unless an earlier error is already being returned.
		if cerr := f.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("closing db file %q: %w", d.dbPath, cerr)
		}
	}()

	// Decide on the header from the file itself rather than from seenLinks:
	// Filter populates seenLinks before Save is called, and a file holding
	// only ERROR rows leaves seenLinks empty although the file is not.
	info, err := f.Stat()
	if err != nil {
		return fmt.Errorf("inspecting db file %q: %w", d.dbPath, err)
	}

	csvw := csv.NewWriter(f)
	if info.Size() == 0 {
		header := []string{"Date", "Link", "Status", "Filter", "Content"}
		if err := csvw.Write(header); err != nil {
			return fmt.Errorf("writing header to %q: %w", d.dbPath, err)
		}
	}

	for _, r := range records {
		row := []string{
			r.Time.Format(TimeFormat),
			r.FeedEntry.Link,
			string(r.Status),
			r.Filter,
			strings.ReplaceAll(r.FeedEntry.Content, "\n", " "),
		}
		if err := csvw.Write(row); err != nil {
			return fmt.Errorf("writing record to %q: %w", d.dbPath, err)
		}
	}

	// Flush does not return an error itself; buffered write failures are
	// only visible through Error.
	csvw.Flush()
	if err := csvw.Error(); err != nil {
		return fmt.Errorf("flushing records to %q: %w", d.dbPath, err)
	}
	return nil
}