package app import ( "encoding/csv" "errors" "fmt" "os" "path" "strings" "time" "go.balki.me/tss/log" ) type Status string const ( Sent Status = "SENT" Filtered = "FILTERED" Error = "ERROR" ) //default format used by yaml.Marshal const TimeFormat string = "2006-01-02T15:04:05.999999999-07:00" type Record struct { Time time.Time Status Status FeedEntry FeedEntry } type DB interface { Filter(entries []FeedEntry) ([]FeedEntry, error) Save([]Record) error } type db struct { dbPath string seenLinks map[string]struct{} } func NewDB(storageDir, feedName string) (DB, error) { dbPath := path.Join(storageDir, fmt.Sprintf("%s.csv", feedName)) db := db{dbPath: dbPath} db.seenLinks = map[string]struct{}{} f, err := os.Open(dbPath) if err != nil { if errors.Is(err, os.ErrNotExist) { log.Info("db file does not exist, will be created", "feed", feedName, "path", dbPath) return &db, nil } return nil, err } defer f.Close() reader := csv.NewReader(f) records, err := reader.ReadAll() if err != nil { return nil, fmt.Errorf("failed to parse csv, path:%v, error:%w", dbPath, err) } for _, rec := range records { var recStatus Status = Status(rec[2]) if recStatus == Sent || recStatus == Filtered { db.seenLinks[rec[1]] = struct{}{} } } return &db, nil } func (d *db) Filter(entries []FeedEntry) ([]FeedEntry, error) { var filteredEntries []FeedEntry for _, entry := range entries { if _, ok := d.seenLinks[entry.Link]; !ok { filteredEntries = append(filteredEntries, entry) } } return filteredEntries, nil } func (d *db) Save(records []Record) error { f, err := os.OpenFile(d.dbPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) if err != nil { return err } defer func() { f.Sync() f.Close() }() csvw := csv.NewWriter(f) if len(d.seenLinks) == 0 { //New file, write header csvw.Write([]string{ "Date", "Link", "Status", "FilteredBy", "Content", }) } for _, r := range records { csvw.Write([]string{ r.Time.Format(TimeFormat), r.FeedEntry.Link, string(r.Status), "-", strings.ReplaceAll(r.FeedEntry.Content, "\n", " "), }) } csvw.Flush() return nil }