diff --git a/TODO.md b/TODO.md index 2b9349b..2afc75f 100644 --- a/TODO.md +++ b/TODO.md @@ -9,12 +9,12 @@ * ✓ Posting to telegram * ✓ Telegram rate limit: 20 msgs/min https://core.telegram.org/bots/faq#my-bot-is-hitting-limits-how-do-i-avoid-this https://gist.github.com/zdebra/10f0e284c4672e99f0cb767298f20c11 -* First Run skip all but first n -* Folder creation +* ✓ First Run skip all but first n +* ✓ Folder creation +* ✓ Wrap content inside item * Change flags to os.Args[1] for config path * Cleanup * Make Rhash optional -* Wrap content inside item ## Issues * ✓ Last record is not written fully to csv: *Fixed*. Had to Flush writer diff --git a/app/app.go b/app/app.go index c29cd95..21c10bf 100644 --- a/app/app.go +++ b/app/app.go @@ -73,16 +73,34 @@ func ProcessFeed(feed FeedCfg, scheduler Scheduler, dbDir string, tgram telegram scheduler.Good(feed.Name) - filteredEntries, err := db.Filter(entries) - if err != nil { - log.Error("failed to filter entries", "feed", feed.Name, "error", err) + var records []Record + var newEntries []FeedEntry + if db.IsNewFeed() { + ftl := int(feed.FirstTimeLimit) + if feed.FirstTimeLimit == NoLimit || len(entries) <= ftl { + newEntries = entries + } else { + var filteredEntries []FeedEntry + newEntries, filteredEntries = entries[:ftl], entries[ftl:] + for _, entry := range filteredEntries { + records = append(records, Record{ + Time: time.Now(), + Status: Filtered, + Filter: "FirstTime", + FeedEntry: entry, + }) + } + } + } else { + newEntries, err = db.Filter(entries) + if err != nil { + log.Error("failed to filter entries", "feed", feed.Name, "error", err) + } } - var records []Record - for _, entry := range filteredEntries { - now := time.Now() + for _, entry := range newEntries { r := Record{ - Time: now, + Time: time.Now(), FeedEntry: entry, } err := tgram.SendLink(entry.Link, feed.Channel, feed.Rhash) diff --git a/app/config.go b/app/config.go index 34f10dd..579bb68 100644 --- a/app/config.go +++ b/app/config.go @@ -7,23 +7,29 @@ import ( "gopkg.in/yaml.v3" ) +type FeedLimit int + +const NoLimit FeedLimit = -1 + type FeedCfg struct { - Name string `yaml:"name"` - Channel string `yaml:"channel"` - Rhash string `yaml:"rhash"` - Url string `yaml:"url"` - Cron string `yaml:"cron"` - Proxy string `yaml:"proxy"` + Name string `yaml:"name"` + Channel string `yaml:"channel"` + Rhash string `yaml:"rhash"` + Url string `yaml:"url"` + Cron string `yaml:"cron"` + Proxy string `yaml:"proxy"` + FTL *int `yaml:"first_time_limit"` + FirstTimeLimit FeedLimit `yaml:"-"` } type Config struct { - Proxy string `yaml:"proxy"` - TelegramProxy string `yaml:"telegram_proxy"` - //TODO: read from credential file - TelegramAuthToken string `yaml:"telegram_auth_token"` + Proxy string `yaml:"proxy"` + TelegramProxy string `yaml:"telegram_proxy"` + TelegramAuthToken string `yaml:"telegram_auth_token"` //TODO: read from credential file DataDir string `yaml:"data_dir"` LastSuccessPath string `yaml:"last_loaded_path"` DbDir string `yaml:"db_dir"` + FTL *int `yaml:"first_time_limit"` Feeds []FeedCfg `yaml:"feeds"` } @@ -53,6 +59,17 @@ func ParseConfig(configPath string) (*Config, error) { c.DbDir = path.Join(c.DataDir, "feed_data") } + err = os.MkdirAll(c.DbDir, 0755) + + if err != nil { + return nil, err + } + + err = os.MkdirAll(c.DataDir, 0755) + if err != nil { + return nil, err + } + if c.Proxy != "" { if c.TelegramProxy == "" { c.TelegramProxy = c.Proxy @@ -74,6 +91,13 @@ func ParseConfig(configPath string) (*Config, error) { if feedCfg.Proxy == "NONE" { feedCfg.Proxy = "" } + if feedCfg.FTL != nil { + feedCfg.FirstTimeLimit = FeedLimit(*feedCfg.FTL) + } else if c.FTL != nil { + feedCfg.FirstTimeLimit = FeedLimit(*c.FTL) + } else { + feedCfg.FirstTimeLimit = NoLimit + } } return &c, nil diff --git a/app/db.go b/app/db.go index 8afe5d7..57e1a3a 100644 --- a/app/db.go +++ b/app/db.go @@ -26,10 +26,12 @@ const TimeFormat string = "2006-01-02T15:04:05.999999999-07:00" type Record struct { Time time.Time Status Status + Filter string FeedEntry FeedEntry } type DB interface { + IsNewFeed() bool Filter(entries []FeedEntry) ([]FeedEntry, error) Save([]Record) error } @@ -39,6 +41,10 @@ type db struct { seenLinks map[string]struct{} } +func (d *db) IsNewFeed() bool { + return len(d.seenLinks) == 0 +} + func NewDB(storageDir, feedName string) (DB, error) { dbPath := path.Join(storageDir, fmt.Sprintf("%s.csv", feedName)) db := db{dbPath: dbPath} @@ -91,7 +97,7 @@ func (d *db) Save(records []Record) error { "Date", "Link", "Status", - "FilteredBy", + "Filter", "Content", }) } @@ -100,8 +106,8 @@ func (d *db) Save(records []Record) error { r.Time.Format(TimeFormat), r.FeedEntry.Link, string(r.Status), - "-", - strings.ReplaceAll(r.FeedEntry.Content, "\n", " "), + r.Filter, + fmt.Sprintf("%s", strings.ReplaceAll(r.FeedEntry.Content, "\n", " ")), }) } csvw.Flush() diff --git a/app/schedule.go b/app/schedule.go index 74a6530..0f9be77 100644 --- a/app/schedule.go +++ b/app/schedule.go @@ -23,7 +23,7 @@ type scheduler struct { } func NewScheduler(filePath string) (Scheduler, error) { - s := scheduler{filePath: filePath} + s := scheduler{filePath: filePath, lastSuccessTime: map[string]time.Time{}} data, err := os.ReadFile(filePath) if err != nil { if !errors.Is(err, os.ErrNotExist) { diff --git a/exp/slice/main.go b/exp/slice/main.go new file mode 100644 index 0000000..1ad910b --- /dev/null +++ b/exp/slice/main.go @@ -0,0 +1,14 @@ +package main + +import "fmt" + +func main() { + var arr []int + for i := 0; i < 10; i++ { + arr = append(arr, i) + } + fmt.Println(arr) + fmt.Println(arr[:0], arr[0:]) + fmt.Println(arr[:4], arr[4:]) + fmt.Println(arr[:15], arr[15:]) +} diff --git a/telegram/telegram.go b/telegram/telegram.go index 7d3f4aa..f6bee1e 100644 --- a/telegram/telegram.go +++ b/telegram/telegram.go @@ -15,10 +15,6 @@ import ( "golang.org/x/time/rate" ) -func main() { - fmt.Println("vim-go") -} - type TelegramSender interface { SendLink(link, channel, rhash string) error }