almost done
This commit is contained in:
		
							
								
								
									
										47
									
								
								app/app.go
									
									
									
									
									
								
							
							
						
						
									
										47
									
								
								app/app.go
									
									
									
									
									
								
							@@ -1,6 +1,11 @@
 | 
			
		||||
package app
 | 
			
		||||
 | 
			
		||||
import "go.balki.me/tss/log"
 | 
			
		||||
import (
 | 
			
		||||
	"io"
 | 
			
		||||
 | 
			
		||||
	"go.balki.me/tss/log"
 | 
			
		||||
	"go.balki.me/tss/proxy"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func Run(configPath string) {
 | 
			
		||||
	cfg, err := ParseConfig(configPath)
 | 
			
		||||
@@ -19,7 +24,7 @@ func Run(configPath string) {
 | 
			
		||||
 | 
			
		||||
	for _, feed := range cfg.Feeds {
 | 
			
		||||
		log.Info("processing feed", "feed", feed.Name)
 | 
			
		||||
		ProcessFeed(feed, scheduler)
 | 
			
		||||
		ProcessFeed(feed, scheduler, cfg.DbDir)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
@@ -53,19 +58,51 @@ func Run(configPath string) {
 | 
			
		||||
	*/
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func ProcessFeed(feed FeedCfg, scheduler *Scheduler) {
 | 
			
		||||
func ProcessFeed(feed FeedCfg, scheduler *Scheduler, dbDir string) {
 | 
			
		||||
	sd, err := scheduler.ShouldDownload(feed.Name, feed.Cron)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		log.Error("shouldDownload failed", "feed", feed.Name, "err", err)
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if !sd {
 | 
			
		||||
		log.Info("skipping feed due to schedule", "feed", feed.Name)
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
	_, err = Download(feed.Url, feed.Proxy)
 | 
			
		||||
 | 
			
		||||
	db, err := NewDB(dbDir, feed.Name)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		log.Error("download failed", "feed", feed.Name, "url", feed.Url, "proxy", feed.Proxy)
 | 
			
		||||
		log.Error("failed to get db", "feed", feed.Name, "db_dir", dbDir, "error", err)
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	data, err := Download(feed.Url, feed.Proxy)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		log.Error("download failed", "feed", feed.Name, "url", feed.Url, "proxy", feed.Proxy, "error", err)
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	entries, err := ParseFeed(data)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		log.Error("feed parsing failed", "feed", feed.Name, "data", data, "error", err)
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	_, err = db.Filter(entries)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		log.Error("failed to filter entries", "feed", feed.Name, "error", err)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func Download(url string, proxyUrl string) ([]byte, error) {
 | 
			
		||||
	client, err := proxy.GetClient(proxyUrl)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
	res, err := client.Get(url)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
	defer res.Body.Close()
 | 
			
		||||
	return io.ReadAll(res.Body)
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -19,6 +19,7 @@ type FeedCfg struct {
 | 
			
		||||
type Config struct {
 | 
			
		||||
	DataDir         string    `yaml:"data_dir"`
 | 
			
		||||
	LastSuccessPath string    `yaml:"last_loaded_path"`
 | 
			
		||||
	DbDir           string    `yaml:"db_dir"`
 | 
			
		||||
	Feeds           []FeedCfg `yaml:"feeds"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -44,5 +45,9 @@ func ParseConfig(configPath string) (*Config, error) {
 | 
			
		||||
		c.LastSuccessPath = path.Join(c.DataDir, "last_success.yml")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if c.DbDir == "" {
 | 
			
		||||
		c.DbDir = path.Join(c.DataDir, "feed_data")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return &c, nil
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										97
									
								
								app/db.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								app/db.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,97 @@
 | 
			
		||||
package app
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"encoding/csv"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"os"
 | 
			
		||||
	"path"
 | 
			
		||||
	"time"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type Status string
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	Sent     Status = "SENT"
 | 
			
		||||
	Filtered        = "FILTERED"
 | 
			
		||||
	Error           = "ERROR"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
//default format used by yaml.Marshal
 | 
			
		||||
const TimeFormat string = "2006-01-02T15:04:05.999999999-07:00"
 | 
			
		||||
 | 
			
		||||
type Record struct {
 | 
			
		||||
	Time      time.Time
 | 
			
		||||
	Status    Status
 | 
			
		||||
	FeedEntry FeedEntry
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type DB interface {
 | 
			
		||||
	Filter(entries []FeedEntry) ([]FeedEntry, error)
 | 
			
		||||
	Save([]Record) error
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type db struct {
 | 
			
		||||
	dbPath    string
 | 
			
		||||
	seenLinks map[string]struct{}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func NewDB(storageDir, feedName string) (DB, error) {
 | 
			
		||||
	dbPath := path.Join(storageDir, fmt.Sprintf("%s.csv", feedName))
 | 
			
		||||
	f, err := os.Open(dbPath)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
	defer f.Close()
 | 
			
		||||
	reader := csv.NewReader(f)
 | 
			
		||||
	records, err := reader.ReadAll()
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, fmt.Errorf("failed to parse csv, path:%v, error:%w", dbPath, err)
 | 
			
		||||
	}
 | 
			
		||||
	db := db{dbPath: dbPath}
 | 
			
		||||
	db.seenLinks = map[string]struct{}{}
 | 
			
		||||
	for _, rec := range records {
 | 
			
		||||
		var recStatus Status = Status(rec[2])
 | 
			
		||||
		if recStatus == Sent || recStatus == Filtered {
 | 
			
		||||
			db.seenLinks[rec[1]] = struct{}{}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return &db, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (d *db) Filter(entries []FeedEntry) ([]FeedEntry, error) {
 | 
			
		||||
	var filteredEntries []FeedEntry
 | 
			
		||||
	for _, entry := range entries {
 | 
			
		||||
		if _, ok := d.seenLinks[entry.Link]; !ok {
 | 
			
		||||
			filteredEntries = append(filteredEntries, entry)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return filteredEntries, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (d *db) Save(records []Record) error {
 | 
			
		||||
	f, err := os.OpenFile(d.dbPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	defer f.Close()
 | 
			
		||||
	csvw := csv.NewWriter(f)
 | 
			
		||||
	if len(d.seenLinks) == 0 { //New file, write header
 | 
			
		||||
		csvw.Write([]string{
 | 
			
		||||
			"Date",
 | 
			
		||||
			"Link",
 | 
			
		||||
			"Status",
 | 
			
		||||
			"FilteredBy",
 | 
			
		||||
			"Content",
 | 
			
		||||
		})
 | 
			
		||||
	}
 | 
			
		||||
	for _, r := range records {
 | 
			
		||||
		csvw.Write([]string{
 | 
			
		||||
			r.Time.Format(TimeFormat),
 | 
			
		||||
			r.FeedEntry.Link,
 | 
			
		||||
			string(r.Status),
 | 
			
		||||
			"-",
 | 
			
		||||
			r.FeedEntry.Content,
 | 
			
		||||
		})
 | 
			
		||||
	}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
@@ -1,5 +1 @@
 | 
			
		||||
package app
 | 
			
		||||
 | 
			
		||||
func Download(url string, proxy string) ([]byte, error) {
 | 
			
		||||
	return nil, nil
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										28
									
								
								app/parser.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								app/parser.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,28 @@
 | 
			
		||||
package app
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"encoding/xml"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type FeedEntry struct {
 | 
			
		||||
	Title       string `xml:"title"`
 | 
			
		||||
	Link        string `xml:"link"`
 | 
			
		||||
	Author      string `xml:"author"`
 | 
			
		||||
	Guid        string `xml:"guid"`
 | 
			
		||||
	Description string `xml:"description"`
 | 
			
		||||
	Content     string `xml:",innerxml"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func ParseFeed(data []byte) ([]FeedEntry, error) {
 | 
			
		||||
 | 
			
		||||
	v := struct {
 | 
			
		||||
		Items []FeedEntry `xml:"channel>item"`
 | 
			
		||||
	}{}
 | 
			
		||||
 | 
			
		||||
	err := xml.Unmarshal(data, &v)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return v.Items, nil
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										0
									
								
								exp/csv/csv.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								exp/csv/csv.go
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										41
									
								
								exp/csv/main.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								exp/csv/main.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,41 @@
 | 
			
		||||
package main
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"bytes"
 | 
			
		||||
	"encoding/csv"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	"gopkg.in/yaml.v3"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func main() {
 | 
			
		||||
	fmt.Println("vim-go")
 | 
			
		||||
	fb := bytes.NewReader(nil)
 | 
			
		||||
	cr := csv.NewReader(fb)
 | 
			
		||||
	records, err := cr.ReadAll()
 | 
			
		||||
	fmt.Println(records, err)
 | 
			
		||||
	fmt.Println(time.Now().String())
 | 
			
		||||
	yesterday := time.Now().Add(-24 * time.Hour)
 | 
			
		||||
	m := map[string]time.Time{
 | 
			
		||||
		"Bala":  time.Now(),
 | 
			
		||||
		"Linus": yesterday,
 | 
			
		||||
	}
 | 
			
		||||
	data, _ := yaml.Marshal(&m)
 | 
			
		||||
	fmt.Printf("%s\n", data)
 | 
			
		||||
	//format := "2022-05-01T15:08:20.593630746-04:00"
 | 
			
		||||
	format := "2006-01-02T15:04:05.999999999-07:00"
 | 
			
		||||
	fmt.Println("============")
 | 
			
		||||
	fmt.Println(yesterday.Format(format))
 | 
			
		||||
	fmt.Println("============")
 | 
			
		||||
	fmt.Println(yesterday.GoString())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 | 
			
		||||
Linus:
 | 
			
		||||
 | 
			
		||||
============
 | 
			
		||||
2022-04-30T15:14:40.302916106-04:00
 | 
			
		||||
2022-04-30T15:14:40.302916106-04:00
 | 
			
		||||
*/
 | 
			
		||||
		Reference in New Issue
	
	Block a user