This commit is contained in:
Urle Sistiana
2022-12-15 11:19:07 +08:00
commit fc8c213056
16 changed files with 1915 additions and 0 deletions

20
cmd/unpack/cmd.go Normal file
View File

@@ -0,0 +1,20 @@
package unpack
import (
"github.com/spf13/cobra"
"github.com/urlesistiana/v2dat/cmd"
)
// unpack is the parent command that groups the geosite/geoip
// unpacking subcommands, registered on the root command in init.
var unpack = &cobra.Command{
	Use:   "unpack",
	Short: "unpack geosite and geoip to text files.",
}
// init wires the geosite and geoip subcommands into unpack, then
// registers unpack on the application's root command.
func init() {
	unpack.AddCommand(newGeoSiteCmd(), newGeoIPCmd())
	cmd.RootCmd.AddCommand(unpack)
}
// AddCommand registers additional subcommands under the unpack command.
func AddCommand(cmds ...*cobra.Command) {
	for _, c := range cmds {
		unpack.AddCommand(c)
	}
}

116
cmd/unpack/geoip.go Normal file
View File

@@ -0,0 +1,116 @@
package unpack
import (
"bufio"
"fmt"
"github.com/spf13/cobra"
"github.com/urlesistiana/v2dat/v2data"
"go.uber.org/zap"
"io"
"net/netip"
"os"
"path/filepath"
"strings"
)
// newGeoIPCmd builds the "geoip" subcommand, which extracts entries
// from a geoip.dat file into plain-text CIDR lists.
func newGeoIPCmd() *cobra.Command {
	ua := new(unpackArgs)
	geoipCmd := &cobra.Command{
		Use:                   "geoip [-o output_dir] [-f tag]... geoip.dat",
		Args:                  cobra.ExactArgs(1),
		Short:                 "Unpack geoip file to text files.",
		DisableFlagsInUseLine: true,
		Run: func(cmd *cobra.Command, a []string) {
			ua.file = a[0]
			if err := unpackGeoIP(ua); err != nil {
				logger.Fatal("failed to unpack geoip", zap.Error(err))
			}
		},
	}
	fs := geoipCmd.Flags()
	fs.StringVarP(&ua.outDir, "out", "o", "", "output dir")
	fs.StringArrayVarP(&ua.filters, "filter", "f", nil, "unpack given tag")
	return geoipCmd
}
// unpackGeoIP reads a geoip.dat file and writes each selected entry's
// CIDR list to its own text file, one CIDR per line. If args.filters is
// empty, every entry in the file is unpacked.
//
// Fixes: local `ourDir` was a typo for outDir, and the loop variable
// `geoSite` was misnamed (this loop iterates geoip entries) — both
// renamed for consistency with unpackGeoSite.
func unpackGeoIP(args *unpackArgs) error {
	filePath, wantTags, outDir := args.file, args.filters, args.outDir
	b, err := os.ReadFile(filePath)
	if err != nil {
		return err
	}
	geoIPList, err := v2data.LoadGeoIPListFromDAT(b)
	if err != nil {
		return err
	}

	// Index all entries by lower-cased country code so filter tags
	// match case-insensitively.
	entries := make(map[string]*v2data.GeoIP)
	for _, geoIP := range geoIPList.GetEntry() {
		tag := strings.ToLower(geoIP.GetCountryCode())
		entries[tag] = geoIP
	}

	var wantEntries map[string]*v2data.GeoIP
	if len(wantTags) > 0 {
		wantEntries = make(map[string]*v2data.GeoIP, len(wantTags))
		for _, tag := range wantTags {
			entry, ok := entries[tag]
			if !ok {
				return fmt.Errorf("cannot find entry %s", tag)
			}
			wantEntries[tag] = entry
		}
	} else {
		wantEntries = entries
	}

	for tag, ipList := range wantEntries {
		file := fmt.Sprintf("%s_%s.txt", fileName(filePath), tag)
		if len(outDir) > 0 {
			file = filepath.Join(outDir, file)
		}
		logger.Info(
			"unpacking entry",
			zap.String("tag", tag),
			zap.Int("length", len(ipList.GetCidr())),
			zap.String("file", file),
		)
		if err := convertV2CidrToTextFile(ipList.GetCidr(), file); err != nil {
			return err
		}
	}
	return nil
}
// convertV2CidrToTextFile writes the given CIDR records to the named
// file, creating (or truncating) it first.
func convertV2CidrToTextFile(cidr []*v2data.CIDR, file string) error {
	out, err := os.Create(file)
	if err != nil {
		return err
	}
	defer out.Close()
	return convertV2CidrToText(cidr, out)
}
// convertV2CidrToText writes each CIDR record to w in "addr/bits" text
// form, one per line, validating both the address bytes and the prefix
// length of every record.
func convertV2CidrToText(cidr []*v2data.CIDR, w io.Writer) error {
	bw := bufio.NewWriter(w)
	for i, record := range cidr {
		ip, ok := netip.AddrFromSlice(record.Ip)
		if !ok {
			return fmt.Errorf("invalid ip at index #%d, %s", i, record.Ip)
		}
		prefix, err := ip.Prefix(int(record.Prefix))
		// Bug fix: this previously re-tested the stale `ok` flag from
		// AddrFromSlice instead of err, so an out-of-range prefix was
		// never rejected and a zero netip.Prefix was written out.
		if err != nil {
			return fmt.Errorf("invalid prefix at index #%d, %w", i, err)
		}
		if _, err := bw.WriteString(prefix.String()); err != nil {
			return err
		}
		if _, err := bw.WriteRune('\n'); err != nil {
			return err
		}
	}
	return bw.Flush()
}

130
cmd/unpack/geosite.go Normal file
View File

@@ -0,0 +1,130 @@
package unpack
import (
"bufio"
"fmt"
"github.com/spf13/cobra"
"github.com/urlesistiana/v2dat/v2data"
"go.uber.org/zap"
"io"
"os"
"path/filepath"
"strings"
)
// unpackArgs holds the command-line options shared by the geosite and
// geoip unpack subcommands.
type unpackArgs struct {
	outDir  string   // output directory for generated text files; cwd if empty
	file    string   // path of the .dat file to unpack (positional arg)
	filters []string // tags to extract (geosite allows tag@attr...); empty = all
}
// newGeoSiteCmd builds the "geosite" subcommand, which extracts entries
// from a geosite.dat file into plain-text domain lists.
func newGeoSiteCmd() *cobra.Command {
	ua := new(unpackArgs)
	geositeCmd := &cobra.Command{
		Use:                   "geosite [-o output_dir] [-f tag[@attr]...]... geosite.dat",
		Args:                  cobra.ExactArgs(1),
		Short:                 "Unpack geosite file to text files.",
		DisableFlagsInUseLine: true,
		Run: func(cmd *cobra.Command, a []string) {
			ua.file = a[0]
			if err := unpackGeoSite(ua); err != nil {
				logger.Fatal("failed to unpack geosite", zap.Error(err))
			}
		},
	}
	fs := geositeCmd.Flags()
	fs.StringVarP(&ua.outDir, "out", "o", "", "output dir")
	fs.StringArrayVarP(&ua.filters, "filter", "f", nil, "unpack given tag and attrs")
	return geositeCmd
}
// unpackGeoSite reads a geosite.dat file and writes each selected entry
// to its own text file, one domain rule per line. A filter may carry
// "@attr" suffixes to keep only domains having one of those attributes;
// with no filters, every entry is unpacked.
func unpackGeoSite(args *unpackArgs) error {
	filePath, suffixes, outDir := args.file, args.filters, args.outDir
	raw, err := os.ReadFile(filePath)
	if err != nil {
		return err
	}
	geoSiteList, err := v2data.LoadGeoSiteList(raw)
	if err != nil {
		return err
	}

	// Index entries by lower-cased country code for case-insensitive lookup.
	entries := make(map[string][]*v2data.Domain)
	for _, geoSite := range geoSiteList.GetEntry() {
		entries[strings.ToLower(geoSite.GetCountryCode())] = geoSite.GetDomain()
	}

	// save writes one entry's domains to "<datname>_<suffix>.txt".
	save := func(suffix string, data []*v2data.Domain) error {
		file := fmt.Sprintf("%s_%s.txt", fileName(filePath), suffix)
		if len(outDir) > 0 {
			file = filepath.Join(outDir, file)
		}
		logger.Info(
			"unpacking entry",
			zap.String("tag", suffix),
			zap.Int("length", len(data)),
			zap.String("file", file),
		)
		return convertV2DomainToTextFile(data, file)
	}

	if len(suffixes) == 0 { // If tag is omitted, unpack all tags.
		for tag, domains := range entries {
			if err := save(tag, domains); err != nil {
				return fmt.Errorf("failed to save %s, %w", tag, err)
			}
		}
		return nil
	}
	for _, suffix := range suffixes {
		tag, attrs := splitAttrs(suffix)
		entry, ok := entries[tag]
		if !ok {
			return fmt.Errorf("cannot find entry %s", tag)
		}
		if err := save(suffix, filterAttrs(entry, attrs)); err != nil {
			return fmt.Errorf("failed to save %s, %w", suffix, err)
		}
	}
	return nil
}
// convertV2DomainToTextFile writes the given domain records to the
// named file, creating (or truncating) it first.
func convertV2DomainToTextFile(domain []*v2data.Domain, file string) error {
	out, err := os.Create(file)
	if err != nil {
		return err
	}
	defer out.Close()
	return convertV2DomainToText(domain, out)
}
// convertV2DomainToText writes each domain record to w as one
// "prefix:value" rule per line (keyword:, regexp:, full:, or a bare
// value for plain domain-suffix records).
func convertV2DomainToText(domain []*v2data.Domain, w io.Writer) error {
	bw := bufio.NewWriter(w)
	for _, record := range domain {
		var typePrefix string
		switch record.Type {
		case v2data.Domain_Plain:
			typePrefix = "keyword:"
		case v2data.Domain_Regex:
			typePrefix = "regexp:"
		case v2data.Domain_Domain:
			typePrefix = ""
		case v2data.Domain_Full:
			typePrefix = "full:"
		default:
			return fmt.Errorf("invalid domain type %d", record.Type)
		}
		if _, err := bw.WriteString(typePrefix + record.Value + "\n"); err != nil {
			return err
		}
	}
	return bw.Flush()
}

54
cmd/unpack/utils.go Normal file
View File

@@ -0,0 +1,54 @@
package unpack
import (
"github.com/urlesistiana/v2dat/mlog"
"github.com/urlesistiana/v2dat/v2data"
"path/filepath"
"strings"
)
// logger is the package-level logger shared by all unpack subcommands.
var logger = mlog.L()
// splitAttrs splits a filter expression of the form "tag@attr1@attr2..."
// into its tag and a set of attributes. If the expression contains no
// '@', the attribute set is nil.
func splitAttrs(s string) (string, map[string]struct{}) {
	tag, rest, found := strings.Cut(s, "@")
	if !found {
		return s, nil
	}
	attrs := make(map[string]struct{})
	for _, a := range strings.Split(rest, "@") {
		attrs[a] = struct{}{}
	}
	return tag, attrs
}
// filterAttrs returns the entries of in that carry at least one of the
// given attrs. If no attr was given, filterAttrs returns in unchanged.
func filterAttrs(in []*v2data.Domain, attrs map[string]struct{}) []*v2data.Domain {
	if len(attrs) == 0 {
		return in
	}
	out := make([]*v2data.Domain, 0)
	for _, d := range in {
		for _, attr := range d.Attribute {
			if _, ok := attrs[attr.Key]; ok {
				out = append(out, d)
				break
			}
		}
	}
	return out
}
// fileName returns the base name of path f with its final extension
// (everything from the last '.') removed.
func fileName(f string) string {
	base := filepath.Base(f)
	return strings.TrimSuffix(base, filepath.Ext(base))
}