From a053ec6134563fc2fff2b864612d50fca72e9cfc Mon Sep 17 00:00:00 2001 From: Brad Date: Tue, 16 Sep 2025 17:29:51 +1000 Subject: [PATCH] feat: Implement regexp template support --- goldens/by_regex.in | 9 +++ goldens/by_regex.out | 9 +++ keepsorted/options.go | 47 ++++++++++++--- keepsorted/options_parser.go | 96 +++++++++++++++++++++---------- keepsorted/options_parser_test.go | 23 +++++++- keepsorted/options_test.go | 20 ++++++- 6 files changed, 163 insertions(+), 41 deletions(-) diff --git a/goldens/by_regex.in b/goldens/by_regex.in index 6f1de44..914211d 100644 --- a/goldens/by_regex.in +++ b/goldens/by_regex.in @@ -116,3 +116,12 @@ Cannot combine with ignore_prefixes 1 3 keep-sorted-test end + +Template rewrites + keep-sorted-test start by_regex=['^(Jan|(Feb|Mar|(Apr|May|(Jun|(Jul|(Aug|Sep|(Oct|(Nov|(Dec))))))))) (?<t_d>\d\d) (?<t_R>\d\d:\d\d) (?<t_Y>\d\d\d\d) ': '${t_Y} ${9}A${8}A${7}A${6}A${5}A${4}A${3}A${2}A${1} ${t_d} ${t_R}'] + Jun 23 09:00 2025 | nobody + Aug 26 09:00 2024 | nobody + Sep 02 09:00 2024 | nobody + Apr 14 09:00 2025 | nobody + Jul 28 09:00 2025 | nobody + keep-sorted-test end diff --git a/goldens/by_regex.out b/goldens/by_regex.out index 2b78b2f..41cbdb1 100644 --- a/goldens/by_regex.out +++ b/goldens/by_regex.out @@ -122,3 +122,12 @@ Cannot combine with ignore_prefixes 2 3 keep-sorted-test end + +Template rewrites + keep-sorted-test start by_regex=['^(Jan|(Feb|Mar|(Apr|May|(Jun|(Jul|(Aug|Sep|(Oct|(Nov|(Dec))))))))) (?<t_d>\d\d) (?<t_R>\d\d:\d\d) (?<t_Y>\d\d\d\d) ': '${t_Y} ${9}A${8}A${7}A${6}A${5}A${4}A${3}A${2}A${1} ${t_d} ${t_R}'] + Aug 26 09:00 2024 | nobody + Sep 02 09:00 2024 | nobody + Apr 14 09:00 2025 | nobody + Jun 23 09:00 2025 | nobody + Jul 28 09:00 2025 | nobody + keep-sorted-test end diff --git a/keepsorted/options.go b/keepsorted/options.go index c4e02ca..ed10687 100644 --- a/keepsorted/options.go +++ b/keepsorted/options.go @@ -35,6 +35,11 @@ import ( // true is unmarshaled as 1, false as 0. 
type IntOrBool int +type ByRegexOption struct { + Pattern *regexp.Regexp + Template *string +} + type BlockOptions struct { opts blockOptions } @@ -62,7 +67,7 @@ func (opts BlockOptions) String() string { // - []string: key=a,b,c,d // - map[string]bool: key=a,b,c,d // - int: key=123 -// - []*regexp.Regexp: key=a,b,c,d +// - []ByRegexOption: key=a,b,c,d, key=[yaml_list] type blockOptions struct { // AllowYAMLLists determines whether list.set valued options are allowed to be specified by YAML. AllowYAMLLists bool `key:"allow_yaml_lists"` @@ -97,7 +102,7 @@ type blockOptions struct { // IgnorePrefixes is a slice of prefixes that we do not consider when sorting lines. IgnorePrefixes []string `key:"ignore_prefixes"` // ByRegex is a slice of regexes that are used to extract the pieces of the line group that keep-sorted should sort by. - ByRegex []*regexp.Regexp `key:"by_regex"` + ByRegex []ByRegexOption `key:"by_regex"` //////////////////////////// // Post-sorting options // @@ -205,11 +210,21 @@ func formatValue(val reflect.Value) (string, error) { return strconv.Itoa(int(val.Int())), nil case reflect.TypeFor[int](): return strconv.Itoa(int(val.Int())), nil - case reflect.TypeFor[[]*regexp.Regexp](): - regexps := val.Interface().([]*regexp.Regexp) - vals := make([]string, len(regexps)) - for i, regex := range regexps { - vals[i] = regex.String() + case reflect.TypeFor[[]ByRegexOption](): + opts := val.Interface().([]ByRegexOption) + vals := make([]string, 0, len(opts)) + seenTemplate := false + for _, opt := range opts { + if opt.Template != nil { + seenTemplate = true + vals = append(vals, fmt.Sprintf(`%q: %q`, opt.Pattern.String(), *opt.Template)) + continue + } + vals = append(vals, opt.Pattern.String()) + } + if seenTemplate { + // always presented as a yaml sequence to preserve any `k:v` items + return fmt.Sprintf("[%s]", strings.Join(vals, ", ")), nil } return formatList(vals) } @@ -388,7 +403,23 @@ func (opts blockOptions) matchRegexes(s string) []regexMatch { } 
var ret []regexMatch - for _, regex := range opts.ByRegex { + for _, p := range opts.ByRegex { + regex := p.Pattern + + if p.Template != nil { + var result []byte + m := regex.FindAllStringSubmatchIndex(s, -1) + if m == nil { + ret = append(ret, regexDidNotMatch) + continue + } + for _, submatches := range m { + result = regex.ExpandString(result, *p.Template, s, submatches) + } + ret = append(ret, regexMatch{string(result)}) + continue + } + m := regex.FindStringSubmatch(s) if m == nil { ret = append(ret, regexDidNotMatch) diff --git a/keepsorted/options_parser.go b/keepsorted/options_parser.go index a858748..0d264a0 100644 --- a/keepsorted/options_parser.go +++ b/keepsorted/options_parser.go @@ -65,28 +65,12 @@ func (p *parser) popValue(typ reflect.Type) (reflect.Value, error) { case reflect.TypeFor[map[string]bool](): val, err := p.popSet() return reflect.ValueOf(val), err - case reflect.TypeFor[[]*regexp.Regexp](): - val, err := p.popList() + case reflect.TypeFor[[]ByRegexOption](): + val, err := p.popListRegexOption() if err != nil { return reflect.Zero(typ), err } - - ret := make([]*regexp.Regexp, len(val)) - var errs []error - for i, s := range val { - regex, err := regexp.Compile(s) - if err != nil { - errs = append(errs, err) - continue - } - ret[i] = regex - } - - if err := errors.Join(errs...); err != nil { - return reflect.Zero(typ), err - } - - return reflect.ValueOf(ret), nil + return reflect.ValueOf(val), nil } panic(fmt.Errorf("unhandled case in switch: %v", typ)) @@ -129,25 +113,78 @@ func (p *parser) popIntOrBool() (IntOrBool, error) { return IntOrBool(i), nil } -func (p *parser) popList() ([]string, error) { +func (ar *ByRegexOption) UnmarshalYAML(node *yaml.Node) error { + switch node.Tag { + case "!!str": + pat, err := regexp.Compile(node.Value) + if err != nil { + return err + } + ar.Pattern = pat + ar.Template = nil + return nil + case "!!map": + var m map[string]string + if err := node.Decode(&m); err != nil { + return err + } + if len(m) != 
1 { + return fmt.Errorf("by_regex map item must have exactly one key-value pair, but got %d", len(m)) + } + for pattern, template := range m { + pat, err := regexp.Compile(pattern) + if err != nil { + return fmt.Errorf("invalid regex pattern %q: %w", pattern, err) + } + ar.Pattern = pat + ar.Template = &template + return nil + } + } + + return fmt.Errorf("unexpected data type at %v", node.Tag) +} + +func popListValue[T any](p *parser, parse func(string) (T, error)) ([]T, error) { if p.allowYAMLLists { val, rest, err := tryFindYAMLListAtStart(p.line) if err != nil && !errors.Is(err, errNotYAMLList) { return nil, err } if err == nil { - p.line = rest - return parseYAMLList(val) + p.line = strings.TrimSpace(rest) + return parseYAMLList[T](val) } - - // err is errNotYAMLList, parse it as a regular list. } + val, rest, _ := strings.Cut(p.line, " ") - p.line = rest + p.line = strings.TrimSpace(rest) if val == "" { - return []string{}, nil + return []T{}, nil } - return strings.Split(val, ","), nil + + var ret []T + var errs []error + for _, item := range strings.Split(val, ",") { + v, err := parse(item) + if err != nil { + errs = append(errs, err) + continue + } + ret = append(ret, v) + } + return ret, errors.Join(errs...) 
+} + +func (p *parser) popList() ([]string, error) { + return popListValue(p, func(s string) (string, error) { return s, nil }) +} + +func (p *parser) popListRegexOption() ([]ByRegexOption, error) { + return popListValue(p, func(s string) (ByRegexOption, error) { + pat, err := regexp.Compile(s) + return ByRegexOption{Pattern: pat}, err + }) } func tryFindYAMLListAtStart(s string) (list, rest string, err error) { @@ -210,11 +247,12 @@ loop: return s[:iter.idx], s[iter.idx:], nil } -func parseYAMLList(list string) ([]string, error) { - var val []string +func parseYAMLList[T any](list string) ([]T, error) { + var val []T if err := yaml.Unmarshal([]byte(list), &val); err != nil { return nil, err } + return val, nil } diff --git a/keepsorted/options_parser_test.go b/keepsorted/options_parser_test.go index d7e43bd..284cab5 100644 --- a/keepsorted/options_parser_test.go +++ b/keepsorted/options_parser_test.go @@ -9,6 +9,9 @@ import ( ) var cmpRegexp = cmp.Comparer(func(a, b *regexp.Regexp) bool { + if a == nil || b == nil { + return a == b + } return a.String() == b.String() }) @@ -215,14 +218,30 @@ func TestPopValue(t *testing.T) { name: "Regex", input: ".*", - want: []*regexp.Regexp{regexp.MustCompile(".*")}, + want: []ByRegexOption{{regexp.MustCompile(".*"), nil}}, }, { name: "MultipleRegex", input: `[.*, abcd, '(?:efgh)ijkl']`, allowYAMLList: true, - want: []*regexp.Regexp{regexp.MustCompile(".*"), regexp.MustCompile("abcd"), regexp.MustCompile("(?:efgh)ijkl")}, + want: []ByRegexOption{ + {regexp.MustCompile(".*"), nil}, + {regexp.MustCompile("abcd"), nil}, + {regexp.MustCompile("(?:efgh)ijkl"), nil}, + }, + }, + { + name: "RegexTemplates", + + input: `[.*, Mon: 0, '\b(\d{2})/(\d{2})/(\d{4})\b': '${3}-${1}-${2}', "0: 1": 2]`, + allowYAMLList: true, + want: []ByRegexOption{ + {regexp.MustCompile(".*"), nil}, + {regexp.MustCompile("Mon"), &([]string{"0"})[0]}, + {regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`), &([]string{"${3}-${1}-${2}"})[0]}, + 
{regexp.MustCompile(`0: 1`), &([]string{"2"})[0]}, + }, }, { name: "IntOrBool_Int", diff --git a/keepsorted/options_test.go b/keepsorted/options_test.go index 562472d..13f4d1c 100644 --- a/keepsorted/options_test.go +++ b/keepsorted/options_test.go @@ -193,7 +193,23 @@ func TestBlockOptions(t *testing.T) { want: blockOptions{ AllowYAMLLists: true, - ByRegex: []*regexp.Regexp{regexp.MustCompile("(?:abcd)"), regexp.MustCompile("efg.*")}, + ByRegex: []ByRegexOption{ + {regexp.MustCompile("(?:abcd)"), nil}, {regexp.MustCompile("efg.*"), nil}, + }, + }, + }, + { + name: "RegexWithTemplate", + in: `by_regex=['.*', '\b(\d{2})/(\d{2})/(\d{4})\b': '${3}-${1}-${2}']`, + defaultOptions: blockOptions{AllowYAMLLists: true}, + + want: blockOptions{ + AllowYAMLLists: true, + ByRegex: []ByRegexOption{ + {Pattern: regexp.MustCompile(`.*`)}, + {Pattern: regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`), + Template: &[]string{"${3}-${1}-${2}"}[0]}, + }, }, }, } { @@ -309,7 +325,7 @@ func TestBlockOptions_regexTransform(t *testing.T) { t.Run(tc.name, func(t *testing.T) { var opts blockOptions for _, regex := range tc.regexes { - opts.ByRegex = append(opts.ByRegex, regexp.MustCompile(regex)) + opts.ByRegex = append(opts.ByRegex, ByRegexOption{regexp.MustCompile(regex), nil}) } gotTokens := opts.matchRegexes(tc.in)