Skip to content

Commit 326281f

Browse files
committed
v0.2.5
1 parent 23b6974 commit 326281f

File tree

10 files changed

+369
-17
lines changed

10 files changed

+369
-17
lines changed

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,12 @@ You can also add the directory of the executable file to environment variable
2828

2929
or simply copy it to `/usr/local/bin`
3030

31-
## Subcommands (16 in total)
31+
## Subcommands (18 in total)
3232

3333
**Information**
3434

3535
- `stat` summary of CSV file
36+
- `stat2` summary of selected number fields
3637

3738
**Format conversion**
3839

@@ -92,6 +93,8 @@ to be continued...
9293
starts with `#`, please assign `-C` another rare symbol, e.g. `&`.
9394
4. By default, csvtk handles CSV files, use `-t` for tab-delimited files.
9495

96+
See more [examples](http://shenwei356.github.io/csvtk/usage/) and the [tutorial](http://shenwei356.github.io/csvtk/tutorial/).
97+
9598
Examples
9699

97100
1. Select fields/columns (`cut`)

csvtk/cmd/cut.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ var cutCmd = &cobra.Command{
4747

4848
printNames := getFlagBool(cmd, "colnames")
4949
if printNames && config.NoHeaderRow {
50-
checkError(fmt.Errorf("flag -n (--colnames) and -T (--no-header-row) should not given both"))
50+
checkError(fmt.Errorf("flag -n (--colnames) and -H (--no-header-row) should not given both"))
5151
}
5252

5353
fieldStr := getFlagString(cmd, "fields")

csvtk/cmd/helper.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ func NewCSVWriterChanByConfig(config Config) (chan []string, error) {
237237
}
238238

239239
var reFields = regexp.MustCompile(`([^,]+)(,[^,]+)*,?`)
240-
var reDigitals = regexp.MustCompile(`^[\-\d]+$`)
240+
var reDigitals = regexp.MustCompile(`^[\-\d\.e,E\+]+$`)
241241
var reDigitalRange = regexp.MustCompile(`^([\-\d]+?)\-([\-\d]+?)$`)
242242

243243
func getFlagFields(cmd *cobra.Command, flag string) string {
@@ -453,3 +453,15 @@ func parseCSVfile(cmd *cobra.Command, config Config, file string,
453453
}
454454
return HeaderRow, Data, fields
455455
}
456+
457+
// removeComma returns a copy of s with every ',' byte dropped and all other
// bytes kept in their original order. stat2 calls it on a field value before
// handing the text to strconv.ParseFloat.
func removeComma(s string) string {
458+
newSlice := []byte{}
459+
for i:=0; i<len(s); i++ {
460+
switch s[i] {
461+
// A comma is skipped: the empty case body appends nothing.
case ',':
462+
default:
463+
newSlice = append(newSlice, s[i])
464+
}
465+
}
466+
return string(newSlice)
467+
}

csvtk/cmd/root.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ var RootCmd = &cobra.Command{
3434
Short: "Another cross-platform, efficient and practical CSV/TSV toolkit",
3535
Long: `Another cross-platform, efficient and practical CSV/TSV toolkit
3636
37-
Version: 0.2.4
37+
Version: 0.2.5
3838
3939
Author: Wei Shen <[email protected]>
4040

csvtk/cmd/stat.go

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,13 @@
2121
package cmd
2222

2323
import (
24-
"fmt"
2524
"runtime"
2625

2726
"github.com/brentp/xopen"
2827
"github.com/spf13/cobra"
28+
29+
"github.com/tatsushid/go-prettytable"
30+
"github.com/dustin/go-humanize"
2931
)
3032

3133
// statCmd represents the stat command
@@ -44,6 +46,14 @@ var statCmd = &cobra.Command{
4446
checkError(err)
4547
defer outfh.Close()
4648

49+
50+
tbl, err := prettytable.NewTable([]prettytable.Column{
51+
{Header: "file"},
52+
{Header: "num_cols", AlignRight: true},
53+
{Header: "num_rows", AlignRight: true}}...)
54+
checkError(err)
55+
tbl.Separator = " "
56+
4757
for _, file := range files {
4858
csvReader, err := newCSVReaderByConfig(config, file)
4959
checkError(err)
@@ -63,8 +73,16 @@ var statCmd = &cobra.Command{
6373
once = false
6474
}
6575
}
66-
outfh.WriteString(fmt.Sprintf("file: %s num_cols: %d num_rows: %d\n", file, numCols, numRows))
76+
if !config.NoHeaderRow {
77+
numRows--
78+
}
79+
tbl.AddRow(
80+
file,
81+
humanize.Comma(int64(numCols)),
82+
humanize.Comma(int64(numRows)))
83+
6784
}
85+
outfh.Write(tbl.Bytes())
6886
},
6987
}
7088

csvtk/cmd/stat2.go

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
// Copyright © 2016 Wei Shen <[email protected]>
2+
//
3+
// Permission is hereby granted, free of charge, to any person obtaining a copy
4+
// of this software and associated documentation files (the "Software"), to deal
5+
// in the Software without restriction, including without limitation the rights
6+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7+
// copies of the Software, and to permit persons to whom the Software is
8+
// furnished to do so, subject to the following conditions:
9+
//
10+
// The above copyright notice and this permission notice shall be included in
11+
// all copies or substantial portions of the Software.
12+
//
13+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19+
// THE SOFTWARE.
20+
21+
package cmd
22+
23+
import (
24+
"fmt"
25+
"regexp"
26+
"runtime"
27+
"strconv"
28+
"sort"
29+
30+
"github.com/brentp/xopen"
31+
"github.com/spf13/cobra"
32+
"github.com/gonum/floats"
33+
"github.com/tatsushid/go-prettytable"
34+
"github.com/dustin/go-humanize"
35+
"github.com/gonum/stat"
36+
"github.com/shenwei356/util/math"
37+
)
38+
39+
// stat2Cmd represents the stat2 command: for each selected field of a single
// input file it collects the values as float64 and prints a table with the
// columns field, num, sum, min, max, mean and stdev.
40+
var stat2Cmd = &cobra.Command{
41+
Use: "stat2",
42+
Short: "summary of selected number fields",
43+
Long: `summary of selected number fields: num, sum, min, max, mean, stdev
44+
45+
`,
46+
Run: func(cmd *cobra.Command, args []string) {
47+
config := getConfigs(cmd)
48+
files := getFileList(args)
49+
// Only the first file (files[0]) is processed below, so more than one is an error.
if len(files) > 1 {
50+
checkError(fmt.Errorf("no more than one file should be given"))
51+
}
52+
runtime.GOMAXPROCS(config.NumCPUs)
53+
54+
fieldStr := getFlagString(cmd, "fields")
55+
// NOTE(review): the message says "--field" but the flag is registered as "fields" in init — confirm wording.
if fieldStr == "" {
56+
checkError(fmt.Errorf("flag -f (--field) needed"))
57+
}
58+
59+
fuzzyFields := getFlagBool(cmd, "fuzzy-fields")
60+
fields, colnames, negativeFields, needParseHeaderRow := parseFields(cmd, fieldStr, config.NoHeaderRow)
61+
var fieldsMap map[int]struct{}
62+
// When parseFields returned numeric fields, rebuild them as a slice plus a
// set; with negativeFields each value is multiplied by -1 first.
if len(fields) > 0 {
63+
fields2 := make([]int, len(fields))
64+
fieldsMap = make(map[int]struct{}, len(fields))
65+
for i, f := range fields {
66+
if negativeFields {
67+
fieldsMap[f*-1] = struct{}{}
68+
fields2[i] = f * -1
69+
} else {
70+
fieldsMap[f] = struct{}{}
71+
fields2[i] = f
72+
}
73+
}
74+
fields = fields2
75+
}
76+
77+
outfh, err := xopen.Wopen(config.OutFile)
78+
checkError(err)
79+
defer outfh.Close()
80+
81+
82+
file := files[0]
83+
csvReader, err := newCSVReaderByConfig(config, file)
84+
checkError(err)
85+
csvReader.Run()
86+
87+
parseHeaderRow := needParseHeaderRow // true until the header row has been consumed
88+
var colnames2fileds map[string]int // column name -> 1-based field number
89+
var colnamesMap map[string]*regexp.Regexp
90+
var HeaderRow []string
91+
var isHeaderRow bool
92+
93+
// checkFields is true only for the first record, where the selected fields
// are resolved against the actual number of columns.
checkFields := true
94+
95+
// data maps a 1-based field number to the values parsed from that column.
data := make(map[int][]float64)
96+
97+
for chunk := range csvReader.Ch {
98+
checkError(chunk.Err)
99+
100+
for _, record := range chunk.Data {
101+
if parseHeaderRow { // first record is the header row
102+
colnames2fileds = make(map[string]int, len(record))
103+
for i, col := range record {
104+
colnames2fileds[col] = i + 1
105+
}
106+
colnamesMap = make(map[string]*regexp.Regexp, len(colnames))
107+
for _, col := range colnames {
108+
// For negative selections the key drops the first character of the
// name (presumably the leading '-').
// NOTE(review): the regexp is still built from the full col — confirm
// fuzzyField2Regexp expects that.
if negativeFields {
109+
colnamesMap[col[1:]] = fuzzyField2Regexp(col)
110+
} else {
111+
colnamesMap[col] = fuzzyField2Regexp(col)
112+
}
113+
}
114+
115+
if len(fields) == 0 { // the user gave column names, not numbers
116+
fields = []int{}
117+
for _, col := range record {
118+
var ok bool
119+
// Fuzzy mode: a header column matches if any compiled pattern matches it;
// otherwise an exact key lookup is used.
if fuzzyFields {
120+
for _, re := range colnamesMap {
121+
if re.MatchString(col) {
122+
ok = true
123+
break
124+
}
125+
}
126+
} else {
127+
_, ok = colnamesMap[col]
128+
}
129+
// Keep matched columns for a positive selection, unmatched ones for a negative selection.
if (negativeFields && !ok) || (!negativeFields && ok) {
130+
fields = append(fields, colnames2fileds[col])
131+
}
132+
}
133+
}
134+
135+
fieldsMap = make(map[int]struct{}, len(fields))
136+
for _, f := range fields {
137+
fieldsMap[f] = struct{}{}
138+
}
139+
140+
HeaderRow = record
141+
parseHeaderRow = false
142+
isHeaderRow = true
143+
}
144+
if checkFields {
145+
// Walk the columns of the first record and keep the 1-based positions that
// are in fieldsMap (or, with negativeFields, those that are not).
fields2 := []int{}
146+
for f := range record {
147+
_, ok := fieldsMap[f+1]
148+
if negativeFields {
149+
if !ok {
150+
fields2 = append(fields2, f+1)
151+
}
152+
} else {
153+
if ok {
154+
fields2 = append(fields2, f+1)
155+
}
156+
}
157+
}
158+
fields = fields2
159+
if len(fields) == 0 {
160+
checkError(fmt.Errorf("no fields matched in file: %s", file))
161+
}
162+
163+
checkFields = false
164+
}
165+
166+
// The header row contributes no values; skip it after it was used above.
if isHeaderRow {
167+
isHeaderRow = false
168+
continue
169+
}
170+
for _, f := range fields {
171+
// Values must match reDigitals; commas are then removed before float parsing.
if !reDigitals.MatchString(record[f-1]) {
172+
checkError(fmt.Errorf("column %d has non-number data: %s", f, record[f-1]))
173+
}
174+
v, e := strconv.ParseFloat(removeComma(record[f-1]), 64)
175+
checkError(e)
176+
if _, ok := data[f]; !ok {
177+
data[f] = []float64{}
178+
}
179+
data[f] = append(data[f], v)
180+
}
181+
}
182+
}
183+
// Build the result table; every column except "field" is right-aligned.
tbl, err := prettytable.NewTable([]prettytable.Column{
184+
{Header: "field"},
185+
{Header: "num", AlignRight: true},
186+
{Header: "sum", AlignRight: true},
187+
{Header: "min", AlignRight: true},
188+
{Header: "max", AlignRight: true},
189+
{Header: "mean", AlignRight: true},
190+
{Header: "stdev", AlignRight: true}}...)
191+
checkError(err)
192+
tbl.Separator = " "
193+
194+
// Map iteration order is random, so collect the field numbers that have
// data and sort them to emit rows in ascending field order.
fields = []int{}
195+
for f := range data {
196+
fields = append(fields, f)
197+
}
198+
sort.Ints(fields)
199+
200+
var fieldS string
201+
for _, f := range fields {
202+
// Label the row with the column name when a header row was parsed, else with the field number.
if needParseHeaderRow {
203+
fieldS = HeaderRow[f-1]
204+
} else {
205+
fieldS = fmt.Sprintf("%d", f)
206+
}
207+
mean, stdev := stat.MeanStdDev(data[f], nil)
208+
// Each statistic goes through math.Round(x, 2) (presumably two decimal
// places — confirm in shenwei356/util/math) and is then comma-formatted.
tbl.AddRow(
209+
fieldS,
210+
humanize.Comma(int64(len(data[f]))),
211+
humanize.Commaf(math.Round(floats.Sum(data[f]),2)),
212+
humanize.Commaf(math.Round(floats.Min(data[f]),2)),
213+
humanize.Commaf(math.Round(floats.Max(data[f]),2)),
214+
humanize.Commaf(math.Round(mean,2)),
215+
humanize.Commaf(math.Round(stdev,2)))
216+
}
217+
outfh.Write(tbl.Bytes())
218+
},
219+
}
220+
221+
// init adds stat2Cmd to RootCmd and declares its two flags:
// -f/--fields (string, default "") and -F/--fuzzy-fields (bool, default false).
func init() {
222+
RootCmd.AddCommand(stat2Cmd)
223+
stat2Cmd.Flags().StringP("fields", "f", "", `select only these fields. e.g -f 1,2 or -f columnA,columnB`)
224+
stat2Cmd.Flags().BoolP("fuzzy-fields", "F", false, `using fuzzy fields, e.g. *name or id123*`)
225+
}

doc/docs/download.md

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66

77
## Current Version
88

9-
- [csvtk v0.2.4](https://github.com/shenwei356/csvtk/releases/tag/v0.2.4)
10-
- fix bug of handling comment lines
11-
- add some notes before using csvtk
9+
- [csvtk v0.2.5](https://github.com/shenwei356/csvtk/releases/tag/v0.2.5)
10+
- fix bug of `stat` that failed to take the header row into account when counting rows
11+
- add subcommand `stat2` - summary of selected number fields
12+
- make the output of `stat` prettier
1213

1314
## Installation
1415

@@ -32,6 +33,9 @@ You can also add the directory of the executable file to environment variable
3233

3334
## Previous Versions
3435

36+
- [csvtk v0.2.4](https://github.com/shenwei356/csvtk/releases/tag/v0.2.4)
37+
- fix bug of handling comment lines
38+
- add some notes before using csvtk
3539
- [csvtk v0.2.3](https://github.com/shenwei356/csvtk/releases/tag/v0.2.3)
3640
- add flag `--colnames` to `cut`
3741
- flag `-f` (`--fields`) of `join` supports single value now

0 commit comments

Comments
 (0)