diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..19c2888 --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof +*.directory +doc/site/* +*.Rhistory diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..04113b2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright © 2016 Wei Shen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..afb3d00 --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ +csvtk + +Another cross-platform, efficient and practical CSV/TSV tool kit. 
+ +## commands + +**Information** + +- [x] stat + +**Format conversion** + +- [x] csv2tab +- [x] tab2csv +- space2tab +- [x] transpose + +**Set operations** + +- cut +- split +- grep + + +- join +- uniq +- inter + +**Ordering** + +- sort + +## Contact + +Email me about any problem you run into when using `csvtk`: shenwei356(at)gmail.com + +[Create an issue](https://github.com/shenwei356/csvtk/issues) to report bugs, +propose new functions or ask for help. + +## License + +[MIT License](https://github.com/shenwei356/csvtk/blob/master/LICENSE) diff --git a/csvtk/cmd/csv.go b/csvtk/cmd/csv.go new file mode 100644 index 0000000..bbc5d78 --- /dev/null +++ b/csvtk/cmd/csv.go @@ -0,0 +1,112 @@ +// Copyright © 2016 Wei Shen +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE.
+
+package cmd
+
+import (
+	"fmt"
+	"io"
+
+	"github.com/brentp/xopen"
+	"github.com/mzimmerman/multicorecsv"
+)
+
+// CSVRecordsChunk is one batch of CSV records delivered by CSVReader.Run.
+// Err is non-nil when reading stopped on an error other than io.EOF; in that
+// case Data holds the records collected before the error.
+type CSVRecordsChunk struct {
+	ID   uint64
+	Data [][]string
+	Err  error
+}
+
+// CSVReader reads records from a file in a background goroutine and hands
+// them out in chunks on the channel Ch.
+type CSVReader struct {
+	Reader *multicorecsv.Reader
+
+	bufferSize int
+	chunkSize  int
+	Ch         chan CSVRecordsChunk
+
+	fh *xopen.Reader
+}
+
+// NewCSVReader opens file and returns a CSVReader whose channel Ch is buffered
+// for bufferSize chunks of at most chunkSize records each. Both sizes must be
+// at least 1. Reading does not start until Run is called.
+func NewCSVReader(file string, bufferSize int, chunkSize int) (*CSVReader, error) {
+	switch {
+	case bufferSize < 1:
+		return nil, fmt.Errorf("value of bufferSize should be greater than 0")
+	case chunkSize < 1:
+		return nil, fmt.Errorf("value of chunkSize should be greater than 0")
+	}
+
+	fh, err := xopen.Ropen(file)
+	if err != nil {
+		return nil, err
+	}
+
+	return &CSVReader{
+		Reader:     multicorecsv.NewReader(fh),
+		bufferSize: bufferSize,
+		chunkSize:  chunkSize,
+		Ch:         make(chan CSVRecordsChunk, bufferSize),
+		fh:         fh,
+	}, nil
+}
+
+// Run starts a goroutine that reads records and sends them on Ch in chunks of
+// chunkSize records. At end of input the remaining records (possibly none) are
+// sent as a final chunk; on a read error the records collected so far are sent
+// together with the error. Afterwards Ch is closed, then the file handle and
+// the underlying reader are closed.
+func (r *CSVReader) Run() {
+	go func() {
+		// Deferred calls run last-in first-out, so the file handle is
+		// closed before the reader.
+		defer r.Reader.Close()
+		defer r.fh.Close()
+
+		var (
+			chunkID uint64
+			n       int
+		)
+		records := make([][]string, r.chunkSize)
+
+	reading:
+		for {
+			record, err := r.Reader.Read()
+			switch {
+			case err == io.EOF:
+				chunkID++
+				r.Ch <- CSVRecordsChunk{chunkID, records[:n], nil}
+				break reading
+			case err != nil:
+				// The error chunk reuses the ID of the last full chunk.
+				r.Ch <- CSVRecordsChunk{chunkID, records[:n], err}
+				break reading
+			}
+
+			records[n] = record
+			n++
+			if n != r.chunkSize {
+				continue
+			}
+
+			// The chunk is full: hand it over and start a fresh one so the
+			// receiver owns the slice that was sent.
+			chunkID++
+			r.Ch <- CSVRecordsChunk{chunkID, records, nil}
+			records = make([][]string, r.chunkSize)
+			n = 0
+		}
+		close(r.Ch)
+	}()
+}
diff --git a/csvtk/cmd/csv2tab.go b/csvtk/cmd/csv2tab.go
new file mode 100644
index 0000000..b84ffde
--- /dev/null
+++ b/csvtk/cmd/csv2tab.go
@@ -0,0 +1,68 @@
+// Copyright © 2016 Wei Shen
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software
and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package cmd
+
+import (
+	"encoding/csv"
+
+	"github.com/brentp/xopen"
+	"github.com/spf13/cobra"
+)
+
+// csv2tabCmd implements the "csv2tab" subcommand: every input file is read
+// with the reader configured by the global flags and its records are written
+// tab-delimited to the configured output file.
+var csv2tabCmd = &cobra.Command{
+	Use:   "csv2tab",
+	Short: "convert CSV to tabular format",
+	Long: `convert CSV to tabular format
+
+`,
+	Run: func(cmd *cobra.Command, args []string) {
+		cfg := getConfigs(cmd)
+		inputs := getFileList(args)
+
+		out, err := xopen.Wopen(cfg.OutFile)
+		checkError(err)
+		defer out.Close()
+
+		// The output delimiter is always a tab for this command.
+		tsv := csv.NewWriter(out)
+		tsv.Comma = '\t'
+
+		for i := range inputs {
+			rd, err := newCSVReaderByConfig(cfg, inputs[i])
+			checkError(err)
+
+			rd.Run()
+			for chunk := range rd.Ch {
+				checkError(chunk.Err)
+				for j := range chunk.Data {
+					checkError(tsv.Write(chunk.Data[j]))
+				}
+			}
+		}
+
+		tsv.Flush()
+		checkError(tsv.Error())
+	},
+}
+
+// init registers csv2tabCmd with the root command.
+func init() {
+	RootCmd.AddCommand(csv2tabCmd)
+}
diff --git a/csvtk/cmd/helper.go b/csvtk/cmd/helper.go
new file mode 100644
index
0000000..034fa88 --- /dev/null +++ b/csvtk/cmd/helper.go @@ -0,0 +1,208 @@ +// Copyright © 2016 Wei Shen +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+package cmd
+
+import (
+	"encoding/csv"
+	"fmt"
+	"os"
+
+	"github.com/brentp/xopen"
+	"github.com/spf13/cobra"
+)
+
+// checkError logs err and terminates the process with exit status -1 when err
+// is non-nil; it does nothing otherwise.
+func checkError(err error) {
+	if err != nil {
+		log.Error(err)
+		os.Exit(-1)
+	}
+}
+
+// getFileList returns the input files named by args, or a single "-" when no
+// arguments were given. Every argument other than "-" is checked with os.Stat
+// and the process exits via checkError if the file does not exist.
+func getFileList(args []string) []string {
+	if len(args) == 0 {
+		return []string{"-"}
+	}
+	// Iterate over args: the previous version ranged over a still-empty
+	// result slice, so the existence check never ran.
+	for _, file := range args {
+		if file == "-" {
+			continue
+		}
+		if _, err := os.Stat(file); os.IsNotExist(err) {
+			checkError(err)
+		}
+	}
+	return args
+}
+
+// getFlagInt returns the value of the int flag, exiting if the lookup fails.
+func getFlagInt(cmd *cobra.Command, flag string) int {
+	value, err := cmd.Flags().GetInt(flag)
+	checkError(err)
+	return value
+}
+
+// getFlagPositiveInt returns the value of the int flag, exiting if the lookup
+// fails or the value is not greater than 0.
+func getFlagPositiveInt(cmd *cobra.Command, flag string) int {
+	value, err := cmd.Flags().GetInt(flag)
+	checkError(err)
+	if value <= 0 {
+		checkError(fmt.Errorf("value of flag --%s should be greater than 0", flag))
+	}
+	return value
+}
+
+// getFlagNonNegativeInt returns the value of the int flag, exiting if the
+// lookup fails or the value is negative.
+func getFlagNonNegativeInt(cmd *cobra.Command, flag string) int {
+	value, err := cmd.Flags().GetInt(flag)
+	checkError(err)
+	if value < 0 {
+		checkError(fmt.Errorf("value of flag --%s should be greater than or equal to 0", flag))
+	}
+	return value
+}
+
+// getFlagBool returns the value of the bool flag, exiting if the lookup fails.
+func getFlagBool(cmd *cobra.Command, flag string) bool {
+	value, err := cmd.Flags().GetBool(flag)
+	checkError(err)
+	return value
+}
+
+// getFlagString returns the value of the string flag, exiting if the lookup
+// fails.
+func getFlagString(cmd *cobra.Command, flag string) string {
+	value, err := cmd.Flags().GetString(flag)
+	checkError(err)
+	return value
+}
+
+// getFlagRune returns the first rune of the string flag, or 0 when the flag
+// value is empty. It exits if the lookup fails or the value holds more than
+// one rune.
+func getFlagRune(cmd *cobra.Command, flag string) rune {
+	value, err := cmd.Flags().GetString(flag)
+	checkError(err)
+	// Count runes, not bytes, so that a single multi-byte character is
+	// accepted as a delimiter.
+	if len([]rune(value)) > 1 {
+		checkError(fmt.Errorf("value of flag --%s should has length of 1", flag))
+	}
+	for _, r := range value {
+		return r
+	}
+	return 0
+}
+
+// getFlagFloat64 returns the value of the float64 flag, exiting if the lookup
+// fails.
+func getFlagFloat64(cmd *cobra.Command, flag string) float64 {
+	value, err := cmd.Flags().GetFloat64(flag)
+	checkError(err)
+	return value
+}
+
+// getFlagInt64 returns the value of the int64 flag, exiting if the lookup
+// fails.
+func getFlagInt64(cmd *cobra.Command, flag string) int64 {
+	value, err := cmd.Flags().GetInt64(flag)
+	checkError(err)
+	return value
+}
+
+// getFlagStringSlice returns the value of the string-slice flag, exiting if
+// the lookup fails.
+func getFlagStringSlice(cmd *cobra.Command, flag string) []string {
+	value, err := cmd.Flags().GetStringSlice(flag)
+	checkError(err)
+	return value
+}
+
+// Config is the struct containing all global flags
+type Config struct {
+	ChunkSize int
+	NumCPUs   int
+
+	Delimiter    rune
+	OutDelimiter rune
+	// QuoteChar   rune
+	CommentChar rune
+	LazyQuotes  bool
+
+	Tabs        bool
+	OutTabs     bool
+	WithHeadRow bool
+
+	OutFile string
+}
+
+// getConfigs reads every global flag from cmd into a Config. Any lookup or
+// validation failure terminates the process via checkError.
+func getConfigs(cmd *cobra.Command) Config {
+	return Config{
+		ChunkSize: getFlagPositiveInt(cmd, "chunk-size"),
+		// The flag really is spelled "num-cups" where it is registered in
+		// root.go; the two names must stay in sync.
+		NumCPUs: getFlagPositiveInt(cmd, "num-cups"),
+
+		Delimiter:    getFlagRune(cmd, "delimiter"),
+		OutDelimiter: getFlagRune(cmd, "out-delimiter"),
+		// QuoteChar:   getFlagRune(cmd, "quote-char"),
+		CommentChar: getFlagRune(cmd, "comment-char"),
+		LazyQuotes:  getFlagBool(cmd, "lazy-quotes"),
+
+		Tabs:        getFlagBool(cmd, "tabs"),
+		OutTabs:     getFlagBool(cmd, "out-tabs"),
+		WithHeadRow: getFlagBool(cmd, "with-header-row"),
+
+		OutFile: getFlagString(cmd, "out-file"),
+	}
+}
+
+// newCSVReaderByConfig opens file with NewCSVReader, using config.NumCPUs as
+// the channel buffer size and config.ChunkSize as the chunk size, and applies
+// the delimiter, comment character and lazy-quotes settings. config.Tabs
+// takes precedence over config.Delimiter.
+func newCSVReaderByConfig(config Config, file string) (*CSVReader, error) {
+	reader, err := NewCSVReader(file, config.NumCPUs, config.ChunkSize)
+	if err != nil {
+		return nil, err
+	}
+	if config.Tabs {
+		reader.Reader.Comma = '\t'
+	} else {
+		reader.Reader.Comma = config.Delimiter
+	}
+	reader.Reader.Comment = config.CommentChar
+	reader.Reader.LazyQuotes = config.LazyQuotes
+
+	return reader, nil
+}
+
+// NewCSVWriterChanByConfig opens config.OutFile and returns a channel; records
+// sent on it are written by a background goroutine, delimited by a tab when
+// config.OutTabs is set and by config.OutDelimiter otherwise. Closing the
+// channel makes the goroutine flush the writer and close the file. A write or
+// flush error is reported with log.Fatal.
+//
+// NOTE(review): nothing signals when the goroutine has finished, so a caller
+// that returns right after closing the channel may exit before the final
+// flush has happened.
+func NewCSVWriterChanByConfig(config Config) (chan []string, error) {
+	outfh, err := xopen.Wopen(config.OutFile)
+	if err != nil {
+		return nil, err
+	}
+
+	ch := make(chan []string, config.NumCPUs)
+
+	writer := csv.NewWriter(outfh)
+	if config.OutTabs {
+		writer.Comma = '\t'
+	} else {
+		writer.Comma = config.OutDelimiter
+	}
+	go func() {
+		defer outfh.Close()
+		for record := range ch {
+			if err := writer.Write(record); err != nil {
+				log.Fatal("error writing record to csv:", err)
+			}
+		}
+		writer.Flush()
+		if err := writer.Error(); err != nil {
+			log.Fatal(err)
+		}
+	}()
+
+	return ch, nil
+}
diff --git a/csvtk/cmd/logging.go b/csvtk/cmd/logging.go
new file mode 100644
index 0000000..ab46cce
--- /dev/null
+++ b/csvtk/cmd/logging.go
@@ -0,0 +1,25 @@
+// Copyright © 2016 Wei Shen
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package cmd
+
+import "github.com/op/go-logging"
+
+var log = logging.MustGetLogger("csvtk")
diff --git a/csvtk/cmd/root.go b/csvtk/cmd/root.go
new file mode 100644
index 0000000..a5089bc
--- /dev/null
+++ b/csvtk/cmd/root.go
@@ -0,0 +1,76 @@
+// Copyright © 2016 Wei Shen
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package cmd
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/spf13/cobra"
+)
+
+// RootCmd is the base command of csvtk; every subcommand registers itself on
+// it from its own init function.
+var RootCmd = &cobra.Command{
+	Use:   "csvtk",
+	Short: "Another cross-platform, efficient and practical CSV/TSV tool kit",
+	Long: `Another cross-platform, efficient and practical CSV/TSV tool kit
+
+Version: 0.1
+
+Author: Wei Shen
+
+Documents : http://shenwei356.github.io/csvtk
+Source code: https://github.com/shenwei356/csvtk
+
+`,
+}
+
+// Execute runs RootCmd, which dispatches to the subcommand named on the
+// command line. It is called once by main.main(). If the command returns an
+// error, the error is printed to standard error and the process exits with
+// status -1.
+func Execute() {
+	if err := RootCmd.Execute(); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(-1)
+	}
+}
+
+// init registers the persistent flags shared by all subcommands. The flag
+// names must match the ones read by getConfigs.
+func init() {
+	RootCmd.PersistentFlags().IntP("chunk-size", "c", 50, `Chunk size of CSV reader`)
+	// NOTE: "num-cups" is the name getConfigs looks up; keep both in sync.
+	RootCmd.PersistentFlags().IntP("num-cups", "j", 50, `Number of CPUs to use`)
+
+	RootCmd.PersistentFlags().StringP("delimiter", "d", ",", `Delimiting character of the input CSV file`)
+	RootCmd.PersistentFlags().StringP("out-delimiter", "D", ",", `Delimiting character of the output CSV file`)
+	// RootCmd.PersistentFlags().StringP("quote-char", "q", `"`, `Character used to quote strings in the input CSV file`)
+	RootCmd.PersistentFlags().StringP("comment-char", "C", ``, `Comment character for start of line in the input CSV file`)
+	RootCmd.PersistentFlags().BoolP("lazy-quotes", "l", false, `If --lazy-quotes, a quote may appear in an unquoted field and a non-doubled quote may appear in a quoted field`)
+
+	RootCmd.PersistentFlags().BoolP("tabs", "t", false, `Specifies that the input CSV file is delimited with tabs. Overrides "-d"`)
+	RootCmd.PersistentFlags().BoolP("out-tabs", "T", false, `Specifies that the output is delimited with tabs. Overrides "-D"`)
+	RootCmd.PersistentFlags().BoolP("with-header-row", "H", false, `Specifies that the input CSV file has header row`)
+
+	RootCmd.PersistentFlags().StringP("out-file", "o", "-", `Out file ("-" for stdout, suffix .gz for gzipped out)`)
+}
diff --git a/csvtk/cmd/stat.go b/csvtk/cmd/stat.go
new file mode 100644
index 0000000..f88a6eb
--- /dev/null
+++ b/csvtk/cmd/stat.go
@@ -0,0 +1,71 @@
+// Copyright © 2016 Wei Shen
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package cmd
+
+import (
+	"fmt"
+
+	"github.com/brentp/xopen"
+	"github.com/spf13/cobra"
+)
+
+// statCmd implements the "stat" subcommand, which writes one summary line per
+// input file giving its number of columns and rows.
+var statCmd = &cobra.Command{
+	Use:   "stat",
+	Short: "summary of CSV file",
+	Long: `summary of CSV file
+
+`,
+	Run: func(cmd *cobra.Command, args []string) {
+		config := getConfigs(cmd)
+		files := getFileList(args)
+
+		outfh, err := xopen.Wopen(config.OutFile)
+		checkError(err)
+		defer outfh.Close()
+
+		for _, file := range files {
+			csvReader, err := newCSVReaderByConfig(config, file)
+			checkError(err)
+			csvReader.Run()
+
+			var numCols, numRows uint64
+			seenRecord := false
+			for chunk := range csvReader.Ch {
+				checkError(chunk.Err)
+
+				numRows += uint64(len(chunk.Data))
+				// The column count is taken from the first record only.
+				if !seenRecord && len(chunk.Data) > 0 {
+					numCols = uint64(len(chunk.Data[0]))
+					seenRecord = true
+				}
+			}
+
+			// Report write failures instead of silently dropping them.
+			_, err = fmt.Fprintf(outfh, "file: %s num_cols: %d num_rows: %d\n", file, numCols, numRows)
+			checkError(err)
+		}
+	},
+}
+
+// init registers statCmd with the root command.
+func init() {
+	RootCmd.AddCommand(statCmd)
+}
diff --git a/csvtk/cmd/tab2csv.go b/csvtk/cmd/tab2csv.go
new file mode 100644
index 0000000..326f0af
--- /dev/null
+++ b/csvtk/cmd/tab2csv.go
@@ -0,0 +1,69 @@
+// Copyright © 2016 Wei Shen
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package cmd
+
+import (
+	"encoding/csv"
+
+	"github.com/brentp/xopen"
+	"github.com/spf13/cobra"
+)
+
+// tab2csvCmd implements the "tab2csv" subcommand: every input file is parsed
+// as tab-delimited records, which are written comma-delimited to the
+// configured output file.
+var tab2csvCmd = &cobra.Command{
+	Use:   "tab2csv",
+	Short: "convert tabular format to CSV",
+	Long: `convert tabular format to CSV
+
+`,
+	Run: func(cmd *cobra.Command, args []string) {
+		cfg := getConfigs(cmd)
+		inputs := getFileList(args)
+
+		out, err := xopen.Wopen(cfg.OutFile)
+		checkError(err)
+		defer out.Close()
+
+		w := csv.NewWriter(out)
+		w.Comma = ','
+
+		for i := range inputs {
+			rd, err := newCSVReaderByConfig(cfg, inputs[i])
+			checkError(err)
+
+			// Input is always parsed as tab-delimited, overriding the
+			// delimiter chosen by the global flags.
+			rd.Reader.Comma = '\t'
+			rd.Run()
+
+			for chunk := range rd.Ch {
+				checkError(chunk.Err)
+				for j := range chunk.Data {
+					checkError(w.Write(chunk.Data[j]))
+				}
+			}
+		}
+
+		w.Flush()
+		checkError(w.Error())
+	},
+}
+
+// init registers tab2csvCmd with the root command.
+func init() {
+	RootCmd.AddCommand(tab2csvCmd)
+}
diff --git a/csvtk/cmd/transpose.go b/csvtk/cmd/transpose.go
new file mode 100644
index 0000000..9e16b59
--- /dev/null
+++ b/csvtk/cmd/transpose.go
@@ -0,0 +1,91 @@
+// Copyright © 2016 Wei Shen
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions
of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package cmd
+
+import (
+	"encoding/csv"
+	"fmt"
+
+	"github.com/brentp/xopen"
+	"github.com/spf13/cobra"
+)
+
+// transposeCmd implements the "transpose" subcommand. The records of all
+// input files are collected in memory, in order, and written out with rows
+// and columns swapped. Every record must have the same number of columns.
+var transposeCmd = &cobra.Command{
+	Use:   "transpose",
+	Short: "transpose CSV data",
+	Long: `transpose CSV data
+
+`,
+	Run: func(cmd *cobra.Command, args []string) {
+		config := getConfigs(cmd)
+		files := getFileList(args)
+
+		outfh, err := xopen.Wopen(config.OutFile)
+		checkError(err)
+		defer outfh.Close()
+
+		var data [][]string
+
+		// numCols is fixed by the first record seen; -1 means "not yet known".
+		numCols := -1
+		for _, file := range files {
+			csvReader, err := newCSVReaderByConfig(config, file)
+			checkError(err)
+			csvReader.Run()
+
+			for chunk := range csvReader.Ch {
+				checkError(chunk.Err)
+
+				for _, record := range chunk.Data {
+					// Validate every record, not just the first one of each
+					// file: a shorter row would otherwise cause an
+					// out-of-range panic while transposing.
+					switch {
+					case numCols < 0:
+						numCols = len(record)
+					case len(record) != numCols:
+						checkError(fmt.Errorf("unmatched number of columns in file %s: got %d, expected %d", file, len(record), numCols))
+					}
+					data = append(data, record)
+				}
+			}
+		}
+
+		writer := csv.NewWriter(outfh)
+		// Honour the global output-delimiter flags; with their default
+		// values the output stays comma-delimited.
+		if config.OutTabs {
+			writer.Comma = '\t'
+		} else {
+			writer.Comma = config.OutDelimiter
+		}
+
+		// Column j of the input becomes row j of the output.
+		for j := 0; j < numCols; j++ {
+			rowNew := make([]string, len(data))
+			for i, rowOld := range data {
+				rowNew[i] = rowOld[j]
+			}
+			checkError(writer.Write(rowNew))
+		}
+		writer.Flush()
+		checkError(writer.Error())
+	},
+}
+
+// init registers transposeCmd with the root command.
+func init() {
+	RootCmd.AddCommand(transposeCmd)
+}
diff --git a/csvtk/main.go b/csvtk/main.go
new file mode 100644
index 0000000..84d651e
---
/dev/null +++ b/csvtk/main.go @@ -0,0 +1,46 @@ +// Copyright © 2016 Wei Shen +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+package main
+
+import (
+	"os"
+
+	"github.com/op/go-logging"
+	"github.com/shenwei356/csvtk/csvtk/cmd"
+)
+
+// logFormat is the layout of every log line: the level in brackets,
+// wrapped in the color verbs, followed by the message.
+var logFormat = logging.MustStringFormatter(
+	// `%{color}%{time:15:04:05.000} %{shortfunc} ▶ %{level:.4s} %{id:03x}%{color:reset} %{message}`,
+	`%{color}[%{level:.4s}]%{color:reset} %{message}`,
+)
+
+// init installs a logging backend that writes to standard error using
+// logFormat.
+func init() {
+	logging.SetBackend(
+		logging.NewBackendFormatter(
+			logging.NewLogBackend(os.Stderr, "", 0),
+			logFormat,
+		),
+	)
+}
+
+// main hands control to the command tree defined in package cmd.
+func main() {
+	// CPU profiling hook, disabled:
+	// f, _ := os.Create("pprof")
+	// pprof.StartCPUProfile(f)
+	// defer pprof.StopCPUProfile()
+	cmd.Execute()
+}
diff --git a/testdata/c.csv b/testdata/c.csv
new file mode 100644
index 0000000..e8068be
--- /dev/null
+++ b/testdata/c.csv
@@ -0,0 +1,4 @@
+first_name,last_name,username
+"Rob","Pike",rob
+Ken,Thompson,ken
+"Robert","Griesemer","gri"
diff --git a/testdata/d.tsv b/testdata/d.tsv
new file mode 100644
index 0000000..b2a4dcf
--- /dev/null
+++ b/testdata/d.tsv
@@ -0,0 +1,4 @@
+1 2 3
+4 5 6
+7 8 0
+8 1,000 4