forked from dongweiming/sed_and_awk
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
executable file
·1521 lines (1487 loc) · 43.9 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>sed & awk</title>
<meta name="description" content="sed 和 awk 使用教程">
<meta name="author" content="Dongweiming">
<link rel="stylesheet" href="css/reveal.min.css">
<link rel="stylesheet" href="css/theme/default.css" id="theme">
<link rel="stylesheet" href="lib/css/zenburn.css">
<!--[if lt IE 9]>
<script src="lib/js/html5shiv.js"></script>
<![endif]-->
</head>
<body>
<div class="reveal">
<div class="slides">
<section>
<p><span class="dongwmhead dongwmmenu">sed & awk</span></p>
<br>
<br>
<p class="dongwmright">姓名:<a href="#">董伟明</a></p>
<p class="dongwmright">日期:<a href="#"><font face="comic sans ms">2013-12-09</font></a></p>
<iframe src="http://ghbtns.com/github-btn.html?user=dongweiming&repo=sed_and_awk&type=fork"
allowtransparency="true" frameborder="0" scrolling="0" width="132" height="20"></iframe>
<iframe src="http://ghbtns.com/github-btn.html?user=dongweiming&repo=sed_and_awk&type=watch"
allowtransparency="true" frameborder="0" scrolling="0" width="165" height="20"></iframe>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen dongwmcsm">(一)sed</h2>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">学完本章的后果</h3>
<ol>
<li class="fragment dongwmcsm">把123变成[123](不要sed "s/\([0-9]\{3\}\)/[\1]/")</li>
<li class="fragment dongwmcsm">这是什么意思: echo "1\n2\n3\n4"|sed -n "/2/, +2p"(gnu sed)</li>
<li class="fragment dongwmcsm">这是什么意思: echo "1\n2\n3"|sed '2 c 4' (gnu sed)</li>
<li class="fragment dongwmcsm">把This is UPPER变成IS,This,upper(gnu sed)</li>
<li class="fragment dongwmcsm">这是什么意思: sed 'H;x;s/^\(.*\)\n\(.*\)/\2\1/'</li>
<li class="fragment dongwmcsm">实现tac命令的功能</li>
</ol>
</section>
<section>
<section>
<h3 class="dongwmtitle dongwmgreen">语法1</h3>
<pre><code contenteditable>
sed [options] {sed-commands} {input-file}
</code></pre>
<h3 class="dongwmtitle dongwmgreen">例子</h3>
<pre><code contenteditable>
# -n表示取消默认输出,p表示打印行
$sed -n 'p' /etc/passwd
# 只打印第三行
$sed -n '3p' /etc/passwd
# 打印1,3行
$sed -n '1,3p' /etc/passwd
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">语法2</h3>
<pre><code contenteditable>
$sed [options] -f {sed-commands-in-a-file} {input-file}
</code></pre>
<h3 class="dongwmtitle dongwmgreen">例子</h3>
<pre><code contenteditable>
# 打印以root开头或者nobody开头的行
$cat sed_example_1.sed
/^root/ p
/^nobody/ p
$ sed -n -f sed_example_1.sed /etc/passwd
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">语法3</h3>
<pre><code contenteditable>
sed [options] -e {sed-command-1} -e {sed-command-2} {input-file}
</code></pre>
<h3 class="dongwmtitle dongwmgreen">例子</h3>
<pre><code contenteditable>
# 打印以root开头或者nobody开头的行
$sed -n -e '/^root/ p' -e '/^nobody/ p' /etc/passwd
#或者
$sed -n \
-e '/^root/ p' \
-e '/^nobody/ p' \
/etc/passwd
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">语法4</h3>
<pre><code contenteditable>
sed [options] '{
sed-command-1
sed-command-2
}' input-file
</code></pre>
<h3 class="dongwmtitle dongwmgreen">例子</h3>
<pre><code contenteditable>
# 打印以root开头或者nobody结尾的行
sed -n '{
/^root/ p
/nobody$/ p
}' /etc/passwd
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">sed流</h2>
<br>
<ol>
<li class="fragment highlight-blue">读</li>
<li class="fragment highlight-blue">执行</li>
<li class="fragment highlight-blue">打印</li>
<li class="fragment highlight-blue">重复</li>
</ol>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">源文件</h3>
<pre><code contenteditable>
101,Ian Bicking,Mozilla
102,Hakim El Hattab,Whim
103,Paul Irish,Google
104,Addy Osmani,Google
105,Chris Wanstrath,Github
106,Mattt Thompson,Heroku
107,Ask Solem Hoel,VMware
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">范围</h3>
<pre><code contenteditable>
# 在freebsd版sed不能用
$/usr/local/bin/sed -n '1~2 p' source.txt
101,Ian Bicking,Mozilla
103,Paul Irish,Google
105,Chris Wanstrath,Github
107,Ask Solem Hoel,VMware
# 在freebsd版sed不能用
$/usr/local/bin/sed -n '2~3 p' source.txt
102,Hakim El Hattab,Whim
105,Chris Wanstrath,Github
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">模式匹配</h3>
<pre><code contenteditable>
# 寻找包含Paul的行
$sed -n '/Paul/ p' source.txt
103,Paul Irish,Google
# 从匹配Paul的行开始打印到第五行
$sed -n '/Paul/,5 p' source.txt
103,Paul Irish,Google
104,Addy Osmani,Google
105,Chris Wanstrath,Github
# 从匹配Paul的行打印到匹配Addy的行
$sed -n '/Paul/,/Addy/ p' source.txt
103,Paul Irish,Google
104,Addy Osmani,Google
# 在freebsd版sed不能用 匹配Paul行再多输出2行
$/usr/local/bin/sed -n '/Paul/,+2 p' source.txt
103,Paul Irish,Google
104,Addy Osmani,Google
105,Chris Wanstrath,Github
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">删除行</h3>
<pre><code contenteditable>
# 删除所有行
$sed 'd' source.txt
# 只删除第二行
$sed '2 d' source.txt
...
# 删除第一到第四行
$sed '1,4 d' source.txt
105,Chris Wanstrath,Github
106,Mattt Thompson,Heroku
107,Ask Solem Hoel,VMware
# 删除奇数行
$/usr/local/bin/sed '1~2 d' source.txt
102,Hakim El Hattab,Whim
104,Addy Osmani,Google
106,Mattt Thompson,Heroku
# 删除符合Paul到Addy的行
$sed '/Paul/,/Addy/d' source.txt
101,Ian Bicking,Mozilla
102,Hakim El Hattab,Whim
105,Chris Wanstrath,Github
106,Mattt Thompson,Heroku
107,Ask Solem Hoel,VMware
# 删除空行
$sed '/^$/ d' source.txt
# 删除注释行
$sed '/^#/ d' source.txt
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">重定向</h3>
<pre><code contenteditable>
# 将source.txt内容重定向写到output.txt
$sed 'w output.txt' source.txt
# 和上面一样,但是没有在终端显示
$sed -n 'w output.txt' source.txt
# 只写第二行
$ sed -n '2 w output.txt' source.txt
# 写一到四行到output.txt
$sed -n '1,4 w output.txt' source.txt
# 写匹配Ask的行到结尾行到output.txt
$sed -n '/Ask/,$ w output.txt' source.txt
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">替换</h3>
<pre><code contenteditable>
sed '[address-range|pattern-range] s/original-
string/replacement-string/[substitute-flags]' inputfile
</code></pre>
<h3 class="dongwmtitle dongwmgreen">例子</h3>
<pre><code contenteditable>
# 替换Google为Github
$sed 's/Google/Github/' source.txt
101,Ian Bicking,Mozilla
102,Hakim El Hattab,Whim
103,Paul Irish,Github
104,Addy Osmani,Github
105,Chris Wanstrath,Github
106,Mattt Thompson,Heroku
107,Ask Solem Hoel,VMware
# 替换匹配Addy的行里面的Google为Github
$sed '/Addy/s/Google/Github/' source.txt
101,Ian Bicking,Mozilla
102,Hakim El Hattab,Whim
103,Paul Irish,Google
104,Addy Osmani,Github
105,Chris Wanstrath,Github
106,Mattt Thompson,Heroku
107,Ask Solem Hoel,VMware
# 默认s只会替换一行中的第一个匹配项
$sed '1s/a/A/' source.txt|head -1
101,IAn Bicking,Mozilla
# g可以替换每行的全部匹配项
$sed '1s/a/A/g' source.txt|head -1
101,IAn Bicking,MozillA
# 可以直接指定想要替换的第N个匹配项,这里是第二个
$sed '1s/a/A/2' source.txt|head -1
101,Ian Bicking,MozillA
# 使用w将能够替换的行重定向写到output.txt
$sed -n 's/Mozilla/Github/w output.txt' source.txt
$cat output.txt
101,Ian Bicking,Github
# 还可以使用i忽略匹配的大小写,看来freebsd的不能用
$/usr/local/bin/sed '1s/ian/IAN/i' source.txt|head -1
101,IAN Bicking,Mozilla
# 这里有个新的文件
$cat files.txt
/etc/passwd
/etc/group
# 给每行前和后都添加点字符
$sed 's/\(.*\)/ls -l \1|head -1/' files.txt
ls -l /etc/passwd|head -1
ls -l /etc/group|head -1
# 我要用sed执行这个字符串命令了 无奈..mac上的sed都不行
dongwm@bj-1:~$ sed 's/^/ls -l /e' files.txt
-rw-r--r-- 1 root root 1627 Oct 14 14:30 /etc/passwd
-rw-r--r-- 1 root root 807 Oct 14 14:30 /etc/group
# sed分隔符不只可以使用'/'
$sed 's|/usr/local/bin|/usr/bin|' path.txt
$sed 's^/usr/local/bin^/usr/bin^' path.txt
$sed 's@/usr/local/bin@/usr/bin@' path.txt
$sed 's!/usr/local/bin!/usr/bin!' path.txt
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">替换覆盖</h3>
<pre><code contenteditable>
sed '{
s/Google/Github/
s/Git/git/
}' source.txt
101,Ian Bicking,Mozilla
102,Hakim El Hattab,Whim
103,Paul Irish,github
104,Addy Osmani,github
105,Chris Wanstrath,github
106,Mattt Thompson,Heroku
107,Ask Solem Hoel,VMware
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">神奇的&</h3>
<pre><code contenteditable>
$sed 's/^[0-9][0-9][0-9]/[&]/g' source.txt
[101],Ian Bicking,Mozilla
[102],Hakim El Hattab,Whim
[103],Paul Irish,Google
[104],Addy Osmani,Google
[105],Chris Wanstrath,Github
[106],Mattt Thompson,Heroku
[107],Ask Solem Hoel,VMware
$sed 's/^.*/&lt;&amp;&gt;/' source.txt
<101,Ian Bicking,Mozilla>
<102,Hakim El Hattab,Whim>
<103,Paul Irish,Google>
<104,Addy Osmani,Google>
<105,Chris Wanstrath,Github>
<106,Mattt Thompson,Heroku>
<107,Ask Solem Hoel,VMware>
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">正则</h3>
<pre><code contenteditable>
# ^表示匹配以什么开头
$sed -n '/^101/ p' source.txt
101,Ian Bicking,Mozilla
# $表示匹配以什么结尾
$sed -n '/Github$/ p' source.txt
105,Chris Wanstrath,Github
# .表示单个字符,下面的匹配一个逗号然后I然后2个单字符
$sed -n '/,I../ p' source.txt
101,Ian Bicking,Mozilla
# *表示匹配0个或者多个, \+表示匹配一个或者多个, \?表示匹配0个或者1个
# [0-9]表示匹配数字,下面匹配包含3或者4的行
$sed -n '/[34]/ p ' source.txt
103,Paul Irish,Google
104,Addy Osmani,Google
# -表示范围,这里匹配3,4,5
$sed -n '/[3-5]/ p ' source.txt
103,Paul Irish,Google
104,Addy Osmani,Google
105,Chris Wanstrath,Github
# |表示或者的关系
$/usr/local/bin/sed -n '/102\|103/ p ' source.txt
102,Hakim El Hattab,Whim
103,Paul Irish,Google
# 看一个文件
$cat numbers.txt
1
12
123
1234
12345
123456
# {m} 表示前面的匹配的重复次数
$sed -n '/^[0-9]\{5\}$/ p' numbers.txt
12345
#{m,n } 表示匹配m-n的次数都算
sed -n '/^[0-9]\{3,5\}$/ p' numbers.txt
123
1234
12345
# 删除所有注释行和空行
$sed -e 's/#.*//' -e '/^$/ d' /etc/profile
# 转化windows文件到unix格式
$sed 's/.$//' filename
#\1表示第一个正则匹配到的数据
$sed 's/\([^,]*\).*/\1/g' source.txt |head -1
101
#给每个单词第一个字母加括号
$echo "Dong Wei Ming" | /usr/local/bin/sed 's/\(\b[A-Z]\)/\(\1\)/g'
(D)ong (W)ei (M)ing
$/usr/local/bin/sed 's/\(^\|[^0-9.]\)\([0-9]\+\)\([0-9]\{3\}\)/\1\2,\3/g' numbers.txt
1
12
123
1,234
12,345
123,456
# 只取第一和第三列,并且换了他们的位置
$sed 's/\([^,]*\),\([^,]*\),\([^,]*\).*/\3,\1/g' source.txt
Mozilla,101
Whim,102
Google,103
Google,104
Github,105
Heroku,106
VMware,107
</code></pre>
</section>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">Gnu sed</h3>
<pre><code contenteditable>
# \l能将后面的一个字符变成小写
$sed 's/Ian/IAN/' source.txt|head -1
101,IAN Bicking,Mozilla
$/usr/local/bin/sed 's/Ian/IA\lN/' source.txt|head -1
101,IAn Bicking,Mozilla
# \L能将后面的字符都变成小写
$/usr/local/bin/sed 's/Ian/I\LAN/' source.txt|head -1
101,Ian Bicking,Mozilla
# \u能将后面的一个字符变成大写
$/usr/local/bin/sed 's/Ian/IA\un/' source.txt|head -1
101,IAN Bicking,Mozilla
# \U能将后面的字都变成大写
$/usr/local/bin/sed 's/Ian/\Uian/' source.txt|head -1
101,IAN Bicking,Mozilla
# \E能打断\L或者\U改变大小写
$/usr/local/bin/sed 's/Ian/\Uia\En/' source.txt|head -1
101,IAn Bicking,Mozilla
# 使用以上功能:调换前2列,把名字列全部大写,公司列全部小写
$/usr/local/bin/sed 's/\([^,]*\),\([^,]*\),\(.*\).*/\U\2\E,\1,\L\3/g' source.txt
IAN BICKING,101,mozilla
HAKIM EL HATTAB,102,whim
PAUL IRISH,103,google
ADDY OSMANI,104,google
CHRIS WANSTRATH,105,github
MATTT THOMPSON,106,heroku
ASK SOLEM HOEL,107,vmware
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">sed可执行脚本</h3>
<pre><code contenteditable>
$cat testscript.sed
#!/usr/bin/sed -nf
/root/ p
/nobody/ p
$chmod u+x testscript.sed
$./testscript.sed /etc/passwd
nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false
root:*:0:0:System Administrator:/var/root:/bin/sh
daemon:*:1:1:System Services:/var/root:/usr/bin/false
_cvmsroot:*:212:212:CVMS Root:/var/empty:/usr/bin/false
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">sed修改源文件和备份</h3>
<pre><code contenteditable>
#-i会修改源文件,但是可以同时使用bak备份
$sed -ibak 's/Ian/IAN/' source.txt
# or
sed --in-place=bak 's/Ian/IAN/' source.txt
# 这样会存在一个文件source.txtbak
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">行后增加语法</h3>
<pre><code contenteditable>
sed '[address] a the-line-to-append' input-file
</code></pre>
<h3 class="dongwmtitle dongwmgreen">例子</h3>
<pre><code contenteditable>
$/usr/local/bin/sed '2 a 108,Donald Stufft, Nebula' source.txt
101,IAN Bicking,Mozilla
102,Hakim El Hattab,Whim
108,Donald Stufft, Nebula
103,Paul Irish,Google
104,Addy Osmani,Google
105,Chris Wanstrath,Github
106,Mattt Thompson,Heroku
107,Ask Solem Hoel,VMware
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">行前插入语法</h3>
<pre><code contenteditable>
sed '[address] i the-line-to-insert' input-file
</code></pre>
<h3 class="dongwmtitle dongwmgreen">例子</h3>
<pre><code contenteditable>
$/usr/local/bin/sed '2 i 108,Donald Stufft, Nebula' source.txt
101,IAN Bicking,Mozilla
108,Donald Stufft, Nebula
102,Hakim El Hattab,Whim
103,Paul Irish,Google
104,Addy Osmani,Google
105,Chris Wanstrath,Github
106,Mattt Thompson,Heroku
107,Ask Solem Hoel,VMware
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">修改行语法</h3>
<pre><code contenteditable>
sed '[address] c the-new-line' input-file
</code></pre>
<h3 class="dongwmtitle dongwmgreen">例子</h3>
<pre><code contenteditable>
# 修改符合Paul行为...
$/usr/local/bin/sed '/Paul/ c 108,Donald Stufft, Nebula' source.txt
101,IAN Bicking,Mozilla
102,Hakim El Hattab,Whim
108,Donald Stufft, Nebula
104,Addy Osmani,Google
105,Chris Wanstrath,Github
106,Mattt Thompson,Heroku
107,Ask Solem Hoel,VMware
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">sed其他</h3>
<pre><code contenteditable>
# l命令会显示隐藏字符比如'\t', = 命令可以显示行号
$sed -n -e 'l' -e '=' source.txt
# y会翻译你要转换的字符,这里I会转化成i,B转换成b
$sed 'y/IB/ib/' source.txt |head -1
101,iAN bicking,Mozilla
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen dongwmcsm">(二)Sed高级话题</h2>
<small><h3 class="dongwmblue">保持空间和模式空间</h3></small>
</section>
<section>
<section>
<h3 class="dongwmtitle dongwmgreen">源码</h3>
<pre><code contenteditable>
#先看一个文件
$cat source2.txt
Ian Bicking
Mozilla
Hakim El Hattab
Whim
Paul Irish
Google
Chris Wanstrath
Github
Mattt Thompson
Heroku
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">交换模式空间</h2>
<pre><code contenteditable>
# 通过公司找到这个人, x命令交换当前行到保持空间,
# n读取下一行到模式空间; 匹配这个模式空间, 假如符合,再交换模式空间,打印
$/usr/local/bin/sed -n -e 'x;n' -e '/Whim/{x;p}' source2.txt
Hakim El Hattab
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">拷贝模式空间到保持空间</h2>
<pre><code contenteditable>
# 还是前面的需求.h拷贝模式空间到保持空间,
# 如果当前模式空间不匹配, 就拷贝模式空间到保持空间. 他们会一样
# 但是当Whim匹配保持空间还是上面一行关于名字的缓存
$/usr/local/bin/sed -n -e '/Whim/!h' -e '/Whim/{x;p}' source2.txt
Hakim El Hattab
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">增加模式空间到保持空间</h2>
<pre><code contenteditable>
# 当我根据Whim想显示他的名字和公司名字呢? - 给保持空间多加一个
# H表示增加模式空间到保持空间
$/usr/local/bin/sed -n -e '/Whim/!h' -e '/Whim/{H;x;p}' source2.txt
Hakim El Hattab
Whim
# 显示的好看一点
$/usr/local/bin/sed -n -e '/Whim/!h' -e '/Whim/{H;x;s/\n/:/;p}' source2.txt
Hakim El Hattab:Whim
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">拷贝保持空间到模式空间</h2>
<pre><code contenteditable>
# 还是前面的需求.g拷贝保持空间到模式空间
$/usr/local/bin/sed -n -e '/Whim/!h' -e '/Whim/{g;p}' source2.txt
Hakim El Hattab
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">增加保持空间到模式空间</h2>
<pre><code contenteditable>
# 前面的前面, 输出是Hakim El Hattab:Whim 怎么样翻转呢?
# G就是反向的
$/usr/local/bin/sed -n -e '/Whim/!h' -e '/Whim/{G;s/\n/:/;p}' source2.txt
Whim:Hakim El Hattab
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">增加下一行到模式空间</h2>
<pre><code contenteditable>
$/usr/local/bin/sed -e '{N;s/\n/:/}' source2.txt
Ian Bicking:Mozilla
Hakim El Hattab:Whim
Paul Irish:Google
Chris Wanstrath:Github
Mattt Thompson:Heroku
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">label</h2>
<pre><code contenteditable>
# 给匹配Github的行,在使用名字:公司的前面加一个*,
#只有匹配Github的模式空间和保持空间才会执行s/^/*/
$cat label.sed
#!/usr/local/bin/sed -nf
h;n;H;x
s/\n/:/
/Github/!b end
s/^/*/
:end p
$chmod u+x label.sed
./label.sed source2.txt
Ian Bicking:Mozilla
Hakim El Hattab:Whim
Paul Irish:Google
*Chris Wanstrath:Github
Mattt Thompson:Heroku
</code></pre>
</section>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">例子</h3>
<pre><code contenteditable>
# 把mac地址的冒号替换掉
"130531170341903612","259594",2013-05-31T09:04:25Z,"1c:b0:94:b2:85:bd"
</code></pre>
<ol>
<li class="fragment dongwmcsm">sed "s/\([0-9a-zA-Z]\{2\}\):\([0-9a-zA-Z]\{2\}\):\([0-9a-zA-Z]\{2\}\):\([0-9a-zA-Z]\{2\}\):\([0-9a-zA-Z]\{2\}\):\([0-9a-zA-Z]\{2\}\)/\1\2\3\4\5\6/"</li>
<li class="fragment dongwmcsm">sed "s/\(.*\):\(.*\):\(.*\):\(.*\):\(.*\):\(.*\)/\1\2\3\4\5\6/"</li>
<li class="fragment dongwmcsm">sed -e "s/T\(.*\):\(.*\):\(.*\)Z/\1#\2#\3/" -e "s/://g" -e "s/#/:/g"</li>
</ol>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen dongwmcsm">(三)awk</h2>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">学完本章的后果</h3>
<ol>
<li class="fragment dongwmcsm">获取来连接mongodb的各服务器产生的连接数</li>
<li class="fragment dongwmcsm">获取nginx日志ip访问数在一个小时里面,从多到少的总次数排行(排行用到了sort)</li>
<li class="fragment dongwmcsm">打印question_awk3.txt包含Addy和Mattt之间的行</li>
<li class="fragment dongwmcsm">不使用sort过滤重复行</li>
<li class="fragment dongwmcsm">将2个文件合并成一行,但username不一定在2个文件同一行</li>
<li class="fragment dongwmcsm">输出2个文件1中有,2中没有的行</li>
<li class="fragment dongwmcsm">这是什么意思: gawk -vcmd='ls -l' 'BEGIN{while ( (cmd | getline var) > 0) {print var} close(cmd)}'</li>
</ol>
</section>
<section>
<section>
<h3 class="dongwmtitle dongwmgreen">语法1</h3>
<pre><code contenteditable>
awk -Fs '/pattern/ {action}' input-file
#or
awk -Fs '{action}' input-file
# -F表示设置分隔符,不指定就是默认为空白字符(空格或制表符)
</code></pre>
<h3 class="dongwmtitle dongwmgreen">例子</h3>
<pre><code contenteditable>
# 用:分割,查找匹配mail的行并且打印冒号分割后的第一部分
awk -F: '/mail/ {print $1}' /etc/passwd
_mailman
_clamav
_amavisd
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">awk数据结构</h3>
<pre><code contenteditable>
# 1 BEGIN { awk-commands } 在执行awk body之前执行这个awk-commands,而且只一次
# 2 /pattern/ {action} body部分,也就是awk要执行的主体,比如十行,那么这个主体就调用10次
# 3 END { awk-commands } 在执行完body之后执行,也是只一次
$awk 'BEGIN { FS=":";print "---header---" } /mail/ {print $1} \
END { print "---footer---"}' /etc/passwd
---header---
_mailman
_clamav
_amavisd
---footer---
# 当然可以只有其中一种或者几种数据结构
awk -F: 'BEGIN { print "UID"} { print $3 }' /etc/passwd |\
sed -e '/^$/ d'|head -2
UID
-2
$ awk 'BEGIN { print "Hello World!" }'
Hello World!
</code></pre>
</section>
<section>
<h3 class="dongwmtitle dongwmgreen">源码2</h3>
<pre><code contenteditable>
# 这是一个文件,分别是id, 描述, 类别, 价钱和库存
$cat items.txt
101,HD Camcorder,Video,210,10
102,Refrigerator,Appliance,850,2
103,MP3 Player,Audio,270,15
104,Tennis Racket,Sports,190,20
105,Laser Printer,Office,475,5
</code></pre>
<h3 class="dongwmtitle dongwmgreen">源码3</h3>
<pre><code contenteditable>
# 这是一个销售数据,分别是id和1-6月的销售情况
$cat items-sold.txt
101 2 10 5 8 10 12
102 0 1 4 3 0 2
103 10 6 11 20 5 13
104 2 3 4 0 6 5
105 10 2 5 7 12 6
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">print</h2>
<pre><code contenteditable>
# 默认print就是打印文件全文到终端
$awk '{print}' source.txt
# 下面是通过,分割,输出第二段。$0表示全行,类似shell用法
$awk -F ',' '{print $2}' source.txt
Ian Bicking
Hakim El Hattab
Paul Irish
Addy Osmani
Chris Wanstrath
Mattt Thompson
Ask Solem Hoel
# or
$awk -F "," '{print $2}' source.txt
$awk -F, '{print $2}' source.txt
# 一个格式化更好看些的效果
awk -F ',' 'BEGIN \
{ print "-------------\nName\tComp\n-------------"} \
{ print $2,"\t",$3;} \
END { print "-------------"; }' source.txt
-------------
Name Comp
-------------
Ian Bicking Mozilla
Hakim El Hattab Whim
Paul Irish Google
Addy Osmani Google
Chris Wanstrath Github
Mattt Thompson Heroku
Ask Solem Hoel VMware
-------------
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">模式匹配</h2>
<pre><code contenteditable>
# 用逗号做分隔符, 打印第二和第三列
$awk -F ',' '/Whim/ {print $2, $3}' source.txt
Hakim El Hattab Whim
# 可以加点格式化语句
$awk -F ',' '/Whim/ {print "Whim\"s name:", $2}' source.txt
Whim"s name: Hakim El Hattab
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">awk内置变量 - FS</h2>
<pre><code contenteditable>
$awk -F ',' '{print $2, $3}' source.txt
Ian Bicking Mozilla
Hakim El Hattab Whim
Paul Irish Google
Addy Osmani Google
Chris Wanstrath Github
Mattt Thompson Heroku
Ask Solem Hoel VMware
# 可以使用内置的FS - 输入字段分隔符 实现相同的功能
$awk 'BEGIN {FS=","} {print $2, $3}' source.txt
# 先看一个文件
$cat source-multiple-fs.txt
101,Ian Bicking:Mozilla%
102,Hakim El Hattab:Whim%
103,Paul Irish:Google%
104,Addy Osmani:Google%
105,Chris Wanstrath:Github%
106,Mattt Thompson:Heroku%
107,Ask Solem Hoel:VMware%
# 发现上面的分隔符有三种:逗号、冒号和百分号,这样就可以这样使用
$awk 'BEGIN {FS="[,:%]"} {print $2, $3}' source-multiple-fs.txt
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">awk内置变量 - OFS</h2>
<pre><code contenteditable>
$awk -F ',' '{print $2, ":", $3}' source.txt
Ian Bicking : Mozilla
Hakim El Hattab : Whim
Paul Irish : Google
Addy Osmani : Google
Chris Wanstrath : Github
Mattt Thompson : Heroku
Ask Solem Hoel : VMware
# 其实可以用内置的OFS - 输出字段分隔符
$awk -F ',' 'BEGIN { OFS=":" } { print $2, $3 }' source.txt
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">awk内置变量 - RS</h2>
<pre><code contenteditable>
$cat source-one-line.txt
1,one:2,two:3,three:4,four
# 现在我想分割成(1,one),(2, two)这样的效果
$awk -F, '{print $2}' source-one-line.txt
one:2
# 这个没有实现我想要的效果
# 使用RS - 记录分隔符, 他能帮你把单行内容先分割然后再按-F分割
$awk -F, 'BEGIN { RS=":" } { print $2 }' source-one-line.txt
one
two
three
four
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">awk内置变量 - ORS</h2>
<pre><code contenteditable>
# RS是输入, ORS就是输出
$awk 'BEGIN { FS=","; OFS="\n";ORS="\n---\n" } \
{print $1,$2,$3}' source.txt|head -8
101
Ian Bicking
Mozilla
---
102
Hakim El Hattab
Whim
---
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">awk内置变量 - NR</h2>
<pre><code contenteditable>
# NR是记录的数目
$awk 'BEGIN {FS=","} \
{print "Emp Id of record number",NR,"is",$1;} \
END {print "Total number of records:",NR}' source.txt
Emp Id of record number 1 is 101
Emp Id of record number 2 is 102
Emp Id of record number 3 is 103
Emp Id of record number 4 is 104
Emp Id of record number 5 is 105
Emp Id of record number 6 is 106
Emp Id of record number 7 is 107
Total number of records: 7
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">awk内置变量 - FILENAME,FNR</h2>
<pre><code contenteditable>
# FILENAME显示了当前文件, FNR关联到当前文件的记录数
awk 'BEGIN {FS=","} \
{print FILENAME ": record number",FNR,"is",$1;} \
END {print "Total number of records:",NR}' \
source.txt source-multiple-fs.txt
source.txt: record number 1 is 101
source.txt: record number 2 is 102
source.txt: record number 3 is 103
source.txt: record number 4 is 104
source.txt: record number 5 is 105
source.txt: record number 6 is 106
source.txt: record number 7 is 107
source-multiple-fs.txt: record number 1 is 101
source-multiple-fs.txt: record number 2 is 102
source-multiple-fs.txt: record number 3 is 103
source-multiple-fs.txt: record number 4 is 104
source-multiple-fs.txt: record number 5 is 105
source-multiple-fs.txt: record number 6 is 106
source-multiple-fs.txt: record number 7 is 107
Total number of records: 14
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">awk变量</h2>
<pre><code contenteditable>
# 变量支持数字,字符和下划线
# 这个文件多加了一列star数, 现在我想统计整个文件的star
$cat source-star.txt
101,Ian Bicking,Mozilla,1204
102,Hakim El Hattab,Whim,4029
103,Paul Irish,Google,7200
104,Addy Osmani,Google,2201
105,Chris Wanstrath,Github,1002
106,Mattt Thompson,Heroku,890
107,Ask Solem Hoel,VMware,2109
# 使用awk的变量,在begin的时候声明total,在body体里面
# 累加total值,在end里面打印
$cat total-star.awk
BEGIN {
FS=",";
total=0; }
{
print $2 "'s star is: " $4;
total=total+$4
} END {
print "---\nTotal star = *"total;
}
awk -f total-star.awk source-star.txt
Ian Bicking's star is: 1204
Hakim El Hattab's star is: 4029
Paul Irish's star is: 7200
Addy Osmani's star is: 2201
Chris Wanstrath's star is: 1002
Mattt Thompson's star is: 890
Ask Solem Hoel's star is: 2109
---
Total star = *18635
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">自增长/减少</h2>
<pre><code contenteditable>
# 可以使用++或者--,但是注意前后
$awk -F, '{print --$4}' source-star.txt
1203
4028
7199
2200
1001
889
2108
$awk -F, '{print $4--}' source-star.txt
# 咦 竟然没有变
1204
4029
7200
2201
1002
890
2109
# 想达到--$4的目的就得这样
$awk -F ',' '{$4--; print $4}' source-star.txt
1203
4028
7199
2200
1001
889
2108
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">字符串操作</h2>
<pre><code contenteditable>
$cat string.awk
BEGIN {
FS=",";
OFS=",";
string1="GO";
string2="OGLE";
numberstring="100";
string3=string1 string2;
print "Concatenate string is:" string3;
numberstring=numberstring+1;
print "String to number:" numberstring;
}
# 字符串会直接相连, 字符串相加会自动转化成数字相加
$awk -f string.awk
Concatenate string is:GOOGLE
String to number:101
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">复合运算</h2>
<pre><code contenteditable>
$cat assignment.awk
BEGIN {
FS=",";
OFS=",";
total1 = total2 = total3 = total4 = total5 = 10;
total1 += 5; print total1;
total2 -= 5; print total2;
total3 *= 5; print total3;
total4 /= 5; print total4;
total5 %= 5; print total5;
}
$awk -f assignment.awk
15
5
50
2
0
</code></pre>
</section>
<section>
<h2 class="dongwmtitle dongwmgreen">比较操作</h2>
<pre><code contenteditable>
# 只会显示第四列小于等于1005的行
$awk -F "," '$4 <= 1005' source-star.txt
105,Chris Wanstrath,Github,1002
106,Mattt Thompson,Heroku,890
$awk -F "," '$1 == 103 {print $2}' source-star.txt
Paul Irish
# 你也可以加多个条件 这里||表示或者 && 表示和