    『第 62 楼』:
使用 LLM 解释/回答一下
perl , ruby 的會有更多的表達式 , 如 \d 代表數字, \s 空格等
gawk , egrep, bash 的是 extended Regular expression
sed, grep 用的是 basic regular expression, 意味 [0-9]+ 不能用, 要寫成
[0-9][0-9]* 來表達一個或以上的數字
我寫了個從 [url]www.proxy4free.com[/url] 取得代理 url 的 gawk 簡單腳本, gnuwin32 gawk
用不到, 只可用 *nix 或 cygwin 跑, gnuwin32 gawk 的 socket 功能給 xp 廢了
#!/usr/bin/gawk -f
# Fetch page1.html from www.proxy4free.com through gawk's /inet/tcp
# co-process and print the text of every <td> line that holds an IP address,
# a number, a single word, a capitalised (optionally two-word) name or a
# 20xx date.  A dashed separator is printed after each date cell.
# Requires a gawk build whose TCP/IP networking works.
BEGIN {
    host = "www.proxy4free.com"
    remotehost = "/inet/tcp/0/" host "/80"

    # Every header line ends in CRLF and the empty line comes last, so the
    # headers are sent as part of the request instead of after its end.
    # "Connection: close" is requested because the loop below reads until
    # the server closes the stream.
    header = "GET http://" host "/page1.html HTTP/1.0\r\n" \
             "Host: " host ":80\r\n" \
             "Connection: close\r\n" \
             "Accept: text/html\r\n" \
             "Accept-Charset: iso-8859-1, *, utf-8\r\n" \
             "Accept-Language: en, zh, ja\r\n" \
             "User-Agent: gawk_script/0.1a [en, zh] (Cygwin, NT)\r\n" \
             "\r\n"

    # printf rather than print: print would append ORS after the blank line.
    printf "%s", header |& remotehost

    while ((remotehost |& getline) > 0) {
        is_date = ($0 ~ /^<td>20[0-1][0-9]-0?[1-9]+-0?[0-9]+<\/td>$/)

        if (is_date ||
            $0 ~ /^<td>[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+<\/td>$/ ||
            $0 ~ /^<td>[0-9]+<\/td>$/ ||
            $0 ~ /^<td>[A-Za-z]+<\/td>$/ ||
            $0 ~ /^<td>[A-Z][a-zA-Z]+[ \t]?[A-Z]*[A-Za-z]*<\/td>$/) {
            # Strip the surrounding tags and print the bare cell text.
            gsub(/<[^>]+>/, "")
            print $0
            # The date cell is treated as the end of one record.
            if (is_date)
                printf("\n----------------\n")
        }
    }
    close(remotehost)
}
還有三個用gawk, bash, perl 寫的香港六合彩小腳本,也用上了一些RE
#! /bin/bash
# marksix generator, Usage: $0 [ [option] argument ]
# Draws distinct random numbers from 1..49 (6 by default, or the count given
# as the first argument) and prints them in ascending order on one line.
# Exit status: 65 for a non-numeric argument, 66 when more picks are
# requested than there are numbers to draw from.
base_array=({1..49})
length=${#base_array[@]}
argument=$1
howmany=${argument:-6}
usage="Usage:\t${0##*/} [[option] number of picks](default 6)"
message_to_user="Number of picks should not be greater than $length"
out_of_range=66
non_numberic_argument=65

if [[ $howmany =~ [^0-9] ]]
then
    echo -e "$usage" >&2 ; exit $non_numberic_argument
fi

# Force base 10 so that counts such as 08 or 09 are not rejected as invalid
# octal by the arithmetic contexts below.
howmany=$(( 10#$howmany ))

if (( howmany > length ))
then
    echo "$message_to_user" >&2 ; exit $out_of_range
fi

picks=()
for (( i=1 ; i<=howmany ; i++ ))
do
    index=$(( RANDOM % length ))
    picks+=("${base_array[index]}")
    # Remove the drawn number and re-pack the array so its indices stay
    # contiguous for the next draw.
    unset 'base_array[index]'
    base_array=("${base_array[@]}")
    length=${#base_array[@]}
done

# sort -n emits one number per line; the deliberately unquoted expansion
# lets word splitting join them with single spaces.
sort_result=$( printf '%s\n' "${picks[@]}" | sort -n )
echo $sort_result
gawk version
#! /usr/bin/gawk -f
# Mark Six picker, gawk flavour.
# Draws ARGV[1] distinct numbers (6 when no argument is given) from 1..49
# and prints them in ascending order on one line.  A non-numeric argument,
# or one larger than the pool, prints the usage text to stderr and exits
# with status 5.
BEGIN {
    usage = "Usage:\t marksix.awk [number](default 6)"
    howmany = ARGV[1]

    # Candidate pool, split into arr[1..n].
    base = "1,2,3,4,5,6,7,8,9,10," \
           "11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30," \
           "31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49"
    n = split(base, arr, ",")

    if (howmany ~ /[^0-9]+/ || howmany > n) {
        print usage > "/dev/stderr"
        exit 5
    }
    if (howmany == "")
        howmany = 6

    srand()
    output = ""
    for (i = 1; i <= howmany; i++) {
        randnum = int(rand() * n * howmany) % n + 1
        output = output " " arr[randnum]
        # Drop the drawn slot; asort() re-indexes the rest as 1..n.
        delete arr[randnum]
        n = asort(arr)
    }

    # Sort the picks and join them with single spaces.
    split(output, a, " ")
    z = asort(a, out)
    sorted = ""
    for (i = 1; i <= z; i++)
        sorted = (i == 1) ? out[i] : sorted " " out[i]
    print sorted
}
perl version
#! /usr/bin/perl
# marksix generator, perl version
# Draws distinct random numbers from 1..49 (6 by default, or the count given
# as the first argument) and prints them in ascending order on one line.
# Dies with a usage message for a non-numeric count and with a range message
# when more picks are requested than there are numbers.
use strict;
use warnings;

my @num     = (1 .. 49);
my $length  = @num;
# Apply the default before any comparison so an absent argument is never
# compared while still undefined.
my $howmany = @ARGV ? $ARGV[0] : 6;

# Validate the format first so input such as "100abc" gets the usage text
# rather than the range message.
die "Usage:\t$0 [[option]number of picks](default 6)\n" if $howmany !~ /\A\d+\z/;
die "Number of picks should not be greater than $length\n" if $howmany > $length;

# No explicit srand: perl seeds the generator itself on the first rand(),
# and reseeding on every iteration serves no purpose.
my @picked;
for (1 .. $howmany) {
    my $index = int(rand(@num));            # rand(@num) is in [0, scalar @num)
    push @picked, splice(@num, $index, 1);  # remove the pick so it cannot repeat
}

my @sorted = sort { $a <=> $b } @picked;
print "@sorted\n";
還有一個 linux `free' 在 cygwin 下的代替品, 也用上一些 ruby 的 RE
$ cat bin/free
#! /usr/bin/ruby
# rustic free for cygwin, ruby version
# Prints one header row made of three column labels.
class PrintHeader
  # i, j and k are the labels of the first, second and third column.
  def initialize(i="total", j="used", k="free")
    @labels = [i, j, k]
  end

  # Writes the row to standard output in a single print call: each label is
  # preceded by a tab and the row ends with a newline.
  def header
    cells = @labels.flat_map { |label| ["\t", label] }
    print(*cells, "\n")
  end
end
# Print a header row, then one row for every /proc/meminfo line containing
# "Mem:" or "Swap:".  Fields 2-4 of such a line are integer-divided by
# 1024 * 1024 and printed with an " M" suffix after the line's first field.
n = PrintHeader.new("總數", "已用", "剩餘")
n.header
filename = "/proc/meminfo"
step = 1024 * 1024
# File.foreach closes the file even when a line raises, which the earlier
# open ... close pair did not guarantee.
File.foreach(filename) do |text|
  next unless text =~ /Mem:|Swap:/

  # Missing fields become nil, and nil.to_i is 0, so short lines print 0 M.
  label, total, used, free = text.split(/\s+/)
  print label, "\t", total.to_i / step, " M", "\t", used.to_i / step, " M", "\t", free.to_i / step, " M", "\n"
end
我不是顯擺,只是證明我沒有胡吹,當然我寫程序還是不入流, 但那幾種RE
還是會一點點皮毛
在win32 用findstr 就可學 basic regular expression , 因為我不會批次檔,
findstr 不怎麼用,但它的BRE 大致上就是gnu grep 的BRE,
|