关于转义:编码/解码URL

Encode / decode URLs

在Go中编码和解码整个URL的推荐方法是什么? 我知道url.QueryEscape和url.QueryUnescape这两个方法,但是它们似乎并不是我要找的。 具体来说,我正在寻找类似JavaScript的encodeURIComponent和decodeURIComponent的方法。

谢谢。


您可以使用net/url包完成所需的全部URL编码。它没有为URL的各个部分提供单独的编码函数,您必须让它构造整个URL。粗略浏览源代码后,我认为它做得很好并且符合标准。

这是一个示例(Playground链接)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
package main

import (
   "fmt"
   "net/url"
)

// main demonstrates building an encoded URL with net/url: it parses a base
// URL, appends a path containing a '?', attaches query parameters and prints
// the resulting string.
func main() {
	u, err := url.Parse("http://www.example.com")
	if err != nil {
		// Keep the underlying parse error instead of a fixed message.
		panic(err)
	}

	// The '?' below is part of the path, not a query delimiter, so String()
	// emits it percent-encoded.
	u.Path += "/some/path/or/other_with_funny_characters?_or_not/"

	parameters := url.Values{}
	parameters.Add("hello", "42")
	parameters.Add("hello", "54")
	parameters.Add("vegetable", "potato")
	u.RawQuery = parameters.Encode()

	fmt.Printf("Encoded URL is %q\n", u.String())
}

输出结果为:

1
Encoded URL is "http://www.example.com/some/path/or/other_with_funny_characters%3F_or_not/?hello=42&hello=54&vegetable=potato"


从encodeURIComponent的MDN:

encodeURIComponent escapes all characters except the following: alphabetic, decimal digits, '-', '_', '.', '!', '~', '*', ''', '(', ')'

从Go的url.QueryEscape实现(特别是shouldEscape私有函数)中,转义所有字符,但以下字符除外:字母,十进制数字,'-', '_', '.', '~'

与Javascript不同,Go的QueryEscape()将转义'!', '*', ''', '(', ')'。基本上,Go的版本严格符合RFC-3986。 Javascript比较宽松。再次来自MDN:

If one wishes to be more stringent in adhering to RFC 3986 (which reserves !, ', (, ), and *), even though these characters have no formalized URI delimiting uses, the following can be safely used:

1
2
3
/**
 * Percent-encodes a URI component like encodeURIComponent, and additionally
 * escapes the characters ! ' ( ) * that RFC 3986 reserves.
 *
 * @param {string} str - The raw component text.
 * @returns {string} The encoded component.
 */
function fixedEncodeURIComponent (str) {
  // Each matched character is a single ASCII code unit, so its code point
  // maps directly to one two-digit uppercase hex escape.
  return encodeURIComponent(str).replace(/[!'()*]/g, function (c) {
    return "%" + c.charCodeAt(0).toString(16).toUpperCase();
  });
}

从Go 1.8开始,这种情况已经改变。除了较旧的QueryEscape,我们现在还可以使用PathEscape对路径分量进行编码,并使用与之对应的PathUnescape进行解码。


这个怎么样:

1
template.URLQueryEscaper(path)


为了模仿Javascript的encodeURIComponent(),我创建了一个字符串帮助器函数。

示例:将"My String"转换为"My%20String"

https://github.com/mrap/stringutil/blob/master/urlencode.go

1
2
3
4
5
6
7
8
9
10
import"net/url"

// UrlEncoded parses str as a URL reference and returns it re-serialized in
// percent-encoded form; for example "My String" becomes "My%20String".
//
// Only characters that are not valid in their position are escaped, so URL
// syntax such as '/', '?' and '&' is kept as is. If str cannot be parsed,
// the empty string and the parse error are returned.
func UrlEncoded(str string) (string, error) {
	parsed, parseErr := url.Parse(str)
	if parseErr == nil {
		return parsed.String(), nil
	}
	return "", parseErr
}


如果有人想得到与JS encodeURIComponent完全一致的结果,请尝试我的函数,它写得不太优雅,但是效果很好。

https://gist.github.com/czyang/7ae30f4f625fee14cfc40c143e1b78bf

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
// Warning: use this code carefully, and at your own risk.
    package main

    import (
   "fmt"
   "net/url"
   "strings"
)
/*
After hours searching, I can't find any method can get the result exact as the JS encodeURIComponent function.
In my situation I need to write a sign method which need encode the user input exact same as the JS encodeURIComponent.
This function does solved my problem.
*/
func main() {
    params := url.Values{
       "test_string": {"+!+'( )*-._~0-??  ??9a-zA-Z 中文测试 test with ?? !@#$%^&&*()~<>?/.,;'[][]:{{}|{}|"},
    }
    urlEncode := params.Encode()
    fmt.Println(urlEncode)
    urlEncode = compatibleRFC3986Encode(urlEncode)
    fmt.Println("RFC3986", urlEncode)
    urlEncode = compatibleJSEncodeURIComponent(urlEncode)
    fmt.Println("JS encodeURIComponent", urlEncode)
}

// compatibleRFC3986Encode returns str with every '+' rewritten as "%20",
// turning the form-style space encoding into the RFC 3986 percent form.
func compatibleRFC3986Encode(str string) string {
	return strings.ReplaceAll(str, "+", "%20")
}

// jsURIComponentReplacer maps the escapes in already-encoded text to the
// forms JS encodeURIComponent emits: '+' becomes "%20", and the escapes for
// ! ' ( ) * are turned back into the literal characters.
var jsURIComponentReplacer = strings.NewReplacer(
	"+", "%20",
	"%21", "!",
	"%27", "'",
	"%28", "(",
	"%29", ")",
	"%2A", "*",
)

// compatibleJSEncodeURIComponent post-processes an already percent-encoded
// string so it matches the output of JS encodeURIComponent, which leaves
// ! ' ( ) * unescaped and writes spaces as "%20".
func compatibleJSEncodeURIComponent(str string) string {
	return jsURIComponentReplacer.Replace(str)
}

希望这能有所帮助

1
2
3
4
5
6
7
8
9
 // url encoded
// UrlEncodedISO parses str as a URL and returns only its query parameters,
// re-encoded with url.Values.Encode. If str cannot be parsed, the empty
// string and the parse error are returned.
func UrlEncodedISO(str string) (string, error) {
	parsed, err := url.Parse(str)
	if err != nil {
		return "", err
	}
	return parsed.Query().Encode(), nil
}
1
2
3
4
5
6
7
- * encoded into %2A
- # encoded into %23
- % encoded into %25
- < encoded into %3C
- > encoded into %3E
- + encoded into %2B
- enter key (#13#10) is encoded into %0D%0A

这是escape和unescape的实现(取自Go源码):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
package main


import (  
   "fmt"
   "strconv"
)


// encoding selects which part of a URL is being escaped or unescaped; the
// set of characters that must be percent-encoded differs per part.
type encoding int

// The encoding modes, numbered from 1 so the zero value is not a valid mode.
const (
	encodePath encoding = 1 + iota
	encodeHost
	encodeUserPassword
	encodeQueryComponent
	encodeFragment
)

// EscapeError is returned for a malformed percent-escape; its value is the
// offending text.
type EscapeError string

// Error implements the error interface, quoting the offending escape text.
func (e EscapeError) Error() string {
	return "invalid URL escape " + strconv.Quote(string(e))
}


// ishex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
func ishex(c byte) bool {
	isDigit := '0' <= c && c <= '9'
	isLower := 'a' <= c && c <= 'f'
	isUpper := 'A' <= c && c <= 'F'
	return isDigit || isLower || isUpper
}

// unhex returns the numeric value (0-15) of the ASCII hexadecimal digit c.
// Any byte that is not a hexadecimal digit yields 0.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return c - 'a' + 10
	}
	if 'A' <= c && c <= 'F' {
		return c - 'A' + 10
	}
	return 0
}



// shouldEscape reports whether byte c must be percent-encoded when it
// appears in the part of a URL selected by mode, following RFC 3986.
//
// ASCII letters, digits and the marks '-', '_', '.', '~' are never escaped.
// The reserved characters '$', '&', '+', ',', '/', ':', ';', '=', '?', '@'
// are escaped or not depending on mode. In encodeHost mode a further set of
// characters is allowed unescaped. Every other byte is escaped.
//
// Please be informed that for now shouldEscape does not check all
// reserved characters correctly. See golang.org/issue/5684.
func shouldEscape(c byte, mode encoding) bool {
    // §2.3 Unreserved characters (alphanum)
    if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
        return false
    }

    if mode == encodeHost {
        // §3.2.2 Host allows
        //  sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
        // as part of reg-name.
        // We add : because we include :port as part of host.
        // We add [ ] because we include [ipv6]:port as part of host
        switch c {
        case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']':
            return false
        }
    }

    switch c {
    case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
        return false

    case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
        // Different sections of the URL allow a few of
        // the reserved characters to appear unescaped.
        // A mode with no case below (such as encodeHost) falls through
        // to the final "return true".
        switch mode {
        case encodePath: // §3.3
            // The RFC allows : @ & = + $ but saves / ; , for assigning
            // meaning to individual path segments. This package
            // only manipulates the path as a whole, so we allow those
            // last three as well. That leaves only ? to escape.
            return c == '?'

        case encodeUserPassword: // §3.2.1
            // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
            // userinfo, so we must escape only '@', '/', and '?'.
            // The parsing of userinfo treats ':' as special so we must escape
            // that too.
            return c == '@' || c == '/' || c == '?' || c == ':'

        case encodeQueryComponent: // §3.4
            // The RFC reserves (so we must escape) everything.
            return true

        case encodeFragment: // §4.1
            // The RFC text is silent but the grammar allows
            // everything, so escape nothing.
            return false
        }
    }

    // Everything else must be escaped.
    return true
}




func escape(s string, mode encoding) string {
    spaceCount, hexCount := 0, 0
    for i := 0; i < len(s); i++ {
        c := s[i]
        if shouldEscape(c, mode) {
            if c == ' ' && mode == encodeQueryComponent {
                spaceCount++
            } else {
                hexCount++
            }
        }
    }

    if spaceCount == 0 && hexCount == 0 {
        return s
    }

    t := make([]byte, len(s)+2*hexCount)
    j := 0
    for i := 0; i < len(s); i++ {
        switch c := s[i]; {
        case c == ' ' && mode == encodeQueryComponent:
            t[j] = '+'
            j++
        case shouldEscape(c, mode):
            t[j] = '%'
            t[j+1] ="0123456789ABCDEF"[c>>4]
            t[j+2] ="0123456789ABCDEF"[c&15]
            j += 3
        default:
            t[j] = s[i]
            j++
        }
    }
    return string(t)
}


// unescape unescapes a string; the mode specifies
// which section of the URL string is being unescaped.
func unescape(s string, mode encoding) (string, error) {
    // Count %, check that they're well-formed.
    n := 0
    hasPlus := false
    for i := 0; i < len(s); {
        switch s[i] {
        case '%':
            n++
            if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
                s = s[i:]
                if len(s) > 3 {
                    s = s[:3]
                }
                return"", EscapeError(s)
            }
            i += 3
        case '+':
            hasPlus = mode == encodeQueryComponent
            i++
        default:
            i++
        }
    }

    if n == 0 && !hasPlus {
        return s, nil
    }

    t := make([]byte, len(s)-2*n)
    j := 0
    for i := 0; i < len(s); {
        switch s[i] {
        case '%':
            t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
            j++
            i += 3
        case '+':
            if mode == encodeQueryComponent {
                t[j] = ' '
            } else {
                t[j] = '+'
            }
            j++
            i++
        default:
            t[j] = s[i]
            j++
            i++
        }
    }
    return string(t), nil
}


func EncodeUriComponent(rawString string) string{
    return escape(rawString, encodeFragment)
}

func DecodeUriCompontent(encoded string) (string, error){
    return unescape(encoded, encodeQueryComponent)
}


// https://golang.org/src/net/url/url.go
// http://remove-line-numbers.ruurtjan.com/
func main() {
    // http://www.url-encode-decode.com/
    origin :="??üHel/lo world"
    encoded := EncodeUriComponent(origin)
    fmt.Println(encoded)

    s, _ := DecodeUriCompontent(encoded)
    fmt.Println(s)
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// -------------------------------------------------------

/*
func UrlEncoded(str string) (string, error) {
    u, err := url.Parse(str)
    if err != nil {
        return"", err
    }
    return u.String(), nil
}


// http://stackoverflow.com/questions/13820280/encode-decode-urls
// import"net/url"
func old_main() {
    a,err := UrlEncoded("hello world")
    if err != nil {
           fmt.Println(err)
    }
    fmt.Println(a)

    // https://gobyexample.com/url-parsing
    //s :="postgres://user:pass@host.com:5432/path?k=v#f"
    s :="postgres://user:pass@host.com:5432/path?k=vbla%23fooa#f"
    u, err := url.Parse(s)
    if err != nil {
        panic(err)
    }


    fmt.Println(u.RawQuery)
    fmt.Println(u.Fragment)
    fmt.Println(u.String())
    m, _ := url.ParseQuery(u.RawQuery)
    fmt.Println(m)
    fmt.Println(m["k"][0])

}
*/

// -------------------------------------------------------