Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix issues related to encoding #7791

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
57 changes: 38 additions & 19 deletions backend/local/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,11 @@ func (f *Fs) localPath(name string) string {
return filepath.Join(f.root, filepath.FromSlash(f.opt.Enc.FromStandardPath(name)))
}

// LocalToStandardPath converts a file name from the local filesystem into a
// standard path, using the user-specified encoding.
func (f *Fs) LocalToStandardPath(name string) string {
	// Switch to forward slashes first, then let the configured encoder
	// translate the result.
	slashed := filepath.ToSlash(name)
	return f.opt.Enc.ToStandardPath(slashed)
}

// Put the Object to the local filesystem
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
// Temporary Object under construction - info filled in by Update()
Expand Down Expand Up @@ -1556,32 +1561,46 @@ func (o *Object) writeMetadata(metadata fs.Metadata) (err error) {
}

// cleanRootPath prepares the root path s: it makes the path absolute (or
// cleans it), applies the encoder enc to the path, and, unless noUNC is set,
// passes the result through file.UNCPath.
//
// NOTE(review): this listing appears to be a rendered diff hunk in which lines
// of the previous and the new implementation are shown together without +/-
// markers (for example both `vol :=` and `vol =`, and a `return s` followed by
// further statements). It should not be read as a single compilable body;
// confirm against the actual file before relying on statement order.
func cleanRootPath(s string, noUNC bool, enc encoder.MultiEncoder) string {
// Unless we are on Windows with a path starting with a backslash, make s
// absolute if it is relative, otherwise just clean it.
if runtime.GOOS != "windows" || !strings.HasPrefix(s, "\\") {
if !filepath.IsAbs(s) {
s2, err := filepath.Abs(s)
// A failure of filepath.Abs is ignored and s is left as it was.
if err == nil {
s = s2
}
} else {
s = filepath.Clean(s)
}
}
var vol string
if runtime.GOOS == "windows" {
s = filepath.ToSlash(s)
vol := filepath.VolumeName(s)
vol = filepath.VolumeName(s)
if vol == `\\?` && len(s) >= 6 {
// e.g. `\\?\C:` - take the first six bytes of s as the volume
vol = s[:6]
}
s = vol + enc.FromStandardPath(s[len(vol):])
s = filepath.FromSlash(s)
if !noUNC {
// Convert to UNC
s = file.UNCPath(s)
s = s[len(vol):]
}
// Encode segment by segment with FromStandardName instead of calling
// FromStandardPath, so that `.` and `..` used as names are never re-encoded.
if enc != encoder.Standard {
s = filepath.ToSlash(s)
parts := strings.Split(s, "/")
encoded := make([]string, len(parts))
// changed records whether encoding altered any segment; s is only
// rebuilt from the encoded segments when it did.
changed := false
for i, p := range parts {
if (p == ".") || (p == "..") {
encoded[i] = p
continue
}
part := enc.FromStandardName(p)
changed = changed || part != p
encoded[i] = part
}
if changed {
s = strings.Join(encoded, "/")
}
return s
s = filepath.FromSlash(s)
}
// On Windows, put the volume in front of s.
if runtime.GOOS == "windows" {
s = vol + s
}
s2, err := filepath.Abs(s)
// A failure of filepath.Abs is ignored and s is left as it was.
if err == nil {
s = s2
}
if !noUNC {
// Convert to UNC. This does nothing on non-Windows platforms.
s = file.UNCPath(s)
}
s = enc.FromStandardPath(s)
return s
}

Expand Down
12 changes: 8 additions & 4 deletions docs/content/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -348,8 +348,8 @@ have a Windows file system with Unicode fullwidth characters
remote rather than being translated to regular (halfwidth) `*`, `?` and `:`.

The `--backend-encoding` flags allow you to change that. You can
disable the encoding completely with `--backend-encoding None` or set
`encoding = None` in the config file.
disable the encoding completely with `--backend-encoding Raw` or set
`encoding = Raw` in the config file.

Encoding takes a comma separated list of encodings. You can see the
list of all possible values by passing an invalid value to this
Expand All @@ -375,7 +375,7 @@ will show you the defaults for the backends.
| LeftSpace | SPACE on the left of a string | `␠` |
| LeftTilde | `~` on the left of a string | `～` |
| LtGt | `<`, `>` | `＜`, `＞` |
| None | No characters are encoded | |
| None ¹ | NUL 0x00 | ␀ |
| Percent | `%` | `％` |
| Pipe | \| | `｜` |
| Question | `?` | `？` |
Expand All @@ -387,6 +387,10 @@ will show you the defaults for the backends.
| Slash | `/` | `／` |
| SquareBracket | `[`, `]` | `［`, `］` |

¹ The encoding of NUL 0x00 to ␀ is always applied implicitly, except when using `Raw`.
`None` was previously documented, incorrectly, as disabling encoding;
its behavior has been left unchanged to maintain backward compatibility.

##### Encoding example: FTP

To take a specific example, the FTP backend's default encoding is
Expand Down Expand Up @@ -430,7 +434,7 @@ the default value but without `Colon,Question,Asterisk`:
--local-encoding "Slash,LtGt,DoubleQuote,Pipe,BackSlash,Ctl,RightSpace,RightPeriod,InvalidUtf8,Dot"
```

Alternatively, you can disable the conversion of any characters with `--local-encoding None`.
Alternatively, you can disable the conversion of any characters with `--local-encoding Raw`.

Instead of using command-line argument `--local-encoding`, you may also set it
as [environment variable](/docs/#environment-variables) `RCLONE_LOCAL_ENCODING`,
Expand Down
66 changes: 38 additions & 28 deletions lib/encoder/encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,34 +35,35 @@ const (

// Possible flags for the MultiEncoder
const (
EncodeZero MultiEncoder = 0 // NUL(0x00)
EncodeSlash MultiEncoder = 1 << iota // /
EncodeLtGt // <>
EncodeDoubleQuote // "
EncodeSingleQuote // '
EncodeBackQuote // `
EncodeDollar // $
EncodeColon // :
EncodeQuestion // ?
EncodeAsterisk // *
EncodePipe // |
EncodeHash // #
EncodePercent // %
EncodeBackSlash // \
EncodeCrLf // CR(0x0D), LF(0x0A)
EncodeDel // DEL(0x7F)
EncodeCtl // CTRL(0x01-0x1F)
EncodeLeftSpace // Leading SPACE
EncodeLeftPeriod // Leading .
EncodeLeftTilde // Leading ~
EncodeLeftCrLfHtVt // Leading CR LF HT VT
EncodeRightSpace // Trailing SPACE
EncodeRightPeriod // Trailing .
EncodeRightCrLfHtVt // Trailing CR LF HT VT
EncodeInvalidUtf8 // Invalid UTF-8 bytes
EncodeDot // . and .. names
EncodeSquareBracket // []
EncodeSemicolon // ;
EncodeZero MultiEncoder = 0 // NUL(0x00)
EncodeRaw MultiEncoder = 1 << (iota - 1)
EncodeSlash // /
EncodeLtGt // <>
EncodeDoubleQuote // "
EncodeSingleQuote // '
EncodeBackQuote // `
EncodeDollar // $
EncodeColon // :
EncodeQuestion // ?
EncodeAsterisk // *
EncodePipe // |
EncodeHash // #
EncodePercent // %
EncodeBackSlash // \
EncodeCrLf // CR(0x0D), LF(0x0A)
EncodeDel // DEL(0x7F)
EncodeCtl // CTRL(0x01-0x1F)
EncodeLeftSpace // Leading SPACE
EncodeLeftPeriod // Leading .
EncodeLeftTilde // Leading ~
EncodeLeftCrLfHtVt // Leading CR LF HT VT
EncodeRightSpace // Trailing SPACE
EncodeRightPeriod // Trailing .
EncodeRightCrLfHtVt // Trailing CR LF HT VT
EncodeInvalidUtf8 // Invalid UTF-8 bytes
EncodeDot // . and .. names
EncodeSquareBracket // []
EncodeSemicolon // ;

// Synthetic
EncodeWin = EncodeColon | EncodeQuestion | EncodeDoubleQuote | EncodeAsterisk | EncodeLtGt | EncodePipe // :?"*<>|
Expand Down Expand Up @@ -117,6 +118,7 @@ func alias(name string, mask MultiEncoder) {
}

func init() {
alias("Raw", EncodeRaw)
alias("None", EncodeZero)
alias("Slash", EncodeSlash)
alias("LtGt", EncodeLtGt)
Expand Down Expand Up @@ -214,6 +216,10 @@ func (mask *MultiEncoder) Scan(s fmt.ScanState, ch rune) error {
// Encode takes a raw name and substitutes any reserved characters and
// patterns in it
func (mask MultiEncoder) Encode(in string) string {
if mask == EncodeRaw {
return in
}

if in == "" {
return ""
}
Expand Down Expand Up @@ -671,6 +677,10 @@ func (mask MultiEncoder) Encode(in string) string {

// Decode takes a name and undoes any substitutions made by Encode
func (mask MultiEncoder) Decode(in string) string {
if mask == EncodeRaw {
return in
}

if mask.Has(EncodeDot) {
switch in {
case ".":
Expand Down
10 changes: 5 additions & 5 deletions lib/encoder/encoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func TestEncodeString(t *testing.T) {
mask MultiEncoder
want string
}{
{0, "None"},
{EncodeRaw, "Raw"},
{EncodeZero, "None"},
{EncodeDoubleQuote, "DoubleQuote"},
{EncodeDot, "Dot"},
Expand All @@ -44,7 +44,7 @@ func TestEncodeSet(t *testing.T) {
wantErr bool
}{
{"", 0, true},
{"None", 0, false},
{"Raw", EncodeRaw, false},
{"None", EncodeZero, false},
{"DoubleQuote", EncodeDoubleQuote, false},
{"Dot", EncodeDot, false},
Expand Down Expand Up @@ -178,15 +178,15 @@ func TestEncodeInvalidUnicode(t *testing.T) {
func TestEncodeDot(t *testing.T) {
for i, tc := range []testCase{
{
mask: 0,
mask: EncodeZero,
in: ".",
out: ".",
}, {
mask: EncodeDot,
in: ".",
out: ".",
}, {
mask: 0,
mask: EncodeZero,
in: "..",
out: "..",
}, {
Expand Down Expand Up @@ -224,7 +224,7 @@ func TestDecodeHalf(t *testing.T) {
in: "‛",
out: "‛",
}, {
mask: 0,
mask: EncodeZero,
in: "‛‛",
out: "‛",
}, {
Expand Down
8 changes: 4 additions & 4 deletions lib/encoder/internal/gen/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,8 @@ func main() {
fatalW(fd.WriteString(" "))("Write:")
}
in, out := buildTestString(
[]mapping{getMapping(m.mask)}, // pick
[]mapping{getMapping(0)}, // quote
[]mapping{getMapping(m.mask)}, // pick
[]mapping{getMapping(encoder.EncodeZero)}, // quote
printables, fullwidthPrintables, encodables, encoded, greek) // fill
fatalW(fmt.Fprintf(fd, `{ // %d
mask: %s,
Expand Down Expand Up @@ -262,7 +262,7 @@ var testCasesSingleEdge = []testCase{
for idx, orig := range e.orig {
replace := e.replace[idx]
pairs := buildEdgeTestString(
[]edge{e}, []mapping{getMapping(0), getMapping(m.mask)}, // quote
[]edge{e}, []mapping{getMapping(encoder.EncodeZero), getMapping(m.mask)}, // quote
[][]rune{printables, fullwidthPrintables, encodables, encoded, greek}, // fill
func(rIn, rOut []rune, quoteOut []bool, testMappings []mapping) (out []stringPair) {
testL := len(rIn)
Expand Down Expand Up @@ -386,7 +386,7 @@ var testCasesDoubleEdge = []testCase{
orig, replace := e1.orig[0], e1.replace[0]
edges := []edge{e1, e2}
pairs := buildEdgeTestString(
edges, []mapping{getMapping(0), getMapping(m.mask)}, // quote
edges, []mapping{getMapping(encoder.EncodeZero), getMapping(m.mask)}, // quote
[][]rune{printables, fullwidthPrintables, encodables, encoded, greek}, // fill
func(rIn, rOut []rune, quoteOut []bool, testMappings []mapping) (out []stringPair) {
testL := len(rIn)
Expand Down
3 changes: 2 additions & 1 deletion vfs/vfscache/item.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"sync"
"time"

"github.com/rclone/rclone/backend/local"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/fserrors"
"github.com/rclone/rclone/fs/operations"
Expand Down Expand Up @@ -581,7 +582,7 @@ func (item *Item) _store(ctx context.Context, storeFn StoreFn) (err error) {
// defer log.Trace(item.name, "item=%p", item)("err=%v", &err)

// Transfer the temp file to the remote
cacheObj, err := item.c.fcache.NewObject(ctx, item.name)
cacheObj, err := item.c.fcache.NewObject(ctx, item.c.fcache.(*local.Fs).LocalToStandardPath(toOSPath(item.name)))
if err != nil && err != fs.ErrorObjectNotFound {
return fmt.Errorf("vfs cache: failed to find cache file: %w", err)
}
Expand Down