@@ -18,6 +18,121 @@ use std::path::Path;
 use std::time::{Duration, Instant};
 use xz2::read::XzDecoder;
 
+pub(crate) fn recompress_file(
+    xz_path: &Path,
+    recompress_gz: bool,
+    gz_compression_level: flate2::Compression,
+    recompress_xz: bool,
+) -> anyhow::Result<()> {
+    println!("recompressing {}...", xz_path.display());
+    let file_start = Instant::now();
+    let gz_path = xz_path.with_extension("gz");
+
+    let mut destinations: Vec<(&str, Box<dyn io::Write>)> = Vec::new();
+
+    // Produce gzip if explicitly enabled or the destination file doesn't exist.
+    if recompress_gz || !gz_path.is_file() {
+        let gz = File::create(gz_path)?;
+        destinations.push((
+            "gz",
+            Box::new(flate2::write::GzEncoder::new(gz, gz_compression_level)),
+        ));
+    }
+
+    // xz recompression with more aggressive settings than we want to take the time
+    // for in rust-lang/rust CI. This cuts 5-15% off of the produced tarballs.
+    //
+    // Note that this is using a single-threaded compressor as we're parallelizing
+    // via rayon already. In rust-lang/rust we were trying to use parallel
+    // compression, but the default block size for that is 3*dict_size, so we
+    // weren't actually using more than one core in most of the builders with
+    // <192MB uncompressed tarballs. In promote-release, since we're recompressing
+    // 100s of tarballs, there's no need for each individual compression to be
+    // parallel.
+    let xz_recompressed = xz_path.with_extension("xz_recompressed");
+    if recompress_xz {
+        let mut filters = xz2::stream::Filters::new();
+        let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
+        // This sets the overall dictionary size, which is also how much memory (baseline)
+        // is needed for decompression.
+        lzma_ops.dict_size(64 * 1024 * 1024);
+        // Use the best match finder for compression ratio.
+        lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
+        lzma_ops.mode(xz2::stream::Mode::Normal);
+        // Set nice_len to the maximum for the best compression ratio.
+        lzma_ops.nice_len(273);
+        // Set depth to a reasonable value; 0 means auto, and 1000 is somewhat high but
+        // gives good results.
+        lzma_ops.depth(1000);
+        // 2 is the default and does well for most files.
+        lzma_ops.position_bits(2);
+        // 0 is the default and does well for most files.
+        lzma_ops.literal_position_bits(0);
+        // 3 is the default and does well for most files.
+        lzma_ops.literal_context_bits(3);
+
+        filters.lzma2(&lzma_ops);
+
+        // FIXME: Do we want a checksum as part of compression?
+        let stream =
+            xz2::stream::Stream::new_stream_encoder(&filters, xz2::stream::Check::None).unwrap();
+        let xz_out = File::create(&xz_recompressed)?;
+        destinations.push((
+            "xz",
+            Box::new(xz2::write::XzEncoder::new_stream(
+                std::io::BufWriter::new(xz_out),
+                stream,
+            )),
+        ));
+    }
+
+    // We only decompress once and then write into each of the compressors before
+    // moving on.
+    //
+    // This code assumes that compression with `write_all` will never fail (i.e., we
+    // can take arbitrary amounts of data as input). That seems like a reasonable
+    // assumption though.
+    let mut decompressor = XzDecoder::new(File::open(xz_path)?);
+    let mut buffer = vec![0u8; 4 * 1024 * 1024];
+    let mut decompress_time = Duration::ZERO;
+    let mut time_by_dest = vec![Duration::ZERO; destinations.len()];
+    loop {
+        let start = Instant::now();
+        let length = decompressor.read(&mut buffer)?;
+        decompress_time += start.elapsed();
+        if length == 0 {
+            break;
+        }
+        for (idx, (_, destination)) in destinations.iter_mut().enumerate() {
+            let start = std::time::Instant::now();
+            destination.write_all(&buffer[..length])?;
+            time_by_dest[idx] += start.elapsed();
+        }
+    }
+
+    let mut compression_times = String::new();
+    for (idx, (name, _)) in destinations.iter().enumerate() {
+        write!(
+            compression_times,
+            ", {:.2?} {} compression",
+            time_by_dest[idx], name
+        )?;
+    }
+    println!(
+        "recompressed {}: {:.2?} total, {:.2?} decompression{}",
+        xz_path.display(),
+        file_start.elapsed(),
+        decompress_time,
+        compression_times
+    );
+
+    if recompress_xz {
+        fs::rename(&xz_recompressed, xz_path)?;
+    }
+
+    Ok(())
+}
+
 impl Context {
     pub fn recompress(&self, directory: &Path) -> anyhow::Result<()> {
         let mut to_recompress = Vec::new();
@@ -77,114 +192,7 @@ impl Context {
                 let path = to_recompress.lock().unwrap().pop();
                 path
             } {
-                println!("recompressing {}...", xz_path.display());
-                let file_start = Instant::now();
-                let gz_path = xz_path.with_extension("gz");
-
-                let mut destinations: Vec<(&str, Box<dyn io::Write>)> = Vec::new();
-
-                // Produce gzip if explicitly enabled or the destination file doesn't exist.
-                if recompress_gz || !gz_path.is_file() {
-                    let gz = File::create(gz_path)?;
-                    destinations.push((
-                        "gz",
-                        Box::new(flate2::write::GzEncoder::new(gz, compression_level)),
-                    ));
-                }
-
-                // xz recompression with more aggressive settings than we want to take the time
-                // for in rust-lang/rust CI. This cuts 5-15% off of the produced tarballs.
-                //
-                // Note that this is using a single-threaded compressor as we're parallelizing
-                // via rayon already. In rust-lang/rust we were trying to use parallel
-                // compression, but the default block size for that is 3*dict_size, so we
-                // weren't actually using more than one core in most of the builders with
-                // <192MB uncompressed tarballs. In promote-release, since we're recompressing
-                // 100s of tarballs, there's no need for each individual compression to be
-                // parallel.
-                let xz_recompressed = xz_path.with_extension("xz_recompressed");
-                if recompress_xz {
-                    let mut filters = xz2::stream::Filters::new();
-                    let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
-                    // This sets the overall dictionary size, which is also how much memory (baseline)
-                    // is needed for decompression.
-                    lzma_ops.dict_size(64 * 1024 * 1024);
-                    // Use the best match finder for compression ratio.
-                    lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
-                    lzma_ops.mode(xz2::stream::Mode::Normal);
-                    // Set nice_len to the maximum for the best compression ratio.
-                    lzma_ops.nice_len(273);
-                    // Set depth to a reasonable value; 0 means auto, and 1000 is somewhat high but
-                    // gives good results.
-                    lzma_ops.depth(1000);
-                    // 2 is the default and does well for most files.
-                    lzma_ops.position_bits(2);
-                    // 0 is the default and does well for most files.
-                    lzma_ops.literal_position_bits(0);
-                    // 3 is the default and does well for most files.
-                    lzma_ops.literal_context_bits(3);
-
-                    filters.lzma2(&lzma_ops);
-
-                    // FIXME: Do we want a checksum as part of compression?
-                    let stream = xz2::stream::Stream::new_stream_encoder(
-                        &filters,
-                        xz2::stream::Check::None,
-                    )
-                    .unwrap();
-                    let xz_out = File::create(&xz_recompressed)?;
-                    destinations.push((
-                        "xz",
-                        Box::new(xz2::write::XzEncoder::new_stream(
-                            std::io::BufWriter::new(xz_out),
-                            stream,
-                        )),
-                    ));
-                }
-
-                // We only decompress once and then write into each of the compressors before
-                // moving on.
-                //
-                // This code assumes that compression with `write_all` will never fail (i.e., we
-                // can take arbitrary amounts of data as input). That seems like a reasonable
-                // assumption though.
-                let mut decompressor = XzDecoder::new(File::open(&xz_path)?);
-                let mut buffer = vec![0u8; 4 * 1024 * 1024];
-                let mut decompress_time = Duration::ZERO;
-                let mut time_by_dest = vec![Duration::ZERO; destinations.len()];
-                loop {
-                    let start = Instant::now();
-                    let length = decompressor.read(&mut buffer)?;
-                    decompress_time += start.elapsed();
-                    if length == 0 {
-                        break;
-                    }
-                    for (idx, (_, destination)) in destinations.iter_mut().enumerate() {
-                        let start = std::time::Instant::now();
-                        destination.write_all(&buffer[..length])?;
-                        time_by_dest[idx] += start.elapsed();
-                    }
-                }
-
-                let mut compression_times = String::new();
-                for (idx, (name, _)) in destinations.iter().enumerate() {
-                    write!(
-                        compression_times,
-                        ", {:.2?} {} compression",
-                        time_by_dest[idx], name
-                    )?;
-                }
-                println!(
-                    "recompressed {}: {:.2?} total, {:.2?} decompression{}",
-                    xz_path.display(),
-                    file_start.elapsed(),
-                    decompress_time,
-                    compression_times
-                );
-
-                if recompress_xz {
-                    fs::rename(&xz_recompressed, xz_path)?;
-                }
+                recompress_file(&xz_path, recompress_gz, compression_level, recompress_xz)?;
             }
 
             Ok::<_, anyhow::Error>(())
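
Note on the loop being refactored: the `while let Some(xz_path) = { ... } {` context lines above use an inner block so that the `Mutex` guard returned by `lock()` is dropped before the loop body runs; in a plain `while let`, the temporary guard would be held for the entire iteration, serializing the workers. Below is a minimal sketch of how `recompress_file` might be driven in parallel, in the spirit of the rayon setup the diff's comments describe. `recompress_all`, its arguments, and the chosen settings are illustrative assumptions, not part of promote-release:

use std::path::PathBuf;
use std::sync::Mutex;

// Hypothetical driver: a shared Mutex<Vec<PathBuf>> serves as the work queue,
// with one worker spawned per rayon thread popping paths until it is empty.
fn recompress_all(paths: Vec<PathBuf>) -> anyhow::Result<()> {
    let to_recompress = Mutex::new(paths);
    rayon::scope(|scope| {
        for _ in 0..rayon::current_num_threads() {
            scope.spawn(|_| {
                while let Some(xz_path) = {
                    // Inner block so the lock guard is dropped before the
                    // long-running loop body executes.
                    let path = to_recompress.lock().unwrap().pop();
                    path
                } {
                    // Illustrative settings: regenerate gzip at flate2's
                    // default level and re-encode the xz tarball.
                    recompress_file(&xz_path, true, flate2::Compression::default(), true)
                        .expect("recompression failed");
                }
            });
        }
    });
    Ok(())
}

The real closure returns `Ok::<_, anyhow::Error>(())`, as the context lines show, so errors propagate instead of panicking; the `.expect` here only keeps the sketch short.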