blob: 1bcf39e2b25a1d63f374ba06268eeec074826e75 [file] [log] [blame]
#!/usr/bin/env python3
"""Test zstd interoperability between versions"""

# ################################################################
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ################################################################
Yann Colletdca60f22016-05-23 14:23:55 +020013
Yann Colletdca60f22016-05-23 14:23:55 +020014import filecmp
Yann Colletebc13bc2016-05-25 10:12:39 +020015import glob
16import hashlib
Yann Colletdca60f22016-05-23 14:23:55 +020017import os
18import shutil
19import sys
inikep45456712016-06-17 13:39:43 +020020import subprocess
inikep7e3597b2016-06-17 14:43:24 +020021from subprocess import Popen, PIPE
Yann Colletdca60f22016-05-23 14:23:55 +020022
# Repository that is cloned to obtain every tagged release.
repo_url = 'https://github.com/facebook/zstd.git'
# Scratch directory name; it is joined onto the repository root by the script.
tmp_dir_name = 'tests/versionsTest'

# External tools, and the arguments prepended to every make invocation.
git_cmd = 'git'
make_cmd = 'make'
make_args = ['-j', 'CFLAGS=-O0']

# Pseudo-tag used for the working-tree build, alongside the real release tags.
head = 'vdevel'

# The sample: `test_dat_src` is copied into the scratch directory as `test_dat`.
test_dat_src = 'README.md'
test_dat = 'test_dat'

# Name of the scratch sub-directory that collects dictionary training sources.
dict_source = 'dict_source'

# Directories whose *.c / *.h files are globbed as dictionary training sources.
# NOTE: there is deliberately no 'lib/decompress/*.h' entry here; the resulting
# list is exactly the one this script has always used.
_dict_c_dirs = [
    'programs',
    'lib/common',
    'lib/compress',
    'lib/decompress',
    'lib/dictBuilder',
    'lib/legacy',
]
_dict_h_dirs = [
    'programs',
    'lib/common',
    'lib/compress',
    'lib/dictBuilder',
    'lib/legacy',
]
dict_globs = (
    [directory + '/*.c' for directory in _dict_c_dirs]
    + [directory + '/*.h' for directory in _dict_h_dirs]
)
inikep24aa7b42016-06-16 14:15:32 +020045
46
def execute(command, print_output=False, print_error=True, param_shell=False):
    """Run `command`, wait for it to finish, and return its exit status.

    Args:
        command: Argument list, or a single command string when
            `param_shell` is true; handed to `Popen` unchanged.
        print_output: When true, print the captured stdout and stderr.
        print_error: When true, print the captured stderr if the command
            exits non-zero and `print_output` has not already printed it.
        param_shell: Forwarded to `Popen` as its `shell` argument.

    Returns:
        The exit status of the command.
    """
    popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=param_shell)
    raw_stdout, raw_stderr = popen.communicate()
    # The captured bytes are only decoded for display, and the tools run here
    # are not guaranteed to emit valid UTF-8.  Escape undecodable bytes
    # instead of raising, so that the exit status is always reported.
    stdout_text = raw_stdout.decode("utf-8", errors="backslashreplace")
    stderr_text = raw_stderr.decode("utf-8", errors="backslashreplace")
    if print_output:
        print(stdout_text)
        print(stderr_text)
    elif print_error and popen.returncode != 0:
        # communicate() has returned, so returncode is always set here.
        print(stderr_text)
    return popen.returncode
Yann Colletdca60f22016-05-23 14:23:55 +020059
Yann Colletebc13bc2016-05-25 10:12:39 +020060
def proc(cmd_args, pipe=True, dummy=False):
    """Run `cmd_args` to completion and return what `communicate()` returns.

    Args:
        cmd_args: Command passed to `Popen`.
        pipe: When true, stdout and stderr are captured through pipes;
            otherwise the child inherits this process's streams.
        dummy: When true, nothing is run and `None` is returned.

    Returns:
        The `(stdout, stderr)` tuple from `Popen.communicate()`, or `None`
        when `dummy` is set.
    """
    if dummy:
        return None
    stream_kwargs = {'stdout': PIPE, 'stderr': PIPE} if pipe else {}
    return Popen(cmd_args, **stream_kwargs).communicate()
69
Yann Colletebc13bc2016-05-25 10:12:39 +020070
def make(targets, pipe=True):
    """Invoke make with the shared `make_args` followed by `targets`.

    The full command is printed before it runs.  `pipe` is forwarded to
    `proc()`, whose return value is returned unchanged.
    """
    command = [make_cmd] + make_args + targets
    print('compilation command : {}'.format(command))
    return proc(command, pipe)
Yann Colletdca60f22016-05-23 14:23:55 +020076
Yann Colletebc13bc2016-05-25 10:12:39 +020077
def git(args, pipe=True):
    """Run git with the argument list `args` and return `proc()`'s result."""
    command = [git_cmd] + args
    return proc(command, pipe)
80
Yann Colletebc13bc2016-05-25 10:12:39 +020081
def get_git_tags():
    """Return the tags of the current repository matching `v[0-9].[0-9].[0-9]`.

    The pattern is passed to `git tag -l`, which runs in the current working
    directory; the output is split on whitespace into a list of tag names.
    """
    listing, _ = git(['tag', '-l', 'v[0-9].[0-9].[0-9]'])
    return listing.decode('utf-8').split()
86
Yann Colletebc13bc2016-05-25 10:12:39 +020087
def dict_ok(tag, dict_name, sample):
    """Check that `./zstd.<tag>` can use the dictionary `dict_name`.

    Args:
        tag: Version tag selecting the `./zstd.<tag>` binary.
        dict_name: Path of the dictionary file to validate.
        sample: Path of a file fed to the binary on stdin.

    Returns:
        True when the dictionary file exists and the binary exits with
        status 0; False when the dictionary is missing, the sample or the
        binary cannot be opened/executed, or the binary exits non-zero.
    """
    if not os.path.isfile(dict_name):
        return False
    cmd = ['./zstd.' + tag, '-D', dict_name]
    try:
        with open(sample, "rb") as sample_file:
            subprocess.check_call(
                cmd,
                stdin=sample_file,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
    except (OSError, subprocess.CalledProcessError):
        # Only the expected failures mean "dictionary not usable"; anything
        # else (e.g. KeyboardInterrupt) must not be silently swallowed.
        return False
    return True
98
99
def create_dict(tag, dict_source_path, fallback_tag=None):
    """Train the dictionary `dict.<tag>` unless that file already exists.

    The training samples are the `*.c` and `*.h` files found directly in
    `dict_source_path`.  Tag `v0.5.0` is trained with `./dictBuilder.<tag>`;
    every other tag with `./zstd.<tag> -f --train`.  The new dictionary is
    accepted only if the trainer exits with status 0 and `dict_ok()`
    confirms that the same binary can use it.

    Args:
        tag: Version tag selecting the binary and naming the dictionary.
        dict_source_path: Directory holding the training samples.
        fallback_tag: Tag whose existing `dict.<fallback_tag>` is copied to
            `dict.<tag>` when training fails; `None` disables the fallback.

    Raises:
        RuntimeError: Training failed and no `fallback_tag` was given.
    """
    dict_name = 'dict.' + tag
    if os.path.isfile(dict_name):
        print(dict_name + ' already exists')
        return

    # Sorted so the dictionary builder sees the samples in a stable order.
    files = sorted(
        glob.glob(dict_source_path + "/*.c") + glob.glob(dict_source_path + "/*.h")
    )
    if tag == 'v0.5.0':
        cmd = ['./dictBuilder.' + tag] + files + ['-o', dict_name]
    else:
        cmd = ['./zstd.' + tag, '-f', '--train'] + files + ['-o', dict_name]

    # The command is an argument list run without a shell, so sample paths
    # containing spaces or shell metacharacters reach the trainer intact.
    try:
        result = execute(cmd, print_output=False)
    except OSError as err:
        # Without a shell, a missing or non-executable binary raises instead
        # of producing a non-zero status; route it to the failure handling.
        print(err)
        result = 1

    # `files` may be empty; never index it unless there is a sample to use.
    if result == 0 and files and dict_ok(tag, dict_name, files[0]):
        print(dict_name + ' created')
    elif fallback_tag is not None:
        fallback_dict_name = 'dict.' + fallback_tag
        print('creating dictionary ' + dict_name + ' failed, falling back to ' + fallback_dict_name)
        shutil.copy(fallback_dict_name, dict_name)
    else:
        raise RuntimeError('ERROR: creating of ' + dict_name + ' failed')
121
122
def zstd(tag, args, input_file, output_file):
    """
    Run `./zstd.<tag>` with `args`, reading input_file on stdin and writing
    its stdout to output_file.
    Need this helper because 0.5.0 is broken when stdout is not a TTY.
    The command line and the captured stderr are printed.
    Throws subprocess.CalledProcessError if the command returns non-zero.
    """
    with open(input_file, "rb") as i, open(output_file, "wb") as o:
        cmd = ['./zstd.' + tag] + args
        print("Running: '{}', input={}, output={}" .format(
            ' '.join(cmd), input_file, output_file
        ))
        result = subprocess.run(cmd, stdin=i, stdout=o, stderr=subprocess.PIPE)
    # stderr is only shown for diagnostics: escape undecodable bytes rather
    # than letting a decode error mask the command's real exit status.
    print("Stderr: {}".format(result.stderr.decode("utf-8", errors="backslashreplace")))
    result.check_returncode()
137 result.check_returncode()
138
139
def dict_compress_sample(tag, sample):
    """Compress `sample` with `dict.<tag>` at levels 1, 3, 5, 9, 15 and 18.

    Each level is written to `<sample>_<LL>_64_<tag>_dictio.zst`, where
    `<LL>` is the two-digit level, using the `./zstd.<tag>` binary.
    """
    dict_name = 'dict.' + tag
    for level in (1, 3, 5, 9, 15, 18):
        level_args = ['-D', dict_name, '-{}'.format(level)]
        if level == 1:
            # Only the level-1 run is made with triple verbosity.
            level_args += ['-v', '-v', '-v']
        target = '{}_{:02d}_64_{}_dictio.zst'.format(sample, level, tag)
        zstd(tag, level_args, sample, target)
    print(tag + " : dict compression completed")
152
153
def compress_sample(tag, sample):
    """Compress `sample` without a dictionary at levels 1, 3, 5, 9, 15 and 18.

    Each level is written to `<sample>_<LL>_64_<tag>_nodict.zst`, where
    `<LL>` is the two-digit level, using the `./zstd.<tag>` binary.
    """
    for level in (1, 3, 5, 9, 15, 18):
        target = '{}_{:02d}_64_{}_nodict.zst'.format(sample, level, tag)
        zstd(tag, ['-{}'.format(level)], sample, target)
    print(tag + " : compression completed")
Yann Colletdca60f22016-05-23 14:23:55 +0200164
Yann Colletebc13bc2016-05-25 10:12:39 +0200165
# https://stackoverflow.com/a/19711609/2132223
def sha1_of_file(filepath):
    """Return the hexadecimal SHA-1 digest of the contents of `filepath`."""
    digest = hashlib.sha1()
    with open(filepath, 'rb') as handle:
        digest.update(handle.read())
    return digest.hexdigest()
170
Yann Colletebc13bc2016-05-25 10:12:39 +0200171
def remove_duplicates():
    """Delete `*.zst` files in the current directory that duplicate another.

    Files are visited in sorted name order.  Whenever a later file has
    exactly the same content as an earlier one, the later file is removed
    and the pair is reported on stdout.
    """
    list_of_zst = sorted(glob.glob('*.zst'))
    for i, ref_zst in enumerate(list_of_zst):
        if not os.path.isfile(ref_zst):
            # Already removed as a duplicate of an earlier file.
            continue
        for compared_zst in list_of_zst[i + 1:]:
            if not os.path.isfile(compared_zst):
                continue
            # shallow=False forces a byte-for-byte comparison.  The default
            # shallow mode treats two files with the same size and mtime as
            # equal without reading them, which could delete a distinct file.
            if filecmp.cmp(ref_zst, compared_zst, shallow=False):
                os.remove(compared_zst)
                print('duplicated : {} == {}'.format(ref_zst, compared_zst))
184
Yann Colletebc13bc2016-05-25 10:12:39 +0200185
def decompress_zst(tag):
    """Decompress every `*_nodict.zst` file in the current directory.

    Each file is decoded with `./zstd.<tag> -d` into
    `<file>_d64_<tag>.dec`, and the result is compared against `test_dat`.

    Raises:
        RuntimeError: A decompressed file differs from `test_dat`.
    """
    for file_zst in sorted(glob.glob('*_nodict.zst')):
        print(file_zst + ' ' + tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-d'], file_zst, file_dec)
        # shallow=False: always compare contents, so a coincidental match of
        # size and mtime can never be reported as a successful round trip.
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK ')
Yann Colletdca60f22016-05-23 14:23:55 +0200197
Yann Colletda4fe742016-05-23 15:43:17 +0200198
def decompress_dict(tag):
    """Decompress every `*_dictio.zst` file in the current directory.

    The tag that produced each file is recovered from its name and selects
    the dictionary `dict.<that tag>`.  The file is then decoded with
    `./zstd.<tag> -D <dict> -d` into `<file>_d64_<tag>.dec` and compared
    against `test_dat`.

    Raises:
        RuntimeError: A decompressed file differs from `test_dat`.
    """
    for file_zst in sorted(glob.glob('*_dictio.zst')):
        dict_tag = file_zst[:-11]  # remove "_dictio.zst"
        if head in dict_tag:  # find vdevel
            dict_tag = head
        else:
            # The tag is everything from the last 'v' in the remaining name.
            dict_tag = dict_tag[dict_tag.rfind('v'):]
        if tag == 'v0.6.0' and dict_tag < 'v0.6.0':
            # v0.6.0 is not run on files produced by tags that sort before it
            # (NOTE(review): presumably a known incompatibility - confirm).
            continue
        dict_name = 'dict.' + dict_tag
        print(file_zst + ' ' + tag + ' dict=' + dict_tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-D', dict_name, '-d'], file_zst, file_dec)
        # shallow=False: always compare contents, so a coincidental match of
        # size and mtime can never be reported as a successful round trip.
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK ')
inikep24aa7b42016-06-16 14:15:32 +0200218
219
def main():
    """Build every release from v0.5.0 on plus the working tree, then
    cross-check that they can decode each other's output.

    The repository root is taken to be the parent of the current working
    directory.  The steps are:

    1. Clone the upstream repository into the scratch directory (once) and
       list its release tags.
    2. Build `zstd.<tag>` for each tag that has no binary yet, and always
       rebuild the working-tree binary `zstd.<head>`.
    3. Remove `*.zst` / `*.dec` files left by a previous run and collect
       the dictionary training sources.
    4. For each tag, compress the sample with and without a dictionary,
       drop duplicate outputs, and decompress everything produced so far.
    5. Print the size and SHA-1 of each remaining compressed file.

    The results of the git and make invocations are not checked here.
    """
    separator = '-----------------------------------------------'
    base_dir = os.getcwd() + '/..'                  # /path/to/zstd
    tmp_dir = base_dir + '/' + tmp_dir_name         # /path/to/zstd/tests/versionsTest
    clone_dir = tmp_dir + '/' + 'zstd'              # /path/to/zstd/tests/versionsTest/zstd
    dict_source_path = tmp_dir + '/' + dict_source  # /path/to/zstd/tests/versionsTest/dict_source
    programs_dir = base_dir + '/programs'           # /path/to/zstd/programs
    os.makedirs(tmp_dir, exist_ok=True)

    # since Travis clones limited depth, we should clone full repository
    if not os.path.isdir(clone_dir):
        git(['clone', repo_url, clone_dir])

    shutil.copy2(base_dir + '/' + test_dat_src, tmp_dir + '/' + test_dat)

    # Retrieve all release tags
    print('Retrieve all release tags :')
    os.chdir(clone_dir)
    alltags = get_git_tags() + [head]
    tags = [t for t in alltags if t >= 'v0.5.0']
    print(tags)

    # Build all release zstd
    for tag in tags:
        os.chdir(base_dir)
        dst_zstd = '{}/zstd.{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/zstd.<TAG>
        if not os.path.isfile(dst_zstd) or tag == head:
            if tag != head:
                print(separator)
                print('compiling ' + tag)
                print(separator)
                r_dir = '{}/{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/<TAG>
                os.makedirs(r_dir, exist_ok=True)
                os.chdir(clone_dir)
                git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False)
                if tag == 'v0.5.0':
                    os.chdir(r_dir + '/dictBuilder')  # /path/to/zstd/tests/versionsTest/v0.5.0/dictBuilder
                    make(['clean'], False)  # separate 'clean' target to allow parallel build
                    make(['dictBuilder'], False)
                    shutil.copy2('dictBuilder', '{}/dictBuilder.{}'.format(tmp_dir, tag))
                os.chdir(r_dir + '/programs')  # /path/to/zstd/tests/versionsTest/<TAG>/programs
                make(['clean'], False)  # separate 'clean' target to allow parallel build
                make(['zstd'], False)
            else:
                os.chdir(programs_dir)
                print(separator)
                print('compiling head')
                print(separator)
                make(['zstd'], False)
            # Both branches leave the freshly built binary in the current directory.
            shutil.copy2('zstd', dst_zstd)

    # remove any remaining *.zst and *.dec from previous test
    os.chdir(tmp_dir)
    for compressed in glob.glob("*.zst"):
        os.remove(compressed)
    for dec in glob.glob("*.dec"):
        os.remove(dec)

    # copy *.c and *.h to a temporary directory ("dict_source")
    if not os.path.isdir(dict_source_path):
        os.mkdir(dict_source_path)
        for dict_glob in dict_globs:
            for relative_path in glob.glob(dict_glob, root_dir=base_dir):
                source_file = os.path.join(base_dir, relative_path)
                print("copying " + source_file + " to " + dict_source_path)
                shutil.copy(source_file, dict_source_path)

    print(separator)
    print('Compress test.dat by all released zstd')
    print(separator)

    # The head dictionary is created first: it is the fallback for every tag.
    create_dict(head, dict_source_path)
    for tag in tags:
        print(tag)
        # `tags` was already filtered to >= 'v0.5.0' above; the check is kept
        # so the dictionary steps stay explicitly tied to that minimum.
        if tag >= 'v0.5.0':
            create_dict(tag, dict_source_path, head)
            dict_compress_sample(tag, test_dat)
            remove_duplicates()
            decompress_dict(tag)
        compress_sample(tag, test_dat)
        remove_duplicates()
        decompress_zst(tag)

    print('')
    print('Enumerate different compressed files')
    # Named `zst_file` so the loop does not rebind the zstd() helper.
    for zst_file in sorted(glob.glob('*.zst')):
        print(zst_file + ' : ' + repr(os.path.getsize(zst_file)) + ', ' + sha1_of_file(zst_file))


if __name__ == '__main__':
    main()