fix(windows): vendor audiopus_sys + patch libopus for clang-cl SIMD
Some checks failed
Mirror to GitHub / mirror (push) Failing after 35s
Build Release Binaries / build-amd64 (push) Has been cancelled

cargo-xwin drives the Windows MSVC cross-compile via clang-cl, under
which CMake sets MSVC=1 — causing libopus 1.3.1's `if(NOT MSVC)` guards
to skip the per-file `-msse4.1` / `-mssse3` COMPILE_FLAGS that its x86
SIMD source files need. Clang-cl (unlike real cl.exe) still honors
Clang's target-feature system, so those files then fail to compile
with "always_inline function '_mm_cvtepi16_epi32' requires target
feature 'sse4.1'" errors across silk/NSQ_sse4_1.c, NSQ_del_dec_sse4_1.c,
and VQ_WMat_EC_sse4_1.c.

Earlier attempts to fix this downstream (cargo-xwin toolchain file,
override.cmake CMAKE_C_COMPILE_OBJECT <FLAGS> replace, CFLAGS env vars)
all failed because cargo-xwin rewrites override.cmake from scratch on
every `cargo xwin build` invocation and cmake-rs's -DCMAKE_C_FLAGS=
assembly happens before toolchain FORCE sets propagate.

Fixing it upstream at the source: vendor audiopus_sys 0.2.2 into
vendor/audiopus_sys, patch its bundled opus/CMakeLists.txt to introduce
an MSVC_CL var (true only when CMAKE_C_COMPILER_ID == "MSVC", i.e. real
cl.exe), and flip the eight `if(NOT MSVC)` SIMD guards to
`if(NOT MSVC_CL)`. Clang-cl then gets the GCC-style per-file flags and
the SSE4.1 sources build cleanly. Also flip the `if(MSVC)` global /arch
block at line 445 to `if(MSVC_CL)` so only cl.exe applies /arch:AVX and
clang-cl relies purely on per-file flags (no global/per-file mixing).

Wire via [patch.crates-io] in the workspace root Cargo.toml; the patch
is resolved relative to the workspace root as `vendor/audiopus_sys`.

Upstream context: xiph/opus#256, xiph/opus PR #257 (both stale).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Siavash Sameni
2026-04-10 14:12:59 +04:00
parent 53f57eea07
commit 0683dde5d3
423 changed files with 103000 additions and 2 deletions

66
vendor/audiopus_sys/opus/training/rnn_dump.py vendored Executable file
View File

@@ -0,0 +1,66 @@
#!/usr/bin/python
from __future__ import print_function
from keras.models import Sequential
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import GRU
from keras.models import load_model
from keras import backend as K
import sys
import numpy as np
def printVector(f, vector, name):
    """Write ``vector`` to ``f`` as a C ``opus_int8`` array named ``name``.

    The input is flattened, each element is multiplied by 128, rounded,
    and saturated to the range [-128, 127].  Values are emitted eight per
    line, comma separated, inside a ``static const`` array definition.

    f      -- writable text stream (anything with a ``write`` method)
    vector -- array-like of numbers; any shape, it is flattened first
    name   -- identifier used for the generated C array
    """
    flat = np.reshape(vector, (-1))
    count = len(flat)
    f.write('static const opus_int8 {}[{}] = {{\n '.format(name, count))
    for idx, value in enumerate(flat):
        # Scale to Q7 and saturate so the value fits a signed 8-bit integer.
        quantized = max(-128, min(127, int(round(128 * value))))
        f.write('{}'.format(quantized))
        if idx == count - 1:
            # No separator after the final element.
            break
        f.write(',')
        # Start a new row after every eighth value, otherwise pad with a space.
        f.write("\n " if idx % 8 == 7 else " ")
    f.write('\n};\n\n')
def binary_crossentrop2(y_true, y_pred):
    """Confidence-weighted binary cross-entropy, averaged over the last axis.

    Each element's cross-entropy is scaled by ``2 * |y_true - 0.5|``, so a
    target of exactly 0.5 contributes nothing and targets of 0 or 1 get
    full weight.

    y_true -- target tensor
    y_pred -- predicted tensor with the same shape as ``y_true``
    """
    # The Keras 2 backend signature is binary_crossentropy(target, output).
    # The arguments used to be passed in the opposite (Keras 1) order, which
    # disagreed with the loss of the same name in the rnn_train.py script.
    return K.mean(2*K.abs(y_true-0.5) * K.binary_crossentropy(y_true, y_pred), axis=-1)
# Usage: rnn_dump.py <weights file> <output C file>
#
# Rebuilds the network architecture, loads the trained weights from
# sys.argv[1] and writes them to sys.argv[2] as C source.
#
# Alternative that loads a complete saved model instead of weights only:
#model = load_model(sys.argv[1], custom_objects={'binary_crossentrop2': binary_crossentrop2})
main_input = Input(shape=(None, 25), name='main_input')
x = Dense(32, activation='tanh')(main_input)
x = GRU(24, activation='tanh', recurrent_activation='sigmoid', return_sequences=True)(x)
x = Dense(2, activation='sigmoid')(x)
model = Model(inputs=main_input, outputs=x)
model.load_weights(sys.argv[1])
weights = model.get_weights()

# C array names, in the order the tensors are read from model.get_weights().
# NOTE(review): assumes Keras returns kernel/bias for the Dense layers and
# kernel/recurrent kernel/bias for the GRU in this order -- confirm against
# the Keras version in use.
weight_names = [
    'layer0_weights',
    'layer0_bias',
    'layer1_weights',
    'layer1_recur_weights',
    'layer1_bias',
    'layer2_weights',
    'layer2_bias',
]

# The context manager guarantees the output file is closed even if one of
# the writes below raises.
with open(sys.argv[2], 'w') as f:
    f.write('/*This file is automatically generated from a Keras model*/\n\n')
    f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "mlp.h"\n\n')
    for index, weight_name in enumerate(weight_names):
        # Index explicitly so a model with too few tensors still fails loudly.
        printVector(f, weights[index], weight_name)
    # Layer descriptors referencing the arrays emitted above.
    f.write('const DenseLayer layer0 = {\n layer0_bias,\n layer0_weights,\n 25, 32, 0\n};\n\n')
    f.write('const GRULayer layer1 = {\n layer1_bias,\n layer1_weights,\n layer1_recur_weights,\n 32, 24\n};\n\n')
    f.write('const DenseLayer layer2 = {\n layer2_bias,\n layer2_weights,\n 24, 2, 1\n};\n\n')

177
vendor/audiopus_sys/opus/training/rnn_train.py vendored Executable file
View File

@@ -0,0 +1,177 @@
#!/usr/bin/python3
from __future__ import print_function
from keras.models import Sequential
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers import CuDNNGRU
from keras.layers import SimpleRNN
from keras.layers import Dropout
from keras import losses
import h5py
from keras.optimizers import Adam
from keras.constraints import Constraint
from keras import backend as K
import numpy as np
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Install a TensorFlow session for Keras whose per-process GPU memory
# fraction is set to 0.44.
session_config = tf.ConfigProto()
session_config.gpu_options.per_process_gpu_memory_fraction = 0.44
keras_session = tf.Session(config=session_config)
set_session(keras_session)
def binary_crossentrop2(y_true, y_pred):
    """Confidence-weighted binary cross-entropy, averaged over the last axis.

    The per-element cross-entropy is multiplied by ``2 * |y_true - 0.5|``:
    a target of exactly 0.5 gets zero weight, targets of 0 or 1 get full
    weight.
    """
    confidence = 2*K.abs(y_true-0.5)
    cross_entropy = K.binary_crossentropy(y_true, y_pred)
    return K.mean(confidence * cross_entropy, axis=-1)
def binary_accuracy2(y_true, y_pred):
    """Accuracy metric that scores targets equal to 0.5 as correct.

    An element counts as a hit when the rounded prediction equals the
    target; an element whose target is exactly 0.5 additionally contributes
    1.  The sum of both indicators is averaged over the last axis.
    """
    hits = K.cast(K.equal(y_true, K.round(y_pred)), 'float32')
    ignored = K.cast(K.equal(y_true, 0.5), 'float32')
    return K.mean(hits + ignored, axis=-1)
def quant_model(model):
    """Snap every weight of ``model`` onto the signed 8-bit Q7 grid, in place.

    Each weight tensor is scaled by 128, rounded, saturated to the integer
    range [-128, 127] and scaled back by 1/128 (0.0078125), so the resulting
    values lie in [-1.0, 127/128] in steps of 1/128.

    model -- object exposing ``get_weights()`` (returning a list of arrays)
             and ``set_weights(list)``
    Returns None; the model is updated through ``set_weights``.
    """
    # Saturate BEFORE scaling back.  The clamp used to be applied to the
    # already rescaled value, where the bounds -128/127 are in the wrong
    # units and never enforce the int8 range (e.g. 1.0 stayed 1.0 instead
    # of becoming 127/128).
    quantized = [
        np.clip(np.round(128 * tensor), -128, 127) * 0.0078125
        for tensor in model.get_weights()
    ]
    model.set_weights(quantized)
class WeightClip(Constraint):
    """Keras constraint that clips every weight into the range [-c, c]."""

    def __init__(self, c=2):
        # c is the symmetric clipping bound.
        self.c = c

    def __call__(self, p):
        bound = self.c
        return K.clip(p, -bound, bound)

    def get_config(self):
        """Return the constraint's name and bound as a dict."""
        settings = {'name': self.__class__.__name__}
        settings['c'] = self.c
        return settings
reg = 0.000001
# Every kernel, recurrent kernel and bias is clipped to [-0.998, 0.998].
constraint = WeightClip(.998)

print('Build model...')
# 25 input features per step; the sequence length is left unspecified.
main_input = Input(shape=(None, 25), name='main_input')
dense_out = Dense(
    32,
    activation='tanh',
    kernel_constraint=constraint,
    bias_constraint=constraint,
)(main_input)
# A CuDNNGRU(24, return_sequences=True, ...) with the same three constraints
# was previously tried here in place of the plain GRU.
gru_out = GRU(
    24,
    recurrent_activation='sigmoid',
    activation='tanh',
    return_sequences=True,
    kernel_constraint=constraint,
    recurrent_constraint=constraint,
    bias_constraint=constraint,
)(dense_out)
predictions = Dense(
    2,
    activation='sigmoid',
    kernel_constraint=constraint,
    bias_constraint=constraint,
)(gru_out)
model = Model(inputs=main_input, outputs=predictions)
batch_size = 2048

print('Loading data...')
with h5py.File('features10b.h5', 'r') as hf:
    all_data = hf['data'][:]
print('done.')

# Cut the rows into fixed-length sequences; rows that do not fill a whole
# window are dropped.
window_size = 1500
nb_sequences = len(all_data)//window_size
print(nb_sequences, ' sequences')
usable_rows = nb_sequences*window_size

# All columns except the last two are the inputs (reshaped to 25 per step);
# the last two columns are the targets.
x_train = np.reshape(all_data[:usable_rows, :-2], (nb_sequences, window_size, 25))
# Copy the targets so the in-place edits below do not touch all_data.
y_train = np.reshape(np.copy(all_data[:usable_rows, -2:]), (nb_sequences, window_size, 2))

print("Marking ignores")
# In each sequence, set the first target to 0.5 for every step that comes
# before the first step whose second target is >= 1.
for sequence in y_train:
    for step in sequence:
        if step[1] >= 1:
            break
        step[0] = 0.5

# Drop the reference to the raw array before the float32 copies are made.
all_data = 0
x_train = x_train.astype('float32')
y_train = y_train.astype('float32')
print(len(x_train), 'train sequences. x shape =', x_train.shape, 'y shape = ', y_train.shape)
# Resume from a previously trained checkpoint.
model.load_weights('newweights10a1b_ep206.hdf5')


def _compile_for_training():
    """Compile the model with the custom loss/metric and a new Adam(0.0001)."""
    model.compile(loss=binary_crossentrop2,
                  optimizer=Adam(0.0001),
                  metrics=[binary_accuracy2])


# One row per training stage:
#   (initial_epoch, epochs, validate on the training set, re-compile first)
# Every stage quantizes the weights, fits up to `epochs`, then saves a
# checkpoint named after `epochs`.
training_stages = [
    (0, 10, True, False),
    (10, 50, False, False),
    (50, 100, False, True),
    (100, 150, False, False),
    (150, 200, False, False),
    (200, 201, False, False),
    (201, 202, True, False),
    (202, 203, True, False),
    (203, 204, True, False),
    (204, 205, True, False),
    (205, 206, True, False),
]

_compile_for_training()
print('Train...')
for first_epoch, last_epoch, validate, recompile in training_stages:
    if recompile:
        # NOTE(review): this builds a new Adam instance, which presumably
        # discards the optimizer state accumulated so far -- confirm intent.
        _compile_for_training()
    # Re-quantize before each stage so training continues from weights that
    # sit on the 8-bit grid.
    quant_model(model)
    # Only pass the optional arguments for stages that need them.
    fit_options = {'batch_size': batch_size, 'epochs': last_epoch}
    if first_epoch:
        fit_options['initial_epoch'] = first_epoch
    if validate:
        fit_options['validation_data'] = (x_train, y_train)
    model.fit(x_train, y_train, **fit_options)
    # The checkpoint is written after fitting, i.e. before the next
    # quantization pass.
    model.save("newweights10a1c_ep{}.hdf5".format(last_epoch))

12
vendor/audiopus_sys/opus/training/txt2hdf5.py vendored Executable file
View File

@@ -0,0 +1,12 @@
#!/usr/bin/python
from __future__ import print_function
import numpy as np
import h5py
import sys
# Usage: txt2hdf5.py <input text file> <output HDF5 file>
#
# Reads a whitespace-separated numeric text file as float32 and stores it
# in the output file under the dataset name 'data'.
data = np.loadtxt(sys.argv[1], dtype='float32')
# The context manager closes the file even if create_dataset raises.
with h5py.File(sys.argv[2], 'w') as h5f:
    h5f.create_dataset('data', data=data)