녹음 파일 fmod 변성 처리 후 WAV에서 AMR로 변환

요구 사항: 녹음 후 음성 변조 옵션이 필요합니다. 변조 모드를 선택해 미리 들어 볼 수 있어야 하고, 만족스러우면 저장할 수 있어야 합니다. 변조된 파일은 AMR 형식으로 저장합니다.

솔루션: 녹음 자체는 쉽게 해결됩니다. 녹음할 때마다 파일을 WAV 형식으로 저장합니다. 음성 변조에는 인터넷에서 비교적 흔히 볼 수 있는 fmod 프레임워크를 사용하며, 변조 결과는 WAV 형식으로 미리 듣거나 저장할 수 있습니다. 저장한 뒤에는 파일 형식을 WAV에서 AMR로 변환합니다. 변환은 안드로이드에 내장된 AmrInputStream으로 처리합니다.

다음과 같은 문제가 발생했습니다.

음성 변조 후 파일을 어떻게 저장합니까?

음성 변조 후 파일 형식을 변환하면 소리의 길이가 두 배로 늘어납니다.

다음은 구체적인 해결 방법이다.
음성 변조는 fmod 프레임워크로 구현합니다. 구체적인 방법은 인터넷에서 검색할 수 있으므로 여기에는 cpp 코드만 첨부합니다.

#include "inc/fmod.hpp"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include  "com_kidosc_voicechange_jni_VoiceFixer.h"

#include <android/log.h>

// Logcat helpers that log under the tag "zph" at INFO / ERROR priority.
// The macros deliberately do NOT end in a semicolon: the caller supplies it,
// so `if (cond) LOGI("..."); else ...` expands to a single well-formed statement.
#define LOGI(FORMAT, ...) __android_log_print(ANDROID_LOG_INFO,"zph",FORMAT,##__VA_ARGS__)
#define LOGE(FORMAT, ...) __android_log_print(ANDROID_LOG_ERROR,"zph",FORMAT,##__VA_ARGS__)

// Effect selectors compared against the `type` argument of the fix() entry point.
// NOTE(review): the numeric values are presumably mirrored on the Java side — confirm
// before renumbering. The spelling MODE_ROB0T (digit zero) is kept because the
// switch in fix() refers to it by that name.
enum {
    MODE_NORMAL   = 0,
    MODE_FUNNY    = 1,
    MODE_UNCLE    = 2,
    MODE_LOLITA   = 3,
    MODE_ROB0T    = 4,
    MODE_ETHEREAL = 5,
    MODE_CHORUS   = 6,
    MODE_HORROR   = 7
};
using namespace FMOD;

// File-wide playback state shared by the JNI entry points below.
// The initializers spell out the zero-initialization these globals already had.
System  *mSystem   = NULL;   // FMOD system object used by fix()
Sound   *sound     = NULL;   // sound loaded from the path given to fix()
Channel *channel   = NULL;   // channel the sound plays on; stopPlaying() stops it
DSP     *dsp       = NULL;   // effect unit created for the selected mode
JNIEnv  *mEnv      = NULL;   // JNI environment stored by fix()
bool     playing   = false;  // polled by fix() while the channel is playing
float    frequency = 0.0f;   // channel frequency scratch value used by MODE_FUNNY

// Forward declaration; the definition follows the JNI wrappers.
void stopPlaying();

/**
 * JNI entry point for VoiceFixer.init().
 * Intentionally empty: no work is performed here.
 */
JNIEXPORT void JNICALL
Java_com_kidosc_voicechange_jni_VoiceFixer_init(JNIEnv * /*env*/, jclass /*jcls*/) {
}

JNIEXPORT void JNICALL Java_com_kidosc_voicechange_jni_VoiceFixer_fix(JNIEnv *env,
                                                                      jclass jcls, jstring path_jstr,
                                                                      jint type,jint save) {


    playing = true;
    frequency = 0;
    System_Create(&mSystem);
    mSystem->setSoftwareFormat(8000,FMOD_SPEAKERMODE_MONO,0); //      8000，channel 1
    mEnv=env;
    const char *path_cstr = mEnv->GetStringUTFChars(path_jstr, NULL);
    if(save==1){
        char cDest[200] = "sdcard/xxx.wav";
        mSystem->setOutput(FMOD_OUTPUTTYPE_WAVWRITER); //       WAV
        mSystem->init(32, FMOD_INIT_NORMAL | FMOD_INIT_PROFILE_ENABLE,cDest);
    } else{
        mSystem->init(32, FMOD_INIT_NORMAL,NULL);
    }
    try {
        //    
        mSystem->createSound(path_cstr, FMOD_DEFAULT, NULL, &sound);
        mSystem->playSound(sound, 0, false, &channel);
        switch (type) {
            case MODE_NORMAL:
                LOGI("%s", path_cstr);
                LOGI("%s", "fix normal");
                break;
            case MODE_FUNNY:
                mSystem->createDSPByType(FMOD_DSP_TYPE_NORMALIZE, &dsp);
                channel->getFrequency(&frequency);
                frequency = frequency * 1.6;
                channel->setFrequency(frequency);
                break;
            case MODE_UNCLE:
                mSystem->createDSPByType(FMOD_DSP_TYPE_PITCHSHIFT, &dsp);
                dsp->setParameterFloat(FMOD_DSP_PITCHSHIFT_PITCH, 0.8);
                channel->addDSP(0, dsp);
                break;
            case MODE_LOLITA:
                mSystem->createDSPByType(FMOD_DSP_TYPE_PITCHSHIFT, &dsp);
                dsp->setParameterFloat(FMOD_DSP_PITCHSHIFT_PITCH,
                                       1.8);
                channel->addDSP(0, dsp);
                break;
            case MODE_ROB0T:
                mSystem->createDSPByType(FMOD_DSP_TYPE_ECHO, &dsp);
                dsp->setParameterFloat(FMOD_DSP_ECHO_DELAY, 50);
                dsp->setParameterFloat(FMOD_DSP_ECHO_FEEDBACK, 60);
                channel->addDSP(0, dsp);
                break;
            case MODE_ETHEREAL:
                mSystem->createDSPByType(FMOD_DSP_TYPE_ECHO, &dsp);
                dsp->setParameterFloat(FMOD_DSP_ECHO_DELAY, 300);
                dsp->setParameterFloat(FMOD_DSP_ECHO_FEEDBACK, 20);
                channel->addDSP(0, dsp);
                break;
            case MODE_CHORUS:
                mSystem->createDSPByType(FMOD_DSP_TYPE_ECHO, &dsp);
                dsp->setParameterFloat(FMOD_DSP_ECHO_DELAY, 100);
                dsp->setParameterFloat(FMOD_DSP_ECHO_FEEDBACK, 50);
                channel->addDSP(0, dsp);
                break;
            case MODE_HORROR:
                mSystem->createDSPByType(FMOD_DSP_TYPE_TREMOLO, &dsp);
                dsp->setParameterFloat(FMOD_DSP_TREMOLO_SKEW, 0.8);
                channel->addDSP(0, dsp);
                break;
            default:
                break;
        }

    } catch (...) {
        LOGE("%s", "    ");
        goto end;
    }
    mSystem->update();
    while (playing) {
        usleep(1000);
        channel->isPlaying(&playing);
    }
    goto end;
    end:
    mEnv->ReleaseStringUTFChars(path_jstr, path_cstr);
    sound->release();
    mSystem->close();
    mSystem->release();

    jclass clazz = mEnv -> FindClass("com/kidosc/voicechange/jni/VoiceFixer");
    if(clazz == NULL){
        printf("not found com/kidosc/voicechange/jni/VoiceFixer class");
        return;
    }
    jmethodID  id = mEnv->GetStaticMethodID(clazz,"setPlayState","()V");
    if (id==NULL){
        printf("method not found");
        return;
    }
    mEnv->CallStaticVoidMethod(clazz,id);
    printf("env->CallStaticVoidMethod(clazz,id);");
}


/**
 * JNI entry point for VoiceFixer.stopPlay().
 * Does nothing but forward to stopPlaying(); neither argument is used.
 */
JNIEXPORT void JNICALL
Java_com_kidosc_voicechange_jni_VoiceFixer_stopPlay(JNIEnv * /*env*/, jclass /*jcls*/) {
    stopPlaying();
}

void stopPlaying(){
    channel->stop();
    printf("stopplaying");
}

위 코드가 인터넷에 있는 코드와 다른 점은 변조된 파일을 저장하는 기능과 변조 재생을 중지하는 기능을 추가하고, 기본 출력의 샘플링 레이트와 채널 수를 바꾸었다는 것입니다. 그중 가장 중요한 것은 마지막의 setSoftwareFormat 메서드입니다(자세한 내용은 fmod 문서 참조).
변조된 임시 파일은 sdcard/xxx.wav에 저장됩니다. 저장이 끝난 후 형식을 변환하여 원하는 위치에 저장하면 됩니다.
형식 변환에는 안드로이드에 내장된 AmrInputStream.java를 사용합니다. 이 클래스는 숨겨진(hidden) API이므로 직접 사용하려면 소스 파일을 프로젝트에 복사해야 합니다. 구체적인 사용법은 직접 검색해 보시기 바랍니다. AmrInputStream이 변환할 때 기대하는 입력 형식은 기본적으로 8000Hz, 16bit, 단일 채널입니다. 이 매개 변수는 매우 중요합니다. WAV 형식이 이것과 일치하지 않으면 변환 후 소리가 길어지는 등 여러 문제가 발생합니다. 따라서 fmod의 출력 파일 형식을 8000Hz, 16bit, 단일 채널로 바꿔야 합니다. 이 부분이 가장 골치 아팠습니다! 인터넷에서 관련 내용을 전혀 찾을 수 없어 스스로 천천히 알아볼 수밖에 없었습니다. 처음에 fmod 출력의 샘플링 레이트를 8000으로 바꾸는 데는 성공했지만, 변환 후 소리의 길이가 두 배로 늘어난 것을 발견했습니다. 16bit 쪽이 문제라고 의심했지만 fmod가 몇 bit로 출력하는지 알 수 없었습니다. 그래서 AmrInputStream이 기본으로 받아들이는 오디오 데이터 형식을 바꿔 보았는데, 바꾸고 나니 소리가 완전히 왜곡되었습니다. 결국 fmod의 출력 오디오를 바꾸는 쪽으로 방향을 잡았습니다. setSoftwareFormat 메서드를 사용하면 출력 샘플링 레이트를 변경할 수 있습니다. 그렇다면 남은 문제는 왜 여전히 길이가 두 배로 늘어나느냐는 것입니다.

    /**
     * Converts an audio file to AMR by streaming it through {@code AmrInputStream}
     * and writing the encoded bytes, preceded by {@code header}, to {@code outPath}.
     * <p>
     * All streams are closed even when reading or writing fails, and the output
     * file is only created once the input has been opened successfully.
     * Any exception is caught and its stack trace printed; nothing is rethrown.
     * <p>
     * NOTE(review): {@code header} is a field declared outside this method —
     * presumably the AMR file magic; confirm against its declaration.
     *
     * @param inPath  path of the source (WAV) file to read
     * @param outPath path of the AMR file to write
     */
    public void systemWav2Amr(String inPath,String outPath){
        try (InputStream inputStream = new FileInputStream(inPath);
             AmrInputStream amrInputStream = new AmrInputStream(inputStream);
             FileOutputStream fileoutputStream = new FileOutputStream(outPath)) {

            fileoutputStream.write(header);
            byte[] buf = new byte[1024];
            int len;
            // Copy encoded data until the encoder reports no more bytes.
            while ((len = amrInputStream.read(buf)) > 0) {
                fileoutputStream.write(buf, 0, len);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

자료를 찾아보니 오디오의 재생 길이는 세 가지 값, 즉 채널 수, 샘플링 비트 수, 샘플링 레이트와 관련이 있습니다. 길이가 두 배로 늘어났다는 것은 실질적인 재생 속도가 이전의 절반이 되었다는 뜻이고, 이는 fmod가 출력한 오디오 데이터의 채널 수나 샘플링 비트 수 중 하나가 AmrInputStream이 기대하는 값의 두 배라는 것을 의미합니다. 처음에는 샘플링 비트 수가 32bit일 것이라고 추정하고 AmrInputStream의 입력 형식을 32bit로 바꿔 보았습니다. 이 방법으로 문제가 해결되지는 않았지만 재생 길이는 확실히 일치하게 되었습니다. 이는 접근 방향이 옳다는 것을 보여 주며, 남은 문제는 fmod 출력의 채널 수를 어떻게 바꾸느냐입니다. AmrInputStream의 채널 수는 1이고 fmod의 출력은 2채널입니다. fmod의 fmod_codec.h에서 채널과 관련된 속성은 다음과 같습니다.

/*
 * Wave-format description of a sound as supplied by a codec: sample format,
 * channel count, default frequency, lengths, loop points and speaker mapping.
 * Quoted here for its `channels` field (mono = 1, stereo = 2).
 */
struct FMOD_CODEC_WAVEFORMAT
{
    char               name[256];     /* [w] Name of sound.  Optional. */
    FMOD_SOUND_FORMAT  format;        /* [w] Format for (decompressed) codec output, ie FMOD_SOUND_FORMAT_PCM8, FMOD_SOUND_FORMAT_PCM16.  Mandatory - Must be supplied. */
    int                channels;      /* [w] Number of channels used by codec, ie mono = 1, stereo = 2.  Mandatory - Must be supplied.  */
    int                frequency;     /* [w] Default frequency in hz of the codec, ie 44100.  Mandatory - Must be supplied.  */
    unsigned int       lengthbytes;   /* [w] Length in bytes of the source data.  Used for FMOD_TIMEUNIT_RAWBYTES.  Optional. Default = 0. */
    unsigned int       lengthpcm;     /* [w] Length in decompressed, PCM samples of the file, ie length in seconds * frequency.  Used for Sound::getLength and for memory allocation of static decompressed sample data.  Mandatory - Must be supplied. */
    unsigned int       pcmblocksize;  /* [w] Minimum, optimal number of decompressed PCM samples codec can handle.  0 or 1 = no buffering.  Anything higher means FMOD will allocate a PCM buffer of this size to read in chunks.  The codec read callback will be called in multiples of this value.  Optional.  */
    int                loopstart;     /* [w] Loopstart in decompressed, PCM samples of file. Optional. Default = 0. */
    int                loopend;       /* [w] Loopend in decompressed, PCM samples of file. Optional. Default = 0. */
    FMOD_MODE          mode;          /* [w] Mode to determine whether the sound should by default load as looping, non looping, 2d or 3d.  Optional. Default = FMOD_DEFAULT. */
    FMOD_CHANNELMASK   channelmask;   /* [w] Defined channel bitmask to describe which speakers the channels in the codec map to, in order of channel count.  See fmod_common.h.  Optional. Leave at 0 to map to the speaker layout defined in FMOD_SPEAKER. */
    FMOD_CHANNELORDER  channelorder;  /* [w] Defined channel order type, to describe where each sound channel should pan for the number of channels specified.  See fmod_common.h.  Optional.  Leave at 0 to play in default speaker order. */
    float              peakvolume;    /* [w] Peak volume of sound. Optional. Default = 0 if not used. */
};

setSoftwareFormat 메서드의 두 번째 인자를 MONO로 설정할 수 있음을 발견했습니다. FMOD_SPEAKERMODE_MONO로 설정하면 channels는 1이 됩니다.

/*
 * Speaker configurations. A value of this type is passed as the second
 * argument of System::setSoftwareFormat; the fix() entry point passes
 * FMOD_SPEAKERMODE_MONO there. The enumerators rely on their declaration
 * order for their values, so the order must not be changed.
 */
typedef enum
{
    FMOD_SPEAKERMODE_DEFAULT,          /* Default speaker mode based on operating system/output mode.  Windows = control panel setting, Xbox = 5.1, PS3 = 7.1 etc. */
    FMOD_SPEAKERMODE_RAW,              /* There is no specific speakermode.  Sound channels are mapped in order of input to output.  Use System::setSoftwareFormat to specify speaker count. See remarks for more information. */
    FMOD_SPEAKERMODE_MONO,             /* The speakers are monaural. */
    FMOD_SPEAKERMODE_STEREO,           /* The speakers are stereo. */
    FMOD_SPEAKERMODE_QUAD,             /* 4 speaker setup.    This includes front left, front right, surround left, surround right.  */
    FMOD_SPEAKERMODE_SURROUND,         /* 5 speaker setup.    This includes front left, front right, center, surround left, surround right. */
    FMOD_SPEAKERMODE_5POINT1,          /* 5.1 speaker setup.  This includes front left, front right, center, surround left, surround right and an LFE speaker. */
    FMOD_SPEAKERMODE_7POINT1,          /* 7.1 speaker setup.  This includes front left, front right, center, surround left, surround right, back left, back right and an LFE speaker. */
    
    FMOD_SPEAKERMODE_MAX,              /* Maximum number of speaker modes supported. */
    FMOD_SPEAKERMODE_FORCEINT = 65536  /* Makes sure this enum is signed 32bit. */
} FMOD_SPEAKERMODE;

이렇게 설정하면 형식 변환이 완벽하게 이루어집니다.

이 내용에 흥미가 있습니까?

현재 기사가 여러분의 문제를 해결하지 못하는 경우 AI 엔진은 머신러닝 분석(스마트 모델이 방금 만들어져 부정확한 경우가 있을 수 있음)을 통해 가장 유사한 기사를 추천합니다:

다양한 언어의 JSON

JSON은 Javascript 표기법을 사용하여 데이터 구조를 레이아웃하는 데이터 형식입니다. 그러나 Javascript가 코드에서 이러한 구조를 나타낼 수 있는 유일한 언어는 아닙니다. 저는 일반적으로 '객체'{}...

텍스트를 자유롭게 공유하거나 복사할 수 있습니다.하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.

CC BY-SA 2.5, CC BY-SA 3.0 및 CC BY-SA 4.0에 따라 라이센스가 부여됩니다.

녹음 파일 fmod 변성 처리 후 WAV에서 AMR로 변환

좋은 웹페이지 즐겨찾기