Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,10 @@ The commands are as follows:
all subsequent ones are concatenated to make the text, to avoid
surprises if you forget to quote it. You are responsible for
normalizing the text to remove punctuation, uppercase, centipedes,
etc. For example:

etc. Forced alignment uses `align_beam`, `align_pbeam`, and
`align_wbeam` instead of `beam`, `pbeam`, and `wbeam`; see
`pocketsphinx help-config`. For example:

pocketsphinx align goforward.wav "go forward ten meters"

By default, only word-level alignment is done. To get phone
Expand Down
4 changes: 4 additions & 0 deletions cython/_pocketsphinx.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1668,6 +1668,10 @@ cdef class Decoder:
segmentation in the usual manner. For phone-level alignment,
see `set_alignment` and `get_alignment`.

Pruning for this pass uses ``align_beam``, ``align_pbeam``, and
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should these be single or double backquotes?

``align_wbeam`` by default; set ``align_use_main_beams`` to use the
main decoder ``beam``/``pbeam``/``wbeam`` instead.

Args:
text(str): Sentence to align, as whitespace-separated
words. All words must be present in the
Expand Down
7 changes: 6 additions & 1 deletion doxygen/pocketsphinx.1
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,12 @@ sequence, and write a JSON object in the same format described above.
The first positional argument is the input, and all subsequent ones
are concatenated to make the text, to avoid surprises if you forget to
quote it. You are responsible for normalizing the text to remove
punctuation, uppercase, centipedes, etc. For example:
punctuation, uppercase, centipedes, etc.
Forced alignment uses
\fIalign_beam\fP, \fIalign_pbeam\fP, and \fIalign_wbeam\fP
instead of \fIbeam\fP, \fIpbeam\fP, and \fIwbeam\fP; defaults are
independent of LVCSR tuning (see \fBpocketsphinx help-config\fP).
For example:

.EX
pocketsphinx align goforward.wav "go forward ten meters"
Expand Down
7 changes: 6 additions & 1 deletion doxygen/pocketsphinx.1.in
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,12 @@ sequence, and write a JSON object in the same format described above.
The first positional argument is the input, and all subsequent ones
are concatenated to make the text, to avoid surprises if you forget to
quote it. You are responsible for normalizing the text to remove
punctuation, uppercase, centipedes, etc. For example:
punctuation, uppercase, centipedes, etc.
Forced alignment uses
\fIalign_beam\fP, \fIalign_pbeam\fP, and \fIalign_wbeam\fP
instead of \fIbeam\fP, \fIpbeam\fP, and \fIwbeam\fP; defaults are
independent of LVCSR tuning (see \fBpocketsphinx help-config\fP).
For example:

.EX
pocketsphinx align goforward.wav "go forward ten meters"
Expand Down
5 changes: 5 additions & 0 deletions include/pocketsphinx/search.h
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,11 @@ int ps_add_allphone_file(ps_decoder_t *ps, const char *name, const char *path);
* phoneme or state segmentations, you must subsequently call
* ps_set_alignment() and re-run decoding. It's tough son, but it's life.
*
* By default, pruning uses align_beam, align_pbeam, and align_wbeam
* rather than beam, pbeam, and wbeam. If align_use_main_beams is enabled,
* the main decoder beams are used instead. Defaults for align_* are tuned
* for forced alignment and are independent of LVCSR beam defaults.
*
* @memberof ps_decoder_t
* @param ps Decoder
* @param words String containing whitespace-separated words for alignment.
Expand Down
2 changes: 2 additions & 0 deletions programs/pocketsphinx_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,8 @@ usage_align(char *name)
fprintf(stderr, " INPUT Audio file to align (or '-' for stdin)\n");
fprintf(stderr, " WORDS... Words to align to (will be concatenated)\n");
fprintf(stderr, "\nAlignment-specific options:\n");
fprintf(stderr, " -align_beam FLOAT Beam for forced-alignment FSG (see also -align_pbeam, -align_wbeam)\n");
fprintf(stderr, " -align_use_main_beams yes/no Use -beam/-pbeam/-wbeam for alignment FSG (default: no)\n");
fprintf(stderr, " -phone_align yes/no Run a second pass to align phones and print their durations\n");
fprintf(stderr, " (default: no)\n");
fprintf(stderr, " -state_align yes/no Run a second pass to align phones and states and print their\n");
Expand Down
28 changes: 28 additions & 0 deletions src/config_macro.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@
POCKETSPHINX_FEAT_OPTIONS, \
POCKETSPHINX_ACMOD_OPTIONS, \
POCKETSPHINX_BEAM_OPTIONS, \
POCKETSPHINX_ALIGN_BEAM_OPTIONS, \
POCKETSPHINX_ALIGN_FSG_OPTIONS, \
POCKETSPHINX_SEARCH_OPTIONS, \
POCKETSPHINX_DICT_OPTIONS, \
POCKETSPHINX_NGRAM_OPTIONS, \
Expand Down Expand Up @@ -144,6 +146,32 @@
"3.0", \
"Weight for phoneme lookahead penalties" } \

/**
 * Beam widths for the forced-alignment FSG search (ps_set_align_text,
 * align subcommand).
 *
 * Declares three floating-point options, align_beam, align_pbeam and
 * align_wbeam, all with the same default of 1e-48.  They are separate
 * options from beam, pbeam and wbeam, so changing the main decoder beams
 * does not change these values.
 */
#define POCKETSPHINX_ALIGN_BEAM_OPTIONS \
{ "align_beam", \
ARG_FLOATING, \
"1e-48", \
"Beam width for each frame in forced-alignment FSG search " \
"(independent of beam)" }, \
{ "align_pbeam", \
ARG_FLOATING, \
"1e-48", \
"Phone-transition beam for forced-alignment FSG search " \
"(independent of pbeam)" }, \
{ "align_wbeam", \
ARG_FLOATING, \
"1e-48", \
"Word-exit beam for forced-alignment FSG search " \
"(independent of wbeam)" } \

/**
 * Toggle selecting which beams the forced-alignment FSG search uses.
 *
 * Declares the boolean option align_use_main_beams, which defaults to
 * "no".  When it is set to yes, the forced-alignment FSG uses beam, pbeam
 * and wbeam instead of align_beam, align_pbeam and align_wbeam.
 */
#define POCKETSPHINX_ALIGN_FSG_OPTIONS \
{ "align_use_main_beams", \
ARG_BOOLEAN, \
"no", \
"If yes, forced-alignment FSG uses beam, pbeam, wbeam instead of " \
"align_beam, align_pbeam, align_wbeam" } \

/** Options defining other parameters for tuning the search. */
#define POCKETSPHINX_SEARCH_OPTIONS \
{ "compallsen", \
Expand Down
47 changes: 37 additions & 10 deletions src/fsg_search.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,23 @@ fsg_search_add_altpron(fsg_search_t *fsgs, fsg_model_t *fsg)
return n_alt;
}

/**
 * Fetch the three pruning beams that an FSG search should start with.
 *
 * A search named PS_DEFAULT_ALIGN_SEARCH reads align_beam, align_pbeam
 * and align_wbeam, unless align_use_main_beams is enabled.  Any other
 * search, including one whose name is NULL, reads beam, pbeam and wbeam.
 *
 * @param config Configuration the beam values are read from.
 * @param name Name of the search being set up; may be NULL.
 * @param beam Receives the configured per-frame beam.
 * @param pbeam Receives the configured phone-transition beam.
 * @param wbeam Receives the configured word-exit beam.
 */
static void
fsg_search_beam_config(ps_config_t *config, char const *name,
                       float *beam, float *pbeam, float *wbeam)
{
    int from_align = 0;

    /* Only consult the toggle for the forced-alignment search itself. */
    if (name != NULL && 0 == strcmp(name, PS_DEFAULT_ALIGN_SEARCH))
        from_align = !ps_config_bool(config, "align_use_main_beams");

    *beam = ps_config_float(config, from_align ? "align_beam" : "beam");
    *pbeam = ps_config_float(config, from_align ? "align_pbeam" : "pbeam");
    *wbeam = ps_config_float(config, from_align ? "align_wbeam" : "wbeam");
}

ps_search_t *
fsg_search_init(const char *name,
fsg_model_t *fsg,
Expand All @@ -196,16 +213,21 @@ fsg_search_init(const char *name,
fsgs->frame = -1;

/* Get search pruning parameters */
fsgs->beam_factor = 1.0f;
fsgs->beam = fsgs->beam_orig
= (int32) logmath_log(acmod->lmath, ps_config_float(config, "beam"))
>> SENSCR_SHIFT;
fsgs->pbeam = fsgs->pbeam_orig
= (int32) logmath_log(acmod->lmath, ps_config_float(config, "pbeam"))
>> SENSCR_SHIFT;
fsgs->wbeam = fsgs->wbeam_orig
= (int32) logmath_log(acmod->lmath, ps_config_float(config, "wbeam"))
>> SENSCR_SHIFT;
{
float fl_beam, fl_pbeam, fl_wbeam;

fsgs->beam_factor = 1.0f;
fsg_search_beam_config(config, name, &fl_beam, &fl_pbeam, &fl_wbeam);
fsgs->beam = fsgs->beam_orig
= (int32) logmath_log(acmod->lmath, fl_beam)
>> SENSCR_SHIFT;
fsgs->pbeam = fsgs->pbeam_orig
= (int32) logmath_log(acmod->lmath, fl_pbeam)
>> SENSCR_SHIFT;
fsgs->wbeam = fsgs->wbeam_orig
= (int32) logmath_log(acmod->lmath, fl_wbeam)
>> SENSCR_SHIFT;
}

/* LM related weights/penalties */
fsgs->lw = ps_config_float(config, "lw");
Expand Down Expand Up @@ -242,6 +264,11 @@ fsg_search_init(const char *name,
if (ps_config_bool(config, "bestpath"))
fsgs->bestpath = TRUE;
#endif
/* Forced-alignment FSG: hyp() must list the full transcript. Lattice
* bestpath can return a shorter string than the Viterbi backtrace (see
* pocketsphinx_main align(), which disables bestpath). */
if (name != NULL && strcmp(name, PS_DEFAULT_ALIGN_SEARCH) == 0)
fsgs->bestpath = FALSE;

if (fsg_search_reinit(ps_search_base(fsgs),
ps_search_dict(fsgs),
Expand Down
1 change: 1 addition & 0 deletions test/unit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ set(TESTS
test_vad
test_vad_alloc
test_word_align
test_align_fsg_beam
test_endpointer
test_endpointer_timestamp
test_thread_local_compile
Expand Down
60 changes: 60 additions & 0 deletions test/unit/test_align_fsg_beam.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/* -*- c-basic-offset: 4 -*- */
#include <pocketsphinx.h>

#include "pocketsphinx_internal.h"
#include "fsg_search_internal.h"
#include "util/hash_table.h"
#include "test_macros.h"

/*
 * Unit test for beam selection in the forced-alignment FSG search.
 *
 * Two decoders are built with the same models and wbeam overridden to
 * 7e-29.  After ps_set_align_text(), the search registered under
 * PS_DEFAULT_ALIGN_SEARCH is fetched from ps->searches and its
 * beam_orig / pbeam_orig / wbeam_orig fields are inspected:
 *   1. without align_use_main_beams, the three values must be equal;
 *   2. with align_use_main_beams, wbeam_orig must differ from beam_orig.
 *
 * Returns 0 when the end of the test is reached.
 */
int
main(int argc, char *argv[])
{
ps_decoder_t *ps;
ps_config_t *config;
void *search_p;
fsg_search_t *fsgs;

/* Command-line arguments are not used by this test. */
(void)argc;
(void)argv;
err_set_loglevel(ERR_INFO);
/* Case 1: only the main wbeam is overridden, so it no longer matches
 * beam.  The forced-alignment FSG must ignore it and use align_*. */
TEST_ASSERT(config =
ps_config_parse_json(
NULL,
"loglevel: INFO, bestpath: false,"
"hmm: \"" MODELDIR "/en-us/en-us\","
"dict: \"" MODELDIR "/en-us/cmudict-en-us.dict\","
"samprate: 16000,"
"wbeam: 7e-29"));
TEST_ASSERT(ps = ps_init(config));
TEST_EQUAL(0, ps_set_align_text(ps, "go forward ten meters"));
/* Fetch the alignment search by name; the lookup must return 0. */
TEST_EQUAL(0, hash_table_lookup(ps->searches, PS_DEFAULT_ALIGN_SEARCH,
&search_p));
fsgs = (fsg_search_t *)search_p;
/* Equal beams are only expected if the align_* values were used,
 * since the main wbeam was set to a different value above. */
TEST_EQUAL(fsgs->wbeam_orig, fsgs->beam_orig);
TEST_EQUAL(fsgs->pbeam_orig, fsgs->beam_orig);

ps_free(ps);
ps_config_free(config);

/* Case 2: with align_use_main_beams, the FSG uses the global wbeam,
 * which was set to differ from beam. */
TEST_ASSERT(config =
ps_config_parse_json(
NULL,
"loglevel: ERROR, bestpath: false,"
"hmm: \"" MODELDIR "/en-us/en-us\","
"dict: \"" MODELDIR "/en-us/cmudict-en-us.dict\","
"samprate: 16000,"
"wbeam: 7e-29,"
"align_use_main_beams: yes"));
TEST_ASSERT(ps = ps_init(config));
TEST_EQUAL(0, ps_set_align_text(ps, "go forward ten meters"));
/* Same lookup as in case 1, against the second decoder. */
TEST_EQUAL(0, hash_table_lookup(ps->searches, PS_DEFAULT_ALIGN_SEARCH,
&search_p));
fsgs = (fsg_search_t *)search_p;
/* The overridden main wbeam must now show up in the search. */
TEST_ASSERT(fsgs->wbeam_orig != fsgs->beam_orig);

ps_free(ps);
ps_config_free(config);
return 0;
}
Loading