CWB
|
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <math.h>
#include "../cl/globals.h"
#include "feature_maps.h"
#define DEFAULT_CONFIG_LINES 4 |
number of config lines in the default config
Referenced by align_usage().
int align_do_alignment | ( | FMS | fms, |
int | if1, | ||
int | il1, | ||
int | if2, | ||
int | il2, | ||
FILE * | outfile | ||
) |
Actually does the alignment.
This function runs a best_path alignment on sentence regions [f1,l1]x[f2,l2] and writes the result to {outfile} (in .align format).
Usage: steps = align_do_alignment(FMS, f1, l1, f2, l2, outfile);
fms | The feature map to use in best_path alignment. |
if1 | First cpos in source corpus. |
il1 | Last cpos in source corpus. |
if2 | First cpos in target corpus. |
il2 | Last cpos in target corpus. |
outfile | File handle to print the alignment lines to. |
References align_print_line(), beam_width, best_path(), feature_match(), split_factor, and verbose.
Referenced by main().
int align_parse_args | ( | int | ac, |
char * | av[], | ||
int | min_args | ||
) |
Parses the program's commandline arguments.
Usage: optindex = align_parse_args(argc, argv, required_arguments);
ac | The program's argc |
av | The program's argv |
min_args | Minimum number of arguments to be parsed. |
References align_usage(), beam_width, outfile_name, prealign_has_values, prealign_name, progname, registry_directory, split_factor, verbose, and word_name.
Referenced by main().
void align_print_line | ( | FILE * | fd, |
int | f1, | ||
int | l1, | ||
int | f2, | ||
int | l2, | ||
int | quality | ||
) |
Prints an alignment line.
This function writes the given information to the specified file handle as a .align format line.
A .align line looks like this: {f1} {l1} {f2} {l2} {type} [{quality}]. For example, "140 169 137 180 1:2" means that corpus (position) ranges [140,169] and [137,180] form a 1:2 alignment pair.
Usage: align_print_line(fd, f1, l1, f2, l2, quality);
fd | File handle to print to. |
f1 | First cpos in source corpus. |
l1 | Last cpos in source corpus. |
f2 | First cpos in target corpus. |
l2 | Last cpos in target corpus. |
quality | Quality of the alignment. |
References cl_struc2cpos().
Referenced by align_do_alignment().
void align_usage | ( | void | ) |
string containing location of the registry directory.
Prints a message describing how to use the program to STDERR and then exits.
References default_config, DEFAULT_CONFIG_LINES, progname, and VERSION.
Referenced by align_parse_args().
int main | ( | int | argc, |
char * | argv[] | ||
) |
Main function for cwb-align.
argc | Number of command-line arguments. |
argv | Command-line arguments. |
References align_do_alignment(), align_parse_args(), ATT_POS, ATT_STRUC, cl_cpos2struc(), cl_delete_lexhash(), cl_free, cl_lexhash_add(), cl_lexhash_find(), cl_malloc(), cl_max_cpos(), cl_max_struc(), cl_new_attribute, cl_new_corpus(), cl_new_lexhash(), cl_struc2cpos(), cl_struc2str(), cl_struc_values(), config, config_lines, corpus1_name, corpus2_name, create_feature_maps(), _cl_lexhash_entry::data, _cl_lexhash_entry::_cl_lexhash_entry_data::integer, outfile_name, pre1, pre2, prealign_has_values, prealign_name, progname, registry_directory, s_name, size1, size2, word_name, ws1, and ws2.
int beam_width = 50 |
best path search beam width
Referenced by align_do_alignment(), align_parse_args(), BAR_write(), and best_path().
char** config = default_config |
Pointer to configuration strings.
Set initially to default_config; should be reset to the {config} part of argv[], if configuration is specified on the command line.
Referenced by main().
int config_lines = DEFAULT_CONFIG_LINES |
Number of lines in the configuration strings array.
Referenced by create_feature_maps(), and main().
char* corpus1_name |
name of the source corpus
char* corpus2_name |
name of the target corpus
char* default_config[DEFAULT_CONFIG_LINES] |
{ "-C:1", "-S:50:0.4", "-3:3", "-4:4" }
Set of strings containing default configuration options.
Notes on interpreting the lines (in order):
Referenced by align_usage().
char outfile_name[CL_MAX_FILENAME_LENGTH] = "out.align" |
name of the output file
Referenced by align_parse_args(), and main().
int prealign_has_values = 0 |
boolean: if 1, regions with same ID values are pre-aligned
Referenced by align_parse_args(), and main().
char prealign_name[CL_MAX_FILENAME_LENGTH] = "" |
pre-alignment given by structural attribute
Referenced by align_parse_args(), and main().
char* progname |
Name of the program (from the shell)
char* registry_directory = NULL |
string containing location of the registry directory.
Referenced by align_parse_args(), and main().
double split_factor = 1.2 |
2:2 alignment split factor
Referenced by align_do_alignment(), and align_parse_args().
int verbose = 0 |
controls printing of some extra progress info
word attribute handle: source
Referenced by create_feature_maps().
word attribute handle: target
Referenced by create_feature_maps().
char word_name[CL_MAX_FILENAME_LENGTH] = "word" |
name of the word attribute (default: word)