CWB
Functions | Variables

parse_actions.c File Reference

#include "parse_actions.h"
#include <stdlib.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdarg.h>
#include <unistd.h>
#include "../cl/globals.h"
#include "../cl/special-chars.h"
#include "../cl/attributes.h"
#include "../cl/macros.h"
#include "cqp.h"
#include "options.h"
#include "ranges.h"
#include "symtab.h"
#include "treemacros.h"
#include "tree.h"
#include "eval.h"
#include "corpmanag.h"
#include "regex2dfa.h"
#include "builtins.h"
#include "groups.h"
#include "targets.h"
#include "attlist.h"
#include "concordance.h"
#include "output.h"
#include "print-modes.h"
#include "variables.h"

Functions

Variables


Function Documentation

CorpusList* ActivateCorpus ( CorpusList cl)
Evaltree add_tabular_pattern ( Evaltree  patterns,
Context context,
int  pattern_index 
)
void addHistoryLine ( void  )

Add a line of CQP input to the history file.

Supports parser rule: line -> command

The line that is added comes from QueryBuffer; the file it is written to is that named in cqp_history_file.

See also:
QueryBuffer
cqp_history_file

References cqp_history_file, cqpmessage(), Error, open_file(), QueryBuffer, reading_cqprc, silent, and write_history_file.

void after_CorpusCommand ( CorpusList cl)
CorpusList* after_CorpusSetExpr ( CorpusList cl)
CorpusList* after_Query ( CorpusList cl)
Constrainttree bool_and ( Constrainttree  left,
Constrainttree  right 
)
Constrainttree bool_implies ( Constrainttree  left,
Constrainttree  right 
)
Constrainttree bool_not ( Constrainttree  left)
Constrainttree bool_or ( Constrainttree  left,
Constrainttree  right 
)
void debug_output ( void  )

Prints out all the existing EvalEnvironments in the global array.

See also:
Environment

References eep, and show_environment().

void do_AddSubVariables ( char *  var1Name,
int  add,
char *  var2Name 
)
int do_AnchorPoint ( FieldType  field,
int  is_closing 
)
void do_attribute_show ( char *  name,
int  status 
)
void do_cat ( CorpusList cl,
struct Redir r,
int  first,
int  last 
)
void do_cut ( CorpusList cl,
int  first,
int  last 
)
void do_delete_lines ( CorpusList cl,
FieldType  f,
int  mode 
)
void do_delete_lines_num ( CorpusList cl,
int  start,
int  end 
)
void do_Description ( Context context,
int  nr,
char *  name 
)
void do_dump ( CorpusList cl,
int  first,
int  last,
struct Redir rd 
)

Dump query result (or part of it) as TAB-delimited table of corpus positions.

Parameters:
cl: The result (as a subcorpus, naturally).
first: Where in the result to begin dumping (index of cl->range).
last: Where in the result to end dumping (index of cl->range).
rd: Pointer to a Redir structure which contains information about where to dump to.

References TCorpus::charset, close_stream(), cl::corpus, cqpmessage(), _Range::end, Error, cl::keywords, open_stream(), cl::range, cl::size, cl::sortidx, _Range::start, Redir::stream, and cl::targets.

void do_exec ( char *  fname)

Execute the commands contained within a specified text file.

References cqp_parse_file(), cqpmessage(), Error, generate_code, Message, and open_file().

Constrainttree do_flagged_re_variable ( char *  varname,
int  flags 
)
Constrainttree do_flagged_string ( char *  s,
int  flags 
)
void do_group ( CorpusList cl,
FieldType  target,
int  target_offset,
char *  t_att,
FieldType  source,
int  source_offset,
char *  s_att,
int  cut,
int  expand,
struct Redir redir 
)
void do_group2 ( CorpusList cl,
FieldType  target,
int  target_offset,
char *  t_att,
int  cut,
int  expand,
struct Redir r 
)

Like do_group, but with no source.

References compute_grouping(), do_start_timer(), do_timing(), free_group(), NoField, and print_group().

Constrainttree do_IDReference ( char *  id_name,
int  auto_delete 
)
void do_info ( CorpusList cl)

References corpus_info().

Constrainttree do_LabelReference ( char *  label_name,
int  auto_delete 
)
Evaltree do_MeetStatement ( Evaltree  left,
Evaltree  right,
Context context 
)
CorpusList* do_MUQuery ( Evaltree  evalt,
int  keep_flag,
int  cut_value 
)
Constrainttree do_mval_string ( char *  s,
int  op,
int  flags 
)
int do_NamedWfPattern ( int  is_target,
char *  label,
int  pat_idx 
)
void do_OptDistance ( Context context,
int  l_bound,
int  u_bound 
)
void do_PrintAllVariables ( )
void do_printVariableSize ( char *  varName)
void do_PrintVariableValue ( char *  varName)
void do_reduce ( CorpusList cl,
int  number,
int  percent 
)
Constrainttree do_RelExExpr ( Constrainttree  left)
Constrainttree do_RelExpr ( Constrainttree  left,
enum b_ops  op,
Constrainttree  right 
)
void do_save ( CorpusList cl,
struct Redir r 
)
void do_SearchPattern ( Evaltree  expr,
Constrainttree  constraint 
)
void do_set_complex_target ( CorpusList cl,
FieldType  field_to_set,
SearchStrategy  strategy,
Constrainttree  boolt,
enum ctxtdir  direction,
int  number,
char *  id,
FieldType  field,
int  inclusive 
)
void do_set_target ( CorpusList cl,
FieldType  goal,
FieldType  source 
)

References NoField, and set_target().

CorpusList* do_setop ( RangeSetOp  op,
CorpusList c1,
CorpusList c2 
)
void do_SetVariableValue ( char *  varName,
char  operator,
char *  varValues 
)
Constrainttree do_SimpleVariableReference ( char *  varName)
void do_size ( CorpusList cl,
FieldType  field 
)
void do_sleep ( int  duration)

Puts the program to sleep.

A wrapper round the standard sleep() function (or Sleep() in Windows).

Parameters:
duration: How many seconds to sleep for.
CorpusList* do_StandardQuery ( int  cut_value,
int  keep_flag 
)
void do_start_timer ( void  )

Starts the timer running.

References timer_start_time, and timing.

Referenced by do_group(), do_group2(), do_MUQuery(), do_StandardQuery(), and do_TABQuery().

Constrainttree do_StringConstraint ( char *  s,
int  flags 
)
void do_StructuralContext ( Context context,
char *  name 
)
CorpusList* do_subset ( FieldType  field,
Constrainttree  boolt 
)
CorpusList* do_TABQuery ( Evaltree  patterns)
void do_timing ( char *  msg)

Shows the period since the timer started running.

Parameters:
msg: A message to print along with the reading from the timer.

References cqpmessage(), Info, timer_start_time, and timing.

Referenced by do_group(), do_group2(), and in_UnnamedCorpusCommand().

int do_undump ( char *  corpname,
int  extension_fields,
int  sort_ranges,
struct InputRedir rd 
)

Read a TAB-delimited table of corpus positions and create a named query result from it.

Acceptable values for extension_fields and the corresponding row formats: 0 = match matchend; 1 = match matchend target; 2 = match matchend target keyword.

References assign_temp_to_sub(), cl_free, cl_malloc(), CL_MAX_LINE_LENGTH, close_input_stream(), cqpmessage(), current_corpus, drop_temp_corpora(), Error, findcorpus(), InputRedir::is_pipe, is_qualified(), line, make_temp_corpus(), cl::mother_name, cl::mother_size, new, open_input_stream(), RangeSort(), split_subcorpus_name(), InputRedir::stream, SYSTEM, cl::type, valid_subcorpus_name(), and Warning.

Evaltree do_UnionStatement ( Evaltree  left,
Evaltree  right 
)
int do_WordformPattern ( Constrainttree  boolt,
int  lookahead 
)
int do_XMLTag ( char *  s_name,
int  is_closing,
int  op,
char *  regex,
int  flags 
)
void expand_dataspace ( CorpusList cl)

Expand the dataspace of a subcorpus.

This is done, e.g., by the CQP-syntax "expand" command, to include context into the matches found by a query.

Each corpus interval stored in the CorpusList is extended by an amount dependent on the information in the global variable "expansion", a Context object (which has been put there by the parser).

See also:
expansion
Parameters:
cl: The subcorpus to expand.

References calculate_leftboundary(), calculate_rightboundary(), cqpmessage(), ctxtsp::direction, _Range::end, False, left, leftright, cl::needs_update, cl::range, RangeSetop(), right, RUniq, cl::saved, cl::size, ctxtsp::size, _Range::start, SYSTEM, True, cl::type, and Warning.

Referenced by in_UnnamedCorpusCommand().

Constrainttree FunctionCall ( char *  f_name,
ActualParamList apl 
)
CorpusList* in_CorpusCommand ( char *  id,
CorpusList cl 
)
CorpusList* in_UnnamedCorpusCommand ( CorpusList cl)

This function is called after an UnnamedCorpusCommand rule is parsed.

Seems to be a tidying-up function.

Parameters:
cl: The result of the corpus-yielding command (first component of this syntax rule).
Returns:
Modified value of cl. May be NULL.

References Activation, assign_temp_to_sub(), cqpmessage(), do_timing(), drop_temp_corpora(), expand_dataspace(), free_environments(), generate_code, last_cyc, make_temp_corpus(), Message, Query, SetOperation, ctxtsp::size, SYSTEM, TEMP, cl::type, and Warning.

Evaltree make_first_tabular_pattern ( int  pattern_index,
Evaltree  next 
)
char * mval_string_conversion ( char *  s)

References cl_malloc(), cqpmessage(), Error, and generate_code.

Referenced by do_mval_string(), and do_XMLTag().

Constrainttree OptimizeStringConstraint ( Constrainttree  left,
enum b_ops  op,
Constrainttree  right 
)
void prepare_AlignmentConstraints ( char *  id)
void prepare_do_subset ( CorpusList cl,
FieldType  field 
)
void prepare_input ( void  )

Get ready to parse a command.

This function is called before the processing of each parsed line that is recognised as a command.

Mostly it involves setting the global variables to their starting-state values.

References free_environments(), generate_code, last_cyc, LastExpression, NoExpression, regex_string_pos, and searchstr.

void prepare_parse ( void  )
void prepare_Query ( )

This function sets things up to run a query.

It is called as an "action" before any detected Query in the parser.

[AH 2010/8/2: I have added the code checking input character encoding. Anything that is not part of a query should be plain ASCII - if not, then the lexer/parser should pick it up as bad. Filenames, etc. are obvious exceptions - but we can't check the encoding of those, because there's no guarantee it will be the same as that of the corpus, which is the only thing whose encoding we know. So it's up to the user to type filenames in an encoding their OS will accept! Canonicalisation is done within the CL_Regex, not here.]

References access_corpus(), TCorpus::charset, cl_string_validate_encoding(), cl::corpus, cqpmessage(), CurEnv, current_corpus, eep, Environment, Error, generate_code, make_temp_corpus(), cl::mother_name, cl::name, next_environment(), query_corpus, evalenv::query_corpus, QueryBuffer, RangeSetop(), RNonOverlapping, searchstr, cl::size, Warning, and within_gc.

void printSingleVariableValue ( Variable  v,
int  max_items 
)
void push_regchr ( char  c)

Add a character (in the sense of a byte) to the regex_string buffer.

Doesn't seem to currently be in use.

See also:
regex_string

References CL_MAX_LINE_LENGTH, cqpmessage(), regex_string, regex_string_pos, and Warning.

void RaiseError ( void  )
Evaltree reg_disj ( Evaltree  left,
Evaltree  right 
)
Evaltree reg_seq ( Evaltree  left,
Evaltree  right 
)
void resetQueryBuffer ( void  )

Empties the query buffer and sets the pointer to 0.

Supports parser rule: line -> command

See also:
QueryBuffer
QueryBufferP

References QueryBuffer, QueryBufferOverflow, and QueryBufferP.

Referenced by RaiseError().

Constrainttree Varref2IDList ( Attribute attr,
enum b_ops  op,
char *  varName 
)

Variable Documentation

This is used by the parser in response to CQP's "expand" operator, which incorporates context around the query hit into the match itself.

Functions involved in carrying this out utilise info stored here by the parser.

Referenced by findcorpus().

char regex_string[CL_MAX_LINE_LENGTH]

Buffer for storing regex strings.

As it says on the tin.

Doesn't seem currently to be in use anywhere.

Referenced by push_regchr().

Referenced by prepare_input(), and push_regchr().

int sslen

Referenced by do_SearchPattern().

struct timeval timer_start_time

Global variable for timing functions; not exported.

See also:
do_start_timer
do_timing

Referenced by do_start_timer(), and do_timing().

int within_gc

TODO: it would be very useful to have a description for this.

Referenced by after_Query(), do_IDReference(), and prepare_Query().