Locked History Actions

attachment:patches-r150-4.diff of PANTERA

Attachment 'patches-r150-4.diff'

Download

   1 Index: third_party/morfeusz/Makefile.am
   2 ===================================================================
   3 --- third_party/morfeusz/Makefile.am.orig	2010-08-10 18:11:58.000000000 +0200
   4 +++ third_party/morfeusz/Makefile.am	2012-07-04 11:15:13.500186560 +0200
   5 @@ -1,10 +1,10 @@
   6  install-exec-local:
   7 -	$(INSTALL_PROGRAM) -D lib/libmorfeusz.so.0.6 $(pkglibdir)/libmorfeusz.so.0.6
   8 -	$(LN_S) -f libmorfeusz.so.0.6 $(pkglibdir)/libmorfeusz.so.0
   9 -	$(LN_S) -f libmorfeusz.so.0.6 $(pkglibdir)/libmorfeusz.so
  10 +#	$(INSTALL_PROGRAM) -D lib/libmorfeusz.so.0.6 $(pkglibdir)/libmorfeusz.so.0.6
  11 +#	$(LN_S) -f libmorfeusz.so.0.6 $(pkglibdir)/libmorfeusz.so.0
  12 +#	$(LN_S) -f libmorfeusz.so.0.6 $(pkglibdir)/libmorfeusz.so
  13  
  14  uninstall-local:
  15 -	rm -f $(pkglibdir)/libmorfeusz.so
  16 -	rm -f $(pkglibdir)/libmorfeusz.so.0
  17 -	rm -f $(pkglibdir)/libmorfeusz.so.0.6
  18 +#	rm -f $(pkglibdir)/libmorfeusz.so
  19 +#	rm -f $(pkglibdir)/libmorfeusz.so.0
  20 +#	rm -f $(pkglibdir)/libmorfeusz.so.0.6
  21  
  22 Index: src/pantera.cpp
  23 ===================================================================
  24 --- src/pantera.cpp.orig	2011-11-21 17:25:45.000000000 +0100
  25 +++ src/pantera.cpp	2012-07-04 11:26:38.940173323 +0200
  26 @@ -35,7 +35,7 @@
  27  #include "pantera_rules.h"
  28  
  29  #ifndef DEFAULT_TAGSET
  30 -#define DEFAULT_TAGSET "ipipan"
  31 +#define DEFAULT_TAGSET "nkjp"
  32  #endif
  33  
  34  #ifndef DEFAULT_ENGINE
  35 @@ -80,9 +80,9 @@
  36  
  37      if (fs::is_regular_file(path)) {
  38          if (path.filename() == "text_structure.xml") {
  39 -            candidates.push_back(make_pair(path.file_string(), "nkjp-text"));
  40 +            candidates.push_back(make_pair(path.string(), "nkjp-text"));
  41          } else if (path.filename() == "morph.xml") {
  42 -            candidates.push_back(make_pair(path.file_string(),
  43 +            candidates.push_back(make_pair(path.string(),
  44                          "ipipan-morph"));
  45          } else if (path.extension() == ".xml") {
  46              fs::ifstream stream(path);
  47 @@ -91,13 +91,13 @@
  48                  char line[1024];
  49                  stream.getline(line, sizeof(line));
  50                  if (!strncmp(pattern, line, strlen(pattern))) {
  51 -                    candidates.push_back(make_pair(path.file_string(),
  52 +                    candidates.push_back(make_pair(path.string(),
  53                                  "ipipan-morph"));
  54                      break;
  55                  }
  56              }
  57          } else if (path.extension() == ".txt" && specified_directly) {
  58 -            candidates.push_back(make_pair(path.file_string(),
  59 +            candidates.push_back(make_pair(path.string(),
  60                          "plaintext"));
  61          }
  62      } else if (fs::is_directory(path)) {
  63 @@ -137,7 +137,7 @@
  64          type) {
  65      vector<fs::path> ret;
  66      if (type == "ipipan-morph") {
  67 -        ret.push_back(input_path.file_string() + ".disamb");
  68 +        ret.push_back(input_path.string() + ".disamb");
  69      } else if (type == "nkjp-text" || type == "plaintext") {
  70          string segm_filename = "ann_segmentation.xml";
  71          string morph_filename = "ann_morphosyntax.xml";
  72 @@ -160,31 +160,33 @@
  73  boost::scoped_ptr<Lexer<MyLexeme> > lexer;
  74  
  75  static void preprocess_file(const fs::path& path, const string& type,
  76 -        const Tagset* tagset, vector<MyLexeme>& text, bool only_lex = false) {
  77 +        const Tagset* tagset, vector<MyLexeme>& text, const bool only_lex = false,
  78 +        const bool ignoreSentsInsideSegment = false) {
  79      fs::ifstream data_stream(path);
  80      lexer.reset(make_lexer(type, data_stream));
  81  
  82      // 1. Parser.
  83 -    print_status("LEXER", path.file_string());
  84 +    print_status("LEXER", path.string());
  85      text.clear();
  86      lexer->setQuiet(options.count("verbose") == 0);
  87      lexer->parseStreamToVector(text, &tagset);
  88  
  89      if (only_lex)
  90          return;
  91 -
  92      // 2. Sentencer.
  93      if (options.count("no-sentencer") == 0) {
  94 -        print_status("SENTENCER", path.file_string());
  95 -        static LibSegmentSentencer<MyLexeme> sentencer;
  96 -        text = sentencer.addSentenceDelimiters(text);
  97 +        print_status("SENTENCER", path.string());
  98 +        static LibSegmentSentencer<MyLexeme> sentencer(ignoreSentsInsideSegment);
  99 +        string sentencerRulesFile = options.count("sentencer-rules") != 0 
 100 +            ? options["sentencer-rules"].as<string>()
 101 +            : "";
 102 +        text = sentencer.addSentenceDelimiters(text, sentencerRulesFile);
 103      }
 104  
 105      // 3. Morphological analyzer.
 106      if (options.count("no-morph") == 0) {
 107  
 108 -        static MorfeuszAnalyzer<MyLexeme> morfeusz(tagset,
 109 -                options.count("no-guesser") == 0);
 110 +        static MorfeuszAnalyzer<MyLexeme> morfeusz(tagset);
 111          static PolishSegmDisambiguator<MyLexeme> segm_disamb;
 112          static bool first_time = true;
 113  
 114 @@ -195,15 +197,19 @@
 115  
 116              if (options.count("morph-dict")) {
 117                  BOOST_FOREACH(const string& filename,
 118 -                        options["morph-dict"].as<vector<string> >())
 119 -                    morfeusz.loadMorphDict(filename);
 120 +                        options["morph-dict"].as<vector<string> >()) {
 121 +                    ifstream stream(filename.c_str());
 122 +                    stream.exceptions(ifstream::badbit);
 123 +                    morfeusz.loadMorphDict(stream);
 124 +                }
 125              }
 126          }
 127  
 128 -        print_status("MORPH", path.file_string());
 129 -        text = morfeusz.analyzeText(text);
 130 +        print_status("MORPH", path.string());
 131 +        bool use_odgadywacz = options.count("no-guesser") == 0;
 132 +        text = morfeusz.analyzeText(text, use_odgadywacz);
 133  
 134 -        print_status("SEGM-DISAMB", path.file_string());
 135 +        print_status("SEGM-DISAMB", path.string());
 136          segm_disamb.disambiguateSegmentation(text);
 137      }
 138  }
 139 @@ -211,7 +217,7 @@
 140  static void postprocess_file(const fs::path& path, string type,
 141          const vector<fs::path> output_paths, const Tagset* tagset,
 142          vector<MyLexeme>& text) {
 143 -    print_status("WRITER", path.file_string());
 144 +    print_status("WRITER", path.string());
 145  
 146      string output_format = options["output-format"].as<string>();
 147      if (output_format == "xces")
 148 @@ -226,7 +232,7 @@
 149                      % output_format));
 150  
 151      if (type == "ipipan-morph" || type == "ipipan-disamb") {
 152 -        ofstream rewrite_out((path.file_string() + ".disamb").c_str());
 153 +        ofstream rewrite_out((path.string() + ".disamb").c_str());
 154          IpiPanWriter<MyLexeme> writer(rewrite_out, type == "ipipan-morph");
 155          writer.writeVectorToStream(tagset, text);
 156      } else if (type == "nkjp-text" || type == "plaintext") {
 157 @@ -333,6 +339,8 @@
 158          ("morph-dict,d", po::value<vector<string> >(),
 159           "extra morphological dictionaries, see documentation "
 160           "on the project page for format examples.")
 161 +        ("sentencer-rules,s", po::value<string>(), "path to sentencer rules file (in SRX format)")
 162 +        ("ignore-sents-inside-segment", "ignore sentence delimiters inside segment (instead of splitting the segment)")
 163          ("nkjp-tool-name", po::value<string>()->default_value("pantera"),
 164           "the name of the tool to use when producing NKJP XML output "
 165           "(default is 'pantera').")
 166 @@ -451,7 +459,7 @@
 167              BOOST_FOREACH(const input_pair_type& input_pair, training_files) {
 168                  vector<MyLexeme> text;
 169                  preprocess_file(input_pair.first, input_pair.second, tagset,
 170 -                        text, true);
 171 +                        text, true, options.count("ignore-sents-inside-segment") != 0);
 172                  all_text.insert(all_text.end(), text.begin(), text.end());
 173              }
 174  
 175 @@ -485,7 +493,7 @@
 176                  fs::ofstream data_ostream(engine_path, ios::out);
 177                  boost::archive::text_oarchive engine_oarchive(data_ostream);
 178                  engine_oarchive << engine;
 179 -                data_stream.close();
 180 +                data_ostream.close();
 181  
 182                  cerr << endl;
 183                  cerr << "The engine has been converted to a newer file format."
 184 @@ -553,7 +561,7 @@
 185                      input_pair.second);
 186  
 187              vector<MyLexeme> text;
 188 -            preprocess_file(input_pair.first, input_pair.second, tagset, text);
 189 +            preprocess_file(input_pair.first, input_pair.second, tagset, text, false, options.count("ignore-sents-inside-segment") != 0);
 190              progress.set(done += 0.3);
 191              progress_left -= 0.3;
 192  
 193 Index: src/nlpcommon/libsegmentsentencer.h
 194 ===================================================================
 195 --- src/nlpcommon/libsegmentsentencer.h.orig	2011-01-20 22:28:22.000000000 +0100
 196 +++ src/nlpcommon/libsegmentsentencer.h	2012-07-04 11:26:38.784173324 +0200
 197 @@ -11,6 +11,12 @@
 198  #include <istream>
 199  #include <vector>
 200  #include <algorithm>
 201 +#include <string>
 202 +#include <iostream>
 203 +#include <sstream>
 204 +
 205 +#include <boost/algorithm/string.hpp>
 206 +#include <boost/foreach.hpp>
 207  
 208  #include <nlpcommon/sentencer.h>
 209  #include <nlpcommon/lexeme.h>
 210 @@ -20,166 +26,288 @@
 211  #include <nlpcommon/_pstream.h>
 212  #include <nlpcommon/util.h>
 213  
 214 +#include <TaggingOptions.h>
 215 +
 216  #ifndef SEGMENT_PATH
 217  #error SEGMENT_PATH should be defined automatically by build system
 218  #endif
 219  
 220 +#define DEFAULT_SEGMENT_RULES "segment.srx"
 221 +
 222  namespace NLPCommon {
 223  
 224  using namespace redi;
 225  
 226  template<class Lexeme = DefaultLexeme>
 227 -class LibSegmentSentencer : public Sentencer<Lexeme>
 228 -{
 229 +class LibSegmentSentencer: public Sentencer<Lexeme> {
 230 +public:
 231 +    //~ typedef typename vector<Lexeme>::iterator LexemsIterator;
 232  private:
 233      string segment_exec;
 234 +    bool ignoreSentsInsideSegment;
 235      pstream ps;
 236  
 237 -public:
 238 -    LibSegmentSentencer()
 239 -        : Sentencer<Lexeme>(),
 240 -          segment_exec(find_with_path(SEGMENT_PATH, "segment_batch").file_string())
 241 -    {
 242 +    std::string getCurrentChunk(
 243 +            const std::string& response,
 244 +            const int response_position,
 245 +            const std::string& utf8_orth) {
 246 +        string res = response.substr(
 247 +                                response_position,
 248 +                                std::min(response.length() - response_position, utf8_orth.length()));
 249 +        int currPos = response_position + res.length();
 250 +        while (chunkWithoutSentenceDelimiters(res).length() < utf8_orth.length()
 251 +                && currPos < response.length()) {
 252 +            res += response[currPos];
 253 +            currPos++;
 254 +        }
 255 +        return res;
 256      }
 257  
 258 -    virtual ~LibSegmentSentencer() { }
 259 -
 260 -    vector<Lexeme> addSentenceDelimiters(const vector<Lexeme>& text) {
 261 -        int len = text.size();
 262 -        vector<Lexeme> out;
 263 +    std::string chunkWithoutSentenceDelimiters(const std::string& chunk) {
 264 +        std::string res = chunk;
 265 +        res.erase(std::remove(res.begin(), res.end(), '\0'), res.end());
 266 +        return res;
 267 +    }
 268 +    
 269 +    void splitChunk(vector<string>& res, const string& chunk) {
 270 +        std::stringstream currStream;
 271 +        bool addedCurrStream = false;
 272 +        BOOST_FOREACH( char c, chunk ) {
 273 +            addedCurrStream = false;
 274 +            if (c == '\0') {
 275 +                res.push_back(currStream.str());
 276 +                currStream.str("");
 277 +                addedCurrStream = true;
 278 +            }
 279 +            else {
 280 +                currStream << c;
 281 +            }
 282 +        }
 283 +        if (!addedCurrStream) {
 284 +            res.push_back(currStream.str());
 285 +        }
 286 +    }
 287  
 288 +    std::vector<Lexeme> splitSegmentIntoSents(const Lexeme& lex, const std::string& currChunk) {
 289 +        std::vector<Lexeme> res;
 290 +        std::vector<std::string> delimitedChunks;
 291 +        splitChunk(delimitedChunks, currChunk);
 292 +        //~ std::cerr << "currChunk.length()=" << currChunk.length() << " delimitedChunks.size()=" << delimitedChunks.size() << std::endl;
 293 +        bool isFirst = true;
 294 +        BOOST_FOREACH( std::string chunk, delimitedChunks ) {
 295 +            if (!isFirst) {
 296 +                res.push_back(Lexeme(Lexeme::END_OF_SENTENCE));
 297 +                res.push_back(Lexeme(Lexeme::START_OF_SENTENCE));
 298 +                res.push_back(Lexeme(Lexeme::NO_SPACE));
 299 +            }
 300 +            Lexeme currLex(Lexeme::SEGMENT);
 301 +            currLex.setUtf8Orth(chunk);
 302 +            res.push_back(currLex);
 303 +            isFirst = false;
 304 +        }
 305 +        return res;
 306 +    }
 307 +    
 308 +    void runSegmentSubprocess(const string& rulesPath) {
 309          if (!ps.is_open()) {
 310 -            vector<string> args;
 311 +            vector < string > args;
 312              args.push_back(segment_exec);
 313              args.push_back("--null-delimiter");
 314 +            args.push_back("-l");
 315 +            args.push_back("pl");
 316 +            args.push_back("-s");
 317 +            args.push_back(rulesPath);
 318 +
 319              ps.open(segment_exec, args,
 320 -                    pstreams::pstdin|pstreams::pstdout|pstreams::pstderr);
 321 +                    pstreams::pstdin | pstreams::pstdout | pstreams::pstderr);
 322              if (!ps.is_open()) {
 323 -                throw Exception("Cannot execute 'segment_batch' excutable "
 324 -                        "with path '" SEGMENT_PATH "')");
 325 +                throw Exception("Cannot execute 'segment_batch' excutable with path '" SEGMENT_PATH "')");
 326 +            }
 327 +        }
 328 +    }
 329 +    
 330 +    int rewriteNonParagraphLexems(const vector<Lexeme>& text, int textIdx, vector<Lexeme>& out) {
 331 +        // Copy lexemes to `out` until a START_OF_PARAGRAPH lexeme or the end of `text`;
 332 +        // returns the index of the first lexeme that was not copied.
 333 +        // The bounds check precedes every element access, so text[text.size()] is never read.
 334 +        while (textIdx < text.size()
 335 +                && text[textIdx].getType() != Lexeme::START_OF_PARAGRAPH) {
 336 +            out.push_back(text[textIdx]);
 337 +            this->advanceProgress();
 338 +            textIdx++;
 339 +        }
 340 +        return textIdx;
 341 +    
 342 +    int getOneParagraph(const vector<Lexeme>& text, const int currTextIdx, vector<Lexeme>& paragraph) {
 343 +        int textIdx = currTextIdx;
 344 +        assert(textIdx < text.size());
 345 +        assert(text[textIdx].getType() == Lexeme::START_OF_PARAGRAPH);
 346 +        bool stop = false;
 347 +        while (textIdx < text.size() && !stop) {
 348 +            Lexeme lex = text[textIdx];
 349 +            paragraph.push_back(lex);
 350 +            if (lex.getType() == Lexeme::END_OF_PARAGRAPH) {
 351 +                stop = true;
 352              }
 353 +            textIdx++;
 354 +        }
 355 +        return textIdx;
 356 +    }
 357 +    
 358 +    void feedSentencerWithParagraph(const vector<Lexeme>& paragraph) {
 359 +        bool no_space = true;
 360 +        BOOST_FOREACH( Lexeme lex, paragraph ) {
 361 +            if (lex.getType() == Lexeme::NO_SPACE) {
 362 +                no_space = true;
 363 +            } 
 364 +            else if (lex.getType() == Lexeme::SEGMENT) {
 365 +                if (!no_space)
 366 +                    ps << ' ';
 367 +                ps << lex.getUtf8Orth();
 368 +                no_space = false;
 369 +            }
 370 +        }
 371 +        ps << '\n';
 372 +        ps.flush();
 373 +    }
 374 +    
 375 +    void readSentencerResponse(string& response) {
 376 +        if (!getline(ps.out(), response))
 377 +            throw Exception("No response returned by 'segment_batch' executable");
 378 +    }
 379 +    
 380 +    int handleSpacesAndNewSentMarks(const bool no_space, const bool first_sentence, const string& response, int response_position, vector<Lexeme>& out) {
 381 +        
 382 +        bool was_space_before_newsent = false;
 383 +        
 384 +        // handle space before new sentence
 385 +        if (response[response_position] == ' '
 386 +                && response[response_position + 1] == '\0') {
 387 +            was_space_before_newsent = true;
 388 +            response_position++;
 389          }
 390 +        
 391 +        // handle new sentence
 392 +        if (response[response_position] == '\0') {
 393 +            if (!first_sentence) {
 394 +                out.push_back(Lexeme(Lexeme::END_OF_SENTENCE));
 395 +            }
 396 +            out.push_back(Lexeme(Lexeme::START_OF_SENTENCE));
 397  
 398 -        try {
 399 -            int i = -1, j;
 400 -            for (;;) {
 401 -                // Rewrite everything which is out of paragraphs.
 402 -                for (i++; i < len; i++) {
 403 -                    const Lexeme& lex = text[i];
 404 +            response_position++;
 405 +        }
 406 +        
 407 +        // handle regular space
 408 +        if (!no_space && !was_space_before_newsent) {
 409 +            if (response[response_position] != ' ') {
 410 +                throw Exception(
 411 +                    boost::str(boost::format(
 412 +                        "Expected space not found in "
 413 +                        "output of segment_batch. "
 414 +                        "(Response there: '%1%')")
 415 +                    % response.substr(response_position)));
 416 +            }
 417 +            response_position++;
 418 +        }
 419 +        
 420 +        return response_position;
 421 +    }
 422 +    
 423 +    int handleCurrentToken(const Lexeme& lex, const string& response, int response_position, vector<Lexeme>& out) {
 424 +        string utf8_orth = lex.getUtf8Orth();
 425 +        string currChunk = getCurrentChunk(response, response_position, utf8_orth);
 426 +        if (currChunk == utf8_orth) {
 427 +            response_position += utf8_orth.length();
 428 +            out.push_back(lex);
 429 +        }
 430 +        else if (chunkWithoutSentenceDelimiters(currChunk) == utf8_orth) {
 431 +            if (ignoreSentsInsideSegment) {
 432 +                std::cerr << "WARN: ignoring sentence delimiters in segment '" << currChunk << "'." << std::endl;
 433 +                out.push_back(lex);
 434 +                response_position += currChunk.length();
 435 +            }
 436 +            else {
 437 +                std::cerr << "WARN: adding sentence delimiters into segment '" << currChunk << "'." << std::endl;
 438 +                BOOST_FOREACH( Lexeme lex, splitSegmentIntoSents(lex, currChunk)) {
 439 +                    //~ if (lex.getType() == Lexeme::SEGMENT)
 440 +                        //~ std::cerr << "WARN: adding " << lex.getUtf8Orth() << std::endl;
 441                      out.push_back(lex);
 442 -                    this->advanceProgress();
 443 -                    if (lex.getType() == Lexeme::START_OF_PARAGRAPH)
 444 -                        break;
 445                  }
 446 -
 447 -                if (i == len)
 448 -                    break;
 449 -
 450 -                // Feed the sentencer with the paragraph.
 451 -                bool no_space = true;
 452 -                for (j = i; j < len; j++) {
 453 -                    const Lexeme& lex = text[j];
 454 -                    if (lex.getType() == Lexeme::END_OF_PARAGRAPH) {
 455 -                        break;
 456 -                    } else if (lex.getType() == Lexeme::NO_SPACE) {
 457 -                        no_space = true;
 458 -                    } else if (lex.getType() == Lexeme::SEGMENT) {
 459 -                        if (!no_space)
 460 -                            ps << ' ';
 461 -                        ps << lex.getUtf8Orth();
 462 -                        no_space = false;
 463 -                    }
 464 -                }
 465 -                ps << '\n';
 466 -
 467 -                // Read response.
 468 -                ps.flush();
 469 -                std::string response;
 470 -                if (!getline(ps.out(), response)) {
 471 -                    std::cerr << "RESP: " << response << std::endl;
 472 -                    throw Exception("No response returned by 'segment_batch'"
 473 -                            " executable");
 474 -                }
 475 -
 476 -
 477 -                // Read back and insert sentence delimiters.
 478 -                bool first_sentence = true;
 479 -                int response_position = 0;
 480 +                response_position += currChunk.length();
 481 +            }
 482 +        }
 483 +        else {
 484 +            throw Exception(boost::str(boost::format(
 485 +                        "Expected word '%1%' not found in "
 486 +                        "output of segment_batch. "
 487 +                        "(Response there: '%2%')")
 488 +                        % utf8_orth
 489 +                        % response.substr(response_position)));
 490 +        }
 491 +        
 492 +        return response_position;
 493 +    }
 494 +    
 495 +    void parseSentencerResponse(const vector<Lexeme>& paragraph, const string& response, vector<Lexeme>& out) {
 496 +        bool no_space = true;
 497 +        bool first_sentence = true;
 498 +        int response_position = 0;
 499 +        BOOST_FOREACH( Lexeme lex, paragraph ) {
 500 +            if (lex.getType() == Lexeme::NO_SPACE) {
 501                  no_space = true;
 502 -                for (j = i + 1; j < len; j++) {
 503 -                    const Lexeme& lex = text[j];
 504 -                    if (lex.getType() == Lexeme::END_OF_PARAGRAPH) {
 505 -                        break;
 506 -                    } else if (lex.getType() == Lexeme::NO_SPACE) {
 507 -                        out.push_back(lex);
 508 -                        no_space = true;
 509 -                    } else if (lex.getType() == Lexeme::SEGMENT) {
 510 -                        const std::string& utf8_orth = lex.getUtf8Orth();
 511 -
 512 -                        if (response[response_position] == '\0') {
 513 -                            // New sentence starts here.
 514 -                            if (first_sentence) {
 515 -                                first_sentence = false;
 516 -                            } else {
 517 -                                out.push_back(Lexeme(Lexeme::END_OF_SENTENCE));
 518 -                            }
 519 -                            out.push_back(Lexeme(Lexeme::START_OF_SENTENCE));
 520 -
 521 -                            response_position++;
 522 -                        }
 523 -
 524 -                        // std::cerr << "response_pos " << response_position
 525 -                        //     << " len " << response.length() << " utf8_orth '" <<
 526 -                        //     utf8_orth << "' no_space " << (no_space ? "Y" : "N")
 527 -                        //     << std::endl;
 528 -
 529 -                        if (!no_space) {
 530 -                            if (response[response_position] != ' ') {
 531 -                                throw Exception(boost::str(boost::format(
 532 -                                                "Expected space not found in "
 533 -                                                "output of segment_batch. "
 534 -                                                "(Response there: '%1%')")
 535 -                                            % response.substr(response_position)));
 536 -                            }
 537 -                            response_position++;
 538 -                        }
 539 -
 540 -                        if (response.substr(response_position,
 541 -                                    std::min(response.length() - response_position,
 542 -                                        utf8_orth.length()))
 543 -                                != utf8_orth) {
 544 -                            std::cerr << "BAD " << utf8_orth << std::endl <<
 545 -                                        response.substr(response_position) << std::endl;
 546 -                            throw Exception(boost::str(boost::format(
 547 -                                            "Expected word '%1%' not found in "
 548 -                                            "output of segment_batch. "
 549 -                                            "(Response there: '%2%')")
 550 -                                        % utf8_orth
 551 -                                        % response.substr(response_position)));
 552 -                        }
 553 -
 554 -                        response_position += utf8_orth.length();
 555 -                        out.push_back(lex);
 556 -                        no_space = false;
 557 -                    } else {
 558 -                        out.push_back(lex);
 559 -                    }
 560 -                    this->advanceProgress();
 561 -                }
 562 -
 563 -                if (!first_sentence)
 564 -                    out.push_back(Lexeme(Lexeme::END_OF_SENTENCE));
 565 -                out.push_back(Lexeme(Lexeme::END_OF_PARAGRAPH));
 566 -
 567 -                i = j;
 568 +                out.push_back(lex);
 569 +            }
 570 +            else if (lex.getType() == Lexeme::SEGMENT) {
 571 +                //~ std::cerr << "segment " << lex.getUtf8Orth() << std::endl;
 572 +                response_position = handleSpacesAndNewSentMarks(no_space, first_sentence, response, response_position, out);
 573 +                //~ std::cerr << "handled spaces and new sent marks " << response_position << std::endl;
 574 +                response_position = handleCurrentToken(lex, response, response_position, out);
 575 +                //~ std::cerr << "handled token itself " << response_position << std::endl;
 576 +                no_space = false;
 577 +                first_sentence = false;
 578              }
 579 +            else
 580 +                out.push_back(lex);
 581 +        }
 582 +    }
 583 +    
 584 +    int doOneParagraph(const vector<Lexeme>& text, int textIdx, vector<Lexeme>& out) {
 585 +        vector<Lexeme> paragraph;
 586 +        string response;
 587 +        textIdx = getOneParagraph(text, textIdx, paragraph);
 588 +        feedSentencerWithParagraph(paragraph);
 589 +        readSentencerResponse(response);
 590 +        parseSentencerResponse(paragraph, response, out);
 591 +        return textIdx;
 592 +    }
 593  
 594 -            return out;
 595 -        } catch (...) {
 596 -            ps.rdbuf()->kill(SIGKILL);
 597 -            ps.close();
 598 -            throw;
 599 +public:
 600 +
 601 +    LibSegmentSentencer(const bool ignoreSentsInsideSegment = false) :
 602 +            Sentencer<Lexeme>(), 
 603 +            segment_exec(find_with_path(SEGMENT_PATH, "segment_batch").string()), 
 604 +            ignoreSentsInsideSegment(ignoreSentsInsideSegment) {}
 605 +
 606 +    virtual ~LibSegmentSentencer() {
 607 +    }
 608 +    
 609 +    vector<Lexeme> addSentenceDelimiters(const vector<Lexeme>& text, const string& sentencerRules) {
 610 +        vector<Lexeme> res;
 611 +        
 612 +        string rulesPath = sentencerRules.empty() 
 613 +                ? find_with_path(SEGMENT_PATH, DEFAULT_SEGMENT_RULES).string()
 614 +                : sentencerRules;
 615 +        runSegmentSubprocess(rulesPath);
 616 +        
 617 +        int textIdx = 0;
 618 +        
 619 +        while (textIdx < text.size()) {
 620 +            textIdx = rewriteNonParagraphLexems(text, textIdx, res);
 621 +            if (textIdx < text.size())
 622 +                textIdx = doOneParagraph(text, textIdx, res);
 623          }
 624 +        
 625 +        return res;
 626      }
 627  };
 628  
 629 Index: libpantera.pc.in
 630 ===================================================================
 631 --- libpantera.pc.in.orig	2011-06-08 22:20:59.000000000 +0200
 632 +++ libpantera.pc.in	2012-07-04 11:15:13.500186560 +0200
 633 @@ -9,6 +9,6 @@
 634  Requires:
 635  Version: @PACKAGE_VERSION@
 636  Libs: -L${pkglibdir} -lcorpus -lnlpcommon -lmorfeusz @BOOST_LDFLAGS@ @BOOST_SERIALIZATION_LIB@ @BOOST_REGEX_LIB@ @BOOST_PROGRAM_OPTIONS_LIB@ @BOOST_SYSTEM_LIB@ @BOOST_FILESYSTEM_LIB@ @BOOST_IOSTREAMS_LIB@ @BOOST_MPI_LIB@ @ICU_LIBS@ @LIBICONV@ -lpantera
 637 -Cflags: 
 638 +Cflags: @OPENMP_CXXFLAGS@
 639  
 640  
 641 Index: src/Makefile.am
 642 ===================================================================
 643 --- src/Makefile.am.orig	2011-06-08 21:25:58.000000000 +0200
 644 +++ src/Makefile.am	2012-07-04 16:28:02.691824049 +0200
 645 @@ -1,6 +1,6 @@
 646  AM_CXXFLAGS = @BOOST_CPPFLAGS@ @OPENMP_CXXFLAGS@ @MORFEUSZ_CFLAGS@ \
 647  	-I../third_party/TaKIPI18/Linux/Corpus \
 648 -	-DSEGMENT_PATH=\"$(libexecdir)/$(PACKAGE):@abs_top_srcdir@/third_party/segment\" \
 649 +	-DSEGMENT_PATH=\"$(libexecdir)/$(PACKAGE):$(pkgdatadir):@abs_top_srcdir@/third_party/segment\" \
 650  	-DENGINES_PATH=\"$(pkgdatadir)/engines:@abs_top_srcdir@/engines\"
 651  
 652  SUBDIRS = nlpcommon
 653 @@ -35,17 +35,18 @@
 654  
 655  clean-local:
 656  	+make -C ../third_party/TaKIPI18/Linux/bin clean
 657 +	rm -rf ../third_party/TaKIPI18/Linux/bin/*
 658  
 659  install-exec-local: ../third_party/TaKIPI18/Linux/bin/Corpus/libcorpus.so
 660 -	$(INSTALL_PROGRAM) -D ../third_party/TaKIPI18/Linux/bin/Corpus/libcorpus.so $(pkglibdir)/libcorpus.so
 661 +	$(INSTALL_PROGRAM) -D ../third_party/TaKIPI18/Linux/bin/Corpus/libcorpus.so $(DESTDIR)$(pkglibdir)/libcorpus.so.1
 662 +	ln -sf libcorpus.so.1 $(DESTDIR)$(pkglibdir)/libcorpus.so
 663  
 664  GLOBAL_LINK_FLAGS = -Wl,-rpath -Wl,$(pkglibdir)
 665  
 666  LIBCORPUS_LINK_FLAGS = -L../third_party/TaKIPI18/Linux/bin/Corpus \
 667 -					   -Wl,-rpath -Wl,@abs_top_srcdir@/third_party/TaKIPI18/Linux/bin/Corpus \
 668  					   -lcorpus
 669  
 670 -LIBMORFEUSZ_LINK_FLAGS = -Wl,-rpath -Wl,@abs_top_srcdir@/third_party/morfeusz/lib
 671 +LIBMORFEUSZ_LINK_FLAGS = 
 672  
 673  BUILT_SOURCES = rules/c1.m4h
 674  
 675 @@ -59,46 +60,71 @@
 676  	rules/a.h rules/c1.m4h rules/p1.h rules/p2.h \
 677  	rules/impl.h rules/make.h brillengine.h \
 678  	brillexeme.h unigram.h rules.h pantera_rules.h \
 679 -	pantera.h
 680 +	pantera.h TaggingOptions.h
 681  pantera_CXXFLAGS = $(AM_CXXFLAGS)
 682 -pantera_LDFLAGS = @BOOST_LDFLAGS@ @BOOST_SERIALIZATION_LIB@ \
 683 -				   @BOOST_REGEX_LIB@ @BOOST_PROGRAM_OPTIONS_LIB@ \
 684 +pantera_LDFLAGS = $(LIBCORPUS_LINK_FLAGS) $(GLOBAL_LINK_FLAGS) $(LIBMORFEUSZ_LINK_FLAGS) \
 685 +				   @BOOST_LDFLAGS@ 
 686 +
 687 +pantera_LDADD = nlpcommon/libnlpcommon.la \
 688 +				../third_party/TaKIPI18/Linux/bin/Corpus/libcorpus.so \
 689 +				   @BOOST_SERIALIZATION_LIB@ \
 690 +				   @BOOST_PROGRAM_OPTIONS_LIB@ \
 691  				   @BOOST_SYSTEM_LIB@ @BOOST_FILESYSTEM_LIB@ \
 692  				   @BOOST_IOSTREAMS_LIB@ \
 693  				   @BOOST_MPI_LIB@ @ICU_LIBS@ @LIBICONV@ \
 694 -				   @MORFEUSZ_LIBS@ $(LIBCORPUS_LINK_FLAGS) \
 695 -				   $(GLOBAL_LINK_FLAGS) $(LIBMORFEUSZ_LINK_FLAGS)
 696 -pantera_LDADD = nlpcommon/libnlpcommon.la \
 697 -				../third_party/TaKIPI18/Linux/bin/Corpus/libcorpus.so
 698 +				   @MORFEUSZ_LIBS@ @BOOST_REGEX_LIB@
 699 +
 700 +
 701 +
 702 +libpantera_la_SOURCES = wrapper.cpp pantera_rules.cpp TaggingOptions.cpp
 703 +include_HEADERS = wrapper.h TaggingOptions.h
 704  
 705 -libpantera_la_SOURCES = wrapper.cpp pantera_rules.cpp
 706 -include_HEADERS = wrapper.h
 707 +libpantera_la_LDFLAGS = $(LIBCORPUS_LINK_FLAGS) $(GLOBAL_LINK_FLAGS) $(LIBMORFEUSZ_LINK_FLAGS) \
 708 +				   @BOOST_LDFLAGS@ 
 709 +
 710 +libpantera_la_LIBADD = ../third_party/TaKIPI18/Linux/bin/Corpus/libcorpus.so \
 711 +				   @BOOST_SERIALIZATION_LIB@ \
 712 +				   @BOOST_PROGRAM_OPTIONS_LIB@ \
 713 +				   @BOOST_SYSTEM_LIB@ @BOOST_FILESYSTEM_LIB@ \
 714 +				   @BOOST_IOSTREAMS_LIB@ \
 715 +				   @BOOST_MPI_LIB@ @ICU_LIBS@ @LIBICONV@ \
 716 +				   @MORFEUSZ_LIBS@ @BOOST_REGEX_LIB@
 717  
 718  wrapper_test_SOURCES = wrapper_test.cpp \
 719  	rules/a.h rules/c1.m4h rules/p1.h rules/p2.h \
 720  	rules/impl.h rules/make.h brillengine.h \
 721  	brillexeme.h unigram.h rules.h pantera_rules.h \
 722 -	pantera.h
 723 -wrapper_test_LDFLAGS = @BOOST_LDFLAGS@ @BOOST_SERIALIZATION_LIB@ \
 724 -				   @BOOST_REGEX_LIB@ @BOOST_PROGRAM_OPTIONS_LIB@ \
 725 +	pantera.h TaggingOptions.h
 726 +wrapper_test_LDFLAGS =  $(LIBCORPUS_LINK_FLAGS) $(GLOBAL_LINK_FLAGS) $(LIBMORFEUSZ_LINK_FLAGS) \
 727 +				   @BOOST_LDFLAGS@
 728 +
 729 +
 730 +wrapper_test_LDADD = libpantera.la nlpcommon/libnlpcommon.la \
 731 +				../third_party/TaKIPI18/Linux/bin/Corpus/libcorpus.so \
 732 +				   @BOOST_SERIALIZATION_LIB@ \
 733 +				   @BOOST_PROGRAM_OPTIONS_LIB@ \
 734  				   @BOOST_SYSTEM_LIB@ @BOOST_FILESYSTEM_LIB@ \
 735  				   @BOOST_IOSTREAMS_LIB@ \
 736  				   @BOOST_MPI_LIB@ @ICU_LIBS@ @LIBICONV@ \
 737 -				   @MORFEUSZ_LIBS@ $(LIBCORPUS_LINK_FLAGS) \
 738 -				   $(GLOBAL_LINK_FLAGS) $(LIBMORFEUSZ_LINK_FLAGS)
 739 -wrapper_test_LDADD = libpantera.la nlpcommon/libnlpcommon.la \
 740 -				../third_party/TaKIPI18/Linux/bin/Corpus/libcorpus.so
 741 +				   @MORFEUSZ_LIBS@ @BOOST_REGEX_LIB@
 742 +
 743 +
 744  
 745  pantera_eval_SOURCES = eval.cpp
 746 -pantera_eval_LDFLAGS = @BOOST_LDFLAGS@ @BOOST_SERIALIZATION_LIB@ \
 747 -				   @BOOST_REGEX_LIB@ @BOOST_PROGRAM_OPTIONS_LIB@ \
 748 +pantera_eval_LDFLAGS =  $(LIBCORPUS_LINK_FLAGS) $(GLOBAL_LINK_FLAGS) $(LIBMORFEUSZ_LINK_FLAGS) \
 749 +				   @BOOST_LDFLAGS@
 750 +
 751 +pantera_eval_LDADD = libpantera.la nlpcommon/libnlpcommon.la \
 752 +				../third_party/TaKIPI18/Linux/bin/Corpus/libcorpus.so \
 753 +				   @BOOST_SERIALIZATION_LIB@ \
 754 +				   @BOOST_PROGRAM_OPTIONS_LIB@ \
 755  				   @BOOST_SYSTEM_LIB@ @BOOST_FILESYSTEM_LIB@ \
 756  				   @BOOST_IOSTREAMS_LIB@ \
 757  				   @BOOST_MPI_LIB@ @ICU_LIBS@ @LIBICONV@ \
 758 -				   @MORFEUSZ_LIBS@ $(LIBCORPUS_LINK_FLAGS) \
 759 -				   $(GLOBAL_LINK_FLAGS) $(LIBMORFEUSZ_LINK_FLAGS)
 760 -pantera_eval_LDADD = libpantera.la nlpcommon/libnlpcommon.la \
 761 -				../third_party/TaKIPI18/Linux/bin/Corpus/libcorpus.so
 762 +				   @MORFEUSZ_LIBS@ @BOOST_REGEX_LIB@
 763 +
 764 +
 765 +
 766  
 767  #eval_SOURCES = eval.cpp
 768  #eval_LDFLAGS = @BOOST_LDFLAGS@ @BOOST_REGEX_LIB@ \
 769 Index: src/rules/c1.m4h
 770 ===================================================================
 771 --- /dev/null	1970-01-01 00:00:00.000000000 +0000
 772 +++ src/rules/c1.m4h	2012-07-04 11:15:13.504186560 +0200
 773 @@ -0,0 +1,813 @@
 774 +
 775 +
 776 +
 777 +
 778 +
 779 +
 780 +
 781 +
 782 +
 783 +
 784 +
 785 +
 786 +
 787 +
 788 +
 789 +
 790 +
 791 +
 792 +
 793 +
 794 +
 795 +// Helper macros for the rule templates in this file; they expand to code that expects `text`, `index`, `p` and the `Phase` template parameter to be in scope.
 796 +#define TAGSET (this->tagsets[Phase])
 797 +// Accessors for the token at text[index + offset]: null-tag test, POS number, orth, PartOfSpeech lookup; DEFPOS declares a PartOfSpeech pointer that is NULL when the tag is null.
 798 +#define ISNULL(offset) (text[index + (offset)].chosen_tag[Phase] == Lexeme::tag_type::getNullTag())
 799 +#define POSNUM(offset) (text[index + (offset)].chosen_tag[Phase].getPos())
 800 +#define ORTH(offset) (text[index + (offset)].getOrth())
 801 +#define POS(offset) (TAGSET->getPartOfSpeech(POSNUM(offset)))
 802 +#define DEFPOS(name, offset) const PartOfSpeech* name = ISNULL(offset) ? NULL : POS(offset);
 803 +// FORCAT opens a BOOST_FOREACH over the categories of the POS at `offset`, binding the category and its tagset index; NEXTCAT supplies the closing brace.
 804 +#define FORCAT(name, name_idx, offset) \
 805 +    BOOST_FOREACH(const Category* name, POS(offset)->getCategories()) { \
 806 +        int name_idx = TAGSET->getCategoryIndex(name);
 807 +#define NEXTCAT \
 808 +    }
 809 +// VALUE reads the value of category `cat_idx` from the chosen tag at `offset` (note: `offset` is not parenthesised in the expansion).
 810 +#define VALUE(offset, cat_idx) \
 811 +    (text[index + offset].chosen_tag[Phase].getValue(cat_idx))
 812 +// C/P/V yield printable C strings for a predicate's category, POS and value; a category equal to (uint8_t)-1 is printed as "pos" and its value as a POS name.
 813 +#define C(cat_num) \
 814 +    (p.params.categories[cat_num] == (uint8_t)-1 ? "pos" : \
 815 +     TAGSET->getCategory(p.params.categories[cat_num])->getName().c_str())
 816 +#define P(p_num) \
 817 +     TAGSET->getPartOfSpeech(p.params.pos[p_num])->getName().c_str()
 818 +#define V(cat_num, v_num) \
 819 +    (p.params.categories[cat_num] == (uint8_t)-1 ? \
 820 +     P(v_num) :\
 821 +     TAGSET->getCategory(p.params.categories[cat_num])->getValue( \
 822 +        p.params.values[v_num]).c_str())
 823 +// Returns true iff `string` has at least `len` characters and its first `len` characters equal pattern[0..len); a `len` of 0 always matches.
 824 +static inline bool match_prefix(const wstring& string, const wchar_t* pattern, int len) {
 825 +    if (string.length() < len)
 826 +        return false;
 827 +    for (int i = 0; i < len; i++)
 828 +        if (string[i] != pattern[i])
 829 +            return false;
 830 +    return true;
 831 +}
 832 +// Returns true iff `string` has at least `len` characters and its last `len` characters equal pattern[0..len); a `len` of 0 always matches.
 833 +static inline bool match_suffix(const wstring& string, const wchar_t* pattern, int len) {
 834 +    size_t slen = string.length();
 835 +    if (slen < len)
 836 +        return false;
 837 +    for (int i = 0; i < len; i++)
 838 +        if (string[slen - len + i] != pattern[i]) // slen - len is the index where the suffix starts
 839 +            return false;
 840 +    return true;
 841 +}
 842 +// Copies the first `len` characters of `string` into pattern[0..len) (no terminator is written); returns false, writing nothing, when `string` is shorter than `len`.
 843 +static inline bool copy_prefix(const wstring& string, wchar_t* pattern, int len) {
 844 +    if (string.length() < len)
 845 +        return false;
 846 +    for (int i = 0; i < len; i++)
 847 +        pattern[i] = string[i];
 848 +    return true;
 849 +}
 850 +// Copies the last `len` characters of `string` into pattern[0..len) (no terminator is written); returns false, writing nothing, when `string` is shorter than `len`.
 851 +static inline bool copy_suffix(const wstring& string, wchar_t* pattern, int len) {
 852 +    size_t slen = string.length();
 853 +    if (slen < len)
 854 +        return false;
 855 +    for (int i = 0; i < len; i++)
 856 +        pattern[i] = string[slen - len + i];
 857 +    return true;
 858 +}
 859 +// Builds the " AND <segment> starts/ends (N chars) with '<chars>'" fragment of a predicate description; returns L"" when both lengths are zero.
 860 +static wstring orth_match_repr(bool match_nearby, const wchar_t* pattern, int prefix_len,
 861 +        int suffix_len) {
 862 +    wchar_t wbuf[STR_SIZE];
 863 +    int plen = std::max(prefix_len, suffix_len); // NOTE(review): plen is not checked against STR_SIZE before the copy below - confirm callers keep it small
 864 +    wmemcpy(wbuf, pattern, plen); // pattern is not null-terminated, so take exactly plen characters
 865 +    wbuf[plen] = L'\0';
 866 +    if (!prefix_len && !suffix_len)
 867 +        return L"";
 868 +    return boost::str(boost::wformat(L" AND %hs %hs (%d chars) with '%ls'")
 869 +        % (match_nearby ? "nearby segment" : "T[0]")
 870 +        % (prefix_len > 0 ? "starts" : "ends")
 871 +        % plen
 872 +        % wbuf);
 873 +}
 874 +// Builds the history fragment of a predicate description: empty when history is unused, "was not changed" for rule_number 0, otherwise the rule/phase pair.
 875 +static wstring history_match_repr(bool use_history, int rule_number) {
 876 +    if (!use_history)
 877 +        return L"";
 878 +    if (rule_number == 0)
 879 +        return L" AND T[0] was not changed";
 880 +    return boost::str(boost::wformat(L" AND T[0] was changed by rule %d in phase %d") // rule_number is decoded as phase * 10000 + rule
 881 +        % (rule_number % 10000) % (rule_number / 10000));
 882 +}
 883 +
 884 +
 885 +
 886 +
 887 +
 888 +
 889 +// Rule template relating T[0] to one nearby token at relative position Offset1: predicates compare a category value (or just the POS) of T[0] and of that neighbour.
 890 +// Optional extras: AlwaysPos also pins the neighbour's POS; PrefixLen/SuffixLen pin an orth prefix/suffix (of the neighbour when MatchNearbyOrth, else of T[0]); UseHistory pins last_matched_rule.
 891 +template<class Lexeme, int Phase, int Offset1, bool AlwaysPos = false, bool MatchNearbyOrth = false,
 892 +    int PrefixLen = 0, int SuffixLen = 0,
 893 +    bool UseHistory = false>
 894 +
 895 +class Nearby1CatPredicateTemplate : public PredicateTemplate<Lexeme>
 896 +{
 897 +public:
 898 +    Nearby1CatPredicateTemplate(const vector<const Tagset*> tagsets) : PredicateTemplate<Lexeme>(tagsets) { }
 899 +
 900 +
 901 +    // Appends to v every predicate of this template that holds at text[index]: one per category of T[0]'s POS that the neighbour's POS also has, plus one POS-only predicate.
 902 +    // Emits nothing when T[0] has the null tag, or (MatchNearbyOrth == false) when T[0]'s orth is shorter than the requested prefix/suffix.
 903 +    void findMatchingPredicates(vector<Predicate<Lexeme> >& v,
 904 +                                                          vector<Lexeme>& text,
 905 +                                                          int index) {
 906 +        assert(PrefixLen == 0 || SuffixLen == 0);
 907 +
 908 +        if (ISNULL(0)) return;
 909 +
 910 +        Predicate<Lexeme> p = Predicate<Lexeme>(this);
 911 +        p.params.pos[0] = POSNUM(0);
 912 +        if (UseHistory)
 913 +            p.params.rule_number = text[index].last_matched_rule;
 914 +        if (!MatchNearbyOrth && !copy_prefix(ORTH(0), p.params.chars, PrefixLen))
 915 +            return;
 916 +        if (!MatchNearbyOrth && !copy_suffix(ORTH(0), p.params.chars, SuffixLen))
 917 +            return;
 918 +        // pos1 is NULL when the neighbour carries the null tag.
 919 +
 920 +            DEFPOS(pos1, Offset1);
 921 +        
 922 +        // Category predicates: categories[0] = category index, values[0] = T[0]'s value, values[1] = the neighbour's value.
 923 +        FORCAT(cat, c, 0) {
 924 +            p.params.categories[0] = c;
 925 +            p.params.values[0] = VALUE(0, c);
 926 +            // do/while(0) lets `break` skip the push_back when the neighbour's orth is too short.
 927 +do { 
 928 +                if (pos1 && pos1->hasCategory(cat)) {
 929 +                    p.params.values[1] = VALUE(Offset1, c);
 930 +                    if (AlwaysPos)
 931 +                        p.params.pos[1] = POSNUM(Offset1);
 932 +                    if ((PrefixLen || SuffixLen) && MatchNearbyOrth) {
 933 +                        if (!copy_prefix(ORTH(Offset1), p.params.chars, PrefixLen))
 934 +                            break;
 935 +                        if (!copy_suffix(ORTH(Offset1), p.params.chars, SuffixLen))
 936 +                            break;
 937 +                    }
 938 +                    v.push_back(p);
 939 +                }
 940 +             } while(0);
 941 +
 942 +        } NEXTCAT
 943 +        // POS-only predicate: -1 in categories[0] (tested as (uint8_t)-1 in predicateMatches) means "compare part of speech instead of a category".
 944 +        p.params.categories[0] = -1;
 945 +        p.params.values[0] = -1;
 946 +        p.params.values[1] = -1;
 947 +        
 948 +do { 
 949 +            if (pos1) {
 950 +                p.params.pos[1] = POSNUM(Offset1);
 951 +                if ((PrefixLen || SuffixLen) && MatchNearbyOrth) {
 952 +                    if (!copy_prefix(ORTH(Offset1), p.params.chars, PrefixLen))
 953 +                        break;
 954 +                    if (!copy_suffix(ORTH(Offset1), p.params.chars, SuffixLen))
 955 +                        break;
 956 +                }
 957 +                v.push_back(p);
 958 +            }
 959 +         } while(0);
 960 +
 961 +    }
 962 +
 963 +    // Returns true when predicate p holds at text[index]: T[0]'s POS (and optionally history and orth) must match, then either the neighbour's POS or the shared category values.
 964 +    bool predicateMatches(const Predicate<Lexeme>& p,
 965 +                vector<Lexeme>& text, int index) {
 966 +        if (p.params.pos[0] != POSNUM(0))
 967 +            return false;
 968 +        if (UseHistory && text[index].last_matched_rule != p.params.rule_number)
 969 +            return false;
 970 +        if (PrefixLen || SuffixLen) {
 971 +            if (!MatchNearbyOrth && !match_prefix(ORTH(0), p.params.chars, PrefixLen))
 972 +                return false;
 973 +            if (!MatchNearbyOrth && !match_suffix(ORTH(0), p.params.chars, SuffixLen))
 974 +                return false;
 975 +        }
 976 +        int c = p.params.categories[0];
 977 +        if (c == (uint8_t)-1) {
 978 +            return (
 979 +(!ISNULL(Offset1) && p.params.pos[1] == POSNUM(Offset1)
 980 +                && (!MatchNearbyOrth || (match_prefix(ORTH(Offset1), p.params.chars, PrefixLen)
 981 +                                      && match_suffix(ORTH(Offset1), p.params.chars, SuffixLen)))
 982 +            )
 983 +);
 984 +        } else {
 985 +            return (
 986 +(!ISNULL(Offset1) && p.params.values[1] == VALUE(Offset1, c) && (!AlwaysPos || p.params.pos[1] == POSNUM(Offset1))
 987 +                && (!MatchNearbyOrth || (match_prefix(ORTH(Offset1), p.params.chars, PrefixLen)
 988 +                                      && match_suffix(ORTH(Offset1), p.params.chars, SuffixLen)))
 989 +            )
 990 +)
 991 +                    && p.params.values[0] == VALUE(0, c);
 992 +        }
 993 +    }
 994 +
 995 +    // Renders p as a human-readable condition; the text is formatted into a fixed STR_SIZE buffer and swprintf's return value is not checked.
 996 +    wstring predicateAsWString(const Predicate<Lexeme>& p) {
 997 +        wchar_t str[STR_SIZE];
 998 +        swprintf(str, STR_SIZE, L"(" 
 999 +L"T[%d]|pos,%hs = %hs,%hs"
1000 + L") AND T[0]|pos,%hs = %hs,%hs%ls%ls",
1001 +Offset1,C(0),AlwaysPos ? P(1) : "*",V(0, 1),
1002 +
1003 +             C(0),P(0),V(0, 0),orth_match_repr(MatchNearbyOrth, p.params.chars, PrefixLen, SuffixLen).c_str(),history_match_repr(UseHistory, p.params.rule_number).c_str());
1004 +        return wstring(str);
1005 +    }
1006 +
1007 +    // Always true. NOTE(review): presumably tells the caller that params.categories[0] is meaningful for this template - confirm against PredicateTemplate.
1008 +    bool usesCategory0() {
1009 +        return true;
1010 +    }
1011 +
1012 +
1013 +};
1014 +
1015 +
1016 +
1017 +// Rule template relating T[0] to EITHER of two nearby tokens (relative positions Offset1, Offset2): a predicate holds when at least one of them matches the stored value/POS.
1018 +// Optional extras: AlwaysPos also pins the neighbour's POS; PrefixLen/SuffixLen pin an orth prefix/suffix (of the neighbour when MatchNearbyOrth, else of T[0]); UseHistory pins last_matched_rule.
1019 +template<class Lexeme, int Phase, int Offset1, int Offset2, bool AlwaysPos = false, bool MatchNearbyOrth = false,
1020 +    int PrefixLen = 0, int SuffixLen = 0,
1021 +    bool UseHistory = false>
1022 +
1023 +class Nearby2CatPredicateTemplate : public PredicateTemplate<Lexeme>
1024 +{
1025 +public:
1026 +    Nearby2CatPredicateTemplate(const vector<const Tagset*> tagsets) : PredicateTemplate<Lexeme>(tagsets) { }
1027 +
1028 +
1029 +    // Appends to v the predicates of this template that hold at text[index]; each neighbour contributes its own predicates through the same values[1]/pos[1] slots.
1030 +    // Emits nothing when T[0] has the null tag, or (MatchNearbyOrth == false) when T[0]'s orth is shorter than the requested prefix/suffix.
1031 +    void findMatchingPredicates(vector<Predicate<Lexeme> >& v,
1032 +                                                          vector<Lexeme>& text,
1033 +                                                          int index) {
1034 +        assert(PrefixLen == 0 || SuffixLen == 0);
1035 +
1036 +        if (ISNULL(0)) return;
1037 +
1038 +        Predicate<Lexeme> p = Predicate<Lexeme>(this);
1039 +        p.params.pos[0] = POSNUM(0);
1040 +        if (UseHistory)
1041 +            p.params.rule_number = text[index].last_matched_rule;
1042 +        if (!MatchNearbyOrth && !copy_prefix(ORTH(0), p.params.chars, PrefixLen))
1043 +            return;
1044 +        if (!MatchNearbyOrth && !copy_suffix(ORTH(0), p.params.chars, SuffixLen))
1045 +            return;
1046 +        // pos1/pos2 are NULL when the corresponding neighbour carries the null tag.
1047 +
1048 +            DEFPOS(pos1, Offset1);
1049 +        
1050 +            DEFPOS(pos2, Offset2);
1051 +        
1052 +        // Category predicates: categories[0] = category index, values[0] = T[0]'s value, values[1] = a neighbour's value.
1053 +        FORCAT(cat, c, 0) {
1054 +            p.params.categories[0] = c;
1055 +            p.params.values[0] = VALUE(0, c);
1056 +            // Each do/while(0) handles one neighbour; `break` skips its push_back when that neighbour's orth is too short.
1057 +do { 
1058 +                if (pos1 && pos1->hasCategory(cat)) {
1059 +                    p.params.values[1] = VALUE(Offset1, c);
1060 +                    if (AlwaysPos)
1061 +                        p.params.pos[1] = POSNUM(Offset1);
1062 +                    if ((PrefixLen || SuffixLen) && MatchNearbyOrth) {
1063 +                        if (!copy_prefix(ORTH(Offset1), p.params.chars, PrefixLen))
1064 +                            break;
1065 +                        if (!copy_suffix(ORTH(Offset1), p.params.chars, SuffixLen))
1066 +                            break;
1067 +                    }
1068 +                    v.push_back(p);
1069 +                }
1070 +             } while(0);do { 
1071 +                if (pos2 && pos2->hasCategory(cat)) {
1072 +                    p.params.values[1] = VALUE(Offset2, c);
1073 +                    if (AlwaysPos)
1074 +                        p.params.pos[1] = POSNUM(Offset2);
1075 +                    if ((PrefixLen || SuffixLen) && MatchNearbyOrth) {
1076 +                        if (!copy_prefix(ORTH(Offset2), p.params.chars, PrefixLen))
1077 +                            break;
1078 +                        if (!copy_suffix(ORTH(Offset2), p.params.chars, SuffixLen))
1079 +                            break;
1080 +                    }
1081 +                    v.push_back(p);
1082 +                }
1083 +             } while(0);
1084 +
1085 +        } NEXTCAT
1086 +        // POS-only predicates: -1 in categories[0] (tested as (uint8_t)-1 in predicateMatches) means "compare part of speech instead of a category".
1087 +        p.params.categories[0] = -1;
1088 +        p.params.values[0] = -1;
1089 +        p.params.values[1] = -1;
1090 +        
1091 +do { 
1092 +            if (pos1) {
1093 +                p.params.pos[1] = POSNUM(Offset1);
1094 +                if ((PrefixLen || SuffixLen) && MatchNearbyOrth) {
1095 +                    if (!copy_prefix(ORTH(Offset1), p.params.chars, PrefixLen))
1096 +                        break;
1097 +                    if (!copy_suffix(ORTH(Offset1), p.params.chars, SuffixLen))
1098 +                        break;
1099 +                }
1100 +                v.push_back(p);
1101 +            }
1102 +         } while(0);do { 
1103 +            if (pos2) {
1104 +                p.params.pos[1] = POSNUM(Offset2);
1105 +                if ((PrefixLen || SuffixLen) && MatchNearbyOrth) {
1106 +                    if (!copy_prefix(ORTH(Offset2), p.params.chars, PrefixLen))
1107 +                        break;
1108 +                    if (!copy_suffix(ORTH(Offset2), p.params.chars, SuffixLen))
1109 +                        break;
1110 +                }
1111 +                v.push_back(p);
1112 +            }
1113 +         } while(0);
1114 +
1115 +    }
1116 +
1117 +    // Returns true when p holds at text[index]: T[0]'s POS (and optionally history and orth) must match, and at least one of the two neighbours must satisfy the neighbour condition.
1118 +    bool predicateMatches(const Predicate<Lexeme>& p,
1119 +                vector<Lexeme>& text, int index) {
1120 +        if (p.params.pos[0] != POSNUM(0))
1121 +            return false;
1122 +        if (UseHistory && text[index].last_matched_rule != p.params.rule_number)
1123 +            return false;
1124 +        if (PrefixLen || SuffixLen) {
1125 +            if (!MatchNearbyOrth && !match_prefix(ORTH(0), p.params.chars, PrefixLen))
1126 +                return false;
1127 +            if (!MatchNearbyOrth && !match_suffix(ORTH(0), p.params.chars, SuffixLen))
1128 +                return false;
1129 +        }
1130 +        int c = p.params.categories[0];
1131 +        if (c == (uint8_t)-1) {
1132 +            return (
1133 +(!ISNULL(Offset1) && p.params.pos[1] == POSNUM(Offset1)
1134 +                && (!MatchNearbyOrth || (match_prefix(ORTH(Offset1), p.params.chars, PrefixLen)
1135 +                                      && match_suffix(ORTH(Offset1), p.params.chars, SuffixLen)))
1136 +            )||(!ISNULL(Offset2) && p.params.pos[1] == POSNUM(Offset2)
1137 +                && (!MatchNearbyOrth || (match_prefix(ORTH(Offset2), p.params.chars, PrefixLen)
1138 +                                      && match_suffix(ORTH(Offset2), p.params.chars, SuffixLen)))
1139 +            )
1140 +);
1141 +        } else {
1142 +            return (
1143 +(!ISNULL(Offset1) && p.params.values[1] == VALUE(Offset1, c) && (!AlwaysPos || p.params.pos[1] == POSNUM(Offset1))
1144 +                && (!MatchNearbyOrth || (match_prefix(ORTH(Offset1), p.params.chars, PrefixLen)
1145 +                                      && match_suffix(ORTH(Offset1), p.params.chars, SuffixLen)))
1146 +            )||(!ISNULL(Offset2) && p.params.values[1] == VALUE(Offset2, c) && (!AlwaysPos || p.params.pos[1] == POSNUM(Offset2))
1147 +                && (!MatchNearbyOrth || (match_prefix(ORTH(Offset2), p.params.chars, PrefixLen)
1148 +                                      && match_suffix(ORTH(Offset2), p.params.chars, SuffixLen)))
1149 +            )
1150 +)
1151 +                    && p.params.values[0] == VALUE(0, c);
1152 +        }
1153 +    }
1154 +
1155 +    // Renders p as a human-readable condition; both OR-ed alternatives print the same pos[1]/values[1] slot. The fixed STR_SIZE buffer is filled by swprintf without checking its return value.
1156 +    wstring predicateAsWString(const Predicate<Lexeme>& p) {
1157 +        wchar_t str[STR_SIZE];
1158 +        swprintf(str, STR_SIZE, L"(" 
1159 +L"T[%d]|pos,%hs = %hs,%hs"L" OR "L"T[%d]|pos,%hs = %hs,%hs"
1160 + L") AND T[0]|pos,%hs = %hs,%hs%ls%ls",
1161 +Offset1,C(0),AlwaysPos ? P(1) : "*",V(0, 1),Offset2,C(0),AlwaysPos ? P(1) : "*",V(0, 1),
1162 +
1163 +             C(0),P(0),V(0, 0),orth_match_repr(MatchNearbyOrth, p.params.chars, PrefixLen, SuffixLen).c_str(),history_match_repr(UseHistory, p.params.rule_number).c_str());
1164 +        return wstring(str);
1165 +    }
1166 +
1167 +    // Always true. NOTE(review): presumably tells the caller that params.categories[0] is meaningful for this template - confirm against PredicateTemplate.
1168 +    bool usesCategory0() {
1169 +        return true;
1170 +    }
1171 +
1172 +
1173 +};
1174 +
1175 +
1176 +
1177 +// Rule template relating T[0] to ANY of three nearby tokens (relative positions Offset1..Offset3): a predicate holds when at least one of them matches the stored value/POS.
1178 +// Optional extras: AlwaysPos also pins the neighbour's POS; PrefixLen/SuffixLen pin an orth prefix/suffix (of the neighbour when MatchNearbyOrth, else of T[0]); UseHistory pins last_matched_rule.
1179 +template<class Lexeme, int Phase, int Offset1, int Offset2, int Offset3, bool AlwaysPos = false, bool MatchNearbyOrth = false,
1180 +    int PrefixLen = 0, int SuffixLen = 0,
1181 +    bool UseHistory = false>
1182 +
1183 +class Nearby3CatPredicateTemplate : public PredicateTemplate<Lexeme>
1184 +{
1185 +public:
1186 +    Nearby3CatPredicateTemplate(const vector<const Tagset*> tagsets) : PredicateTemplate<Lexeme>(tagsets) { }
1187 +
1188 +
1189 +    // Appends to v the predicates of this template that hold at text[index]; each neighbour contributes its own predicates through the same values[1]/pos[1] slots.
1190 +    // Emits nothing when T[0] has the null tag, or (MatchNearbyOrth == false) when T[0]'s orth is shorter than the requested prefix/suffix.
1191 +    void findMatchingPredicates(vector<Predicate<Lexeme> >& v,
1192 +                                                          vector<Lexeme>& text,
1193 +                                                          int index) {
1194 +        assert(PrefixLen == 0 || SuffixLen == 0);
1195 +
1196 +        if (ISNULL(0)) return;
1197 +
1198 +        Predicate<Lexeme> p = Predicate<Lexeme>(this);
1199 +        p.params.pos[0] = POSNUM(0);
1200 +        if (UseHistory)
1201 +            p.params.rule_number = text[index].last_matched_rule;
1202 +        if (!MatchNearbyOrth && !copy_prefix(ORTH(0), p.params.chars, PrefixLen))
1203 +            return;
1204 +        if (!MatchNearbyOrth && !copy_suffix(ORTH(0), p.params.chars, SuffixLen))
1205 +            return;
1206 +        // pos1/pos2/pos3 are NULL when the corresponding neighbour carries the null tag.
1207 +
1208 +            DEFPOS(pos1, Offset1);
1209 +        
1210 +            DEFPOS(pos2, Offset2);
1211 +        
1212 +            DEFPOS(pos3, Offset3);
1213 +        
1214 +        // Category predicates: categories[0] = category index, values[0] = T[0]'s value, values[1] = a neighbour's value.
1215 +        FORCAT(cat, c, 0) {
1216 +            p.params.categories[0] = c;
1217 +            p.params.values[0] = VALUE(0, c);
1218 +            // Each do/while(0) handles one neighbour; `break` skips its push_back when that neighbour's orth is too short.
1219 +do { 
1220 +                if (pos1 && pos1->hasCategory(cat)) {
1221 +                    p.params.values[1] = VALUE(Offset1, c);
1222 +                    if (AlwaysPos)
1223 +                        p.params.pos[1] = POSNUM(Offset1);
1224 +                    if ((PrefixLen || SuffixLen) && MatchNearbyOrth) {
1225 +                        if (!copy_prefix(ORTH(Offset1), p.params.chars, PrefixLen))
1226 +                            break;
1227 +                        if (!copy_suffix(ORTH(Offset1), p.params.chars, SuffixLen))
1228 +                            break;
1229 +                    }
1230 +                    v.push_back(p);
1231 +                }
1232 +             } while(0);do { 
1233 +                if (pos2 && pos2->hasCategory(cat)) {
1234 +                    p.params.values[1] = VALUE(Offset2, c);
1235 +                    if (AlwaysPos)
1236 +                        p.params.pos[1] = POSNUM(Offset2);
1237 +                    if ((PrefixLen || SuffixLen) && MatchNearbyOrth) {
1238 +                        if (!copy_prefix(ORTH(Offset2), p.params.chars, PrefixLen))
1239 +                            break;
1240 +                        if (!copy_suffix(ORTH(Offset2), p.params.chars, SuffixLen))
1241 +                            break;
1242 +                    }
1243 +                    v.push_back(p);
1244 +                }
1245 +             } while(0);do { 
1246 +                if (pos3 && pos3->hasCategory(cat)) {
1247 +                    p.params.values[1] = VALUE(Offset3, c);
1248 +                    if (AlwaysPos)
1249 +                        p.params.pos[1] = POSNUM(Offset3);
1250 +                    if ((PrefixLen || SuffixLen) && MatchNearbyOrth) {
1251 +                        if (!copy_prefix(ORTH(Offset3), p.params.chars, PrefixLen))
1252 +                            break;
1253 +                        if (!copy_suffix(ORTH(Offset3), p.params.chars, SuffixLen))
1254 +                            break;
1255 +                    }
1256 +                    v.push_back(p);
1257 +                }
1258 +             } while(0);
1259 +
1260 +        } NEXTCAT
1261 +        // POS-only predicates: -1 in categories[0] (tested as (uint8_t)-1 in predicateMatches) means "compare part of speech instead of a category".
1262 +        p.params.categories[0] = -1;
1263 +        p.params.values[0] = -1;
1264 +        p.params.values[1] = -1;
1265 +        
1266 +do { 
1267 +            if (pos1) {
1268 +                p.params.pos[1] = POSNUM(Offset1);
1269 +                if ((PrefixLen || SuffixLen) && MatchNearbyOrth) {
1270 +                    if (!copy_prefix(ORTH(Offset1), p.params.chars, PrefixLen))
1271 +                        break;
1272 +                    if (!copy_suffix(ORTH(Offset1), p.params.chars, SuffixLen))
1273 +                        break;
1274 +                }
1275 +                v.push_back(p);
1276 +            }
1277 +         } while(0);do { 
1278 +            if (pos2) {
1279 +                p.params.pos[1] = POSNUM(Offset2);
1280 +                if ((PrefixLen || SuffixLen) && MatchNearbyOrth) {
1281 +                    if (!copy_prefix(ORTH(Offset2), p.params.chars, PrefixLen))
1282 +                        break;
1283 +                    if (!copy_suffix(ORTH(Offset2), p.params.chars, SuffixLen))
1284 +                        break;
1285 +                }
1286 +                v.push_back(p);
1287 +            }
1288 +         } while(0);do { 
1289 +            if (pos3) {
1290 +                p.params.pos[1] = POSNUM(Offset3);
1291 +                if ((PrefixLen || SuffixLen) && MatchNearbyOrth) {
1292 +                    if (!copy_prefix(ORTH(Offset3), p.params.chars, PrefixLen))
1293 +                        break;
1294 +                    if (!copy_suffix(ORTH(Offset3), p.params.chars, SuffixLen))
1295 +                        break;
1296 +                }
1297 +                v.push_back(p);
1298 +            }
1299 +         } while(0);
1300 +
1301 +    }
1302 +
1303 +    // Returns true when p holds at text[index]: T[0]'s POS (and optionally history and orth) must match, and at least one of the three neighbours must satisfy the neighbour condition.
1304 +    bool predicateMatches(const Predicate<Lexeme>& p,
1305 +                vector<Lexeme>& text, int index) {
1306 +        if (p.params.pos[0] != POSNUM(0))
1307 +            return false;
1308 +        if (UseHistory && text[index].last_matched_rule != p.params.rule_number)
1309 +            return false;
1310 +        if (PrefixLen || SuffixLen) {
1311 +            if (!MatchNearbyOrth && !match_prefix(ORTH(0), p.params.chars, PrefixLen))
1312 +                return false;
1313 +            if (!MatchNearbyOrth && !match_suffix(ORTH(0), p.params.chars, SuffixLen))
1314 +                return false;
1315 +        }
1316 +        int c = p.params.categories[0];
1317 +        if (c == (uint8_t)-1) {
1318 +            return (
1319 +(!ISNULL(Offset1) && p.params.pos[1] == POSNUM(Offset1)
1320 +                && (!MatchNearbyOrth || (match_prefix(ORTH(Offset1), p.params.chars, PrefixLen)
1321 +                                      && match_suffix(ORTH(Offset1), p.params.chars, SuffixLen)))
1322 +            )||(!ISNULL(Offset2) && p.params.pos[1] == POSNUM(Offset2)
1323 +                && (!MatchNearbyOrth || (match_prefix(ORTH(Offset2), p.params.chars, PrefixLen)
1324 +                                      && match_suffix(ORTH(Offset2), p.params.chars, SuffixLen)))
1325 +            )||(!ISNULL(Offset3) && p.params.pos[1] == POSNUM(Offset3)
1326 +                && (!MatchNearbyOrth || (match_prefix(ORTH(Offset3), p.params.chars, PrefixLen)
1327 +                                      && match_suffix(ORTH(Offset3), p.params.chars, SuffixLen)))
1328 +            )
1329 +);
1330 +        } else {
1331 +            return (
1332 +(!ISNULL(Offset1) && p.params.values[1] == VALUE(Offset1, c) && (!AlwaysPos || p.params.pos[1] == POSNUM(Offset1))
1333 +                && (!MatchNearbyOrth || (match_prefix(ORTH(Offset1), p.params.chars, PrefixLen)
1334 +                                      && match_suffix(ORTH(Offset1), p.params.chars, SuffixLen)))
1335 +            )||(!ISNULL(Offset2) && p.params.values[1] == VALUE(Offset2, c) && (!AlwaysPos || p.params.pos[1] == POSNUM(Offset2))
1336 +                && (!MatchNearbyOrth || (match_prefix(ORTH(Offset2), p.params.chars, PrefixLen)
1337 +                                      && match_suffix(ORTH(Offset2), p.params.chars, SuffixLen)))
1338 +            )||(!ISNULL(Offset3) && p.params.values[1] == VALUE(Offset3, c) && (!AlwaysPos || p.params.pos[1] == POSNUM(Offset3))
1339 +                && (!MatchNearbyOrth || (match_prefix(ORTH(Offset3), p.params.chars, PrefixLen)
1340 +                                      && match_suffix(ORTH(Offset3), p.params.chars, SuffixLen)))
1341 +            )
1342 +)
1343 +                    && p.params.values[0] == VALUE(0, c);
1344 +        }
1345 +    }
1346 +
1347 +    // Renders p as a human-readable condition; all OR-ed alternatives print the same pos[1]/values[1] slot. The fixed STR_SIZE buffer is filled by swprintf without checking its return value.
1348 +    wstring predicateAsWString(const Predicate<Lexeme>& p) {
1349 +        wchar_t str[STR_SIZE];
1350 +        swprintf(str, STR_SIZE, L"(" 
1351 +L"T[%d]|pos,%hs = %hs,%hs"L" OR "L"T[%d]|pos,%hs = %hs,%hs"L" OR "L"T[%d]|pos,%hs = %hs,%hs"
1352 + L") AND T[0]|pos,%hs = %hs,%hs%ls%ls",
1353 +Offset1,C(0),AlwaysPos ? P(1) : "*",V(0, 1),Offset2,C(0),AlwaysPos ? P(1) : "*",V(0, 1),Offset3,C(0),AlwaysPos ? P(1) : "*",V(0, 1),
1354 +
1355 +             C(0),P(0),V(0, 0),orth_match_repr(MatchNearbyOrth, p.params.chars, PrefixLen, SuffixLen).c_str(),history_match_repr(UseHistory, p.params.rule_number).c_str());
1356 +        return wstring(str);
1357 +    }
1358 +
1359 +    // Always true. NOTE(review): presumably tells the caller that params.categories[0] is meaningful for this template - confirm against PredicateTemplate.
1360 +    bool usesCategory0() {
1361 +        return true;
1362 +    }
1363 +
1364 +
1365 +};
1366 +
1367 +
1368 +
1369 +// Rule template relating T[0] to BOTH of two nearby tokens (relative positions Offset1 and Offset2): each neighbour has its own slot (values[1]/pos[1] and values[2]/pos[2]) and both must match.
1370 +// AlwaysPos additionally pins each neighbour's POS in the category predicates.
1371 +template<class Lexeme, int Phase, int Offset1, int Offset2, bool AlwaysPos = false>
1372 +
1373 +class NearbyExact2CatPredicateTemplate : public PredicateTemplate<Lexeme>
1374 +{
1375 +public:
1376 +    NearbyExact2CatPredicateTemplate(const vector<const Tagset*> tagsets) : PredicateTemplate<Lexeme>(tagsets) { }
1377 +
1378 +
1379 +    // Appends to v one predicate per category of T[0]'s POS that both neighbours' POS also have, then one POS-only predicate when neither neighbour has the null tag.
1380 +    // NOTE(review): unlike the Nearby1/2/3Cat templates in this file, there is no ISNULL(0) guard before the category loop - confirm T[0] can never carry the null tag here.
1381 +    void findMatchingPredicates(vector<Predicate<Lexeme> >& v,
1382 +                                                          vector<Lexeme>& text,
1383 +                                                          int index) {
1384 +        Predicate<Lexeme> p = Predicate<Lexeme>(this);
1385 +        p.params.pos[0] = POSNUM(0);
1386 +        // pos1/pos2 are NULL when the corresponding neighbour carries the null tag.
1387 +
1388 +            DEFPOS(pos1, Offset1);
1389 +        
1390 +            DEFPOS(pos2, Offset2);
1391 +        
1392 +
1393 +        FORCAT(cat, c, 0) {
1394 +            p.params.categories[0] = c;
1395 +            p.params.values[0] = VALUE(0, c);
1396 +            // `continue` moves on to the next category of the BOOST_FOREACH opened by FORCAT when either neighbour lacks this category.
1397 +
1398 +                if (!(pos1 && pos1->hasCategory(cat)))
1399 +                    continue;
1400 +            
1401 +                if (!(pos2 && pos2->hasCategory(cat)))
1402 +                    continue;
1403 +            
1404 +
1405 +            // Record each neighbour's value (and POS when AlwaysPos) in its own slot.
1406 +
1407 +                p.params.values[1] = VALUE(Offset1, c);
1408 +                if (AlwaysPos)
1409 +                    p.params.pos[1] = POSNUM(Offset1);
1410 +            
1411 +                p.params.values[2] = VALUE(Offset2, c);
1412 +                if (AlwaysPos)
1413 +                    p.params.pos[2] = POSNUM(Offset2);
1414 +            
1415 +
1416 +            v.push_back(p);
1417 +        } NEXTCAT
1418 +
1419 +        p.params.categories[0] = -1;
1420 +        p.params.values[0] = -1;
1421 +        // POS-only predicate: requires both neighbours to be non-null; -1 in categories[0] is tested as (uint8_t)-1 in predicateMatches.
1422 +
1423 +            if (!pos1)
1424 +                return;
1425 +            p.params.values[1] = -1;
1426 +            p.params.pos[1] = POSNUM(Offset1);
1427 +        
1428 +            if (!pos2)
1429 +                return;
1430 +            p.params.values[2] = -1;
1431 +            p.params.pos[2] = POSNUM(Offset2);
1432 +        
1433 +
1434 +        v.push_back(p);
1435 +    }
1436 +
1437 +    // Returns true when p holds at text[index]: T[0]'s POS must match and both neighbours must satisfy their own slot's condition.
1438 +    bool predicateMatches(const Predicate<Lexeme>& p,
1439 +                vector<Lexeme>& text, int index) {
1440 +        if (p.params.pos[0] != POSNUM(0))
1441 +            return false;
1442 +        int c = p.params.categories[0];
1443 +        if (c == (uint8_t)-1) {
1444 +            return (
1445 +(!ISNULL(Offset1) && p.params.pos[1] == POSNUM(Offset1))&&(!ISNULL(Offset2) && p.params.pos[2] == POSNUM(Offset2))
1446 +);
1447 +        } else {
1448 +            return (
1449 +(!ISNULL(Offset1) && p.params.values[1] == VALUE(Offset1, c) && (!AlwaysPos || p.params.pos[1] == POSNUM(Offset1)))&&(!ISNULL(Offset2) && p.params.values[2] == VALUE(Offset2, c) && (!AlwaysPos || p.params.pos[2] == POSNUM(Offset2)))
1450 +)
1451 +                    && p.params.values[0] == VALUE(0, c);
1452 +        }
1453 +    }
1454 +
1455 +    // Renders p as a human-readable condition; the text is formatted into a fixed STR_SIZE buffer and swprintf's return value is not checked.
1456 +    wstring predicateAsWString(const Predicate<Lexeme>& p) {
1457 +        wchar_t str[STR_SIZE];
1458 +        swprintf(str, STR_SIZE, 
1459 +L"T[%d]|pos,%hs = %hs,%hs"L" AND "L"T[%d]|pos,%hs = %hs,%hs"
1460 + L" AND T[0]|pos = %hs AND T[0]|%hs = %hs",
1461 +Offset1,C(0),AlwaysPos ? P(1) : "*",V(0, 1),Offset2,C(0),AlwaysPos ? P(2) : "*",V(0, 2),
1462 +
1463 +            P(0),C(0),V(0, 0));
1464 +        return wstring(str);
1465 +    }
1466 +
1467 +    // Always true. NOTE(review): presumably tells the caller that params.categories[0] is meaningful for this template - confirm against PredicateTemplate.
1468 +    bool usesCategory0() {
1469 +        return true;
1470 +    }
1471 +
1472 +
1473 +};
1474 +
1475 +
1476 +
1477 +template<class Lexeme, int Phase>
1478 +class CCaseCatPredicateTemplate : public PredicateTemplate<Lexeme>
1479 +{
1480 +public:
1481 +CCaseCatPredicateTemplate(const vector<const Tagset*> tagsets) : PredicateTemplate<Lexeme>(tagsets) { }
1482 +
1483 +void findMatchingPredicates(vector<Predicate<Lexeme> >& v,
1484 +                                                      vector<Lexeme>& text,
1485 +                                                      int index) {
1486 +
1487 +    if (text[index].getOrth()[0] >= 'A' && text[index].getOrth()[0] <= 'Z') {
1488 +        Predicate<Lexeme> p = Predicate<Lexeme>(this);
1489 +        p.params.tags[0] = text[index].chosen_tag[Phase];
1490 +        v.push_back(p);
1491 +    }
1492 +}
1493 +bool predicateMatches(const Predicate<Lexeme>& p,
1494 +            vector<Lexeme>& text, int index) {
1495 +    return (p.params.tags[0] == text[index].chosen_tag[Phase]
1496 +            && text[index].getOrth()[0] >= 'A' && text[index].getOrth()[0] <= 'Z');
1497 +}
1498 +wstring predicateAsWString(const Predicate<Lexeme>& p) {
1499 +
1500 +    char str[STR_SIZE];
1501 +    sprintf(str, "T[0] = %s AND ORTH[0] starts with capital letter", T(tags[0]));
1502 +    return ascii_to_wstring(str);
1503 +}
1504 +};
1505 +
1506 +template<class Lexeme, int Phase>
1507 +class Prefix2CatPredicateTemplate : public PredicateTemplate<Lexeme>
1508 +{
1509 +public:
1510 +Prefix2CatPredicateTemplate(const vector<const Tagset*> tagsets) : PredicateTemplate<Lexeme>(tagsets) { }
1511 +
1512 +void findMatchingPredicates(vector<Predicate<Lexeme> >& v,
1513 +                                                      vector<Lexeme>& text,
1514 +                                                      int index) {
1515 +
1516 +    const string& orth = text[index].getOrth();
1517 +    int len = orth.length();
1518 +    if (len >= 2) {
1519 +        Predicate<Lexeme> p = Predicate<Lexeme>(this);
1520 +        p.params.tags[0] = text[index].chosen_tag[Phase];
1521 +        p.params.chars[0] = orth[0];
1522 +        p.params.chars[1] = orth[1];
1523 +        v.push_back(p);
1524 +    }
1525 +}
1526 +bool predicateMatches(const Predicate<Lexeme>& p,
1527 +            vector<Lexeme>& text, int index) {
1528 +    const string& orth = text[index].getOrth();
1529 +    int len = orth.length();
1530 +    return (len >= 2 && p.params.tags[0] == text[index].chosen_tag[Phase]
1531 +            && orth[0] == p.params.chars[0]
1532 +            && orth[1] == p.params.chars[1]);
1533 +}
1534 +wstring predicateAsWString(const Predicate<Lexeme>& p) {
1535 +    wchar_t str[STR_SIZE];
1536 +    swprintf(str, STR_SIZE, L"T[0] = %hs AND ORTH starts with '%lc%lc'", T(tags[0]), p.params.chars[0], p.params.chars[1]);
1537 +    return wstring(str);
1538 +}
1539 +};
1540 +
1541 +template<class Lexeme, int Phase>
1542 +class Suffix2CatPredicateTemplate : public PredicateTemplate<Lexeme>
1543 +{
1544 +public:
1545 +Suffix2CatPredicateTemplate(const vector<const Tagset*> tagsets) : PredicateTemplate<Lexeme>(tagsets) { }
1546 +
1547 +void findMatchingPredicates(vector<Predicate<Lexeme> >& v,
1548 +                                                      vector<Lexeme>& text,
1549 +                                                      int index) {
1550 +
1551 +    const string& orth = text[index].getOrth();
1552 +    int len = orth.length();
1553 +    if (len >= 2) {
1554 +        Predicate<Lexeme> p = Predicate<Lexeme>(this);
1555 +        p.params.tags[0] = text[index].chosen_tag[Phase];
1556 +        p.params.chars[0] = orth[len - 2];
1557 +        p.params.chars[1] = orth[len - 1];
1558 +        v.push_back(p);
1559 +    }
1560 +}
1561 +bool predicateMatches(const Predicate<Lexeme>& p,
1562 +            vector<Lexeme>& text, int index) {
1563 +    const string& orth = text[index].getOrth();
1564 +    int len = orth.length();
1565 +    return (len >= 2 && p.params.tags[0] == text[index].chosen_tag[Phase]
1566 +            && orth[len - 2] == p.params.chars[0]
1567 +            && orth[len - 1] == p.params.chars[1]);
1568 +}
1569 +wstring predicateAsWString(const Predicate<Lexeme>& p) {
1570 +    wchar_t str[STR_SIZE];
1571 +    swprintf(str, STR_SIZE, L"T[0] = %hs AND ORTH ends with '%lc%lc'", T(tags[0]), p.params.chars[0], p.params.chars[1]);
1572 +    return wstring(str);
1573 +}
1574 +};
1575 +
1576 +
1577 +
1578 +#undef TAGSET
1579 +#undef POS
1580 +#undef DEFPOS
1581 +#undef FORCAT
1582 +#undef NEXTCAT
1583 +#undef VALUE
1584 +#undef C
1585 +#undef V
1586 +
1587 Index: src/eval.cpp
1588 ===================================================================
1589 --- src/eval.cpp.orig	2010-06-03 23:39:46.000000000 +0200
1590 +++ src/eval.cpp	2012-07-04 11:15:13.528186561 +0200
1591 @@ -27,6 +27,13 @@
1592  using namespace std;
1593  using namespace NLPCommon;
1594  
1595 +// XXX bo jest jako extern w nlpcommon/util.h
1596 +boost::program_options::variables_map options;
1597 +
1598 +// XXX bo nie chcemy, by sie wywalało przez MPI
1599 +boost::mpi::environment env;
1600 +boost::mpi::communicator world;
1601 +
1602  typedef Lexeme<Tag> MyLexeme;
1603  
1604  TaggingErrorsCollector<MyLexeme>* errors_collector;
1605 Index: aclocal/ltversion.m4
1606 ===================================================================
1607 --- aclocal/ltversion.m4.orig	2010-10-02 22:51:03.000000000 +0200
1608 +++ aclocal/ltversion.m4	2012-07-04 16:29:47.407822027 +0200
1609 @@ -7,17 +7,17 @@
1610  # unlimited permission to copy and/or distribute it, with or without
1611  # modifications, as long as this notice is preserved.
1612  
1613 -# Generated from ltversion.in.
1614 +# @configure_input@
1615  
1616 -# serial 3175 ltversion.m4
1617 +# serial 3337 ltversion.m4
1618  # This file is part of GNU Libtool
1619  
1620 -m4_define([LT_PACKAGE_VERSION], [2.2.10])
1621 -m4_define([LT_PACKAGE_REVISION], [1.3175])
1622 +m4_define([LT_PACKAGE_VERSION], [2.4.2])
1623 +m4_define([LT_PACKAGE_REVISION], [1.3337])
1624  
1625  AC_DEFUN([LTVERSION_VERSION],
1626 -[macro_version='2.2.10'
1627 -macro_revision='1.3175'
1628 +[macro_version='2.4.2'
1629 +macro_revision='1.3337'
1630  _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
1631  _LT_DECL(, macro_revision, 0)
1632  ])
1633 Index: ltmain.sh
1634 ===================================================================
1635 --- ltmain.sh.orig	2010-10-02 22:51:24.000000000 +0200
1636 +++ ltmain.sh	2012-07-04 16:29:47.207822032 +0200
1637 @@ -1,10 +1,9 @@
1638 -# Generated from ltmain.m4sh.
1639  
1640 -# libtool (GNU libtool) 2.2.10
1641 +# libtool (GNU libtool) 2.4.2
1642  # Written by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
1643  
1644  # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006,
1645 -# 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
1646 +# 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
1647  # This is free software; see the source for copying conditions.  There is NO
1648  # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
1649  
1650 @@ -42,6 +41,7 @@
1651  #       --quiet, --silent    don't print informational messages
1652  #       --no-quiet, --no-silent
1653  #                            print informational messages (default)
1654 +#       --no-warn            don't display warning messages
1655  #       --tag=TAG            use configuration variables from tag TAG
1656  #   -v, --verbose            print more informational messages than default
1657  #       --no-verbose         don't print the extra informational messages
1658 @@ -70,17 +70,19 @@
1659  #         compiler:		$LTCC
1660  #         compiler flags:		$LTCFLAGS
1661  #         linker:		$LD (gnu? $with_gnu_ld)
1662 -#         $progname:	(GNU libtool) 2.2.10
1663 +#         $progname:	(GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1
1664  #         automake:	$automake_version
1665  #         autoconf:	$autoconf_version
1666  #
1667  # Report bugs to <bug-libtool@gnu.org>.
1668 +# GNU libtool home page: <http://www.gnu.org/software/libtool/>.
1669 +# General help using GNU software: <http://www.gnu.org/gethelp/>.
1670  
1671  PROGRAM=libtool
1672  PACKAGE=libtool
1673 -VERSION=2.2.10
1674 +VERSION="2.4.2 Debian-2.4.2-1ubuntu1"
1675  TIMESTAMP=""
1676 -package_revision=1.3175
1677 +package_revision=1.3337
1678  
1679  # Be Bourne compatible
1680  if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
1681 @@ -135,15 +137,10 @@
1682  
1683  : ${CP="cp -f"}
1684  test "${ECHO+set}" = set || ECHO=${as_echo-'printf %s\n'}
1685 -: ${EGREP="grep -E"}
1686 -: ${FGREP="grep -F"}
1687 -: ${GREP="grep"}
1688 -: ${LN_S="ln -s"}
1689  : ${MAKE="make"}
1690  : ${MKDIR="mkdir"}
1691  : ${MV="mv -f"}
1692  : ${RM="rm -f"}
1693 -: ${SED="sed"}
1694  : ${SHELL="${CONFIG_SHELL-/bin/sh}"}
1695  : ${Xsed="$SED -e 1s/^X//"}
1696  
1697 @@ -163,6 +160,27 @@
1698  dirname="s,/[^/]*$,,"
1699  basename="s,^.*/,,"
1700  
1701 +# func_dirname file append nondir_replacement
1702 +# Compute the dirname of FILE.  If nonempty, add APPEND to the result,
1703 +# otherwise set result to NONDIR_REPLACEMENT.
1704 +func_dirname ()
1705 +{
1706 +    func_dirname_result=`$ECHO "${1}" | $SED "$dirname"`
1707 +    if test "X$func_dirname_result" = "X${1}"; then
1708 +      func_dirname_result="${3}"
1709 +    else
1710 +      func_dirname_result="$func_dirname_result${2}"
1711 +    fi
1712 +} # func_dirname may be replaced by extended shell implementation
1713 +
1714 +
1715 +# func_basename file
1716 +func_basename ()
1717 +{
1718 +    func_basename_result=`$ECHO "${1}" | $SED "$basename"`
1719 +} # func_basename may be replaced by extended shell implementation
1720 +
1721 +
1722  # func_dirname_and_basename file append nondir_replacement
1723  # perform func_basename and func_dirname in a single function
1724  # call:
1725 @@ -177,17 +195,31 @@
1726  # those functions but instead duplicate the functionality here.
1727  func_dirname_and_basename ()
1728  {
1729 -  # Extract subdirectory from the argument.
1730 -  func_dirname_result=`$ECHO "${1}" | $SED -e "$dirname"`
1731 -  if test "X$func_dirname_result" = "X${1}"; then
1732 -    func_dirname_result="${3}"
1733 -  else
1734 -    func_dirname_result="$func_dirname_result${2}"
1735 -  fi
1736 -  func_basename_result=`$ECHO "${1}" | $SED -e "$basename"`
1737 -}
1738 +    # Extract subdirectory from the argument.
1739 +    func_dirname_result=`$ECHO "${1}" | $SED -e "$dirname"`
1740 +    if test "X$func_dirname_result" = "X${1}"; then
1741 +      func_dirname_result="${3}"
1742 +    else
1743 +      func_dirname_result="$func_dirname_result${2}"
1744 +    fi
1745 +    func_basename_result=`$ECHO "${1}" | $SED -e "$basename"`
1746 +} # func_dirname_and_basename may be replaced by extended shell implementation
1747 +
1748 +
1749 +# func_stripname prefix suffix name
1750 +# strip PREFIX and SUFFIX off of NAME.
1751 +# PREFIX and SUFFIX must not contain globbing or regex special
1752 +# characters, hashes, percent signs, but SUFFIX may contain a leading
1753 +# dot (in which case that matches only a dot).
1754 +# func_strip_suffix prefix name
1755 +func_stripname ()
1756 +{
1757 +    case ${2} in
1758 +      .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;;
1759 +      *)  func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;;
1760 +    esac
1761 +} # func_stripname may be replaced by extended shell implementation
1762  
1763 -# Generated shell functions inserted here.
1764  
1765  # These SED scripts presuppose an absolute path with a trailing slash.
1766  pathcar='s,^/\([^/]*\).*$,\1,'
1767 @@ -351,7 +383,7 @@
1768       ;;
1769    *)
1770       save_IFS="$IFS"
1771 -     IFS=:
1772 +     IFS=${PATH_SEPARATOR-:}
1773       for progdir in $PATH; do
1774         IFS="$save_IFS"
1775         test -x "$progdir/$progname" && break
1776 @@ -370,6 +402,15 @@
1777  # Same as above, but do not quote variable references.
1778  double_quote_subst='s/\(["`\\]\)/\\\1/g'
1779  
1780 +# Sed substitution that turns a string into a regex matching for the
1781 +# string literally.
1782 +sed_make_literal_regex='s,[].[^$\\*\/],\\&,g'
1783 +
1784 +# Sed substitution that converts a w32 file name or path
1785 +# which contains forward slashes, into one that contains
1786 +# (escaped) backslashes.  A very naive implementation.
1787 +lt_sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g'
1788 +
1789  # Re-`\' parameter expansions in output of double_quote_subst that were
1790  # `\'-ed in input to the same.  If an odd number of `\' preceded a '$'
1791  # in input to double_quote_subst, that '$' was protected from expansion.
1792 @@ -398,7 +439,7 @@
1793  # name if it has been set yet.
1794  func_echo ()
1795  {
1796 -    $ECHO "$progname${mode+: }$mode: $*"
1797 +    $ECHO "$progname: ${opt_mode+$opt_mode: }$*"
1798  }
1799  
1800  # func_verbose arg...
1801 @@ -424,14 +465,14 @@
1802  # Echo program name prefixed message to standard error.
1803  func_error ()
1804  {
1805 -    $ECHO "$progname${mode+: }$mode: "${1+"$@"} 1>&2
1806 +    $ECHO "$progname: ${opt_mode+$opt_mode: }"${1+"$@"} 1>&2
1807  }
1808  
1809  # func_warning arg...
1810  # Echo program name prefixed warning message to standard error.
1811  func_warning ()
1812  {
1813 -    $opt_warning && $ECHO "$progname${mode+: }$mode: warning: "${1+"$@"} 1>&2
1814 +    $opt_warning && $ECHO "$progname: ${opt_mode+$opt_mode: }warning: "${1+"$@"} 1>&2
1815  
1816      # bash bug again:
1817      :
1818 @@ -650,11 +691,30 @@
1819      fi
1820  }
1821  
1822 +# func_tr_sh
1823 +# Turn $1 into a string suitable for a shell variable name.
1824 +# Result is stored in $func_tr_sh_result.  All characters
1825 +# not in the set a-zA-Z0-9_ are replaced with '_'. Further,
1826 +# if $1 begins with a digit, a '_' is prepended as well.
1827 +func_tr_sh ()
1828 +{
1829 +  case $1 in
1830 +  [0-9]* | *[!a-zA-Z0-9_]*)
1831 +    func_tr_sh_result=`$ECHO "$1" | $SED 's/^\([0-9]\)/_\1/; s/[^a-zA-Z0-9_]/_/g'`
1832 +    ;;
1833 +  * )
1834 +    func_tr_sh_result=$1
1835 +    ;;
1836 +  esac
1837 +}
1838 +
1839  
1840  # func_version
1841  # Echo version message to standard output and exit.
1842  func_version ()
1843  {
1844 +    $opt_debug
1845 +
1846      $SED -n '/(C)/!b go
1847  	:more
1848  	/\./!{
1849 @@ -676,6 +736,8 @@
1850  # Echo short help message to standard output and exit.
1851  func_usage ()
1852  {
1853 +    $opt_debug
1854 +
1855      $SED -n '/^# Usage:/,/^#  *.*--help/ {
1856          s/^# //
1857  	s/^# *$//
1858 @@ -692,7 +754,10 @@
1859  # unless 'noexit' is passed as argument.
1860  func_help ()
1861  {
1862 +    $opt_debug
1863 +
1864      $SED -n '/^# Usage:/,/# Report bugs to/ {
1865 +	:print
1866          s/^# //
1867  	s/^# *$//
1868  	s*\$progname*'$progname'*
1869 @@ -702,10 +767,14 @@
1870  	s*\$LTCFLAGS*'"$LTCFLAGS"'*
1871  	s*\$LD*'"$LD"'*
1872  	s/\$with_gnu_ld/'"$with_gnu_ld"'/
1873 -	s/\$automake_version/'"`(automake --version) 2>/dev/null |$SED 1q`"'/
1874 -	s/\$autoconf_version/'"`(autoconf --version) 2>/dev/null |$SED 1q`"'/
1875 +	s/\$automake_version/'"`(${AUTOMAKE-automake} --version) 2>/dev/null |$SED 1q`"'/
1876 +	s/\$autoconf_version/'"`(${AUTOCONF-autoconf} --version) 2>/dev/null |$SED 1q`"'/
1877  	p
1878 -     }' < "$progpath"
1879 +	d
1880 +     }
1881 +     /^# .* home page:/b print
1882 +     /^# General help using/b print
1883 +     ' < "$progpath"
1884      ret=$?
1885      if test -z "$1"; then
1886        exit $ret
1887 @@ -717,12 +786,39 @@
1888  # exit_cmd.
1889  func_missing_arg ()
1890  {
1891 +    $opt_debug
1892 +
1893      func_error "missing argument for $1."
1894      exit_cmd=exit
1895  }
1896  
1897 -exit_cmd=:
1898  
1899 +# func_split_short_opt shortopt
1900 +# Set func_split_short_opt_name and func_split_short_opt_arg shell
1901 +# variables after splitting SHORTOPT after the 2nd character.
1902 +func_split_short_opt ()
1903 +{
1904 +    my_sed_short_opt='1s/^\(..\).*$/\1/;q'
1905 +    my_sed_short_rest='1s/^..\(.*\)$/\1/;q'
1906 +
1907 +    func_split_short_opt_name=`$ECHO "$1" | $SED "$my_sed_short_opt"`
1908 +    func_split_short_opt_arg=`$ECHO "$1" | $SED "$my_sed_short_rest"`
1909 +} # func_split_short_opt may be replaced by extended shell implementation
1910 +
1911 +
1912 +# func_split_long_opt longopt
1913 +# Set func_split_long_opt_name and func_split_long_opt_arg shell
1914 +# variables after splitting LONGOPT at the `=' sign.
1915 +func_split_long_opt ()
1916 +{
1917 +    my_sed_long_opt='1s/^\(--[^=]*\)=.*/\1/;q'
1918 +    my_sed_long_arg='1s/^--[^=]*=//'
1919 +
1920 +    func_split_long_opt_name=`$ECHO "$1" | $SED "$my_sed_long_opt"`
1921 +    func_split_long_opt_arg=`$ECHO "$1" | $SED "$my_sed_long_arg"`
1922 +} # func_split_long_opt may be replaced by extended shell implementation
1923 +
1924 +exit_cmd=:
1925  
1926  
1927  
1928 @@ -732,25 +828,64 @@
1929  magic_exe="%%%MAGIC EXE variable%%%"
1930  
1931  # Global variables.
1932 -# $mode is unset
1933  nonopt=
1934 -execute_dlfiles=
1935  preserve_args=
1936  lo2o="s/\\.lo\$/.${objext}/"
1937  o2lo="s/\\.${objext}\$/.lo/"
1938  extracted_archives=
1939  extracted_serial=0
1940  
1941 -opt_dry_run=false
1942 -opt_duplicate_deps=false
1943 -opt_silent=false
1944 -opt_debug=:
1945 -
1946  # If this variable is set in any of the actions, the command in it
1947  # will be execed at the end.  This prevents here-documents from being
1948  # left over by shells.
1949  exec_cmd=
1950  
1951 +# func_append var value
1952 +# Append VALUE to the end of shell variable VAR.
1953 +func_append ()
1954 +{
1955 +    eval "${1}=\$${1}\${2}"
1956 +} # func_append may be replaced by extended shell implementation
1957 +
1958 +# func_append_quoted var value
1959 +# Quote VALUE and append to the end of shell variable VAR, separated
1960 +# by a space.
1961 +func_append_quoted ()
1962 +{
1963 +    func_quote_for_eval "${2}"
1964 +    eval "${1}=\$${1}\\ \$func_quote_for_eval_result"
1965 +} # func_append_quoted may be replaced by extended shell implementation
1966 +
1967 +
1968 +# func_arith arithmetic-term...
1969 +func_arith ()
1970 +{
1971 +    func_arith_result=`expr "${@}"`
1972 +} # func_arith may be replaced by extended shell implementation
1973 +
1974 +
1975 +# func_len string
1976 +# STRING may not start with a hyphen.
1977 +func_len ()
1978 +{
1979 +    func_len_result=`expr "${1}" : ".*" 2>/dev/null || echo $max_cmd_len`
1980 +} # func_len may be replaced by extended shell implementation
1981 +
1982 +
1983 +# func_lo2o object
1984 +func_lo2o ()
1985 +{
1986 +    func_lo2o_result=`$ECHO "${1}" | $SED "$lo2o"`
1987 +} # func_lo2o may be replaced by extended shell implementation
1988 +
1989 +
1990 +# func_xform libobj-or-source
1991 +func_xform ()
1992 +{
1993 +    func_xform_result=`$ECHO "${1}" | $SED 's/\.[^.]*$/.lo/'`
1994 +} # func_xform may be replaced by extended shell implementation
1995 +
1996 +
1997  # func_fatal_configuration arg...
1998  # Echo program name prefixed message to standard error, followed by
1999  # a configuration failure hint, and exit.
2000 @@ -840,129 +975,209 @@
2001    esac
2002  }
2003  
2004 -# Parse options once, thoroughly.  This comes as soon as possible in
2005 -# the script to make things like `libtool --version' happen quickly.
2006 +# func_check_version_match
2007 +# Ensure that we are using m4 macros, and libtool script from the same
2008 +# release of libtool.
2009 +func_check_version_match ()
2010  {
2011 +  if test "$package_revision" != "$macro_revision"; then
2012 +    if test "$VERSION" != "$macro_version"; then
2013 +      if test -z "$macro_version"; then
2014 +        cat >&2 <<_LT_EOF
2015 +$progname: Version mismatch error.  This is $PACKAGE $VERSION, but the
2016 +$progname: definition of this LT_INIT comes from an older release.
2017 +$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
2018 +$progname: and run autoconf again.
2019 +_LT_EOF
2020 +      else
2021 +        cat >&2 <<_LT_EOF
2022 +$progname: Version mismatch error.  This is $PACKAGE $VERSION, but the
2023 +$progname: definition of this LT_INIT comes from $PACKAGE $macro_version.
2024 +$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
2025 +$progname: and run autoconf again.
2026 +_LT_EOF
2027 +      fi
2028 +    else
2029 +      cat >&2 <<_LT_EOF
2030 +$progname: Version mismatch error.  This is $PACKAGE $VERSION, revision $package_revision,
2031 +$progname: but the definition of this LT_INIT comes from revision $macro_revision.
2032 +$progname: You should recreate aclocal.m4 with macros from revision $package_revision
2033 +$progname: of $PACKAGE $VERSION and run autoconf again.
2034 +_LT_EOF
2035 +    fi
2036 +
2037 +    exit $EXIT_MISMATCH
2038 +  fi
2039 +}
2040 +
2041 +
2042 +# Shorthand for --mode=foo, only valid as the first argument
2043 +case $1 in
2044 +clean|clea|cle|cl)
2045 +  shift; set dummy --mode clean ${1+"$@"}; shift
2046 +  ;;
2047 +compile|compil|compi|comp|com|co|c)
2048 +  shift; set dummy --mode compile ${1+"$@"}; shift
2049 +  ;;
2050 +execute|execut|execu|exec|exe|ex|e)
2051 +  shift; set dummy --mode execute ${1+"$@"}; shift
2052 +  ;;
2053 +finish|finis|fini|fin|fi|f)
2054 +  shift; set dummy --mode finish ${1+"$@"}; shift
2055 +  ;;
2056 +install|instal|insta|inst|ins|in|i)
2057 +  shift; set dummy --mode install ${1+"$@"}; shift
2058 +  ;;
2059 +link|lin|li|l)
2060 +  shift; set dummy --mode link ${1+"$@"}; shift
2061 +  ;;
2062 +uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u)
2063 +  shift; set dummy --mode uninstall ${1+"$@"}; shift
2064 +  ;;
2065 +esac
2066 +
2067 +
2068 +
2069 +# Option defaults:
2070 +opt_debug=:
2071 +opt_dry_run=false
2072 +opt_config=false
2073 +opt_preserve_dup_deps=false
2074 +opt_features=false
2075 +opt_finish=false
2076 +opt_help=false
2077 +opt_help_all=false
2078 +opt_silent=:
2079 +opt_warning=:
2080 +opt_verbose=:
2081 +opt_silent=false
2082 +opt_verbose=false
2083  
2084 -  # Shorthand for --mode=foo, only valid as the first argument
2085 -  case $1 in
2086 -  clean|clea|cle|cl)
2087 -    shift; set dummy --mode clean ${1+"$@"}; shift
2088 -    ;;
2089 -  compile|compil|compi|comp|com|co|c)
2090 -    shift; set dummy --mode compile ${1+"$@"}; shift
2091 -    ;;
2092 -  execute|execut|execu|exec|exe|ex|e)
2093 -    shift; set dummy --mode execute ${1+"$@"}; shift
2094 -    ;;
2095 -  finish|finis|fini|fin|fi|f)
2096 -    shift; set dummy --mode finish ${1+"$@"}; shift
2097 -    ;;
2098 -  install|instal|insta|inst|ins|in|i)
2099 -    shift; set dummy --mode install ${1+"$@"}; shift
2100 -    ;;
2101 -  link|lin|li|l)
2102 -    shift; set dummy --mode link ${1+"$@"}; shift
2103 -    ;;
2104 -  uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u)
2105 -    shift; set dummy --mode uninstall ${1+"$@"}; shift
2106 -    ;;
2107 -  esac
2108  
2109 -  # Parse non-mode specific arguments:
2110 -  while test "$#" -gt 0; do
2111 +# Parse options once, thoroughly.  This comes as soon as possible in the
2112 +# script to make things like `--version' happen as quickly as we can.
2113 +{
2114 +  # this just eases exit handling
2115 +  while test $# -gt 0; do
2116      opt="$1"
2117      shift
2118 -
2119      case $opt in
2120 -      --config)		func_config					;;
2121 -
2122 -      --debug)		preserve_args="$preserve_args $opt"
2123 +      --debug|-x)	opt_debug='set -x'
2124  			func_echo "enabling shell trace mode"
2125 -			opt_debug='set -x'
2126  			$opt_debug
2127  			;;
2128 -
2129 -      -dlopen)		test "$#" -eq 0 && func_missing_arg "$opt" && break
2130 -			execute_dlfiles="$execute_dlfiles $1"
2131 -			shift
2132 +      --dry-run|--dryrun|-n)
2133 +			opt_dry_run=:
2134  			;;
2135 -
2136 -      --dry-run | -n)	opt_dry_run=:					;;
2137 -      --features)       func_features					;;
2138 -      --finish)		mode="finish"					;;
2139 -
2140 -      --mode)		test "$#" -eq 0 && func_missing_arg "$opt" && break
2141 -			case $1 in
2142 -			  # Valid mode arguments:
2143 -			  clean)	;;
2144 -			  compile)	;;
2145 -			  execute)	;;
2146 -			  finish)	;;
2147 -			  install)	;;
2148 -			  link)		;;
2149 -			  relink)	;;
2150 -			  uninstall)	;;
2151 -
2152 -			  # Catch anything else as an error
2153 -			  *) func_error "invalid argument for $opt"
2154 -			     exit_cmd=exit
2155 -			     break
2156 -			     ;;
2157 -		        esac
2158 -
2159 -			mode="$1"
2160 +      --config)
2161 +			opt_config=:
2162 +func_config
2163 +			;;
2164 +      --dlopen|-dlopen)
2165 +			optarg="$1"
2166 +			opt_dlopen="${opt_dlopen+$opt_dlopen
2167 +}$optarg"
2168  			shift
2169  			;;
2170 -
2171        --preserve-dup-deps)
2172 -			opt_duplicate_deps=:				;;
2173 -
2174 -      --quiet|--silent)	preserve_args="$preserve_args $opt"
2175 -			opt_silent=:
2176 -			opt_verbose=false
2177 +			opt_preserve_dup_deps=:
2178  			;;
2179 -
2180 -      --no-quiet|--no-silent)
2181 -			preserve_args="$preserve_args $opt"
2182 -			opt_silent=false
2183 +      --features)
2184 +			opt_features=:
2185 +func_features
2186  			;;
2187 -
2188 -      --verbose| -v)	preserve_args="$preserve_args $opt"
2189 +      --finish)
2190 +			opt_finish=:
2191 +set dummy --mode finish ${1+"$@"}; shift
2192 +			;;
2193 +      --help)
2194 +			opt_help=:
2195 +			;;
2196 +      --help-all)
2197 +			opt_help_all=:
2198 +opt_help=': help-all'
2199 +			;;
2200 +      --mode)
2201 +			test $# = 0 && func_missing_arg $opt && break
2202 +			optarg="$1"
2203 +			opt_mode="$optarg"
2204 +case $optarg in
2205 +  # Valid mode arguments:
2206 +  clean|compile|execute|finish|install|link|relink|uninstall) ;;
2207 +
2208 +  # Catch anything else as an error
2209 +  *) func_error "invalid argument for $opt"
2210 +     exit_cmd=exit
2211 +     break
2212 +     ;;
2213 +esac
2214 +			shift
2215 +			;;
2216 +      --no-silent|--no-quiet)
2217  			opt_silent=false
2218 -			opt_verbose=:
2219 +func_append preserve_args " $opt"
2220  			;;
2221 -
2222 -      --no-verbose)	preserve_args="$preserve_args $opt"
2223 +      --no-warning|--no-warn)
2224 +			opt_warning=false
2225 +func_append preserve_args " $opt"
2226 +			;;
2227 +      --no-verbose)
2228  			opt_verbose=false
2229 +func_append preserve_args " $opt"
2230  			;;
2231 -
2232 -      --tag)		test "$#" -eq 0 && func_missing_arg "$opt" && break
2233 -			preserve_args="$preserve_args $opt $1"
2234 -			func_enable_tag "$1"	# tagname is set here
2235 +      --silent|--quiet)
2236 +			opt_silent=:
2237 +func_append preserve_args " $opt"
2238 +        opt_verbose=false
2239 +			;;
2240 +      --verbose|-v)
2241 +			opt_verbose=:
2242 +func_append preserve_args " $opt"
2243 +opt_silent=false
2244 +			;;
2245 +      --tag)
2246 +			test $# = 0 && func_missing_arg $opt && break
2247 +			optarg="$1"
2248 +			opt_tag="$optarg"
2249 +func_append preserve_args " $opt $optarg"
2250 +func_enable_tag "$optarg"
2251  			shift
2252  			;;
2253  
2254 +      -\?|-h)		func_usage				;;
2255 +      --help)		func_help				;;
2256 +      --version)	func_version				;;
2257 +
2258        # Separate optargs to long options:
2259 -      -dlopen=*|--mode=*|--tag=*)
2260 -			func_opt_split "$opt"
2261 -			set dummy "$func_opt_split_opt" "$func_opt_split_arg" ${1+"$@"}
2262 +      --*=*)
2263 +			func_split_long_opt "$opt"
2264 +			set dummy "$func_split_long_opt_name" "$func_split_long_opt_arg" ${1+"$@"}
2265  			shift
2266  			;;
2267  
2268 -      -\?|-h)		func_usage					;;
2269 -      --help)		opt_help=:					;;
2270 -      --help-all)	opt_help=': help-all'				;;
2271 -      --version)	func_version					;;
2272 -
2273 -      -*)		func_fatal_help "unrecognized option \`$opt'"	;;
2274 -
2275 -      *)		nonopt="$opt"
2276 -			break
2277 +      # Separate non-argument short options:
2278 +      -\?*|-h*|-n*|-v*)
2279 +			func_split_short_opt "$opt"
2280 +			set dummy "$func_split_short_opt_name" "-$func_split_short_opt_arg" ${1+"$@"}
2281 +			shift
2282  			;;
2283 +
2284 +      --)		break					;;
2285 +      -*)		func_fatal_help "unrecognized option \`$opt'" ;;
2286 +      *)		set dummy "$opt" ${1+"$@"};	shift; break  ;;
2287      esac
2288    done
2289  
2290 +  # Validate options:
2291 +
2292 +  # save first non-option argument
2293 +  if test "$#" -gt 0; then
2294 +    nonopt="$opt"
2295 +    shift
2296 +  fi
2297 +
2298 +  # preserve --debug
2299 +  test "$opt_debug" = : || func_append preserve_args " --debug"
2300  
2301    case $host in
2302      *cygwin* | *mingw* | *pw32* | *cegcc*)
2303 @@ -970,82 +1185,44 @@
2304        opt_duplicate_compiler_generated_deps=:
2305        ;;
2306      *)
2307 -      opt_duplicate_compiler_generated_deps=$opt_duplicate_deps
2308 +      opt_duplicate_compiler_generated_deps=$opt_preserve_dup_deps
2309        ;;
2310    esac
2311  
2312 -  # Having warned about all mis-specified options, bail out if
2313 -  # anything was wrong.
2314 -  $exit_cmd $EXIT_FAILURE
2315 -}
2316 +  $opt_help || {
2317 +    # Sanity checks first:
2318 +    func_check_version_match
2319  
2320 -# func_check_version_match
2321 -# Ensure that we are using m4 macros, and libtool script from the same
2322 -# release of libtool.
2323 -func_check_version_match ()
2324 -{
2325 -  if test "$package_revision" != "$macro_revision"; then
2326 -    if test "$VERSION" != "$macro_version"; then
2327 -      if test -z "$macro_version"; then
2328 -        cat >&2 <<_LT_EOF
2329 -$progname: Version mismatch error.  This is $PACKAGE $VERSION, but the
2330 -$progname: definition of this LT_INIT comes from an older release.
2331 -$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
2332 -$progname: and run autoconf again.
2333 -_LT_EOF
2334 -      else
2335 -        cat >&2 <<_LT_EOF
2336 -$progname: Version mismatch error.  This is $PACKAGE $VERSION, but the
2337 -$progname: definition of this LT_INIT comes from $PACKAGE $macro_version.
2338 -$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
2339 -$progname: and run autoconf again.
2340 -_LT_EOF
2341 -      fi
2342 -    else
2343 -      cat >&2 <<_LT_EOF
2344 -$progname: Version mismatch error.  This is $PACKAGE $VERSION, revision $package_revision,
2345 -$progname: but the definition of this LT_INIT comes from revision $macro_revision.
2346 -$progname: You should recreate aclocal.m4 with macros from revision $package_revision
2347 -$progname: of $PACKAGE $VERSION and run autoconf again.
2348 -_LT_EOF
2349 +    if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then
2350 +      func_fatal_configuration "not configured to build any kind of library"
2351      fi
2352  
2353 -    exit $EXIT_MISMATCH
2354 -  fi
2355 -}
2356 -
2357 -
2358 -## ----------- ##
2359 -##    Main.    ##
2360 -## ----------- ##
2361 +    # Darwin sucks
2362 +    eval std_shrext=\"$shrext_cmds\"
2363  
2364 -$opt_help || {
2365 -  # Sanity checks first:
2366 -  func_check_version_match
2367 -
2368 -  if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then
2369 -    func_fatal_configuration "not configured to build any kind of library"
2370 -  fi
2371 +    # Only execute mode is allowed to have -dlopen flags.
2372 +    if test -n "$opt_dlopen" && test "$opt_mode" != execute; then
2373 +      func_error "unrecognized option \`-dlopen'"
2374 +      $ECHO "$help" 1>&2
2375 +      exit $EXIT_FAILURE
2376 +    fi
2377  
2378 -  test -z "$mode" && func_fatal_error "error: you must specify a MODE."
2379 +    # Change the help message to a mode-specific one.
2380 +    generic_help="$help"
2381 +    help="Try \`$progname --help --mode=$opt_mode' for more information."
2382 +  }
2383  
2384  
2385 -  # Darwin sucks
2386 -  eval std_shrext=\"$shrext_cmds\"
2387 +  # Bail if the options were screwed
2388 +  $exit_cmd $EXIT_FAILURE
2389 +}
2390  
2391  
2392 -  # Only execute mode is allowed to have -dlopen flags.
2393 -  if test -n "$execute_dlfiles" && test "$mode" != execute; then
2394 -    func_error "unrecognized option \`-dlopen'"
2395 -    $ECHO "$help" 1>&2
2396 -    exit $EXIT_FAILURE
2397 -  fi
2398  
2399 -  # Change the help message to a mode-specific one.
2400 -  generic_help="$help"
2401 -  help="Try \`$progname --help --mode=$mode' for more information."
2402 -}
2403  
2404 +## ----------- ##
2405 +##    Main.    ##
2406 +## ----------- ##
2407  
2408  # func_lalib_p file
2409  # True iff FILE is a libtool `.la' library or `.lo' object file.
2410 @@ -1110,12 +1287,9 @@
2411  # temporary ltwrapper_script.
2412  func_ltwrapper_scriptname ()
2413  {
2414 -    func_ltwrapper_scriptname_result=""
2415 -    if func_ltwrapper_executable_p "$1"; then
2416 -	func_dirname_and_basename "$1" "" "."
2417 -	func_stripname '' '.exe' "$func_basename_result"
2418 -	func_ltwrapper_scriptname_result="$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper"
2419 -    fi
2420 +    func_dirname_and_basename "$1" "" "."
2421 +    func_stripname '' '.exe' "$func_basename_result"
2422 +    func_ltwrapper_scriptname_result="$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper"
2423  }
2424  
2425  # func_ltwrapper_p file
2426 @@ -1161,6 +1335,37 @@
2427  }
2428  
2429  
2430 +# func_resolve_sysroot PATH
2431 +# Replace a leading = in PATH with a sysroot.  Store the result into
2432 +# func_resolve_sysroot_result
2433 +func_resolve_sysroot ()
2434 +{
2435 +  func_resolve_sysroot_result=$1
2436 +  case $func_resolve_sysroot_result in
2437 +  =*)
2438 +    func_stripname '=' '' "$func_resolve_sysroot_result"
2439 +    func_resolve_sysroot_result=$lt_sysroot$func_stripname_result
2440 +    ;;
2441 +  esac
2442 +}
2443 +
2444 +# func_replace_sysroot PATH
2445 +# If PATH begins with the sysroot, replace it with = and
2446 +# store the result into func_replace_sysroot_result.
2447 +func_replace_sysroot ()
2448 +{
2449 +  case "$lt_sysroot:$1" in
2450 +  ?*:"$lt_sysroot"*)
2451 +    func_stripname "$lt_sysroot" '' "$1"
2452 +    func_replace_sysroot_result="=$func_stripname_result"
2453 +    ;;
2454 +  *)
2455 +    # Including no sysroot.
2456 +    func_replace_sysroot_result=$1
2457 +    ;;
2458 +  esac
2459 +}
2460 +
2461  # func_infer_tag arg
2462  # Infer tagged configuration to use if any are available and
2463  # if one wasn't chosen via the "--tag" command line option.
2464 @@ -1173,8 +1378,7 @@
2465      if test -n "$available_tags" && test -z "$tagname"; then
2466        CC_quoted=
2467        for arg in $CC; do
2468 -        func_quote_for_eval "$arg"
2469 -	CC_quoted="$CC_quoted $func_quote_for_eval_result"
2470 +	func_append_quoted CC_quoted "$arg"
2471        done
2472        CC_expanded=`func_echo_all $CC`
2473        CC_quoted_expanded=`func_echo_all $CC_quoted`
2474 @@ -1193,8 +1397,7 @@
2475  	    CC_quoted=
2476  	    for arg in $CC; do
2477  	      # Double-quote args containing other shell metacharacters.
2478 -	      func_quote_for_eval "$arg"
2479 -	      CC_quoted="$CC_quoted $func_quote_for_eval_result"
2480 +	      func_append_quoted CC_quoted "$arg"
2481  	    done
2482  	    CC_expanded=`func_echo_all $CC`
2483  	    CC_quoted_expanded=`func_echo_all $CC_quoted`
2484 @@ -1244,24 +1447,504 @@
2485        write_oldobj=none
2486      fi
2487  
2488 -    $opt_dry_run || {
2489 -      cat >${write_libobj}T <<EOF
2490 -# $write_libobj - a libtool object file
2491 -# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
2492 -#
2493 -# Please DO NOT delete this file!
2494 -# It is necessary for linking the library.
2495 +    $opt_dry_run || {
2496 +      cat >${write_libobj}T <<EOF
2497 +# $write_libobj - a libtool object file
2498 +# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
2499 +#
2500 +# Please DO NOT delete this file!
2501 +# It is necessary for linking the library.
2502 +
2503 +# Name of the PIC object.
2504 +pic_object=$write_lobj
2505 +
2506 +# Name of the non-PIC object
2507 +non_pic_object=$write_oldobj
2508 +
2509 +EOF
2510 +      $MV "${write_libobj}T" "${write_libobj}"
2511 +    }
2512 +}
2513 +
2514 +
2515 +##################################################
2516 +# FILE NAME AND PATH CONVERSION HELPER FUNCTIONS #
2517 +##################################################
2518 +
2519 +# func_convert_core_file_wine_to_w32 ARG
2520 +# Helper function used by file name conversion functions when $build is *nix,
2521 +# and $host is mingw, cygwin, or some other w32 environment. Relies on a
2522 +# correctly configured wine environment available, with the winepath program
2523 +# in $build's $PATH.
2524 +#
2525 +# ARG is the $build file name to be converted to w32 format.
2526 +# Result is available in $func_convert_core_file_wine_to_w32_result, and will
2527 +# be empty on error (or when ARG is empty)
2528 +func_convert_core_file_wine_to_w32 ()
2529 +{
2530 +  $opt_debug
2531 +  func_convert_core_file_wine_to_w32_result="$1"
2532 +  if test -n "$1"; then
2533 +    # Unfortunately, winepath does not exit with a non-zero error code, so we
2534 +    # are forced to check the contents of stdout. On the other hand, if the
2535 +    # command is not found, the shell will set an exit code of 127 and print
2536 +    # *an error message* to stdout. So we must check for both error code of
2537 +    # zero AND non-empty stdout, which explains the odd construction:
2538 +    func_convert_core_file_wine_to_w32_tmp=`winepath -w "$1" 2>/dev/null`
2539 +    if test "$?" -eq 0 && test -n "${func_convert_core_file_wine_to_w32_tmp}"; then
2540 +      func_convert_core_file_wine_to_w32_result=`$ECHO "$func_convert_core_file_wine_to_w32_tmp" |
2541 +        $SED -e "$lt_sed_naive_backslashify"`
2542 +    else
2543 +      func_convert_core_file_wine_to_w32_result=
2544 +    fi
2545 +  fi
2546 +}
2547 +# end: func_convert_core_file_wine_to_w32
2548 +
2549 +
2550 +# func_convert_core_path_wine_to_w32 ARG
2551 +# Helper function used by path conversion functions when $build is *nix, and
2552 +# $host is mingw, cygwin, or some other w32 environment. Relies on a correctly
2553 +# configured wine environment available, with the winepath program in $build's
2554 +# $PATH. Assumes ARG has no leading or trailing path separator characters.
2555 +#
2556 +# ARG is path to be converted from $build format to win32.
2557 +# Result is available in $func_convert_core_path_wine_to_w32_result.
2558 +# Unconvertible file (directory) names in ARG are skipped; if no directory names
2559 +# are convertible, then the result may be empty.
2560 +func_convert_core_path_wine_to_w32 ()
2561 +{
2562 +  $opt_debug
2563 +  # unfortunately, winepath doesn't convert paths, only file names
2564 +  func_convert_core_path_wine_to_w32_result=""
2565 +  if test -n "$1"; then
2566 +    oldIFS=$IFS
2567 +    IFS=:
2568 +    for func_convert_core_path_wine_to_w32_f in $1; do
2569 +      IFS=$oldIFS
2570 +      func_convert_core_file_wine_to_w32 "$func_convert_core_path_wine_to_w32_f"
2571 +      if test -n "$func_convert_core_file_wine_to_w32_result" ; then
2572 +        if test -z "$func_convert_core_path_wine_to_w32_result"; then
2573 +          func_convert_core_path_wine_to_w32_result="$func_convert_core_file_wine_to_w32_result"
2574 +        else
2575 +          func_append func_convert_core_path_wine_to_w32_result ";$func_convert_core_file_wine_to_w32_result"
2576 +        fi
2577 +      fi
2578 +    done
2579 +    IFS=$oldIFS
2580 +  fi
2581 +}
2582 +# end: func_convert_core_path_wine_to_w32
2583 +
2584 +
2585 +# func_cygpath ARGS...
2586 +# Wrapper around calling the cygpath program via LT_CYGPATH. This is used when
2587 +# (1) $build is *nix and Cygwin is hosted via a wine environment; or (2)
2588 +# $build is MSYS and $host is Cygwin, or (3) $build is Cygwin. In case (1) or
2589 +# (2), returns the Cygwin file name or path in func_cygpath_result (input
2590 +# file name or path is assumed to be in w32 format, as previously converted
2591 +# from $build's *nix or MSYS format). In case (3), returns the w32 file name
2592 +# or path in func_cygpath_result (input file name or path is assumed to be in
2593 +# Cygwin format). Returns an empty string on error.
2594 +#
2595 +# ARGS are passed to cygpath, with the last one being the file name or path to
2596 +# be converted.
2597 +#
2598 +# Specify the absolute *nix (or w32) name to cygpath in the LT_CYGPATH
2599 +# environment variable; do not put it in $PATH.
2600 +func_cygpath ()
2601 +{
2602 +  $opt_debug
2603 +  if test -n "$LT_CYGPATH" && test -f "$LT_CYGPATH"; then
2604 +    func_cygpath_result=`$LT_CYGPATH "$@" 2>/dev/null`
2605 +    if test "$?" -ne 0; then
2606 +      # on failure, ensure result is empty
2607 +      func_cygpath_result=
2608 +    fi
2609 +  else
2610 +    func_cygpath_result=
2611 +    func_error "LT_CYGPATH is empty or specifies non-existent file: \`$LT_CYGPATH'"
2612 +  fi
2613 +}
2614 +#end: func_cygpath
2615 +
2616 +
2617 +# func_convert_core_msys_to_w32 ARG
2618 +# Convert file name or path ARG from MSYS format to w32 format.  Return
2619 +# result in func_convert_core_msys_to_w32_result.
2620 +func_convert_core_msys_to_w32 ()
2621 +{
2622 +  $opt_debug
2623 +  # awkward: cmd appends spaces to result
2624 +  func_convert_core_msys_to_w32_result=`( cmd //c echo "$1" ) 2>/dev/null |
2625 +    $SED -e 's/[ ]*$//' -e "$lt_sed_naive_backslashify"`
2626 +}
2627 +#end: func_convert_core_msys_to_w32
2628 +
2629 +
2630 +# func_convert_file_check ARG1 ARG2
2631 +# Verify that ARG1 (a file name in $build format) was converted to $host
2632 +# format in ARG2. Otherwise, emit an error message, but continue (resetting
2633 +# func_to_host_file_result to ARG1).
2634 +func_convert_file_check ()
2635 +{
2636 +  $opt_debug
2637 +  if test -z "$2" && test -n "$1" ; then
2638 +    func_error "Could not determine host file name corresponding to"
2639 +    func_error "  \`$1'"
2640 +    func_error "Continuing, but uninstalled executables may not work."
2641 +    # Fallback:
2642 +    func_to_host_file_result="$1"
2643 +  fi
2644 +}
2645 +# end func_convert_file_check
2646 +
2647 +
2648 +# func_convert_path_check FROM_PATHSEP TO_PATHSEP FROM_PATH TO_PATH
2649 +# Verify that FROM_PATH (a path in $build format) was converted to $host
2650 +# format in TO_PATH. Otherwise, emit an error message, but continue, resetting
2651 +# func_to_host_path_result to a simplistic fallback value (see below).
2652 +func_convert_path_check ()
2653 +{
2654 +  $opt_debug
2655 +  if test -z "$4" && test -n "$3"; then
2656 +    func_error "Could not determine the host path corresponding to"
2657 +    func_error "  \`$3'"
2658 +    func_error "Continuing, but uninstalled executables may not work."
2659 +    # Fallback.  This is a deliberately simplistic "conversion" and
2660 +    # should not be "improved".  See libtool.info.
2661 +    if test "x$1" != "x$2"; then
2662 +      lt_replace_pathsep_chars="s|$1|$2|g"
2663 +      func_to_host_path_result=`echo "$3" |
2664 +        $SED -e "$lt_replace_pathsep_chars"`
2665 +    else
2666 +      func_to_host_path_result="$3"
2667 +    fi
2668 +  fi
2669 +}
2670 +# end func_convert_path_check
2671 +
2672 +
2673 +# func_convert_path_front_back_pathsep FRONTPAT BACKPAT REPL ORIG
2674 +# Modifies func_to_host_path_result by prepending REPL if ORIG matches FRONTPAT
2675 +# and appending REPL if ORIG matches BACKPAT.
2676 +func_convert_path_front_back_pathsep ()
2677 +{
2678 +  $opt_debug
2679 +  case $4 in
2680 +  $1 ) func_to_host_path_result="$3$func_to_host_path_result"
2681 +    ;;
2682 +  esac
2683 +  case $4 in
2684 +  $2 ) func_append func_to_host_path_result "$3"
2685 +    ;;
2686 +  esac
2687 +}
2688 +# end func_convert_path_front_back_pathsep
2689 +
2690 +
2691 +##################################################
2692 +# $build to $host FILE NAME CONVERSION FUNCTIONS #
2693 +##################################################
2694 +# invoked via `$to_host_file_cmd ARG'
2695 +#
2696 +# In each case, ARG is the path to be converted from $build to $host format.
2697 +# Result will be available in $func_to_host_file_result.
2698 +
2699 +
2700 +# func_to_host_file ARG
2701 +# Converts the file name ARG from $build format to $host format. Return result
2702 +# in func_to_host_file_result.
2703 +func_to_host_file ()
2704 +{
2705 +  $opt_debug
2706 +  $to_host_file_cmd "$1"
2707 +}
2708 +# end func_to_host_file
2709 +
2710 +
2711 +# func_to_tool_file ARG LAZY
2712 +# converts the file name ARG from $build format to toolchain format. Return
2713 +# result in func_to_tool_file_result.  If the conversion in use is listed
2714 +# in (the comma separated) LAZY, no conversion takes place.
2715 +func_to_tool_file ()
2716 +{
2717 +  $opt_debug
2718 +  case ,$2, in
2719 +    *,"$to_tool_file_cmd",*)
2720 +      func_to_tool_file_result=$1
2721 +      ;;
2722 +    *)
2723 +      $to_tool_file_cmd "$1"
2724 +      func_to_tool_file_result=$func_to_host_file_result
2725 +      ;;
2726 +  esac
2727 +}
2728 +# end func_to_tool_file
2729 +
2730 +
2731 +# func_convert_file_noop ARG
2732 +# Copy ARG to func_to_host_file_result.
2733 +func_convert_file_noop ()
2734 +{
2735 +  func_to_host_file_result="$1"
2736 +}
2737 +# end func_convert_file_noop
2738 +
2739 +
2740 +# func_convert_file_msys_to_w32 ARG
2741 +# Convert file name ARG from (mingw) MSYS to (mingw) w32 format; automatic
2742 +# conversion to w32 is not available inside the cwrapper.  Returns result in
2743 +# func_to_host_file_result.
2744 +func_convert_file_msys_to_w32 ()
2745 +{
2746 +  $opt_debug
2747 +  func_to_host_file_result="$1"
2748 +  if test -n "$1"; then
2749 +    func_convert_core_msys_to_w32 "$1"
2750 +    func_to_host_file_result="$func_convert_core_msys_to_w32_result"
2751 +  fi
2752 +  func_convert_file_check "$1" "$func_to_host_file_result"
2753 +}
2754 +# end func_convert_file_msys_to_w32
2755 +
2756 +
2757 +# func_convert_file_cygwin_to_w32 ARG
2758 +# Convert file name ARG from Cygwin to w32 format.  Returns result in
2759 +# func_to_host_file_result.
2760 +func_convert_file_cygwin_to_w32 ()
2761 +{
2762 +  $opt_debug
2763 +  func_to_host_file_result="$1"
2764 +  if test -n "$1"; then
2765 +    # because $build is cygwin, we call "the" cygpath in $PATH; no need to use
2766 +    # LT_CYGPATH in this case.
2767 +    func_to_host_file_result=`cygpath -m "$1"`
2768 +  fi
2769 +  func_convert_file_check "$1" "$func_to_host_file_result"
2770 +}
2771 +# end func_convert_file_cygwin_to_w32
2772 +
2773 +
2774 +# func_convert_file_nix_to_w32 ARG
2775 +# Convert file name ARG from *nix to w32 format.  Requires a wine environment
2776 +# and a working winepath. Returns result in func_to_host_file_result.
2777 +func_convert_file_nix_to_w32 ()
2778 +{
2779 +  $opt_debug
2780 +  func_to_host_file_result="$1"
2781 +  if test -n "$1"; then
2782 +    func_convert_core_file_wine_to_w32 "$1"
2783 +    func_to_host_file_result="$func_convert_core_file_wine_to_w32_result"
2784 +  fi
2785 +  func_convert_file_check "$1" "$func_to_host_file_result"
2786 +}
2787 +# end func_convert_file_nix_to_w32
2788 +
2789 +
2790 +# func_convert_file_msys_to_cygwin ARG
2791 +# Convert file name ARG from MSYS to Cygwin format.  Requires LT_CYGPATH set.
2792 +# Returns result in func_to_host_file_result.
2793 +func_convert_file_msys_to_cygwin ()
2794 +{
2795 +  $opt_debug
2796 +  func_to_host_file_result="$1"
2797 +  if test -n "$1"; then
2798 +    func_convert_core_msys_to_w32 "$1"
2799 +    func_cygpath -u "$func_convert_core_msys_to_w32_result"
2800 +    func_to_host_file_result="$func_cygpath_result"
2801 +  fi
2802 +  func_convert_file_check "$1" "$func_to_host_file_result"
2803 +}
2804 +# end func_convert_file_msys_to_cygwin
2805 +
2806 +
2807 +# func_convert_file_nix_to_cygwin ARG
2808 +# Convert file name ARG from *nix to Cygwin format.  Requires Cygwin installed
2809 +# in a wine environment, working winepath, and LT_CYGPATH set.  Returns result
2810 +# in func_to_host_file_result.
2811 +func_convert_file_nix_to_cygwin ()
2812 +{
2813 +  $opt_debug
2814 +  func_to_host_file_result="$1"
2815 +  if test -n "$1"; then
2816 +    # convert from *nix to w32, then use cygpath to convert from w32 to cygwin.
2817 +    func_convert_core_file_wine_to_w32 "$1"
2818 +    func_cygpath -u "$func_convert_core_file_wine_to_w32_result"
2819 +    func_to_host_file_result="$func_cygpath_result"
2820 +  fi
2821 +  func_convert_file_check "$1" "$func_to_host_file_result"
2822 +}
2823 +# end func_convert_file_nix_to_cygwin
2824 +
2825 +
2826 +#############################################
2827 +# $build to $host PATH CONVERSION FUNCTIONS #
2828 +#############################################
2829 +# invoked via `$to_host_path_cmd ARG'
2830 +#
2831 +# In each case, ARG is the path to be converted from $build to $host format.
2832 +# The result will be available in $func_to_host_path_result.
2833 +#
2834 +# Path separators are also converted from $build format to $host format.  If
2835 +# ARG begins or ends with a path separator character, it is preserved (but
2836 +# converted to $host format) on output.
2837 +#
2838 +# All path conversion functions are named using the following convention:
2839 +#   file name conversion function    : func_convert_file_X_to_Y ()
2840 +#   path conversion function         : func_convert_path_X_to_Y ()
2841 +# where, for any given $build/$host combination the 'X_to_Y' value is the
2842 +# same.  If conversion functions are added for new $build/$host combinations,
2843 +# the two new functions must follow this pattern, or func_init_to_host_path_cmd
2844 +# will break.
2845 +
2846 +
2847 +# func_init_to_host_path_cmd
2848 +# Ensures that function "pointer" variable $to_host_path_cmd is set to the
2849 +# appropriate value, based on the value of $to_host_file_cmd.
2850 +to_host_path_cmd=
2851 +func_init_to_host_path_cmd ()
2852 +{
2853 +  $opt_debug
2854 +  if test -z "$to_host_path_cmd"; then
2855 +    func_stripname 'func_convert_file_' '' "$to_host_file_cmd"
2856 +    to_host_path_cmd="func_convert_path_${func_stripname_result}"
2857 +  fi
2858 +}
2859 +
2860 +
2861 +# func_to_host_path ARG
2862 +# Converts the path ARG from $build format to $host format. Return result
2863 +# in func_to_host_path_result.
2864 +func_to_host_path ()
2865 +{
2866 +  $opt_debug
2867 +  func_init_to_host_path_cmd
2868 +  $to_host_path_cmd "$1"
2869 +}
2870 +# end func_to_host_path
2871 +
2872 +
2873 +# func_convert_path_noop ARG
2874 +# Copy ARG to func_to_host_path_result.
2875 +func_convert_path_noop ()
2876 +{
2877 +  func_to_host_path_result="$1"
2878 +}
2879 +# end func_convert_path_noop
2880 +
2881 +
2882 +# func_convert_path_msys_to_w32 ARG
2883 +# Convert path ARG from (mingw) MSYS to (mingw) w32 format; automatic
2884 +# conversion to w32 is not available inside the cwrapper.  Returns result in
2885 +# func_to_host_path_result.
2886 +func_convert_path_msys_to_w32 ()
2887 +{
2888 +  $opt_debug
2889 +  func_to_host_path_result="$1"
2890 +  if test -n "$1"; then
2891 +    # Remove leading and trailing path separator characters from ARG.  MSYS
2892 +    # behavior is inconsistent here; cygpath turns them into '.;' and ';.';
2893 +    # and winepath ignores them completely.
2894 +    func_stripname : : "$1"
2895 +    func_to_host_path_tmp1=$func_stripname_result
2896 +    func_convert_core_msys_to_w32 "$func_to_host_path_tmp1"
2897 +    func_to_host_path_result="$func_convert_core_msys_to_w32_result"
2898 +    func_convert_path_check : ";" \
2899 +      "$func_to_host_path_tmp1" "$func_to_host_path_result"
2900 +    func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
2901 +  fi
2902 +}
2903 +# end func_convert_path_msys_to_w32
2904 +
2905 +
2906 +# func_convert_path_cygwin_to_w32 ARG
2907 +# Convert path ARG from Cygwin to w32 format.  Returns result in
2908 +# func_to_host_path_result.
2909 +func_convert_path_cygwin_to_w32 ()
2910 +{
2911 +  $opt_debug
2912 +  func_to_host_path_result="$1"
2913 +  if test -n "$1"; then
2914 +    # See func_convert_path_msys_to_w32:
2915 +    func_stripname : : "$1"
2916 +    func_to_host_path_tmp1=$func_stripname_result
2917 +    func_to_host_path_result=`cygpath -m -p "$func_to_host_path_tmp1"`
2918 +    func_convert_path_check : ";" \
2919 +      "$func_to_host_path_tmp1" "$func_to_host_path_result"
2920 +    func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
2921 +  fi
2922 +}
2923 +# end func_convert_path_cygwin_to_w32
2924 +
2925  
2926 -# Name of the PIC object.
2927 -pic_object=$write_lobj
2928 +# func_convert_path_nix_to_w32 ARG
2929 +# Convert path ARG from *nix to w32 format.  Requires a wine environment and
2930 +# a working winepath.  Returns result in func_to_host_path_result.
2931 +func_convert_path_nix_to_w32 ()
2932 +{
2933 +  $opt_debug
2934 +  func_to_host_path_result="$1"
2935 +  if test -n "$1"; then
2936 +    # See func_convert_path_msys_to_w32:
2937 +    func_stripname : : "$1"
2938 +    func_to_host_path_tmp1=$func_stripname_result
2939 +    func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1"
2940 +    func_to_host_path_result="$func_convert_core_path_wine_to_w32_result"
2941 +    func_convert_path_check : ";" \
2942 +      "$func_to_host_path_tmp1" "$func_to_host_path_result"
2943 +    func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
2944 +  fi
2945 +}
2946 +# end func_convert_path_nix_to_w32
2947  
2948 -# Name of the non-PIC object
2949 -non_pic_object=$write_oldobj
2950  
2951 -EOF
2952 -      $MV "${write_libobj}T" "${write_libobj}"
2953 -    }
2954 +# func_convert_path_msys_to_cygwin ARG
2955 +# Convert path ARG from MSYS to Cygwin format.  Requires LT_CYGPATH set.
2956 +# Returns result in func_to_host_path_result.
2957 +func_convert_path_msys_to_cygwin ()
2958 +{
2959 +  $opt_debug
2960 +  func_to_host_path_result="$1"
2961 +  if test -n "$1"; then
2962 +    # See func_convert_path_msys_to_w32:
2963 +    func_stripname : : "$1"
2964 +    func_to_host_path_tmp1=$func_stripname_result
2965 +    func_convert_core_msys_to_w32 "$func_to_host_path_tmp1"
2966 +    func_cygpath -u -p "$func_convert_core_msys_to_w32_result"
2967 +    func_to_host_path_result="$func_cygpath_result"
2968 +    func_convert_path_check : : \
2969 +      "$func_to_host_path_tmp1" "$func_to_host_path_result"
2970 +    func_convert_path_front_back_pathsep ":*" "*:" : "$1"
2971 +  fi
2972 +}
2973 +# end func_convert_path_msys_to_cygwin
2974 +
2975 +
2976 +# func_convert_path_nix_to_cygwin ARG
2977 +# Convert path ARG from *nix to Cygwin format.  Requires Cygwin installed in a
2978 +# wine environment, working winepath, and LT_CYGPATH set.  Returns result in
2979 +# func_to_host_path_result.
2980 +func_convert_path_nix_to_cygwin ()
2981 +{
2982 +  $opt_debug
2983 +  func_to_host_path_result="$1"
2984 +  if test -n "$1"; then
2985 +    # Remove leading and trailing path separator characters from
2986 +    # ARG. msys behavior is inconsistent here, cygpath turns them
2987 +    # into '.;' and ';.', and winepath ignores them completely.
2988 +    func_stripname : : "$1"
2989 +    func_to_host_path_tmp1=$func_stripname_result
2990 +    func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1"
2991 +    func_cygpath -u -p "$func_convert_core_path_wine_to_w32_result"
2992 +    func_to_host_path_result="$func_cygpath_result"
2993 +    func_convert_path_check : : \
2994 +      "$func_to_host_path_tmp1" "$func_to_host_path_result"
2995 +    func_convert_path_front_back_pathsep ":*" "*:" : "$1"
2996 +  fi
2997  }
2998 +# end func_convert_path_nix_to_cygwin
2999 +
3000  
3001  # func_mode_compile arg...
3002  func_mode_compile ()
3003 @@ -1303,12 +1986,12 @@
3004  	  ;;
3005  
3006  	-pie | -fpie | -fPIE)
3007 -          pie_flag="$pie_flag $arg"
3008 +          func_append pie_flag " $arg"
3009  	  continue
3010  	  ;;
3011  
3012  	-shared | -static | -prefer-pic | -prefer-non-pic)
3013 -	  later="$later $arg"
3014 +	  func_append later " $arg"
3015  	  continue
3016  	  ;;
3017  
3018