#!/usr/bin/env ruby
# coding: UTF-8
#
# SPEJD4Sejfek rules generator
# usage:
# rules_generator.rb < lexicon.dic > grammar.sr
#
# Authors: Aleksandra Wieczorek, Bartosz Zaborowski
# License: GPL v3
#
# homepage: http://zil.ipipan.waw.pl/SEJFEK4Spejd
#
# requires morfeusz 0.7 to be available in the $PATH
#

# Polish diacritics for String#upcase / String#downcase.
# The stock implementations stay reachable as old_upcase / old_downcase and
# are used for every character that is not listed in the tables below.
class String
  alias :old_downcase :downcase
  alias :old_upcase :upcase

  # Maps each Polish diacritic letter (either case) to its upper-case form.
  UPCASE = {"ą"=>"Ą", "Ą"=>"Ą", "ć"=>"Ć", "Ć"=>"Ć", "ę"=>"Ę", "Ę"=>"Ę", "ł"=>"Ł", "Ł"=>"Ł", "ń"=>"Ń", "Ń"=>"Ń", "ó"=>"Ó", "Ó"=>"Ó", "ś"=>"Ś", "Ś"=>"Ś", "ż"=>"Ż", "Ż"=>"Ż", "ź"=>"Ź", "Ź"=>"Ź"}

  # Maps each Polish diacritic letter (either case) to its lower-case form.
  DOWNCASE = {"ą"=>"ą", "Ą"=>"ą", "ć"=>"ć", "Ć"=>"ć", "ę"=>"ę", "Ę"=>"ę", "ł"=>"ł", "Ł"=>"ł", "ń"=>"ń", "Ń"=>"ń", "ó"=>"ó", "Ó"=>"ó", "ś"=>"ś", "Ś"=>"ś", "ż"=>"ż", "Ż"=>"ż", "ź"=>"ź", "Ź"=>"ź"}

  # Returns a new string, lower-cased character by character: table lookup
  # first, the original downcase otherwise.
  def downcase
    each_char.inject("") { |acc, ch| acc + (DOWNCASE[ch] || ch.old_downcase) }
  end

  # Returns a new string, upper-cased character by character: table lookup
  # first, the original upcase otherwise.
  def upcase
    each_char.inject("") { |acc, ch| acc + (UPCASE[ch] || ch.old_upcase) }
  end
end

# Every key of String::UPCASE (the Polish diacritic letters, both cases)
# concatenated into a single string.
PLZNACZKI = String::UPCASE.keys.join("")

# some helper methods and constants

# Parts of speech that write_dict_rule accepts for its leave() actions.
GOOD_POS = %w[subst depr ger ppron12 ppron3 num numcol adj pact ppas]

# Raises a RuntimeError carrying +msg+ when +x+ is false or nil;
# returns nil otherwise.
def assert(x, msg = "")
  return if x

  raise msg
end

# Builds the base form of a word glued together from several parts:
# the surface forms of all parts but the last, followed by the base of the
# last part (with the special case "ów" -> "owy"). The result is stripped of
# surrounding whitespace.
def mkbase(orths, bases)
  tail = bases[-1] == "ów" ? "owy" : bases[-1]
  (orths[0..-2].join + tail).strip
end

# Returns a copy of +string+ with every double quote preceded by a backslash.
def escape(string)
  string.gsub('"') { '\\"' }
end

# Regexp-escapes +string+, turns every escaped hyphen into a character class
# of hyphen-like characters, and finally backslash-escapes double quotes
# (via escape).
def regex_escape(string)
  quoted = Regexp.escape(string)
  with_hyphen_class = quoted.gsub("\\-", "[-­–]")
  escape(with_hyphen_class)
end

# Finds the first position at which +sub+ occurs as a contiguous run in +arr+.
#
# arr - the array to search
# sub - the run of elements to look for (compared with ==)
#
# Returns the zero-based start index of the first occurrence, or nil when
# +sub+ does not occur in +arr+ (including when it is longer than +arr+).
# An empty +sub+ is found at index 0.
#
# The previous hand-rolled scan raised NoMethodError (nil + Integer) or
# returned an unverified offset when +sub+ was absent; results for inputs
# that do contain +sub+ are unchanged.
def subarray_index arr, sub
  # The range is empty when sub is longer than arr, which yields nil.
  (0..arr.length - sub.length).find { |start| arr[start, sub.length] == sub }
end

# Splits +x+ on whitespace and then cuts every word in front of each
# closing/opening Polish quote, apostrophe or hyphen; such a punctuation
# character followed by more text becomes a token of its own.
# Returns an array with one token array per whitespace-separated word.
def tokenize(x)
  x.split.map do |word|
    word.split(/(?=”)|(?=„)|(?=')|(?=-)/).flat_map do |piece|
      piece =~ /[-'„”]./ ? [piece[0], piece[1..-1]] : [piece]
    end
  end
end

# Escapes +string+ with regex_escape and then relaxes letter case with
# regex_insensitive.
def regex_insensitive_escape(string)
  escaped = regex_escape(string)
  regex_insensitive(escaped)
end

# Builds a case-insensitive variant of a regex source string.
#
# string - regex source text (already escaped by the caller)
#
# Returns "" for an empty string, otherwise the result of
# case_insensitive_regex(string).
#
# The original authors kept a disabled variant that relaxed only the first
# character when no later upper-case letter was present. The loop that counted
# those letters had no remaining consumer and has been removed; every letter
# is relaxed, exactly as before.
def regex_insensitive string
  return "" if string.empty?

  case_insensitive_regex(string)
end

# Rewrites a regex source string so that every cased character matches both
# of its cases: "a" becomes "[aA]". A backslash escapes the next character:
# an escaped backslash is emitted as two backslashes, an escaped caseless
# character keeps its backslash, and an escaped cased character becomes a
# class of both escaped cases. A trailing lone backslash is dropped.
def case_insensitive_regex(string)
  out = "".dup
  pending_escape = false
  string.each_char do |ch|
    caseless = ch.downcase == ch.upcase
    if pending_escape
      pending_escape = false
      out << if ch == "\\"
               "\\\\"
             elsif caseless
               "\\#{ch}"
             else
               "[\\#{ch.downcase}\\#{ch.upcase}]"
             end
    elsif ch == "\\"
      pending_escape = true
    else
      out << (caseless ? ch : "[#{ch.downcase}#{ch.upcase}]")
    end
  end
  out
end

# Writes +x+ (an empty line by default) to standard error.
def debug(x = "")
  $stderr.puts(x)
end

# Asks the $morfeusz process for the possible base forms of every token.
#
# bs - array of tokens
#
# Returns one array per token. Blank tokens and a bare "," are returned as
# they are, without querying the process. For any other token the answer is
# read segment by segment until the last reported segment reaches the end of
# the token; the bases of consecutive segments are concatenated in every
# combination (bases reported as "?" are skipped), the token itself is
# appended, and duplicates are removed.
def get_bases(bs)
  bs.map do |token|
    trimmed = token.strip
    next [token] if trimmed.empty? || trimmed == ","

    $morfeusz.puts token
    $morfeusz.flush
    line = $morfeusz.gets
    # The first line read may already carry an analysis (">[...]"); if so it
    # is consumed by the first pass of the loop instead of a fresh read.
    line_ready = !!(line =~ />\[.*\]/)
    combos = [""]
    loop do
      if line_ready
        line_ready = false
      else
        line = $morfeusz.gets
      end
      line = line[1..-1] if line[0] == ">"
      segment_bases = line.split(";").map { |interp| interp.split(",")[1] }.reject { |base| base == "?" }
      combos = segment_bases.flat_map { |base| combos.map { |prefix| prefix + base } }
      # first comma-separated field without its leading character
      last_orth = line.split(",")[0][1..-1]
      break unless token.rindex(last_orth) + last_orth.length < token.length
    end
    combos << token
    combos.uniq
  end
end

# Queries the $morgen process with "<base> <pos>" and reads back the forms.
#
# The reply starts with a line holding the number of entries; each entry line
# holds a surface form and a tag specification separated by whitespace. In
# the specification fields are separated by ":" and alternatives inside one
# field by "."; every combination of alternatives is expanded.
#
# Returns an array of {:orth => form, :tag => expanded_tag} hashes.
def morgen(base, pos)
  $morgen.puts "#{base} #{pos}"
  $morgen.flush

  entry_count = $morgen.gets.strip.to_i
  forms = []
  entry_count.times do
    orth, tag_spec = $morgen.gets.split
    expanded = tag_spec.split(":").inject([""]) do |prefixes, field|
      choices = field.include?(".") ? field.split(".") : [field]
      choices.flat_map { |choice| prefixes.map { |prefix| prefix + ":" + choice } }
    end
    # every expanded tag starts with a separator that has to go
    forms += expanded.map { |full| {:orth => orth, :tag => full[1..-1]} }
  end

  forms
end

# Switch for number expansion of literal tokens; off by default.
PLURAL = false

# Returns the surface forms that should be accepted for a literal token:
# the result of pluralize_ when PLURAL is enabled, otherwise just [orth].
def pluralize(orth, pos, tag)
  PLURAL ? pluralize_(orth, pos, tag) : [orth]
end

# Collects the given form together with its counterpart in the other number.
#
# Only single-word subst/ger/adj/ppas/pact forms whose tag mentions no
# grammatical person (p1/p2/p3) are expanded; everything else comes back as
# [orth]. For every base reported by get_bases whose morgen paradigm contains
# exactly this form with this tag, the forms carrying the same tag and the
# forms carrying the tag with ":sg:" and ":pl:" swapped are gathered.
# Falls back to [orth] when nothing was gathered; duplicates are removed.
def pluralize_(orth, pos, tag)
  expandable = ["subst", "ger", "adj", "ppas", "pact"].include?(pos) && !orth.include?(" ")
  return [orth] unless expandable
  return [orth] if ["p1", "p2", "p3"].any? { |person| tag.include?(person) }

  candidate_bases = get_bases([orth]).first
  collected = []
  fulltag = pos + ":" + tag

  candidate_bases.each do |base|
    paradigm = morgen(base, pos)
    next unless paradigm.any? { |form| form[:orth] == orth && form[:tag] == fulltag }

    orths_by_tag = {}
    paradigm.each { |form| (orths_by_tag[form[:tag]] ||= []) << form[:orth] }

    swapped_tag =
      if fulltag.include?(":sg:")
        fulltag.gsub(":sg:", ":pl:")
      else
        fulltag.gsub(":pl:", ":sg:")
      end

    assert orths_by_tag[swapped_tag]

    collected += orths_by_tag[fulltag]
    collected += (orths_by_tag[swapped_tag] || [])
  end

  collected << orth if collected.empty?
  collected.uniq
end


#### writing

# Writes the rule for a multiword token to +output+.
#
# mw - hash with :orth (surface text), :base and :tags (list of tags)
#
# The Match line lists one case-insensitive orth pattern per token; tokens
# that belong to the same whitespace-separated word are joined with " ns ".
# The Eval line creates a word() alternative for every tag, each with the
# lower-cased base.
def write_multiword_rule(output, mw)
  output.puts "Rule  \"syntok #{escape mw[:orth]}\""

  match_spec = tokenize(mw[:orth]).map do |word_tokens|
    word_tokens.map { |tok| "[orth~\"#{regex_escape tok.downcase}\"/i]" }.join(" ns ")
  end.join(" ")
  output.puts "Match:  #{match_spec};"

  eval_spec = mw[:tags].map { |t| "#{t}, \"#{escape mw[:base].downcase}\"" }.join("; ")
  output.puts "Eval:  word(#{eval_spec});"
  output.puts
end

# Writes the rule for one dictionary entry to +output+.
#
# d - entry hash; reads :letters, :orths, :bases, :poss, :tags,
#     :spaces_before and :unify.
#
# When at least one component is marked "O", such components are matched by
# base and pos (plus negation when the tag carries :aff/:neg) and receive
# consecutive reference letters starting at "A"; the remaining components
# are matched by surface form. The Eval part unifies the referenced
# components, restricts each of them with leave() and builds the word from
# the first nominative "O" component (subst/ger preferred over
# adj/pact/ppas). Without any "O" component the entry is matched purely by
# surface forms.
def write_dict_rule(output, d)
  letters = d[:letters]

  if letters.index("O")
    positions = (0..letters.length-1).to_a
    first_nominative = lambda do |pos_list|
      positions.find do |i|
        letters[i] == "O" && pos_list.include?(d[:poss][i]) && d[:tags][i].include?(":nom")
      end
    end
    head_idx = first_nominative.call(["subst", "ger"])
    head_idx ||= first_nominative.call(["adj", "pact", "ppas"])
    assert head_idx, d.inspect

    # Match pattern of component i; hands out the next reference letter to
    # every "O" component.
    next_ref = 65
    token_pattern = lambda do |i|
      if letters[i] == "O"
        label = next_ref.chr
        next_ref += 1
        if d[:tags][i] =~ /:aff|:neg/
          polarity = $~[0][1..-1]
          label + "[base~\"#{regex_escape d[:bases][i]}\"/i && negation~#{polarity} && pos~#{d[:poss][i]}]"
        else
          label + "[base~\"#{regex_escape d[:bases][i]}\"/i && pos~#{d[:poss][i]}]"
        end
      else
        variants = pluralize(d[:orths][i], d[:poss][i], d[:tags][i]).map { |x| regex_escape x }
        "[orth~\"#{variants.join("|")}\"/i]"
      end
    end

    surface = d[:orths][0]
    pattern = "" + token_pattern.call(0)
    (1...d[:orths].length).each do |i|
      if d[:spaces_before][i]
        surface += " "
        pattern += " "
      else
        pattern += " ns "
      end
      surface += d[:orths][i]
      pattern += token_pattern.call(i)
    end

    output.puts "Rule  \"syntok #{escape surface}\""
    output.puts "Match:  #{pattern};"
    output.puts "Eval:"

    ref = 65
    head_ref = ""
    leaves = ""
    letters.each_with_index do |letter, i|
      next unless letter == "O"

      head_ref = ref if i == head_idx
      assert !d[:bases][i].empty?
      leaves += "  leave(base~~\"#{regex_insensitive_escape d[:bases][i]}\", #{ref.chr});\n"
      leaves += "  leave(pos~~\"#{regex_escape d[:poss][i]}\", #{ref.chr});\n"
      assert GOOD_POS.include?(d[:poss][i]),
        "bad POS for leave() action: "+d[:poss][i]+" in:\n"+d.inspect
      ref += 1
    end

    used_refs = (65..ref-1).map { |code| code.chr }.join(",")
    output.puts "  unify(case#{" gender number" if d[:unify]}, #{used_refs});"
    output.puts leaves
    output.puts "  word(#{head_ref.chr}, , \"#{escape surface}\");"
    output.puts
  else
    surface = d[:orths][0]
    pattern = "[orth~\"#{regex_escape d[:orths][0]}\"/i]"
    (1...d[:orths].length).each do |i|
      if d[:spaces_before][i]
        surface += " "
        pattern += " "
      else
        pattern += " ns "
      end
      surface += d[:orths][i]
      pattern += "[orth~\"#{regex_escape d[:orths][i]}\"/i]"
    end

    output.puts "Rule  \"syntok #{escape surface}\""
    output.puts "Match:  #{pattern};"
    output.puts "Eval:  word(xxx, \"#{escape surface}\");"
    output.puts
  end
end

# ################
# INIT
# ################
# External helper processes, opened for both writing and reading:
#   $morfeusz - queried by get_bases
#   $morgen   - queried by morgen
$morfeusz = IO.popen("morfeusz", "r+")
$morgen = IO.popen("morgen/morgen", "r+")
# Parsed lexicon entries, in input order.
dict = []
# Bracketed ({...}) components found inside the entries.
multiwords = []
# Lower-cased token list of an entry => index of that entry in dict.
multi_lookup = {}

# The lexicon is read from standard input, the grammar goes to standard output.
output = $stdout
input = $stdin

# ################
# READ
# ################


# Reads the lexicon line by line. Every non-empty line is cut into
# "text(annotation)" parts; the last part is the entry tag, optionally
# followed by the marker "brak-zgody" (Polish: "no agreement").
while line = input.gets
  parts = line.strip.split /([^\(]+\([^\)]+\))/
  parts = parts.select {|x| !x.empty?}

  next if parts.empty?

  # the marker switches off the extra " gender number" unification of the entry
  dont_unify = parts.last == "brak-zgody"

  parts.pop if dont_unify

#  debug parts.inspect

  # the entry tag carries the N/O description string (with "_" separators)
  tag = parts.pop
  tag =~ /(?:\([a-zA-Z]+-)([N_O]+)/
#  debug tag
  assert $~
  desc = $~[1]

  # raw per-part fields: text before "(", then the ":"-separated annotation
  # (base, pos, remaining tag fields); "\," stands for a literal comma
  _orths = parts.map {|x| x.split("(")[0].gsub("\\,", ",")}
  _bases = parts.map {|x| x.split("(")[1].split(":")[0].gsub("\\,", ",")}
  _poss = parts.map {|x| x[0..-2].split(":")[1]}
  _tags = parts.map {|x| x[0..-2].split(":")[2..-1].join(":")}

  orths = []
  bases = []
  poss = []
  tags = []

# letters and concatenation of multipart words
  descs = desc.split("_").join("").split(//)
  # when the description has as many letters as there are parts, "interp"
  # parts consume a letter like any other part; otherwise they get "N"
  # without consuming one
  addinterp = true
  addinterp = false if descs.length == _orths.length
  letters = []
  idx = 0   # position in descs
  i = 0     # position in parts
  while i < parts.length
    if _poss[i] == "interp" && addinterp
      letters << "N"
      orths << _orths[i]
      bases << _bases[i]
      poss << _poss[i]
      tags << _tags[i]
    elsif i + 1 < _poss.length && _poss[i+1] != "interp" && _orths[i+1][0] != " " &&
        _poss[i] != "interp"
      # a run of parts written without spaces (and without interp parts)
      # forms a single word; find the end of the run
      j = i + 1
      while j+1 < _poss.length && _poss[j+1] != "interp" && _orths[j+1][0] != " "
        j += 1
      end
      # concat i..j

      # braces around individual parts are dropped and, if any part had
      # them, put back around the whole concatenation
      orths << _orths[i..j].map{|x| x[0] == "{" && x[-1] == "}" ? x[1..-2] : x}.join("")
      if _orths[i..j].find{|x| x[0] == "{" && x[-1] == "}"}
        orths[-1] = "{"+orths[-1]+"}"
      end
      bases << mkbase(_orths[i..j], _bases[i..j])
      # pos, tags and letter of the run come from its first subst/ger part,
      # or else from its first adj/pact/ppas part
      tagidx = (i..j).to_a.find {|x| ["subst", "ger"].include? _poss[x]}
      tagidx ||= (i..j).to_a.find {|x| ["adj", "pact", "ppas"].include? _poss[x]}
#      tagidx ||= (i..j).to_a.find {|x| _tags[x].include?(":nom:")}
#      tagidx ||= 0
      assert tagidx

      poss << _poss[tagidx]
      tags << _tags[tagidx]
      letters << descs[idx+tagidx-i]
      idx += j-i+1
      i += j-i
    else
      letters << descs[idx]
      orths << _orths[i]
      bases << _bases[i]
      poss << _poss[i]
      tags << _tags[i]
      idx += 1
    end
    i += 1
  end

  # debug letters.inspect
  # every resulting word must have received a letter
  assert (letters.select{|x| x}.length == orths.length), letters.inspect+"\n"+line

  # remember which words were preceded by a space and strip that space
  # (in place) from the word itself
  spaces_before = [false]
  orths[1..-1].each do |o|
    if o[0] == " "
      spaces_before << true
      o.replace o[1..-1]
    else
      spaces_before << false
    end
  end

  # check
  # sanity check of the spacing around punctuation
  orths.each_with_index do |o, i|
    if o == ',' || o == '.' || o == ')'
      assert (!spaces_before[i] && (i +1 == spaces_before.length || spaces_before[i+1] || orths[i+1].length == 1)) # the last alternative is a cluster of initials with dots
    elsif o == '('
      assert (spaces_before[i] && (i +1 == spaces_before.length || !spaces_before[i+1]))
    end
  end

#  debug [parts.inspect, tag, desc, orths.inspect, bases.inspect, poss.inspect, tags.inspect]
#  debug [letters.inspect, spaces_before.inspect]
#  debug
  
  # words written as {...} are multiwords: the braces are removed (in place)
  # from orth and base, and the word is recorded in multiwords
  multi = orths.map {|x| false}

  orths.each_with_index do |o, i|
    a = o.index "{"
    b = o.index "}"
    if a && b && a < b
      o.gsub!(/[\{\}]/, "")
      bases[i].gsub!(/[\{\}]/, "")
      multiwords << {:orth => o, :base => bases[i], 
        :tag => poss[i] + (tags[i].empty? ? "" : ":"+tags[i])}
      multi[i] = true
    end
  end

=begin
# paranoid ;)
  if orths.find{|x| x =~ /([a-zA-Z#{PLZNACZKI}0-9]+[-'\.,:!\)\(\[\]_+=;"\?]|[-'\.,:!\)\(\[\]_+=;"\?][a-z#{PLZNACZKI}A-Z0-9]+)/}
    orths.each_with_index do |o, i|
      if o =~ /([A-Za-z#{PLZNACZKI}0-9]+[-'\.,:!\)\(\[\]_+=;"\?]|[-'\.,:!\)\(\[\]_+=;"\?][a-zA-Z#{PLZNACZKI}0-9]+)/ && !multi[i]
        debug "problem: " + o.inspect
      end
    end
  end
=end


  
  d = {:parts => parts, :desc =>desc,
    :tag => tag, :orths => orths, :bases => bases, :poss => poss,
    :tags => tags, :letters => letters, :spaces_before => spaces_before, :multi => multi,
    :unify => !dont_unify}

  dict << d

  # token-level view of the entry: lower-cased tokens (so), the letter of
  # the word each token came from (sl) and a "preceded by space" flag per
  # token (sp); the first flag of every word is taken from spaces_before
  so = []
  sl = []
  sp = []
  d[:orths].each_with_index do |o, i|
    so << (tokenize o).flatten.map{|x| x.downcase}
    sl << (tokenize o).flatten.map{|x| d[:letters][i]}
    sp << (tokenize o).map{|x| [true] + x.map{|y| false}[1..-1]}.flatten
    sp[-1][0] = spaces_before[i]
  end
  d[:splitorths] = so.flatten
  d[:splitbases] = get_bases d[:splitorths]
  d[:splitletters] = sl.flatten
  d[:splitspaces] = sp.flatten

  # NOTE(review): concatenated_split_orth is only consumed by the
  # commented-out lookup line below; nothing else in this loop reads it.
  concatenated_split_orth = [""]
  orths.each_with_index do |o, i|
    if spaces_before[i]
      concatenated_split_orth += (tokenize o).flatten
    else
      old_cso = concatenated_split_orth[-1]
      concatenated_split_orth[-1] = tokenize(old_cso+o).flatten.first
      concatenated_split_orth += tokenize(old_cso+o).flatten[1..-1]
    end
  end
#  multi_lookup[concatenated_split_orth.map{|x| x.downcase}] = dict.length-1
  # index the entry by its lower-cased token list
  multi_lookup[so.flatten] = dict.length-1

end

# Drop duplicate multiword records, then merge the records that share the
# same lower-cased base and orth into one record holding all distinct tags.
multiwords.uniq!

mws = {}
multiwords.each do |mw|
  key = [mw[:base].downcase, mw[:orth].downcase]
  (mws[key] ||= []) << mw[:tag]
end

multiwords = mws.map do |(base, orth), tag_list|
  {:orth => orth, :base => base, :tags => tag_list.uniq}
end

#DEBUG
#debug multiwords.inspect
#debug
#debug multi_lookup.inspect
#debug
#debug dict.inspect

debug "read done"


# ###############
# prepare sorting
# ###############

# we will write rules only for those multiwords
# Multiwords that are not covered by a dictionary entry of their own and
# therefore get a rule written for them.
mock_multiwords = []

# first lower-cased token => entries starting with it
orthstarts = {}
# possible base of the first token => entries starting with it
basestarts = {}

# everything that takes part in the dependency sort
to_sort = []

multiwords.each do |mw|
  # token-level view of the multiword; all of its tokens are literal ("N")
  mw[:splitorths] = (tokenize mw[:orth]).flatten.map{|x| x.downcase}
  mw[:splitbases] = get_bases mw[:splitorths]
  mw[:depends] = []
  mw[:type] = :mw
  mw[:splitletters] = mw[:splitorths].map{|x| "N"}
  mw[:splitspaces] = (tokenize mw[:orth]).map{|x| [true] + x.map{|y| false}[1..-1]}.flatten

  # is there a dictionary entry whose tokens equal the tokens of our base?
  x = multi_lookup[(tokenize mw[:base]).flatten.map{|x| x.downcase}]
  # lookup may lie, since some of the letter may be "N" there
  if x
    d = dict[x]
    # the entry covers this multiword only if its "N" tokens equal our
    # tokens and its "O" tokens share at least one base with ours
    d[:splitorths].each_with_index do |o, i|
      if (d[:splitletters][i] == "N" && o != mw[:splitorths][i]) ||
        (d[:splitletters][i] == "O" && 
                !mw[:splitbases][i].map{|x| d[:splitbases][i].include? x}.flatten.include?(true))
        x = false
        break
      end
    end
  end
  unless x
    # not covered: schedule it and index it by its first token and by the
    # bases of that token
    mock_multiwords << mw
    to_sort << mw
    orthstarts[mw[:splitorths].first] ||= []
    orthstarts[mw[:splitorths].first] << mw
    mw[:splitbases].first.each do |b|
      basestarts[b] ||= []
      basestarts[b] << mw
    end
  end
end

debug "sorting preparation part 1 done"

# Every dictionary entry takes part in the sort and is indexed by its first
# token and by the possible bases of that token.
dict.each do |d|
  to_sort << d
  d[:type] = :norm
  d[:depends] = []
  orthstarts[d[:splitorths].first] ||= []
  orthstarts[d[:splitorths].first] << d
  d[:splitbases].first.each do |b|
    basestarts[b] ||= []
    basestarts[b] << d
  end
  # token-level data of the entry's bracketed (multiword) components, one
  # element per such component: tokens, bases, letters, space flags and the
  # offset of the component's tokens inside d[:splitorths]
  d[:splitorthmws] = []
  d[:splitbasemws] = []
  d[:splitlettermws] = []
  d[:splitspacemws] = []
  d[:splitoffsetmws] = []
  d[:orths].each_with_index do |o, i|
    if d[:multi][i]
      os = (tokenize o).flatten.map{|x| x.downcase}
      d[:splitorthmws] << os
      d[:splitbasemws] << get_bases(os)
      d[:splitlettermws] << os.map{|x| d[:letters][i]}
      d[:splitspacemws] << (tokenize o).map{|y| [true]+y.map{|x| false}[1..-1]}.flatten
      d[:splitoffsetmws] << subarray_index(d[:splitorths], os)
    end
  end
end

debug "sorting preparation part 2 done"

# debug/helper
# number all sortable entries; the ids are used in diagnostics
to_sort.each_with_index {|x,i| x[:id] = i}

# [entry, candidate] pairs collected by get_matching when a candidate starts
# inside an entry (not at its first token) but runs past its end
$can_be_in_conflict = []

# Finds the indexed entries that match inside a token sequence.
#
# os         - tokens of the sequence
# bs         - possible bases of every token (parallel to os)
# ls         - letters of the tokens (not consulted)
# sb         - "preceded by space" flags of the tokens (parallel to os)
# orthstarts - first token => candidate entries
# basestarts - base of the first token => candidate entries
# entry      - the entry the sequence belongs to (used for bookkeeping only)
#
# A candidate matches at a position when, token by token, the spacing agrees
# (except in front of its first token), its "N" tokens equal the sequence
# tokens and its "O" tokens share a base with them. A candidate that starts
# after the first token and runs past the end of the sequence is recorded
# in $can_be_in_conflict instead.
#
# Returns the distinct {:idx => position, :match => candidate} hashes,
# sorted by position.
def get_matching(os, bs, ls, sb, orthstarts, basestarts, entry)
  fits_at = lambda do |m, start|
    verdict = true
    m[:splitorths].each_with_index do |cand_orth, k|
      at = start + k
      if at >= os.length && start > 0
        $can_be_in_conflict << [entry, m]
        verdict = false
        break
      end
      if (k > 0 && sb[at] != m[:splitspaces][k]) ||
          (m[:splitletters][k] == "N" && os[at] != cand_orth) ||
          (m[:splitletters][k] == "O" && bs[at].none? { |b| m[:splitbases][k].include?(b) })
        verdict = false
        break
      end
    end
    verdict
  end

  found = []

  # candidates selected by the surface form of their first token
  os.each_with_index do |o, i|
    (orthstarts[o] || []).each do |m|
      found << {:idx => i, :match => m} if fits_at.call(m, i)
    end
  end

  # candidates selected by a base of their first token
  bs.each_with_index do |alternatives, i|
    alternatives.each do |b|
      (basestarts[b] || []).each do |m|
        found << {:idx => i, :match => m} if fits_at.call(m, i)
      end
    end
  end

  found.uniq.sort { |a, b| a[:idx] <=> b[:idx] }
end

# Pairs of entries whose matches overlap without one containing the other.
in_conflict = []

mock_multiwords.each do |mw|
  # find all full matches
  ms = get_matching mw[:splitorths], mw[:splitbases], mw[:splitletters], mw[:splitspaces],
    orthstarts, basestarts, mw

  next if ms.empty?

  # choose the matches that are maximal with respect to inclusion;
  # if some of them are in conflict, record those pairs as conflicting
  max = [ms[0]]
  ms.each do |m|
    next if m[:match] == mw
    if m[:idx] == max[-1][:idx] &&
      m[:match][:splitorths].length > max[-1][:match][:splitorths].length
      # same start, longer match: it replaces the current one
      max[-1] = m
    elsif m[:idx] >= max[-1][:idx] + max[-1][:match][:splitorths].length
      # starts after the current one ends: keep both
      max << m
    elsif m[:idx] < max[-1][:idx] + max[-1][:match][:splitorths].length &&
      m[:idx]+m[:match][:splitorths].length > max[-1][:idx] + max[-1][:match][:splitorths].length
      # conflict
      in_conflict << [max[-1][:match], m[:match]]
      # TODO
      # solve?
      # if max[-1][:match][:id] < m[:match][:id] 
      #   max[-1] = m
    # else inclusion
    end
  end

  # mark that this multiword depends on all of them
  mw[:depends] += max.map{|x| x[:match]}.uniq

end

debug "sorting preparation part 3 done"


# Set when some bracketed component has no entry matching it exactly;
# the script then stops after reporting all such cases.
deps_not_found = false

dict.each do |d|
  # find all full matches
  ms = get_matching d[:splitorths], d[:splitbases], d[:splitletters], d[:splitspaces],
    orthstarts, basestarts, d

  deps = []
  # look up this entry's inner bracketed components and mark that the entry
  # depends on all of them
  d[:splitorthmws].each_with_index do |o, i|
    my = get_matching o, d[:splitbasemws][i], d[:splitlettermws][i], d[:splitspacemws][i],
      orthstarts, basestarts, d
    # only matches covering the whole component count
    this_deps = my.select{|x| x[:idx] == 0 && x[:match][:splitorths].length == o.length}
    if this_deps.empty?
      deps_not_found = true
      $stderr.puts "Dependency not found:"
      $stderr.puts "Need: ", [o, d[:splitbasemws][i], d[:splitlettermws][i], d[:splitspacemws][i]].map{|x| x.inspect}
      $stderr.puts "Found: ", my.map{|x| x.inspect}
      $stderr.puts
      $stderr.puts "Entry: " + d.inspect
      $stderr.puts
      $stderr.puts "All: " , ms.map{|x| x.inspect}
      $stderr.puts "\n\n\n"
    end
    # positions are shifted so that they refer to the tokens of the whole entry
    deps += this_deps.map{|x| {:match => x[:match], :idx => x[:idx]+d[:splitoffsetmws][i]}}
  end

  next if ms.empty?
  # choose the matches that are maximal with respect to inclusion;
  # if some of them are in conflict, record those pairs as conflicting
  # (within this loop max feeds only the conflict bookkeeping below)
  max = [ms[0]]
  ms.each do |m|
    next if m[:match] == d
    if m[:idx] == max[-1][:idx] &&
      m[:match][:splitorths].length > max[-1][:match][:splitorths].length
      max[-1] = m
    elsif m[:idx] >= max[-1][:idx] + max[-1][:match][:splitorths].length
      max << m
    elsif m[:idx] < max[-1][:idx] + max[-1][:match][:splitorths].length &&
      m[:idx]+m[:match][:splitorths].length > max[-1][:idx] + max[-1][:match][:splitorths].length
      # conflict
      in_conflict << [max[-1][:match], m[:match]]
      # TODO
      # solve?
      # if max[-1][:match][:id] < m[:match][:id] 
      #   max[-1] = m
    else
    # else inclusion
    end
  end
  # make sure that all dependencies have found matches

  # if any of this entry's bracketed components partially overlaps one of
  # the matched entries, make that other entry depend on the component
  ms.each do |m|
    next if m[:match] == d
    # assume depends have length > 1
    if !(xd = deps.select {|x|
        ((x[:idx] < m[:idx] && x[:idx]+x[:match][:splitorths].length > m[:idx] &&
         m[:idx] + m[:match][:splitorths].length > x[:idx]+x[:match][:splitorths].length) || 
        (m[:idx] < x[:idx] && m[:idx]+m[:match][:splitorths].length > x[:idx]) &&
         x[:idx] + x[:match][:splitorths].length > m[:idx]+m[:match][:splitorths].length) &&
        x[:match] != m[:match]
    }).empty?
      m[:match][:depends] += xd.map{|x| x[:match]}
      # remember why the dependency was added (reported on dependency loops)
      m[:match][:depends_reasons] ||= []
      m[:match][:depends_reasons] += xd.map{|x| [x[:match][:id], d]}
    end
  end

  d[:depends] += deps.map{|x| x[:match]}.uniq
end

exit 1 if deps_not_found

debug "sorting prepared"

# ###############
# SORTING
# ###############
# Human-readable description of a sortable entry for diagnostics: a heading
# (multiword with its orth, or regular entry with its parts and tag) followed
# by the inspected token and base lists on separate indented lines.
def very_inspect(x)
  heading =
    if x[:type] == :mw
      " multiword {" + x[:orth] + "}"
    else
      " regular " + x[:parts].join + x[:tag]
    end
  heading + "\n    #{x[:splitorths].inspect}\n    #{x[:splitbases].inspect}"
end

# initial pass

# Entries without dependencies (a dependency on itself is ignored) get
# level 0; all others wait in to_sort_2.
level = 0
to_sort_1 = to_sort
to_sort_2 = []

to_sort.each do |entry|
  entry[:depends] = entry[:depends].select{|x| x != entry}
  if entry[:depends].empty?
    entry[:level] = level
    entry[:done] = true
  else
    to_sort_2 << entry
  end
end

# the rest of the sort

# based on the dependencies, assign successive depths to the entries whose
# dependencies all have a depth assigned already;
# continue as long as a pass still manages to assign something

while to_sort_1.length > to_sort_2.length
  debug("sorting pass "+level.to_s)
  level += 1
  to_sort_1 = to_sort_2
  to_sort_2 = []

  to_sort_1.each do |entry|
    if entry[:depends].find{|x| x[:done].nil?}
      to_sort_2 << entry
    else
      entry[:level] = level
      entry[:done] = true
    end
  end
end


# if we are left with a non-empty set of entries that still have unassigned
# dependencies, the dependencies form a loop and no order exists (report it)

unless to_sort_1.empty?
  $stderr.puts "DEPENDENCY LOOP(S) DETECTED"
  $stderr.puts "contain(s): #{to_sort_1.length} entries"
  $stderr.puts "the list of entries:"
  to_sort_1.each do |e|
    # only entries that are in a direct two-way dependency are listed
    if !(dd = e[:depends].select{|x| x[:depends].index e}).empty?
      $stderr.puts "\n\n#{e[:id]}"+
        very_inspect(e) +
        "\n  deps:\n" + dd.map{|d| "   #{d[:id]} "+very_inspect(d)}.join("\n") +
        "\n  reasons:\n" + 
        (e[:depends_reasons] || []).map{|d| "   #{d.first} "+very_inspect(d.last)}.join("\n")
    end
  end

  exit 1
end

debug "sorting done"

# Optional report of the potential conflicts collected by get_matching.
# NOTE(review): this checks the second command-line argument, while the usage
# line in the file header shows no arguments at all - confirm the intended
# position.
if ARGV[1] == "verbose" 
  # order every pair by id so that mirrored pairs collapse under uniq
  $can_be_in_conflict = $can_be_in_conflict.map{|x|
    x.first[:id] > x.last[:id] ? [x.last, x.first] : x}.uniq

  $stderr.puts "potential conflicts: #{$can_be_in_conflict.length}"
  unless $can_be_in_conflict.empty?
    $stderr.puts "the list of entries: "
    $stderr.puts $can_be_in_conflict.map{|x| x.first[:id].to_s+
      very_inspect(x.first) +
      "\n    vs. #{x.last[:id]}"+
      very_inspect(x.last)
    }
  end
end


#to_sort.each do |x|
#  next if x[:depends].empty?
#  debug (very_inspect x)
#  debug
#  debug x[:depends].map{|y| y[:type] == :mw ? "mw #{y[:orth]}" : "reg #{y[:orths].join(" ")}"}.inspect
#  debug
#end
#
#exit

# #########
# WRITE
# #########

# Level by level, in increasing depth, write out the individual rules
# (a multiword rule or a regular rule, depending on the entry type).

0.upto(level) do |depth|
  to_sort.each do |e|
    next unless e[:level] == depth

    case e[:type]
    when :mw then write_multiword_rule(output, e)
    else write_dict_rule(output, e)
    end
  end
end



