"Python3 utility routines library, Copyright R.S. Forsyth."

##  GLIB :
##
##  Python3 general-purpose utilities :
##  Copyright 2012, R.S. Forsyth.
##  Released under GNU Public licence, version 3:
##  http://www.gnu.org/licenses/gpl.txt
##  first version : 15/02/2000
##  last revision : 07/09/2012


ShowGNU = 0  ##  change to 1 to have the licence statement printed when this module is loaded
##  Text of the licence notice; only printed if ShowGNU is set :
GNUblurb = """
    This copyrighted program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/gpl-3.0.txt>.
"""
if ShowGNU:
    print(GNUblurb)
    
	
import  math, os, string, sys

from globdefs import *

Maxtoke = 36  ##  maximum markup symbol size (used by tokline() to cut off runaway markup)
##  Difficult punctuation signs (for punktran()) :
##  Each group starts with a negative number, whose absolute value is the
##  ascii code that the remaining (positive) codes of the group are mapped to.
Punxlist = [-39,96,180,700,2036,8216,8217,8218,8219,8242]  ##  apostrophe-like codes -> chr(39)
Punxlist = Punxlist + [-45,173,0x2010,0x2011,0xfe63,0xfe0d]  ##  hyphen-like codes -> chr(45)
Punxlist = Punxlist + [-34,171,187,8220,8221,8222,12291]  ##  double-quote-like codes -> chr(34)
Punxlist = Punxlist + [-126,0x2012,0x2013,0x2014,0x2015,0xfe58]  ##  longer dashes -> chr(126), the tilde
##  negatives are good old ascii substitutes.
Lowchin = 0x4e00  ##  lo point of unicode CJKblock (19968)
Highchin = 0x9fff  ##  hi point of unicode CJKblock (40959)


##  GETMODE :
def getmode (q,modelist):
    """Asks the user to pick one entry of modelist and returns its index.

    q        : prompt text, printed once before the first menu.
    modelist : sequence of mode descriptions, displayed numbered from zero.

    The menu is shown again until a whole number in range(len(modelist))
    is typed.  The reply is parsed with int() rather than eval(), so typed
    text is never executed as code, and a blank or non-integer reply just
    causes another prompt instead of a crash.
    """

    print(q)  ##  user prompt
    mode = -99  ##  sentinel : no valid selection yet
    while mode < 0 or mode >= len(modelist):
        for mc, m in enumerate(modelist):
            print(mc,m)
        reply = input("Give selection number: ").strip()
        try:
            mode = int(reply)
        except ValueError:  ##  blank or not a whole number
            mode = -99
    print("Mode",mode,"chosen :",modelist[mode])
    return  mode


##  OPENFILE :
def openfile (filename,prelist,postlist,mode):
    """Tries to open filename for reading, with various prefixes & suffixes.

    filename : file name, optionally with a directory part and/or extension.
    prelist  : candidate directory prefixes ('' means no prefix).
    postlist : candidate extensions, each including its leading dot
               ('' means a bare full stop is appended).
    mode     : accepted for compatibility; the file is always opened with 'r'.

    A directory or extension given in filename is tried before the listed
    alternatives.  The caller's prelist and postlist are NOT modified: the
    search works on private copies.

    Returns the open file object for the first existing combination, or None
    if no combination exists or the first existing one cannot be opened.
    """

    fullstop = '.'
    ##  Work on copies, so that repeated calls don't keep growing caller's lists :
    prefixes = list(prelist)
    suffixes = list(postlist)

    ##  First extract 3 main bits:
    (dire,base) = os.path.split(filename)
    if dire:
        prefixes.insert(0,dire)  ##  place prefix at front if given
    if fullstop in base:
        (base,extn) = os.path.splitext(base)
        suffixes.insert(0,extn)  ##  given extension is tried first
    else:
        extn = ''
    print(dire,base,extn)

    ##  Next attempt most reasonable combinations :
    for p in prefixes:
        for x in suffixes:
            if len(p) > 0:
                pathname = p + os.sep + base
            else:
                pathname = base
            if len(x):
                pathname = pathname + x
            else:
                pathname = pathname + fullstop
            if os.path.exists(pathname):
                print("trying to open :",pathname)
                try:
                    textfile = open(pathname,'r')
                    print(pathname,"opened for reading.")
                    return  textfile
                except IOError:
                    ##  an existing but unreadable file ends the search
                    print("can't open", pathname)
                    return  None

    return  None  ##  nothing suitable found


##  GETCHUNK :
def getchunk (s,starter,finisher,spos=0):
    """Returns the text of s lying between starter and finisher.

    The search for starter begins at position spos; finisher is sought
    after the end of starter.  An empty string means that the delimiters
    were not both found (or that nothing lies between them).
    """

    begin = s.find(starter,spos)
    inner = begin + len(starter)  ##  first character after starter
    end = s.find(finisher,inner)
    found = begin > -1 and end > begin
    return s[inner:end] if found else ''


##  ATOMIZE :
def atomize (linelist,paradat):
    """Tokenizes a list of raw text lines using tokline().

    linelist : iterable of text lines.
    paradat  : parameter object; must have .foldcase (true -> lower-case the
               text) plus whatever tokline() reads from it.  The optional
               .atomizer string selects the result shape: a value starting
               with 'e' or 'x' (the default, also used if it is blank) gives
               one long flat list of tokens, anything else gives a list
               holding one token list per non-empty line.

    Returns the token list in the chosen shape.
    """

    toklist = []
    foldtolc = paradat.foldcase  ## should be logical or numeric
    xapp = getattr(paradat,"atomizer","e").strip()
    ##  extend is default; a blank setting must not crash on xapp[0]
    extending = (not xapp) or xapp[0] in "ex"
    for line in linelist:
        ##  str methods replace string.strip()/string.lower(), which Python 3 lacks
        line = line.strip()
        if foldtolc:
            line = line.lower()
        t = tokline(line + ' ',paradat)  ##  trailing blank flushes last token
        if t:
            if extending:
                toklist.extend(t)  ##  1 long list
            else:
                toklist.append(t)  ##  a list of lists

    return toklist


##  TOKLINE :
def tokline (rawline,paradat):
    """Tokenizes a single line of text; tinf contains char-type info.

    rawline : the text to split into tokens.
    paradat : parameter object.  paradat.tokinfo must supply the attributes
              markopen, markshut, wordcont, numstart, numcont and puncmarx;
              an optional paradat.punxtab translation table is applied to
              the line first (see punktran()).

    Five kinds of token are recognized : mark-up symbols (from markopen to
    markshut, cut off once longer than Maxtoke characters), words, numbers,
    runs of punctuation, and single characters in the Lowchin..Highchin
    range.  All other characters are skipped.

    A token is only appended when a later character ends it, so a token that
    is still open at the end of rawline is not returned; the callers in this
    file append a blank to the line for that reason.

    Returns the list of token strings.
    """

    ##  return  string.split(line)
    ##  state flags : at most one is non-zero, naming the token being built
    word = mark = numb = punc = 0
    chin = 0
    tokelist = []
    toke = ''  ##  token under construction
    tinf = paradat.tokinfo  ##  should be set in globdefs.py
    tt = getattr(paradat,"punxtab",{})
    if tt:  ##  convert funny punctuation characters
        line = rawline.translate(tt)
    else:
        line = rawline
    for ch in line:
        ##  first check continuation :
        if mark:  ##  inside a mark-up symbol
            if ch == tinf.markshut or len(toke) > Maxtoke:
                mark = 0
                tokelist.append(toke + ch)  ##  closing character is kept in the token
                toke = ''
            else:  toke = toke + ch
        elif word:  ##  inside a word
            ##  altered 18/08/2012 to do crude 1-char ChinJap tokenizing :
            if (ch.isalpha() or ch in tinf.wordcont) and \
               (ord(ch) < Lowchin or ord(ch) > Highchin):
                toke = toke + ch
            else:
                word = 0
                if toke[-1] == "'":
                    toke = toke[:-1]  ##  removes trailing quote
                tokelist.append(toke) ; toke = ''
        elif numb:  ##  inside a number
            if ch in tinf.numcont:
                toke = toke + ch
            else:
                numb = 0
                tokelist.append(toke) ; toke = ''
        elif punc:  ##  inside a run of punctuation
            if ch in tinf.puncmarx:
                toke = toke + ch
            else:
                punc = 0
                tokelist.append(toke) ; toke = ''
        elif chin:
            tokelist.append(toke)
            chin = 0 ; toke = ''  ##  chin bloc simply split into chars
        else:
            pass  ##  ignore everything else

        ##  next check initiation :
        ##  (the character that has just ended a token is examined here too,
        ##   so it can itself begin the next token)
        if chin + mark + word + numb + punc == 0:  ##  outside token
            if ch == tinf.markopen:
                toke = ch ; mark = 1
            elif ord(ch) >= Lowchin and ord(ch) <= Highchin:
                toke = ch ; chin = 1
            elif ch.isalpha():
                toke = ch ; word = 1
            elif ch in tinf.numstart:
                toke = ch ; numb = 1
            elif ch in tinf.puncmarx:
                toke = ch ; punc = 1
            else:
                pass  ##  ignores the rest
        
    return tokelist
    ##  no size limit to tokens here, but may be imposed afterwards.


##  WDCOUNT :
def wdcount(toklist):
    "Returns how many non-empty tokens in toklist begin with an ascii letter."

    letters = string.ascii_letters
    return  sum(1 for tok in toklist if len(tok) and tok[0] in letters)


##  NUMERIZE :
def numerize(s):
    "Returns s as an int if it reads as one, else as a float, else None."

    for convert in (int,float):  ##  int is preferred, float is the fall-back
        try:
            return convert(s)
        except ValueError:
            continue
    return None  ## means s doesn't look like a number


##  PARAGETN :
def paragetn (paradat,paraname,default='0',maxvalue=9999):
    """Returns attribute paraname of paradat as a number, capped at maxvalue.

    A missing attribute is taken as default; a value that numerize() cannot
    convert counts as 0.  The resulting number is also stored back on
    paradat under the same name.
    """

    value = numerize(getattr(paradat,paraname,default))
    if value is None:  ##  not a recognizable number
        value = 0
    value = maxvalue if value > maxvalue else value
    setattr(paradat,paraname,value)  ##  side-effect
    return  value


##  ADD2DIC :
def add2dic (largedic,smalldic):
    "adds every frequency in smalldic to the matching entry of largedic, in place."

    for key in smalldic:
        largedic[key] = smalldic[key] + largedic.get(key,0)  ##  absent keys start at zero


##  NUMERIC :
def numeric (x):
    "returns True if x is an int or a float (or a subclass of either), else False."

    return  isinstance(x,(int,float))


##  PARAGET :
def paraget (paralist,parafile):
    """Obtains parameter settings from file, setting defaults & enforcing limits.

    paralist : sequence of tuples (name [,default [,minimum [,maximum]]]).
               A parameter is treated as numeric if its default is an int or
               float, or if a minimum is given.
    parafile : parameter file, already opened by the calling routine.  Each
               line holds a name and a value, separated by the first blank;
               names are folded to lower case.

    Lines that do not split into name and value are reported and skipped
    (they used to store a stale or undefined value).  A numeric parameter
    whose value does not look like a number is reported and replaced by its
    default where that is numeric; if no number can be had it is left as
    None and the limits are not applied.

    Returns a Sack object with one attribute per entry of paralist.
    """

    paradat = Sack(parafile.name)
    paratab = {}
    ##  first get entries from parameter file :
    for datline in parafile.readlines():
        d = datline.strip().split(' ',1)  ## only first blank is delimiter
        if len(d) == 2:
            paratab[d[0].lower().strip()] = d[1].strip()
        else:  ##  nothing usable on this line
            print("Possible error in parameter line:",NL,d)

    ##  next go thru paralist, setting values :
    for p in paralist:  ##  p is a tuple, item zero is parameter name
        paraname = p[0]
        plen = len(p)
        paradef = p[1] if plen > 1 else ''  ##  default from program header
        ##  must be numeric if lo/hi values given
        numpara = plen > 2 or isinstance(paradef,(int,float))
        pval = paratab.get(paraname,paradef)
        if numpara:
            if not isinstance(pval,(int,float)):
                pval = numerize(pval)
            if pval is None:  ##  value doesn't look like a number
                print("Non-numeric value for parameter:",paraname)
                if isinstance(paradef,(int,float)):
                    pval = paradef
                elif isinstance(paradef,str):
                    pval = numerize(paradef)
            if pval is not None:
                if plen > 2:  ##  enforce minimum value
                    pval = max(p[2],pval)
                if plen > 3:  ##  enforce maximum value
                    pval = min(p[3],pval)
        setattr(paradat,paraname,pval)

    return  paradat


##  PARASHOW :
def parashow (paradat,fo=sys.stdout,heading="Parameter settings :"):
    "writes the attributes of paradat to file fo under heading, via showobj()."

    showobj(paradat,fo=fo,heading=heading)


##  SHOWOBJ :
def showobj (item,fo=sys.stdout,heading=''):
    """Writes the attributes of item to file fo, one (name, value) pair per line.

    The pairs come from item.__dict__ and are listed in sorted order; a
    non-empty heading is written first and a blank line last.
    """

    pairs = sorted(item.__dict__.items())
    if heading:
        fo.write(heading + NL)
    for pair in pairs:
        fo.write(str(pair) + NL)
    fo.write(NL)


##  TERMFREX :
def termfrex (termlist):
    "returns a dictionary giving the number of occurrences of each token in termlist."

    counts = {}
    for tok in termlist:
        if tok in counts:
            counts[tok] += 1
        else:  ##  first sighting
            counts[tok] = 1

    return  counts


##  DICSHOW :
def dicshow (dtab,fo=sys.stdout,keyorder=True,downward=0,maxlines=99):
    """Generic routine for listing dictionaries, one 'key value' line per entry.

    dtab     : dictionary to list.
    fo       : output file.
    keyorder : true -> entries ordered by key; false -> ordered by value
               (ties broken by key).
    downward : true -> descending order.
    maxlines : listing stops once more than maxlines lines have been written.
               NOTE(review): as in the original, this lets maxlines + 1
               lines through - confirm whether that is intended.

    A blank line is written before and after the listing.
    Returns the number of entries written.
    """

    if keyorder:
        pairs = sorted(dtab.items())  ##  (key,value)
    else:
        ##  (value,key) : the original looped over range(keylist), which raised
        ##  TypeError, so listing by value could never work.
        pairs = sorted((v,k) for (k,v) in dtab.items())
    if downward:  ##  descending order
        pairs.reverse()
    ln = 0
    fo.write(NL)
    for pair in pairs:
        k = pair[0] if keyorder else pair[1]
        print(k,dtab[k],file=fo)  ##  v always printable ?
        ln += 1
        if ln > maxlines:
            break
    fo.write(NL)

    return  ln


##  PPSEEKER :
def ppseeker (progname,subpaths=["parapath","pp","pf","parm"]):
    """Returns list of likely parameter-file paths given progname (= sys.argv[0]).

    progname : path of the running program.
    subpaths : folder names to look for beside the program's own folder
               (the list is only read, never changed).

    The result holds every existing sibling folder of the program's folder
    whose name is in subpaths, in the order given, followed by the program's
    own folder.  Paths are built with os.path.join, so the search works with
    any path separator and not only the Windows backslash.
    """

    prepath = os.path.dirname(progname)
    parent = os.path.split(prepath)[0]  ##  pull off tail
    prepaths = []
    for sp in subpaths:
        pd = os.path.join(parent,sp)
        if os.path.isdir(pd):
            prepaths.append(pd)

    prepaths.append(prepath)  ##  own path comes last
    return  prepaths
    ## should have sibling paths, ending with subpaths, if they exist.


##  METAGET :
def metaget (metafile, paradat):
    """Reads 2010-format metafile & returns tuple of (filelist,outcomes).

    metafile : open metadata file, read line by line.
    paradat  : parameter object; optional attributes "filextn" (default
               extension, else '.txt') and "prepath" (default folder, else '').

    Line formats, after stripping surrounding white space :
      name<Tabchar>outcome   file with its outcome value;
      @folder                every file in folder, with the current default outcome;
      =value                 sets the default outcome (initially 0);
      folder<separator>      an existing folder becomes the default path prefix;
      name                   file with the current default outcome.
    Names without a folder get the default prefix; names without an
    extension get the default extension.  When neither the name nor the
    default supplies a folder the name is left relative (the original put a
    bare separator in front, which pointed at the root folder).

    Returns (filelist,outcomes), two lists of equal length.
    """

    filelist = [] ; outcomes = []
    defo = 0  ##  default outcome until an '=' line changes it
    sepp = os.path.sep
    defx = getattr(paradat,"filextn",'.txt')
    prepath = getattr(paradat,"prepath",'')
    for metaline in metafile.readlines():
        textname = ''
        textval = defo
        info = metaline.strip()
        linedat = info.split(Tabchar)
        if len(linedat) >= 2:  ##  normal case : textname then textval
            textname = linedat[0]
            textval = linedat[1]
        elif info.startswith('@'):
            if not info.endswith(sepp):
                info = info + sepp  ##  make sure it ends with separator
            folder = info[1:]
            if os.path.isdir(folder):
                filelist.extend(metasub(folder))  ##  all files in subfolder
                while len(outcomes) < len(filelist):
                    outcomes.append(defo)  ##  give them current outcome default
            else:  ##  something amiss
                print(info,"doesn't refer to a valid folder!")
                print("Line ignored.")
        elif info.startswith('='):
            parts = info.split('=')
            if len(parts) == 2:  ##  exactly one equals sign
                defo = parts[1]
        elif info.endswith(sepp) and os.path.isdir(info):
            prepath = info  ##  reset default path prefix
        else:  ##  filename without associated outcome
            textname = info

        if textname:  ##  datafile line (not just default setting)
            d = os.path.dirname(textname) or prepath
            (b,x) = os.path.splitext(os.path.basename(textname))
            if not x:
                x = defx
            if d and not d.endswith(sepp):  ##  no separator for an empty folder
                d = d + sepp
            filelist.append(d + b + x)
            outcomes.append(textval)

    return  (filelist,outcomes)


##  METASUB :
def metasub (info):
    """Returns the paths of all plain files in folder info.

    Each path is info joined directly to the entry name, so info is expected
    to end with a path separator already.  Sub-folders are left out.
    """

    outlist = []
    for entry in os.listdir(info):
        candidate = info + entry
        if os.path.isfile(candidate):
            outlist.append(candidate)

    return  outlist


##  METACHEX :
def metachex (filelist,outcomes,paradat):
    """Checks filelist & turns integer-like outcomes into ints; paradat is updated.

    A warning is printed for every entry of filelist that is not an existing
    file.  Each outcome that numerize() converts to an int is replaced, in
    place, by that int; other outcomes are left alone.  Afterwards
    paradat.catvals holds the set of outcome values and paradat.cats its size.

    Returns the number of missing files.
    """

    missing = 0
    for pos, fname in enumerate(filelist):
        rawval = outcomes[pos]
        if not os.path.isfile(fname):
            print("Warning, nonexistent file : ",fname)
            missing += 1
        asnum = numerize(rawval)
        if type(asnum) is int:  ##  floats and non-numbers stay as they are
            outcomes[pos] = asnum

    catvals = set(outcomes)
    paradat.catvals = catvals
    paradat.cats = len(catvals)
    return  missing


##  GETDOCS :
def getdocs (paradat,filelist,outcomes):
    """Reads text docs from file; returns list of Sack() objects.

    paradat  : parameter object; .filextn restricts reading to files whose
               lower-cased name ends with it ('' accepts any file); also
               passed on to toklines().
    filelist : paths of the documents.
    outcomes : outcome value for each entry of filelist, by position.

    Each document that is read gets the attributes name, dnum (position in
    filelist), toklist, rawlines, outcome and size (number of tokens).
    Entries that are not files, or have the wrong extension, are skipped and
    a '??' line reports the mismatch in counts.

    Side-effects : paradat.sizelist, paradat.maxsize and paradat.tottoks are
    set; maxsize is 0 when no document could be read (the original raised
    ValueError from max() on the empty list).
    """

    counted = 0 ; fnum = 0
    doclist = [] ; sizelist = []
    for f in filelist:
        if os.path.isfile(f) and (paradat.filextn=='' or f.lower().endswith(paradat.filextn)):
            rawlines = try2read(f)
            thisdoc = Sack(f)
            thisdoc.name = os.path.basename(f)
            thisdoc.dnum = fnum
            thisdoc.toklist = toklines(rawlines,paradat)
            thisdoc.rawlines = rawlines
            thisdoc.outcome = outcomes[fnum]
            thisdoc.size = len(thisdoc.toklist)
            sizelist.append(thisdoc.size)
            doclist.append(thisdoc)
            counted += 1
        fnum += 1

    if counted != fnum:  ##  some files skipped
        print("??",counted,fnum)
    paradat.sizelist = sizelist
    paradat.maxsize = max(sizelist) if sizelist else 0  ##  safe when nothing was read
    paradat.tottoks = sum(sizelist)
    return  doclist


##  TRY2READ :
def try2read (f,codelist=["utf8","windows-1252","latin1"]):
    """Tries more than once to read text file f by guessing (?Windows) encodings.

    f        : path of the file.
    codelist : encodings to try, in order (utf8 is normally first choice);
               the list is only read, never changed.

    Returns the list of lines from the first encoding that reads the whole
    file without error.  If every encoding fails, the file is read as utf8
    with undecodable bytes kept as surrogate escapes; errors from that last
    attempt (e.g. a missing file) are passed on to the caller.

    Every attempt uses a 'with' block, so the file is closed even when
    decoding fails part-way through.
    """

    for ec in codelist:
        try:
            with open(f,'r',encoding=ec) as textfile:
                return  textfile.readlines()
        except (UnicodeError,ValueError,IOError):
            continue  ##  try next encoding

    ##  back-stop procedure is to replace error bytes (sort-of) :
    with open(f,'r',encoding="utf8",errors="surrogateescape") as textfile:
        return  textfile.readlines()


##  TOKLINES :
def toklines (rawlines,paradat):
    """Turns text lines, as produced by readlines(), into one flat list of tokens.

    paradat settings used :
      foldcase / casefold : if either exists and is true the text is
                            lower-cased (foldcase wins when both exist);
      atomize             : true -> tokenize with tokline(), false -> the
                            text is assumed tokenized and is just split();
      wordonly            : true -> keep only tokens whose first character
                            is alphanumeric.
    """

    folding = getattr(paradat,"foldcase",getattr(paradat,"casefold",0))
    collected = []
    for raw in rawlines:
        text = raw.strip()
        if folding:
            text = text.lower()
        if paradat.atomize:
            pieces = tokline(text+' ',paradat)
        else:  ##  assume already tokenized
            pieces = text.split()
        if paradat.wordonly:  ##  drop anything not starting alphanumerically
            pieces = [t for t in pieces if t and t[0].isalnum()]
        collected.extend(pieces)  ##  add to growing list

    return  collected


##  REXTOKS :
def rextoks (linelist,paradat):  ##  wpat=r"[\w']+")  formerly
    """Tokenizes list of strings using basic regular xp pattern.

    linelist : iterable of text lines.
    paradat  : parameter object.  .wpat is the token pattern, either a
               string or (better) an already compiled pattern.  An optional
               .punxtab translation table is applied to each line first.
               Tokens are lower-cased if .casefold is true; if that attribute
               is missing .foldcase is consulted, and if both are missing no
               folding is done.

    One trailing apostrophe is stripped from each token.
    Returns a flat list of tokens; quicker than toklines(), but doesn't
    allow wordonly=0.
    """

    import re  ##  local import : this module's own import line doesn't bring in re

    if isinstance(paradat.wpat,str):  ##  compile string patt
        patt = re.compile(paradat.wpat)
    else:  ##  better to pass it in from outside as compiled pattern
        patt = paradat.wpat
    tt = getattr(paradat,"punxtab",{})
    folding = getattr(paradat,"casefold",getattr(paradat,"foldcase",0))
    toklist = []
    for s in linelist:
        line = s.translate(tt) if tt else s  ##  convert funny punctuation characters
        toklist.extend(patt.findall(line))  ##  extend() avoids re-copying the list

    for p, tok in enumerate(toklist):
        if tok.endswith("'"):  ##  strip trailing apostrophe
            tok = tok[:-1]
        if folding:
            tok = tok.lower()
        toklist[p] = tok

    return  toklist


##  STREIGHT :
def streight (su):
    """Returns a copy of su in which every character with a code above 255
    is replaced by chr(19); other characters are kept.  Information is lost,
    but the result won't crash output to an 8-bit code page.
    """

    return  ''.join(ch if ord(ch) <= 255 else chr(19) for ch in su)


##  PUNKTRAN :
def punktran (punxlist=Punxlist):
    """Builds a translation table (for str.translate) from a list of char codes.

    punxlist is read in order : a code below 8 (in practice a negative
    number) sets the replacement to its absolute value, and every other
    code is mapped to the replacement currently in force.  Codes met before
    any replacement has been set are mapped to 33.
    """

    table = {}  ##  table values = good old-fashioned ascii-type characters
    target = 33  ##  default backup
    for code in punxlist:
        if code >= 8:
            table[code] = target
        else:  ##  new replacement character
            target = abs(code)

    return  table  ##  2b used by translate()


##  SIBPATH :
def sibpath (p,lastbit="parapath"):
    """Makes sibling path from folder name p.

    p       : a folder path, or the path of a .py script (in which case the
              script's folder is used).
    lastbit : name of the sibling folder wanted.

    Returns the path of lastbit inside the parent of that folder.  If p has
    no parent part (e.g. a bare folder or script name) the user's home
    folder is used as parent; the original tested len(bits) < 1 for this,
    which can never be true because os.path.split always returns two items.
    """

    (parent,tail) = os.path.split(p)
    if tail.endswith(".py"):  ##  p names a script : step up from its folder
        parent = os.path.split(parent)[0]
    if not parent:
        parent = os.path.expanduser('~')

    return  os.path.join(parent,lastbit)


##  METARESP :
def metaresp (metadict,paradat):
    """Gets outcome values from metadict.

    metadict : dictionary from file name to that file's list of column values.
    paradat  : parameter object with .filelist (file names to look up),
               .colnames (column names) and .targcol (the target column).

    targcol may be '' (column 2 is used), a column name, or a column number
    within range, which is kept as it is - so calling the routine again
    after it has stored the number does not reset the target to column 0.
    An unknown name, or a position beyond the last column, selects column 0.

    Side-effect : paradat.targcol is replaced by the column number used.
    Returns the list of target-column values, in the order of paradat.filelist.
    """

    cols = len(paradat.colnames)
    targ = paradat.targcol
    if targ == '':
        tpos = 2  ##  default
    elif isinstance(targ,int) and not isinstance(targ,bool) and 0 <= targ < cols:
        tpos = targ  ##  already a column number
    else:
        try:
            tpos = paradat.colnames.index(targ)
        except ValueError:  ##  no such column name
            tpos = 0
    if tpos >= cols:
        tpos = 0  ##  backup default !
    paradat.targcol = tpos

    return  [metadict[fn][tpos] for fn in paradat.filelist]


##  READMETA :
def readmeta (metafile,paradat,Htab='\t'):
    """Tries to read metadata as tab-delimited flat-file, with header line @top.

    metafile : open metadata file; its first line holds the column names.
    paradat  : parameter object; paradat.colnames is set as a side-effect.
    Htab     : column delimiter.

    The columns called "prepath" and "filename" supply the folder and name
    of each file; if they are absent, columns 0 and 1 are used.  A leading
    byte-order mark is removed from the header line, so that it cannot
    become part of the first column name.  Data lines with too few or too
    many fields are reported and skipped.

    Returns (metadict,metalist) : metadict maps each full file name to its
    list of fields and metalist holds the same field lists in file order.
    Returns None if there are fewer than 2 lines or fewer than 2 columns.
    """

    metlines = metafile.readlines()
    if len(metlines) < 2:
        print("Warning: lack of metadata !")
        return  None
    topline = metlines[0].lstrip('\ufeff').strip()  ##  drop byte-order mark, if any
    bits = topline.split(Htab)
    if len(bits) < 2:
        print("Warning: problem with header line in metafile !")
        print(topline) ; print(bits)
        return  None
    colnames = bits[:]  ##  make a copy
    print(colnames)
    paradat.colnames = colnames   ##  side-effect
    ##  NOTE(review): metinfo is never used below; kept in case creating the
    ##  Sack matters elsewhere - confirm and remove.
    metinfo = Sack(metafile.name)
    metinfo.colnames = colnames
    cols = len(colnames)
    ##  appending the name guarantees index() succeeds; a result of cols means absent
    ppos = (colnames + ["prepath"]).index("prepath")
    fpos = (colnames + ["filename"]).index("filename")
    if ppos >= cols:
        ppos = 0
    if fpos >= cols:
        fpos = 1
    print("££",cols,ppos,fpos)

    metadict = {}
    metalist = []
    for metline in metlines[1:]:  ##  header line already dealt with
        datline = metline.strip()
        bits = datline.split(Htab)
        nb = len(bits)
        if nb < cols:
            print("Short metaline: ",datline)
        elif nb > cols:
            print("Long metaline : ",datline)
        else:  ##  normal case
            fullname = makefull(bits[ppos],bits[fpos])
            metadict[fullname] = bits
            metalist.append(bits)

    return  (metadict,metalist)


##  NICERESP :
def niceresp (outcomes):
    """Makes mixed-type responses sortable.

    If at least one outcome is a string, a new list with every outcome
    converted by str() is returned; otherwise outcomes itself is returned
    unchanged (presumed numeric).
    """

    kinds = {type(o) for o in outcomes}
    if str in kinds:  ##  1 string -> all strings
        return  [str(r) for r in outcomes]

    return  outcomes


##  MAKEFULL :
def makefull (prepath,filename):
    "returns prepath + filename, putting os.sep between them unless prepath already ends with it."

    joiner = '' if prepath.endswith(os.sep) else os.sep
    return  prepath + joiner + filename


##  PARAFIND :
def parafind (args,dateline,version="0.0"):
    """Works out the parameter file name and the folders to search for it.

    args     : command-line argument list (sys.argv[:] passed in); if it
               holds no parameter file name the user is asked for one, and
               the reply is appended to args.
    dateline : date/time text, printed in the banner line.
    version  : program version, printed in the banner line.

    Returns (paraname,prepaths) : the parameter file name and the list of
    folders to try, namely the "parapath" sibling of the working folder,
    the working folder, the parent and current folder symbols, the
    "parapath" folder under the user's home folder and the home folder.
    """

    progname = args[0]
    print(progname,version,dateline)
    print("command-line args. =",len(args))
    print("prepath :",os.path.dirname(progname))  ##  directory from which program runs
    whereat = os.getcwd()  ##  working directory
    print("working folder: ",whereat)
    sidepath = sibpath(whereat,"parapath")

    if len(args) < 2:
        print("script usage:  python " + progname + " <parafile>")
        args.append(input("please give parameter file name : "))
    paraname = args[1]

    home = os.path.expanduser('~')
    prepaths = [sidepath]
    if whereat not in prepaths:
        prepaths.append(whereat)
    prepaths.extend([os.pardir,os.curdir])
    homepara = home + os.sep + "parapath"
    if homepara not in prepaths:
        prepaths.append(homepara)
    prepaths.append(home)
    print("Paths to search for parameter file :")
    print(prepaths)

    return  (paraname,prepaths)


##  ORDERLY :
def orderly (xvec,downward=False):
    """Returns the list of subscripts that would put the values of xvec in order.

    Ascending unless downward is true.  Equal values are ordered by their
    subscript (reversed as well when downward).  An empty xvec gives [].
    """

    pairs = sorted(((xvec[j],j) for j in range(len(xvec))),reverse=downward)

    return  [pos for (_,pos) in pairs]
    ##  like order() in R.


##  MAJORITY :
def majority (catlist):
    """Returns the most frequently occurring item in catlist.

    Ties go to the item that comes first in sorted order.  An empty catlist
    gives ''.
    """

    if len(catlist) < 1:
        return  ''
    cats = sorted(set(catlist))  ##  standard order, implicit tiebreaker
    if len(cats) == 1:
        return  catlist[0]  ##  any1 will do

    return  max(cats,key=catlist.count)  ##  max() keeps the first of equals




##  testing :

##  The three blocks below are switched off : neither 0 > 0 nor 0 < 0 is
##  ever true.  Change a condition by hand to run that trial.

##  trial of majority() and orderly() :
if 0 > 0:
    zonk = "these cats jumped over those lazy dogs on the mats over there over & over again !"
    zz = zonk.split()
    popz = majority(zz)
    print(popz)
    xvec = [2,10,1948,30,9,1975,20,11,1984,7,5,1981,10,1,1954,0,99,1,-99,7,7,00,7,999,77]
    print(majority(xvec)) ; print(xvec)
    ordered = orderly(xvec)
    print(xvec)
    print(ordered)
    xx = [xvec[j] for j in ordered]  ##  values rearranged by the subscripts from orderly()
    print(xx)
    
##  trial of sibpath() on the working folder and on the script path :
if 0 < 0:
    dire = os.getcwd()
    print(dire)
    print(sibpath(dire))
    me = sys.argv[0]
    print(me)
    print(sibpath(me))

##  trial of tokline() on the first 10 lines of a text file (hard-coded path) :
if 0 > 0:
    parasack = Sack(0)
    tinf = Tokstuff('')
    parasack.tokinfo = tinf  ##  tokenization info passed thru
    parasack.punxtab = punktran()  ##  uses default equivalents
    ##  filespec = "c:\\tb12\\franglo\\fr\\CcycBabeR_Telephone_FR.txt"
    filespec = "c:\\sufz\\domain\\wiki\\en-ru\\en\\en-8.txt"
    fi = open(filespec,'r',encoding="utf8")
    datlines = fi.readlines()
    lc = 0
    for datline in datlines:
        lc += 1
        print(lc,datline)
        toks = tokline(datline+ ' ',parasack)
        print(toks)
        if lc > 9:  break