Name:PlayerO1

Clan:Unknown

Member Since:Apr 2024

Last Seen:Sep 2024

Profiles:

Following:0

Followers:0

Allies:0

View Profile Badges

Published

playerO1vs.HRUMHRUMHRUM

Failed Tests

Repair text by history

repairs your list cur (current) with the knowledge from the list pre (previous).

print(repair(['line 1', '???? 2', 'line 3'], ['line 1', 'line 2']))
# ['line 1', 'line 2', 'line 3']

Previous kumite solition and details formalization was good.
For increase hard of solution I add additional hard test case of:

the number of broken fragment can be more that 1.
the length of broken fragment can be same, in this case should tetect nearest by position
sametime has bad input data - impossible length of repaired text - do not repair it.

Test Cases

Diff

import codewars_test as test
import random, string
# TODO Write tests
from solution import repair

def tru_repair(cur: list, pre: list):
    broken_index = [i.count("?") > 0 for i in cur].index(True)
    known_part = list(filter(lambda x: x[1] != '', enumerate(map(lambda x: x if x != "?" else "", [*cur[broken_index]]))))
    matching_pre_index = [pre.index(i) for i in pre if [(o, i) for o, i in enumerate([*i]) if (o, i) in known_part] == known_part and len(i) == len(cur[broken_index])][0]
    cur[broken_index] = pre[matching_pre_index]

    return cur

# test.assert_equals(actual, expected, [optional] message)
@test.describe("Example")
def test_group():
    @test.it("static tests")
    def _():
        test.assert_equals(repair(['line 1', '???? 2', 'line 3'], ['line 1', 'line 2']),['line 1', 'line 2', 'line 3'])
        test.assert_equals(repair(['hi', 'my name is', '????????HRUM'], ['hi my name is', 'HRUMHRUMHRUM']), ['hi', 'my name is', 'HRUMHRUMHRUM'])
        test.assert_equals(repair(['what', 'was', 'pl?????1', 'thinking', 'when', 'writing this code'], ['what was', 'playerO1', "thinking when writing this code"]), ['what', 'was', 'playerO1', 'thinking', 'when', 'writing this code'])
        test.assert_equals(repair(['boy oh boy do I LOVE f?x?n? code'], ['boy oh boy do I LOVE fixing code']), ['boy oh boy do I LOVE fixing code'])

    @test.it("static tests, level 2")
    def _():
        test.assert_equals(repair(['line 1','', '???? 2', '', 'line 3','.'], ['line 1', 'line 2','?']),['line 1','', 'line 2', '', 'line 3','.'])
        
        test.assert_equals(repair(['Sequence of','??????','????','???? ?????????','length'], ['The','Sequense of','broken','word','with different','length']), ['Sequence of','broken','word','with different','length'])
        test.assert_equals(repair(['Change number','??',' test ','????',' should do',' work too'], ['Another line, will be deleted', 'Change number','of','word','should do work too']), ['Change number','of',' test ','word',' should do',' work too'])
        test.assert_equals(repair(['Swap', '??', '???', 'big lines', 'near'], ['Swap', 'two', 'of', 'big lines', 'near']), ['Swap', 'of', 'two', 'big lines', 'near'])
        test.assert_equals(repair(['Same count','???',' ','???','????','????','.'], ['Same count','but',' ','not','same','word','.']), ['Same count','but',' ','not','same','word','.'])
        test.assert_equals(repair(['to repair','???? ???','?'], ['It is impossible','to repair',' too many error and many different']), ['to repair','???? ???','?'])

    
    @test.it("random tests")
    def _():
        def break_str(st: str):
            return "".join(map(lambda x: x if random.getrandbits(1) else "?", [*st]))
        for i in range(1000):
            sts = ["".join(random.choices([*string.ascii_letters], k=random.randint(5, 30))) for _ in range(random.randint(10, 30))]
            broken_sts = list(map(lambda x: break_str(x), sts))
            if random.getrandbits(1): broken_sts.append("new word")
            if random.getrandbits(1): sts.append("old word")
            test.assert_equals(repair(broken_sts, sts), tru_repair(broken_sts, sts))

- import codewars_test as test
- import random, string
- # TODO Write tests
- from solution import repair
- def tru_repair(cur: list, pre: list):
- broken_index = [i.count("?") > 0 for i in cur].index(True)
- known_part = list(filter(lambda x: x[1] != '', enumerate(map(lambda x: x if x != "?" else "", [*cur[broken_index]]))))
- matching_pre_index = [pre.index(i) for i in pre if [(o, i) for o, i in enumerate([*i]) if (o, i) in known_part] == known_part and len(i) == len(cur[broken_index])][0]
- cur[broken_index] = pre[matching_pre_index]
- return cur
- # test.assert_equals(actual, expected, [optional] message)
- @test.describe("Example")
- def test_group():
- @test.it("static tests")
- def _():
- test.assert_equals(repair(['line 1', '???? 2', 'line 3'], ['line 1', 'line 2']),['line 1', 'line 2', 'line 3'])
- test.assert_equals(repair(['hi', 'my name is', '????????HRUM'], ['hi my name is', 'HRUMHRUMHRUM']), ['hi', 'my name is', 'HRUMHRUMHRUM'])
- test.assert_equals(repair(['what', 'was', 'pl?????1', 'thinking', 'when', 'writing this code'], ['what was', 'playerO1', "thinking when writing this code"]), ['what', 'was', 'playerO1', 'thinking', 'when', 'writing this code'])
- test.assert_equals(repair(['boy oh boy do I LOVE f?x?n? code'], ['boy oh boy do I LOVE fixing code']), ['boy oh boy do I LOVE fixing code'])
- @test.it("static tests, level 2")
- def _():
- test.assert_equals(repair(['line 1','', '???? 2', '', 'line 3','.'], ['line 1', 'line 2','?']),['line 1','', 'line 2', '', 'line 3','.'])
- test.assert_equals(repair(['Sequence of','??????','????','???? ?????????','length'], ['The','Sequense of','broken','word','with different','length']), ['Sequence of','broken','word','with different','length'])
- test.assert_equals(repair(['Change number','??',' test ','????',' should do',' work too'], ['Another line, will be deleted', 'Change number','of','word','should do work too']), ['Change number','of',' test ','word',' should do',' work too'])
- test.assert_equals(repair(['Swap', '??', '???', 'big lines', 'near'], ['Swap', 'two', 'of', 'big lines', 'near']), ['Swap', 'of', 'two', 'big lines', 'near'])
- test.assert_equals(repair(['Same count','???',' ','???','????','????','.'], ['Same count','but',' ','not','same','word','.']), ['Same count','but',' ','not','same','word','.'])
- test.assert_equals(repair(['to repair','???? ???','?'], ['It is impossible','to repair',' too many error and many different']), ['to repair','???? ???','?'])
- @test.it("random tests")
- def _():
- def break_str(st: str):
- return "".join(map(lambda x: x if random.getrandbits(1) else "?", [*st]))
- for i in range(1000):
- ~~sts = ["".join(random.choices([*string.ascii_letters], k=random.randint(10, 30))) for _ in range(random.randint(10, 30))]~~
- sts = ["".join(random.choices([*string.ascii_letters], k=random.randint(5, 30))) for _ in range(random.randint(10, 30))]
- broken_sts = list(map(lambda x: break_str(x), sts))
- if random.getrandbits(1): broken_sts.append("new word")
- if random.getrandbits(1): sts.append("old word")
- test.assert_equals(repair(broken_sts, sts), tru_repair(broken_sts, sts))

playerO1vs.playerO1

Failed Tests

Repair text by history

Same bugfix. Now, the code has more work.

Code

Diff

# This solution not vork correctly. Try improvement this code.

CONST_UNDEFINED = -999999 # or None
CONST_MAX_VALUE = 999999
class RepairEngine:
    searchWindow1 = 70 # when diff < DIFF_TRESHOLD
    searchWindow2 = 14 # when diff < 3
    DIFF_TRESHOLD_1=68
    DIFF_TRESHOLD_2=3
    def __str__(self):
        return f"RepairEngine(searchWindow1={str(self.searchWindow1)}, searchWindow2={str(self.searchWindow2)}, DIFF_TRESHOLD_1={str(self.DIFF_TRESHOLD_1)})"
    
    def indexing(self, historySeq):
        if len(historySeq)<2 : raise BaseException("History sequence should have 2 or more files. historySeq="+str(historySeq))
        for text in historySeq: #markBrokenLines
            for l in text:
                l['hasBroken'] = self.checkBrokenLine(l['line'])
        for i in range(1,len(historySeq)):
            self.linkFiles(historySeq[i-1], historySeq[i])
    #end of indexing
    
    def linkFiles(self, lines1, lines2):
        p2=0
        seqenceLost=0
        for i in range(0, len(lines1)):
            #print(f"Progress {str(i+1)}/{len(lines1)} history items")
            myLine=lines1[i]
            p,thisK =self.searchLine(myLine['line'], lines2, p2, i);
            #print('p=',p,' thisK=',thisK,'  i=',i,'  text:', myLine['line'])
            if ((p!=CONST_UNDEFINED) and (p>=0)):
                if (p2!=p):
                    #print(" new offset {str(i)} > {str(p)}")
                    p2=p
                myLine['nextPos']=p
                applyPatch = True
                if (lines2[p]['prePos']!=CONST_UNDEFINED) and (lines2[p]['prePos']!=i):
                    if lines2[p]['preK'] > thisK:
                        None
                    elif (lines2[p]['preK'] < thisK):
                        applyPatch = False
                    else:
                        None
                if applyPatch:
                    lines2[p]['prePos'] = i
                    lines2[p]['preK'] = thisK
                    if i != myLine['pos']:
                        print(f"WARN: i={str(i)} and pos={str(myLine['pos'])} not match! At line \"{str(myLine['line'])}\"");
                if seqenceLost>self.searchWindow1*3:
                    print(f"i={str(i)}, found p={str(p)} after lost search window");
                seqenceLost=0
            else:
                #print("i={str(i)} pos={str(p)} not found position")
                seqenceLost +=1
                if seqenceLost==self.searchWindow1*3:
                    print(f"i={str(i)}, p={str(p)} lost search window, next position search may be not correct");
            p2 +=1
    # end of linkFiles
    
    # TODO hard method. Need more unit test!    
    def searchLine(self, text, inLines, nearPos1:int, nearPos2:int):
        result=CONST_UNDEFINED
        resultD=CONST_MAX_VALUE
        for fromPos in [nearPos1,nearPos2] :
            if (fromPos==CONST_UNDEFINED): continue
            if (result!=CONST_UNDEFINED and nearPos1==nearPos2): break
            for d in range(0,self.searchWindow1):
                # todo mark previous interval and do not check against lines.
                p=fromPos+d
                if (0<= p) and (p <len(inLines)):
                    k = self.stringDiff(text, inLines[p]['line']);
                    if (k<=self.DIFF_TRESHOLD_1*3):
                        k+=min(20,d/40)
                        k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
                    #print('k=',k,' resultD=',resultD,'; t1="',text,'" t2="',inLines[p]['line'],'" ')
                    if (resultD>k):
                        result, resultD = p, k
                if (d!=0):
                    p=fromPos-d
                    if (0<= p) and (p <len(inLines)):
                        k = self.stringDiff(text, inLines[p]['line'])
                        if (k<=self.DIFF_TRESHOLD_1*3):
                            k+=min(20,d/40)
                            k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
                        if resultD>k :
                            result, resultD = p, k
                if (resultD==0 or resultD<self.DIFF_TRESHOLD_2 and d>3 and d < self.searchWindow2):
                    break # stop search - it is best.
        if (resultD>self.DIFF_TRESHOLD_1):
            return -1, None # not found
        return result, resultD
    # end of searchLine
    
    not_allowed="?*"
    def checkBrokenLine(self, line):
        wrongChar=0
        questionChar=0
        for c in line:
            if c in self.not_allowed: wrongChar+=1
            #if not c in self.allowed: wrongChar+=1
            if c=='?':
                questionChar+=1
                #if c in self.not_allowed: wrongChar-=1
        return wrongChar>0 or (len(line)>5 and questionChar>5)
    # end of checkBrokenLine
    
    # Soft comparsion of 2 string
    # return  0 - equals, 2 equals ignore case and whitespace, 15 high diff but maybe same, Integer.MAX_VALUE absolutly different
    def stringDiff(self, l1, l2):
        if (l1==l2): 
            #print('compare ',l1,l2," EQ=0")
            return 0
        if (l1.lower()==l2.lower()):
            #print('compare ',l1,l2," EQ=1")
            return 1
        l1=l1.replace("  ", " ").strip()
        l2=l2.replace("  ", " ").strip()
        #todo optimization: l1,l2 = l1.upper(),l2.upper()
        l1,l2=l1.upper(),l2.upper()
        if (len(l1)==len(l2)):
            eq=True
            for i in range(len(l1)):
                eq=eq and (l1[i]==l2[i]) or (l1[i] in self.not_allowed) or (l1[i] in self.not_allowed)
            if eq:
                #print('compare ',l1,l2," EQ=1,2")
                return 1
        
        #Damerau–Levenshtein distance / Расстояние Дамерау — Левенштейна  (https://ru.wikipedia.org/w/index.php?#title=%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%94%D0%B0%D0%BC%D0%B5%D1%80%D0%B0%D1%83_%E2%80%94_%D0%9B%D0%B5%D0%B2%D0%B5%D0%BD%D1%88%D1%82%D0%B5%D0%B9%D0%BD%D0%B0&oldid=100038662  Creative commons CC-BY-SA )
        def damerau_levenshtein_distance(s1, s2):
            d = {}
            lenstr1 = len(s1)
            lenstr2 = len(s2)
            for i in range(-1,lenstr1+1):
                d[(i,-1)] = i+1
            for j in range(-1,lenstr2+1):
                d[(-1,j)] = j+1
            for i in range(lenstr1):
                for j in range(lenstr2):
                    if s1[i] == s2[j]:
                        cost = 0
                    elif (s1[i] in self.not_allowed) or (s2[j] in self.not_allowed): # modification weight
                        cost = 0
                    else:
                        cost = 3
                    d[(i,j)] = min(
                                   d[(i-1,j)] + 1, # deletion
                                   d[(i,j-1)] + 1, # insertion
                                   d[(i-1,j-1)] + cost, # substitution
                                  )
                    if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]:
                            d[(i,j)] = min(d[(i,j)], d[i-2,j-2] + 1) # transposition
            return d[lenstr1-1,lenstr2-1]
        
        xDiff=abs(len(l1)-len(l2)) + damerau_levenshtein_distance(l1,l2)
        
        if (len(l1)<3 or len(l2)<3 or abs(len(l1)-len(l2))>30):
            #print('compare ',l1,l2," EQ=",CONST_MAX_VALUE)
            return CONST_MAX_VALUE
        xDiff = xDiff *300 / len(l1)
        if (xDiff>CONST_MAX_VALUE-1000):
            #print('compare ',l1,l2," EQ=",CONST_MAX_VALUE-100)
            return CONST_MAX_VALUE-100
        #print('compare ',l1,l2," EQ=",xDiff)
        return round(xDiff)
    # end of stringDiff
    
    def repair(self, historySeq, itm:int=-1):
        if (itm==0): raise BaseException("Can not repair first item[0] from future!")
        if (itm<0): itm=len(historySeq)-1
        #log.debug("Start repair history[{}] \"{}\"", itm, historySeq.get(itm).name);
        lines=historySeq[itm]
        out=[]
        statRepair, statHistoryUsedDepth, statHistoryWatchDepth, statNotFound = 0,0,0,0
        print(" repair lines:"+str(len(lines)))
        for line in lines:
            print(" repair line>"+repr(line)+"  pos="+str(line['pos'])+" test="+str(line))
            if (line['hasBroken']):
                oldP=line['prePos']
                depth=0
                i=itm-1
                while (oldP!=CONST_UNDEFINED and i>=0):
                    depth+=1
                    statHistoryWatchDepth=max(statHistoryWatchDepth, depth)
                    oldL=historySeq[i][oldP]
                    if (not oldL['hasBroken']):
                        print(" repair line<use old source")
                        out.append(oldL['line'])
                        #log.trace(" [{}][{}] line {}({})> repair from \"{}\" to \"{}\"",
                        #        itm, i, line.pos, oldP, line.line, oldL.line);
                        statRepair+=1
                        statHistoryUsedDepth=max(statHistoryUsedDepth, depth)
                        break
                    #else:
                    #    log.trace(" [{}][{}] line {}({})> go deep", itm, i, line.pos, oldP);
                    oldP=oldL['prePos']
                    i-=1
                if (oldP==CONST_UNDEFINED) :
                    print(" repair line<not found. ", line['line'])
                    out.append(line['line']) #no modify
                    statNotFound+=1
            else:
                print(" repair line<no need")
                out.append(line['line'])
        print(f"Repair complete: repair {statRepair} lines, max history depth = {statHistoryUsedDepth}, max history watch depth = {statHistoryWatchDepth}, not found {statNotFound} lines.")
        print(" repair out lines:"+str(len(out)))
        return out
    # end of repair
    
    def process(self, historySeq, i_to_repair:int=-1):
        hSeq=[]
        for histText in historySeq:
            lines=[]
            i=0
            for l in histText:
                tl={'pos':i, 'line':l, 'hasBroken':False,
    'prePos':CONST_UNDEFINED, 'preK':-1.0, 'nextPos':CONST_UNDEFINED}
                lines.append(tl)
                i+=1
            hSeq.append(lines)
        print("indexing...")
        idx=self.indexing(hSeq)
        print("pepair n="+str(i_to_repair))
        repair=self.repair(hSeq, i_to_repair)
        print("done")
        return repair

def repair_text_by_history(history_of_text):
    repairer=RepairEngine()
    text=repairer.process(history_of_text, len(history_of_text)-1)
    return text

- # This solution not vork correctly. Try improvement this code.
- CONST_UNDEFINED = -999999 # or None
- CONST_MAX_VALUE = 999999
- class RepairEngine:
- searchWindow1 = 70 # when diff < DIFF_TRESHOLD
- searchWindow2 = 14 # when diff < 3
- DIFF_TRESHOLD_1=68
- DIFF_TRESHOLD_2=3
- def __str__(self):
- return f"RepairEngine(searchWindow1={str(self.searchWindow1)}, searchWindow2={str(self.searchWindow2)}, DIFF_TRESHOLD_1={str(self.DIFF_TRESHOLD_1)})"
- def indexing(self, historySeq):
- if len(historySeq)<2 : raise BaseException("History sequence should have 2 or more files. historySeq="+str(historySeq))
- for text in historySeq: #markBrokenLines
- for l in text:
- l['hasBroken'] = self.checkBrokenLine(l['line'])
- for i in range(1,len(historySeq)):
- self.linkFiles(historySeq[i-1], historySeq[i])
- #end of indexing
- def linkFiles(self, lines1, lines2):
- p2=0
- seqenceLost=0
- for i in range(0, len(lines1)):
- #print(f"Progress {str(i+1)}/{len(lines1)} history items")
- myLine=lines1[i]
- p,thisK =self.searchLine(myLine['line'], lines2, p2, i);
- #print('p=',p,' thisK=',thisK,' i=',i,' text:', myLine['line'])
- if ((p!=CONST_UNDEFINED) and (p>=0)):
- if (p2!=p):
- #print(" new offset {str(i)} > {str(p)}")
- p2=p
- myLine['nextPos']=p
- applyPatch = True
- if (lines2[p]['prePos']!=CONST_UNDEFINED) and (lines2[p]['prePos']!=i):
- if lines2[p]['preK'] > thisK:
- None
- elif (lines2[p]['preK'] < thisK):
- applyPatch = False
- else:
- None
- if applyPatch:
- lines2[p]['prePos'] = i
- lines2[p]['preK'] = thisK
- if i != myLine['pos']:
- print(f"WARN: i={str(i)} and pos={str(myLine['pos'])} not match! At line \"{str(myLine['line'])}\"");
- if seqenceLost>self.searchWindow1*3:
- print(f"i={str(i)}, found p={str(p)} after lost search window");
- seqenceLost=0
- else:
- #print("i={str(i)} pos={str(p)} not found position")
- seqenceLost +=1
- if seqenceLost==self.searchWindow1*3:
- print(f"i={str(i)}, p={str(p)} lost search window, next position search may be not correct");
- p2 +=1
- # end of linkFiles
- # TODO hard method. Need more unit test!
- def searchLine(self, text, inLines, nearPos1:int, nearPos2:int):
- result=CONST_UNDEFINED
- resultD=CONST_MAX_VALUE
- for fromPos in [nearPos1,nearPos2] :
- if (fromPos==CONST_UNDEFINED): continue
- if (result!=CONST_UNDEFINED and nearPos1==nearPos2): break
- for d in range(0,self.searchWindow1):
- # todo mark previous interval and do not check against lines.
- p=fromPos+d
- if (0<= p) and (p <len(inLines)):
- k = self.stringDiff(text, inLines[p]['line']);
- if (k<=self.DIFF_TRESHOLD_1*3):
- k+=min(20,d/40)
- k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
- #print('k=',k,' resultD=',resultD,'; t1="',text,'" t2="',inLines[p]['line'],'" ')
- if (resultD>k):
- result, resultD = p, k
- if (d!=0):
- p=fromPos-d
- if (0<= p) and (p <len(inLines)):
- k = self.stringDiff(text, inLines[p]['line'])
- if (k<=self.DIFF_TRESHOLD_1*3):
- k+=min(20,d/40)
- k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
- if resultD>k :
- result, resultD = p, k
- if (resultD==0 or resultD<self.DIFF_TRESHOLD_2 and d>3 and d < self.searchWindow2):
- break # stop search - it is best.
- if (resultD>self.DIFF_TRESHOLD_1):
- return -1, None # not found
- return result, resultD
- # end of searchLine
- not_allowed="?*"
- def checkBrokenLine(self, line):
- wrongChar=0
- questionChar=0
- for c in line:
- if c in self.not_allowed: wrongChar+=1
- #if not c in self.allowed: wrongChar+=1
- if c=='?':
- questionChar+=1
- #if c in self.not_allowed: wrongChar-=1
- return wrongChar>0 or (len(line)>5 and questionChar>5)
- # end of checkBrokenLine
- # Soft comparsion of 2 string
- # return 0 - equals, 2 equals ignore case and whitespace, 15 high diff but maybe same, Integer.MAX_VALUE absolutly different
- def stringDiff(self, l1, l2):
- ~~if (l1==l2): return 0~~
- ~~if (l1.lower()==l2.lower()): return 1~~
- if (l1==l2):
- #print('compare ',l1,l2," EQ=0")
- return 0
- if (l1.lower()==l2.lower()):
- #print('compare ',l1,l2," EQ=1")
- return 1
- l1=l1.replace(" ", " ").strip()
- l2=l2.replace(" ", " ").strip()
- #todo optimization: l1,l2 = l1.upper(),l2.upper()
- l1,l2=l1.upper(),l2.upper()
- if (len(l1)==len(l2)):
- eq=True
- for i in range(len(l1)):
- eq=eq and (l1[i]==l2[i]) or (l1[i] in self.not_allowed) or (l1[i] in self.not_allowed)
- if eq:
- #print('compare ',l1,l2," EQ=1,2")
- return 1
- #Расстояние Дамерау — Левенштейна (https://ru.wikipedia.org/w/index.php?#title=%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%94%D0%B0%D0%BC%D0%B5%D1%80%D0%B0%D1%83_%E2%80%94_%D0%9B%D0%B5%D0%B2%D0%B5%D0%BD%D1%88%D1%82%D0%B5%D0%B9%D0%BD%D0%B0&oldid=100038662 Creative commons CC-BY-SA )
- #Damerau–Levenshtein distance / Расстояние Дамерау — Левенштейна (https://ru.wikipedia.org/w/index.php?#title=%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%94%D0%B0%D0%BC%D0%B5%D1%80%D0%B0%D1%83_%E2%80%94_%D0%9B%D0%B5%D0%B2%D0%B5%D0%BD%D1%88%D1%82%D0%B5%D0%B9%D0%BD%D0%B0&oldid=100038662 Creative commons CC-BY-SA )
- def damerau_levenshtein_distance(s1, s2):
- d = {}
- lenstr1 = len(s1)
- lenstr2 = len(s2)
- for i in range(-1,lenstr1+1):
- d[(i,-1)] = i+1
- for j in range(-1,lenstr2+1):
- d[(-1,j)] = j+1
- for i in range(lenstr1):
- for j in range(lenstr2):
- if s1[i] == s2[j]:
- cost = 0
- ~~if (s1[i] in self.not_allowed) or (s2[j] in self.not_allowed): # modification weight~~
- ~~cost = 1~~
- elif (s1[i] in self.not_allowed) or (s2[j] in self.not_allowed): # modification weight
- cost = 0
- else:
- cost = 3
- d[(i,j)] = min(
- d[(i-1,j)] + 1, # deletion
- d[(i,j-1)] + 1, # insertion
- d[(i-1,j-1)] + cost, # substitution
- )
- if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]:
- d[(i,j)] = min(d[(i,j)], d[i-2,j-2] + 1) # transposition
- return d[lenstr1-1,lenstr2-1]
- xDiff=abs(len(l1)-len(l2)) + damerau_levenshtein_distance(l1,l2)
- if (len(l1)<3 or len(l2)<3 or abs(len(l1)-len(l2))>30):
- ~~return CONST_MAX_VALUE~~
- #print('compare ',l1,l2," EQ=",CONST_MAX_VALUE)
- return CONST_MAX_VALUE
- xDiff = xDiff *300 / len(l1)
- if (xDiff>CONST_MAX_VALUE-1000):
- #print('compare ',l1,l2," EQ=",CONST_MAX_VALUE-100)
- return CONST_MAX_VALUE-100
- #print('compare ',l1,l2," EQ=",xDiff)
- return round(xDiff)
- # end of stringDiff
- def repair(self, historySeq, itm:int=-1):
- if (itm==0): raise BaseException("Can not repair first item[0] from future!")
- if (itm<0): itm=len(historySeq)-1
- #log.debug("Start repair history[{}] \"{}\"", itm, historySeq.get(itm).name);
- lines=historySeq[itm]
- out=[]
- statRepair, statHistoryUsedDepth, statHistoryWatchDepth, statNotFound = 0,0,0,0
- print(" repair lines:"+str(len(lines)))
- for line in lines:
- print(" repair line>"+repr(line)+" pos="+str(line['pos'])+" test="+str(line))
- if (line['hasBroken']):
- oldP=line['prePos']
- depth=0
- i=itm-1
- while (oldP!=CONST_UNDEFINED and i>=0):
- depth+=1
- statHistoryWatchDepth=max(statHistoryWatchDepth, depth)
- oldL=historySeq[i][oldP]
- if (not oldL['hasBroken']):
- print(" repair line<use old source")
- out.append(oldL['line'])
- #log.trace(" [{}][{}] line {}({})> repair from \"{}\" to \"{}\"",
- # itm, i, line.pos, oldP, line.line, oldL.line);
- statRepair+=1
- statHistoryUsedDepth=max(statHistoryUsedDepth, depth)
- break
- #else:
- # log.trace(" [{}][{}] line {}({})> go deep", itm, i, line.pos, oldP);
- oldP=oldL['prePos']
- i-=1
- if (oldP==CONST_UNDEFINED) :
- print(" repair line<not found. ", line['line'])
- out.append(line['line']) #no modify
- statNotFound+=1
- else:
- print(" repair line<no need")
- out.append(line['line'])
- print(f"Repair complete: repair {statRepair} lines, max history depth = {statHistoryUsedDepth}, max history watch depth = {statHistoryWatchDepth}, not found {statNotFound} lines.")
- print(" repair out lines:"+str(len(out)))
- return out
- # end of repair
- def process(self, historySeq, i_to_repair:int=-1):
- hSeq=[]
- for histText in historySeq:
- lines=[]
- i=0
- for l in histText:
- ~~tl={'pos':-1, 'line':l, 'hasBroken':False,~~
- tl={'pos':i, 'line':l, 'hasBroken':False,
- 'prePos':CONST_UNDEFINED, 'preK':-1.0, 'nextPos':CONST_UNDEFINED}
- lines.append(tl)
- i+=1
- hSeq.append(lines)
- print("indexing...")
- idx=self.indexing(hSeq)
- print("pepair n="+str(i_to_repair))
- repair=self.repair(hSeq, i_to_repair)
- print("done")
- return repair
- def repair_text_by_history(history_of_text):
- repairer=RepairEngine()
- text=repairer.process(history_of_text, len(history_of_text)-1)
- return text

Test Cases

Diff

import codewars_test as test
from solution import repair_text_by_history
import numpy as np

def text_generator(sample_sentence, depth=4, sentences_up_to=3,rand_seed=100):
    rand=np.random.RandomState(rand_seed)
    hist=[]
    hist_broken=[]
    prev=[]
    prev_broken=[]
    def f_make_line():
        l=''
        for i in range(rand.randint(1,sentences_up_to)):
            if len(l)>0:
                l+=' '
            l+=rand.choice(sample_sentence)+rand.choice([c for c in '......???!!;'])
        #print('new:', l)
        return l;
    def f_break_line(l):
        o=''
        bk=rand.randint(8,20)
        for c in l:
            if rand.randint(16) <= bk :
                o+='?'
            else:
                o+=c
        #print('break:', l,' > ',o)
        return o
    new_break=False # first history was good
    for j in range(depth):
        text=[]
        text_broken=[]
        for i in range(len(prev)):
            if rand.randint(15) >= 13 : # corrosion line
                text.append(prev[i])
                text_broken.append(f_break_line(prev_broken[i]))
            elif rand.randint(20) >= 18 :
                continue # delete
            else:
                text.append(prev[i])
                text_broken.append(prev_broken[i])
            if rand.randint(25) >= 20 :
                for i in range(1,rand.randint(10)):
                    l=f_make_line()
                    text.append(l)
                    if new_break and (rand.randint(15) >= 13) :
                        text_broken.append(f_break_line(l))
                    else:
                        text_broken.append(l)
        # append text
        if rand.randint(10) >= 7 :
            for i in range(1,rand.randint(20)):
                l=f_make_line()
                text.append(l)
                if new_break and (rand.randint(15) >= 13) :
                    text_broken.append(f_break_line(l))
                else:
                    text_broken.append(l)
        
        prev=text
        hist.append(text)
        prev_broken=text_broken
        hist_broken.append(text_broken)
        new_broke=True
    return hist_broken, hist

# test.assert_equals(actual, expected, [optional] message)
@test.describe("Repair text test")
def test_group():
    @test.it("Fixed test case")
    def test_case():
        data1=[
[
    'Hello, world!',
    'THIS is first version.'
],[
    'Hello, world.'
    'This is first ???????.'
],[
    'Hello world',
    'Line 2.',
    'This is first ?????on.',
    'There can be large document'
],[
    'Hello.',
    '???? ??',
    '???????????? ?????on.',
    'There can be large document'
],[
    'Hello.',
    '???? ??',
    '???????????? ?????on.',
    'There can be large document',
    '',
    'Simple alhorithm for repair:',
    '',
    'Step 1.',
    '* mark broken symbols and lines',
    '',
    'Step 2.',
    '* find line number in history',
    '',
    'Step 3.',
    '* search good lines in history',
    '',
    'Step 4.',
    '* replace broken line, if it is can be true',
],[
    'Hello.',
    '???? ??',
    '???????????? ?????on.',
    'There can be large document',
    '',
    'Simple alhorithm for repair:',
    '',
    '???? ??',
    '* mark broken symbols and lines',
    '',
    'Step 2.',
    '? ???? line ?????? ?? ???????',
    '',
    '???? 3.',
    '* search good lines in history',
    '',
    'Step 4.',
    '* replace broken line, if it is can be true',
],[
    'Hello.',
    '???? ??',
    '???????????? ?????on.',
    'There can be large document',
    '',
    'Simple alhorithm for repair:',
    '',
    '???? ??',
    '* mark broken symbols and lines',
    '',
    'Step 2.',
    '? ???? line ?????? ?? ???????',
    '',
    '???? 3.',
    '* search good lines in history',
    '',
    'Step 4.',
    '* replace broken line, if it is can be true',
    '',
    'You can make more better alhorighm, or try improvement it.',
    'But do not repair string',
    'In cases, when it can be ambigous.',
    ''
],[
    'Hello.',
    '???? ??',
    '???????????? ?????on.',
    'There can be large document',
    '',
    'Simple alhorithm for repair:',
    '',
    '???? ??',
    '* mark broken symbols and lines',
    '',
    'Step 2.',
    '? ???? line ?????? ?? ???????',
    '',
    '???? 3.',
    '* search good lines in history',
    '',
    '???? ?.',
    '',
    'Step 4.',
    '* replace broken line, if it is can be true',
    '',
    'You can make more better alhorighm, or try improvement it.',
    '?? ?????? ???? ?? ??? ?? ?????????',
    '??? ?? ??? ?????? ??????',
    ''
]
    ]
        expected_text=[
    'Hello.',
    'Line 2.',
    'THIS is first version.' # TODO may be "This is first version."
    'There can be large document',
    '',
    'Simple alhorithm for repair:',
    '',
    'Step 1.',
    '* mark broken symbols and lines',
    '',
    'Step 2.',
    '* find line number in history',
    '',
    'Step 3.',
    '* search good lines in history',
    '',
    'Step 4.',
    '',
    'Step 4.',
    '* replace broken line, if it is can be true',
    '',
    'You can make more better alhorighm, or try improvement it.',
    '?? ?????? ???? ?? ??? ?? ?????????',
    '??? ?? ??? ?????? ??????',
    '']
        r_text=repair_text_by_history(data1)
        test.assert_equals(r_text, expected_text)
    
    @test.it("Random test case")
    def test_case_random():
        words=['','','Hello world','Line 2','This is first version','You can make more better alhorighm, or try improvement it',
              'You got damaged paper with text','And got previous edition of this text','Each next edition has any good changes and break character change',
               'In broken lines will be same character replaced to ''?''', 'Make solution for repair broken text by history']
        broken_t,original_t=text_generator(words, depth=3) # depth>4 need many time
        expected_text = original_t[-1]
        print("Random history with ",str(len(broken_t))," items, last with ",str(len(expected_text))," lines")
        r_text=repair_text_by_history(broken_t)
        test.assert_equals(r_text, expected_text)

- import codewars_test as test
- from solution import repair_text_by_history
- import numpy as np
- def text_generator(sample_sentence, depth=4, sentences_up_to=3,rand_seed=100):
- rand=np.random.RandomState(rand_seed)
- hist=[]
- hist_broken=[]
- prev=[]
- prev_broken=[]
- def f_make_line():
- l=''
- for i in range(rand.randint(1,sentences_up_to)):
- if len(l)>0:
- l+=' '
- l+=rand.choice(sample_sentence)+rand.choice([c for c in '......???!!;'])
- #print('new:', l)
- return l;
- def f_break_line(l):
- o=''
- bk=rand.randint(8,20)
- for c in l:
- if rand.randint(16) <= bk :
- o+='?'
- else:
- o+=c
- #print('break:', l,' > ',o)
- return o
- new_break=False # first history was good
- for j in range(depth):
- text=[]
- text_broken=[]
- for i in range(len(prev)):
- if rand.randint(15) >= 13 : # corrosion line
- text.append(prev[i])
- text_broken.append(f_break_line(prev_broken[i]))
- elif rand.randint(20) >= 18 :
- continue # delete
- else:
- text.append(prev[i])
- text_broken.append(prev_broken[i])
- if rand.randint(25) >= 20 :
- for i in range(1,rand.randint(10)):
- l=f_make_line()
- text.append(l)
- if new_break and (rand.randint(15) >= 13) :
- text_broken.append(f_break_line(l))
- else:
- text_broken.append(l)
- # append text
- if rand.randint(10) >= 7 :
- for i in range(1,rand.randint(20)):
- l=f_make_line()
- text.append(l)
- if new_break and (rand.randint(15) >= 13) :
- text_broken.append(f_break_line(l))
- else:
- text_broken.append(l)
- prev=text
- hist.append(text)
- prev_broken=text_broken
- hist_broken.append(text_broken)
- new_broke=True
- return hist_broken, hist
- # test.assert_equals(actual, expected, [optional] message)
- @test.describe("Repair text test")
- def test_group():
- @test.it("Fixed test case")
- def test_case():
- data1=[
- [
- 'Hello, world!',
- 'THIS is first version.'
- ],[
- 'Hello, world.'
- 'This is first ???????.'
- ],[
- 'Hello world',
- 'Line 2.',
- 'This is first ?????on.',
- 'There can be large document'
- ],[
- 'Hello.',
- '???? ??',
- '???????????? ?????on.',
- 'There can be large document'
- ],[
- 'Hello.',
- '???? ??',
- '???????????? ?????on.',
- 'There can be large document',
- '',
- 'Simple alhorithm for repair:',
- '',
- 'Step 1.',
- '* mark broken symbols and lines',
- '',
- 'Step 2.',
- '* find line number in history',
- '',
- 'Step 3.',
- '* search good lines in history',
- '',
- 'Step 4.',
- '* replace broken line, if it is can be true',
- ],[
- 'Hello.',
- '???? ??',
- '???????????? ?????on.',
- 'There can be large document',
- '',
- 'Simple alhorithm for repair:',
- '',
- '???? ??',
- '* mark broken symbols and lines',
- '',
- 'Step 2.',
- '? ???? line ?????? ?? ???????',
- '',
- '???? 3.',
- '* search good lines in history',
- '',
- 'Step 4.',
- '* replace broken line, if it is can be true',
- ],[
- 'Hello.',
- '???? ??',
- '???????????? ?????on.',
- 'There can be large document',
- '',
- 'Simple alhorithm for repair:',
- '',
- '???? ??',
- '* mark broken symbols and lines',
- '',
- 'Step 2.',
- '? ???? line ?????? ?? ???????',
- '',
- '???? 3.',
- '* search good lines in history',
- '',
- 'Step 4.',
- '* replace broken line, if it is can be true',
- '',
- 'You can make more better alhorighm, or try improvement it.',
- 'But do not repair string',
- 'In cases, when it can be ambigous.',
- ''
- ],[
- 'Hello.',
- '???? ??',
- '???????????? ?????on.',
- 'There can be large document',
- '',
- 'Simple alhorithm for repair:',
- '',
- '???? ??',
- '* mark broken symbols and lines',
- '',
- 'Step 2.',
- '? ???? line ?????? ?? ???????',
- '',
- '???? 3.',
- '* search good lines in history',
- '',
- '???? ?.',
- '',
- 'Step 4.',
- '* replace broken line, if it is can be true',
- '',
- 'You can make more better alhorighm, or try improvement it.',
- '?? ?????? ???? ?? ??? ?? ?????????',
- '??? ?? ??? ?????? ??????',
- ''
- ]
- ]
- expected_text=[
- 'Hello.',
- 'Line 2.',
- 'THIS is first version.' # TODO may be "This is first version."
- 'There can be large document',
- '',
- 'Simple alhorithm for repair:',
- '',
- 'Step 1.',
- '* mark broken symbols and lines',
- '',
- 'Step 2.',
- '* find line number in history',
- '',
- 'Step 3.',
- '* search good lines in history',
- '',
- 'Step 4.',
- '',
- 'Step 4.',
- '* replace broken line, if it is can be true',
- '',
- 'You can make more better alhorighm, or try improvement it.',
- '?? ?????? ???? ?? ??? ?? ?????????',
- '??? ?? ??? ?????? ??????',
- '']
- r_text=repair_text_by_history(data1)
- test.assert_equals(r_text, expected_text)
- @test.it("Random test case")
- def test_case_random():
- words=['','','Hello world','Line 2','This is first version','You can make more better alhorighm, or try improvement it',
- 'You got damaged paper with text','And got previous edition of this text','Each next edition has any good changes and break character change',
- 'In broken lines will be same character replaced to ''?''', 'Make solution for repair broken text by history']
- ~~broken_t,original_t=text_generator(words, depth=5)~~
- broken_t,original_t=text_generator(words, depth=3) # depth>4 need many time
- expected_text = original_t[-1]
- print("Random history with ",str(len(broken_t))," items, last with ",str(len(expected_text))," lines")
- r_text=repair_text_by_history(broken_t)
- test.assert_equals(r_text, expected_text)

playerO1

Failed Tests

The old history text

You got damaged paper with text. And got previous edition of this text. Each next edition has any good changes and break character change. In broken lines will be same character replaced to '?'
Make solution for repair broken text by history.

##Input:

[
['line 1','line 2'], # first
['line 1','???? 2','line 3'] # second
]

As you can see, this data like Git changelog.

##Output:
Repair last text from input.

['line 1','line 2','line 3']

# This solution not vork correctly. Try improvement this code.

CONST_UNDEFINED = -999999 # or None
CONST_MAX_VALUE = 999999
class RepairEngine:
    searchWindow1 = 70 # when diff < DIFF_TRESHOLD
    searchWindow2 = 14 # when diff < 3
    DIFF_TRESHOLD_1=68
    DIFF_TRESHOLD_2=3
    def __str__(self):
        return f"RepairEngine(searchWindow1={str(self.searchWindow1)}, searchWindow2={str(self.searchWindow2)}, DIFF_TRESHOLD_1={str(self.DIFF_TRESHOLD_1)})"
    
    def indexing(self, historySeq):
        if len(historySeq)<2 : raise BaseException("History sequence should have 2 or more files. historySeq="+str(historySeq))
        for text in historySeq: #markBrokenLines
            for l in text:
                l['hasBroken'] = self.checkBrokenLine(l['line'])
        for i in range(1,len(historySeq)):
            self.linkFiles(historySeq[i-1], historySeq[i])
    #end of indexing
    
    def linkFiles(self, lines1, lines2):
        p2=0
        seqenceLost=0
        for i in range(0, len(lines1)):
            #print(f"Progress {str(i+1)}/{len(lines1)} history items")
            myLine=lines1[i]
            p,thisK =self.searchLine(myLine['line'], lines2, p2, i);
            if ((p!=CONST_UNDEFINED) and (p>=0)):
                if (p2!=p):
                    #print(" new offset {str(i)} > {str(p)}")
                    p2=p
                myLine['nextPos']=p
                applyPatch = True
                if (lines2[p]['prePos']!=CONST_UNDEFINED) and (lines2[p]['prePos']!=i):
                    if lines2[p]['preK'] > thisK:
                        None
                    elif (lines2[p]['preK'] < thisK):
                        applyPatch = False
                    else:
                        None
                if applyPatch:
                    lines2[p]['prePos'] = i
                    lines2[p]['preK'] = thisK
                    if i != myLine['pos']:
                        print(f"WARN: i={str(i)} and pos={str(myLine['pos'])} not match! At line \"{str(myLine['line'])}\"");
                if seqenceLost>self.searchWindow1*3:
                    print(f"i={str(i)}, found p={str(p)} after lost search window");
                seqenceLost=0
            else:
                #print("i={str(i)} pos={str(p)} not found position")
                seqenceLost +=1
                if seqenceLost==self.searchWindow1*3:
                    print(f"i={str(i)}, p={str(p)} lost search window, next position search may be not correct");
            p2 +=1
    # end of linkFiles
    
    # TODO hard method. Need more unit test!    
    def searchLine(self, text, inLines, nearPos1:int, nearPos2:int):
        result=CONST_UNDEFINED
        resultD=CONST_MAX_VALUE
        for fromPos in [nearPos1,nearPos2] :
            if (fromPos==CONST_UNDEFINED): continue
            if (result!=CONST_UNDEFINED and nearPos1==nearPos2): break
            for d in range(0,self.searchWindow1):
                # todo mark previous interval and do not check against lines.
                p=fromPos+d
                if (0<= p) and (p <len(inLines)):
                    k = self.stringDiff(text, inLines[p]['line']);
                    if (k<=self.DIFF_TRESHOLD_1*3):
                        k+=min(20,d/40)
                        k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
                    if (resultD>k):
                        result, resultD = p, k
                if (d!=0):
                    p=fromPos-d
                    if (0<= p) and (p <len(inLines)):
                        k = self.stringDiff(text, inLines[p]['line'])
                        if (k<=self.DIFF_TRESHOLD_1*3):
                            k+=min(20,d/40)
                            k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
                        if resultD>k :
                            result, resultD = p, k
                if (resultD==0 or resultD<self.DIFF_TRESHOLD_2 and d>3 and d < self.searchWindow2):
                    break # stop search - it is best.
        if (resultD>self.DIFF_TRESHOLD_1):
            return -1, None # not found
        return result, resultD
    # end of searchLine
    
    not_allowed="?*"
    def checkBrokenLine(self, line):
        wrongChar=0
        questionChar=0
        for c in line:
            if c in self.not_allowed: wrongChar+=1
            #if not c in self.allowed: wrongChar+=1
            if c=='?':
                questionChar+=1
                #if c in self.not_allowed: wrongChar-=1
        return wrongChar>0 or (len(line)>5 and questionChar>5)
    # end of checkBrokenLine
    
    # Soft comparsion of 2 string
    # return  0 - equals, 2 equals ignore case and whitespace, 15 high diff but maybe same, Integer.MAX_VALUE absolutly different
    def stringDiff(self, l1, l2):
        if (l1==l2): return 0
        if (l1.lower()==l2.lower()): return 1
        l1=l1.replace("  ", " ").strip()
        l2=l2.replace("  ", " ").strip()
        #todo optimization: l1,l2 = l1.upper(),l2.upper()
        l1,l2=l1.upper(),l2.upper()
        
        #Расстояние Дамерау — Левенштейна  (https://ru.wikipedia.org/w/index.php?#title=%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%94%D0%B0%D0%BC%D0%B5%D1%80%D0%B0%D1%83_%E2%80%94_%D0%9B%D0%B5%D0%B2%D0%B5%D0%BD%D1%88%D1%82%D0%B5%D0%B9%D0%BD%D0%B0&oldid=100038662  Creative commons CC-BY-SA )
        def damerau_levenshtein_distance(s1, s2):
            d = {}
            lenstr1 = len(s1)
            lenstr2 = len(s2)
            for i in range(-1,lenstr1+1):
                d[(i,-1)] = i+1
            for j in range(-1,lenstr2+1):
                d[(-1,j)] = j+1
            for i in range(lenstr1):
                for j in range(lenstr2):
                    if s1[i] == s2[j]:
                        cost = 0
                    if (s1[i] in self.not_allowed) or (s2[j] in self.not_allowed): # modification weight
                        cost = 1
                    else:
                        cost = 3
                    d[(i,j)] = min(
                                   d[(i-1,j)] + 1, # deletion
                                   d[(i,j-1)] + 1, # insertion
                                   d[(i-1,j-1)] + cost, # substitution
                                  )
                    if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]:
                            d[(i,j)] = min(d[(i,j)], d[i-2,j-2] + 1) # transposition
            return d[lenstr1-1,lenstr2-1]
        
        xDiff=abs(len(l1)-len(l2)) + damerau_levenshtein_distance(l1,l2)
        
        if (len(l1)<3 or len(l2)<3 or abs(len(l1)-len(l2))>30):
             return CONST_MAX_VALUE
        xDiff = xDiff *300 / len(l1)
        if (xDiff>CONST_MAX_VALUE-1000):
            return CONST_MAX_VALUE-100
        return round(xDiff)
    # end of stringDiff
    
    def repair(self, historySeq, itm:int=-1):
        if (itm==0): raise BaseException("Can not repair first item[0] from future!")
        if (itm<0): itm=len(historySeq)-1
        #log.debug("Start repair history[{}] \"{}\"", itm, historySeq.get(itm).name);
        lines=historySeq[itm]
        out=[]
        statRepair, statHistoryUsedDepth, statHistoryWatchDepth, statNotFound = 0,0,0,0
        print(" repair lines:"+str(len(lines)))
        for line in lines:
            print(" repair line>"+repr(line)+"  pos="+str(line['pos'])+" test="+str(line))
            if (line['hasBroken']):
                oldP=line['prePos']
                depth=0
                i=itm-1
                while (oldP!=CONST_UNDEFINED and i>=0):
                    depth+=1
                    statHistoryWatchDepth=max(statHistoryWatchDepth, depth)
                    oldL=historySeq[i][oldP]
                    if (not oldL['hasBroken']):
                        print(" repair line<use old source")
                        out.append(oldL['line'])
                        #log.trace(" [{}][{}] line {}({})> repair from \"{}\" to \"{}\"",
                        #        itm, i, line.pos, oldP, line.line, oldL.line);
                        statRepair+=1
                        statHistoryUsedDepth=max(statHistoryUsedDepth, depth)
                        break
                    #else:
                    #    log.trace(" [{}][{}] line {}({})> go deep", itm, i, line.pos, oldP);
                    oldP=oldL['prePos']
                    i-=1
                if (oldP==CONST_UNDEFINED) :
                    print(" repair line<not found. ", line['line'])
                    out.append(line['line']) #no modify
                    statNotFound+=1
            else:
                print(" repair line<no need")
                out.append(line['line'])
        print(f"Repair complete: repair {statRepair} lines, max history depth = {statHistoryUsedDepth}, max history watch depth = {statHistoryWatchDepth}, not found {statNotFound} lines.")
        print(" repair out lines:"+str(len(out)))
        return out
    # end of repair
    
    def process(self, historySeq, i_to_repair:int=-1):
        hSeq=[]
        for histText in historySeq:
            lines=[]
            i=0
            for l in histText:
                tl={'pos':-1, 'line':l, 'hasBroken':False,
    'prePos':CONST_UNDEFINED, 'preK':-1.0, 'nextPos':CONST_UNDEFINED}
                lines.append(tl)
            hSeq.append(lines)
        print("indexing...")
        idx=self.indexing(hSeq)
        print("pepair n="+str(i_to_repair))
        repair=self.repair(hSeq, i_to_repair)
        print("done")
        return repair

def repair_text_by_history(history_of_text):
    repairer=RepairEngine()
    text=repairer.process(history_of_text, len(history_of_text)-1)
    return text

import codewars_test as test
from solution import repair_text_by_history
import numpy as np

def text_generator(sample_sentence, depth=4, sentences_up_to=3,rand_seed=100):
    rand=np.random.RandomState(rand_seed)
    hist=[]
    hist_broken=[]
    prev=[]
    prev_broken=[]
    def f_make_line():
        l=''
        for i in range(rand.randint(1,sentences_up_to)):
            if len(l)>0:
                l+=' '
            l+=rand.choice(sample_sentence)+rand.choice([c for c in '......???!!;'])
        #print('new:', l)
        return l;
    def f_break_line(l):
        o=''
        bk=rand.randint(8,20)
        for c in l:
            if rand.randint(16) <= bk :
                o+='?'
            else:
                o+=c
        #print('break:', l,' > ',o)
        return o
    new_break=False # first history was good
    for j in range(depth):
        text=[]
        text_broken=[]
        for i in range(len(prev)):
            if rand.randint(15) >= 13 : # corrosion line
                text.append(prev[i])
                text_broken.append(f_break_line(prev_broken[i]))
            elif rand.randint(20) >= 18 :
                continue # delete
            else:
                text.append(prev[i])
                text_broken.append(prev_broken[i])
            if rand.randint(25) >= 20 :
                for i in range(1,rand.randint(10)):
                    l=f_make_line()
                    text.append(l)
                    if new_break and (rand.randint(15) >= 13) :
                        text_broken.append(f_break_line(l))
                    else:
                        text_broken.append(l)
        # append text
        if rand.randint(10) >= 7 :
            for i in range(1,rand.randint(20)):
                l=f_make_line()
                text.append(l)
                if new_break and (rand.randint(15) >= 13) :
                    text_broken.append(f_break_line(l))
                else:
                    text_broken.append(l)
        
        prev=text
        hist.append(text)
        prev_broken=text_broken
        hist_broken.append(text_broken)
        new_broke=True
    return hist_broken, hist

# test.assert_equals(actual, expected, [optional] message)
@test.describe("Repair text test")
def test_group():
    @test.it("Fixed test case")
    def test_case():
        data1=[
[
    'Hello, world!',
    'THIS is first version.'
],[
    'Hello, world.'
    'This is first ???????.'
],[
    'Hello world',
    'Line 2.',
    'This is first ?????on.',
    'There can be large document'
],[
    'Hello.',
    '???? ??',
    '???????????? ?????on.',
    'There can be large document'
],[
    'Hello.',
    '???? ??',
    '???????????? ?????on.',
    'There can be large document',
    '',
    'Simple alhorithm for repair:',
    '',
    'Step 1.',
    '* mark broken symbols and lines',
    '',
    'Step 2.',
    '* find line number in history',
    '',
    'Step 3.',
    '* search good lines in history',
    '',
    'Step 4.',
    '* replace broken line, if it is can be true',
],[
    'Hello.',
    '???? ??',
    '???????????? ?????on.',
    'There can be large document',
    '',
    'Simple alhorithm for repair:',
    '',
    '???? ??',
    '* mark broken symbols and lines',
    '',
    'Step 2.',
    '? ???? line ?????? ?? ???????',
    '',
    '???? 3.',
    '* search good lines in history',
    '',
    'Step 4.',
    '* replace broken line, if it is can be true',
],[
    'Hello.',
    '???? ??',
    '???????????? ?????on.',
    'There can be large document',
    '',
    'Simple alhorithm for repair:',
    '',
    '???? ??',
    '* mark broken symbols and lines',
    '',
    'Step 2.',
    '? ???? line ?????? ?? ???????',
    '',
    '???? 3.',
    '* search good lines in history',
    '',
    'Step 4.',
    '* replace broken line, if it is can be true',
    '',
    'You can make more better alhorighm, or try improvement it.',
    'But do not repair string',
    'In cases, when it can be ambigous.',
    ''
],[
    'Hello.',
    '???? ??',
    '???????????? ?????on.',
    'There can be large document',
    '',
    'Simple alhorithm for repair:',
    '',
    '???? ??',
    '* mark broken symbols and lines',
    '',
    'Step 2.',
    '? ???? line ?????? ?? ???????',
    '',
    '???? 3.',
    '* search good lines in history',
    '',
    '???? ?.',
    '',
    'Step 4.',
    '* replace broken line, if it is can be true',
    '',
    'You can make more better alhorighm, or try improvement it.',
    '?? ?????? ???? ?? ??? ?? ?????????',
    '??? ?? ??? ?????? ??????',
    ''
]
    ]
        expected_text=[
    'Hello.',
    'Line 2.',
    'THIS is first version.' # TODO may be "This is first version."
    'There can be large document',
    '',
    'Simple alhorithm for repair:',
    '',
    'Step 1.',
    '* mark broken symbols and lines',
    '',
    'Step 2.',
    '* find line number in history',
    '',
    'Step 3.',
    '* search good lines in history',
    '',
    'Step 4.',
    '',
    'Step 4.',
    '* replace broken line, if it is can be true',
    '',
    'You can make more better alhorighm, or try improvement it.',
    '?? ?????? ???? ?? ??? ?? ?????????',
    '??? ?? ??? ?????? ??????',
    '']
        r_text=repair_text_by_history(data1)
        test.assert_equals(r_text, expected_text)
    
    @test.it("Random test case")
    def test_case_random():
        words=['','','Hello world','Line 2','This is first version','You can make more better alhorighm, or try improvement it',
              'You got damaged paper with text','And got previous edition of this text','Each next edition has any good changes and break character change',
               'In broken lines will be same character replaced to ''?''', 'Make solution for repair broken text by history']
        broken_t,original_t=text_generator(words, depth=5)
        expected_text = original_t[-1]
        print("Random history with ",str(len(broken_t))," items, last with ",str(len(expected_text))," lines")
        r_text=repair_text_by_history(broken_t)
        test.assert_equals(r_text, expected_text)

playerO1vs.p4songer

The Perplexing Path Puzzle

Code

Diff

def kimite(grid):
    rows, cols = len(grid), len(grid[0])
    # Burn method. Less optimal that Djikstra, but simple.
    hotmap=[[None for c in range(cols)] for r in range(rows)]
    hotmap[0][0]=grid[0][0] # first point
    changed=True
    itr=0
    while changed:
        changed=False
        for r in range(rows):
            for c in range(cols):
                d=[]
                for y in [r-1,r+1,r]:
                    for x in [c-1,c+1,c]:
                        if (x>=0)and(y>=0) and (x<cols)and(y<rows) and ((x==c)or(y==r)) and (hotmap[y][x]!=None):
                            d.append(hotmap[y][x])
                if len(d)>0:
                    k=grid[r][c]+min(d)
                    if (hotmap[r][c]!=None) and (hotmap[r][c]<k):
                        k=hotmap[r][c]
                    if k!=hotmap[r][c]:
                        changed=True
                        hotmap[r][c]=k
        itr+=1
    print('Iteration for search '+str(itr))
    #print(hotmap)
    total_cost = hotmap[-1][-1]
    return total_cost

- def kimite(grid):
- rows, cols = len(grid), len(grid[0])
- ~~position = (0, 0)~~
- ~~seen = set()~~
- ~~total_cost = 0~~
- ~~# Helper function to find the min cost based on current coordinates~~
- ~~def get_step_cost(*directions, ):~~
- ~~compare = sorted([i for i in directions if i != None], key=lambda x: x[0])~~
- multiple = [x for x in [i for i in directions if i != None] if x[0] == compare[0][0]]
- ~~if len(multiple) > 1:~~
- ~~for i in multiple:~~
- ~~if i[1] == 'right':~~
- ~~return i~~
- for i in multiple:
- ~~if i[1] == 'down':~~
- ~~return i~~
- else:
- ~~return compare[0]~~
- ~~# Helper function to find polar directions~~
- def get_direction():
- ~~up, down, left, right = None, None, None, None~~
- ~~# Check Y~~
- if position[0] > 0 and (position[0] - 1, position[1]) not in seen:
- ~~up = (grid[position[0] - 1][position[1]], 'up')~~
- if position[0] + 1 < rows and (position[0] + 1, position[1]) not in seen:
- ~~down = (grid[position[0] + 1][position[1]], 'down')~~
- ~~# Check X~~
- if position[1] > 0 and (position[0], position[1] - 1) not in seen:
- ~~left = (grid[position[0]][position[1] - 1], 'left')~~
- if position[1] + 1 < cols and (position[0], position[1] + 1) not in seen:
- ~~right = (grid[position[0]][position[1] + 1], 'right')~~
- ~~return (up, down, left, right)~~
- ~~# Traverse the grid to find the minimum cost path~~
- while position != (rows - 1, cols - 1):
- ~~direction = get_direction()~~
- cost, move = get_step_cost(*direction)
- ~~if move == 'up':~~
- ~~position = (position[0] - 1, position[1])~~
- ~~total_cost += cost~~
- ~~seen.add(position)~~
- ~~continue~~
- if move == 'down':
- ~~position = (position[0] + 1, position[1])~~
- ~~total_cost += cost~~
- ~~seen.add(position)~~
- ~~continue~~
- ~~if move == 'left':~~
- ~~position = (position[0], position[1] - 1)~~
- ~~total_cost += cost~~
- ~~seen.add(position)~~
- ~~continue~~
- ~~if move == 'right':~~
- position = (position[0], position[1] + 1)
- total_cost += cost
- ~~seen.add(position)~~
- # Burn method. Less optimal that Djikstra, but simple.
- hotmap=[[None for c in range(cols)] for r in range(rows)]
- hotmap[0][0]=grid[0][0] # first point
- changed=True
- itr=0
- while changed:
- changed=False
- for r in range(rows):
- for c in range(cols):
- d=[]
- for y in [r-1,r+1,r]:
- for x in [c-1,c+1,c]:
- if (x>=0)and(y>=0) and (x<cols)and(y<rows) and ((x==c)or(y==r)) and (hotmap[y][x]!=None):
- d.append(hotmap[y][x])
- if len(d)>0:
- k=grid[r][c]+min(d)
- if (hotmap[r][c]!=None) and (hotmap[r][c]<k):
- k=hotmap[r][c]
- if k!=hotmap[r][c]:
- changed=True
- hotmap[r][c]=k
- itr+=1
- print('Iteration for search '+str(itr))
- #print(hotmap)
- total_cost = hotmap[-1][-1]
- return total_cost

playerO1vs.playerO1

Text compression

Code

Diff

def text():
    d,k,r,s="description","Kata or Kumite","return","symbols"
    return f"""Compress large text - {d} of this {k} in small code.
This {k} {d} has 462 {s} in 3 line, without title. You should wrote function, {r} this text, but you should use less {s} in you'r code that this text. Simple solution - just {r} \"\"\"source text\"\"\". More smart variant is: do mark often word as special {s} and replace his before {r}.
How small code can be? Can you wrote more compressing? Let's check it!"""

- ~~#sample code:~~
- def text():
- the_text = """Compress large text - description of this Kata or Kumite in small code.
- This Kata or Kumite description has 462 symbols in 3 line, without title. You should wrote function, return this text, but you should use less symbols in you'r code that this text. Simple solution - just return \"\"\"source text\"\"\". More smart variant is: do mark often word as special symbols and replace his before return.
- d,k,r,s="description","Kata or Kumite","return","symbols"
- return f"""Compress large text - {d} of this {k} in small code.
- This {k} {d} has 462 {s} in 3 line, without title. You should wrote function, {r} this text, but you should use less {s} in you'r code that this text. Simple solution - just {r} \"\"\"source text\"\"\". More smart variant is: do mark often word as special {s} and replace his before {r}.
- How small code can be? Can you wrote more compressing? Let's check it!"""
- ~~return the_text # Do make 'def text()' more short~~

Test Cases

Diff

import codewars_test as test
import inspect
import solution # from solution

# test.assert_equals(actual, expected, [optional] message)
@test.describe("Test of compression text")
def test_group():
    # todo please, correct this number after make cnage
    method_size=504
    module_size=504
    
    text_size=464
    text_expected="""Compress large text - description of this Kata or Kumite in small code.
This Kata or Kumite description has 462 symbols in 3 line, without title. You should wrote function, return this text, but you should use less symbols in you'r code that this text. Simple solution - just return \"\"\"source text\"\"\". More smart variant is: do mark often word as special symbols and replace his before return.
How small code can be? Can you wrote more compressing? Let's check it!"""
    
    @test.it("Text check")
    def test_case_text():
        expected = text_expected
        test.assert_equals(solution.text(), expected)
        test.assert_equals(solution.text(), expected)
    
    @test.it("Test of test")
    def test_text():
        expected = text_expected
        test.assert_equals(len(text_expected), text_size)
        src=inspect.getsource(solution)
        src_len=len(src)
        test.assert_equals((src_len>= 5),True, "Test not see solution source")
    
    @test.it("Source function check")
    def test_case_source_function():
        src=inspect.getsource(solution.text)
        src_len=len(src)
        #test.assert_equals(src, "see method")
        print("Size of source code 'def test()' is "+str(src_len))
        test.assert_equals((src_len<= method_size),True, "Function 'def test()' too large: "+str(src_len))
        test.assert_equals((src_len>= 4),True, "How wrote methd more small that he's call?") # test should see method
        if src_len< method_size: print(f"New function less up to {method_size-src_len} symbol!")
    
    @test.it("Source module check")
    def test_case_source_function():
        src=inspect.getsource(solution)
        src_len=len(src)
        print("Size of all source code length is "+str(src_len))
        test.assert_equals((src_len<= module_size),True, "Source code of all module too large: "+str(src_len)+". Do not hidde data out of the function.")
        if src_len< module_size: print(f"New code less up to {module_size-src_len} symbol!")
        if (src_len<text_size): print(f"Congradulation! The code less that text at {text_size-src_len} symbol.")

- import codewars_test as test
- import inspect
- import solution # from solution
- # test.assert_equals(actual, expected, [optional] message)
- @test.describe("Test of compression text")
- def test_group():
- # todo please, correct this number after make cnage
- ~~method_size=560~~
- ~~module_size=574~~
- method_size=504
- module_size=504
- ~~text_size=466~~
- text_size=464
- text_expected="""Compress large text - description of this Kata or Kumite in small code.
- This Kata or Kumite description has 462 symbols in 3 line, without title. You should wrote function, return this text, but you should use less symbols in you'r code that this text. Simple solution - just return \"\"\"source text\"\"\". More smart variant is: do mark often word as special symbols and replace his before return.
- How small code can be? Can you wrote more compressing? Let's check it!"""
- @test.it("Text check")
- def test_case_text():
- expected = text_expected
- test.assert_equals(solution.text(), expected)
- test.assert_equals(solution.text(), expected)
- @test.it("Test of test")
- def test_text():
- expected = text_expected
- test.assert_equals(len(text_expected), text_size)
- src=inspect.getsource(solution)
- src_len=len(src)
- test.assert_equals((src_len>= 5),True, "Test not see solution source")
- @test.it("Source function check")
- def test_case_source_function():
- src=inspect.getsource(solution.text)
- src_len=len(src)
- #test.assert_equals(src, "see method")
- print("Size of source code 'def test()' is "+str(src_len))
- test.assert_equals((src_len<= method_size),True, "Function 'def test()' too large: "+str(src_len))
- test.assert_equals((src_len>= 4),True, "How wrote methd more small that he's call?") # test should see method
- if src_len< method_size: print(f"New function less up to {method_size-src_len} symbol!")
- @test.it("Source module check")
- def test_case_source_function():
- src=inspect.getsource(solution)
- src_len=len(src)
- print("Size of all source code length is "+str(src_len))
- test.assert_equals((src_len<= module_size),True, "Source code of all module too large: "+str(src_len)+". Do not hidde data out of the function.")
- ~~if src_len< module_size: print(f"New code less up to {method_size-src_len} symbol!")~~
- if src_len< module_size: print(f"New code less up to {module_size-src_len} symbol!")
- if (src_len<text_size): print(f"Congradulation! The code less that text at {text_size-src_len} symbol.")

playerO1vs.Shirak

Reading and Writing to Matrix

Matrix

Code

Diff

class OperationError(Exception):
    def __init__(self, message):            
        super().__init__(message)

def matrix(matrix=list, operation=str, location=tuple, writedata=None):
    def op_write(i,j):
        matrix[i][j] = writedata
        return matrix
    op={"read":lambda i,j: matrix[i][j], "write":op_write}
    try:
        if not operation in op: raise OperationError("That is not a valid operation for this system.")
        return op[operation](location[0],location[1])
    except Exception as e:
        print(e)
        return -1

- class OperationError(Exception):
- def __init__(self, message):
- super().__init__(message)
- def matrix(matrix=list, operation=str, location=tuple, writedata=None):
- def op_write(i,j):
- matrix[i][j] = writedata
- return matrix
- op={"read":lambda i,j: matrix[i][j], "write":op_write}
- try:
- if operation == "read":
- ~~return matrix[location[0]][location[1]]~~
- ~~elif operation == "write":~~
- ~~matrix[location[0]][location[1]] = writedata~~
- ~~return matrix~~
- ~~else:~~
- ~~raise OperationError("That is not a valid operation for this system.")~~
- if not operation in op: raise OperationError("That is not a valid operation for this system.")
- return op[operation](location[0],location[1])
- except Exception as e:
- print(e)
- ~~return -1~~
- return -1

playerO1

Text compression

Compress large text - description of this Kata or Kumite in small code.

This Kata or Kumite description has 462 symbols in 3 line, without title. You should wrote function, return this text, but you should use less symbols in you'r code that this text. Simple solution - just return """source text""". More smart variant is: do mark often word as special symbols and replace his before return.

How small code can be? Can you wrote more compressing? Let's check it!

#sample code:
def text():
    the_text = """Compress large text - description of this Kata or Kumite in small code.

This Kata or Kumite description has 462 symbols in 3 line, without title. You should wrote function, return this text, but you should use less symbols in you'r code that this text. Simple solution - just return \"\"\"source text\"\"\". More smart variant is: do mark often word as special symbols and replace his before return.

How small code can be? Can you wrote more compressing? Let's check it!"""
    return the_text # Do make 'def text()' more short

import codewars_test as test
import inspect
import solution # from solution

# test.assert_equals(actual, expected, [optional] message)
@test.describe("Test of compression text")
def test_group():
    # todo please, correct this number after make cnage
    method_size=560
    module_size=574
    
    text_size=466
    text_expected="""Compress large text - description of this Kata or Kumite in small code.

This Kata or Kumite description has 462 symbols in 3 line, without title. You should wrote function, return this text, but you should use less symbols in you'r code that this text. Simple solution - just return \"\"\"source text\"\"\". More smart variant is: do mark often word as special symbols and replace his before return.

How small code can be? Can you wrote more compressing? Let's check it!"""
    
    @test.it("Text check")
    def test_case_text():
        expected = text_expected
        test.assert_equals(solution.text(), expected)
        test.assert_equals(solution.text(), expected)
    
    @test.it("Test of test")
    def test_text():
        expected = text_expected
        test.assert_equals(len(text_expected), text_size)
        src=inspect.getsource(solution)
        src_len=len(src)
        test.assert_equals((src_len>= 5),True, "Test not see solution source")
    
    @test.it("Source function check")
    def test_case_source_function():
        src=inspect.getsource(solution.text)
        src_len=len(src)
        #test.assert_equals(src, "see method")
        print("Size of source code 'def test()' is "+str(src_len))
        test.assert_equals((src_len<= method_size),True, "Function 'def test()' too large: "+str(src_len))
        test.assert_equals((src_len>= 4),True, "How wrote methd more small that he's call?") # test should see method
        if src_len< method_size: print(f"New function less up to {method_size-src_len} symbol!")
    
    @test.it("Source module check")
    def test_case_source_function():
        src=inspect.getsource(solution)
        src_len=len(src)
        print("Size of all source code length is "+str(src_len))
        test.assert_equals((src_len<= module_size),True, "Source code of all module too large: "+str(src_len)+". Do not hidde data out of the function.")
        if src_len< module_size: print(f"New code less up to {method_size-src_len} symbol!")

playerO1vs.ShadowReW

C++ Simple Calculator

Code

Diff

#include <ostream>
using namespace std;

string calculator(char op, int a, int b) {
    stringstream s;
    switch (op) {
        case '+': {s<<a+b; break;}
        case '-': {s<<a-b; break;}
        case '*': {s<<a*b; break;}
        case '%': {s<<a%b; break;}
        case '/': {if (b != 0) {s<<a/b;break;}/*else default: Invalid Input!*/}
        default:  {s<<"Invalid Input!";}
    }
    return s.str();
}

- #include <ostream>
- using namespace std;
- string calculator(char op, int a, int b) {
- stringstream s;
- switch (op) {
- ~~case '+': {s<<a+b; return s.str();}~~
- ~~case '-': {s<<a-b; return s.str();}~~
- ~~case '*': {s<<a*b; return s.str();}~~
- ~~case '/': {b != 0 ? s<<a/b : s<<"Invalid Input!"; return s.str();}~~
- ~~case '%': {s<<a%b; return s.str();}~~
- ~~default: return "Invalid Input!";~~
- case '+': {s<<a+b; break;}
- case '-': {s<<a-b; break;}
- case '*': {s<<a*b; break;}
- case '%': {s<<a%b; break;}
- case '/': {if (b != 0) {s<<a/b;break;}/*else default: Invalid Input!*/}
- default: {s<<"Invalid Input!";}
- }
- return s.str();
- }

The old history text

Confirm

Collect: kata

You have not created any collections yet.