repairs your list cur
(current) with the knowledge from the list pre
(previous).
print(repair(['line 1', '???? 2', 'line 3'], ['line 1', 'line 2']))
# ['line 1', 'line 2', 'line 3']
Previous kumite solition and details formalization was good.
For increase hard of solution I add additional hard test case of:
- the number of broken fragment can be more that 1.
- the length of broken fragment can be same, in this case should tetect nearest by position
- sametime has bad input data - impossible length of repaired text - do not repair it.
import codewars_test as test import random, string # TODO Write tests from solution import repair def tru_repair(cur: list, pre: list): broken_index = [i.count("?") > 0 for i in cur].index(True) known_part = list(filter(lambda x: x[1] != '', enumerate(map(lambda x: x if x != "?" else "", [*cur[broken_index]])))) matching_pre_index = [pre.index(i) for i in pre if [(o, i) for o, i in enumerate([*i]) if (o, i) in known_part] == known_part and len(i) == len(cur[broken_index])][0] cur[broken_index] = pre[matching_pre_index] return cur # test.assert_equals(actual, expected, [optional] message) @test.describe("Example") def test_group(): @test.it("static tests") def _(): test.assert_equals(repair(['line 1', '???? 2', 'line 3'], ['line 1', 'line 2']),['line 1', 'line 2', 'line 3']) test.assert_equals(repair(['hi', 'my name is', '????????HRUM'], ['hi my name is', 'HRUMHRUMHRUM']), ['hi', 'my name is', 'HRUMHRUMHRUM']) test.assert_equals(repair(['what', 'was', 'pl?????1', 'thinking', 'when', 'writing this code'], ['what was', 'playerO1', "thinking when writing this code"]), ['what', 'was', 'playerO1', 'thinking', 'when', 'writing this code']) test.assert_equals(repair(['boy oh boy do I LOVE f?x?n? code'], ['boy oh boy do I LOVE fixing code']), ['boy oh boy do I LOVE fixing code']) @test.it("static tests, level 2") def _(): test.assert_equals(repair(['line 1','', '???? 2', '', 'line 3','.'], ['line 1', 'line 2','?']),['line 1','', 'line 2', '', 'line 3','.']) test.assert_equals(repair(['Sequence of','??????','????','???? ?????????','length'], ['The','Sequense of','broken','word','with different','length']), ['Sequence of','broken','word','with different','length']) test.assert_equals(repair(['Change number','??',' test ','????',' should do',' work too'], ['Another line, will be deleted', 'Change number','of','word','should do work too']), ['Change number','of',' test ','word',' should do',' work too']) test.assert_equals(repair(['Swap', '??', '???', 'big lines', 'near'], ['Swap', 'two', 'of', 'big lines', 'near']), ['Swap', 'of', 'two', 'big lines', 'near']) test.assert_equals(repair(['Same count','???',' ','???','????','????','.'], ['Same count','but',' ','not','same','word','.']), ['Same count','but',' ','not','same','word','.']) test.assert_equals(repair(['to repair','???? ???','?'], ['It is impossible','to repair',' too many error and many different']), ['to repair','???? ???','?']) @test.it("random tests") def _(): def break_str(st: str): return "".join(map(lambda x: x if random.getrandbits(1) else "?", [*st])) for i in range(1000): sts = ["".join(random.choices([*string.ascii_letters], k=random.randint(5, 30))) for _ in range(random.randint(10, 30))] broken_sts = list(map(lambda x: break_str(x), sts)) if random.getrandbits(1): broken_sts.append("new word") if random.getrandbits(1): sts.append("old word") test.assert_equals(repair(broken_sts, sts), tru_repair(broken_sts, sts))
- import codewars_test as test
- import random, string
- # TODO Write tests
- from solution import repair
- def tru_repair(cur: list, pre: list):
- broken_index = [i.count("?") > 0 for i in cur].index(True)
- known_part = list(filter(lambda x: x[1] != '', enumerate(map(lambda x: x if x != "?" else "", [*cur[broken_index]]))))
- matching_pre_index = [pre.index(i) for i in pre if [(o, i) for o, i in enumerate([*i]) if (o, i) in known_part] == known_part and len(i) == len(cur[broken_index])][0]
- cur[broken_index] = pre[matching_pre_index]
- return cur
- # test.assert_equals(actual, expected, [optional] message)
- @test.describe("Example")
- def test_group():
- @test.it("static tests")
- def _():
- test.assert_equals(repair(['line 1', '???? 2', 'line 3'], ['line 1', 'line 2']),['line 1', 'line 2', 'line 3'])
- test.assert_equals(repair(['hi', 'my name is', '????????HRUM'], ['hi my name is', 'HRUMHRUMHRUM']), ['hi', 'my name is', 'HRUMHRUMHRUM'])
- test.assert_equals(repair(['what', 'was', 'pl?????1', 'thinking', 'when', 'writing this code'], ['what was', 'playerO1', "thinking when writing this code"]), ['what', 'was', 'playerO1', 'thinking', 'when', 'writing this code'])
- test.assert_equals(repair(['boy oh boy do I LOVE f?x?n? code'], ['boy oh boy do I LOVE fixing code']), ['boy oh boy do I LOVE fixing code'])
- @test.it("static tests, level 2")
- def _():
- test.assert_equals(repair(['line 1','', '???? 2', '', 'line 3','.'], ['line 1', 'line 2','?']),['line 1','', 'line 2', '', 'line 3','.'])
- test.assert_equals(repair(['Sequence of','??????','????','???? ?????????','length'], ['The','Sequense of','broken','word','with different','length']), ['Sequence of','broken','word','with different','length'])
- test.assert_equals(repair(['Change number','??',' test ','????',' should do',' work too'], ['Another line, will be deleted', 'Change number','of','word','should do work too']), ['Change number','of',' test ','word',' should do',' work too'])
- test.assert_equals(repair(['Swap', '??', '???', 'big lines', 'near'], ['Swap', 'two', 'of', 'big lines', 'near']), ['Swap', 'of', 'two', 'big lines', 'near'])
- test.assert_equals(repair(['Same count','???',' ','???','????','????','.'], ['Same count','but',' ','not','same','word','.']), ['Same count','but',' ','not','same','word','.'])
- test.assert_equals(repair(['to repair','???? ???','?'], ['It is impossible','to repair',' too many error and many different']), ['to repair','???? ???','?'])
- @test.it("random tests")
- def _():
- def break_str(st: str):
- return "".join(map(lambda x: x if random.getrandbits(1) else "?", [*st]))
- for i in range(1000):
sts = ["".join(random.choices([*string.ascii_letters], k=random.randint(10, 30))) for _ in range(random.randint(10, 30))]- sts = ["".join(random.choices([*string.ascii_letters], k=random.randint(5, 30))) for _ in range(random.randint(10, 30))]
- broken_sts = list(map(lambda x: break_str(x), sts))
- if random.getrandbits(1): broken_sts.append("new word")
- if random.getrandbits(1): sts.append("old word")
- test.assert_equals(repair(broken_sts, sts), tru_repair(broken_sts, sts))
Same bugfix. Now, the code has more work.
# This solution not vork correctly. Try improvement this code. CONST_UNDEFINED = -999999 # or None CONST_MAX_VALUE = 999999 class RepairEngine: searchWindow1 = 70 # when diff < DIFF_TRESHOLD searchWindow2 = 14 # when diff < 3 DIFF_TRESHOLD_1=68 DIFF_TRESHOLD_2=3 def __str__(self): return f"RepairEngine(searchWindow1={str(self.searchWindow1)}, searchWindow2={str(self.searchWindow2)}, DIFF_TRESHOLD_1={str(self.DIFF_TRESHOLD_1)})" def indexing(self, historySeq): if len(historySeq)<2 : raise BaseException("History sequence should have 2 or more files. historySeq="+str(historySeq)) for text in historySeq: #markBrokenLines for l in text: l['hasBroken'] = self.checkBrokenLine(l['line']) for i in range(1,len(historySeq)): self.linkFiles(historySeq[i-1], historySeq[i]) #end of indexing def linkFiles(self, lines1, lines2): p2=0 seqenceLost=0 for i in range(0, len(lines1)): #print(f"Progress {str(i+1)}/{len(lines1)} history items") myLine=lines1[i] p,thisK =self.searchLine(myLine['line'], lines2, p2, i); #print('p=',p,' thisK=',thisK,' i=',i,' text:', myLine['line']) if ((p!=CONST_UNDEFINED) and (p>=0)): if (p2!=p): #print(" new offset {str(i)} > {str(p)}") p2=p myLine['nextPos']=p applyPatch = True if (lines2[p]['prePos']!=CONST_UNDEFINED) and (lines2[p]['prePos']!=i): if lines2[p]['preK'] > thisK: None elif (lines2[p]['preK'] < thisK): applyPatch = False else: None if applyPatch: lines2[p]['prePos'] = i lines2[p]['preK'] = thisK if i != myLine['pos']: print(f"WARN: i={str(i)} and pos={str(myLine['pos'])} not match! At line \"{str(myLine['line'])}\""); if seqenceLost>self.searchWindow1*3: print(f"i={str(i)}, found p={str(p)} after lost search window"); seqenceLost=0 else: #print("i={str(i)} pos={str(p)} not found position") seqenceLost +=1 if seqenceLost==self.searchWindow1*3: print(f"i={str(i)}, p={str(p)} lost search window, next position search may be not correct"); p2 +=1 # end of linkFiles # TODO hard method. Need more unit test! def searchLine(self, text, inLines, nearPos1:int, nearPos2:int): result=CONST_UNDEFINED resultD=CONST_MAX_VALUE for fromPos in [nearPos1,nearPos2] : if (fromPos==CONST_UNDEFINED): continue if (result!=CONST_UNDEFINED and nearPos1==nearPos2): break for d in range(0,self.searchWindow1): # todo mark previous interval and do not check against lines. p=fromPos+d if (0<= p) and (p <len(inLines)): k = self.stringDiff(text, inLines[p]['line']); if (k<=self.DIFF_TRESHOLD_1*3): k+=min(20,d/40) k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50) #print('k=',k,' resultD=',resultD,'; t1="',text,'" t2="',inLines[p]['line'],'" ') if (resultD>k): result, resultD = p, k if (d!=0): p=fromPos-d if (0<= p) and (p <len(inLines)): k = self.stringDiff(text, inLines[p]['line']) if (k<=self.DIFF_TRESHOLD_1*3): k+=min(20,d/40) k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50) if resultD>k : result, resultD = p, k if (resultD==0 or resultD<self.DIFF_TRESHOLD_2 and d>3 and d < self.searchWindow2): break # stop search - it is best. if (resultD>self.DIFF_TRESHOLD_1): return -1, None # not found return result, resultD # end of searchLine not_allowed="?*" def checkBrokenLine(self, line): wrongChar=0 questionChar=0 for c in line: if c in self.not_allowed: wrongChar+=1 #if not c in self.allowed: wrongChar+=1 if c=='?': questionChar+=1 #if c in self.not_allowed: wrongChar-=1 return wrongChar>0 or (len(line)>5 and questionChar>5) # end of checkBrokenLine # Soft comparsion of 2 string # return 0 - equals, 2 equals ignore case and whitespace, 15 high diff but maybe same, Integer.MAX_VALUE absolutly different def stringDiff(self, l1, l2): if (l1==l2): #print('compare ',l1,l2," EQ=0") return 0 if (l1.lower()==l2.lower()): #print('compare ',l1,l2," EQ=1") return 1 l1=l1.replace(" ", " ").strip() l2=l2.replace(" ", " ").strip() #todo optimization: l1,l2 = l1.upper(),l2.upper() l1,l2=l1.upper(),l2.upper() if (len(l1)==len(l2)): eq=True for i in range(len(l1)): eq=eq and (l1[i]==l2[i]) or (l1[i] in self.not_allowed) or (l1[i] in self.not_allowed) if eq: #print('compare ',l1,l2," EQ=1,2") return 1 #Damerau–Levenshtein distance / Расстояние Дамерау — Левенштейна (https://ru.wikipedia.org/w/index.php?#title=%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%94%D0%B0%D0%BC%D0%B5%D1%80%D0%B0%D1%83_%E2%80%94_%D0%9B%D0%B5%D0%B2%D0%B5%D0%BD%D1%88%D1%82%D0%B5%D0%B9%D0%BD%D0%B0&oldid=100038662 Creative commons CC-BY-SA ) def damerau_levenshtein_distance(s1, s2): d = {} lenstr1 = len(s1) lenstr2 = len(s2) for i in range(-1,lenstr1+1): d[(i,-1)] = i+1 for j in range(-1,lenstr2+1): d[(-1,j)] = j+1 for i in range(lenstr1): for j in range(lenstr2): if s1[i] == s2[j]: cost = 0 elif (s1[i] in self.not_allowed) or (s2[j] in self.not_allowed): # modification weight cost = 0 else: cost = 3 d[(i,j)] = min( d[(i-1,j)] + 1, # deletion d[(i,j-1)] + 1, # insertion d[(i-1,j-1)] + cost, # substitution ) if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]: d[(i,j)] = min(d[(i,j)], d[i-2,j-2] + 1) # transposition return d[lenstr1-1,lenstr2-1] xDiff=abs(len(l1)-len(l2)) + damerau_levenshtein_distance(l1,l2) if (len(l1)<3 or len(l2)<3 or abs(len(l1)-len(l2))>30): #print('compare ',l1,l2," EQ=",CONST_MAX_VALUE) return CONST_MAX_VALUE xDiff = xDiff *300 / len(l1) if (xDiff>CONST_MAX_VALUE-1000): #print('compare ',l1,l2," EQ=",CONST_MAX_VALUE-100) return CONST_MAX_VALUE-100 #print('compare ',l1,l2," EQ=",xDiff) return round(xDiff) # end of stringDiff def repair(self, historySeq, itm:int=-1): if (itm==0): raise BaseException("Can not repair first item[0] from future!") if (itm<0): itm=len(historySeq)-1 #log.debug("Start repair history[{}] \"{}\"", itm, historySeq.get(itm).name); lines=historySeq[itm] out=[] statRepair, statHistoryUsedDepth, statHistoryWatchDepth, statNotFound = 0,0,0,0 print(" repair lines:"+str(len(lines))) for line in lines: print(" repair line>"+repr(line)+" pos="+str(line['pos'])+" test="+str(line)) if (line['hasBroken']): oldP=line['prePos'] depth=0 i=itm-1 while (oldP!=CONST_UNDEFINED and i>=0): depth+=1 statHistoryWatchDepth=max(statHistoryWatchDepth, depth) oldL=historySeq[i][oldP] if (not oldL['hasBroken']): print(" repair line<use old source") out.append(oldL['line']) #log.trace(" [{}][{}] line {}({})> repair from \"{}\" to \"{}\"", # itm, i, line.pos, oldP, line.line, oldL.line); statRepair+=1 statHistoryUsedDepth=max(statHistoryUsedDepth, depth) break #else: # log.trace(" [{}][{}] line {}({})> go deep", itm, i, line.pos, oldP); oldP=oldL['prePos'] i-=1 if (oldP==CONST_UNDEFINED) : print(" repair line<not found. ", line['line']) out.append(line['line']) #no modify statNotFound+=1 else: print(" repair line<no need") out.append(line['line']) print(f"Repair complete: repair {statRepair} lines, max history depth = {statHistoryUsedDepth}, max history watch depth = {statHistoryWatchDepth}, not found {statNotFound} lines.") print(" repair out lines:"+str(len(out))) return out # end of repair def process(self, historySeq, i_to_repair:int=-1): hSeq=[] for histText in historySeq: lines=[] i=0 for l in histText: tl={'pos':i, 'line':l, 'hasBroken':False, 'prePos':CONST_UNDEFINED, 'preK':-1.0, 'nextPos':CONST_UNDEFINED} lines.append(tl) i+=1 hSeq.append(lines) print("indexing...") idx=self.indexing(hSeq) print("pepair n="+str(i_to_repair)) repair=self.repair(hSeq, i_to_repair) print("done") return repair def repair_text_by_history(history_of_text): repairer=RepairEngine() text=repairer.process(history_of_text, len(history_of_text)-1) return text
- # This solution not vork correctly. Try improvement this code.
- CONST_UNDEFINED = -999999 # or None
- CONST_MAX_VALUE = 999999
- class RepairEngine:
- searchWindow1 = 70 # when diff < DIFF_TRESHOLD
- searchWindow2 = 14 # when diff < 3
- DIFF_TRESHOLD_1=68
- DIFF_TRESHOLD_2=3
- def __str__(self):
- return f"RepairEngine(searchWindow1={str(self.searchWindow1)}, searchWindow2={str(self.searchWindow2)}, DIFF_TRESHOLD_1={str(self.DIFF_TRESHOLD_1)})"
- def indexing(self, historySeq):
- if len(historySeq)<2 : raise BaseException("History sequence should have 2 or more files. historySeq="+str(historySeq))
- for text in historySeq: #markBrokenLines
- for l in text:
- l['hasBroken'] = self.checkBrokenLine(l['line'])
- for i in range(1,len(historySeq)):
- self.linkFiles(historySeq[i-1], historySeq[i])
- #end of indexing
- def linkFiles(self, lines1, lines2):
- p2=0
- seqenceLost=0
- for i in range(0, len(lines1)):
- #print(f"Progress {str(i+1)}/{len(lines1)} history items")
- myLine=lines1[i]
- p,thisK =self.searchLine(myLine['line'], lines2, p2, i);
- #print('p=',p,' thisK=',thisK,' i=',i,' text:', myLine['line'])
- if ((p!=CONST_UNDEFINED) and (p>=0)):
- if (p2!=p):
- #print(" new offset {str(i)} > {str(p)}")
- p2=p
- myLine['nextPos']=p
- applyPatch = True
- if (lines2[p]['prePos']!=CONST_UNDEFINED) and (lines2[p]['prePos']!=i):
- if lines2[p]['preK'] > thisK:
- None
- elif (lines2[p]['preK'] < thisK):
- applyPatch = False
- else:
- None
- if applyPatch:
- lines2[p]['prePos'] = i
- lines2[p]['preK'] = thisK
- if i != myLine['pos']:
- print(f"WARN: i={str(i)} and pos={str(myLine['pos'])} not match! At line \"{str(myLine['line'])}\"");
- if seqenceLost>self.searchWindow1*3:
- print(f"i={str(i)}, found p={str(p)} after lost search window");
- seqenceLost=0
- else:
- #print("i={str(i)} pos={str(p)} not found position")
- seqenceLost +=1
- if seqenceLost==self.searchWindow1*3:
- print(f"i={str(i)}, p={str(p)} lost search window, next position search may be not correct");
- p2 +=1
- # end of linkFiles
- # TODO hard method. Need more unit test!
- def searchLine(self, text, inLines, nearPos1:int, nearPos2:int):
- result=CONST_UNDEFINED
- resultD=CONST_MAX_VALUE
- for fromPos in [nearPos1,nearPos2] :
- if (fromPos==CONST_UNDEFINED): continue
- if (result!=CONST_UNDEFINED and nearPos1==nearPos2): break
- for d in range(0,self.searchWindow1):
- # todo mark previous interval and do not check against lines.
- p=fromPos+d
- if (0<= p) and (p <len(inLines)):
- k = self.stringDiff(text, inLines[p]['line']);
- if (k<=self.DIFF_TRESHOLD_1*3):
- k+=min(20,d/40)
- k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
- #print('k=',k,' resultD=',resultD,'; t1="',text,'" t2="',inLines[p]['line'],'" ')
- if (resultD>k):
- result, resultD = p, k
- if (d!=0):
- p=fromPos-d
- if (0<= p) and (p <len(inLines)):
- k = self.stringDiff(text, inLines[p]['line'])
- if (k<=self.DIFF_TRESHOLD_1*3):
- k+=min(20,d/40)
- k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
- if resultD>k :
- result, resultD = p, k
- if (resultD==0 or resultD<self.DIFF_TRESHOLD_2 and d>3 and d < self.searchWindow2):
- break # stop search - it is best.
- if (resultD>self.DIFF_TRESHOLD_1):
- return -1, None # not found
- return result, resultD
- # end of searchLine
- not_allowed="?*"
- def checkBrokenLine(self, line):
- wrongChar=0
- questionChar=0
- for c in line:
- if c in self.not_allowed: wrongChar+=1
- #if not c in self.allowed: wrongChar+=1
- if c=='?':
- questionChar+=1
- #if c in self.not_allowed: wrongChar-=1
- return wrongChar>0 or (len(line)>5 and questionChar>5)
- # end of checkBrokenLine
- # Soft comparsion of 2 string
- # return 0 - equals, 2 equals ignore case and whitespace, 15 high diff but maybe same, Integer.MAX_VALUE absolutly different
- def stringDiff(self, l1, l2):
if (l1==l2): return 0if (l1.lower()==l2.lower()): return 1- if (l1==l2):
- #print('compare ',l1,l2," EQ=0")
- return 0
- if (l1.lower()==l2.lower()):
- #print('compare ',l1,l2," EQ=1")
- return 1
- l1=l1.replace(" ", " ").strip()
- l2=l2.replace(" ", " ").strip()
- #todo optimization: l1,l2 = l1.upper(),l2.upper()
- l1,l2=l1.upper(),l2.upper()
- if (len(l1)==len(l2)):
- eq=True
- for i in range(len(l1)):
- eq=eq and (l1[i]==l2[i]) or (l1[i] in self.not_allowed) or (l1[i] in self.not_allowed)
- if eq:
- #print('compare ',l1,l2," EQ=1,2")
- return 1
#Расстояние Дамерау — Левенштейна (https://ru.wikipedia.org/w/index.php?#title=%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%94%D0%B0%D0%BC%D0%B5%D1%80%D0%B0%D1%83_%E2%80%94_%D0%9B%D0%B5%D0%B2%D0%B5%D0%BD%D1%88%D1%82%D0%B5%D0%B9%D0%BD%D0%B0&oldid=100038662 Creative commons CC-BY-SA )- #Damerau–Levenshtein distance / Расстояние Дамерау — Левенштейна (https://ru.wikipedia.org/w/index.php?#title=%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%94%D0%B0%D0%BC%D0%B5%D1%80%D0%B0%D1%83_%E2%80%94_%D0%9B%D0%B5%D0%B2%D0%B5%D0%BD%D1%88%D1%82%D0%B5%D0%B9%D0%BD%D0%B0&oldid=100038662 Creative commons CC-BY-SA )
- def damerau_levenshtein_distance(s1, s2):
- d = {}
- lenstr1 = len(s1)
- lenstr2 = len(s2)
- for i in range(-1,lenstr1+1):
- d[(i,-1)] = i+1
- for j in range(-1,lenstr2+1):
- d[(-1,j)] = j+1
- for i in range(lenstr1):
- for j in range(lenstr2):
- if s1[i] == s2[j]:
- cost = 0
if (s1[i] in self.not_allowed) or (s2[j] in self.not_allowed): # modification weightcost = 1- elif (s1[i] in self.not_allowed) or (s2[j] in self.not_allowed): # modification weight
- cost = 0
- else:
- cost = 3
- d[(i,j)] = min(
- d[(i-1,j)] + 1, # deletion
- d[(i,j-1)] + 1, # insertion
- d[(i-1,j-1)] + cost, # substitution
- )
- if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]:
- d[(i,j)] = min(d[(i,j)], d[i-2,j-2] + 1) # transposition
- return d[lenstr1-1,lenstr2-1]
- xDiff=abs(len(l1)-len(l2)) + damerau_levenshtein_distance(l1,l2)
- if (len(l1)<3 or len(l2)<3 or abs(len(l1)-len(l2))>30):
return CONST_MAX_VALUE- #print('compare ',l1,l2," EQ=",CONST_MAX_VALUE)
- return CONST_MAX_VALUE
- xDiff = xDiff *300 / len(l1)
- if (xDiff>CONST_MAX_VALUE-1000):
- #print('compare ',l1,l2," EQ=",CONST_MAX_VALUE-100)
- return CONST_MAX_VALUE-100
- #print('compare ',l1,l2," EQ=",xDiff)
- return round(xDiff)
- # end of stringDiff
- def repair(self, historySeq, itm:int=-1):
- if (itm==0): raise BaseException("Can not repair first item[0] from future!")
- if (itm<0): itm=len(historySeq)-1
- #log.debug("Start repair history[{}] \"{}\"", itm, historySeq.get(itm).name);
- lines=historySeq[itm]
- out=[]
- statRepair, statHistoryUsedDepth, statHistoryWatchDepth, statNotFound = 0,0,0,0
- print(" repair lines:"+str(len(lines)))
- for line in lines:
- print(" repair line>"+repr(line)+" pos="+str(line['pos'])+" test="+str(line))
- if (line['hasBroken']):
- oldP=line['prePos']
- depth=0
- i=itm-1
- while (oldP!=CONST_UNDEFINED and i>=0):
- depth+=1
- statHistoryWatchDepth=max(statHistoryWatchDepth, depth)
- oldL=historySeq[i][oldP]
- if (not oldL['hasBroken']):
- print(" repair line<use old source")
- out.append(oldL['line'])
- #log.trace(" [{}][{}] line {}({})> repair from \"{}\" to \"{}\"",
- # itm, i, line.pos, oldP, line.line, oldL.line);
- statRepair+=1
- statHistoryUsedDepth=max(statHistoryUsedDepth, depth)
- break
- #else:
- # log.trace(" [{}][{}] line {}({})> go deep", itm, i, line.pos, oldP);
- oldP=oldL['prePos']
- i-=1
- if (oldP==CONST_UNDEFINED) :
- print(" repair line<not found. ", line['line'])
- out.append(line['line']) #no modify
- statNotFound+=1
- else:
- print(" repair line<no need")
- out.append(line['line'])
- print(f"Repair complete: repair {statRepair} lines, max history depth = {statHistoryUsedDepth}, max history watch depth = {statHistoryWatchDepth}, not found {statNotFound} lines.")
- print(" repair out lines:"+str(len(out)))
- return out
- # end of repair
- def process(self, historySeq, i_to_repair:int=-1):
- hSeq=[]
- for histText in historySeq:
- lines=[]
- i=0
- for l in histText:
tl={'pos':-1, 'line':l, 'hasBroken':False,- tl={'pos':i, 'line':l, 'hasBroken':False,
- 'prePos':CONST_UNDEFINED, 'preK':-1.0, 'nextPos':CONST_UNDEFINED}
- lines.append(tl)
- i+=1
- hSeq.append(lines)
- print("indexing...")
- idx=self.indexing(hSeq)
- print("pepair n="+str(i_to_repair))
- repair=self.repair(hSeq, i_to_repair)
- print("done")
- return repair
- def repair_text_by_history(history_of_text):
- repairer=RepairEngine()
- text=repairer.process(history_of_text, len(history_of_text)-1)
- return text
import codewars_test as test from solution import repair_text_by_history import numpy as np def text_generator(sample_sentence, depth=4, sentences_up_to=3,rand_seed=100): rand=np.random.RandomState(rand_seed) hist=[] hist_broken=[] prev=[] prev_broken=[] def f_make_line(): l='' for i in range(rand.randint(1,sentences_up_to)): if len(l)>0: l+=' ' l+=rand.choice(sample_sentence)+rand.choice([c for c in '......???!!;']) #print('new:', l) return l; def f_break_line(l): o='' bk=rand.randint(8,20) for c in l: if rand.randint(16) <= bk : o+='?' else: o+=c #print('break:', l,' > ',o) return o new_break=False # first history was good for j in range(depth): text=[] text_broken=[] for i in range(len(prev)): if rand.randint(15) >= 13 : # corrosion line text.append(prev[i]) text_broken.append(f_break_line(prev_broken[i])) elif rand.randint(20) >= 18 : continue # delete else: text.append(prev[i]) text_broken.append(prev_broken[i]) if rand.randint(25) >= 20 : for i in range(1,rand.randint(10)): l=f_make_line() text.append(l) if new_break and (rand.randint(15) >= 13) : text_broken.append(f_break_line(l)) else: text_broken.append(l) # append text if rand.randint(10) >= 7 : for i in range(1,rand.randint(20)): l=f_make_line() text.append(l) if new_break and (rand.randint(15) >= 13) : text_broken.append(f_break_line(l)) else: text_broken.append(l) prev=text hist.append(text) prev_broken=text_broken hist_broken.append(text_broken) new_broke=True return hist_broken, hist # test.assert_equals(actual, expected, [optional] message) @test.describe("Repair text test") def test_group(): @test.it("Fixed test case") def test_case(): data1=[ [ 'Hello, world!', 'THIS is first version.' ],[ 'Hello, world.' 'This is first ???????.' ],[ 'Hello world', 'Line 2.', 'This is first ?????on.', 'There can be large document' ],[ 'Hello.', '???? ??', '???????????? ?????on.', 'There can be large document' ],[ 'Hello.', '???? ??', '???????????? ?????on.', 'There can be large document', '', 'Simple alhorithm for repair:', '', 'Step 1.', '* mark broken symbols and lines', '', 'Step 2.', '* find line number in history', '', 'Step 3.', '* search good lines in history', '', 'Step 4.', '* replace broken line, if it is can be true', ],[ 'Hello.', '???? ??', '???????????? ?????on.', 'There can be large document', '', 'Simple alhorithm for repair:', '', '???? ??', '* mark broken symbols and lines', '', 'Step 2.', '? ???? line ?????? ?? ???????', '', '???? 3.', '* search good lines in history', '', 'Step 4.', '* replace broken line, if it is can be true', ],[ 'Hello.', '???? ??', '???????????? ?????on.', 'There can be large document', '', 'Simple alhorithm for repair:', '', '???? ??', '* mark broken symbols and lines', '', 'Step 2.', '? ???? line ?????? ?? ???????', '', '???? 3.', '* search good lines in history', '', 'Step 4.', '* replace broken line, if it is can be true', '', 'You can make more better alhorighm, or try improvement it.', 'But do not repair string', 'In cases, when it can be ambigous.', '' ],[ 'Hello.', '???? ??', '???????????? ?????on.', 'There can be large document', '', 'Simple alhorithm for repair:', '', '???? ??', '* mark broken symbols and lines', '', 'Step 2.', '? ???? line ?????? ?? ???????', '', '???? 3.', '* search good lines in history', '', '???? ?.', '', 'Step 4.', '* replace broken line, if it is can be true', '', 'You can make more better alhorighm, or try improvement it.', '?? ?????? ???? ?? ??? ?? ?????????', '??? ?? ??? ?????? ??????', '' ] ] expected_text=[ 'Hello.', 'Line 2.', 'THIS is first version.' # TODO may be "This is first version." 'There can be large document', '', 'Simple alhorithm for repair:', '', 'Step 1.', '* mark broken symbols and lines', '', 'Step 2.', '* find line number in history', '', 'Step 3.', '* search good lines in history', '', 'Step 4.', '', 'Step 4.', '* replace broken line, if it is can be true', '', 'You can make more better alhorighm, or try improvement it.', '?? ?????? ???? ?? ??? ?? ?????????', '??? ?? ??? ?????? ??????', ''] r_text=repair_text_by_history(data1) test.assert_equals(r_text, expected_text) @test.it("Random test case") def test_case_random(): words=['','','Hello world','Line 2','This is first version','You can make more better alhorighm, or try improvement it', 'You got damaged paper with text','And got previous edition of this text','Each next edition has any good changes and break character change', 'In broken lines will be same character replaced to ''?''', 'Make solution for repair broken text by history'] broken_t,original_t=text_generator(words, depth=3) # depth>4 need many time expected_text = original_t[-1] print("Random history with ",str(len(broken_t))," items, last with ",str(len(expected_text))," lines") r_text=repair_text_by_history(broken_t) test.assert_equals(r_text, expected_text)
- import codewars_test as test
- from solution import repair_text_by_history
- import numpy as np
- def text_generator(sample_sentence, depth=4, sentences_up_to=3,rand_seed=100):
- rand=np.random.RandomState(rand_seed)
- hist=[]
- hist_broken=[]
- prev=[]
- prev_broken=[]
- def f_make_line():
- l=''
- for i in range(rand.randint(1,sentences_up_to)):
- if len(l)>0:
- l+=' '
- l+=rand.choice(sample_sentence)+rand.choice([c for c in '......???!!;'])
- #print('new:', l)
- return l;
- def f_break_line(l):
- o=''
- bk=rand.randint(8,20)
- for c in l:
- if rand.randint(16) <= bk :
- o+='?'
- else:
- o+=c
- #print('break:', l,' > ',o)
- return o
- new_break=False # first history was good
- for j in range(depth):
- text=[]
- text_broken=[]
- for i in range(len(prev)):
- if rand.randint(15) >= 13 : # corrosion line
- text.append(prev[i])
- text_broken.append(f_break_line(prev_broken[i]))
- elif rand.randint(20) >= 18 :
- continue # delete
- else:
- text.append(prev[i])
- text_broken.append(prev_broken[i])
- if rand.randint(25) >= 20 :
- for i in range(1,rand.randint(10)):
- l=f_make_line()
- text.append(l)
- if new_break and (rand.randint(15) >= 13) :
- text_broken.append(f_break_line(l))
- else:
- text_broken.append(l)
- # append text
- if rand.randint(10) >= 7 :
- for i in range(1,rand.randint(20)):
- l=f_make_line()
- text.append(l)
- if new_break and (rand.randint(15) >= 13) :
- text_broken.append(f_break_line(l))
- else:
- text_broken.append(l)
- prev=text
- hist.append(text)
- prev_broken=text_broken
- hist_broken.append(text_broken)
- new_broke=True
- return hist_broken, hist
- # test.assert_equals(actual, expected, [optional] message)
- @test.describe("Repair text test")
- def test_group():
- @test.it("Fixed test case")
- def test_case():
- data1=[
- [
- 'Hello, world!',
- 'THIS is first version.'
- ],[
- 'Hello, world.'
- 'This is first ???????.'
- ],[
- 'Hello world',
- 'Line 2.',
- 'This is first ?????on.',
- 'There can be large document'
- ],[
- 'Hello.',
- '???? ??',
- '???????????? ?????on.',
- 'There can be large document'
- ],[
- 'Hello.',
- '???? ??',
- '???????????? ?????on.',
- 'There can be large document',
- '',
- 'Simple alhorithm for repair:',
- '',
- 'Step 1.',
- '* mark broken symbols and lines',
- '',
- 'Step 2.',
- '* find line number in history',
- '',
- 'Step 3.',
- '* search good lines in history',
- '',
- 'Step 4.',
- '* replace broken line, if it is can be true',
- ],[
- 'Hello.',
- '???? ??',
- '???????????? ?????on.',
- 'There can be large document',
- '',
- 'Simple alhorithm for repair:',
- '',
- '???? ??',
- '* mark broken symbols and lines',
- '',
- 'Step 2.',
- '? ???? line ?????? ?? ???????',
- '',
- '???? 3.',
- '* search good lines in history',
- '',
- 'Step 4.',
- '* replace broken line, if it is can be true',
- ],[
- 'Hello.',
- '???? ??',
- '???????????? ?????on.',
- 'There can be large document',
- '',
- 'Simple alhorithm for repair:',
- '',
- '???? ??',
- '* mark broken symbols and lines',
- '',
- 'Step 2.',
- '? ???? line ?????? ?? ???????',
- '',
- '???? 3.',
- '* search good lines in history',
- '',
- 'Step 4.',
- '* replace broken line, if it is can be true',
- '',
- 'You can make more better alhorighm, or try improvement it.',
- 'But do not repair string',
- 'In cases, when it can be ambigous.',
- ''
- ],[
- 'Hello.',
- '???? ??',
- '???????????? ?????on.',
- 'There can be large document',
- '',
- 'Simple alhorithm for repair:',
- '',
- '???? ??',
- '* mark broken symbols and lines',
- '',
- 'Step 2.',
- '? ???? line ?????? ?? ???????',
- '',
- '???? 3.',
- '* search good lines in history',
- '',
- '???? ?.',
- '',
- 'Step 4.',
- '* replace broken line, if it is can be true',
- '',
- 'You can make more better alhorighm, or try improvement it.',
- '?? ?????? ???? ?? ??? ?? ?????????',
- '??? ?? ??? ?????? ??????',
- ''
- ]
- ]
- expected_text=[
- 'Hello.',
- 'Line 2.',
- 'THIS is first version.' # TODO may be "This is first version."
- 'There can be large document',
- '',
- 'Simple alhorithm for repair:',
- '',
- 'Step 1.',
- '* mark broken symbols and lines',
- '',
- 'Step 2.',
- '* find line number in history',
- '',
- 'Step 3.',
- '* search good lines in history',
- '',
- 'Step 4.',
- '',
- 'Step 4.',
- '* replace broken line, if it is can be true',
- '',
- 'You can make more better alhorighm, or try improvement it.',
- '?? ?????? ???? ?? ??? ?? ?????????',
- '??? ?? ??? ?????? ??????',
- '']
- r_text=repair_text_by_history(data1)
- test.assert_equals(r_text, expected_text)
- @test.it("Random test case")
- def test_case_random():
- words=['','','Hello world','Line 2','This is first version','You can make more better alhorighm, or try improvement it',
- 'You got damaged paper with text','And got previous edition of this text','Each next edition has any good changes and break character change',
- 'In broken lines will be same character replaced to ''?''', 'Make solution for repair broken text by history']
broken_t,original_t=text_generator(words, depth=5)- broken_t,original_t=text_generator(words, depth=3) # depth>4 need many time
- expected_text = original_t[-1]
- print("Random history with ",str(len(broken_t))," items, last with ",str(len(expected_text))," lines")
- r_text=repair_text_by_history(broken_t)
- test.assert_equals(r_text, expected_text)
The old history text
You got damaged paper with text. And got previous edition of this text. Each next edition has any good changes and break character change. In broken lines will be same character replaced to '?
'
Make solution for repair broken text by history.
##Input:
[
['line 1','line 2'], # first
['line 1','???? 2','line 3'] # second
]
As you can see, this data like Git changelog.
##Output:
Repair last text from input.
['line 1','line 2','line 3']
# This solution not vork correctly. Try improvement this code.
CONST_UNDEFINED = -999999 # or None
CONST_MAX_VALUE = 999999
class RepairEngine:
searchWindow1 = 70 # when diff < DIFF_TRESHOLD
searchWindow2 = 14 # when diff < 3
DIFF_TRESHOLD_1=68
DIFF_TRESHOLD_2=3
def __str__(self):
return f"RepairEngine(searchWindow1={str(self.searchWindow1)}, searchWindow2={str(self.searchWindow2)}, DIFF_TRESHOLD_1={str(self.DIFF_TRESHOLD_1)})"
def indexing(self, historySeq):
if len(historySeq)<2 : raise BaseException("History sequence should have 2 or more files. historySeq="+str(historySeq))
for text in historySeq: #markBrokenLines
for l in text:
l['hasBroken'] = self.checkBrokenLine(l['line'])
for i in range(1,len(historySeq)):
self.linkFiles(historySeq[i-1], historySeq[i])
#end of indexing
def linkFiles(self, lines1, lines2):
p2=0
seqenceLost=0
for i in range(0, len(lines1)):
#print(f"Progress {str(i+1)}/{len(lines1)} history items")
myLine=lines1[i]
p,thisK =self.searchLine(myLine['line'], lines2, p2, i);
if ((p!=CONST_UNDEFINED) and (p>=0)):
if (p2!=p):
#print(" new offset {str(i)} > {str(p)}")
p2=p
myLine['nextPos']=p
applyPatch = True
if (lines2[p]['prePos']!=CONST_UNDEFINED) and (lines2[p]['prePos']!=i):
if lines2[p]['preK'] > thisK:
None
elif (lines2[p]['preK'] < thisK):
applyPatch = False
else:
None
if applyPatch:
lines2[p]['prePos'] = i
lines2[p]['preK'] = thisK
if i != myLine['pos']:
print(f"WARN: i={str(i)} and pos={str(myLine['pos'])} not match! At line \"{str(myLine['line'])}\"");
if seqenceLost>self.searchWindow1*3:
print(f"i={str(i)}, found p={str(p)} after lost search window");
seqenceLost=0
else:
#print("i={str(i)} pos={str(p)} not found position")
seqenceLost +=1
if seqenceLost==self.searchWindow1*3:
print(f"i={str(i)}, p={str(p)} lost search window, next position search may be not correct");
p2 +=1
# end of linkFiles
# TODO hard method. Need more unit test!
def searchLine(self, text, inLines, nearPos1:int, nearPos2:int):
result=CONST_UNDEFINED
resultD=CONST_MAX_VALUE
for fromPos in [nearPos1,nearPos2] :
if (fromPos==CONST_UNDEFINED): continue
if (result!=CONST_UNDEFINED and nearPos1==nearPos2): break
for d in range(0,self.searchWindow1):
# todo mark previous interval and do not check against lines.
p=fromPos+d
if (0<= p) and (p <len(inLines)):
k = self.stringDiff(text, inLines[p]['line']);
if (k<=self.DIFF_TRESHOLD_1*3):
k+=min(20,d/40)
k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
if (resultD>k):
result, resultD = p, k
if (d!=0):
p=fromPos-d
if (0<= p) and (p <len(inLines)):
k = self.stringDiff(text, inLines[p]['line'])
if (k<=self.DIFF_TRESHOLD_1*3):
k+=min(20,d/40)
k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
if resultD>k :
result, resultD = p, k
if (resultD==0 or resultD<self.DIFF_TRESHOLD_2 and d>3 and d < self.searchWindow2):
break # stop search - it is best.
if (resultD>self.DIFF_TRESHOLD_1):
return -1, None # not found
return result, resultD
# end of searchLine
not_allowed="?*"
def checkBrokenLine(self, line):
wrongChar=0
questionChar=0
for c in line:
if c in self.not_allowed: wrongChar+=1
#if not c in self.allowed: wrongChar+=1
if c=='?':
questionChar+=1
#if c in self.not_allowed: wrongChar-=1
return wrongChar>0 or (len(line)>5 and questionChar>5)
# end of checkBrokenLine
# Soft comparsion of 2 string
# return 0 - equals, 2 equals ignore case and whitespace, 15 high diff but maybe same, Integer.MAX_VALUE absolutly different
def stringDiff(self, l1, l2):
if (l1==l2): return 0
if (l1.lower()==l2.lower()): return 1
l1=l1.replace(" ", " ").strip()
l2=l2.replace(" ", " ").strip()
#todo optimization: l1,l2 = l1.upper(),l2.upper()
l1,l2=l1.upper(),l2.upper()
#Расстояние Дамерау — Левенштейна (https://ru.wikipedia.org/w/index.php?#title=%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%94%D0%B0%D0%BC%D0%B5%D1%80%D0%B0%D1%83_%E2%80%94_%D0%9B%D0%B5%D0%B2%D0%B5%D0%BD%D1%88%D1%82%D0%B5%D0%B9%D0%BD%D0%B0&oldid=100038662 Creative commons CC-BY-SA )
def damerau_levenshtein_distance(s1, s2):
d = {}
lenstr1 = len(s1)
lenstr2 = len(s2)
for i in range(-1,lenstr1+1):
d[(i,-1)] = i+1
for j in range(-1,lenstr2+1):
d[(-1,j)] = j+1
for i in range(lenstr1):
for j in range(lenstr2):
if s1[i] == s2[j]:
cost = 0
if (s1[i] in self.not_allowed) or (s2[j] in self.not_allowed): # modification weight
cost = 1
else:
cost = 3
d[(i,j)] = min(
d[(i-1,j)] + 1, # deletion
d[(i,j-1)] + 1, # insertion
d[(i-1,j-1)] + cost, # substitution
)
if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]:
d[(i,j)] = min(d[(i,j)], d[i-2,j-2] + 1) # transposition
return d[lenstr1-1,lenstr2-1]
xDiff=abs(len(l1)-len(l2)) + damerau_levenshtein_distance(l1,l2)
if (len(l1)<3 or len(l2)<3 or abs(len(l1)-len(l2))>30):
return CONST_MAX_VALUE
xDiff = xDiff *300 / len(l1)
if (xDiff>CONST_MAX_VALUE-1000):
return CONST_MAX_VALUE-100
return round(xDiff)
# end of stringDiff
def repair(self, historySeq, itm:int=-1):
if (itm==0): raise BaseException("Can not repair first item[0] from future!")
if (itm<0): itm=len(historySeq)-1
#log.debug("Start repair history[{}] \"{}\"", itm, historySeq.get(itm).name);
lines=historySeq[itm]
out=[]
statRepair, statHistoryUsedDepth, statHistoryWatchDepth, statNotFound = 0,0,0,0
print(" repair lines:"+str(len(lines)))
for line in lines:
print(" repair line>"+repr(line)+" pos="+str(line['pos'])+" test="+str(line))
if (line['hasBroken']):
oldP=line['prePos']
depth=0
i=itm-1
while (oldP!=CONST_UNDEFINED and i>=0):
depth+=1
statHistoryWatchDepth=max(statHistoryWatchDepth, depth)
oldL=historySeq[i][oldP]
if (not oldL['hasBroken']):
print(" repair line<use old source")
out.append(oldL['line'])
#log.trace(" [{}][{}] line {}({})> repair from \"{}\" to \"{}\"",
# itm, i, line.pos, oldP, line.line, oldL.line);
statRepair+=1
statHistoryUsedDepth=max(statHistoryUsedDepth, depth)
break
#else:
# log.trace(" [{}][{}] line {}({})> go deep", itm, i, line.pos, oldP);
oldP=oldL['prePos']
i-=1
if (oldP==CONST_UNDEFINED) :
print(" repair line<not found. ", line['line'])
out.append(line['line']) #no modify
statNotFound+=1
else:
print(" repair line<no need")
out.append(line['line'])
print(f"Repair complete: repair {statRepair} lines, max history depth = {statHistoryUsedDepth}, max history watch depth = {statHistoryWatchDepth}, not found {statNotFound} lines.")
print(" repair out lines:"+str(len(out)))
return out
# end of repair
def process(self, historySeq, i_to_repair:int=-1):
hSeq=[]
for histText in historySeq:
lines=[]
i=0
for l in histText:
tl={'pos':-1, 'line':l, 'hasBroken':False,
'prePos':CONST_UNDEFINED, 'preK':-1.0, 'nextPos':CONST_UNDEFINED}
lines.append(tl)
hSeq.append(lines)
print("indexing...")
idx=self.indexing(hSeq)
print("pepair n="+str(i_to_repair))
repair=self.repair(hSeq, i_to_repair)
print("done")
return repair
def repair_text_by_history(history_of_text):
repairer=RepairEngine()
text=repairer.process(history_of_text, len(history_of_text)-1)
return text
import codewars_test as test
from solution import repair_text_by_history
import numpy as np
def text_generator(sample_sentence, depth=4, sentences_up_to=3,rand_seed=100):
rand=np.random.RandomState(rand_seed)
hist=[]
hist_broken=[]
prev=[]
prev_broken=[]
def f_make_line():
l=''
for i in range(rand.randint(1,sentences_up_to)):
if len(l)>0:
l+=' '
l+=rand.choice(sample_sentence)+rand.choice([c for c in '......???!!;'])
#print('new:', l)
return l;
def f_break_line(l):
o=''
bk=rand.randint(8,20)
for c in l:
if rand.randint(16) <= bk :
o+='?'
else:
o+=c
#print('break:', l,' > ',o)
return o
new_break=False # first history was good
for j in range(depth):
text=[]
text_broken=[]
for i in range(len(prev)):
if rand.randint(15) >= 13 : # corrosion line
text.append(prev[i])
text_broken.append(f_break_line(prev_broken[i]))
elif rand.randint(20) >= 18 :
continue # delete
else:
text.append(prev[i])
text_broken.append(prev_broken[i])
if rand.randint(25) >= 20 :
for i in range(1,rand.randint(10)):
l=f_make_line()
text.append(l)
if new_break and (rand.randint(15) >= 13) :
text_broken.append(f_break_line(l))
else:
text_broken.append(l)
# append text
if rand.randint(10) >= 7 :
for i in range(1,rand.randint(20)):
l=f_make_line()
text.append(l)
if new_break and (rand.randint(15) >= 13) :
text_broken.append(f_break_line(l))
else:
text_broken.append(l)
prev=text
hist.append(text)
prev_broken=text_broken
hist_broken.append(text_broken)
new_broke=True
return hist_broken, hist
# test.assert_equals(actual, expected, [optional] message)
@test.describe("Repair text test")
def test_group():
@test.it("Fixed test case")
def test_case():
data1=[
[
'Hello, world!',
'THIS is first version.'
],[
'Hello, world.'
'This is first ???????.'
],[
'Hello world',
'Line 2.',
'This is first ?????on.',
'There can be large document'
],[
'Hello.',
'???? ??',
'???????????? ?????on.',
'There can be large document'
],[
'Hello.',
'???? ??',
'???????????? ?????on.',
'There can be large document',
'',
'Simple alhorithm for repair:',
'',
'Step 1.',
'* mark broken symbols and lines',
'',
'Step 2.',
'* find line number in history',
'',
'Step 3.',
'* search good lines in history',
'',
'Step 4.',
'* replace broken line, if it is can be true',
],[
'Hello.',
'???? ??',
'???????????? ?????on.',
'There can be large document',
'',
'Simple alhorithm for repair:',
'',
'???? ??',
'* mark broken symbols and lines',
'',
'Step 2.',
'? ???? line ?????? ?? ???????',
'',
'???? 3.',
'* search good lines in history',
'',
'Step 4.',
'* replace broken line, if it is can be true',
],[
'Hello.',
'???? ??',
'???????????? ?????on.',
'There can be large document',
'',
'Simple alhorithm for repair:',
'',
'???? ??',
'* mark broken symbols and lines',
'',
'Step 2.',
'? ???? line ?????? ?? ???????',
'',
'???? 3.',
'* search good lines in history',
'',
'Step 4.',
'* replace broken line, if it is can be true',
'',
'You can make more better alhorighm, or try improvement it.',
'But do not repair string',
'In cases, when it can be ambigous.',
''
],[
'Hello.',
'???? ??',
'???????????? ?????on.',
'There can be large document',
'',
'Simple alhorithm for repair:',
'',
'???? ??',
'* mark broken symbols and lines',
'',
'Step 2.',
'? ???? line ?????? ?? ???????',
'',
'???? 3.',
'* search good lines in history',
'',
'???? ?.',
'',
'Step 4.',
'* replace broken line, if it is can be true',
'',
'You can make more better alhorighm, or try improvement it.',
'?? ?????? ???? ?? ??? ?? ?????????',
'??? ?? ??? ?????? ??????',
''
]
]
expected_text=[
'Hello.',
'Line 2.',
'THIS is first version.' # TODO may be "This is first version."
'There can be large document',
'',
'Simple alhorithm for repair:',
'',
'Step 1.',
'* mark broken symbols and lines',
'',
'Step 2.',
'* find line number in history',
'',
'Step 3.',
'* search good lines in history',
'',
'Step 4.',
'',
'Step 4.',
'* replace broken line, if it is can be true',
'',
'You can make more better alhorighm, or try improvement it.',
'?? ?????? ???? ?? ??? ?? ?????????',
'??? ?? ??? ?????? ??????',
'']
r_text=repair_text_by_history(data1)
test.assert_equals(r_text, expected_text)
@test.it("Random test case")
def test_case_random():
words=['','','Hello world','Line 2','This is first version','You can make more better alhorighm, or try improvement it',
'You got damaged paper with text','And got previous edition of this text','Each next edition has any good changes and break character change',
'In broken lines will be same character replaced to ''?''', 'Make solution for repair broken text by history']
broken_t,original_t=text_generator(words, depth=5)
expected_text = original_t[-1]
print("Random history with ",str(len(broken_t))," items, last with ",str(len(expected_text))," lines")
r_text=repair_text_by_history(broken_t)
test.assert_equals(r_text, expected_text)
def kimite(grid): rows, cols = len(grid), len(grid[0]) # Burn method. Less optimal that Djikstra, but simple. hotmap=[[None for c in range(cols)] for r in range(rows)] hotmap[0][0]=grid[0][0] # first point changed=True itr=0 while changed: changed=False for r in range(rows): for c in range(cols): d=[] for y in [r-1,r+1,r]: for x in [c-1,c+1,c]: if (x>=0)and(y>=0) and (x<cols)and(y<rows) and ((x==c)or(y==r)) and (hotmap[y][x]!=None): d.append(hotmap[y][x]) if len(d)>0: k=grid[r][c]+min(d) if (hotmap[r][c]!=None) and (hotmap[r][c]<k): k=hotmap[r][c] if k!=hotmap[r][c]: changed=True hotmap[r][c]=k itr+=1 print('Iteration for search '+str(itr)) #print(hotmap) total_cost = hotmap[-1][-1] return total_cost
- def kimite(grid):
- rows, cols = len(grid), len(grid[0])
position = (0, 0)seen = set()total_cost = 0# Helper function to find the min cost based on current coordinatesdef get_step_cost(*directions, ):compare = sorted([i for i in directions if i != None], key=lambda x: x[0])multiple = [x for x in [i for i in directions if i != None] if x[0] == compare[0][0]]if len(multiple) > 1:for i in multiple:if i[1] == 'right':return ifor i in multiple:if i[1] == 'down':return ielse:return compare[0]# Helper function to find polar directionsdef get_direction():up, down, left, right = None, None, None, None# Check Yif position[0] > 0 and (position[0] - 1, position[1]) not in seen:up = (grid[position[0] - 1][position[1]], 'up')if position[0] + 1 < rows and (position[0] + 1, position[1]) not in seen:down = (grid[position[0] + 1][position[1]], 'down')# Check Xif position[1] > 0 and (position[0], position[1] - 1) not in seen:left = (grid[position[0]][position[1] - 1], 'left')if position[1] + 1 < cols and (position[0], position[1] + 1) not in seen:right = (grid[position[0]][position[1] + 1], 'right')return (up, down, left, right)# Traverse the grid to find the minimum cost pathwhile position != (rows - 1, cols - 1):direction = get_direction()cost, move = get_step_cost(*direction)if move == 'up':position = (position[0] - 1, position[1])total_cost += costseen.add(position)continueif move == 'down':position = (position[0] + 1, position[1])total_cost += costseen.add(position)continueif move == 'left':position = (position[0], position[1] - 1)total_cost += costseen.add(position)continueif move == 'right':position = (position[0], position[1] + 1)total_cost += costseen.add(position)- # Burn method. Less optimal that Djikstra, but simple.
- hotmap=[[None for c in range(cols)] for r in range(rows)]
- hotmap[0][0]=grid[0][0] # first point
- changed=True
- itr=0
- while changed:
- changed=False
- for r in range(rows):
- for c in range(cols):
- d=[]
- for y in [r-1,r+1,r]:
- for x in [c-1,c+1,c]:
- if (x>=0)and(y>=0) and (x<cols)and(y<rows) and ((x==c)or(y==r)) and (hotmap[y][x]!=None):
- d.append(hotmap[y][x])
- if len(d)>0:
- k=grid[r][c]+min(d)
- if (hotmap[r][c]!=None) and (hotmap[r][c]<k):
- k=hotmap[r][c]
- if k!=hotmap[r][c]:
- changed=True
- hotmap[r][c]=k
- itr+=1
- print('Iteration for search '+str(itr))
- #print(hotmap)
- total_cost = hotmap[-1][-1]
- return total_cost
def text(): d,k,r,s="description","Kata or Kumite","return","symbols" return f"""Compress large text - {d} of this {k} in small code. This {k} {d} has 462 {s} in 3 line, without title. You should wrote function, {r} this text, but you should use less {s} in you'r code that this text. Simple solution - just {r} \"\"\"source text\"\"\". More smart variant is: do mark often word as special {s} and replace his before {r}. How small code can be? Can you wrote more compressing? Let's check it!"""
#sample code:- def text():
the_text = """Compress large text - description of this Kata or Kumite in small code.This Kata or Kumite description has 462 symbols in 3 line, without title. You should wrote function, return this text, but you should use less symbols in you'r code that this text. Simple solution - just return \"\"\"source text\"\"\". More smart variant is: do mark often word as special symbols and replace his before return.- d,k,r,s="description","Kata or Kumite","return","symbols"
- return f"""Compress large text - {d} of this {k} in small code.
- This {k} {d} has 462 {s} in 3 line, without title. You should wrote function, {r} this text, but you should use less {s} in you'r code that this text. Simple solution - just {r} \"\"\"source text\"\"\". More smart variant is: do mark often word as special {s} and replace his before {r}.
- How small code can be? Can you wrote more compressing? Let's check it!"""
return the_text # Do make 'def text()' more short
import codewars_test as test import inspect import solution # from solution # test.assert_equals(actual, expected, [optional] message) @test.describe("Test of compression text") def test_group(): # todo please, correct this number after make cnage method_size=504 module_size=504 text_size=464 text_expected="""Compress large text - description of this Kata or Kumite in small code. This Kata or Kumite description has 462 symbols in 3 line, without title. You should wrote function, return this text, but you should use less symbols in you'r code that this text. Simple solution - just return \"\"\"source text\"\"\". More smart variant is: do mark often word as special symbols and replace his before return. How small code can be? Can you wrote more compressing? Let's check it!""" @test.it("Text check") def test_case_text(): expected = text_expected test.assert_equals(solution.text(), expected) test.assert_equals(solution.text(), expected) @test.it("Test of test") def test_text(): expected = text_expected test.assert_equals(len(text_expected), text_size) src=inspect.getsource(solution) src_len=len(src) test.assert_equals((src_len>= 5),True, "Test not see solution source") @test.it("Source function check") def test_case_source_function(): src=inspect.getsource(solution.text) src_len=len(src) #test.assert_equals(src, "see method") print("Size of source code 'def test()' is "+str(src_len)) test.assert_equals((src_len<= method_size),True, "Function 'def test()' too large: "+str(src_len)) test.assert_equals((src_len>= 4),True, "How wrote methd more small that he's call?") # test should see method if src_len< method_size: print(f"New function less up to {method_size-src_len} symbol!") @test.it("Source module check") def test_case_source_function(): src=inspect.getsource(solution) src_len=len(src) print("Size of all source code length is "+str(src_len)) test.assert_equals((src_len<= module_size),True, "Source code of all module too large: "+str(src_len)+". Do not hidde data out of the function.") if src_len< module_size: print(f"New code less up to {module_size-src_len} symbol!") if (src_len<text_size): print(f"Congradulation! The code less that text at {text_size-src_len} symbol.")
- import codewars_test as test
- import inspect
- import solution # from solution
- # test.assert_equals(actual, expected, [optional] message)
- @test.describe("Test of compression text")
- def test_group():
- # todo please, correct this number after make cnage
method_size=560module_size=574- method_size=504
- module_size=504
text_size=466- text_size=464
- text_expected="""Compress large text - description of this Kata or Kumite in small code.
- This Kata or Kumite description has 462 symbols in 3 line, without title. You should wrote function, return this text, but you should use less symbols in you'r code that this text. Simple solution - just return \"\"\"source text\"\"\". More smart variant is: do mark often word as special symbols and replace his before return.
- How small code can be? Can you wrote more compressing? Let's check it!"""
- @test.it("Text check")
- def test_case_text():
- expected = text_expected
- test.assert_equals(solution.text(), expected)
- test.assert_equals(solution.text(), expected)
- @test.it("Test of test")
- def test_text():
- expected = text_expected
- test.assert_equals(len(text_expected), text_size)
- src=inspect.getsource(solution)
- src_len=len(src)
- test.assert_equals((src_len>= 5),True, "Test not see solution source")
- @test.it("Source function check")
- def test_case_source_function():
- src=inspect.getsource(solution.text)
- src_len=len(src)
- #test.assert_equals(src, "see method")
- print("Size of source code 'def test()' is "+str(src_len))
- test.assert_equals((src_len<= method_size),True, "Function 'def test()' too large: "+str(src_len))
- test.assert_equals((src_len>= 4),True, "How wrote methd more small that he's call?") # test should see method
- if src_len< method_size: print(f"New function less up to {method_size-src_len} symbol!")
- @test.it("Source module check")
- def test_case_source_function():
- src=inspect.getsource(solution)
- src_len=len(src)
- print("Size of all source code length is "+str(src_len))
- test.assert_equals((src_len<= module_size),True, "Source code of all module too large: "+str(src_len)+". Do not hidde data out of the function.")
if src_len< module_size: print(f"New code less up to {method_size-src_len} symbol!")- if src_len< module_size: print(f"New code less up to {module_size-src_len} symbol!")
- if (src_len<text_size): print(f"Congradulation! The code less that text at {text_size-src_len} symbol.")
class OperationError(Exception): def __init__(self, message): super().__init__(message) def matrix(matrix=list, operation=str, location=tuple, writedata=None): def op_write(i,j): matrix[i][j] = writedata return matrix op={"read":lambda i,j: matrix[i][j], "write":op_write} try: if not operation in op: raise OperationError("That is not a valid operation for this system.") return op[operation](location[0],location[1]) except Exception as e: print(e) return -1
- class OperationError(Exception):
- def __init__(self, message):
- super().__init__(message)
- def matrix(matrix=list, operation=str, location=tuple, writedata=None):
- def op_write(i,j):
- matrix[i][j] = writedata
- return matrix
- op={"read":lambda i,j: matrix[i][j], "write":op_write}
- try:
if operation == "read":return matrix[location[0]][location[1]]elif operation == "write":matrix[location[0]][location[1]] = writedatareturn matrixelse:raise OperationError("That is not a valid operation for this system.")- if not operation in op: raise OperationError("That is not a valid operation for this system.")
- return op[operation](location[0],location[1])
- except Exception as e:
- print(e)
return -1- return -1
Compress large text - description of this Kata or Kumite in small code.
This Kata or Kumite description has 462 symbols in 3 line, without title. You should wrote function, return this text, but you should use less symbols in you'r code that this text. Simple solution - just return """source text""". More smart variant is: do mark often word as special symbols and replace his before return.
How small code can be? Can you wrote more compressing? Let's check it!
#sample code:
def text():
the_text = """Compress large text - description of this Kata or Kumite in small code.
This Kata or Kumite description has 462 symbols in 3 line, without title. You should wrote function, return this text, but you should use less symbols in you'r code that this text. Simple solution - just return \"\"\"source text\"\"\". More smart variant is: do mark often word as special symbols and replace his before return.
How small code can be? Can you wrote more compressing? Let's check it!"""
return the_text # Do make 'def text()' more short
import codewars_test as test
import inspect
import solution # from solution
# test.assert_equals(actual, expected, [optional] message)
@test.describe("Test of compression text")
def test_group():
# todo please, correct this number after make cnage
method_size=560
module_size=574
text_size=466
text_expected="""Compress large text - description of this Kata or Kumite in small code.
This Kata or Kumite description has 462 symbols in 3 line, without title. You should wrote function, return this text, but you should use less symbols in you'r code that this text. Simple solution - just return \"\"\"source text\"\"\". More smart variant is: do mark often word as special symbols and replace his before return.
How small code can be? Can you wrote more compressing? Let's check it!"""
@test.it("Text check")
def test_case_text():
expected = text_expected
test.assert_equals(solution.text(), expected)
test.assert_equals(solution.text(), expected)
@test.it("Test of test")
def test_text():
expected = text_expected
test.assert_equals(len(text_expected), text_size)
src=inspect.getsource(solution)
src_len=len(src)
test.assert_equals((src_len>= 5),True, "Test not see solution source")
@test.it("Source function check")
def test_case_source_function():
src=inspect.getsource(solution.text)
src_len=len(src)
#test.assert_equals(src, "see method")
print("Size of source code 'def test()' is "+str(src_len))
test.assert_equals((src_len<= method_size),True, "Function 'def test()' too large: "+str(src_len))
test.assert_equals((src_len>= 4),True, "How wrote methd more small that he's call?") # test should see method
if src_len< method_size: print(f"New function less up to {method_size-src_len} symbol!")
@test.it("Source module check")
def test_case_source_function():
src=inspect.getsource(solution)
src_len=len(src)
print("Size of all source code length is "+str(src_len))
test.assert_equals((src_len<= module_size),True, "Source code of all module too large: "+str(src_len)+". Do not hidde data out of the function.")
if src_len< module_size: print(f"New code less up to {method_size-src_len} symbol!")
#include <ostream> using namespace std; string calculator(char op, int a, int b) { stringstream s; switch (op) { case '+': {s<<a+b; break;} case '-': {s<<a-b; break;} case '*': {s<<a*b; break;} case '%': {s<<a%b; break;} case '/': {if (b != 0) {s<<a/b;break;}/*else default: Invalid Input!*/} default: {s<<"Invalid Input!";} } return s.str(); }
- #include <ostream>
- using namespace std;
- string calculator(char op, int a, int b) {
- stringstream s;
- switch (op) {
case '+': {s<<a+b; return s.str();}case '-': {s<<a-b; return s.str();}case '*': {s<<a*b; return s.str();}case '/': {b != 0 ? s<<a/b : s<<"Invalid Input!"; return s.str();}case '%': {s<<a%b; return s.str();}default: return "Invalid Input!";- case '+': {s<<a+b; break;}
- case '-': {s<<a-b; break;}
- case '*': {s<<a*b; break;}
- case '%': {s<<a%b; break;}
- case '/': {if (b != 0) {s<<a/b;break;}/*else default: Invalid Input!*/}
- default: {s<<"Invalid Input!";}
- }
- return s.str();
- }