-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Issue #17 initial steps to decouple code table IO from PrefixCodec
- Refactor out pickle save/load from PrefixCode - Initial JSON based code table storage
- Loading branch information
Showing
15 changed files
with
277 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"type":"dahuffman code table","version":1,"code_table":[["-",5,0],["5",5,1],["{",7,8],["y",7,9],[".",6,5],["t",5,3],["9",5,4],["a",5,5],["D",7,24],["I",8,50],["z",9,102],["x",9,103],["d",6,13],["B",7,28],["E",7,29],["p",7,30],["J",11,496],["%",12,994],["'",14,3980],["Z",14,3981],[">",13,1991],["Y",10,249],["T",9,125],["O",9,126],["M",9,127],["2",5,8],["l",5,9],["3",5,10],["4",5,11],["\"",3,3],[":",6,32],["h",7,66],["q",10,536],["G",11,1074],["K",11,1075],["/",9,269],["P",9,270],["L",9,271],["e",5,17],["n",5,18],["v",8,152],["_",8,153],["H",9,308],["k",9,309],["\\",8,155],["s",6,39],[" ",5,20],["C",7,84],["b",8,170],["S",8,171],["]",8,172],["[",8,173],["A",7,87],["0",4,11],[",",4,12],["u",6,52],["~",10,848],["X",11,1698],["*",13,6796],["=",14,13594],["+",16,54380],[";",16,54381],["&",16,54382],["?",19,435065],["#",18,217533],["<",17,108767],["V",12,3399],[")",10,850],["(",10,851],["f",8,213],["m",7,107],["1",5,27],["r",6,56],["g",8,228],["w",8,229],["N",8,230],["j",10,924],["Q",12,3700],["@",12,3701],["W",11,1851],["R",10,926],["U",10,927],["i",6,58],["8",6,59],["o",6,60],["7",6,61],["6",6,62],["F",8,252],["}",8,253],["c",7,127]],"eof_code":[19,435064],"metadata":{"frequencies":{"{":7773,"\"":167778,"m":12999,"e":40821,"t":34222,"a":34406,":":19344,"v":5582,"i":29239,"w":6928,"d":17319,"k":3013,"u":25503,"6":30930,"-":31180,"n":41836,"x":2274,",":101928,"D":8359,"o":29553,"g":6808,"r":27010,"p":9126,"h":10043,"c":15557," 
":46824,"S":6054,"s":23004,"B":8840,"y":7924,"Z":84,"C":11851,"b":5917,"f":6500,"Y":1210,"l":37539,"(":1582,")":1581,"R":1864,"0":99430,"G":642,"A":12529,"1":52881,"3":38723,"7":30603,"5":32443,"4":39551,"z":2211,"T":2361,"8":29389,"2":36755,"F":7721,"J":545,"U":1887,"9":34397,"O":2411,"E":9084,"I":4262,"L":2738,"M":2483,"[":6124,"N":7237,"_":5684,"}":7761,"]":6104,"j":1773,".":16877,"P":2724,"K":710,"W":936,"H":2761,"V":403,"/":2641,"q":1246,"?":3,"~":1517,"\\":5915,";":24,"X":762,"Q":419,"'":60,"*":180,"@":441,"&":27,"%":265,"<":18,">":146,"+":20,"=":92,"#":9}},"concat":"str_join"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"type":"dahuffman code table","version":1,"code_table":[[" ",2,0],["4",5,8],["p",7,36],["J",11,592],["'",14,4744],[";",15,9490],["&",15,9491],[">",13,2373],["\u2019",16,18992],["<",16,18993],["\u00b3",20,303905],["\u00b5",20,303906],["\u2018",20,303907],["\u201c",20,303908],["\u201d",20,303909],["?",19,151955],["#",17,37989],["+",16,18995],["\u2013",14,4749],["*",13,2375],["Y",10,297],["T",9,149],["O",9,150],["M",9,151],["h",7,38],["q",10,312],["G",11,626],["K",11,627],["P",9,157],["\\",8,79],["n",5,10],["e",5,11],[":",6,24],["H",9,200],["L",9,201],["v",8,101],["C",7,51],["s",6,26],["/",9,216],["~",10,434],["(",10,435],["_",8,109],["]",8,110],["[",8,111],["0",4,7],[",",4,8],["S",8,144],["k",9,290],[")",10,582],["X",11,1166],["V",12,2334],["=",14,9340],["Z",14,9341],["%",13,4671],["A",7,73],["u",6,37],["1",5,19],["b",8,160],["f",8,161],["m",7,81],["r",6,41],["8",6,42],["w",8,172],["g",8,173],["N",8,174],["j",10,700],["Q",12,2804],["@",12,2805],["W",11,1403],["R",10,702],["U",10,703],["7",6,44],["6",6,45],["i",6,46],["-",6,47],["o",6,48],["5",6,49],["F",8,200],["y",8,201],["c",7,101],["}",8,204],["{",8,205],[".",7,103],["9",6,52],["D",8,212],["B",8,213],["I",9,428],["z",10,858],["x",10,859],["E",8,215],["a",6,54],["2",6,55],["t",6,56],["d",7,114],["\n",7,115],["l",6,58],["3",6,59],["\"",4,15]],"eof_code":[20,303904],"metadata":{"frequencies":{"{":7221,"\n":16612," 
":274544,"\"":139628,"m":12060,"e":37559,"t":31650,"a":31095,":":18551,"v":4984,"i":26909,"w":6193,"d":15858,"k":2694,"u":22373,"6":26443,"-":26960,"n":37383,"x":2007,",":83974,"D":7319,"o":27011,"g":6197,"r":24175,"p":8411,"h":8986,"c":14405,"S":5342,"s":20773,"B":7595,"y":7193,"Z":83,"C":10275,"b":5411,"f":5884,"Y":1049,"l":32957,"(":1348,")":1348,"R":1633,"0":80916,"G":590,"A":10839,"1":45087,"3":33133,"7":26092,"5":27695,"4":34520,"z":1919,"T":2119,"8":25079,"2":31617,"F":6684,"J":459,"U":1711,"9":29803,"O":2178,"E":7787,"I":3761,"L":2424,"M":2202,"[":5294,"N":6502,"_":5261,"}":7207,"]":5271,"j":1582,".":14620,"P":2345,"K":656,"W":843,"H":2404,"V":338,"/":2499,"q":1077,"?":3,"~":1311,"\\":4819,"\u2013":73,";":24,"X":668,"Q":362,"'":50,"*":149,"@":423,"&":27,"%":198,"\u00b5":1,"\u00b3":1,"<":17,">":125,"+":20,"=":74,"#":9,"\u2019":10,"\u2018":1,"\u201c":1,"\u201d":1}},"concat":"str_join"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"type":"dahuffman code table","version":1,"code_table":[["r",4,0],["h",4,1],["n",4,2],["s",4,3],["b",6,16],["\u2019",8,68],["!",9,138],["z",11,556],["2",14,4456],["5",15,8914],["&",16,17830],["|",17,35662],["*",17,35663],["\u201d",14,4458],[")",14,4459],["(",14,4460],["8",15,8922],["6",15,8923],["\"",15,8924],["0",15,8925],["\u201c",14,4463],[":",10,279],["'",8,70],[";",8,71],["u",5,9],["i",4,5],["\n",5,12],["g",6,26],["k",7,54],["v",7,55],["a",4,7],["d",5,16],[".",6,34],["f",6,35],["o",4,9],["t",4,10],["l",5,22],["c",6,46],[",",6,47],[" ",3,6],["w",6,56],["y",6,57],["_",10,928],["j",10,929],["?",9,465],["x",10,932],["\u2014",12,3732],["\u2018",14,14932],["\u00e8",17,119464],["\u00e6",17,119465],["7",16,59733],["3",15,29867],["1",14,14934],["\u00e9",17,119480],["/",19,477924],["\u00e0",19,477925],["\u0153",21,1911704],["\u00ee",22,3823410],["#",23,7646823],["\t",22,3823412],["$",22,3823413],["%",23,7646828],["@",23,7646829],["\\",23,7646830],["`",23,7646831],["}",22,3823416],["\u00e2",22,3823417],["\u00ea",21,1911709],["\u00e7",20,955855],["9",16,59741],["4",15,29871],["]",11,1867],["-",10,934],["[",11,1870],["q",11,1871],["p",7,117],["m",6,59],["e",4,15]],"eof_code":[23,7646822],"metadata":{"frequencies":{"\n":138037,"p":61600,"r":252082,"o":332873,"j":4910,"e":481144,"c":92002,"t":354271," ":823018,"g":72877,"u":137495,"n":260496,"b":64105,"\u2019":14526,"s":266719,"h":255777,"m":117542,"l":180842,"w":96316,"k":37816,"f":86188,"i":269305,"a":309773,",":92277,"y":99531,"d":158820,"v":40214,".":83846,"-":6324,":":4523,"*":38,"(":294,"2":247,"0":173,"1":390,"7":94,")":293,"9":107,"4":218,"[":3333,"#":1,"]":3324,"3":188,"8":151,"x":5330,";":17964,"z":1840,"\u2018":361,"?":11061,"q":3953,"5":122,"6":158,"!":8591,"\u00e6":43,"&":62,"\u2014":1412,"\u201c":353,"\u201d":284,"_":4651,"\"":170,"'":17806,"|":32,"\u0153":2,"\u00e0":13,"\u00e9":45,"\u00e8":38,"\u00e2":2,"\u00e7":9,"\u00ee":1,"\u00ea":5,"`":1,"\t":2,"}":2,"\\":1,"/":12,"%":1,"@":1,"$":2}},"concat":"str_join"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"type":"dahuffman code table","version":1,"code_table":[[" ",2,0],["a",4,4],["d",5,10],["c",6,22],["E",7,46],["v",7,47],["o",4,6],["f",6,28],["T",7,58],["C",8,118],["x",10,476],["(",14,7632],["8",15,15266],["6",15,15267],["\"",15,15268],["%",22,1954432],["@",22,1954433],["\\",22,1954434],["`",22,1954435],["}",21,977218],["\u00e2",21,977219],["\u00e7",19,244305],["\u00c9",20,488612],["\u00ea",20,488613],["/",19,244307],["\u00e6",17,61077],["7",16,30539],["\u201c",14,7635],["\u2014",12,1909],["q",11,955],["G",9,239],["l",5,15],["t",4,8],["w",6,36],[".",6,37],["\n",5,19],["P",9,320],["?",9,321],["L",8,161],["A",7,81],["y",6,41],[",",6,42],["F",9,344],["K",10,690],["j",11,1382],["V",11,1383],["R",8,173],["b",7,87],["p",7,88],["N",8,178],["-",10,716],["]",11,1434],["[",11,1435],["U",9,359],["m",6,45],["i",5,23],["e",4,12],["r",5,26],["I",7,108],["D",9,436],["B",9,437],["O",8,219],["\u2019",9,440],["M",9,441],["S",8,221],["g",7,111],["n",5,28],["s",5,29],["h",5,30],["u",6,62],["k",8,252],["Y",10,1012],["0",15,32416],["3",15,32417],["\u2018",14,16209],["X",14,16210],["1",14,16211],["J",12,4053],[":",11,2027],["W",9,507],["H",9,508],["'",9,509],["!",10,1020],["\u00e9",17,130688],["\u0153",21,2091024],["\u00c6",21,2091025],["\u00ee",22,4182052],["#",23,8364107],["\t",22,4182054],["$",22,4182055],["\u00e0",19,522757],["|",18,261379],["9",16,65345],["4",15,32673],["2",15,32674],["5",16,65350],["&",17,130702],["*",18,261406],["\u00e8",18,261407],["\u201d",15,32676],[")",15,32677],["Z",14,16339],["Q",13,8170],["z",13,8171],["_",11,2043],[";",9,511]],"eof_code":[23,8364106],"metadata":{"frequencies":{"\n":164202,"P":10725,"r":227404,"o":304523,"j":3050,"e":444276,"c":73035,"t":315370," 
":1104779,"G":10529,"u":124306,"n":235066,"b":50694,"g":62348,"\u2019":14526,"s":235725,"T":38901,"h":238755,"C":18967,"m":102883,"p":50875,"l":158708,"W":16698,"k":31929,"f":74755,"i":217354,"a":264813,"S":30994,",":92277,"y":92141,"B":13411,"w":79618,"U":13189,"d":145498,"v":37036,".":83846,"Y":7390,"-":6324,"L":22134,"I":51951,":":4523,"*":38,"O":28350,"N":25430,"E":36868,"(":294,"2":247,"0":173,"1":390,"7":94,")":293,"A":44960,"R":24678,"D":13322,"J":1860,"9":107,"4":218,"[":3333,"#":1,"]":3324,"M":14659,"3":188,"F":11433,"8":151,"H":17022,"K":5887,"X":382,"V":3178,";":17964,"z":1240,"\u2018":361,"x":4948,"?":11061,"q":2725,"5":122,"6":158,"!":8591,"\u00e6":40,"&":62,"\u2014":1412,"\u201c":353,"\u201d":284,"_":4651,"Q":1228,"\"":170,"'":17806,"|":32,"Z":600,"\u0153":2,"\u00c6":3,"\u00e0":13,"\u00c9":5,"\u00e9":40,"\u00e8":38,"\u00e2":2,"\u00e7":9,"\u00ee":1,"\u00ea":5,"`":1,"\t":2,"}":2,"\\":1,"/":12,"%":1,"@":1,"$":2}},"concat":"str_join"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"type":"dahuffman code table","version":1,"code_table":[["n",4,0],["s",4,1],["h",4,2],["u",5,6],["k",7,28],["Y",9,116],["0",14,3744],["3",14,3745],["\u2018",13,1873],["X",13,1874],["1",13,1875],["J",11,469],[":",10,235],["W",8,59],["H",8,60],["'",8,61],["!",9,124],["\u00e9",16,16000],["\u0153",20,256016],["\u00c6",20,256017],["\u00ee",21,512036],["#",22,1024075],["\t",21,512038],["$",21,512039],["\u00e0",18,64005],["|",17,32003],["9",15,8001],["4",14,4001],["2",14,4002],["5",15,8006],["&",16,16014],["*",17,32030],["\u00e8",17,32031],["\u201d",14,4004],[")",14,4005],["Z",13,2003],["Q",12,1002],["z",12,1003],["_",10,251],[";",8,63],["a",4,4],["\n",5,10],["d",5,11],["c",6,24],["E",7,50],["v",7,51],["f",6,26],["T",7,54],["C",8,110],["x",10,444],["(",14,7120],["8",15,14242],["6",15,14243],["\"",15,14244],["%",22,1823360],["@",22,1823361],["\\",22,1823362],["`",22,1823363],["}",21,911682],["\u00e2",21,911683],["\u00e7",19,227921],["\u00c9",20,455844],["\u00ea",20,455845],["/",19,227923],["\u00e6",17,56981],["7",16,28491],["\u201c",14,7123],["\u2014",12,1781],["q",11,891],["G",9,223],["o",4,7],["t",4,8],["l",5,18],["w",6,38],[".",6,39],["P",9,320],["?",9,321],["L",8,161],["A",7,81],["y",6,41],[",",6,42],["F",9,344],["K",10,690],["j",11,1382],["V",11,1383],["R",8,173],["b",7,87],["p",7,88],["N",8,178],["-",10,716],["]",11,1434],["[",11,1435],["U",9,359],["m",6,45],["i",5,23],[" ",3,6],["e",4,14],["r",5,30],["I",7,124],["D",9,500],["B",9,501],["O",8,251],["\u2019",9,504],["M",9,505],["S",8,253],["g",7,127]],"eof_code":[22,1024074],"metadata":{"frequencies":{"\n":138037,"P":10725,"r":227404,"o":304523,"j":3050,"e":444276,"c":73035,"t":315370," 
":823018,"G":10529,"u":124306,"n":235066,"b":50694,"g":62348,"\u2019":14526,"s":235725,"T":38901,"h":238755,"C":18967,"m":102883,"p":50875,"l":158708,"W":16698,"k":31929,"f":74755,"i":217354,"a":264813,"S":30994,",":92277,"y":92141,"B":13411,"w":79618,"U":13189,"d":145498,"v":37036,".":83846,"Y":7390,"-":6324,"L":22134,"I":51951,":":4523,"*":38,"O":28350,"N":25430,"E":36868,"(":294,"2":247,"0":173,"1":390,"7":94,")":293,"A":44960,"R":24678,"D":13322,"J":1860,"9":107,"4":218,"[":3333,"#":1,"]":3324,"M":14659,"3":188,"F":11433,"8":151,"H":17022,"K":5887,"X":382,"V":3178,";":17964,"z":1240,"\u2018":361,"x":4948,"?":11061,"q":2725,"5":122,"6":158,"!":8591,"\u00e6":40,"&":62,"\u2014":1412,"\u201c":353,"\u201d":284,"_":4651,"Q":1228,"\"":170,"'":17806,"|":32,"Z":600,"\u0153":2,"\u00c6":3,"\u00e0":13,"\u00c9":5,"\u00e9":40,"\u00e8":38,"\u00e2":2,"\u00e7":9,"\u00ee":1,"\u00ea":5,"`":1,"\t":2,"}":2,"\\":1,"/":12,"%":1,"@":1,"$":2}},"concat":"str_join"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"type":"dahuffman code table","version":1,"code_table":[["6",6,0],["T",10,16],["U",10,17],["~",9,9],["q",8,5],["N",8,6],["Q",12,112],["\\",12,113],["K",11,57],[")",10,29],["\n",9,15],["9",6,2],["w",6,3],["y",6,4],["z",8,20],[",",8,21],["(",10,88],["%",13,712],["Z",15,2852],["[",15,2853],["\u2013",14,1427],["{",13,714],["}",13,715],["X",11,179],["R",10,90],["Y",11,182],["J",12,366],["*",13,734],["]",15,2940],["\t",15,2941],["|",14,1471],["x",8,23],["g",6,6],[".",6,7],["a",4,2],["u",5,6],["F",8,56],[":",8,57],["v",7,29],["3",6,15],["b",7,32],["B",8,66],["D",8,67],["l",6,17],["/",5,9],["t",4,5],["j",8,96],["E",8,97],["f",7,49],["2",6,25],["m",6,26],["p",6,27],["d",5,14],["c",5,15],["e",4,8],["h",6,36],["I",9,296],["H",10,594],["O",10,595],["P",10,596],["W",12,2388],["V",12,2389],["#",13,4780],["'",14,9562],["?",16,38252],["+",17,76506],["@",18,153014],["\u200c",19,306030],["\ufeff",21,1224125],["\u00f1",20,612063],["!",15,19127],["G",12,2391],["&",10,598],["M",10,599],["=",7,75],["<",5,19],[">",5,20],["1",6,42],["-",6,43],["s",5,22],["\"",6,46],["5",7,94],["L",10,760],[";",10,761],["S",9,381],["C",8,191],["0",5,24],["n",5,25],["_",5,26],["o",5,27],["i",5,28],["8",7,116],["7",7,117],["4",7,118],["A",8,238],["k",8,239],[" ",5,30],["r",5,31]],"eof_code":[21,1224124],"metadata":{"frequencies":{"<":46938,"r":59681,"e":90285,"s":50883,"p":20779,"o":55844,"n":55269,">":46983,"w":14986," 
":57483,"_":55491,"i":57191,"d":41552,"=":12621,"\"":25423,"-":25348,"a":71880,"b":9330,"~":1905,"8":14107,"5":13388,"f":10425,"z":3910,"k":7305,"c":42556,"u":35712,"0":53392,"4":14407,"1":24184,"D":4802,"A":7226,"9":14922,"6":14904,"F":4441,"B":4775,"t":80992,"h":21868,":":4540,"/":38985,".":17228,"y":15221,"x":4391,"j":4977,"m":20687,"l":19812,"2":20584,"g":16871,"3":18132,"7":14377,"v":9120,"q":3682,"E":5051,"C":7077,"N":3713,",":4127,"(":993,")":992,"K":484,"V":320,"I":2495,"*":154,"U":915,"X":530,"Y":541,"S":3627,"W":315,"M":1496,"J":282,"R":1112,"O":1304,"'":86,"G":403,"H":1268,"Q":212,"L":1532,"P":1389,"%":124,"T":827,"Z":30,"\u2013":68,"#":163,"&":1393,";":1584,"!":53,"+":13,"\n":1947,"[":30,"]":30,"?":21,"|":78,"\\":231,"{":137,"}":137,"\t":48,"\u00f1":3,"\u200c":3,"\ufeff":2,"@":7}},"concat":"str_join"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
""" | ||
Functionality to save/load a code table to/from a file | ||
""" | ||
|
||
import json | ||
import logging | ||
import pickle | ||
from pathlib import Path | ||
from typing import Any, Optional, Union | ||
|
||
from dahuffman.huffmancodec import _EOF, PrefixCodec | ||
|
||
_log = logging.getLogger(__name__) | ||
|
||
|
||
def ensure_dir(path: Union[str, Path]) -> Path:
    """
    Make sure that the given directory exists, creating it
    (including missing parent directories) when necessary.

    :param path: directory path, as string or Path
    :return: the directory path as a Path object
    """
    directory = Path(path)
    # `exist_ok=True`: an already existing directory is not an error.
    directory.mkdir(parents=True, exist_ok=True)
    return directory
|
||
|
||
def pickle_save(
    codec: PrefixCodec, path: Union[str, Path], metadata: Any = None
) -> None:
    """
    Dump the code table of a codec to a pickle file.

    Along with the code table, the codec's class and its `concat` callable
    are stored in the pickled dictionary.

    :param codec: codec whose code table should be persisted
    :param path: file path to persist to (missing parent directories are created)
    :param metadata: additional metadata to include (left out when falsy)
    """
    table = codec.get_code_table()
    codec_cls = type(codec)
    payload = {"code_table": table, "type": codec_cls, "concat": codec._concat}
    if metadata:
        payload["metadata"] = metadata

    target = Path(path)
    ensure_dir(target.parent)
    with target.open(mode="wb") as f:
        pickle.dump(payload, file=f)
    _log.info(
        f"Saved {codec_cls.__name__} code table ({len(table)} items) to {str(target)!r}"
    )
|
||
|
||
def pickle_load(path: Union[str, Path]) -> PrefixCodec:
    """
    Load a persisted PrefixCodec from a pickle file.

    .. warning::
        Unpickling can execute arbitrary code.
        Only load files that come from a trusted source.

    :param path: path to serialized PrefixCodec code table data.
    :return: instance of the codec class stored in the file,
        built from the stored code table and `concat` callable.
    :raises TypeError: if the stored "type" is not a PrefixCodec (sub)class.
    """
    path = Path(path)
    with path.open(mode="rb") as f:
        # NOTE(security): pickle is not safe against maliciously crafted data;
        # `path` must point to a trusted file.
        data = pickle.load(f)
    cls = data["type"]
    # Explicit check instead of `assert`, which is skipped when running with `-O`.
    if not (isinstance(cls, type) and issubclass(cls, PrefixCodec)):
        raise TypeError(
            f"Expected a PrefixCodec subclass in {str(path)!r}, but got {cls!r}"
        )
    code_table = data["code_table"]
    _log.info(
        f"Loading {cls.__name__} with {len(code_table)} code table items from {str(path)!r}"
    )
    return cls(code_table, concat=data["concat"])
|
||
|
||
def json_save(
    codec: PrefixCodec, path: Union[str, Path], metadata: Optional[dict] = None
) -> None:
    """
    Persist the code table as a JSON file.
    Requires that all structures in the code table are JSON-serializable.

    The file contains the fields "type", "version" and "code_table",
    and optionally "eof_code", "metadata" and "concat".

    :param codec: codec whose code table should be persisted
    :param path: file path to persist to (missing parent directories are created)
    :param metadata: additional metadata to include in the file.
    """
    # Work on a shallow copy: removing the EOF entry below must not alter
    # the mapping handed out by the codec (which may be its internal table).
    code_table = dict(codec.get_code_table())

    # Extract internal _EOF symbol from code table (`None` if there is none)
    eof_code = code_table.pop(_EOF, None)

    # Transform code table dictionary to a list, to avoid string-coercion of keys in JSON mappings.
    rows = [[symbol, *code] for (symbol, code) in code_table.items()]

    data = {
        "type": "dahuffman code table",
        "version": 1,
        "code_table": rows,
    }
    if eof_code is not None:
        data["eof_code"] = eof_code
    if metadata:
        data["metadata"] = metadata

    # Callables can not be stored in JSON: store a name for the supported ones.
    if codec._concat == list:
        data["concat"] = "list"
    elif codec._concat == "".join:
        data["concat"] = "str_join"
    elif codec._concat == bytes:
        data["concat"] = "bytes"
    else:
        # The "concat" field is left out of the file in this case.
        _log.warning(f"Unsupported concat callable {codec._concat!r}")

    path = Path(path)
    ensure_dir(path.parent)
    with path.open("w", encoding="utf8") as f:
        # Compact output: no indentation and no whitespace after separators.
        json.dump(obj=data, fp=f, indent=None, separators=(",", ":"))
    _log.info(
        f"Saved {type(codec).__name__} code table ({len(rows)} items) to {str(path)!r}"
    )
|
||
|
||
def json_load(path: Union[str, Path]) -> PrefixCodec:
    """
    Load a PrefixCodec from a JSON code table file
    (with "type" field "dahuffman code table" and "version" 1).

    :param path: path to the JSON file
    :return: PrefixCodec built from the stored code table
    :raises ValueError: if the file is not a version 1 dahuffman code table.
    """
    path = Path(path)
    with path.open(mode="r", encoding="utf8") as f:
        data = json.load(fp=f)

    # Explicit checks instead of `assert`, which is skipped when running with `-O`.
    if not isinstance(data, dict) or data.get("type") != "dahuffman code table":
        raise ValueError(f"Not a dahuffman code table file: {str(path)!r}")
    if data.get("version") != 1:
        raise ValueError(
            f"Unsupported code table version {data.get('version')!r} in {str(path)!r}"
        )

    # Reconstruct code table: first item of each row is the symbol, the rest is its code.
    # JSON arrays are decoded as lists; convert the codes to tuples
    # (presumably the form of the entries in the original table -- TODO confirm).
    code_table = {row[0]: tuple(row[1:]) for row in data["code_table"]}

    if "eof_code" in data:
        code_table[_EOF] = tuple(data["eof_code"])

    # The "concat" field can be missing; fall back to `list`
    # for a missing or unknown value instead of failing with a KeyError.
    concat = {"str_join": "".join, "bytes": bytes}.get(data.get("concat"), list)

    _log.info(
        f"Loading PrefixCodec with {len(code_table)} code table items from {str(path)!r}"
    )
    return PrefixCodec(code_table, concat=concat)
Oops, something went wrong.