I'm not sure if this implementation is good enough. The function takes a vector of 1 million chars and creates a table of the sequences that pass before the next occurrence of itself.
Some questions I have, though feel free to point other things out as well:
- Is the initialization of the hash-map done well?
- Am I using the correct / optimal data structures and algorithm?
- I think my code is more procedural than needs to be, having trouble thinking of folding this correctly.
(define (get-reocc v)
;'(#\J #\S #\T #\I #\J #\I #\O #\Z #\T #\S ....) ->
; '#hash((#\T . #(1 357 1661 1939 1759 1425 1240 985 890 716 556 483 392 335 268 1258))
; (#\J . #(1 449 1613 1914 1725 1480 1266 1021 850 748 581 527 377 317 255 1223))
; (#\S . #(1 379 1678 1995 1708 1397 1230 1006 860 714 605 489 378 318 323 1211))
; (#\L . #(1 449 1671 1916 1663 1492 1256 1000 862 735 572 449 400 330 276 1259))
; (#\O . #(1 396 1596 1854 1727 1491 1270 1109 873 743 584 473 371 315 273 1209))
; (#\I . #(1 396 1625 1901 1758 1414 1229 1057 852 733 605 483 359 328 260 1249))
; (#\Z . #(1 394 1529 1973 1657 1473 1309 1018 863 746 569 481 362 355 245 1256)))
(define reocc-table2 (make-hash))
(for ([i pieces])
(hash-set! reocc-table2 i (make-vector 16 0)))
(define (update-reocc key pos t)
; #\T 3 '#hash((#\T . #(1 357 1661 1939 1759 -> '#hash((#\T . #(1 357 1661 1940 1759
(define v2 (hash-ref t key))
(vector-set! v2 pos (+ 1 (vector-ref v2 pos))))
; (hash-set y key (vector-set! v2 pos (+ 1 (vector-ref v2 pos))))) same as above?
(define (find-prev v piece pos i2)
; (#\J #\S #\T #\I #\J #\I #\O #\Z #\T #\S) #\T, #\T, 10, 0 -> 8
; (define x (vector-ref v (- pos i2)))
(cond [(char=? piece x) i2]
[(= 0 (- pos i2)) 0]
[#t (find-prev v piece pos (+ i2 1))]))
(define (f v t i)
(define p (vector-ref v i))
(define stop (- 1 (vector-length v)))
(if(>= i stop)
t
(f v (lambda () (update-reocc p (find-prev v p i 0) t)) (+ i 1))))
(f v reocc-table2 0))