Code Review Stack Exchange is a question and answer site for peer programmer code reviews. Join them; it only takes a minute:

Sign up
Here's how it works:
  1. Anybody can ask a question
  2. Anybody can answer
  3. The best answers are voted up and rise to the top

Given an html page with heading tags such h1, h2 through h6 and p tags construct a hierarchical structure. A super problem of this problem is explained in Heading Based Sectional Hierarchy Identification for HTML Documents.

I've written the following code, how can I improve it:

(use-modules (ice-9 match))


(define-syntax test-check
  (syntax-rules ()
    ((_ title tested-expression expected-result)
     (begin (format #t "** Checking ~a\n" title)
            (let* ((expected expected-result)
                   (produced tested-expression))
              (if (not (equal? expected produced))
                  (begin (format #t "*** Expected: ~s\n" expected)
                         (format #t "*** Computed: ~s\n" produced))))))))


(define doc '((h . 1) p (h . 2) p p (h . 2) p p (h . 1) p p))

;; (define out ((div
;;               (h1 "title 1")
;;               (p "paragraph 1#1")
;;               (div
;;                (h2 "title 1.1")
;;                (p paragraph 

(define pp pretty-print)

(define (append-paragraph tree)
  (match tree
    ('() '(p))
    (('p . rest) (cons 'p (append-paragraph rest)))
    (((('h . level) . rest)) (list (cons (cons 'h level) (append-paragraph rest))))
    ((head . rest) (cons head (append-paragraph rest)))
    (_ (display "fuu ") (pp tree) (error tree))))

(test-check "append-paragraph: null"
  (append-paragraph '())
  '(p))

(test-check "append-paragraph: single paragraph"
  (append-paragraph '(p))
  '(p p))

(test-check "append-paragraph: single heading"
  (append-paragraph '(((h . 1))))
  '(((h . 1) p)))

(test-check "append-paragraph: paragraph followed by a single heading"
  (append-paragraph '(p ((h . 1))))
  '(p ((h . 1) p)))

(test-check "append-paragraph: paragraph followed by a heading with a paragraph"
  (append-paragraph '(p ((h . 1) p)))
  '(p ((h . 1) p p)))

(test-check "append-paragraph: nested 0"
  (append-paragraph '(p ((h . 1) p) ((h . 1) p ((h . 2) p p))))
  '(p ((h . 1) p) ((h . 1) p ((h . 2) p p p))))

(test-check "append-paragraph: nested 1"
  (append-paragraph '(p ((h . 1) p) ((h . 1) p ((h . 2) p p) ((h . 2) p)) ((h . 1))))
  '(p ((h . 1) p) ((h . 1) p ((h . 2) p p) ((h . 2) p)) ((h . 1) p)))

(test-check "append-paragraph: nested 2"
  (append-paragraph '(p ((h . 1) p ((h . 2) p) ((h . 2) p ((h . 3) p) ((h . 3) p)))))
  '(p ((h . 1) p ((h . 2) p) ((h . 2) p ((h . 3) p) ((h . 3) p p)))))

(define (append-heading tree level)
  (match tree
    ('() `(((h . ,level))))
    (((('h . other) . rest))
     (if (eq? other level)
         `(((h . ,level) . ,rest) ((h . ,level)))
         `(((h . ,other) . ,(append-heading rest level)))))
    ((head . rest) `(,head . ,(append-heading rest level)))))

(test-check "append-heading: null"
  (append-heading '() 1)
  '(((h . 1))))

(test-check "append-heading: append h1 to h1"
  (append-heading '(((h . 1))) 1)
  '(((h . 1)) ((h . 1))))

(test-check "append-heading: append h1 to h1>h2"
  (append-heading '(((h . 1) ((h . 2) p))) 1)
  '(((h . 1) ((h . 2) p)) ((h . 1))))

(test-check "append-heading: append h2 to h1>h2"
  (append-heading '(((h . 1) p ((h . 2) p))) 2)
  '(((h . 1) p ((h . 2) p) ((h . 2)))))

(test-check "append-heading: append h3 to h1>h2"
  (append-heading '(((h . 1) p ((h . 2) p))) 3)
  '(((h . 1) p ((h . 2) p ((h . 3))))))

(test-check "append-heading: append h3 to h1>h2"
  (append-heading '(((h . 1) p ((h . 2) p))) 3)
  '(((h . 1) p ((h . 2) p ((h . 3))))))


(define (parse doc)
  (let loop ((doc doc)
             (out '()))
    (match doc
      ('() out)
      (('p . rest) (loop rest (append-paragraph out)))
      ((('h . level) . rest) (loop rest (append-heading out level))))))


(test-check "parse: paragraph"
  (parse '(p))
  '(p))

(test-check "parse 0"
  (parse '(p (h . 1) p (h . 2) p (h . 2) p (h . 3) p (h . 3) p (h . 1) p))
  '(p ((h . 1) p ((h . 2) p) ((h . 2) p ((h . 3) p) ((h . 3) p))) ((h . 1) p)))

(test-check "parse 1"
  (parse '(p (h . 1) p (h . 2) p (h . 2) p (h . 3) p (h . 3) p p))
  '(p ((h . 1) p ((h . 2) p) ((h . 2) p ((h . 3) p) ((h . 3) p p)))))

I read I can use zipper or cursors. Can the implementation of cursors be simpler in my case?

share|improve this question

If you map h1, h2, h3 and p to integers representation their priority you can solve the problem in a simpler way.

def abc(l,s=0):
    p = [l[s]]
    s = s + 1
    while s < len(l):
        if l[s] <= p[0]:
            break
        ll, s = abc(l,s)
        if len(ll) == 1:
            p.append(ll[0])
        else:
            p.append(ll)
    return p,s

print(abc([0,1,2,2]))
print(abc([0,1,8,8,2,8,8,2,8,4,1,2]))

This use mutation, but you can adapt it to work without mutation.

share|improve this answer

Your Answer

 
discard

By posting your answer, you agree to the privacy policy and terms of service.

Not the answer you're looking for? Browse other questions tagged or ask your own question.