// Command main concurrently crawls a small, canned set of pages, fetching
// each URL at most once, and prints what it finds.
package main

import (
	"fmt"
	"sync"
)

// Fetcher retrieves a page and the links found on it.
type Fetcher interface {
	// Fetch returns the body of URL and a slice of URLs found on that page.
	Fetch(url string) (body string, urls []string, err error)
}

var (
	// urlcheck records every URL that has already been claimed by a Crawl
	// call, so that no page is fetched twice.
	urlcheck = make(map[string]bool)

	// urlsync guards urlcheck. Its zero value is ready to use, so Crawl
	// works without any set-up by the caller.
	urlsync sync.Mutex
)

// markVisited atomically records url as visited. It reports true if this call
// was the first to claim url, and false if url had been claimed before.
func markVisited(url string) bool {
	urlsync.Lock()
	defer urlsync.Unlock()

	if urlcheck[url] {
		return false
	}
	urlcheck[url] = true
	return true
}

// Crawl uses fetcher to recursively crawl pages starting with url, to a maximum
// of depth. Each URL is fetched at most once across all concurrent Crawl calls.
//
// Crawl sends exactly one value on done when it, and every crawl it started,
// has finished. The send happens on every return path, so the caller must
// receive from done (or give it a buffer) to avoid blocking Crawl.
func Crawl(url string, depth int, fetcher Fetcher, done chan bool) {
	// Signal this goroutine's end. Deferred so that it fires on the early
	// returns below as well as after the children have been awaited.
	defer func() { done <- true }()

	if depth <= 0 {
		return
	}
	if !markVisited(url) {
		return
	}

	body, urls, err := fetcher.Fetch(url)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("found: %s %q\n", url, body)

	// One shared completion channel for all children. It is buffered so that
	// no child blocks on its final send while the others are still running.
	childDone := make(chan bool, len(urls))
	for _, u := range urls {
		go Crawl(u, depth-1, fetcher, childDone)
	}
	// Wait for every child to finish before reporting our own completion.
	for range urls {
		<-childDone
	}
}

func main() {
	done := make(chan bool)
	go Crawl("https://golang.org/", 4, fetcher, done)
	<-done
}

// fakeFetcher is Fetcher that returns canned results.
type fakeFetcher map[string]*fakeResult

// fakeResult is the canned response for a single URL.
type fakeResult struct {
	body string
	urls []string
}

// Fetch returns the canned body and links stored for url, or an error if url
// is not present in f.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	if res, ok := f[url]; ok {
		return res.body, res.urls, nil
	}
	return "", nil, fmt.Errorf("not found: %s", url)
}

// fetcher is a populated fakeFetcher.
var fetcher = fakeFetcher{
	"https://golang.org/": &fakeResult{
		body: "The Go Programming Language",
		urls: []string{
			"https://golang.org/pkg/",
			"https://golang.org/cmd/",
		},
	},
	"https://golang.org/pkg/": &fakeResult{
		body: "Packages",
		urls: []string{
			"https://golang.org/",
			"https://golang.org/cmd/",
			"https://golang.org/pkg/fmt/",
			"https://golang.org/pkg/os/",
		},
	},
	"https://golang.org/pkg/fmt/": &fakeResult{
		body: "Package fmt",
		urls: []string{
			"https://golang.org/",
			"https://golang.org/pkg/",
		},
	},
	"https://golang.org/pkg/os/": &fakeResult{
		body: "Package os",
		urls: []string{
			"https://golang.org/",
			"https://golang.org/pkg/",
		},
	},
}