I'm a beginner in Go, and just finished the Golang tour. This crawler is not the same as the one in the exercises in the tour but something I wrote myself. I am looking for suggestions for making it better in terms of idiomatic Go.
package main
import (
"fmt"
"sync"
"net/http"
"log"
"regexp"
"io/ioutil"
)
type Crawler struct {
urls map[string]bool
mux sync.Mutex
umatch *regexp.Regexp
}
func (c Crawler) parse(body string) (urls []string) {
return c.umatch.FindAllString(body, -1)
}
func (c Crawler) fetch(url string) (urls []string) {
res, err := http.Get(url)
if err != nil {
fmt.Println("Error in fetching %s: %s", url, err)
log.Fatal(err)
}
defer res.Body.Close()
body, err := ioutil.ReadAll(res.Body)
if err != nil {
log.Fatal(err)
}
urls = c.parse(string(body))
return
}
func (c Crawler) Crawl(url string, depth int) {
if depth <= 0 {
return
}
c.mux.Lock()
if c.urls[url] { //Already exists
c.mux.Unlock()
return
}
c.urls[url] = true
c.mux.Unlock()
log.Println("Fetching %s", url)
fetched := c.fetch(url)
for _, u := range fetched {
go c.Crawl(u, depth - 1)
}
return
}
func main() {
c := Crawler{urls : map[string]bool{}, umatch : regexp.MustCompile(`(http|ftp|https):\/\/([\w\-_]+(?:(?:\.[\w\-_]+)+))([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?`)}
c.Crawl("http://www.yahoo.com", 3)
for u, _ := range c.urls {
fmt.Println(u)
}
}