I'm wondering whether I'm writing my Go program incorrectly or whether my server's resources are too small.
I have the following code:
// Load the complete XML feed into memory, then unmarshal it in a single call.
// Go string literals use double quotes; single quotes denote a rune literal.
spotXmlFile := "/var/www/html/app/public/xml/appdata.xml"
xmlFile, err := os.Open(spotXmlFile)
if err != nil {
	panic(err)
}
// Register Close only once Open is known to have succeeded.
defer xmlFile.Close()

// ReadAll buffers the entire file contents before any parsing starts.
byteValue, err := ioutil.ReadAll(xmlFile)
if err != nil {
	panic(err)
}

var listings Listings
if err := xml.Unmarshal(byteValue, &listings); err != nil {
	panic(err)
}
It processes an XML file that is 2.5 GB.
I'm using an AWS EC2 t3.large instance, which has 2 CPUs and 8 GB of memory, to try to process the data.
What's interesting is that memory usage doesn't exceed 4 GB, but the CPU spikes to 100% and then the program fails.
Here are my XML structs:
// Listings is the decoding target for the document root. XMLName ties it to
// the <Listings> element, and every <Listing> child is appended to the
// Listings slice.
type Listings struct {
	XMLName  xml.Name  `xml:"Listings"`
	Listings []Listing `xml:"Listing"`
}
// Listing is the decoding target for a single listing element.
//
// Every value is stored as a string, exactly as encoding/xml reads it; nothing
// is converted to a numeric, boolean, or time type. Fields tagged ",chardata"
// receive the character data found directly inside their element, and fields
// tagged ",attr" receive the named attribute of that element. Repeating child
// elements are modelled as slices.
type Listing struct {
	Text string `xml:",chardata"`

	// Address of the listing itself.
	Address struct {
		Text string `xml:",chardata"`
		PreferenceOrder string `xml:"preference-order"`
		AddressPreferenceOrder string `xml:"address-preference-order"`
		FullStreetAddress string `xml:"FullStreetAddress"`
		UnitNumber string `xml:"UnitNumber"`
		City string `xml:"City"`
		StateOrProvince string `xml:"StateOrProvince"`
		PostalCode string `xml:"PostalCode"`
		Country string `xml:"Country"`
	} `xml:"Address"`

	// ListPrice carries the element text plus two attributes.
	ListPrice struct {
		Text string `xml:",chardata"`
		IsgSecurityClass string `xml:"isgSecurityClass,attr"`
		CurrencyPeriod string `xml:"currencyPeriod,attr"`
	} `xml:"ListPrice"`

	ListingURL string `xml:"ListingURL"`
	ProviderName string `xml:"ProviderName"`
	ProviderURL string `xml:"ProviderURL"`
	ProviderCategory string `xml:"ProviderCategory"`
	LeadRoutingEmail string `xml:"LeadRoutingEmail"`
	Bedrooms string `xml:"Bedrooms"`
	Bathrooms string `xml:"Bathrooms"`

	PropertyType struct {
		Text string `xml:",chardata"`
		OtherDescription string `xml:"otherDescription,attr"`
	} `xml:"PropertyType"`
	PropertySubType struct {
		Text string `xml:",chardata"`
		OtherDescription string `xml:"otherDescription,attr"`
	} `xml:"PropertySubType"`

	ListingKey string `xml:"ListingKey"`
	ListingCategory string `xml:"ListingCategory"`
	ListingStatus string `xml:"ListingStatus"`

	// MarketingInformation groups four child elements, each with text and an
	// isgSecurityClass attribute.
	MarketingInformation struct {
		Text string `xml:",chardata"`
		PermitAddressOnInternet struct {
			Text string `xml:",chardata"`
			IsgSecurityClass string `xml:"isgSecurityClass,attr"`
		} `xml:"PermitAddressOnInternet"`
		VOWAddressDisplay struct {
			Text string `xml:",chardata"`
			IsgSecurityClass string `xml:"isgSecurityClass,attr"`
		} `xml:"VOWAddressDisplay"`
		VOWAutomatedValuationDisplay struct {
			Text string `xml:",chardata"`
			IsgSecurityClass string `xml:"isgSecurityClass,attr"`
		} `xml:"VOWAutomatedValuationDisplay"`
		VOWConsumerComment struct {
			Text string `xml:",chardata"`
			IsgSecurityClass string `xml:"isgSecurityClass,attr"`
		} `xml:"VOWConsumerComment"`
	} `xml:"MarketingInformation"`

	// Photos collects every <Photo> child into a slice.
	Photos struct {
		Text string `xml:",chardata"`
		Photo []struct {
			Text string `xml:",chardata"`
			MediaModificationTimestamp struct {
				Text string `xml:",chardata"`
				IsgSecurityClass string `xml:"isgSecurityClass,attr"`
			} `xml:"MediaModificationTimestamp"`
			MediaURL string `xml:"MediaURL"`
		} `xml:"Photo"`
	} `xml:"Photos"`

	DiscloseAddress string `xml:"DiscloseAddress"`
	ListingDescription string `xml:"ListingDescription"`
	MlsId string `xml:"MlsId"`
	MlsName string `xml:"MlsName"`
	MlsNumber string `xml:"MlsNumber"`
	LivingArea string `xml:"LivingArea"`
	LotSize string `xml:"LotSize"`
	YearBuilt string `xml:"YearBuilt"`
	ListingTitle string `xml:"ListingTitle"`
	FullBathrooms string `xml:"FullBathrooms"`
	ThreeQuarterBathrooms string `xml:"ThreeQuarterBathrooms"`
	HalfBathrooms string `xml:"HalfBathrooms"`
	OneQuarterBathrooms string `xml:"OneQuarterBathrooms"`
	PartialBathrooms string `xml:"PartialBathrooms"`

	// ListingParticipants holds one Participant, including its nested license
	// details.
	ListingParticipants struct {
		Text string `xml:",chardata"`
		Participant struct {
			Text string `xml:",chardata"`
			ParticipantKey string `xml:"ParticipantKey"`
			ParticipantId string `xml:"ParticipantId"`
			FirstName string `xml:"FirstName"`
			LastName string `xml:"LastName"`
			Role string `xml:"Role"`
			PrimaryContactPhone string `xml:"PrimaryContactPhone"`
			OfficePhone string `xml:"OfficePhone"`
			Email string `xml:"Email"`
			WebsiteURL string `xml:"WebsiteURL"`
			Licenses struct {
				Text string `xml:",chardata"`
				License struct {
					Text string `xml:",chardata"`
					LicenseCategory struct {
						Text string `xml:",chardata"`
						OtherDescription string `xml:"otherDescription,attr"`
					} `xml:"LicenseCategory"`
					LicenseNumber struct {
						Text string `xml:",chardata"`
						IsgSecurityClass string `xml:"isgSecurityClass,attr"`
					} `xml:"LicenseNumber"`
					Jurisdiction struct {
						Text string `xml:",chardata"`
						IsgSecurityClass string `xml:"isgSecurityClass,attr"`
					} `xml:"Jurisdiction"`
				} `xml:"License"`
			} `xml:"Licenses"`
		} `xml:"Participant"`
	} `xml:"ListingParticipants"`

	// Offices holds one Office, with its own code and address blocks.
	Offices struct {
		Text string `xml:",chardata"`
		Office struct {
			Text string `xml:",chardata"`
			OfficeKey string `xml:"OfficeKey"`
			OfficeId string `xml:"OfficeId"`
			OfficeCode struct {
				Text string `xml:",chardata"`
				OfficeCodeId string `xml:"OfficeCodeId"`
			} `xml:"OfficeCode"`
			Name string `xml:"Name"`
			CorporateName string `xml:"CorporateName"`
			PhoneNumber string `xml:"PhoneNumber"`
			Fax string `xml:"Fax"`
			Address struct {
				Text string `xml:",chardata"`
				PreferenceOrder string `xml:"preference-order"`
				AddressPreferenceOrder string `xml:"address-preference-order"`
				FullStreetAddress string `xml:"FullStreetAddress"`
				City string `xml:"City"`
				StateOrProvince string `xml:"StateOrProvince"`
				PostalCode string `xml:"PostalCode"`
				Country string `xml:"Country"`
			} `xml:"Address"`
			OfficeEmail string `xml:"OfficeEmail"`
			Website string `xml:"Website"`
		} `xml:"Office"`
	} `xml:"Offices"`

	// Brokerage contact details and address.
	Brokerage struct {
		Text string `xml:",chardata"`
		Name string `xml:"Name"`
		Phone string `xml:"Phone"`
		Email string `xml:"Email"`
		WebsiteURL string `xml:"WebsiteURL"`
		LogoURL string `xml:"LogoURL"`
		Address struct {
			Text string `xml:",chardata"`
			PreferenceOrder string `xml:"preference-order"`
			AddressPreferenceOrder string `xml:"address-preference-order"`
			FullStreetAddress string `xml:"FullStreetAddress"`
			UnitNumber string `xml:"UnitNumber"`
			City string `xml:"City"`
			StateOrProvince string `xml:"StateOrProvince"`
			PostalCode string `xml:"PostalCode"`
			Country string `xml:"Country"`
		} `xml:"Address"`
	} `xml:"Brokerage"`

	Franchise struct {
		Text string `xml:",chardata"`
		Name string `xml:"Name"`
	} `xml:"Franchise"`
	Builder struct {
		Text string `xml:",chardata"`
		Name string `xml:"Name"`
	} `xml:"Builder"`

	// Location holds coordinates, county/parcel data, and the community block
	// with its repeating <School> entries.
	Location struct {
		Text string `xml:",chardata"`
		Latitude string `xml:"Latitude"`
		Longitude string `xml:"Longitude"`
		Directions string `xml:"Directions"`
		County string `xml:"County"`
		ParcelId string `xml:"ParcelId"`
		Community struct {
			Text string `xml:",chardata"`
			Subdivision struct {
				Text string `xml:",chardata"`
				IsgSecurityClass string `xml:"isgSecurityClass,attr"`
			} `xml:"Subdivision"`
			Schools struct {
				Text string `xml:",chardata"`
				School []struct {
					Text string `xml:",chardata"`
					Name string `xml:"Name"`
					SchoolCategory string `xml:"SchoolCategory"`
					District struct {
						Text string `xml:",chardata"`
						IsgSecurityClass string `xml:"isgSecurityClass,attr"`
					} `xml:"District"`
				} `xml:"School"`
			} `xml:"Schools"`
		} `xml:"Community"`
	} `xml:"Location"`

	// DetailedCharacteristics mixes single-valued features with wrapper
	// elements; wrappers whose child is a slice accept repeated entries.
	DetailedCharacteristics struct {
		Text string `xml:",chardata"`
		Appliances struct {
			Text string `xml:",chardata"`
			Appliance []string `xml:"Appliance"`
		} `xml:"Appliances"`
		ArchitectureStyle struct {
			Text string `xml:",chardata"`
			OtherDescription string `xml:"otherDescription,attr"`
		} `xml:"ArchitectureStyle"`
		HasBarbecueArea string `xml:"HasBarbecueArea"`
		CoolingSystems struct {
			Text string `xml:",chardata"`
			CoolingSystem string `xml:"CoolingSystem"`
		} `xml:"CoolingSystems"`
		ExteriorTypes struct {
			Text string `xml:",chardata"`
			ExteriorType string `xml:"ExteriorType"`
		} `xml:"ExteriorTypes"`
		HasFireplace string `xml:"HasFireplace"`
		FloorCoverings struct {
			Text string `xml:",chardata"`
			FloorCovering []string `xml:"FloorCovering"`
		} `xml:"FloorCoverings"`
		HeatingFuels struct {
			Text string `xml:",chardata"`
			HeatingFuel string `xml:"HeatingFuel"`
		} `xml:"HeatingFuels"`
		HeatingSystems struct {
			Text string `xml:",chardata"`
			HeatingSystem []string `xml:"HeatingSystem"`
		} `xml:"HeatingSystems"`
		IsNewConstruction string `xml:"IsNewConstruction"`
		HasPool string `xml:"HasPool"`
		NumFloors string `xml:"NumFloors"`
		ParkingTypes struct {
			Text string `xml:",chardata"`
			ParkingType string `xml:"ParkingType"`
		} `xml:"ParkingTypes"`
		HasPatio string `xml:"HasPatio"`
		RoofTypes struct {
			Text string `xml:",chardata"`
			RoofType string `xml:"RoofType"`
		} `xml:"RoofTypes"`
		RoomCount string `xml:"RoomCount"`
		Rooms struct {
			Text string `xml:",chardata"`
			Room []string `xml:"Room"`
		} `xml:"Rooms"`
		HasVaultedCeiling string `xml:"HasVaultedCeiling"`
	} `xml:"DetailedCharacteristics"`

	ModificationTimestamp struct {
		Text string `xml:",chardata"`
		IsgSecurityClass string `xml:"isgSecurityClass,attr"`
	} `xml:"ModificationTimestamp"`
	Disclaimer struct {
		Text string `xml:",chardata"`
		IsgSecurityClass string `xml:"isgSecurityClass,attr"`
	} `xml:"Disclaimer"`
}
Here's my run output:
fatal error: runtime: out of memory
runtime stack:
runtime.throw(0x4f44be, 0x16)
/usr/local/go/src/runtime/panic.go:774 +0x72
runtime.sysMap(0xc104000000, 0x100000000, 0x5cc478)
/usr/local/go/src/runtime/mem_linux.go:169 +0xc5
runtime.(*mheap).sysAlloc(0x5b4dc0, 0x100000000, 0x7ba23, 0x0)
/usr/local/go/src/runtime/malloc.go:701 +0x1cd
runtime.(*mheap).grow(0x5b4dc0, 0x80000, 0xffffffff)
/usr/local/go/src/runtime/mheap.go:1252 +0x42
runtime.(*mheap).allocSpanLocked(0x5b4dc0, 0x80000, 0x5cc488, 0x100000081)
/usr/local/go/src/runtime/mheap.go:1163 +0x291
runtime.(*mheap).alloc_m(0x5b4dc0, 0x80000, 0x400101, 0xc0000344c8)
/usr/local/go/src/runtime/mheap.go:1015 +0xc2
runtime.(*mheap).alloc.func1()
/usr/local/go/src/runtime/mheap.go:1086 +0x4c
runtime.(*mheap).alloc(0x5b4dc0, 0x80000, 0x7ffd80010101, 0x433515)
/usr/local/go/src/runtime/mheap.go:1085 +0x8a
runtime.largeAlloc(0xfffffe00, 0xc000010101, 0xc000000180)
/usr/local/go/src/runtime/malloc.go:1138 +0x97
runtime.mallocgc.func1()
/usr/local/go/src/runtime/malloc.go:1033 +0x46
runtime.systemstack(0x452364)
/usr/local/go/src/runtime/asm_amd64.s:370 +0x66
runtime.mstart()
/usr/local/go/src/runtime/proc.go:1146
goroutine 1 [running]:
runtime.systemstack_switch()
/usr/local/go/src/runtime/asm_amd64.s:330 fp=0xc00006ac10 sp=0xc00006ac08 pc=0x452460
runtime.mallocgc(0xfffffe00, 0x4c56e0, 0x1, 0x0)
/usr/local/go/src/runtime/malloc.go:1032 +0x895 fp=0xc00006acb0 sp=0xc00006ac10 pc=0x40bb85
runtime.makeslice(0x4c56e0, 0xfffffe00, 0xfffffe00, 0x0)
/usr/local/go/src/runtime/slice.go:49 +0x6c fp=0xc00006ace0 sp=0xc00006acb0 pc=0x43dc8c
bytes.makeSlice(0xfffffe00, 0x0, 0x0, 0x0)
/usr/local/go/src/bytes/buffer.go:229 +0x77 fp=0xc00006ad48 sp=0xc00006ace0 pc=0x4673d7
bytes.(*Buffer).grow(0xc00006ae70, 0x200, 0x40000000)
/usr/local/go/src/bytes/buffer.go:142 +0x15b fp=0xc00006ad98 sp=0xc00006ad48 pc=0x466ecb
bytes.(*Buffer).ReadFrom(0xc00006ae70, 0x50fc00, 0xc000082018, 0xc000082018, 0x3, 0x0)
/usr/local/go/src/bytes/buffer.go:202 +0x4b fp=0xc00006ae08 sp=0xc00006ad98 pc=0x4671cb
io/ioutil.readAll(0x50fc00, 0xc000082018, 0x200, 0x0, 0x0, 0x0, 0x0, 0x0)
/usr/local/go/src/io/ioutil/ioutil.go:36 +0x100 fp=0xc00006aea8 sp=0xc00006ae08 pc=0x47b380
io/ioutil.ReadAll(...)
/usr/local/go/src/io/ioutil/ioutil.go:45
main.main()
/var/www/html/spot/importdata/main.go:36 +0xbf fp=0xc00006af60 sp=0xc00006aea8 pc=0x4ad63f
runtime.main()
/usr/local/go/src/runtime/proc.go:203 +0x21e fp=0xc00006afe0 sp=0xc00006af60 pc=0x42bfbe
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:1357 +0x1 fp=0xc00006afe8 sp=0xc00006afe0 pc=0x4543b1
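You are reading the whole file into memory and then unmarshaling it, which uses a lot of additional memory that you don't really need. The stack trace shows the failure inside ioutil.ReadAll: its buffer grows by doubling, and the allocation that fails (0xfffffe00 bytes, about 4 GiB) is requested while the previous buffer is still live. Try this and see what happens: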
// Go string literals use double quotes; single quotes denote a rune literal.
spotXmlFile := "/var/www/html/app/public/xml/appdata.xml"
xmlFile, err := os.Open(spotXmlFile)
if err != nil {
	panic(err)
}
// Register Close only once Open is known to have succeeded.
defer xmlFile.Close()

// Decode straight from the file: the decoder pulls bytes from xmlFile as it
// parses, so no separate in-memory copy of the whole document is made.
var listings Listings
if err := xml.NewDecoder(xmlFile).Decode(&listings); err != nil {
	panic(err)
}
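With a decoder, the file is parsed and unmarshaled as it is read, so the raw 2.5 GB of bytes never has to sit in memory all at once. The decoded listings value still holds every listing, though.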
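If you need only part of the file and want to optimize this further, you can walk the file token by token with the decoder's Token method and call DecodeElement only for the elements you need (for example, one Listing at a time), processing each and discarding it before moving on.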
User contributions licensed under CC BY-SA 3.0