How can I make my Java-generated zip file identical to a WinZip-generated one

4

I have a class that is zipping a directory's contents into a zip file. The code I'm using is below. The problem I'm having is that the zip file generated by me cannot be read by the application I'm loading it into. However, if I unzip the zip file that's being generated and use WinZip to zip it back up again, the file can be used. I have no control over the target application that's loading the zip so all I can do is make my file look like the WinZip-generated version. I've opened each zip file using the WinZip detailed diagnostics feature and I can see lots of differences in the files produced but I don't understand which ones may be causing the issue. See the bottom of the question for examples.

package com.mycompany.utils;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import org.apache.log4j.Logger;

/**
 * Zips the contents of a source directory into a zip file using
 * {@code java.util.zip}.
 *
 * <p>All of the work happens in the constructor: the source directory is
 * walked to build {@link #fileList} and the archive is then written. The
 * resulting file is available through {@link #getZipFile()}.
 *
 * <p>Entry names are always written with forward slashes, as the ZIP format
 * requires, regardless of the platform's native separator.
 */
public class FileZipper {

    private static final Logger LOGGER = Logger.getLogger(FileZipper.class);
    private final String destinationZipFile;
    private final String sourceDirectory;

    /** Paths of every file and sub-directory, relative to the source directory (native separators). */
    List<String> fileList;
    private File zipFile;

    /**
     * @return the zip file that was written, as set by
     *         {@link #compressDirectoryContentsToZip(String, String)}
     */
    public File getZipFile() {
        return zipFile;
    }

    /**
     * Zips a source directory into the destination zip file
     * 
     * @param source
     *            directory whose contents are archived
     * @param destination
     *            path of the zip file to create
     */
    FileZipper(String source, String destination) {
        LOGGER.info("Zipping source directory: "+source);
        LOGGER.info("To destination zip file:  "+destination);
        this.destinationZipFile = destination;
        this.sourceDirectory = source;
        fileList = new ArrayList<String>();
        generateFileList(new File(sourceDirectory));
        compressDirectoryContentsToZip(sourceDirectory, destinationZipFile);
    }

    /**
     * Traverse a directory and add every file and sub-directory below it to
     * fileList. The source directory itself is not added.
     * 
     * @param node
     *            file or directory
     */
    public void generateFileList(File node) {

        if (node.isFile()) {
            fileList.add(generateZipEntry(node.getAbsoluteFile().toString()));
            return;
        }

        if (node.isDirectory()) {
            // Compare paths by value; the previous "!=" on Strings only
            // compared object references.
            File root = new File(sourceDirectory).getAbsoluteFile();
            if (!node.getAbsoluteFile().equals(root)) {
                fileList.add(generateZipEntry(node.getAbsoluteFile().toString()));
            }

            // list() returns null when the directory cannot be read.
            String[] subNodes = node.list();
            if (subNodes == null) {
                LOGGER.warn("Unable to list directory: " + node);
                return;
            }
            for (String filename : subNodes) {
                generateFileList(new File(node, filename));
            }
        }

    }

    /**
     * Compress a directory to a zip file. Every path in fileList is resolved
     * against the source directory and written as a file or directory entry.
     *
     * <p>On an I/O failure the error is logged and the JVM is terminated with
     * exit status 1.
     *
     * @param sourceDirectory
     *            directory the paths in fileList are resolved against
     * @param destinationZipFile
     *            path of the zip file to write
     */
    public void compressDirectoryContentsToZip(String sourceDirectory,
        String destinationZipFile) {

        this.zipFile = new File(destinationZipFile);

        byte[] buffer = new byte[4096];

        // try-with-resources closes the whole stream chain even when an
        // entry fails part-way through.
        try (ZipOutputStream zos = new ZipOutputStream(
                new BufferedOutputStream(new FileOutputStream(destinationZipFile)))) {

            zos.setMethod(ZipOutputStream.DEFLATED);
            LOGGER.info("Zipping to : " + destinationZipFile);

            for (String entry : this.fileList) {
                File file = new File(sourceDirectory, entry);
                // ZIP entry names must use '/' even on Windows.
                String entryName = entry.replace(File.separatorChar, '/');

                if (file.isFile()) {
                    LOGGER.info("File Added : " + entry + " (" + file.length() + " bytes)");
                    zos.putNextEntry(new ZipEntry(entryName));
                    try (FileInputStream in = new FileInputStream(file)) {
                        int len;
                        while ((len = in.read(buffer)) != -1) {
                            zos.write(buffer, 0, len);
                        }
                    }
                    zos.closeEntry();

                } else if (file.isDirectory()) {
                    LOGGER.info("Directory Added : " + entry);
                    // A trailing '/' is what marks an entry as a directory.
                    zos.putNextEntry(new ZipEntry(entryName + "/"));
                    zos.closeEntry();
                } else {
                    LOGGER.warn("Not a file or directory: "+entry);
                }
            }
        } catch (IOException ex) {
            LOGGER.error("Failed to create zip file: " + destinationZipFile, ex);
            System.exit(1);
        }

        // Only reached when the archive was written and closed without error,
        // because the catch block above terminates the JVM.
        LOGGER.info("Zipping completed successfully");
        LOGGER.info("Generated zip file: "+ destinationZipFile);
    }


    /**
     * Format the filename for archiving by removing the path
     * of the source directory
     * 
     * @param file
     *            absolute path of a file or directory below the source directory
     * @return the path relative to the source directory, using the
     *         platform's native separator
     */
    private String generateZipEntry(String file) {
        // Strip the *absolute* form of the source directory: callers pass
        // absolute paths, so using the raw (possibly relative) string length
        // would cut at the wrong position.
        String root = new File(sourceDirectory).getAbsolutePath();
        // A filesystem root such as "/" already ends with the separator.
        int prefixLength = root.endsWith(File.separator) ? root.length() : root.length() + 1;
        String relative = file.substring(prefixLength);
        LOGGER.debug("Stripping '"+file+"' to '"+relative+"'");
        return relative;
    }

}

The output I see in WinZip's diagnostics for the resulting zip files is as follows:


Non-working file

Archive: C:\Users\conor\Desktop\dp-export-2\dp-export-2.zip   3573 bytes   2014-11-04 20:03:22
Current Location part 1 offset 3551
End central directory record PK0506 (4+18)
==========================================
    location of end-of-central-dir record:          3551 (0x00000ddf) bytes
    part number of this part (0000):                1
    part number of start of central dir (0000):     1
    number of entries in central dir in this part:  1
    total number of entries in central dir:         1
    size of central dir:                            56 (0x00000038) bytes
    relative offset of central dir:                 3495 (0x00000da7) bytes
    zipfile comment length:                         0

Current Location part 1 offset 3495
Central directory entry PK0102 (4+42): #1
======================================
    part number in which file begins (0000):        1
    relative offset of local header:                0 (0x00000000) bytes
    version made by operating system (00):          MS-DOS, OS/2, NT FAT
    version made by zip software (20):              2.0
    operat. system version needed to extract (00):  MS-DOS, OS/2, NT FAT
    unzip software version needed to extract (20):  2.0
    general purpose bit flag (0x0808) (bit 15..0):  0000.1000 0000.1000
      file security status  (bit 0):                not encrypted
      extended local header (bit 3):                yes
      UTF-8 names          (bit 11):                yes
    compression method (08):                        deflated
      compression sub-type (deflation):             normal
    file last modified on (0x00004564 0x0000a06a):  2014-11-04 20:03:20
    32-bit CRC value:                               0x07d797c8
    compressed size:                                3439 bytes
    uncompressed size:                              24021 bytes
    length of filename:                             10 characters
    length of extra field:                          0 bytes
    length of file comment:                         0 characters
    internal file attributes:                       0x0000
      apparent file type:                           binary
    external file attributes:                       0x00000000
      non-MSDOS external file attributes:           0x000000
      MS-DOS file attributes (0x00):                none
    filename: export.xml

Current Location part 1 offset 0
Local directory entry PK0304 (4+26): #1
------------------------------------
    operat. system version needed to extract (00):  MS-DOS, OS/2, NT FAT
    unzip software version needed to extract (20):  2.0
    general purpose bit flag (0x0808) (bit 15..0):  0000.1000 0000.1000
      file security status  (bit 0):                not encrypted
      extended local header (bit 3):                yes
      UTF-8 names          (bit 11):                yes
    compression method (08):                        deflated
      compression sub-type (deflation):             normal
    file last modified on (0x00004564 0x0000a06a):  2014-11-04 20:03:20
    32-bit CRC value:                               0x00000000
    compressed size:                                0 bytes
    uncompressed size:                              0 bytes
  note: "real" crc and sizes are in the extended local header
    length of filename:                             10 characters
    length of extra field:                          0 bytes
    filename: export.xml

Testing export.xml   OK

Current Location part 1 offset 3479
Extended local dir entry PK0708 (4+12): #1
---------------------------------------
    32-bit CRC value:                               0x07d797c8
    compressed size:                                3439 bytes
    uncompressed size:                              24021 bytes

No errors detected in compressed data of C:\Users\conor\Desktop\dp-export-2\dp-export-2.zip.

Working File that was re-zipped from the unzip of the non-working file

Archive: C:\Users\conor\Desktop\dp-export-2\dp-export-2b.zip   3564 bytes   2014-11-04 20:04:46
Current Location part 1 offset 3542
End central directory record PK0506 (4+18)
==========================================
    location of end-of-central-dir record:          3542 (0x00000dd6) bytes
    part number of this part (0000):                1
    part number of start of central dir (0000):     1
    number of entries in central dir in this part:  1
    total number of entries in central dir:         1
    size of central dir:                            92 (0x0000005c) bytes
    relative offset of central dir:                 3450 (0x00000d7a) bytes
    zipfile comment length:                         0

Current Location part 1 offset 3450
Central directory entry PK0102 (4+42): #1
======================================
    part number in which file begins (0000):        1
    relative offset of local header:                0 (0x00000000) bytes
    version made by operating system (00):          MS-DOS, OS/2, NT FAT
    version made by zip software (20):              2.0
    operat. system version needed to extract (00):  MS-DOS, OS/2, NT FAT
    unzip software version needed to extract (20):  2.0
    general purpose bit flag (0x0002) (bit 15..0):  0000.0000 0000.0010
      file security status  (bit 0):                not encrypted
      extended local header (bit 3):                no
    compression method (08):                        deflated
      compression sub-type (deflation):             maximum
    file last modified on (0x00004564 0x0000a06a):  2014-11-04 20:03:20
    32-bit CRC value:                               0x07d797c8
    compressed size:                                3410 bytes
    uncompressed size:                              24021 bytes
    length of filename:                             10 characters
    length of extra field:                          36 bytes
    length of file comment:                         0 characters
    internal file attributes:                       0x0001
      apparent file type:                           text
    external file attributes:                       0x00000020
      non-MSDOS external file attributes:           0x000000
      MS-DOS file attributes (0x20):                arc
    filename: export.xml
    extra field 0x000a (PKWARE Win32 Filetimes), 4 header and 32 data bytes:
    The Extended Timestamps are:
      Creation Date:                                2014-11-04 20:03:20
      Last Modified Date:                           2014-11-04 20:03:20
      Last Accessed Date:                           2014-11-04 20:03:20

Current Location part 1 offset 0
Local directory entry PK0304 (4+26): #1
------------------------------------
    operat. system version needed to extract (00):  MS-DOS, OS/2, NT FAT
    unzip software version needed to extract (20):  2.0
    general purpose bit flag (0x0002) (bit 15..0):  0000.0000 0000.0010
      file security status  (bit 0):                not encrypted
      extended local header (bit 3):                no
    compression method (08):                        deflated
      compression sub-type (deflation):             maximum
    file last modified on (0x00004564 0x0000a06a):  2014-11-04 20:03:20
    32-bit CRC value:                               0x07d797c8
    compressed size:                                3410 bytes
    uncompressed size:                              24021 bytes
    length of filename:                             10 characters
    length of extra field:                          0 bytes
    filename: export.xml

Testing export.xml   OK

No errors detected in compressed data of C:\Users\conor\Desktop\dp-export-2\dp-export-2b.zip.

So obviously I can see differences here in the outputs but I don't understand why all the differences are there, nor do I know which one is the cause of the failure. Any help in identifying why the Java-generated file might not be liked by my target application would be a great help. I have an inkling that the cause is the way the CRC is calculated by Java vs WinZip and subsequently added to the archive, but I have no experience of this kind of issue. My other theory is that it's due to the internal file attributes. The example that's failing to be parsed shows 'export.xml' as binary data but the WinZip version shows it as text.

java
zip
asked on Stack Overflow Nov 4, 2014 by conorgriffin • edited Nov 4, 2014 by conorgriffin

1 Answer

2

I worked around the issue. I don't understand what the root cause was, but by using the org.apache.commons.compress library the zip file is now usable. I'll dig into it again tomorrow because I'm curious to know what the difference is. Meanwhile, here's the updated class.

package com.mycompany.utils;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
import org.apache.log4j.Logger;

/**
 * Zips the contents of a source directory into a zip file using the Apache
 * Commons Compress {@code ZipArchiveOutputStream}.
 *
 * <p>All of the work happens in the constructor: the source directory is
 * walked to build {@link #fileList} and the archive is then written. The
 * resulting file is available through {@link #getZipFile()}.
 *
 * <p>Entry names are always written with forward slashes, as the ZIP format
 * requires, regardless of the platform's native separator.
 */
public class FileZipper {

    // Log under this class's own name rather than a differently named class.
    private static final Logger LOGGER = Logger.getLogger(FileZipper.class);
    private final String destinationZipFile;
    private final String sourceDirectory;

    /** Paths of every file and sub-directory, relative to the source directory (native separators). */
    List<String> fileList;
    private File zipFile;

    /**
     * @return the zip file that was written, as set by
     *         {@link #compressDirectoryContentsToZip(String, String)}
     */
    public File getZipFile() {
        return zipFile;
    }

    /**
     * Zips a source directory into the destination zip file
     * 
     * @param source
     *            directory whose contents are archived
     * @param destination
     *            path of the zip file to create
     */
    FileZipper(String source, String destination) {
        LOGGER.info("Zipping source directory: "+source);
        LOGGER.info("To destination zip file:  "+destination);
        this.destinationZipFile = destination;
        this.sourceDirectory = source;
        fileList = new ArrayList<String>();
        generateFileList(new File(sourceDirectory));
        compressDirectoryContentsToZip(sourceDirectory, destinationZipFile);
    }

    /**
     * Traverse a directory and add every file and sub-directory below it to
     * fileList. The source directory itself is not added.
     * 
     * @param node
     *            file or directory
     */
    public void generateFileList(File node) {

        if (node.isFile()) {
            fileList.add(generateZipEntry(node.getAbsoluteFile().toString()));
            return;
        }

        if (node.isDirectory()) {
            // Compare paths by value; the previous "!=" on Strings only
            // compared object references.
            File root = new File(sourceDirectory).getAbsoluteFile();
            if (!node.getAbsoluteFile().equals(root)) {
                fileList.add(generateZipEntry(node.getAbsoluteFile().toString()));
            }

            // list() returns null when the directory cannot be read.
            String[] subNodes = node.list();
            if (subNodes == null) {
                LOGGER.warn("Unable to list directory: " + node);
                return;
            }
            for (String filename : subNodes) {
                generateFileList(new File(node, filename));
            }
        }

    }

    /**
     * Compress a directory to a zip file. Every path in fileList is resolved
     * against the source directory and written as a file or directory entry.
     *
     * <p>On an I/O failure the error is logged and the JVM is terminated with
     * exit status 1.
     *
     * @param sourceDirectory
     *            directory the paths in fileList are resolved against
     * @param destinationZipFile
     *            path of the zip file to write
     */
    public void compressDirectoryContentsToZip(String sourceDirectory, String destinationZipFile) {

        this.zipFile = new File(destinationZipFile);

        byte[] buffer = new byte[4096];

        // try-with-resources closes the whole stream chain even when an
        // entry fails part-way through.
        try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(
                new BufferedOutputStream(new FileOutputStream(destinationZipFile)))) {

            zos.setMethod(ZipArchiveOutputStream.DEFLATED);
            // NOTE(review): level 0 corresponds to Deflater.NO_COMPRESSION, so
            // entry data is presumably written without being compressed --
            // confirm this is intended before changing it.
            zos.setLevel(0);
            LOGGER.info("Zipping to : " + destinationZipFile);

            for (String entry : this.fileList) {
                File file = new File(sourceDirectory, entry);
                // ZIP entry names must use '/' even on Windows.
                String entryName = entry.replace(File.separatorChar, '/');

                if (file.isFile()) {
                    LOGGER.info("File Added : " + entry + " (" + file.length() + " bytes)");
                    zos.putArchiveEntry(new ZipArchiveEntry(entryName));
                    try (FileInputStream in = new FileInputStream(file)) {
                        int len;
                        while ((len = in.read(buffer)) != -1) {
                            zos.write(buffer, 0, len);
                        }
                    }
                    zos.closeArchiveEntry();

                } else if (file.isDirectory()) {
                    LOGGER.info("Directory Added : " + entry);
                    // A trailing '/' is what marks an entry as a directory.
                    zos.putArchiveEntry(new ZipArchiveEntry(entryName + "/"));
                    zos.closeArchiveEntry();
                } else {
                    LOGGER.warn("Not a file or directory: "+entry);
                }
            }
        } catch (IOException ex) {
            LOGGER.error("Failed to create zip file: " + destinationZipFile, ex);
            System.exit(1);
        }

        // Only reached when the archive was written and closed without error,
        // because the catch block above terminates the JVM.
        LOGGER.info("Zipping completed successfully");
        LOGGER.info("Generated zip file: "+ destinationZipFile);
    }


    /**
     * Format the filename for archiving by removing the path
     * of the source directory
     * 
     * @param file
     *            absolute path of a file or directory below the source directory
     * @return the path relative to the source directory, using the
     *         platform's native separator
     */
    private String generateZipEntry(String file) {
        // Strip the *absolute* form of the source directory: callers pass
        // absolute paths, so using the raw (possibly relative) string length
        // would cut at the wrong position.
        String root = new File(sourceDirectory).getAbsolutePath();
        // A filesystem root such as "/" already ends with the separator.
        int prefixLength = root.endsWith(File.separator) ? root.length() : root.length() + 1;
        String relative = file.substring(prefixLength);
        LOGGER.debug("Stripping '"+file+"' to '"+relative+"'");
        return relative;
    }

}
answered on Stack Overflow Nov 4, 2014 by conorgriffin • edited Nov 4, 2014 by conorgriffin

User contributions licensed under CC BY-SA 3.0