From afd26cdd12fef4bd2aafa2ac8d708e18d277a2fe Mon Sep 17 00:00:00 2001 From: Keuin Date: Tue, 12 Jan 2021 12:51:23 +0800 Subject: Implement object collection(not tested) and sha256(tested) --- .../com/keuin/kbackupfabric/util/BytesUtil.java | 16 ++++++ .../util/backup/inc/ObjectCollectionManager.java | 11 ---- .../backup/inc/identifier/ObjectIdentifier.java | 9 --- .../inc/identifier/SingleHashIdentifier.java | 32 ----------- .../util/backup/incremental/ObjectCollection.java | 45 +++++++++++++++ .../incremental/ObjectCollectionFactory.java | 48 ++++++++++++++++ .../identifier/FileIdentifierFactory.java | 8 +++ .../incremental/identifier/ObjectIdentifier.java | 10 ++++ .../incremental/identifier/Sha256Identifier.java | 67 ++++++++++++++++++++++ .../identifier/SingleHashIdentifier.java | 40 +++++++++++++ 10 files changed, 234 insertions(+), 52 deletions(-) create mode 100644 src/main/java/com/keuin/kbackupfabric/util/BytesUtil.java delete mode 100644 src/main/java/com/keuin/kbackupfabric/util/backup/inc/ObjectCollectionManager.java delete mode 100644 src/main/java/com/keuin/kbackupfabric/util/backup/inc/identifier/ObjectIdentifier.java delete mode 100644 src/main/java/com/keuin/kbackupfabric/util/backup/inc/identifier/SingleHashIdentifier.java create mode 100644 src/main/java/com/keuin/kbackupfabric/util/backup/incremental/ObjectCollection.java create mode 100644 src/main/java/com/keuin/kbackupfabric/util/backup/incremental/ObjectCollectionFactory.java create mode 100644 src/main/java/com/keuin/kbackupfabric/util/backup/incremental/identifier/FileIdentifierFactory.java create mode 100644 src/main/java/com/keuin/kbackupfabric/util/backup/incremental/identifier/ObjectIdentifier.java create mode 100644 src/main/java/com/keuin/kbackupfabric/util/backup/incremental/identifier/Sha256Identifier.java create mode 100644 src/main/java/com/keuin/kbackupfabric/util/backup/incremental/identifier/SingleHashIdentifier.java (limited to 'src/main') diff --git 
// File: src/main/java/com/keuin/kbackupfabric/util/BytesUtil.java
import java.nio.charset.StandardCharsets;

/**
 * Static helpers for converting raw bytes into text.
 */
public final class BytesUtil {

    /** Upper-case hexadecimal digits as ASCII bytes, indexed by nibble value (0-15). */
    private static final byte[] HEX_ARRAY = "0123456789ABCDEF".getBytes(StandardCharsets.US_ASCII);

    private BytesUtil() {
        // Static utility class; there is nothing to instantiate.
    }

    /**
     * Encodes a byte array as an upper-case hexadecimal string.
     * Every input byte produces exactly two characters, high nibble first.
     *
     * @param bytes the bytes to encode; must not be {@code null}
     * @return a string of length {@code bytes.length * 2}; empty for an empty array
     * @throws NullPointerException if {@code bytes} is {@code null}
     */
    public static String bytesToHex(byte[] bytes) {
        if (bytes == null) {
            throw new NullPointerException("bytes");
        }
        final byte[] hexChars = new byte[bytes.length * 2];
        for (int i = 0; i < bytes.length; i++) {
            // Mask to 0..255 so that negative bytes index the table correctly.
            final int value = bytes[i] & 0xFF;
            hexChars[i * 2] = HEX_ARRAY[value >>> 4];
            hexChars[i * 2 + 1] = HEX_ARRAY[value & 0x0F];
        }
        // Decode with the same charset the lookup table was encoded with.
        return new String(hexChars, StandardCharsets.US_ASCII);
    }
}
- */ -public class ObjectCollectionManager { - -} diff --git a/src/main/java/com/keuin/kbackupfabric/util/backup/inc/identifier/ObjectIdentifier.java b/src/main/java/com/keuin/kbackupfabric/util/backup/inc/identifier/ObjectIdentifier.java deleted file mode 100644 index 079f49d..0000000 --- a/src/main/java/com/keuin/kbackupfabric/util/backup/inc/identifier/ObjectIdentifier.java +++ /dev/null @@ -1,9 +0,0 @@ -package com.keuin.kbackupfabric.util.backup.inc.identifier; - -/** - * The identifier distinguishing files in the object collection. - * It should be based on cryptographic hash function in order to prevent possible attacks to the backup system. - * All identifiers should be immutable and implement their own equals method. - */ -public interface ObjectIdentifier { -} diff --git a/src/main/java/com/keuin/kbackupfabric/util/backup/inc/identifier/SingleHashIdentifier.java b/src/main/java/com/keuin/kbackupfabric/util/backup/inc/identifier/SingleHashIdentifier.java deleted file mode 100644 index 50b23bb..0000000 --- a/src/main/java/com/keuin/kbackupfabric/util/backup/inc/identifier/SingleHashIdentifier.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.keuin.kbackupfabric.util.backup.inc.identifier; - -import java.io.File; -import java.util.Arrays; - -/** - * A simple identifier based on a single hash function. - */ -public abstract class SingleHashIdentifier implements ObjectIdentifier { - - private final byte[] hash; - - protected SingleHashIdentifier(byte[] hash) { - this.hash = Arrays.copyOf(hash, hash.length); - } - - /** - * The hash function. - * - * @param file the file to be hashed. - * @return the hash bytes. 
- */ - protected abstract byte[] hash(File file); - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof SingleHashIdentifier)) { - return false; - } - return Arrays.equals(hash, ((SingleHashIdentifier) obj).hash); - } -} diff --git a/src/main/java/com/keuin/kbackupfabric/util/backup/incremental/ObjectCollection.java b/src/main/java/com/keuin/kbackupfabric/util/backup/incremental/ObjectCollection.java new file mode 100644 index 0000000..d5d766a --- /dev/null +++ b/src/main/java/com/keuin/kbackupfabric/util/backup/incremental/ObjectCollection.java @@ -0,0 +1,45 @@ +package com.keuin.kbackupfabric.util.backup.incremental; + +import com.keuin.kbackupfabric.util.backup.incremental.identifier.ObjectIdentifier; + +import java.util.Objects; +import java.util.Set; + +public class ObjectCollection { + private final String name; + private final Set elements; + private final Set subCollections; + + ObjectCollection(String name, Set elements, Set subCollections) { + this.name = Objects.requireNonNull(name); + this.elements = Objects.requireNonNull(elements); + this.subCollections = Objects.requireNonNull(subCollections); + } + + public String getName() { + return name; + } + + public Set getElements() { + return elements; + } + + public Set getSubCollections() { + return subCollections; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ObjectCollection that = (ObjectCollection) o; + return name.equals(that.name) && + elements.equals(that.elements) && + subCollections.equals(that.subCollections); + } + + @Override + public int hashCode() { + return Objects.hash(name, elements, subCollections); + } +} diff --git a/src/main/java/com/keuin/kbackupfabric/util/backup/incremental/ObjectCollectionFactory.java b/src/main/java/com/keuin/kbackupfabric/util/backup/incremental/ObjectCollectionFactory.java new file mode 100644 index 0000000..8b974db --- /dev/null +++ 
b/src/main/java/com/keuin/kbackupfabric/util/backup/incremental/ObjectCollectionFactory.java @@ -0,0 +1,48 @@ +package com.keuin.kbackupfabric.util.backup.incremental; + +import com.keuin.kbackupfabric.util.backup.incremental.identifier.FileIdentifierFactory; +import com.keuin.kbackupfabric.util.backup.incremental.identifier.ObjectIdentifier; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Objects; +import java.util.Set; + +/** + * Incremental backup is implemented as git-like file collection. + * Files are called `objects`, the collection contains all files distinguished by their + * identifiers. Usually, identifier is the combination of hash and other short information (such as size and another hash). + * The identifier should use hashes that are strong enough, to prevent possible collisions. + */ +public class ObjectCollectionFactory { + private final FileIdentifierFactory identifierFactory; + + public ObjectCollectionFactory(FileIdentifierFactory identifierFactory) { + this.identifierFactory = identifierFactory; + } + + public ObjectCollection fromDirectory(File directory) throws IOException { + final Set subFiles = new HashSet<>(); + final Set subCollections = new HashSet<>(); + + if (!Objects.requireNonNull(directory).isDirectory()) + throw new IllegalArgumentException("given file is not a directory"); + + for (Iterator iter = Files.walk(directory.toPath()).iterator(); iter.hasNext();) { + Path path = iter.next(); + File file = path.toFile(); + if (file.isDirectory()) { + subCollections.add(fromDirectory(file)); + } else { + subFiles.add(identifierFactory.fromFile(file)); + } + } + + return new ObjectCollection(directory.getName(), subFiles, subCollections); + } + +} diff --git a/src/main/java/com/keuin/kbackupfabric/util/backup/incremental/identifier/FileIdentifierFactory.java 
b/src/main/java/com/keuin/kbackupfabric/util/backup/incremental/identifier/FileIdentifierFactory.java new file mode 100644 index 0000000..34ad9a0 --- /dev/null +++ b/src/main/java/com/keuin/kbackupfabric/util/backup/incremental/identifier/FileIdentifierFactory.java @@ -0,0 +1,8 @@ +package com.keuin.kbackupfabric.util.backup.incremental.identifier; + +import java.io.File; +import java.io.IOException; + +public interface FileIdentifierFactory { + T fromFile(File file) throws IOException; +} diff --git a/src/main/java/com/keuin/kbackupfabric/util/backup/incremental/identifier/ObjectIdentifier.java b/src/main/java/com/keuin/kbackupfabric/util/backup/incremental/identifier/ObjectIdentifier.java new file mode 100644 index 0000000..62798e1 --- /dev/null +++ b/src/main/java/com/keuin/kbackupfabric/util/backup/incremental/identifier/ObjectIdentifier.java @@ -0,0 +1,10 @@ +package com.keuin.kbackupfabric.util.backup.incremental.identifier; + +/** + * The identifier distinguishing files in the object collection. + * It should be based on cryptographic hash function in order to prevent possible attacks to the backup system. + * All identifiers should be immutable and implement their own equals method. 
// File: src/main/java/com/keuin/kbackupfabric/util/backup/incremental/identifier/ObjectIdentifier.java

/**
 * The identifier distinguishing files in the object collection.
 * It should be based on a cryptographic hash function in order to prevent possible
 * attacks on the backup system.
 * All identifiers should be immutable and implement their own {@code equals} method
 * (together with a matching {@code hashCode}, because identifiers are stored in
 * hash-based sets).
 */
public interface ObjectIdentifier {
    /**
     * @return the textual form of this identifier
     */
    String getIdentification();
}
I don't know why +// FileChannel channel = inputStream.getChannel(); +// ByteBuffer buffer = ByteBuffer.allocate(128); +// int readLength; +// while ((readLength = channel.read(buffer)) > 0) +// digest.update(buffer); + + // This also works, without warnings + byte[] readBuffer = new byte[1024 * 1024]; + int readLength; + while ((readLength = inputStream.read(readBuffer)) > 0) + digest.update(readBuffer,0, readLength); + + // The below lines also works, but the IDE will complain about the while loop +// DigestInputStream digestInputStream = new DigestInputStream(inputStream, digest); +// while(digestInputStream.read() > 0) +// ; + + return digest.digest(); + } catch (NoSuchAlgorithmException ignored) { + // this shouldn't happen + return new byte[SHA256_LENGTH]; + } + } + +} diff --git a/src/main/java/com/keuin/kbackupfabric/util/backup/incremental/identifier/SingleHashIdentifier.java b/src/main/java/com/keuin/kbackupfabric/util/backup/incremental/identifier/SingleHashIdentifier.java new file mode 100644 index 0000000..62ba47c --- /dev/null +++ b/src/main/java/com/keuin/kbackupfabric/util/backup/incremental/identifier/SingleHashIdentifier.java @@ -0,0 +1,40 @@ +package com.keuin.kbackupfabric.util.backup.incremental.identifier; + +import com.keuin.kbackupfabric.util.BytesUtil; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; + +/** + * A simple identifier based on a single hash function. + */ +public abstract class SingleHashIdentifier implements ObjectIdentifier { + + private final byte[] hash; + + protected SingleHashIdentifier(byte[] hash) { + this.hash = Arrays.copyOf(hash, hash.length); + } + + /** + * The hash function. + * + * @param file the file to be hashed. + * @return the hash bytes. 
+ */ + protected abstract byte[] hash(File file) throws IOException; + + @Override + public String getIdentification() { + return BytesUtil.bytesToHex(hash); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof SingleHashIdentifier)) { + return false; + } + return Arrays.equals(hash, ((SingleHashIdentifier) obj).hash); + } +} -- cgit v1.2.3