diff options
Diffstat (limited to 'src/main/java/com/keuin/kbackupfabric/backup/incremental')
10 files changed, 595 insertions, 0 deletions
diff --git a/src/main/java/com/keuin/kbackupfabric/backup/incremental/ObjectCollection.java b/src/main/java/com/keuin/kbackupfabric/backup/incremental/ObjectCollection.java new file mode 100644 index 0000000..2d07fb4 --- /dev/null +++ b/src/main/java/com/keuin/kbackupfabric/backup/incremental/ObjectCollection.java @@ -0,0 +1,65 @@
package com.keuin.kbackupfabric.backup.incremental;

import java.io.Serializable;
import java.util.*;

/**
 * A named group of {@link ObjectElement}s together with named nested collections.
 * Elements and sub-collections are indexed by name; the maps are copied on construction
 * and only exposed through copies or unmodifiable views.
 */
public class ObjectCollection implements Serializable {
    private final String name;
    private final Map<String, ObjectElement> elements;
    private final Map<String, ObjectCollection> subCollections;

    /**
     * @param name           collection name, must not be null.
     * @param elements       the elements; each must be non-null and carry a unique name.
     * @param subCollections nested collections keyed by name, must not be null.
     * @throws IllegalStateException if two elements share the same name.
     */
    ObjectCollection(String name, Set<ObjectElement> elements, Map<String, ObjectCollection> subCollections) {
        this.name = Objects.requireNonNull(name);
        final Map<String, ObjectElement> byName = new HashMap<>();
        for (ObjectElement element : elements) {
            Objects.requireNonNull(element);
            final ObjectElement previous = byName.put(element.getName(), element);
            if (previous != null) {
                throw new IllegalStateException("elements conflict with the same name");
            }
        }
        this.elements = byName;
        this.subCollections = new HashMap<>(Objects.requireNonNull(subCollections));
    }

    /** @return the collection name. */
    public String getName() {
        return name;
    }

    /** @return a fresh set holding all elements. */
    public Set<ObjectElement> getElementSet() {
        final Collection<ObjectElement> all = elements.values();
        return new HashSet<>(all);
    }

    /** @return a read-only view of the name-to-element map. */
    public Map<String, ObjectElement> getElementMap() {
        return Collections.unmodifiableMap(elements);
    }

    /** @return the element with the given name, or null if there is none. */
    public ObjectElement getElement(String name) {
        return elements.get(name);
    }

    /** @return a fresh set holding all nested collections. */
    public Set<ObjectCollection> getSubCollectionSet() {
        final Collection<ObjectCollection> all = subCollections.values();
        return new HashSet<>(all);
    }

    /** @return a read-only view of the name-to-sub-collection map. */
    public Map<String, ObjectCollection> getSubCollectionMap() {
        return Collections.unmodifiableMap(subCollections);
    }

    /** @return the nested collection with the given name, or null if there is none. */
    public ObjectCollection getSubCollection(String name) {
        return subCollections.get(name);
    }

    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o == null || o.getClass() != getClass()) {
            return false;
        }
        final ObjectCollection other = (ObjectCollection) o;
        if (!name.equals(other.name)) {
            return false;
        }
        if (!elements.equals(other.elements)) {
            return false;
        }
        return subCollections.equals(other.subCollections);
    }

    @Override
    public int hashCode() {
        return Objects.hash(name, elements, subCollections);
    }
}
diff --git a/src/main/java/com/keuin/kbackupfabric/backup/incremental/ObjectCollectionFactory.java b/src/main/java/com/keuin/kbackupfabric/backup/incremental/ObjectCollectionFactory.java new file mode 100644 index 0000000..0e02606 --- /dev/null +++ b/src/main/java/com/keuin/kbackupfabric/backup/incremental/ObjectCollectionFactory.java @@ -0,0 +1,55 @@
package com.keuin.kbackupfabric.backup.incremental;

import com.keuin.kbackupfabric.backup.incremental.identifier.FileIdentifierProvider;
import com.keuin.kbackupfabric.backup.incremental.identifier.ObjectIdentifier;
import com.keuin.kbackupfabric.util.PrintUtil;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;

/**
 * Incremental backup is implemented as git-like file collection.
 * Files are called `objects`, the collection contains all files distinguished by their
 * identifiers. Usually, identifier is the combination of hash and other short information (such as size and another hash).
 * The identifier should use hashes that are strong enough, to prevent possible collisions.
+ */ +public class ObjectCollectionFactory<T extends ObjectIdentifier> { + + private final FileIdentifierProvider<T> identifierFactory; + + public ObjectCollectionFactory(FileIdentifierProvider<T> identifierFactory) { + this.identifierFactory = identifierFactory; + } + + public ObjectCollection fromDirectory(File directory, Set<String> ignoredFiles) throws IOException { + final Set<ObjectElement> subFiles = new HashSet<>(); + final Map<String, ObjectCollection> subCollections = new HashMap<>(); + + if (!Objects.requireNonNull(directory).isDirectory()) + throw new IllegalArgumentException("given file is not a directory"); + + for (Iterator<Path> iter = Files.walk(directory.toPath(), 1).iterator(); iter.hasNext(); ) { + Path path = iter.next(); + if (Files.isSameFile(path, directory.toPath())) + continue; + File file = path.toFile(); + if (file.isDirectory()) { + subCollections.put(file.getName(), fromDirectory(file, ignoredFiles)); + } else if (!ignoredFiles.contains(file.getName())) { + subFiles.add(new ObjectElement(file.getName(), identifierFactory.fromFile(file))); + } else { + PrintUtil.info(String.format("Skipping file %s.", file.getName())); + } + } + + return new ObjectCollection(directory.getName(), subFiles, subCollections); + } + + public ObjectCollection fromDirectory(File directory) throws IOException { + return fromDirectory(directory, Collections.emptySet()); + } + +} diff --git a/src/main/java/com/keuin/kbackupfabric/backup/incremental/ObjectCollectionSerializer.java b/src/main/java/com/keuin/kbackupfabric/backup/incremental/ObjectCollectionSerializer.java new file mode 100644 index 0000000..f45d4d0 --- /dev/null +++ b/src/main/java/com/keuin/kbackupfabric/backup/incremental/ObjectCollectionSerializer.java @@ -0,0 +1,33 @@ +package com.keuin.kbackupfabric.backup.incremental; + +import java.io.*; +import java.util.Objects; + +/** + * Serialize and deserialize ObjectCollection from/to the disk file. 
+ */ +public class ObjectCollectionSerializer { + public static ObjectCollection fromFile(File file) throws IOException { + Objects.requireNonNull(file); + ObjectCollection collection; + try (FileInputStream fileInputStream = new FileInputStream(file)) { + try (ObjectInputStream objectInputStream = new ObjectInputStream(fileInputStream)) { + collection = (ObjectCollection) objectInputStream.readObject(); + } catch (ClassNotFoundException ignored) { + // this should not happen + return null; + } + } + return collection; + } + + public static void toFile(ObjectCollection collection, File file) throws IOException { + Objects.requireNonNull(collection); + Objects.requireNonNull(file); + try (FileOutputStream fileOutputStream = new FileOutputStream(file)) { + try (ObjectOutputStream objectOutputStream = new ObjectOutputStream(fileOutputStream)) { + objectOutputStream.writeObject(collection); + } + } + } +} diff --git a/src/main/java/com/keuin/kbackupfabric/backup/incremental/ObjectElement.java b/src/main/java/com/keuin/kbackupfabric/backup/incremental/ObjectElement.java new file mode 100644 index 0000000..0bb7873 --- /dev/null +++ b/src/main/java/com/keuin/kbackupfabric/backup/incremental/ObjectElement.java @@ -0,0 +1,60 @@ +package com.keuin.kbackupfabric.backup.incremental; + +import com.keuin.kbackupfabric.backup.incremental.identifier.ObjectIdentifier; + +import java.io.Serializable; +import java.util.Objects; + +/** + * Representing a file in a ObjectCollection. + * Immutable. + */ +public class ObjectElement implements Serializable { + private final String name; + private final ObjectIdentifier identifier; + + public ObjectElement(String name, ObjectIdentifier identifier) { + Objects.requireNonNull(name); + Objects.requireNonNull(identifier); + this.name = name; + this.identifier = identifier; + } + + /** + * Get file name. + * @return the file name. 
+ */ + public String getName() { + return name; + } + + /** + * Get file identifier, which is considered to be different between files with different contents. + * @return the identifier. + */ + public ObjectIdentifier getIdentifier() { + return identifier; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ObjectElement that = (ObjectElement) o; + return name.equals(that.name) && + identifier.equals(that.identifier); + } + + @Override + public int hashCode() { + return Objects.hash(name, identifier); + } + + @Override + public String toString() { + return "ObjectElement{" + + "name='" + name + '\'' + + ", identifier=" + identifier + + '}'; + } +} diff --git a/src/main/java/com/keuin/kbackupfabric/backup/incremental/identifier/FileIdentifierProvider.java b/src/main/java/com/keuin/kbackupfabric/backup/incremental/identifier/FileIdentifierProvider.java new file mode 100644 index 0000000..e1039d6 --- /dev/null +++ b/src/main/java/com/keuin/kbackupfabric/backup/incremental/identifier/FileIdentifierProvider.java @@ -0,0 +1,15 @@ +package com.keuin.kbackupfabric.backup.incremental.identifier; + +import java.io.File; +import java.io.IOException; + +public interface FileIdentifierProvider<T extends ObjectIdentifier> { + /** + * Generate file identifier from a random file. The file is not necessarily in the object base. + * + * @param file the file. + * @return the file identifier. + * @throws IOException when an I/O error occurs. 
*/
    T fromFile(File file) throws IOException;
}
diff --git a/src/main/java/com/keuin/kbackupfabric/backup/incremental/identifier/ObjectIdentifier.java b/src/main/java/com/keuin/kbackupfabric/backup/incremental/identifier/ObjectIdentifier.java new file mode 100644 index 0000000..07cd390 --- /dev/null +++ b/src/main/java/com/keuin/kbackupfabric/backup/incremental/identifier/ObjectIdentifier.java @@ -0,0 +1,13 @@
package com.keuin.kbackupfabric.backup.incremental.identifier;

import java.io.Serializable;

/**
 * Identifier that tells files in the object collection apart.
 * Implementations should rely on a cryptographic hash function so that the backup
 * system cannot be attacked through crafted collisions.
 * Every implementation must be immutable and provide its own equals method.
 */
public interface ObjectIdentifier extends Serializable {
    /**
     * @return the textual form of this identifier.
     */
    String getIdentification();
}
diff --git a/src/main/java/com/keuin/kbackupfabric/backup/incremental/identifier/Sha256Identifier.java b/src/main/java/com/keuin/kbackupfabric/backup/incremental/identifier/Sha256Identifier.java new file mode 100644 index 0000000..50e6aa4 --- /dev/null +++ b/src/main/java/com/keuin/kbackupfabric/backup/incremental/identifier/Sha256Identifier.java @@ -0,0 +1,88 @@
package com.keuin.kbackupfabric.backup.incremental.identifier;

import com.keuin.kbackupfabric.util.BytesUtil;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Objects;

/**
 * Identifier based on sha256.
 * Immutable.
+ */ +public class Sha256Identifier extends SingleHashIdentifier { + + private static final int SHA256_LENGTH = 32; + private static final Sha256Identifier DUMMY = new Sha256Identifier(new byte[SHA256_LENGTH]); // only for using its hash method + private static final FileIdentifierProvider<Sha256Identifier> factory = Sha256Identifier::fromFile; + private static final String marker = "S2"; + + public static Sha256Identifier fromFile(File file) throws IOException { + if (!file.isFile()) { + throw new IllegalArgumentException("file is not a file"); + } + return new Sha256Identifier(DUMMY.hash(file)); + } + + /** + * Load sha-256 from a named file. Only used in StorageObjectLoader. + * + * @param fileName the file name. + * @return identifier. + */ + static Sha256Identifier fromFileName(String fileName) { + if (!fileName.matches(marker + "-[0-9A-Fa-f]{32}")) + return null; + String hexString = fileName.substring(marker.length() + 1); + return new Sha256Identifier(BytesUtil.hexToBytes(hexString)); + } + + public static FileIdentifierProvider<Sha256Identifier> getFactory() { + return factory; + } + + protected Sha256Identifier(byte[] hash) { + super(hash, marker); + Objects.requireNonNull(hash); + if (hash.length != SHA256_LENGTH) { + throw new IllegalStateException(String.format("SHA256 must be %d bytes", SHA256_LENGTH)); + } + } + + @Override + protected byte[] hash(File file) throws IOException { + try { + MessageDigest digest = MessageDigest.getInstance("SHA-256"); + + try (FileInputStream inputStream = new FileInputStream(file)) { + // This does not work. 
I don't know why +// FileChannel channel = inputStream.getChannel(); +// ByteBuffer buffer = ByteBuffer.allocate(128); +// int readLength; +// while ((readLength = channel.read(buffer)) > 0) +// digest.update(buffer); + + // This also works, without warnings + byte[] readBuffer = new byte[1024 * 1024]; + int readLength; + while ((readLength = inputStream.read(readBuffer)) > 0) + digest.update(readBuffer, 0, readLength); + + // The below lines also works, but the IDE will complain about the while loop +// DigestInputStream digestInputStream = new DigestInputStream(inputStream, digest); +// while(digestInputStream.read() > 0) +// ; + + return digest.digest(); + } + + } catch (NoSuchAlgorithmException ignored) { + // this shouldn't happen + return new byte[SHA256_LENGTH]; + } + } + +} diff --git a/src/main/java/com/keuin/kbackupfabric/backup/incremental/identifier/SingleHashIdentifier.java b/src/main/java/com/keuin/kbackupfabric/backup/incremental/identifier/SingleHashIdentifier.java new file mode 100644 index 0000000..9fd61c8 --- /dev/null +++ b/src/main/java/com/keuin/kbackupfabric/backup/incremental/identifier/SingleHashIdentifier.java @@ -0,0 +1,53 @@ +package com.keuin.kbackupfabric.backup.incremental.identifier; + +import com.keuin.kbackupfabric.util.BytesUtil; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.Objects; + +/** + * A simple identifier based on a single hash function. + * Immutable. + */ +public abstract class SingleHashIdentifier implements ObjectIdentifier { + + private final byte[] hash; + private final String type; + + protected SingleHashIdentifier(byte[] hash, String type) { + Objects.requireNonNull(hash); + Objects.requireNonNull(type); + this.hash = Arrays.copyOf(hash, hash.length); + this.type = type; + } + + /** + * The hash function. + * + * @param file the file to be hashed. + * @return the hash bytes. 
+ */ + protected abstract byte[] hash(File file) throws IOException; + + @Override + public String getIdentification() { + return type + "-" + BytesUtil.bytesToHex(hash); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof SingleHashIdentifier)) { + return false; + } + return Arrays.equals(hash, ((SingleHashIdentifier) obj).hash); + } + + @Override + public int hashCode() { + int result = Objects.hash(type); + result = 31 * result + Arrays.hashCode(hash); + return result; + } +} diff --git a/src/main/java/com/keuin/kbackupfabric/backup/incremental/identifier/StorageObjectLoader.java b/src/main/java/com/keuin/kbackupfabric/backup/incremental/identifier/StorageObjectLoader.java new file mode 100644 index 0000000..55dd6bd --- /dev/null +++ b/src/main/java/com/keuin/kbackupfabric/backup/incremental/identifier/StorageObjectLoader.java @@ -0,0 +1,25 @@ +package com.keuin.kbackupfabric.backup.incremental.identifier; + +import java.io.File; +import java.util.Objects; + +public class StorageObjectLoader { + /** + * Get identifier from storage file. + * + * @param file storage file. + * @return identifier. If failed, return null. + */ + public static ObjectIdentifier asIdentifier(File file) { + Objects.requireNonNull(file); + String fileName = file.getName(); + ObjectIdentifier identifier; + + identifier = Sha256Identifier.fromFileName(fileName); + if (identifier != null) + return identifier; + + // Add more identifiers. 
+ return null; + } +} diff --git a/src/main/java/com/keuin/kbackupfabric/backup/incremental/manager/IncrementalBackupStorageManager.java b/src/main/java/com/keuin/kbackupfabric/backup/incremental/manager/IncrementalBackupStorageManager.java new file mode 100644 index 0000000..1984362 --- /dev/null +++ b/src/main/java/com/keuin/kbackupfabric/backup/incremental/manager/IncrementalBackupStorageManager.java @@ -0,0 +1,188 @@ +package com.keuin.kbackupfabric.backup.incremental.manager; + +import com.keuin.kbackupfabric.backup.incremental.ObjectCollection; +import com.keuin.kbackupfabric.backup.incremental.ObjectElement; +import com.keuin.kbackupfabric.backup.incremental.identifier.ObjectIdentifier; +import com.keuin.kbackupfabric.backup.incremental.identifier.StorageObjectLoader; +import com.keuin.kbackupfabric.util.PrintUtil; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.*; + +import static org.apache.commons.io.FileUtils.forceDelete; + +public class IncrementalBackupStorageManager { + + private final Path backupStorageBase; + private final Map<ObjectIdentifier, File> map = new HashMap<>(); + private boolean loaded = false; + + public IncrementalBackupStorageManager(Path backupStorageBase) { + this.backupStorageBase = backupStorageBase; + } + + /** + * Add a object collection to storage base. + * @param collection the collection. + * @return objects copied to the base. + * @throws IOException I/O Error. 
+ */ + public int addObjectCollection(ObjectCollection collection, File collectionBasePath) throws IOException { + if (!backupStorageBase.toFile().isDirectory()) { + if (!backupStorageBase.toFile().mkdirs()) + throw new IOException("Backup storage base directory does not exist, and failed to create it."); + } + Objects.requireNonNull(collection); + Objects.requireNonNull(collectionBasePath); + + int copyCount = 0; + + // copy sub files + for (Map.Entry<String, ObjectElement> entry : collection.getElementMap().entrySet()) { + File copyDestination = new File(backupStorageBase.toFile(), entry.getValue().getIdentifier().getIdentification()); + if (!baseContainsObject(entry.getValue())) { + // element does not exist. copy. + Files.copy(Paths.get(collectionBasePath.getAbsolutePath(), entry.getKey()), copyDestination.toPath()); + ++copyCount; + } + } + + //copy sub dirs recursively + for (Map.Entry<String, ObjectCollection> entry : collection.getSubCollectionMap().entrySet()) { + File newBase = new File(collectionBasePath, entry.getKey()); + copyCount += addObjectCollection(entry.getValue(), newBase); + } + + return copyCount; + } + + /** + * Restore an object collection from the storage base. i.e., restore the save from backup storage. + * @param collection the collection to be restored. + * @param collectionBasePath save path of the collection. + * @return objects restored from the base. + * @throws IOException I/O Error. 
+ */ + public int restoreObjectCollection(ObjectCollection collection, File collectionBasePath) throws IOException { + Objects.requireNonNull(collection); + Objects.requireNonNull(collectionBasePath); + + int copyCount = 0; + + // touch directory + if (!collectionBasePath.exists()) { + int retryCounter = 0; + boolean success = false; + while (retryCounter++ < 5) { + if (collectionBasePath.mkdirs()) { + success = true; + break; + } + } + if (!success) { + throw new IOException("Failed to create directory " + collectionBasePath.getAbsolutePath()); + } + } + + // copy sub files + for (Map.Entry<String, ObjectElement> entry : collection.getElementMap().entrySet()) { + File copySource = new File(backupStorageBase.toFile(), entry.getValue().getIdentifier().getIdentification()); + File copyTarget = new File(collectionBasePath.getAbsolutePath(), entry.getKey()); + + if (!baseContainsObject(entry.getValue())) { + throw new IOException(String.format("File %s does not exist in the base.", copySource.getName())); + } + if (copyTarget.exists()) { + boolean successDeleting = false; + for (int i = 0; i < 5; ++i) { + try { + forceDelete(copyTarget); + successDeleting = true; + break; + } catch (FileNotFoundException ignored) { + break; + } catch (IOException e) { + PrintUtil.error(String.format("Failed to delete file %s, retry.", copyTarget.getName())); + } + } + if (!successDeleting) { + String msg = String.format("Failed to delete file %s.", copyTarget.getName()); + PrintUtil.error(msg); + throw new IOException(msg); + } + } + + Files.copy(copySource.toPath(), copyTarget.toPath()); + ++copyCount; + } + + //copy sub dirs recursively + for (Map.Entry<String, ObjectCollection> entry : collection.getSubCollectionMap().entrySet()) { + File newBase = new File(collectionBasePath, entry.getKey()); + copyCount += restoreObjectCollection(entry.getValue(), newBase); + } + + return copyCount; + } + + public int cleanUnusedObjects(Iterable<ObjectCollection> collectionIterable) { + // 
construct object list in memory + Set<String> objects = new HashSet<>(); +// backupStorageBase + + for (ObjectCollection collection : collectionIterable) { + for (ObjectElement ele : collection.getElementMap().values()) { + + } + } + throw new RuntimeException("not impl"); + } + + /** + * Check all objects, return unused ones. + * + * @return the unused ones. + */ + private Map<ObjectIdentifier, File> markUnusedObjects() { + throw new RuntimeException("not impl"); + } + + /** + * Check if the backup base contains given element. + * + * @param objectElement the element. + * @return true or false. + */ + private boolean baseContainsObject(ObjectElement objectElement) { + // This may be extended to use more variants of hash functions and combinations of other attributes (such as file size) + return (new File(backupStorageBase.toFile(), objectElement.getIdentifier().getIdentification())).exists(); + } + + private void lazyLoadStorage() throws IOException { + if (!loaded) { + loadStorage(); + loaded = true; + } + } + + private synchronized void loadStorage() throws IOException { + map.clear(); + Files.walk(backupStorageBase, 1).forEach(path -> { + File file = path.toFile(); + ObjectIdentifier identifier = StorageObjectLoader.asIdentifier(file); + if (identifier == null) { + map.clear(); + throw new IllegalStateException(String.format( + "Bad storage object %s: cannot recognize identifier.", file.getName() + )); + } + map.put(identifier, file); + }); + } + +} |